author | Shih-wei Liao <sliao@google.com> | 2010-04-07 12:21:42 -0700
committer | Shih-wei Liao <sliao@google.com> | 2010-04-07 12:21:42 -0700
commit | e4454320b3cfffe926a487c33fbeb454366de2f8 (patch)
tree | 133c05da684edf4a3b2529bcacfa996298c455f6 /lib
parent | 20570085304f0a4ab4f112a01d77958bbd2827a1 (diff)
libbcc
Change-Id: Ieaa3ebd5a38f370752495549f8870b534eeedfc5
Diffstat (limited to 'lib')
389 files changed, 24366 insertions, 13935 deletions
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp index 761cd46..1053955 100644 --- a/lib/Analysis/AliasAnalysisCounter.cpp +++ b/lib/Analysis/AliasAnalysisCounter.cpp @@ -162,7 +162,7 @@ AliasAnalysisCounter::getModRefInfo(CallSite CS, Value *P, unsigned Size) { errs() << MRString << ": Ptr: "; errs() << "[" << Size << "B] "; WriteAsOperand(errs(), P, true, M); - errs() << "\t<->" << *CS.getInstruction(); + errs() << "\t<->" << *CS.getInstruction() << '\n'; } return R; } diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index 6b0a956..308b9e3 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -115,11 +115,11 @@ bool AAEval::runOnFunction(Function &F) { SetVector<CallSite> CallSites; for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) - if (isa<PointerType>(I->getType())) // Add all pointer arguments + if (I->getType()->isPointerTy()) // Add all pointer arguments Pointers.insert(I); for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { - if (isa<PointerType>(I->getType())) // Add all pointer instructions + if (I->getType()->isPointerTy()) // Add all pointer instructions Pointers.insert(&*I); Instruction &Inst = *I; User::op_iterator OI = Inst.op_begin(); @@ -128,7 +128,7 @@ bool AAEval::runOnFunction(Function &F) { isa<Function>(CS.getCalledValue())) ++OI; // Skip actual functions for direct function calls. for (; OI != Inst.op_end(); ++OI) - if (isa<PointerType>((*OI)->getType()) && !isa<ConstantPointerNull>(*OI)) + if ((*OI)->getType()->isPointerTy() && !isa<ConstantPointerNull>(*OI)) Pointers.insert(*OI); if (CS.getInstruction()) CallSites.insert(CS); diff --git a/lib/Analysis/Android.mk b/lib/Analysis/Android.mk new file mode 100644 index 0000000..1d038f2 --- /dev/null +++ b/lib/Analysis/Android.mk @@ -0,0 +1,69 @@ +LOCAL_PATH:= $(call my-dir) + +analysis_SRC_FILES := \ + AliasAnalysis.cpp \ + AliasAnalysisCounter.cpp \ + AliasAnalysisEvaluator.cpp \ + AliasDebugger.cpp \ + AliasSetTracker.cpp \ + Analysis.cpp \ + BasicAliasAnalysis.cpp \ + CFGPrinter.cpp \ + CaptureTracking.cpp \ + ConstantFolding.cpp \ + DbgInfoPrinter.cpp \ + DebugInfo.cpp \ + DomPrinter.cpp \ + IVUsers.cpp \ + InlineCost.cpp \ + InstCount.cpp \ + InstructionSimplify.cpp \ + Interval.cpp \ + IntervalPartition.cpp \ + LazyValueInfo.cpp \ + LibCallAliasAnalysis.cpp \ + LibCallSemantics.cpp \ + LiveValues.cpp \ + MemoryBuiltins.cpp \ + MemoryDependenceAnalysis.cpp \ + LoopDependenceAnalysis.cpp \ + LoopInfo.cpp \ + LoopPass.cpp \ + PHITransAddr.cpp \ + PointerTracking.cpp \ + PostDominators.cpp \ + ProfileEstimatorPass.cpp \ + ProfileInfo.cpp \ + ProfileInfoLoader.cpp \ + ProfileInfoLoaderPass.cpp \ + ProfileVerifierPass.cpp \ + ScalarEvolution.cpp \ + ScalarEvolutionAliasAnalysis.cpp \ + ScalarEvolutionExpander.cpp \ + SparsePropagation.cpp \ + Trace.cpp \ + ValueTracking.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(analysis_SRC_FILES) + +LOCAL_MODULE:= libLLVMAnalysis + +include $(LLVM_HOST_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(analysis_SRC_FILES) + +LOCAL_MODULE:= libLLVMAnalysis + +include $(LLVM_DEVICE_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_STATIC_LIBRARY) diff --git 
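The hunks above replace `isa<PointerType>(V->getType())` with `V->getType()->isPointerTy()`. A minimal sketch (not taken from the patch) of the two equivalent spellings, assuming the 2.7-era header layout:

```cpp
#include "llvm/DerivedTypes.h"
#include "llvm/Type.h"
#include "llvm/Value.h"
using namespace llvm;

// Pre-patch spelling: cast-based query through isa<>.
static bool isPointerValueOld(const Value *V) {
  return isa<PointerType>(V->getType());
}

// Post-patch spelling: direct predicate on Type, same result.
static bool isPointerValueNew(const Value *V) {
  return V->getType()->isPointerTy();
}
```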
a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 36b831c..31a649d 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -290,7 +290,7 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) { for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); CI != CE; ++CI, ++ArgNo) { // Only look at the no-capture pointer arguments. - if (!isa<PointerType>((*CI)->getType()) || + if (!(*CI)->getType()->isPointerTy() || !CS.paramHasAttr(ArgNo+1, Attribute::NoCapture)) continue; @@ -662,7 +662,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size, // Are we checking for alias of the same value? if (V1 == V2) return MustAlias; - if (!isa<PointerType>(V1->getType()) || !isa<PointerType>(V2->getType())) + if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy()) return NoAlias; // Scalars cannot alias each other // Figure out what objects these things are pointing to if we can. diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index 10a8b11..8767c18 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -44,7 +44,7 @@ static int const Threshold = 20; /// counts as capturing it or not. bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures) { - assert(isa<PointerType>(V->getType()) && "Capture is for pointers only!"); + assert(V->getType()->isPointerTy() && "Capture is for pointers only!"); SmallVector<Use*, Threshold> Worklist; SmallSet<Use*, Threshold> Visited; int Count = 0; diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index ba87040..114db2d 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -80,7 +80,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy, // First thing is first. We only want to think about integer here, so if // we have something in FP form, recast it as integer. - if (DstEltTy->isFloatingPoint()) { + if (DstEltTy->isFloatingPointTy()) { // Fold to an vector of integers with same size as our FP type. unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits(); const Type *DestIVTy = @@ -95,7 +95,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy, // Okay, we know the destination is integer, if the input is FP, convert // it to integer first. - if (SrcEltTy->isFloatingPoint()) { + if (SrcEltTy->isFloatingPointTy()) { unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); const Type *SrcIVTy = VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt); @@ -359,7 +359,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, MapTy = Type::getInt32PtrTy(C->getContext()); else if (LoadTy->isDoubleTy()) MapTy = Type::getInt64PtrTy(C->getContext()); - else if (isa<VectorType>(LoadTy)) { + else if (LoadTy->isVectorTy()) { MapTy = IntegerType::get(C->getContext(), TD.getTypeAllocSizeInBits(LoadTy)); MapTy = PointerType::getUnqual(MapTy); @@ -605,7 +605,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, SmallVector<Constant*, 32> NewIdxs; do { if (const SequentialType *ATy = dyn_cast<SequentialType>(Ty)) { - if (isa<PointerType>(ATy)) { + if (ATy->isPointerTy()) { // The only pointer indexing we'll do is on the first index of the GEP. 
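Several of the ConstantFolding hunks above move to the `isFloatingPointTy()` / `isVectorTy()` predicates. A hedged sketch of the step FoldBitCast performs when the element type is floating point, recasting through an integer vector of equal bit width (the helper name and header paths are assumptions, not the patch's code):

```cpp
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
using namespace llvm;

// If EltTy is a floating-point type, build the integer vector type of the
// same total width that the fold reinterprets the constant through;
// otherwise return 0 to signal that no recast is needed.
static const VectorType *fpVectorAsIntVector(const Type *EltTy,
                                             unsigned NumElts,
                                             LLVMContext &Ctx) {
  if (!EltTy->isFloatingPointTy())
    return 0;
  unsigned FPWidth = EltTy->getPrimitiveSizeInBits(); // e.g. 32 for float
  return VectorType::get(IntegerType::get(Ctx, FPWidth), NumElts);
}
```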
if (!NewIdxs.empty()) break; @@ -783,45 +783,12 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if // the int size is >= the ptr size. This requires knowing the width of a // pointer, so it can't be done in ConstantExpr::getCast. - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) if (TD && - TD->getPointerSizeInBits() <= - CE->getType()->getScalarSizeInBits()) { - if (CE->getOpcode() == Instruction::PtrToInt) - return FoldBitCast(CE->getOperand(0), DestTy, *TD); - - // If there's a constant offset added to the integer value before - // it is casted back to a pointer, see if the expression can be - // converted into a GEP. - if (CE->getOpcode() == Instruction::Add) - if (ConstantInt *L = dyn_cast<ConstantInt>(CE->getOperand(0))) - if (ConstantExpr *R = dyn_cast<ConstantExpr>(CE->getOperand(1))) - if (R->getOpcode() == Instruction::PtrToInt) - if (GlobalVariable *GV = - dyn_cast<GlobalVariable>(R->getOperand(0))) { - const PointerType *GVTy = cast<PointerType>(GV->getType()); - if (const ArrayType *AT = - dyn_cast<ArrayType>(GVTy->getElementType())) { - const Type *ElTy = AT->getElementType(); - uint64_t AllocSize = TD->getTypeAllocSize(ElTy); - APInt PSA(L->getValue().getBitWidth(), AllocSize); - if (ElTy == cast<PointerType>(DestTy)->getElementType() && - L->getValue().urem(PSA) == 0) { - APInt ElemIdx = L->getValue().udiv(PSA); - if (ElemIdx.ult(APInt(ElemIdx.getBitWidth(), - AT->getNumElements()))) { - Constant *Index[] = { - Constant::getNullValue(CE->getType()), - ConstantInt::get(ElTy->getContext(), ElemIdx) - }; - return - ConstantExpr::getGetElementPtr(GV, &Index[0], 2); - } - } - } - } - } - } + TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() && + CE->getOpcode() == Instruction::PtrToInt) + return FoldBitCast(CE->getOperand(0), DestTy, *TD); + return ConstantExpr::getCast(Opcode, Ops[0], DestTy); case Instruction::Trunc: case Instruction::ZExt: @@ -1179,6 +1146,12 @@ llvm::ConstantFoldCall(Function *F, return 0; } + if (isa<UndefValue>(Operands[0])) { + if (Name.startswith("llvm.bswap")) + return Operands[0]; + return 0; + } + return 0; } diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp index 258f1db..5cfe666 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/Analysis/DebugInfo.cpp @@ -1007,12 +1007,15 @@ DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context, /// CreateBlock - This creates a descriptor for a lexical block with the /// specified parent VMContext. 
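Alongside the inttoptr simplification, the ConstantFoldCall hunk above starts folding intrinsics whose first operand is undef: byte-swapping an undef value is still undef. A small standalone restatement of that new check (only the helper name is invented):

```cpp
#include "llvm/ADT/StringRef.h"
#include "llvm/Constants.h"
using namespace llvm;

// Mirrors the new early-out: bswap of undef folds to its (undef) operand;
// anything else is left for the later cases.
static Constant *foldBswapOfUndef(StringRef Name, Constant *Op0) {
  if (isa<UndefValue>(Op0) && Name.startswith("llvm.bswap"))
    return Op0;
  return 0; // no fold
}
```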
-DILexicalBlock DIFactory::CreateLexicalBlock(DIDescriptor Context) { +DILexicalBlock DIFactory::CreateLexicalBlock(DIDescriptor Context, + unsigned LineNo, unsigned Col) { Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_lexical_block), - Context.getNode() + Context.getNode(), + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + ConstantInt::get(Type::getInt32Ty(VMContext), Col) }; - return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 2)); + return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 4)); } /// CreateNameSpace - This creates new descriptor for a namespace diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp deleted file mode 100644 index 4180206..0000000 --- a/lib/Analysis/IPA/Andersens.cpp +++ /dev/null @@ -1,2868 +0,0 @@ -//===- Andersens.cpp - Andersen's Interprocedural Alias Analysis ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines an implementation of Andersen's interprocedural alias -// analysis -// -// In pointer analysis terms, this is a subset-based, flow-insensitive, -// field-sensitive, and context-insensitive algorithm pointer algorithm. -// -// This algorithm is implemented as three stages: -// 1. Object identification. -// 2. Inclusion constraint identification. -// 3. Offline constraint graph optimization -// 4. Inclusion constraint solving. -// -// The object identification stage identifies all of the memory objects in the -// program, which includes globals, heap allocated objects, and stack allocated -// objects. -// -// The inclusion constraint identification stage finds all inclusion constraints -// in the program by scanning the program, looking for pointer assignments and -// other statements that effect the points-to graph. For a statement like "A = -// B", this statement is processed to indicate that A can point to anything that -// B can point to. Constraints can handle copies, loads, and stores, and -// address taking. -// -// The offline constraint graph optimization portion includes offline variable -// substitution algorithms intended to compute pointer and location -// equivalences. Pointer equivalences are those pointers that will have the -// same points-to sets, and location equivalences are those variables that -// always appear together in points-to sets. It also includes an offline -// cycle detection algorithm that allows cycles to be collapsed sooner -// during solving. -// -// The inclusion constraint solving phase iteratively propagates the inclusion -// constraints until a fixed point is reached. This is an O(N^3) algorithm. -// -// Function constraints are handled as if they were structs with X fields. -// Thus, an access to argument X of function Y is an access to node index -// getNode(Y) + X. This representation allows handling of indirect calls -// without any issues. To wit, an indirect call Y(a,b) is equivalent to -// *(Y + 1) = a, *(Y + 2) = b. -// The return node for a function is always located at getNode(F) + -// CallReturnPos. The arguments start at getNode(F) + CallArgPos. -// -// Future Improvements: -// Use of BDD's. 
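For callers of the widened DIFactory::CreateLexicalBlock shown above, a hedged usage sketch (the wrapper name and where the line/column values come from are placeholders):

```cpp
#include "llvm/Analysis/DebugInfo.h"
using namespace llvm;

// Lexical-block descriptors now carry the source position as well as the
// enclosing scope, so callers pass the block's line and column through.
static DILexicalBlock makeLexicalBlock(DIFactory &Factory, DIDescriptor Scope,
                                       unsigned Line, unsigned Col) {
  return Factory.CreateLexicalBlock(Scope, Line, Col);
}
```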
-//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "anders-aa" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/InstIterator.h" -#include "llvm/Support/InstVisitor.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Support/Debug.h" -#include "llvm/System/Atomic.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SparseBitVector.h" -#include "llvm/ADT/DenseSet.h" -#include <algorithm> -#include <set> -#include <list> -#include <map> -#include <stack> -#include <vector> -#include <queue> - -// Determining the actual set of nodes the universal set can consist of is very -// expensive because it means propagating around very large sets. We rely on -// other analysis being able to determine which nodes can never be pointed to in -// order to disambiguate further than "points-to anything". -#define FULL_UNIVERSAL 0 - -using namespace llvm; -#ifndef NDEBUG -STATISTIC(NumIters , "Number of iterations to reach convergence"); -#endif -STATISTIC(NumConstraints, "Number of constraints"); -STATISTIC(NumNodes , "Number of nodes"); -STATISTIC(NumUnified , "Number of variables unified"); -STATISTIC(NumErased , "Number of redundant constraints erased"); - -static const unsigned SelfRep = (unsigned)-1; -static const unsigned Unvisited = (unsigned)-1; -// Position of the function return node relative to the function node. -static const unsigned CallReturnPos = 1; -// Position of the function call node relative to the function node. -static const unsigned CallFirstArgPos = 2; - -namespace { - struct BitmapKeyInfo { - static inline SparseBitVector<> *getEmptyKey() { - return reinterpret_cast<SparseBitVector<> *>(-1); - } - static inline SparseBitVector<> *getTombstoneKey() { - return reinterpret_cast<SparseBitVector<> *>(-2); - } - static unsigned getHashValue(const SparseBitVector<> *bitmap) { - return bitmap->getHashValue(); - } - static bool isEqual(const SparseBitVector<> *LHS, - const SparseBitVector<> *RHS) { - if (LHS == RHS) - return true; - else if (LHS == getEmptyKey() || RHS == getEmptyKey() - || LHS == getTombstoneKey() || RHS == getTombstoneKey()) - return false; - - return *LHS == *RHS; - } - }; - - class Andersens : public ModulePass, public AliasAnalysis, - private InstVisitor<Andersens> { - struct Node; - - /// Constraint - Objects of this structure are used to represent the various - /// constraints identified by the algorithm. The constraints are 'copy', - /// for statements like "A = B", 'load' for statements like "A = *B", - /// 'store' for statements like "*A = B", and AddressOf for statements like - /// A = alloca; The Offset is applied as *(A + K) = B for stores, - /// A = *(B + K) for loads, and A = B + K for copies. 
It is - /// illegal on addressof constraints (because it is statically - /// resolvable to A = &C where C = B + K) - - struct Constraint { - enum ConstraintType { Copy, Load, Store, AddressOf } Type; - unsigned Dest; - unsigned Src; - unsigned Offset; - - Constraint(ConstraintType Ty, unsigned D, unsigned S, unsigned O = 0) - : Type(Ty), Dest(D), Src(S), Offset(O) { - assert((Offset == 0 || Ty != AddressOf) && - "Offset is illegal on addressof constraints"); - } - - bool operator==(const Constraint &RHS) const { - return RHS.Type == Type - && RHS.Dest == Dest - && RHS.Src == Src - && RHS.Offset == Offset; - } - - bool operator!=(const Constraint &RHS) const { - return !(*this == RHS); - } - - bool operator<(const Constraint &RHS) const { - if (RHS.Type != Type) - return RHS.Type < Type; - else if (RHS.Dest != Dest) - return RHS.Dest < Dest; - else if (RHS.Src != Src) - return RHS.Src < Src; - return RHS.Offset < Offset; - } - }; - - // Information DenseSet requires implemented in order to be able to do - // it's thing - struct PairKeyInfo { - static inline std::pair<unsigned, unsigned> getEmptyKey() { - return std::make_pair(~0U, ~0U); - } - static inline std::pair<unsigned, unsigned> getTombstoneKey() { - return std::make_pair(~0U - 1, ~0U - 1); - } - static unsigned getHashValue(const std::pair<unsigned, unsigned> &P) { - return P.first ^ P.second; - } - static unsigned isEqual(const std::pair<unsigned, unsigned> &LHS, - const std::pair<unsigned, unsigned> &RHS) { - return LHS == RHS; - } - }; - - struct ConstraintKeyInfo { - static inline Constraint getEmptyKey() { - return Constraint(Constraint::Copy, ~0U, ~0U, ~0U); - } - static inline Constraint getTombstoneKey() { - return Constraint(Constraint::Copy, ~0U - 1, ~0U - 1, ~0U - 1); - } - static unsigned getHashValue(const Constraint &C) { - return C.Src ^ C.Dest ^ C.Type ^ C.Offset; - } - static bool isEqual(const Constraint &LHS, - const Constraint &RHS) { - return LHS.Type == RHS.Type && LHS.Dest == RHS.Dest - && LHS.Src == RHS.Src && LHS.Offset == RHS.Offset; - } - }; - - // Node class - This class is used to represent a node in the constraint - // graph. Due to various optimizations, it is not always the case that - // there is a mapping from a Node to a Value. In particular, we add - // artificial Node's that represent the set of pointed-to variables shared - // for each location equivalent Node. - struct Node { - private: - static volatile sys::cas_flag Counter; - - public: - Value *Val; - SparseBitVector<> *Edges; - SparseBitVector<> *PointsTo; - SparseBitVector<> *OldPointsTo; - std::list<Constraint> Constraints; - - // Pointer and location equivalence labels - unsigned PointerEquivLabel; - unsigned LocationEquivLabel; - // Predecessor edges, both real and implicit - SparseBitVector<> *PredEdges; - SparseBitVector<> *ImplicitPredEdges; - // Set of nodes that point to us, only use for location equivalence. - SparseBitVector<> *PointedToBy; - // Number of incoming edges, used during variable substitution to early - // free the points-to sets - unsigned NumInEdges; - // True if our points-to set is in the Set2PEClass map - bool StoredInHash; - // True if our node has no indirect constraints (complex or otherwise) - bool Direct; - // True if the node is address taken, *or* it is part of a group of nodes - // that must be kept together. This is set to true for functions and - // their arg nodes, which must be kept at the same position relative to - // their base function node. 
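For reference while reading the removed pass, an illustrative, pared-down restatement (not part of the patch) of how the four constraint kinds encode the basic statement forms; the struct is a simplified copy of the one deleted above and the node ids are placeholders:

```cpp
#include <vector>

struct Constraint {
  enum ConstraintType { Copy, Load, Store, AddressOf } Type;
  unsigned Dest, Src, Offset;
  Constraint(ConstraintType Ty, unsigned D, unsigned S, unsigned O = 0)
      : Type(Ty), Dest(D), Src(S), Offset(O) {}
};

// With node ids A and B:
//   A = &B  ->  AddressOf(A, B)           (no offset allowed)
//   A = B   ->  Copy(A, B)     ; A = B + K    uses Offset = K
//   A = *B  ->  Load(A, B)     ; A = *(B + K) uses Offset = K
//   *A = B  ->  Store(A, B)    ; *(A + K) = B uses Offset = K
static std::vector<Constraint> encodeBasicForms(unsigned A, unsigned B) {
  std::vector<Constraint> C;
  C.push_back(Constraint(Constraint::AddressOf, A, B));
  C.push_back(Constraint(Constraint::Copy,      A, B));
  C.push_back(Constraint(Constraint::Load,      A, B));
  C.push_back(Constraint(Constraint::Store,     A, B));
  return C;
}
```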
- bool AddressTaken; - - // Nodes in cycles (or in equivalence classes) are united together using a - // standard union-find representation with path compression. NodeRep - // gives the index into GraphNodes for the representative Node. - unsigned NodeRep; - - // Modification timestamp. Assigned from Counter. - // Used for work list prioritization. - unsigned Timestamp; - - explicit Node(bool direct = true) : - Val(0), Edges(0), PointsTo(0), OldPointsTo(0), - PointerEquivLabel(0), LocationEquivLabel(0), PredEdges(0), - ImplicitPredEdges(0), PointedToBy(0), NumInEdges(0), - StoredInHash(false), Direct(direct), AddressTaken(false), - NodeRep(SelfRep), Timestamp(0) { } - - Node *setValue(Value *V) { - assert(Val == 0 && "Value already set for this node!"); - Val = V; - return this; - } - - /// getValue - Return the LLVM value corresponding to this node. - /// - Value *getValue() const { return Val; } - - /// addPointerTo - Add a pointer to the list of pointees of this node, - /// returning true if this caused a new pointer to be added, or false if - /// we already knew about the points-to relation. - bool addPointerTo(unsigned Node) { - return PointsTo->test_and_set(Node); - } - - /// intersects - Return true if the points-to set of this node intersects - /// with the points-to set of the specified node. - bool intersects(Node *N) const; - - /// intersectsIgnoring - Return true if the points-to set of this node - /// intersects with the points-to set of the specified node on any nodes - /// except for the specified node to ignore. - bool intersectsIgnoring(Node *N, unsigned) const; - - // Timestamp a node (used for work list prioritization) - void Stamp() { - Timestamp = sys::AtomicIncrement(&Counter); - --Timestamp; - } - - bool isRep() const { - return( (int) NodeRep < 0 ); - } - }; - - struct WorkListElement { - Node* node; - unsigned Timestamp; - WorkListElement(Node* n, unsigned t) : node(n), Timestamp(t) {} - - // Note that we reverse the sense of the comparison because we - // actually want to give low timestamps the priority over high, - // whereas priority is typically interpreted as a greater value is - // given high priority. - bool operator<(const WorkListElement& that) const { - return( this->Timestamp > that.Timestamp ); - } - }; - - // Priority-queue based work list specialized for Nodes. - class WorkList { - std::priority_queue<WorkListElement> Q; - - public: - void insert(Node* n) { - Q.push( WorkListElement(n, n->Timestamp) ); - } - - // We automatically discard non-representative nodes and nodes - // that were in the work list twice (we keep a copy of the - // timestamp in the work list so we can detect this situation by - // comparing against the node's current timestamp). - Node* pop() { - while( !Q.empty() ) { - WorkListElement x = Q.top(); Q.pop(); - Node* INode = x.node; - - if( INode->isRep() && - INode->Timestamp == x.Timestamp ) { - return(x.node); - } - } - return(0); - } - - bool empty() { - return Q.empty(); - } - }; - - /// GraphNodes - This vector is populated as part of the object - /// identification stage of the analysis, which populates this vector with a - /// node for each memory object and fills in the ValueNodes map. - std::vector<Node> GraphNodes; - - /// ValueNodes - This map indicates the Node that a particular Value* is - /// represented by. This contains entries for all pointers. - DenseMap<Value*, unsigned> ValueNodes; - - /// ObjectNodes - This map contains entries for each memory object in the - /// program: globals, alloca's and mallocs. 
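The removed WorkList above inverts operator< so that std::priority_queue, which pops the largest element, actually hands back the oldest (lowest) timestamp first. A self-contained restatement of just that ordering trick (names are illustrative):

```cpp
#include <queue>

struct Item {
  unsigned Timestamp;
  // Reversed comparison: a *lower* timestamp compares as "greater", so the
  // max-heap priority_queue pops the lowest timestamp first.
  bool operator<(const Item &That) const { return Timestamp > That.Timestamp; }
};

typedef std::priority_queue<Item> OldestFirstQueue;
// Pushing items stamped {5, 2, 9} onto an OldestFirstQueue pops 2 first.
```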
- DenseMap<Value*, unsigned> ObjectNodes; - - /// ReturnNodes - This map contains an entry for each function in the - /// program that returns a value. - DenseMap<Function*, unsigned> ReturnNodes; - - /// VarargNodes - This map contains the entry used to represent all pointers - /// passed through the varargs portion of a function call for a particular - /// function. An entry is not present in this map for functions that do not - /// take variable arguments. - DenseMap<Function*, unsigned> VarargNodes; - - - /// Constraints - This vector contains a list of all of the constraints - /// identified by the program. - std::vector<Constraint> Constraints; - - // Map from graph node to maximum K value that is allowed (for functions, - // this is equivalent to the number of arguments + CallFirstArgPos) - std::map<unsigned, unsigned> MaxK; - - /// This enum defines the GraphNodes indices that correspond to important - /// fixed sets. - enum { - UniversalSet = 0, - NullPtr = 1, - NullObject = 2, - NumberSpecialNodes - }; - // Stack for Tarjan's - std::stack<unsigned> SCCStack; - // Map from Graph Node to DFS number - std::vector<unsigned> Node2DFS; - // Map from Graph Node to Deleted from graph. - std::vector<bool> Node2Deleted; - // Same as Node Maps, but implemented as std::map because it is faster to - // clear - std::map<unsigned, unsigned> Tarjan2DFS; - std::map<unsigned, bool> Tarjan2Deleted; - // Current DFS number - unsigned DFSNumber; - - // Work lists. - WorkList w1, w2; - WorkList *CurrWL, *NextWL; // "current" and "next" work lists - - // Offline variable substitution related things - - // Temporary rep storage, used because we can't collapse SCC's in the - // predecessor graph by uniting the variables permanently, we can only do so - // for the successor graph. - std::vector<unsigned> VSSCCRep; - // Mapping from node to whether we have visited it during SCC finding yet. - std::vector<bool> Node2Visited; - // During variable substitution, we create unknowns to represent the unknown - // value that is a dereference of a variable. These nodes are known as - // "ref" nodes (since they represent the value of dereferences). - unsigned FirstRefNode; - // During HVN, we create represent address taken nodes as if they were - // unknown (since HVN, unlike HU, does not evaluate unions). - unsigned FirstAdrNode; - // Current pointer equivalence class number - unsigned PEClass; - // Mapping from points-to sets to equivalence classes - typedef DenseMap<SparseBitVector<> *, unsigned, BitmapKeyInfo> BitVectorMap; - BitVectorMap Set2PEClass; - // Mapping from pointer equivalences to the representative node. -1 if we - // have no representative node for this pointer equivalence class yet. - std::vector<int> PEClass2Node; - // Mapping from pointer equivalences to representative node. This includes - // pointer equivalent but not location equivalent variables. -1 if we have - // no representative node for this pointer equivalence class yet. - std::vector<int> PENLEClass2Node; - // Union/Find for HCD - std::vector<unsigned> HCDSCCRep; - // HCD's offline-detected cycles; "Statically DeTected" - // -1 if not part of such a cycle, otherwise a representative node. 
- std::vector<int> SDT; - // Whether to use SDT (UniteNodes can use it during solving, but not before) - bool SDTActive; - - public: - static char ID; - Andersens() : ModulePass(&ID) {} - - bool runOnModule(Module &M) { - InitializeAliasAnalysis(this); - IdentifyObjects(M); - CollectConstraints(M); -#undef DEBUG_TYPE -#define DEBUG_TYPE "anders-aa-constraints" - DEBUG(PrintConstraints()); -#undef DEBUG_TYPE -#define DEBUG_TYPE "anders-aa" - SolveConstraints(); - DEBUG(PrintPointsToGraph()); - - // Free the constraints list, as we don't need it to respond to alias - // requests. - std::vector<Constraint>().swap(Constraints); - //These are needed for Print() (-analyze in opt) - //ObjectNodes.clear(); - //ReturnNodes.clear(); - //VarargNodes.clear(); - return false; - } - - void releaseMemory() { - // FIXME: Until we have transitively required passes working correctly, - // this cannot be enabled! Otherwise, using -count-aa with the pass - // causes memory to be freed too early. :( -#if 0 - // The memory objects and ValueNodes data structures at the only ones that - // are still live after construction. - std::vector<Node>().swap(GraphNodes); - ValueNodes.clear(); -#endif - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AliasAnalysis::getAnalysisUsage(AU); - AU.setPreservesAll(); // Does not transform code - } - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) { - if (PI->isPassID(&AliasAnalysis::ID)) - return (AliasAnalysis*)this; - return this; - } - - //------------------------------------------------ - // Implement the AliasAnalysis API - // - AliasResult alias(const Value *V1, unsigned V1Size, - const Value *V2, unsigned V2Size); - virtual ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size); - virtual ModRefResult getModRefInfo(CallSite CS1, CallSite CS2); - bool pointsToConstantMemory(const Value *P); - - virtual void deleteValue(Value *V) { - ValueNodes.erase(V); - getAnalysis<AliasAnalysis>().deleteValue(V); - } - - virtual void copyValue(Value *From, Value *To) { - ValueNodes[To] = ValueNodes[From]; - getAnalysis<AliasAnalysis>().copyValue(From, To); - } - - private: - /// getNode - Return the node corresponding to the specified pointer scalar. - /// - unsigned getNode(Value *V) { - if (Constant *C = dyn_cast<Constant>(V)) - if (!isa<GlobalValue>(C)) - return getNodeForConstantPointer(C); - - DenseMap<Value*, unsigned>::iterator I = ValueNodes.find(V); - if (I == ValueNodes.end()) { -#ifndef NDEBUG - V->dump(); -#endif - llvm_unreachable("Value does not have a node in the points-to graph!"); - } - return I->second; - } - - /// getObject - Return the node corresponding to the memory object for the - /// specified global or allocation instruction. - unsigned getObject(Value *V) const { - DenseMap<Value*, unsigned>::const_iterator I = ObjectNodes.find(V); - assert(I != ObjectNodes.end() && - "Value does not have an object in the points-to graph!"); - return I->second; - } - - /// getReturnNode - Return the node representing the return value for the - /// specified function. 
- unsigned getReturnNode(Function *F) const { - DenseMap<Function*, unsigned>::const_iterator I = ReturnNodes.find(F); - assert(I != ReturnNodes.end() && "Function does not return a value!"); - return I->second; - } - - /// getVarargNode - Return the node representing the variable arguments - /// formal for the specified function. - unsigned getVarargNode(Function *F) const { - DenseMap<Function*, unsigned>::const_iterator I = VarargNodes.find(F); - assert(I != VarargNodes.end() && "Function does not take var args!"); - return I->second; - } - - /// getNodeValue - Get the node for the specified LLVM value and set the - /// value for it to be the specified value. - unsigned getNodeValue(Value &V) { - unsigned Index = getNode(&V); - GraphNodes[Index].setValue(&V); - return Index; - } - - unsigned UniteNodes(unsigned First, unsigned Second, - bool UnionByRank = true); - unsigned FindNode(unsigned Node); - unsigned FindNode(unsigned Node) const; - - void IdentifyObjects(Module &M); - void CollectConstraints(Module &M); - bool AnalyzeUsesOfFunction(Value *); - void CreateConstraintGraph(); - void OptimizeConstraints(); - unsigned FindEquivalentNode(unsigned, unsigned); - void ClumpAddressTaken(); - void RewriteConstraints(); - void HU(); - void HVN(); - void HCD(); - void Search(unsigned Node); - void UnitePointerEquivalences(); - void SolveConstraints(); - bool QueryNode(unsigned Node); - void Condense(unsigned Node); - void HUValNum(unsigned Node); - void HVNValNum(unsigned Node); - unsigned getNodeForConstantPointer(Constant *C); - unsigned getNodeForConstantPointerTarget(Constant *C); - void AddGlobalInitializerConstraints(unsigned, Constant *C); - - void AddConstraintsForNonInternalLinkage(Function *F); - void AddConstraintsForCall(CallSite CS, Function *F); - bool AddConstraintsForExternalCall(CallSite CS, Function *F); - - - void PrintNode(const Node *N) const; - void PrintConstraints() const ; - void PrintConstraint(const Constraint &) const; - void PrintLabels() const; - void PrintPointsToGraph() const; - - //===------------------------------------------------------------------===// - // Instruction visitation methods for adding constraints - // - friend class InstVisitor<Andersens>; - void visitReturnInst(ReturnInst &RI); - void visitInvokeInst(InvokeInst &II) { visitCallSite(CallSite(&II)); } - void visitCallInst(CallInst &CI) { - if (isMalloc(&CI)) visitAlloc(CI); - else visitCallSite(CallSite(&CI)); - } - void visitCallSite(CallSite CS); - void visitAllocaInst(AllocaInst &I); - void visitAlloc(Instruction &I); - void visitLoadInst(LoadInst &LI); - void visitStoreInst(StoreInst &SI); - void visitGetElementPtrInst(GetElementPtrInst &GEP); - void visitPHINode(PHINode &PN); - void visitCastInst(CastInst &CI); - void visitICmpInst(ICmpInst &ICI) {} // NOOP! - void visitFCmpInst(FCmpInst &ICI) {} // NOOP! - void visitSelectInst(SelectInst &SI); - void visitVAArg(VAArgInst &I); - void visitInstruction(Instruction &I); - - //===------------------------------------------------------------------===// - // Implement Analyize interface - // - void print(raw_ostream &O, const Module*) const { - PrintPointsToGraph(); - } - }; -} - -char Andersens::ID = 0; -static RegisterPass<Andersens> -X("anders-aa", "Andersen's Interprocedural Alias Analysis (experimental)", - false, true); -static RegisterAnalysisGroup<AliasAnalysis> Y(X); - -// Initialize Timestamp Counter (static). 
-volatile llvm::sys::cas_flag Andersens::Node::Counter = 0; - -ModulePass *llvm::createAndersensPass() { return new Andersens(); } - -//===----------------------------------------------------------------------===// -// AliasAnalysis Interface Implementation -//===----------------------------------------------------------------------===// - -AliasAnalysis::AliasResult Andersens::alias(const Value *V1, unsigned V1Size, - const Value *V2, unsigned V2Size) { - Node *N1 = &GraphNodes[FindNode(getNode(const_cast<Value*>(V1)))]; - Node *N2 = &GraphNodes[FindNode(getNode(const_cast<Value*>(V2)))]; - - // Check to see if the two pointers are known to not alias. They don't alias - // if their points-to sets do not intersect. - if (!N1->intersectsIgnoring(N2, NullObject)) - return NoAlias; - - return AliasAnalysis::alias(V1, V1Size, V2, V2Size); -} - -AliasAnalysis::ModRefResult -Andersens::getModRefInfo(CallSite CS, Value *P, unsigned Size) { - // The only thing useful that we can contribute for mod/ref information is - // when calling external function calls: if we know that memory never escapes - // from the program, it cannot be modified by an external call. - // - // NOTE: This is not really safe, at least not when the entire program is not - // available. The deal is that the external function could call back into the - // program and modify stuff. We ignore this technical niggle for now. This - // is, after all, a "research quality" implementation of Andersen's analysis. - if (Function *F = CS.getCalledFunction()) - if (F->isDeclaration()) { - Node *N1 = &GraphNodes[FindNode(getNode(P))]; - - if (N1->PointsTo->empty()) - return NoModRef; -#if FULL_UNIVERSAL - if (!UniversalSet->PointsTo->test(FindNode(getNode(P)))) - return NoModRef; // Universal set does not contain P -#else - if (!N1->PointsTo->test(UniversalSet)) - return NoModRef; // P doesn't point to the universal set. -#endif - } - - return AliasAnalysis::getModRefInfo(CS, P, Size); -} - -AliasAnalysis::ModRefResult -Andersens::getModRefInfo(CallSite CS1, CallSite CS2) { - return AliasAnalysis::getModRefInfo(CS1,CS2); -} - -/// pointsToConstantMemory - If we can determine that this pointer only points -/// to constant memory, return true. In practice, this means that if the -/// pointer can only point to constant globals, functions, or the null pointer, -/// return true. -/// -bool Andersens::pointsToConstantMemory(const Value *P) { - Node *N = &GraphNodes[FindNode(getNode(const_cast<Value*>(P)))]; - unsigned i; - - for (SparseBitVector<>::iterator bi = N->PointsTo->begin(); - bi != N->PointsTo->end(); - ++bi) { - i = *bi; - Node *Pointee = &GraphNodes[i]; - if (Value *V = Pointee->getValue()) { - if (!isa<GlobalValue>(V) || (isa<GlobalVariable>(V) && - !cast<GlobalVariable>(V)->isConstant())) - return AliasAnalysis::pointsToConstantMemory(P); - } else { - if (i != NullObject) - return AliasAnalysis::pointsToConstantMemory(P); - } - } - - return true; -} - -//===----------------------------------------------------------------------===// -// Object Identification Phase -//===----------------------------------------------------------------------===// - -/// IdentifyObjects - This stage scans the program, adding an entry to the -/// GraphNodes list for each memory object in the program (global stack or -/// heap), and populates the ValueNodes and ObjectNodes maps for these objects. 
-/// -void Andersens::IdentifyObjects(Module &M) { - unsigned NumObjects = 0; - - // Object #0 is always the universal set: the object that we don't know - // anything about. - assert(NumObjects == UniversalSet && "Something changed!"); - ++NumObjects; - - // Object #1 always represents the null pointer. - assert(NumObjects == NullPtr && "Something changed!"); - ++NumObjects; - - // Object #2 always represents the null object (the object pointed to by null) - assert(NumObjects == NullObject && "Something changed!"); - ++NumObjects; - - // Add all the globals first. - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - ObjectNodes[I] = NumObjects++; - ValueNodes[I] = NumObjects++; - } - - // Add nodes for all of the functions and the instructions inside of them. - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - // The function itself is a memory object. - unsigned First = NumObjects; - ValueNodes[F] = NumObjects++; - if (isa<PointerType>(F->getFunctionType()->getReturnType())) - ReturnNodes[F] = NumObjects++; - if (F->getFunctionType()->isVarArg()) - VarargNodes[F] = NumObjects++; - - - // Add nodes for all of the incoming pointer arguments. - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) - { - if (isa<PointerType>(I->getType())) - ValueNodes[I] = NumObjects++; - } - MaxK[First] = NumObjects - First; - - // Scan the function body, creating a memory object for each heap/stack - // allocation in the body of the function and a node to represent all - // pointer values defined by instructions and used as operands. - for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { - // If this is an heap or stack allocation, create a node for the memory - // object. - if (isa<PointerType>(II->getType())) { - ValueNodes[&*II] = NumObjects++; - if (AllocaInst *AI = dyn_cast<AllocaInst>(&*II)) - ObjectNodes[AI] = NumObjects++; - else if (isMalloc(&*II)) - ObjectNodes[&*II] = NumObjects++; - } - - // Calls to inline asm need to be added as well because the callee isn't - // referenced anywhere else. - if (CallInst *CI = dyn_cast<CallInst>(&*II)) { - Value *Callee = CI->getCalledValue(); - if (isa<InlineAsm>(Callee)) - ValueNodes[Callee] = NumObjects++; - } - } - } - - // Now that we know how many objects to create, make them all now! - GraphNodes.resize(NumObjects); - NumNodes += NumObjects; -} - -//===----------------------------------------------------------------------===// -// Constraint Identification Phase -//===----------------------------------------------------------------------===// - -/// getNodeForConstantPointer - Return the node corresponding to the constant -/// pointer itself. 
-unsigned Andersens::getNodeForConstantPointer(Constant *C) { - assert(isa<PointerType>(C->getType()) && "Not a constant pointer!"); - - if (isa<ConstantPointerNull>(C) || isa<UndefValue>(C)) - return NullPtr; - else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) - return getNode(GV); - else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { - switch (CE->getOpcode()) { - case Instruction::GetElementPtr: - return getNodeForConstantPointer(CE->getOperand(0)); - case Instruction::IntToPtr: - return UniversalSet; - case Instruction::BitCast: - return getNodeForConstantPointer(CE->getOperand(0)); - default: - errs() << "Constant Expr not yet handled: " << *CE << "\n"; - llvm_unreachable(0); - } - } else { - llvm_unreachable("Unknown constant pointer!"); - } - return 0; -} - -/// getNodeForConstantPointerTarget - Return the node POINTED TO by the -/// specified constant pointer. -unsigned Andersens::getNodeForConstantPointerTarget(Constant *C) { - assert(isa<PointerType>(C->getType()) && "Not a constant pointer!"); - - if (isa<ConstantPointerNull>(C)) - return NullObject; - else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) - return getObject(GV); - else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { - switch (CE->getOpcode()) { - case Instruction::GetElementPtr: - return getNodeForConstantPointerTarget(CE->getOperand(0)); - case Instruction::IntToPtr: - return UniversalSet; - case Instruction::BitCast: - return getNodeForConstantPointerTarget(CE->getOperand(0)); - default: - errs() << "Constant Expr not yet handled: " << *CE << "\n"; - llvm_unreachable(0); - } - } else { - llvm_unreachable("Unknown constant pointer!"); - } - return 0; -} - -/// AddGlobalInitializerConstraints - Add inclusion constraints for the memory -/// object N, which contains values indicated by C. -void Andersens::AddGlobalInitializerConstraints(unsigned NodeIndex, - Constant *C) { - if (C->getType()->isSingleValueType()) { - if (isa<PointerType>(C->getType())) - Constraints.push_back(Constraint(Constraint::Copy, NodeIndex, - getNodeForConstantPointer(C))); - } else if (C->isNullValue()) { - Constraints.push_back(Constraint(Constraint::Copy, NodeIndex, - NullObject)); - return; - } else if (!isa<UndefValue>(C)) { - // If this is an array or struct, include constraints for each element. - assert(isa<ConstantArray>(C) || isa<ConstantStruct>(C)); - for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) - AddGlobalInitializerConstraints(NodeIndex, - cast<Constant>(C->getOperand(i))); - } -} - -/// AddConstraintsForNonInternalLinkage - If this function does not have -/// internal linkage, realize that we can't trust anything passed into or -/// returned by this function. -void Andersens::AddConstraintsForNonInternalLinkage(Function *F) { - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) - if (isa<PointerType>(I->getType())) - // If this is an argument of an externally accessible function, the - // incoming pointer might point to anything. - Constraints.push_back(Constraint(Constraint::Copy, getNode(I), - UniversalSet)); -} - -/// AddConstraintsForCall - If this is a call to a "known" function, add the -/// constraints and return true. If this is a call to an unknown function, -/// return false. -bool Andersens::AddConstraintsForExternalCall(CallSite CS, Function *F) { - assert(F->isDeclaration() && "Not an external function!"); - - // These functions don't induce any points-to constraints. 
- if (F->getName() == "atoi" || F->getName() == "atof" || - F->getName() == "atol" || F->getName() == "atoll" || - F->getName() == "remove" || F->getName() == "unlink" || - F->getName() == "rename" || F->getName() == "memcmp" || - F->getName() == "llvm.memset" || - F->getName() == "strcmp" || F->getName() == "strncmp" || - F->getName() == "execl" || F->getName() == "execlp" || - F->getName() == "execle" || F->getName() == "execv" || - F->getName() == "execvp" || F->getName() == "chmod" || - F->getName() == "puts" || F->getName() == "write" || - F->getName() == "open" || F->getName() == "create" || - F->getName() == "truncate" || F->getName() == "chdir" || - F->getName() == "mkdir" || F->getName() == "rmdir" || - F->getName() == "read" || F->getName() == "pipe" || - F->getName() == "wait" || F->getName() == "time" || - F->getName() == "stat" || F->getName() == "fstat" || - F->getName() == "lstat" || F->getName() == "strtod" || - F->getName() == "strtof" || F->getName() == "strtold" || - F->getName() == "fopen" || F->getName() == "fdopen" || - F->getName() == "freopen" || - F->getName() == "fflush" || F->getName() == "feof" || - F->getName() == "fileno" || F->getName() == "clearerr" || - F->getName() == "rewind" || F->getName() == "ftell" || - F->getName() == "ferror" || F->getName() == "fgetc" || - F->getName() == "fgetc" || F->getName() == "_IO_getc" || - F->getName() == "fwrite" || F->getName() == "fread" || - F->getName() == "fgets" || F->getName() == "ungetc" || - F->getName() == "fputc" || - F->getName() == "fputs" || F->getName() == "putc" || - F->getName() == "ftell" || F->getName() == "rewind" || - F->getName() == "_IO_putc" || F->getName() == "fseek" || - F->getName() == "fgetpos" || F->getName() == "fsetpos" || - F->getName() == "printf" || F->getName() == "fprintf" || - F->getName() == "sprintf" || F->getName() == "vprintf" || - F->getName() == "vfprintf" || F->getName() == "vsprintf" || - F->getName() == "scanf" || F->getName() == "fscanf" || - F->getName() == "sscanf" || F->getName() == "__assert_fail" || - F->getName() == "modf") - return true; - - - // These functions do induce points-to edges. - if (F->getName() == "llvm.memcpy" || - F->getName() == "llvm.memmove" || - F->getName() == "memmove") { - - const FunctionType *FTy = F->getFunctionType(); - if (FTy->getNumParams() > 1 && - isa<PointerType>(FTy->getParamType(0)) && - isa<PointerType>(FTy->getParamType(1))) { - - // *Dest = *Src, which requires an artificial graph node to represent the - // constraint. 
It is broken up into *Dest = temp, temp = *Src - unsigned FirstArg = getNode(CS.getArgument(0)); - unsigned SecondArg = getNode(CS.getArgument(1)); - unsigned TempArg = GraphNodes.size(); - GraphNodes.push_back(Node()); - Constraints.push_back(Constraint(Constraint::Store, - FirstArg, TempArg)); - Constraints.push_back(Constraint(Constraint::Load, - TempArg, SecondArg)); - // In addition, Dest = Src - Constraints.push_back(Constraint(Constraint::Copy, - FirstArg, SecondArg)); - return true; - } - } - - // Result = Arg0 - if (F->getName() == "realloc" || F->getName() == "strchr" || - F->getName() == "strrchr" || F->getName() == "strstr" || - F->getName() == "strtok") { - const FunctionType *FTy = F->getFunctionType(); - if (FTy->getNumParams() > 0 && - isa<PointerType>(FTy->getParamType(0))) { - Constraints.push_back(Constraint(Constraint::Copy, - getNode(CS.getInstruction()), - getNode(CS.getArgument(0)))); - return true; - } - } - - return false; -} - - - -/// AnalyzeUsesOfFunction - Look at all of the users of the specified function. -/// If this is used by anything complex (i.e., the address escapes), return -/// true. -bool Andersens::AnalyzeUsesOfFunction(Value *V) { - - if (!isa<PointerType>(V->getType())) return true; - - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) - if (isa<LoadInst>(*UI)) { - return false; - } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { - if (V == SI->getOperand(1)) { - return false; - } else if (SI->getOperand(1)) { - return true; // Storing the pointer - } - } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) { - if (AnalyzeUsesOfFunction(GEP)) return true; - } else if (isFreeCall(*UI)) { - return false; - } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) { - // Make sure that this is just the function being called, not that it is - // passing into the function. - for (unsigned i = 1, e = CI->getNumOperands(); i != e; ++i) - if (CI->getOperand(i) == V) return true; - } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) { - // Make sure that this is just the function being called, not that it is - // passing into the function. - for (unsigned i = 3, e = II->getNumOperands(); i != e; ++i) - if (II->getOperand(i) == V) return true; - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) { - if (CE->getOpcode() == Instruction::GetElementPtr || - CE->getOpcode() == Instruction::BitCast) { - if (AnalyzeUsesOfFunction(CE)) - return true; - } else { - return true; - } - } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(*UI)) { - if (!isa<ConstantPointerNull>(ICI->getOperand(1))) - return true; // Allow comparison against null. - } else { - return true; - } - return false; -} - -/// CollectConstraints - This stage scans the program, adding a constraint to -/// the Constraints list for each instruction in the program that induces a -/// constraint, and setting up the initial points-to graph. -/// -void Andersens::CollectConstraints(Module &M) { - // First, the universal set points to itself. - Constraints.push_back(Constraint(Constraint::AddressOf, UniversalSet, - UniversalSet)); - Constraints.push_back(Constraint(Constraint::Store, UniversalSet, - UniversalSet)); - - // Next, the null pointer points to the null object. - Constraints.push_back(Constraint(Constraint::AddressOf, NullPtr, NullObject)); - - // Next, add any constraints on global variables and their initializers. 
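The memcpy/memmove handling above splits `*Dest = *Src` through a fresh temporary node so it fits the Load/Store constraint forms, and additionally records `Dest = Src`. A short sketch of those three constraints, reusing the pared-down Constraint struct from the earlier sketch (node ids are placeholders):

```cpp
#include <vector>

// Assumes the simplified Constraint struct defined in the earlier sketch.
static void addMemcpyConstraints(std::vector<Constraint> &Constraints,
                                 unsigned Dest, unsigned Src, unsigned Temp) {
  Constraints.push_back(Constraint(Constraint::Store, Dest, Temp)); // *Dest = temp
  Constraints.push_back(Constraint(Constraint::Load,  Temp, Src));  // temp = *Src
  Constraints.push_back(Constraint(Constraint::Copy,  Dest, Src));  // Dest = Src
}
```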
- for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - // Associate the address of the global object as pointing to the memory for - // the global: &G = <G memory> - unsigned ObjectIndex = getObject(I); - Node *Object = &GraphNodes[ObjectIndex]; - Object->setValue(I); - Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(*I), - ObjectIndex)); - - if (I->hasDefinitiveInitializer()) { - AddGlobalInitializerConstraints(ObjectIndex, I->getInitializer()); - } else { - // If it doesn't have an initializer (i.e. it's defined in another - // translation unit), it points to the universal set. - Constraints.push_back(Constraint(Constraint::Copy, ObjectIndex, - UniversalSet)); - } - } - - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - // Set up the return value node. - if (isa<PointerType>(F->getFunctionType()->getReturnType())) - GraphNodes[getReturnNode(F)].setValue(F); - if (F->getFunctionType()->isVarArg()) - GraphNodes[getVarargNode(F)].setValue(F); - - // Set up incoming argument nodes. - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) - if (isa<PointerType>(I->getType())) - getNodeValue(*I); - - // At some point we should just add constraints for the escaping functions - // at solve time, but this slows down solving. For now, we simply mark - // address taken functions as escaping and treat them as external. - if (!F->hasLocalLinkage() || AnalyzeUsesOfFunction(F)) - AddConstraintsForNonInternalLinkage(F); - - if (!F->isDeclaration()) { - // Scan the function body, creating a memory object for each heap/stack - // allocation in the body of the function and a node to represent all - // pointer values defined by instructions and used as operands. - visit(F); - } else { - // External functions that return pointers return the universal set. - if (isa<PointerType>(F->getFunctionType()->getReturnType())) - Constraints.push_back(Constraint(Constraint::Copy, - getReturnNode(F), - UniversalSet)); - - // Any pointers that are passed into the function have the universal set - // stored into them. - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) - if (isa<PointerType>(I->getType())) { - // Pointers passed into external functions could have anything stored - // through them. - Constraints.push_back(Constraint(Constraint::Store, getNode(I), - UniversalSet)); - // Memory objects passed into external function calls can have the - // universal set point to them. -#if FULL_UNIVERSAL - Constraints.push_back(Constraint(Constraint::Copy, - UniversalSet, - getNode(I))); -#else - Constraints.push_back(Constraint(Constraint::Copy, - getNode(I), - UniversalSet)); -#endif - } - - // If this is an external varargs function, it can also store pointers - // into any pointers passed through the varargs section. - if (F->getFunctionType()->isVarArg()) - Constraints.push_back(Constraint(Constraint::Store, getVarargNode(F), - UniversalSet)); - } - } - NumConstraints += Constraints.size(); -} - - -void Andersens::visitInstruction(Instruction &I) { -#ifdef NDEBUG - return; // This function is just a big assert. -#endif - if (isa<BinaryOperator>(I)) - return; - // Most instructions don't have any effect on pointer values. - switch (I.getOpcode()) { - case Instruction::Br: - case Instruction::Switch: - case Instruction::Unwind: - case Instruction::Unreachable: - case Instruction::ICmp: - case Instruction::FCmp: - return; - default: - // Is this something we aren't handling yet? 
- errs() << "Unknown instruction: " << I; - llvm_unreachable(0); - } -} - -void Andersens::visitAllocaInst(AllocaInst &I) { - visitAlloc(I); -} - -void Andersens::visitAlloc(Instruction &I) { - unsigned ObjectIndex = getObject(&I); - GraphNodes[ObjectIndex].setValue(&I); - Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(I), - ObjectIndex)); -} - -void Andersens::visitReturnInst(ReturnInst &RI) { - if (RI.getNumOperands() && isa<PointerType>(RI.getOperand(0)->getType())) - // return V --> <Copy/retval{F}/v> - Constraints.push_back(Constraint(Constraint::Copy, - getReturnNode(RI.getParent()->getParent()), - getNode(RI.getOperand(0)))); -} - -void Andersens::visitLoadInst(LoadInst &LI) { - if (isa<PointerType>(LI.getType())) - // P1 = load P2 --> <Load/P1/P2> - Constraints.push_back(Constraint(Constraint::Load, getNodeValue(LI), - getNode(LI.getOperand(0)))); -} - -void Andersens::visitStoreInst(StoreInst &SI) { - if (isa<PointerType>(SI.getOperand(0)->getType())) - // store P1, P2 --> <Store/P2/P1> - Constraints.push_back(Constraint(Constraint::Store, - getNode(SI.getOperand(1)), - getNode(SI.getOperand(0)))); -} - -void Andersens::visitGetElementPtrInst(GetElementPtrInst &GEP) { - // P1 = getelementptr P2, ... --> <Copy/P1/P2> - Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(GEP), - getNode(GEP.getOperand(0)))); -} - -void Andersens::visitPHINode(PHINode &PN) { - if (isa<PointerType>(PN.getType())) { - unsigned PNN = getNodeValue(PN); - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) - // P1 = phi P2, P3 --> <Copy/P1/P2>, <Copy/P1/P3>, ... - Constraints.push_back(Constraint(Constraint::Copy, PNN, - getNode(PN.getIncomingValue(i)))); - } -} - -void Andersens::visitCastInst(CastInst &CI) { - Value *Op = CI.getOperand(0); - if (isa<PointerType>(CI.getType())) { - if (isa<PointerType>(Op->getType())) { - // P1 = cast P2 --> <Copy/P1/P2> - Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(CI), - getNode(CI.getOperand(0)))); - } else { - // P1 = cast int --> <Copy/P1/Univ> -#if 0 - Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(CI), - UniversalSet)); -#else - getNodeValue(CI); -#endif - } - } else if (isa<PointerType>(Op->getType())) { - // int = cast P1 --> <Copy/Univ/P1> -#if 0 - Constraints.push_back(Constraint(Constraint::Copy, - UniversalSet, - getNode(CI.getOperand(0)))); -#else - getNode(CI.getOperand(0)); -#endif - } -} - -void Andersens::visitSelectInst(SelectInst &SI) { - if (isa<PointerType>(SI.getType())) { - unsigned SIN = getNodeValue(SI); - // P1 = select C, P2, P3 ---> <Copy/P1/P2>, <Copy/P1/P3> - Constraints.push_back(Constraint(Constraint::Copy, SIN, - getNode(SI.getOperand(1)))); - Constraints.push_back(Constraint(Constraint::Copy, SIN, - getNode(SI.getOperand(2)))); - } -} - -void Andersens::visitVAArg(VAArgInst &I) { - llvm_unreachable("vaarg not handled yet!"); -} - -/// AddConstraintsForCall - Add constraints for a call with actual arguments -/// specified by CS to the function specified by F. Note that the types of -/// arguments might not match up in the case where this is an indirect call and -/// the function pointer has been casted. If this is the case, do something -/// reasonable. -void Andersens::AddConstraintsForCall(CallSite CS, Function *F) { - Value *CallValue = CS.getCalledValue(); - bool IsDeref = F == NULL; - - // If this is a call to an external function, try to handle it directly to get - // some taste of context sensitivity. 
- if (F && F->isDeclaration() && AddConstraintsForExternalCall(CS, F)) - return; - - if (isa<PointerType>(CS.getType())) { - unsigned CSN = getNode(CS.getInstruction()); - if (!F || isa<PointerType>(F->getFunctionType()->getReturnType())) { - if (IsDeref) - Constraints.push_back(Constraint(Constraint::Load, CSN, - getNode(CallValue), CallReturnPos)); - else - Constraints.push_back(Constraint(Constraint::Copy, CSN, - getNode(CallValue) + CallReturnPos)); - } else { - // If the function returns a non-pointer value, handle this just like we - // treat a nonpointer cast to pointer. - Constraints.push_back(Constraint(Constraint::Copy, CSN, - UniversalSet)); - } - } else if (F && isa<PointerType>(F->getFunctionType()->getReturnType())) { -#if FULL_UNIVERSAL - Constraints.push_back(Constraint(Constraint::Copy, - UniversalSet, - getNode(CallValue) + CallReturnPos)); -#else - Constraints.push_back(Constraint(Constraint::Copy, - getNode(CallValue) + CallReturnPos, - UniversalSet)); -#endif - - - } - - CallSite::arg_iterator ArgI = CS.arg_begin(), ArgE = CS.arg_end(); - bool external = !F || F->isDeclaration(); - if (F) { - // Direct Call - Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - for (; AI != AE && ArgI != ArgE; ++AI, ++ArgI) - { -#if !FULL_UNIVERSAL - if (external && isa<PointerType>((*ArgI)->getType())) - { - // Add constraint that ArgI can now point to anything due to - // escaping, as can everything it points to. The second portion of - // this should be taken care of by universal = *universal - Constraints.push_back(Constraint(Constraint::Copy, - getNode(*ArgI), - UniversalSet)); - } -#endif - if (isa<PointerType>(AI->getType())) { - if (isa<PointerType>((*ArgI)->getType())) { - // Copy the actual argument into the formal argument. - Constraints.push_back(Constraint(Constraint::Copy, getNode(AI), - getNode(*ArgI))); - } else { - Constraints.push_back(Constraint(Constraint::Copy, getNode(AI), - UniversalSet)); - } - } else if (isa<PointerType>((*ArgI)->getType())) { -#if FULL_UNIVERSAL - Constraints.push_back(Constraint(Constraint::Copy, - UniversalSet, - getNode(*ArgI))); -#else - Constraints.push_back(Constraint(Constraint::Copy, - getNode(*ArgI), - UniversalSet)); -#endif - } - } - } else { - //Indirect Call - unsigned ArgPos = CallFirstArgPos; - for (; ArgI != ArgE; ++ArgI) { - if (isa<PointerType>((*ArgI)->getType())) { - // Copy the actual argument into the formal argument. - Constraints.push_back(Constraint(Constraint::Store, - getNode(CallValue), - getNode(*ArgI), ArgPos++)); - } else { - Constraints.push_back(Constraint(Constraint::Store, - getNode (CallValue), - UniversalSet, ArgPos++)); - } - } - } - // Copy all pointers passed through the varargs section to the varargs node. - if (F && F->getFunctionType()->isVarArg()) - for (; ArgI != ArgE; ++ArgI) - if (isa<PointerType>((*ArgI)->getType())) - Constraints.push_back(Constraint(Constraint::Copy, getVarargNode(F), - getNode(*ArgI))); - // If more arguments are passed in than we track, just drop them on the floor. 
-} - -void Andersens::visitCallSite(CallSite CS) { - if (isa<PointerType>(CS.getType())) - getNodeValue(*CS.getInstruction()); - - if (Function *F = CS.getCalledFunction()) { - AddConstraintsForCall(CS, F); - } else { - AddConstraintsForCall(CS, NULL); - } -} - -//===----------------------------------------------------------------------===// -// Constraint Solving Phase -//===----------------------------------------------------------------------===// - -/// intersects - Return true if the points-to set of this node intersects -/// with the points-to set of the specified node. -bool Andersens::Node::intersects(Node *N) const { - return PointsTo->intersects(N->PointsTo); -} - -/// intersectsIgnoring - Return true if the points-to set of this node -/// intersects with the points-to set of the specified node on any nodes -/// except for the specified node to ignore. -bool Andersens::Node::intersectsIgnoring(Node *N, unsigned Ignoring) const { - // TODO: If we are only going to call this with the same value for Ignoring, - // we should move the special values out of the points-to bitmap. - bool WeHadIt = PointsTo->test(Ignoring); - bool NHadIt = N->PointsTo->test(Ignoring); - bool Result = false; - if (WeHadIt) - PointsTo->reset(Ignoring); - if (NHadIt) - N->PointsTo->reset(Ignoring); - Result = PointsTo->intersects(N->PointsTo); - if (WeHadIt) - PointsTo->set(Ignoring); - if (NHadIt) - N->PointsTo->set(Ignoring); - return Result; -} - - -/// Clump together address taken variables so that the points-to sets use up -/// less space and can be operated on faster. - -void Andersens::ClumpAddressTaken() { -#undef DEBUG_TYPE -#define DEBUG_TYPE "anders-aa-renumber" - std::vector<unsigned> Translate; - std::vector<Node> NewGraphNodes; - - Translate.resize(GraphNodes.size()); - unsigned NewPos = 0; - - for (unsigned i = 0; i < Constraints.size(); ++i) { - Constraint &C = Constraints[i]; - if (C.Type == Constraint::AddressOf) { - GraphNodes[C.Src].AddressTaken = true; - } - } - for (unsigned i = 0; i < NumberSpecialNodes; ++i) { - unsigned Pos = NewPos++; - Translate[i] = Pos; - NewGraphNodes.push_back(GraphNodes[i]); - DEBUG(dbgs() << "Renumbering node " << i << " to node " << Pos << "\n"); - } - - // I believe this ends up being faster than making two vectors and splicing - // them. 
- for (unsigned i = NumberSpecialNodes; i < GraphNodes.size(); ++i) { - if (GraphNodes[i].AddressTaken) { - unsigned Pos = NewPos++; - Translate[i] = Pos; - NewGraphNodes.push_back(GraphNodes[i]); - DEBUG(dbgs() << "Renumbering node " << i << " to node " << Pos << "\n"); - } - } - - for (unsigned i = NumberSpecialNodes; i < GraphNodes.size(); ++i) { - if (!GraphNodes[i].AddressTaken) { - unsigned Pos = NewPos++; - Translate[i] = Pos; - NewGraphNodes.push_back(GraphNodes[i]); - DEBUG(dbgs() << "Renumbering node " << i << " to node " << Pos << "\n"); - } - } - - for (DenseMap<Value*, unsigned>::iterator Iter = ValueNodes.begin(); - Iter != ValueNodes.end(); - ++Iter) - Iter->second = Translate[Iter->second]; - - for (DenseMap<Value*, unsigned>::iterator Iter = ObjectNodes.begin(); - Iter != ObjectNodes.end(); - ++Iter) - Iter->second = Translate[Iter->second]; - - for (DenseMap<Function*, unsigned>::iterator Iter = ReturnNodes.begin(); - Iter != ReturnNodes.end(); - ++Iter) - Iter->second = Translate[Iter->second]; - - for (DenseMap<Function*, unsigned>::iterator Iter = VarargNodes.begin(); - Iter != VarargNodes.end(); - ++Iter) - Iter->second = Translate[Iter->second]; - - for (unsigned i = 0; i < Constraints.size(); ++i) { - Constraint &C = Constraints[i]; - C.Src = Translate[C.Src]; - C.Dest = Translate[C.Dest]; - } - - GraphNodes.swap(NewGraphNodes); -#undef DEBUG_TYPE -#define DEBUG_TYPE "anders-aa" -} - -/// The technique used here is described in "Exploiting Pointer and Location -/// Equivalence to Optimize Pointer Analysis. In the 14th International Static -/// Analysis Symposium (SAS), August 2007." It is known as the "HVN" algorithm, -/// and is equivalent to value numbering the collapsed constraint graph without -/// evaluating unions. This is used as a pre-pass to HU in order to resolve -/// first order pointer dereferences and speed up/reduce memory usage of HU. -/// Running both is equivalent to HRU without the iteration -/// HVN in more detail: -/// Imagine the set of constraints was simply straight line code with no loops -/// (we eliminate cycles, so there are no loops), such as: -/// E = &D -/// E = &C -/// E = F -/// F = G -/// G = F -/// Applying value numbering to this code tells us: -/// G == F == E -/// -/// For HVN, this is as far as it goes. We assign new value numbers to every -/// "address node", and every "reference node". -/// To get the optimal result for this, we use a DFS + SCC (since all nodes in a -/// cycle must have the same value number since the = operation is really -/// inclusion, not overwrite), and value number nodes we receive points-to sets -/// before we value our own node. -/// The advantage of HU over HVN is that HU considers the inclusion property, so -/// that if you have -/// E = &D -/// E = &C -/// E = F -/// F = G -/// F = &D -/// G = F -/// HU will determine that G == F == E. HVN will not, because it cannot prove -/// that the points to information ends up being the same because they all -/// receive &D from E anyway. - -void Andersens::HVN() { - DEBUG(dbgs() << "Beginning HVN\n"); - // Build a predecessor graph. This is like our constraint graph with the - // edges going in the opposite direction, and there are edges for all the - // constraints, instead of just copy constraints. We also build implicit - // edges for constraints are implied but not explicit. I.E for the constraint - // a = &b, we add implicit edges *a = b. 
This helps us capture more cycles - for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { - Constraint &C = Constraints[i]; - if (C.Type == Constraint::AddressOf) { - GraphNodes[C.Src].AddressTaken = true; - GraphNodes[C.Src].Direct = false; - - // Dest = &src edge - unsigned AdrNode = C.Src + FirstAdrNode; - if (!GraphNodes[C.Dest].PredEdges) - GraphNodes[C.Dest].PredEdges = new SparseBitVector<>; - GraphNodes[C.Dest].PredEdges->set(AdrNode); - - // *Dest = src edge - unsigned RefNode = C.Dest + FirstRefNode; - if (!GraphNodes[RefNode].ImplicitPredEdges) - GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>; - GraphNodes[RefNode].ImplicitPredEdges->set(C.Src); - } else if (C.Type == Constraint::Load) { - if (C.Offset == 0) { - // dest = *src edge - if (!GraphNodes[C.Dest].PredEdges) - GraphNodes[C.Dest].PredEdges = new SparseBitVector<>; - GraphNodes[C.Dest].PredEdges->set(C.Src + FirstRefNode); - } else { - GraphNodes[C.Dest].Direct = false; - } - } else if (C.Type == Constraint::Store) { - if (C.Offset == 0) { - // *dest = src edge - unsigned RefNode = C.Dest + FirstRefNode; - if (!GraphNodes[RefNode].PredEdges) - GraphNodes[RefNode].PredEdges = new SparseBitVector<>; - GraphNodes[RefNode].PredEdges->set(C.Src); - } - } else { - // Dest = Src edge and *Dest = *Src edge - if (!GraphNodes[C.Dest].PredEdges) - GraphNodes[C.Dest].PredEdges = new SparseBitVector<>; - GraphNodes[C.Dest].PredEdges->set(C.Src); - unsigned RefNode = C.Dest + FirstRefNode; - if (!GraphNodes[RefNode].ImplicitPredEdges) - GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>; - GraphNodes[RefNode].ImplicitPredEdges->set(C.Src + FirstRefNode); - } - } - PEClass = 1; - // Do SCC finding first to condense our predecessor graph - DFSNumber = 0; - Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0); - Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false); - Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false); - - for (unsigned i = 0; i < FirstRefNode; ++i) { - unsigned Node = VSSCCRep[i]; - if (!Node2Visited[Node]) - HVNValNum(Node); - } - for (BitVectorMap::iterator Iter = Set2PEClass.begin(); - Iter != Set2PEClass.end(); - ++Iter) - delete Iter->first; - Set2PEClass.clear(); - Node2DFS.clear(); - Node2Deleted.clear(); - Node2Visited.clear(); - DEBUG(dbgs() << "Finished HVN\n"); - -} - -/// This is the workhorse of HVN value numbering. We combine SCC finding at the -/// same time because it's easy. 
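The heart of the labeling done by HVNValNum below is a map from a node's set of incoming pointer-equivalence labels to a class number, so that nodes with identical incoming label sets share a label. A minimal sketch of that trick with simplified containers, not the pass's actual Set2PEClass machinery:

#include <cstdio>
#include <map>
#include <set>

int main() {
  std::map<std::set<unsigned>, unsigned> Set2Class;
  unsigned NextClass = 1;                    // 0 is reserved for non-pointers

  auto LabelFor = [&](const std::set<unsigned> &Incoming) -> unsigned {
    if (Incoming.empty())
      return 0;                              // provably points to nothing
    auto It = Set2Class.find(Incoming);
    if (It != Set2Class.end())
      return It->second;                     // same incoming labels, same class
    return Set2Class[Incoming] = NextClass++;
  };

  unsigned A = LabelFor({1, 2});
  unsigned B = LabelFor({1, 2});             // identical set, identical label
  unsigned C = LabelFor({3});
  std::printf("A=%u B=%u C=%u\n", A, B, C);  // prints A=1 B=1 C=2
  return 0;
}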
-void Andersens::HVNValNum(unsigned NodeIndex) { - unsigned MyDFS = DFSNumber++; - Node *N = &GraphNodes[NodeIndex]; - Node2Visited[NodeIndex] = true; - Node2DFS[NodeIndex] = MyDFS; - - // First process all our explicit edges - if (N->PredEdges) - for (SparseBitVector<>::iterator Iter = N->PredEdges->begin(); - Iter != N->PredEdges->end(); - ++Iter) { - unsigned j = VSSCCRep[*Iter]; - if (!Node2Deleted[j]) { - if (!Node2Visited[j]) - HVNValNum(j); - if (Node2DFS[NodeIndex] > Node2DFS[j]) - Node2DFS[NodeIndex] = Node2DFS[j]; - } - } - - // Now process all the implicit edges - if (N->ImplicitPredEdges) - for (SparseBitVector<>::iterator Iter = N->ImplicitPredEdges->begin(); - Iter != N->ImplicitPredEdges->end(); - ++Iter) { - unsigned j = VSSCCRep[*Iter]; - if (!Node2Deleted[j]) { - if (!Node2Visited[j]) - HVNValNum(j); - if (Node2DFS[NodeIndex] > Node2DFS[j]) - Node2DFS[NodeIndex] = Node2DFS[j]; - } - } - - // See if we found any cycles - if (MyDFS == Node2DFS[NodeIndex]) { - while (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS) { - unsigned CycleNodeIndex = SCCStack.top(); - Node *CycleNode = &GraphNodes[CycleNodeIndex]; - VSSCCRep[CycleNodeIndex] = NodeIndex; - // Unify the nodes - N->Direct &= CycleNode->Direct; - - if (CycleNode->PredEdges) { - if (!N->PredEdges) - N->PredEdges = new SparseBitVector<>; - *(N->PredEdges) |= CycleNode->PredEdges; - delete CycleNode->PredEdges; - CycleNode->PredEdges = NULL; - } - if (CycleNode->ImplicitPredEdges) { - if (!N->ImplicitPredEdges) - N->ImplicitPredEdges = new SparseBitVector<>; - *(N->ImplicitPredEdges) |= CycleNode->ImplicitPredEdges; - delete CycleNode->ImplicitPredEdges; - CycleNode->ImplicitPredEdges = NULL; - } - - SCCStack.pop(); - } - - Node2Deleted[NodeIndex] = true; - - if (!N->Direct) { - GraphNodes[NodeIndex].PointerEquivLabel = PEClass++; - return; - } - - // Collect labels of successor nodes - bool AllSame = true; - unsigned First = ~0; - SparseBitVector<> *Labels = new SparseBitVector<>; - bool Used = false; - - if (N->PredEdges) - for (SparseBitVector<>::iterator Iter = N->PredEdges->begin(); - Iter != N->PredEdges->end(); - ++Iter) { - unsigned j = VSSCCRep[*Iter]; - unsigned Label = GraphNodes[j].PointerEquivLabel; - // Ignore labels that are equal to us or non-pointers - if (j == NodeIndex || Label == 0) - continue; - if (First == (unsigned)~0) - First = Label; - else if (First != Label) - AllSame = false; - Labels->set(Label); - } - - // We either have a non-pointer, a copy of an existing node, or a new node. - // Assign the appropriate pointer equivalence label. - if (Labels->empty()) { - GraphNodes[NodeIndex].PointerEquivLabel = 0; - } else if (AllSame) { - GraphNodes[NodeIndex].PointerEquivLabel = First; - } else { - GraphNodes[NodeIndex].PointerEquivLabel = Set2PEClass[Labels]; - if (GraphNodes[NodeIndex].PointerEquivLabel == 0) { - unsigned EquivClass = PEClass++; - Set2PEClass[Labels] = EquivClass; - GraphNodes[NodeIndex].PointerEquivLabel = EquivClass; - Used = true; - } - } - if (!Used) - delete Labels; - } else { - SCCStack.push(NodeIndex); - } -} - -/// The technique used here is described in "Exploiting Pointer and Location -/// Equivalence to Optimize Pointer Analysis. In the 14th International Static -/// Analysis Symposium (SAS), August 2007." It is known as the "HU" algorithm, -/// and is equivalent to value numbering the collapsed constraint graph -/// including evaluating unions. -void Andersens::HU() { - DEBUG(dbgs() << "Beginning HU\n"); - // Build a predecessor graph. 
This is like our constraint graph with the - // edges going in the opposite direction, and there are edges for all the - // constraints, instead of just copy constraints. We also build implicit - // edges for constraints are implied but not explicit. I.E for the constraint - // a = &b, we add implicit edges *a = b. This helps us capture more cycles - for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { - Constraint &C = Constraints[i]; - if (C.Type == Constraint::AddressOf) { - GraphNodes[C.Src].AddressTaken = true; - GraphNodes[C.Src].Direct = false; - - GraphNodes[C.Dest].PointsTo->set(C.Src); - // *Dest = src edge - unsigned RefNode = C.Dest + FirstRefNode; - if (!GraphNodes[RefNode].ImplicitPredEdges) - GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>; - GraphNodes[RefNode].ImplicitPredEdges->set(C.Src); - GraphNodes[C.Src].PointedToBy->set(C.Dest); - } else if (C.Type == Constraint::Load) { - if (C.Offset == 0) { - // dest = *src edge - if (!GraphNodes[C.Dest].PredEdges) - GraphNodes[C.Dest].PredEdges = new SparseBitVector<>; - GraphNodes[C.Dest].PredEdges->set(C.Src + FirstRefNode); - } else { - GraphNodes[C.Dest].Direct = false; - } - } else if (C.Type == Constraint::Store) { - if (C.Offset == 0) { - // *dest = src edge - unsigned RefNode = C.Dest + FirstRefNode; - if (!GraphNodes[RefNode].PredEdges) - GraphNodes[RefNode].PredEdges = new SparseBitVector<>; - GraphNodes[RefNode].PredEdges->set(C.Src); - } - } else { - // Dest = Src edge and *Dest = *Src edg - if (!GraphNodes[C.Dest].PredEdges) - GraphNodes[C.Dest].PredEdges = new SparseBitVector<>; - GraphNodes[C.Dest].PredEdges->set(C.Src); - unsigned RefNode = C.Dest + FirstRefNode; - if (!GraphNodes[RefNode].ImplicitPredEdges) - GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>; - GraphNodes[RefNode].ImplicitPredEdges->set(C.Src + FirstRefNode); - } - } - PEClass = 1; - // Do SCC finding first to condense our predecessor graph - DFSNumber = 0; - Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0); - Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false); - Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false); - - for (unsigned i = 0; i < FirstRefNode; ++i) { - if (FindNode(i) == i) { - unsigned Node = VSSCCRep[i]; - if (!Node2Visited[Node]) - Condense(Node); - } - } - - // Reset tables for actual labeling - Node2DFS.clear(); - Node2Visited.clear(); - Node2Deleted.clear(); - // Pre-grow our densemap so that we don't get really bad behavior - Set2PEClass.resize(GraphNodes.size()); - - // Visit the condensed graph and generate pointer equivalence labels. - Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false); - for (unsigned i = 0; i < FirstRefNode; ++i) { - if (FindNode(i) == i) { - unsigned Node = VSSCCRep[i]; - if (!Node2Visited[Node]) - HUValNum(Node); - } - } - // PEClass nodes will be deleted by the deleting of N->PointsTo in our caller. - Set2PEClass.clear(); - DEBUG(dbgs() << "Finished HU\n"); -} - - -/// Implementation of standard Tarjan SCC algorithm as modified by Nuutilla. 
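Condense below is that Tarjan/Nuutila pattern specialized to this pass's predecessor bitmaps and node unification. The underlying shape (DFS numbers lowered through successors, a stack of pending nodes, a root popping the members of its component) in a plain standalone form over an ordinary adjacency list, as a sketch only:

#include <algorithm>
#include <cstdio>
#include <stack>
#include <vector>

struct SCCFinder {
  const std::vector<std::vector<unsigned>> &Adj;
  std::vector<unsigned> DFS;                 // 0 = not yet visited
  std::vector<bool> Deleted;                 // assigned to a component
  std::vector<unsigned> Rep;                 // component representative
  std::stack<unsigned> Stack;
  unsigned Counter = 0;

  explicit SCCFinder(const std::vector<std::vector<unsigned>> &A)
      : Adj(A), DFS(A.size(), 0), Deleted(A.size(), false), Rep(A.size()) {}

  void visit(unsigned N) {
    unsigned MyDFS = DFS[N] = ++Counter;
    for (unsigned Succ : Adj[N])
      if (!Deleted[Succ]) {
        if (DFS[Succ] == 0)
          visit(Succ);
        DFS[N] = std::min(DFS[N], DFS[Succ]);
      }
    if (DFS[N] != MyDFS) {                   // not a root; leave for the root
      Stack.push(N);
      return;
    }
    Rep[N] = N;                              // root: pop this component
    Deleted[N] = true;
    while (!Stack.empty() && DFS[Stack.top()] >= MyDFS) {
      Rep[Stack.top()] = N;
      Deleted[Stack.top()] = true;
      Stack.pop();
    }
  }
};

int main() {
  std::vector<std::vector<unsigned>> Adj = {{1}, {2}, {0}, {1}}; // 0->1->2->0, 3->1
  SCCFinder S(Adj);
  for (unsigned i = 0, e = Adj.size(); i != e; ++i)
    if (S.DFS[i] == 0)
      S.visit(i);
  for (unsigned i = 0, e = Adj.size(); i != e; ++i)
    std::printf("node %u -> rep %u\n", i, S.Rep[i]); // {0,1,2} share rep 0
  return 0;
}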
-void Andersens::Condense(unsigned NodeIndex) { - unsigned MyDFS = DFSNumber++; - Node *N = &GraphNodes[NodeIndex]; - Node2Visited[NodeIndex] = true; - Node2DFS[NodeIndex] = MyDFS; - - // First process all our explicit edges - if (N->PredEdges) - for (SparseBitVector<>::iterator Iter = N->PredEdges->begin(); - Iter != N->PredEdges->end(); - ++Iter) { - unsigned j = VSSCCRep[*Iter]; - if (!Node2Deleted[j]) { - if (!Node2Visited[j]) - Condense(j); - if (Node2DFS[NodeIndex] > Node2DFS[j]) - Node2DFS[NodeIndex] = Node2DFS[j]; - } - } - - // Now process all the implicit edges - if (N->ImplicitPredEdges) - for (SparseBitVector<>::iterator Iter = N->ImplicitPredEdges->begin(); - Iter != N->ImplicitPredEdges->end(); - ++Iter) { - unsigned j = VSSCCRep[*Iter]; - if (!Node2Deleted[j]) { - if (!Node2Visited[j]) - Condense(j); - if (Node2DFS[NodeIndex] > Node2DFS[j]) - Node2DFS[NodeIndex] = Node2DFS[j]; - } - } - - // See if we found any cycles - if (MyDFS == Node2DFS[NodeIndex]) { - while (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS) { - unsigned CycleNodeIndex = SCCStack.top(); - Node *CycleNode = &GraphNodes[CycleNodeIndex]; - VSSCCRep[CycleNodeIndex] = NodeIndex; - // Unify the nodes - N->Direct &= CycleNode->Direct; - - *(N->PointsTo) |= CycleNode->PointsTo; - delete CycleNode->PointsTo; - CycleNode->PointsTo = NULL; - if (CycleNode->PredEdges) { - if (!N->PredEdges) - N->PredEdges = new SparseBitVector<>; - *(N->PredEdges) |= CycleNode->PredEdges; - delete CycleNode->PredEdges; - CycleNode->PredEdges = NULL; - } - if (CycleNode->ImplicitPredEdges) { - if (!N->ImplicitPredEdges) - N->ImplicitPredEdges = new SparseBitVector<>; - *(N->ImplicitPredEdges) |= CycleNode->ImplicitPredEdges; - delete CycleNode->ImplicitPredEdges; - CycleNode->ImplicitPredEdges = NULL; - } - SCCStack.pop(); - } - - Node2Deleted[NodeIndex] = true; - - // Set up number of incoming edges for other nodes - if (N->PredEdges) - for (SparseBitVector<>::iterator Iter = N->PredEdges->begin(); - Iter != N->PredEdges->end(); - ++Iter) - ++GraphNodes[VSSCCRep[*Iter]].NumInEdges; - } else { - SCCStack.push(NodeIndex); - } -} - -void Andersens::HUValNum(unsigned NodeIndex) { - Node *N = &GraphNodes[NodeIndex]; - Node2Visited[NodeIndex] = true; - - // Eliminate dereferences of non-pointers for those non-pointers we have - // already identified. These are ref nodes whose non-ref node: - // 1. Has already been visited determined to point to nothing (and thus, a - // dereference of it must point to nothing) - // 2. Any direct node with no predecessor edges in our graph and with no - // points-to set (since it can't point to anything either, being that it - // receives no points-to sets and has none). - if (NodeIndex >= FirstRefNode) { - unsigned j = VSSCCRep[FindNode(NodeIndex - FirstRefNode)]; - if ((Node2Visited[j] && !GraphNodes[j].PointerEquivLabel) - || (GraphNodes[j].Direct && !GraphNodes[j].PredEdges - && GraphNodes[j].PointsTo->empty())){ - return; - } - } - // Process all our explicit edges - if (N->PredEdges) - for (SparseBitVector<>::iterator Iter = N->PredEdges->begin(); - Iter != N->PredEdges->end(); - ++Iter) { - unsigned j = VSSCCRep[*Iter]; - if (!Node2Visited[j]) - HUValNum(j); - - // If this edge turned out to be the same as us, or got no pointer - // equivalence label (and thus points to nothing) , just decrement our - // incoming edges and continue. 
- if (j == NodeIndex || GraphNodes[j].PointerEquivLabel == 0) { - --GraphNodes[j].NumInEdges; - continue; - } - - *(N->PointsTo) |= GraphNodes[j].PointsTo; - - // If we didn't end up storing this in the hash, and we're done with all - // the edges, we don't need the points-to set anymore. - --GraphNodes[j].NumInEdges; - if (!GraphNodes[j].NumInEdges && !GraphNodes[j].StoredInHash) { - delete GraphNodes[j].PointsTo; - GraphNodes[j].PointsTo = NULL; - } - } - // If this isn't a direct node, generate a fresh variable. - if (!N->Direct) { - N->PointsTo->set(FirstRefNode + NodeIndex); - } - - // See If we have something equivalent to us, if not, generate a new - // equivalence class. - if (N->PointsTo->empty()) { - delete N->PointsTo; - N->PointsTo = NULL; - } else { - if (N->Direct) { - N->PointerEquivLabel = Set2PEClass[N->PointsTo]; - if (N->PointerEquivLabel == 0) { - unsigned EquivClass = PEClass++; - N->StoredInHash = true; - Set2PEClass[N->PointsTo] = EquivClass; - N->PointerEquivLabel = EquivClass; - } - } else { - N->PointerEquivLabel = PEClass++; - } - } -} - -/// Rewrite our list of constraints so that pointer equivalent nodes are -/// replaced by their the pointer equivalence class representative. -void Andersens::RewriteConstraints() { - std::vector<Constraint> NewConstraints; - DenseSet<Constraint, ConstraintKeyInfo> Seen; - - PEClass2Node.clear(); - PENLEClass2Node.clear(); - - // We may have from 1 to Graphnodes + 1 equivalence classes. - PEClass2Node.insert(PEClass2Node.begin(), GraphNodes.size() + 1, -1); - PENLEClass2Node.insert(PENLEClass2Node.begin(), GraphNodes.size() + 1, -1); - - // Rewrite constraints, ignoring non-pointer constraints, uniting equivalent - // nodes, and rewriting constraints to use the representative nodes. - for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { - Constraint &C = Constraints[i]; - unsigned RHSNode = FindNode(C.Src); - unsigned LHSNode = FindNode(C.Dest); - unsigned RHSLabel = GraphNodes[VSSCCRep[RHSNode]].PointerEquivLabel; - unsigned LHSLabel = GraphNodes[VSSCCRep[LHSNode]].PointerEquivLabel; - - // First we try to eliminate constraints for things we can prove don't point - // to anything. - if (LHSLabel == 0) { - DEBUG(PrintNode(&GraphNodes[LHSNode])); - DEBUG(dbgs() << " is a non-pointer, ignoring constraint.\n"); - continue; - } - if (RHSLabel == 0) { - DEBUG(PrintNode(&GraphNodes[RHSNode])); - DEBUG(dbgs() << " is a non-pointer, ignoring constraint.\n"); - continue; - } - // This constraint may be useless, and it may become useless as we translate - // it. - if (C.Src == C.Dest && C.Type == Constraint::Copy) - continue; - - C.Src = FindEquivalentNode(RHSNode, RHSLabel); - C.Dest = FindEquivalentNode(FindNode(LHSNode), LHSLabel); - if ((C.Src == C.Dest && C.Type == Constraint::Copy) - || Seen.count(C)) - continue; - - Seen.insert(C); - NewConstraints.push_back(C); - } - Constraints.swap(NewConstraints); - PEClass2Node.clear(); -} - -/// See if we have a node that is pointer equivalent to the one being asked -/// about, and if so, unite them and return the equivalent node. Otherwise, -/// return the original node. -unsigned Andersens::FindEquivalentNode(unsigned NodeIndex, - unsigned NodeLabel) { - if (!GraphNodes[NodeIndex].AddressTaken) { - if (PEClass2Node[NodeLabel] != -1) { - // We found an existing node with the same pointer label, so unify them. - // We specifically request that Union-By-Rank not be used so that - // PEClass2Node[NodeLabel] U= NodeIndex and not the other way around. 
- return UniteNodes(PEClass2Node[NodeLabel], NodeIndex, false); - } else { - PEClass2Node[NodeLabel] = NodeIndex; - PENLEClass2Node[NodeLabel] = NodeIndex; - } - } else if (PENLEClass2Node[NodeLabel] == -1) { - PENLEClass2Node[NodeLabel] = NodeIndex; - } - - return NodeIndex; -} - -void Andersens::PrintLabels() const { - for (unsigned i = 0; i < GraphNodes.size(); ++i) { - if (i < FirstRefNode) { - PrintNode(&GraphNodes[i]); - } else if (i < FirstAdrNode) { - DEBUG(dbgs() << "REF("); - PrintNode(&GraphNodes[i-FirstRefNode]); - DEBUG(dbgs() <<")"); - } else { - DEBUG(dbgs() << "ADR("); - PrintNode(&GraphNodes[i-FirstAdrNode]); - DEBUG(dbgs() <<")"); - } - - DEBUG(dbgs() << " has pointer label " << GraphNodes[i].PointerEquivLabel - << " and SCC rep " << VSSCCRep[i] - << " and is " << (GraphNodes[i].Direct ? "Direct" : "Not direct") - << "\n"); - } -} - -/// The technique used here is described in "The Ant and the -/// Grasshopper: Fast and Accurate Pointer Analysis for Millions of -/// Lines of Code. In Programming Language Design and Implementation -/// (PLDI), June 2007." It is known as the "HCD" (Hybrid Cycle -/// Detection) algorithm. It is called a hybrid because it performs an -/// offline analysis and uses its results during the solving (online) -/// phase. This is just the offline portion; the results of this -/// operation are stored in SDT and are later used in SolveContraints() -/// and UniteNodes(). -void Andersens::HCD() { - DEBUG(dbgs() << "Starting HCD.\n"); - HCDSCCRep.resize(GraphNodes.size()); - - for (unsigned i = 0; i < GraphNodes.size(); ++i) { - GraphNodes[i].Edges = new SparseBitVector<>; - HCDSCCRep[i] = i; - } - - for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { - Constraint &C = Constraints[i]; - assert (C.Src < GraphNodes.size() && C.Dest < GraphNodes.size()); - if (C.Type == Constraint::AddressOf) { - continue; - } else if (C.Type == Constraint::Load) { - if( C.Offset == 0 ) - GraphNodes[C.Dest].Edges->set(C.Src + FirstRefNode); - } else if (C.Type == Constraint::Store) { - if( C.Offset == 0 ) - GraphNodes[C.Dest + FirstRefNode].Edges->set(C.Src); - } else { - GraphNodes[C.Dest].Edges->set(C.Src); - } - } - - Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0); - Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false); - Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false); - SDT.insert(SDT.begin(), GraphNodes.size() / 2, -1); - - DFSNumber = 0; - for (unsigned i = 0; i < GraphNodes.size(); ++i) { - unsigned Node = HCDSCCRep[i]; - if (!Node2Deleted[Node]) - Search(Node); - } - - for (unsigned i = 0; i < GraphNodes.size(); ++i) - if (GraphNodes[i].Edges != NULL) { - delete GraphNodes[i].Edges; - GraphNodes[i].Edges = NULL; - } - - while( !SCCStack.empty() ) - SCCStack.pop(); - - Node2DFS.clear(); - Node2Visited.clear(); - Node2Deleted.clear(); - HCDSCCRep.clear(); - DEBUG(dbgs() << "HCD complete.\n"); -} - -// Component of HCD: -// Use Nuutila's variant of Tarjan's algorithm to detect -// Strongly-Connected Components (SCCs). For non-trivial SCCs -// containing ref nodes, insert the appropriate information in SDT. 
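The handoff from this offline phase to the solver can be reduced to a tiny sketch (indices and containers simplified): Search below records, for an SCC containing both a real node p and a ref node *q, the entry SDT[q] = p; during solving, anything newly added to q's points-to set is then united with p directly instead of waiting for online cycle detection to rediscover the cycle.

#include <cstdio>
#include <vector>

int main() {
  const unsigned NumRealNodes = 8;
  std::vector<int> SDT(NumRealNodes, -1);  // -1: no offline cycle through *v

  unsigned P = 2, Q = 5;                   // hypothetical node indices
  SDT[Q] = P;                              // offline: an SCC contained {p, *q}

  // Online: Q's points-to set just gained node 7; 7 is therefore in a cycle
  // with P and can be unified with it immediately.
  unsigned NewMember = 7;
  if (SDT[Q] >= 0)
    std::printf("unite(%d, %u)\n", SDT[Q], NewMember);
  return 0;
}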
-void Andersens::Search(unsigned Node) { - unsigned MyDFS = DFSNumber++; - - Node2Visited[Node] = true; - Node2DFS[Node] = MyDFS; - - for (SparseBitVector<>::iterator Iter = GraphNodes[Node].Edges->begin(), - End = GraphNodes[Node].Edges->end(); - Iter != End; - ++Iter) { - unsigned J = HCDSCCRep[*Iter]; - assert(GraphNodes[J].isRep() && "Debug check; must be representative"); - if (!Node2Deleted[J]) { - if (!Node2Visited[J]) - Search(J); - if (Node2DFS[Node] > Node2DFS[J]) - Node2DFS[Node] = Node2DFS[J]; - } - } - - if( MyDFS != Node2DFS[Node] ) { - SCCStack.push(Node); - return; - } - - // This node is the root of a SCC, so process it. - // - // If the SCC is "non-trivial" (not a singleton) and contains a reference - // node, we place this SCC into SDT. We unite the nodes in any case. - if (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS) { - SparseBitVector<> SCC; - - SCC.set(Node); - - bool Ref = (Node >= FirstRefNode); - - Node2Deleted[Node] = true; - - do { - unsigned P = SCCStack.top(); SCCStack.pop(); - Ref |= (P >= FirstRefNode); - SCC.set(P); - HCDSCCRep[P] = Node; - } while (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS); - - if (Ref) { - unsigned Rep = SCC.find_first(); - assert(Rep < FirstRefNode && "The SCC didn't have a non-Ref node!"); - - SparseBitVector<>::iterator i = SCC.begin(); - - // Skip over the non-ref nodes - while( *i < FirstRefNode ) - ++i; - - while( i != SCC.end() ) - SDT[ (*i++) - FirstRefNode ] = Rep; - } - } -} - - -/// Optimize the constraints by performing offline variable substitution and -/// other optimizations. -void Andersens::OptimizeConstraints() { - DEBUG(dbgs() << "Beginning constraint optimization\n"); - - SDTActive = false; - - // Function related nodes need to stay in the same relative position and can't - // be location equivalent. - for (std::map<unsigned, unsigned>::iterator Iter = MaxK.begin(); - Iter != MaxK.end(); - ++Iter) { - for (unsigned i = Iter->first; - i != Iter->first + Iter->second; - ++i) { - GraphNodes[i].AddressTaken = true; - GraphNodes[i].Direct = false; - } - } - - ClumpAddressTaken(); - FirstRefNode = GraphNodes.size(); - FirstAdrNode = FirstRefNode + GraphNodes.size(); - GraphNodes.insert(GraphNodes.end(), 2 * GraphNodes.size(), - Node(false)); - VSSCCRep.resize(GraphNodes.size()); - for (unsigned i = 0; i < GraphNodes.size(); ++i) { - VSSCCRep[i] = i; - } - HVN(); - for (unsigned i = 0; i < GraphNodes.size(); ++i) { - Node *N = &GraphNodes[i]; - delete N->PredEdges; - N->PredEdges = NULL; - delete N->ImplicitPredEdges; - N->ImplicitPredEdges = NULL; - } -#undef DEBUG_TYPE -#define DEBUG_TYPE "anders-aa-labels" - DEBUG(PrintLabels()); -#undef DEBUG_TYPE -#define DEBUG_TYPE "anders-aa" - RewriteConstraints(); - // Delete the adr nodes. 
- GraphNodes.resize(FirstRefNode * 2); - - // Now perform HU - for (unsigned i = 0; i < GraphNodes.size(); ++i) { - Node *N = &GraphNodes[i]; - if (FindNode(i) == i) { - N->PointsTo = new SparseBitVector<>; - N->PointedToBy = new SparseBitVector<>; - // Reset our labels - } - VSSCCRep[i] = i; - N->PointerEquivLabel = 0; - } - HU(); -#undef DEBUG_TYPE -#define DEBUG_TYPE "anders-aa-labels" - DEBUG(PrintLabels()); -#undef DEBUG_TYPE -#define DEBUG_TYPE "anders-aa" - RewriteConstraints(); - for (unsigned i = 0; i < GraphNodes.size(); ++i) { - if (FindNode(i) == i) { - Node *N = &GraphNodes[i]; - delete N->PointsTo; - N->PointsTo = NULL; - delete N->PredEdges; - N->PredEdges = NULL; - delete N->ImplicitPredEdges; - N->ImplicitPredEdges = NULL; - delete N->PointedToBy; - N->PointedToBy = NULL; - } - } - - // perform Hybrid Cycle Detection (HCD) - HCD(); - SDTActive = true; - - // No longer any need for the upper half of GraphNodes (for ref nodes). - GraphNodes.erase(GraphNodes.begin() + FirstRefNode, GraphNodes.end()); - - // HCD complete. - - DEBUG(dbgs() << "Finished constraint optimization\n"); - FirstRefNode = 0; - FirstAdrNode = 0; -} - -/// Unite pointer but not location equivalent variables, now that the constraint -/// graph is built. -void Andersens::UnitePointerEquivalences() { - DEBUG(dbgs() << "Uniting remaining pointer equivalences\n"); - for (unsigned i = 0; i < GraphNodes.size(); ++i) { - if (GraphNodes[i].AddressTaken && GraphNodes[i].isRep()) { - unsigned Label = GraphNodes[i].PointerEquivLabel; - - if (Label && PENLEClass2Node[Label] != -1) - UniteNodes(i, PENLEClass2Node[Label]); - } - } - DEBUG(dbgs() << "Finished remaining pointer equivalences\n"); - PENLEClass2Node.clear(); -} - -/// Create the constraint graph used for solving points-to analysis. -/// -void Andersens::CreateConstraintGraph() { - for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { - Constraint &C = Constraints[i]; - assert (C.Src < GraphNodes.size() && C.Dest < GraphNodes.size()); - if (C.Type == Constraint::AddressOf) - GraphNodes[C.Dest].PointsTo->set(C.Src); - else if (C.Type == Constraint::Load) - GraphNodes[C.Src].Constraints.push_back(C); - else if (C.Type == Constraint::Store) - GraphNodes[C.Dest].Constraints.push_back(C); - else if (C.Offset != 0) - GraphNodes[C.Src].Constraints.push_back(C); - else - GraphNodes[C.Src].Edges->set(C.Dest); - } -} - -// Perform DFS and cycle detection. -bool Andersens::QueryNode(unsigned Node) { - assert(GraphNodes[Node].isRep() && "Querying a non-rep node"); - unsigned OurDFS = ++DFSNumber; - SparseBitVector<> ToErase; - SparseBitVector<> NewEdges; - Tarjan2DFS[Node] = OurDFS; - - // Changed denotes a change from a recursive call that we will bubble up. - // Merged is set if we actually merge a node ourselves. - bool Changed = false, Merged = false; - - for (SparseBitVector<>::iterator bi = GraphNodes[Node].Edges->begin(); - bi != GraphNodes[Node].Edges->end(); - ++bi) { - unsigned RepNode = FindNode(*bi); - // If this edge points to a non-representative node but we are - // already planning to add an edge to its representative, we have no - // need for this edge anymore. - if (RepNode != *bi && NewEdges.test(RepNode)){ - ToErase.set(*bi); - continue; - } - - // Continue about our DFS. 
- if (!Tarjan2Deleted[RepNode]){ - if (Tarjan2DFS[RepNode] == 0) { - Changed |= QueryNode(RepNode); - // May have been changed by QueryNode - RepNode = FindNode(RepNode); - } - if (Tarjan2DFS[RepNode] < Tarjan2DFS[Node]) - Tarjan2DFS[Node] = Tarjan2DFS[RepNode]; - } - - // We may have just discovered that this node is part of a cycle, in - // which case we can also erase it. - if (RepNode != *bi) { - ToErase.set(*bi); - NewEdges.set(RepNode); - } - } - - GraphNodes[Node].Edges->intersectWithComplement(ToErase); - GraphNodes[Node].Edges |= NewEdges; - - // If this node is a root of a non-trivial SCC, place it on our - // worklist to be processed. - if (OurDFS == Tarjan2DFS[Node]) { - while (!SCCStack.empty() && Tarjan2DFS[SCCStack.top()] >= OurDFS) { - Node = UniteNodes(Node, SCCStack.top()); - - SCCStack.pop(); - Merged = true; - } - Tarjan2Deleted[Node] = true; - - if (Merged) - NextWL->insert(&GraphNodes[Node]); - } else { - SCCStack.push(Node); - } - - return(Changed | Merged); -} - -/// SolveConstraints - This stage iteratively processes the constraints list -/// propagating constraints (adding edges to the Nodes in the points-to graph) -/// until a fixed point is reached. -/// -/// We use a variant of the technique called "Lazy Cycle Detection", which is -/// described in "The Ant and the Grasshopper: Fast and Accurate Pointer -/// Analysis for Millions of Lines of Code. In Programming Language Design and -/// Implementation (PLDI), June 2007." -/// The paper describes performing cycle detection one node at a time, which can -/// be expensive if there are no cycles, but there are long chains of nodes that -/// it heuristically believes are cycles (because it will DFS from each node -/// without state from previous nodes). -/// Instead, we use the heuristic to build a worklist of nodes to check, then -/// cycle detect them all at the same time to do this more cheaply. This -/// catches cycles slightly later than the original technique did, but does it -/// make significantly cheaper. - -void Andersens::SolveConstraints() { - CurrWL = &w1; - NextWL = &w2; - - OptimizeConstraints(); -#undef DEBUG_TYPE -#define DEBUG_TYPE "anders-aa-constraints" - DEBUG(PrintConstraints()); -#undef DEBUG_TYPE -#define DEBUG_TYPE "anders-aa" - - for (unsigned i = 0; i < GraphNodes.size(); ++i) { - Node *N = &GraphNodes[i]; - N->PointsTo = new SparseBitVector<>; - N->OldPointsTo = new SparseBitVector<>; - N->Edges = new SparseBitVector<>; - } - CreateConstraintGraph(); - UnitePointerEquivalences(); - assert(SCCStack.empty() && "SCC Stack should be empty by now!"); - Node2DFS.clear(); - Node2Deleted.clear(); - Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0); - Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false); - DFSNumber = 0; - DenseSet<Constraint, ConstraintKeyInfo> Seen; - DenseSet<std::pair<unsigned,unsigned>, PairKeyInfo> EdgesChecked; - - // Order graph and add initial nodes to work list. - for (unsigned i = 0; i < GraphNodes.size(); ++i) { - Node *INode = &GraphNodes[i]; - - // Add to work list if it's a representative and can contribute to the - // calculation right now. 
- if (INode->isRep() && !INode->PointsTo->empty() - && (!INode->Edges->empty() || !INode->Constraints.empty())) { - INode->Stamp(); - CurrWL->insert(INode); - } - } - std::queue<unsigned int> TarjanWL; -#if !FULL_UNIVERSAL - // "Rep and special variables" - in order for HCD to maintain conservative - // results when !FULL_UNIVERSAL, we need to treat the special variables in - // the same way that the !FULL_UNIVERSAL tweak does throughout the rest of - // the analysis - it's ok to add edges from the special nodes, but never - // *to* the special nodes. - std::vector<unsigned int> RSV; -#endif - while( !CurrWL->empty() ) { - DEBUG(dbgs() << "Starting iteration #" << ++NumIters << "\n"); - - Node* CurrNode; - unsigned CurrNodeIndex; - - // Actual cycle checking code. We cycle check all of the lazy cycle - // candidates from the last iteration in one go. - if (!TarjanWL.empty()) { - DFSNumber = 0; - - Tarjan2DFS.clear(); - Tarjan2Deleted.clear(); - while (!TarjanWL.empty()) { - unsigned int ToTarjan = TarjanWL.front(); - TarjanWL.pop(); - if (!Tarjan2Deleted[ToTarjan] - && GraphNodes[ToTarjan].isRep() - && Tarjan2DFS[ToTarjan] == 0) - QueryNode(ToTarjan); - } - } - - // Add to work list if it's a representative and can contribute to the - // calculation right now. - while( (CurrNode = CurrWL->pop()) != NULL ) { - CurrNodeIndex = CurrNode - &GraphNodes[0]; - CurrNode->Stamp(); - - - // Figure out the changed points to bits - SparseBitVector<> CurrPointsTo; - CurrPointsTo.intersectWithComplement(CurrNode->PointsTo, - CurrNode->OldPointsTo); - if (CurrPointsTo.empty()) - continue; - - *(CurrNode->OldPointsTo) |= CurrPointsTo; - - // Check the offline-computed equivalencies from HCD. - bool SCC = false; - unsigned Rep; - - if (SDT[CurrNodeIndex] >= 0) { - SCC = true; - Rep = FindNode(SDT[CurrNodeIndex]); - -#if !FULL_UNIVERSAL - RSV.clear(); -#endif - for (SparseBitVector<>::iterator bi = CurrPointsTo.begin(); - bi != CurrPointsTo.end(); ++bi) { - unsigned Node = FindNode(*bi); -#if !FULL_UNIVERSAL - if (Node < NumberSpecialNodes) { - RSV.push_back(Node); - continue; - } -#endif - Rep = UniteNodes(Rep,Node); - } -#if !FULL_UNIVERSAL - RSV.push_back(Rep); -#endif - - NextWL->insert(&GraphNodes[Rep]); - - if ( ! CurrNode->isRep() ) - continue; - } - - Seen.clear(); - - /* Now process the constraints for this node. */ - for (std::list<Constraint>::iterator li = CurrNode->Constraints.begin(); - li != CurrNode->Constraints.end(); ) { - li->Src = FindNode(li->Src); - li->Dest = FindNode(li->Dest); - - // Delete redundant constraints - if( Seen.count(*li) ) { - std::list<Constraint>::iterator lk = li; li++; - - CurrNode->Constraints.erase(lk); - ++NumErased; - continue; - } - Seen.insert(*li); - - // Src and Dest will be the vars we are going to process. - // This may look a bit ugly, but what it does is allow us to process - // both store and load constraints with the same code. - // Load constraints say that every member of our RHS solution has K - // added to it, and that variable gets an edge to LHS. We also union - // RHS+K's solution into the LHS solution. - // Store constraints say that every member of our LHS solution has K - // added to it, and that variable gets an edge from RHS. We also union - // RHS's solution into the LHS+K solution. 
- unsigned *Src; - unsigned *Dest; - unsigned K = li->Offset; - unsigned CurrMember; - if (li->Type == Constraint::Load) { - Src = &CurrMember; - Dest = &li->Dest; - } else if (li->Type == Constraint::Store) { - Src = &li->Src; - Dest = &CurrMember; - } else { - // TODO Handle offseted copy constraint - li++; - continue; - } - - // See if we can use Hybrid Cycle Detection (that is, check - // if it was a statically detected offline equivalence that - // involves pointers; if so, remove the redundant constraints). - if( SCC && K == 0 ) { -#if FULL_UNIVERSAL - CurrMember = Rep; - - if (GraphNodes[*Src].Edges->test_and_set(*Dest)) - if (GraphNodes[*Dest].PointsTo |= *(GraphNodes[*Src].PointsTo)) - NextWL->insert(&GraphNodes[*Dest]); -#else - for (unsigned i=0; i < RSV.size(); ++i) { - CurrMember = RSV[i]; - - if (*Dest < NumberSpecialNodes) - continue; - if (GraphNodes[*Src].Edges->test_and_set(*Dest)) - if (GraphNodes[*Dest].PointsTo |= *(GraphNodes[*Src].PointsTo)) - NextWL->insert(&GraphNodes[*Dest]); - } -#endif - // since all future elements of the points-to set will be - // equivalent to the current ones, the complex constraints - // become redundant. - // - std::list<Constraint>::iterator lk = li; li++; -#if !FULL_UNIVERSAL - // In this case, we can still erase the constraints when the - // elements of the points-to sets are referenced by *Dest, - // but not when they are referenced by *Src (i.e. for a Load - // constraint). This is because if another special variable is - // put into the points-to set later, we still need to add the - // new edge from that special variable. - if( lk->Type != Constraint::Load) -#endif - GraphNodes[CurrNodeIndex].Constraints.erase(lk); - } else { - const SparseBitVector<> &Solution = CurrPointsTo; - - for (SparseBitVector<>::iterator bi = Solution.begin(); - bi != Solution.end(); - ++bi) { - CurrMember = *bi; - - // Need to increment the member by K since that is where we are - // supposed to copy to/from. Note that in positive weight cycles, - // which occur in address taking of fields, K can go past - // MaxK[CurrMember] elements, even though that is all it could point - // to. - if (K > 0 && K > MaxK[CurrMember]) - continue; - else - CurrMember = FindNode(CurrMember + K); - - // Add an edge to the graph, so we can just do regular - // bitmap ior next time. It may also let us notice a cycle. -#if !FULL_UNIVERSAL - if (*Dest < NumberSpecialNodes) - continue; -#endif - if (GraphNodes[*Src].Edges->test_and_set(*Dest)) - if (GraphNodes[*Dest].PointsTo |= *(GraphNodes[*Src].PointsTo)) - NextWL->insert(&GraphNodes[*Dest]); - - } - li++; - } - } - SparseBitVector<> NewEdges; - SparseBitVector<> ToErase; - - // Now all we have left to do is propagate points-to info along the - // edges, erasing the redundant edges. - for (SparseBitVector<>::iterator bi = CurrNode->Edges->begin(); - bi != CurrNode->Edges->end(); - ++bi) { - - unsigned DestVar = *bi; - unsigned Rep = FindNode(DestVar); - - // If we ended up with this node as our destination, or we've already - // got an edge for the representative, delete the current edge. - if (Rep == CurrNodeIndex || - (Rep != DestVar && NewEdges.test(Rep))) { - ToErase.set(DestVar); - continue; - } - - std::pair<unsigned,unsigned> edge(CurrNodeIndex,Rep); - - // This is where we do lazy cycle detection. - // If this is a cycle candidate (equal points-to sets and this - // particular edge has not been cycle-checked previously), add to the - // list to check for cycles on the next iteration. 
- if (!EdgesChecked.count(edge) && - *(GraphNodes[Rep].PointsTo) == *(CurrNode->PointsTo)) { - EdgesChecked.insert(edge); - TarjanWL.push(Rep); - } - // Union the points-to sets into the dest -#if !FULL_UNIVERSAL - if (Rep >= NumberSpecialNodes) -#endif - if (GraphNodes[Rep].PointsTo |= CurrPointsTo) { - NextWL->insert(&GraphNodes[Rep]); - } - // If this edge's destination was collapsed, rewrite the edge. - if (Rep != DestVar) { - ToErase.set(DestVar); - NewEdges.set(Rep); - } - } - CurrNode->Edges->intersectWithComplement(ToErase); - CurrNode->Edges |= NewEdges; - } - - // Switch to other work list. - WorkList* t = CurrWL; CurrWL = NextWL; NextWL = t; - } - - - Node2DFS.clear(); - Node2Deleted.clear(); - for (unsigned i = 0; i < GraphNodes.size(); ++i) { - Node *N = &GraphNodes[i]; - delete N->OldPointsTo; - delete N->Edges; - } - SDTActive = false; - SDT.clear(); -} - -//===----------------------------------------------------------------------===// -// Union-Find -//===----------------------------------------------------------------------===// - -// Unite nodes First and Second, returning the one which is now the -// representative node. First and Second are indexes into GraphNodes -unsigned Andersens::UniteNodes(unsigned First, unsigned Second, - bool UnionByRank) { - assert (First < GraphNodes.size() && Second < GraphNodes.size() && - "Attempting to merge nodes that don't exist"); - - Node *FirstNode = &GraphNodes[First]; - Node *SecondNode = &GraphNodes[Second]; - - assert (SecondNode->isRep() && FirstNode->isRep() && - "Trying to unite two non-representative nodes!"); - if (First == Second) - return First; - - if (UnionByRank) { - int RankFirst = (int) FirstNode ->NodeRep; - int RankSecond = (int) SecondNode->NodeRep; - - // Rank starts at -1 and gets decremented as it increases. - // Translation: higher rank, lower NodeRep value, which is always negative. - if (RankFirst > RankSecond) { - unsigned t = First; First = Second; Second = t; - Node* tp = FirstNode; FirstNode = SecondNode; SecondNode = tp; - } else if (RankFirst == RankSecond) { - FirstNode->NodeRep = (unsigned) (RankFirst - 1); - } - } - - SecondNode->NodeRep = First; -#if !FULL_UNIVERSAL - if (First >= NumberSpecialNodes) -#endif - if (FirstNode->PointsTo && SecondNode->PointsTo) - FirstNode->PointsTo |= *(SecondNode->PointsTo); - if (FirstNode->Edges && SecondNode->Edges) - FirstNode->Edges |= *(SecondNode->Edges); - if (!SecondNode->Constraints.empty()) - FirstNode->Constraints.splice(FirstNode->Constraints.begin(), - SecondNode->Constraints); - if (FirstNode->OldPointsTo) { - delete FirstNode->OldPointsTo; - FirstNode->OldPointsTo = new SparseBitVector<>; - } - - // Destroy interesting parts of the merged-from node. 
- delete SecondNode->OldPointsTo; - delete SecondNode->Edges; - delete SecondNode->PointsTo; - SecondNode->Edges = NULL; - SecondNode->PointsTo = NULL; - SecondNode->OldPointsTo = NULL; - - NumUnified++; - DEBUG(dbgs() << "Unified Node "); - DEBUG(PrintNode(FirstNode)); - DEBUG(dbgs() << " and Node "); - DEBUG(PrintNode(SecondNode)); - DEBUG(dbgs() << "\n"); - - if (SDTActive) - if (SDT[Second] >= 0) { - if (SDT[First] < 0) - SDT[First] = SDT[Second]; - else { - UniteNodes( FindNode(SDT[First]), FindNode(SDT[Second]) ); - First = FindNode(First); - } - } - - return First; -} - -// Find the index into GraphNodes of the node representing Node, performing -// path compression along the way -unsigned Andersens::FindNode(unsigned NodeIndex) { - assert (NodeIndex < GraphNodes.size() - && "Attempting to find a node that can't exist"); - Node *N = &GraphNodes[NodeIndex]; - if (N->isRep()) - return NodeIndex; - else - return (N->NodeRep = FindNode(N->NodeRep)); -} - -// Find the index into GraphNodes of the node representing Node, -// don't perform path compression along the way (for Print) -unsigned Andersens::FindNode(unsigned NodeIndex) const { - assert (NodeIndex < GraphNodes.size() - && "Attempting to find a node that can't exist"); - const Node *N = &GraphNodes[NodeIndex]; - if (N->isRep()) - return NodeIndex; - else - return FindNode(N->NodeRep); -} - -//===----------------------------------------------------------------------===// -// Debugging Output -//===----------------------------------------------------------------------===// - -void Andersens::PrintNode(const Node *N) const { - if (N == &GraphNodes[UniversalSet]) { - dbgs() << "<universal>"; - return; - } else if (N == &GraphNodes[NullPtr]) { - dbgs() << "<nullptr>"; - return; - } else if (N == &GraphNodes[NullObject]) { - dbgs() << "<null>"; - return; - } - if (!N->getValue()) { - dbgs() << "artificial" << (intptr_t) N; - return; - } - - assert(N->getValue() != 0 && "Never set node label!"); - Value *V = N->getValue(); - if (Function *F = dyn_cast<Function>(V)) { - if (isa<PointerType>(F->getFunctionType()->getReturnType()) && - N == &GraphNodes[getReturnNode(F)]) { - dbgs() << F->getName() << ":retval"; - return; - } else if (F->getFunctionType()->isVarArg() && - N == &GraphNodes[getVarargNode(F)]) { - dbgs() << F->getName() << ":vararg"; - return; - } - } - - if (Instruction *I = dyn_cast<Instruction>(V)) - dbgs() << I->getParent()->getParent()->getName() << ":"; - else if (Argument *Arg = dyn_cast<Argument>(V)) - dbgs() << Arg->getParent()->getName() << ":"; - - if (V->hasName()) - dbgs() << V->getName(); - else - dbgs() << "(unnamed)"; - - if (isa<GlobalValue>(V) || isa<AllocaInst>(V) || isMalloc(V)) - if (N == &GraphNodes[getObject(V)]) - dbgs() << "<mem>"; -} -void Andersens::PrintConstraint(const Constraint &C) const { - if (C.Type == Constraint::Store) { - dbgs() << "*"; - if (C.Offset != 0) - dbgs() << "("; - } - PrintNode(&GraphNodes[C.Dest]); - if (C.Type == Constraint::Store && C.Offset != 0) - dbgs() << " + " << C.Offset << ")"; - dbgs() << " = "; - if (C.Type == Constraint::Load) { - dbgs() << "*"; - if (C.Offset != 0) - dbgs() << "("; - } - else if (C.Type == Constraint::AddressOf) - dbgs() << "&"; - PrintNode(&GraphNodes[C.Src]); - if (C.Offset != 0 && C.Type != Constraint::Store) - dbgs() << " + " << C.Offset; - if (C.Type == Constraint::Load && C.Offset != 0) - dbgs() << ")"; - dbgs() << "\n"; -} - -void Andersens::PrintConstraints() const { - dbgs() << "Constraints:\n"; - - for (unsigned i = 0, e = 
Constraints.size(); i != e; ++i) - PrintConstraint(Constraints[i]); -} - -void Andersens::PrintPointsToGraph() const { - dbgs() << "Points-to graph:\n"; - for (unsigned i = 0, e = GraphNodes.size(); i != e; ++i) { - const Node *N = &GraphNodes[i]; - if (FindNode(i) != i) { - PrintNode(N); - dbgs() << "\t--> same as "; - PrintNode(&GraphNodes[FindNode(i)]); - dbgs() << "\n"; - } else { - dbgs() << "[" << (N->PointsTo->count()) << "] "; - PrintNode(N); - dbgs() << "\t--> "; - - bool first = true; - for (SparseBitVector<>::iterator bi = N->PointsTo->begin(); - bi != N->PointsTo->end(); - ++bi) { - if (!first) - dbgs() << ", "; - PrintNode(&GraphNodes[*bi]); - first = false; - } - dbgs() << "\n"; - } - } -} diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt index 1ebb0be..007ad22 100644 --- a/lib/Analysis/IPA/CMakeLists.txt +++ b/lib/Analysis/IPA/CMakeLists.txt @@ -1,5 +1,4 @@ add_llvm_library(LLVMipa - Andersens.cpp CallGraph.cpp CallGraphSCCPass.cpp FindUsedTypes.cpp diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index ec94bc8..7b43089 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -213,7 +213,7 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) { ++NumNonAddrTakenGlobalVars; // If this global holds a pointer type, see if it is an indirect global. - if (isa<PointerType>(I->getType()->getElementType()) && + if (I->getType()->getElementType()->isPointerTy() && AnalyzeIndirectGlobalMemory(I)) ++NumIndirectGlobalVars; } @@ -231,7 +231,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, std::vector<Function*> &Readers, std::vector<Function*> &Writers, GlobalValue *OkayStoreDest) { - if (!isa<PointerType>(V->getType())) return true; + if (!V->getType()->isPointerTy()) return true; for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 9c472ae..98a436f 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Assembly/AsmAnnotationWriter.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -35,42 +36,30 @@ Pass *llvm::createIVUsersPass() { return new IVUsers(); } -/// containsAddRecFromDifferentLoop - Determine whether expression S involves a -/// subexpression that is an AddRec from a loop other than L. An outer loop -/// of L is OK, but not an inner loop nor a disjoint loop. -static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) { - // This is very common, put it first. - if (isa<SCEVConstant>(S)) - return false; - if (const SCEVCommutativeExpr *AE = dyn_cast<SCEVCommutativeExpr>(S)) { - for (unsigned int i=0; i< AE->getNumOperands(); i++) - if (containsAddRecFromDifferentLoop(AE->getOperand(i), L)) - return true; - return false; - } - if (const SCEVAddRecExpr *AE = dyn_cast<SCEVAddRecExpr>(S)) { - if (const Loop *newLoop = AE->getLoop()) { - if (newLoop == L) - return false; - // if newLoop is an outer loop of L, this is OK. - if (newLoop->contains(L)) - return false; +/// CollectSubexprs - Split S into subexpressions which can be pulled out into +/// separate registers. 
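What the split amounts to can be shown on a toy expression type (a sketch only, not the real SCEV classes): adds are flattened into their operands, and an addrec {A,+,S} contributes its start A as a separate part plus the zero-based recurrence {0,+,S} (the real code only splits when the start is non-zero).

#include <cstdio>
#include <deque>
#include <vector>

struct Expr {
  enum Kind { Leaf, Add, AddRec } K;
  const char *Name;                        // used only for leaves
  std::vector<const Expr *> Ops;           // Add: operands; AddRec: {Start, Step}
};

static std::deque<Expr> Arena;             // owns expressions created below

static const Expr *makeAddRec(const Expr *Start, const Expr *Step) {
  Arena.push_back(Expr{Expr::AddRec, "addrec", {Start, Step}});
  return &Arena.back();
}

static void collect(const Expr *S, std::vector<const Expr *> &Out) {
  if (S->K == Expr::Add) {
    for (const Expr *Op : S->Ops)          // break out add operands
      collect(Op, Out);
  } else if (S->K == Expr::AddRec) {
    collect(S->Ops[0], Out);               // pull the start out on its own...
    static const Expr Zero{Expr::Leaf, "0", {}};
    Out.push_back(makeAddRec(&Zero, S->Ops[1])); // ...and keep {0,+,Step}
  } else {
    Out.push_back(S);                      // anything else is kept whole
  }
}

int main() {
  Expr A{Expr::Leaf, "A", {}}, B{Expr::Leaf, "B", {}}, Step{Expr::Leaf, "S", {}};
  const Expr *Rec = makeAddRec(&A, &Step); // {A,+,S}
  Expr Sum{Expr::Add, "+", {&B, Rec}};     // B + {A,+,S}
  std::vector<const Expr *> Parts;
  collect(&Sum, Parts);                    // yields B, A, {0,+,S}
  std::printf("%zu parts\n", Parts.size());
  return 0;
}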
+static void CollectSubexprs(const SCEV *S, + SmallVectorImpl<const SCEV *> &Ops, + ScalarEvolution &SE) { + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + // Break out add operands. + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) + CollectSubexprs(*I, Ops, SE); + return; + } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // Split a non-zero base out of an addrec. + if (!AR->getStart()->isZero()) { + CollectSubexprs(AR->getStart(), Ops, SE); + CollectSubexprs(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()), + AR->getStepRecurrence(SE), + AR->getLoop()), Ops, SE); + return; } - return true; } - if (const SCEVUDivExpr *DE = dyn_cast<SCEVUDivExpr>(S)) - return containsAddRecFromDifferentLoop(DE->getLHS(), L) || - containsAddRecFromDifferentLoop(DE->getRHS(), L); -#if 0 - // SCEVSDivExpr has been backed out temporarily, but will be back; we'll - // need this when it is. - if (const SCEVSDivExpr *DE = dyn_cast<SCEVSDivExpr>(S)) - return containsAddRecFromDifferentLoop(DE->getLHS(), L) || - containsAddRecFromDifferentLoop(DE->getRHS(), L); -#endif - if (const SCEVCastExpr *CE = dyn_cast<SCEVCastExpr>(S)) - return containsAddRecFromDifferentLoop(CE->getOperand(), L); - return false; + + // Otherwise use the value itself. + Ops.push_back(S); } /// getSCEVStartAndStride - Compute the start and stride of this expression, @@ -89,35 +78,42 @@ static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop, if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) { for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i) if (const SCEVAddRecExpr *AddRec = - dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) { - if (AddRec->getLoop() == L) - TheAddRec = SE->getAddExpr(AddRec, TheAddRec); - else - return false; // Nested IV of some sort? - } else { + dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) + TheAddRec = SE->getAddExpr(AddRec, TheAddRec); + else Start = SE->getAddExpr(Start, AE->getOperand(i)); - } } else if (isa<SCEVAddRecExpr>(SH)) { TheAddRec = SH; } else { return false; // not analyzable. } - const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(TheAddRec); - if (!AddRec || AddRec->getLoop() != L) return false; + // Break down TheAddRec into its component parts. + SmallVector<const SCEV *, 4> Subexprs; + CollectSubexprs(TheAddRec, Subexprs, *SE); + + // Look for an addrec on the current loop among the parts. + const SCEV *AddRecStride = 0; + for (SmallVectorImpl<const SCEV *>::iterator I = Subexprs.begin(), + E = Subexprs.end(); I != E; ++I) { + const SCEV *S = *I; + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) + if (AR->getLoop() == L) { + *I = AR->getStart(); + AddRecStride = AR->getStepRecurrence(*SE); + break; + } + } + if (!AddRecStride) + return false; + + // Add up everything else into a start value (which may not be + // loop-invariant). + const SCEV *AddRecStart = SE->getAddExpr(Subexprs); // Use getSCEVAtScope to attempt to simplify other loops out of // the picture. - const SCEV *AddRecStart = AddRec->getStart(); AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop); - const SCEV *AddRecStride = AddRec->getStepRecurrence(*SE); - - // FIXME: If Start contains an SCEVAddRecExpr from a different loop, other - // than an outer loop of the current loop, reject it. LSR has no concept of - // operating on more than one loop at a time so don't confuse it with such - // expressions. 
- if (containsAddRecFromDifferentLoop(AddRecStart, L)) - return false; Start = SE->getAddExpr(Start, AddRecStart); @@ -130,7 +126,7 @@ static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop, DEBUG(dbgs() << "["; WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false); - dbgs() << "] Variable stride: " << *AddRec << "\n"); + dbgs() << "] Variable stride: " << *AddRecStride << "\n"); } Stride = AddRecStride; @@ -146,8 +142,7 @@ static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop, /// the loop, resulting in reg-reg copies (if we use the pre-inc value when we /// should use the post-inc value). static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, - Loop *L, LoopInfo *LI, DominatorTree *DT, - Pass *P) { + Loop *L, DominatorTree *DT) { // If the user is in the loop, use the preinc value. if (L->contains(User)) return false; @@ -227,7 +222,7 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { // Descend recursively, but not into PHI nodes outside the current loop. // It's important to see the entire expression outside the loop to get // choices that depend on addressing mode use right, although we won't - // consider references ouside the loop in all cases. + // consider references outside the loop in all cases. // If User is already in Processed, we don't want to recurse into it again, // but do want to record a second reference in the same instruction. bool AddUserToIVUsers = false; @@ -246,42 +241,28 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { } if (AddUserToIVUsers) { - IVUsersOfOneStride *StrideUses = IVUsesByStride[Stride]; - if (!StrideUses) { // First occurrence of this stride? - StrideOrder.push_back(Stride); - StrideUses = new IVUsersOfOneStride(Stride); - IVUses.push_back(StrideUses); - IVUsesByStride[Stride] = StrideUses; - } - // Okay, we found a user that we cannot reduce. Analyze the instruction // and decide what to do with it. If we are a use inside of the loop, use // the value before incrementation, otherwise use it after incrementation. - if (IVUseShouldUsePostIncValue(User, I, L, LI, DT, this)) { + if (IVUseShouldUsePostIncValue(User, I, L, DT)) { // The value used will be incremented by the stride more than we are // expecting, so subtract this off. const SCEV *NewStart = SE->getMinusSCEV(Start, Stride); - StrideUses->addUser(NewStart, User, I); - StrideUses->Users.back().setIsUseOfPostIncrementedValue(true); + IVUses.push_back(new IVStrideUse(this, Stride, NewStart, User, I)); + IVUses.back().setIsUseOfPostIncrementedValue(true); DEBUG(dbgs() << " USING POSTINC SCEV, START=" << *NewStart<< "\n"); } else { - StrideUses->addUser(Start, User, I); + IVUses.push_back(new IVStrideUse(this, Stride, Start, User, I)); } } } return true; } -void IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset, - Instruction *User, Value *Operand) { - IVUsersOfOneStride *StrideUses = IVUsesByStride[Stride]; - if (!StrideUses) { // First occurrence of this stride? 
- StrideOrder.push_back(Stride); - StrideUses = new IVUsersOfOneStride(Stride); - IVUses.push_back(StrideUses); - IVUsesByStride[Stride] = StrideUses; - } - IVUsesByStride[Stride]->addUser(Offset, User, Operand); +IVStrideUse &IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset, + Instruction *User, Value *Operand) { + IVUses.push_back(new IVStrideUse(this, Stride, Offset, User, Operand)); + return IVUses.back(); } IVUsers::IVUsers() @@ -315,15 +296,15 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { /// value of the OperandValToReplace of the given IVStrideUse. const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const { // Start with zero. - const SCEV *RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType()); + const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType()); // Create the basic add recurrence. - RetVal = SE->getAddRecExpr(RetVal, U.getParent()->Stride, L); + RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L); // Add the offset in a separate step, because it may be loop-variant. RetVal = SE->getAddExpr(RetVal, U.getOffset()); // For uses of post-incremented values, add an extra stride to compute // the actual replacement value. if (U.isUseOfPostIncrementedValue()) - RetVal = SE->getAddExpr(RetVal, U.getParent()->Stride); + RetVal = SE->getAddExpr(RetVal, U.getStride()); return RetVal; } @@ -332,9 +313,9 @@ const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const { /// isUseOfPostIncrementedValue flag. const SCEV *IVUsers::getCanonicalExpr(const IVStrideUse &U) const { // Start with zero. - const SCEV *RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType()); + const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType()); // Create the basic add recurrence. - RetVal = SE->getAddRecExpr(RetVal, U.getParent()->Stride, L); + RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L); // Add the offset in a separate step, because it may be loop-variant. RetVal = SE->getAddExpr(RetVal, U.getOffset()); return RetVal; @@ -349,24 +330,20 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const { } OS << ":\n"; - for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) { - std::map<const SCEV *, IVUsersOfOneStride*>::const_iterator SI = - IVUsesByStride.find(StrideOrder[Stride]); - assert(SI != IVUsesByStride.end() && "Stride doesn't exist!"); - OS << " Stride " << *SI->first->getType() << " " << *SI->first << ":\n"; - - for (ilist<IVStrideUse>::const_iterator UI = SI->second->Users.begin(), - E = SI->second->Users.end(); UI != E; ++UI) { - OS << " "; - WriteAsOperand(OS, UI->getOperandValToReplace(), false); - OS << " = "; - OS << *getReplacementExpr(*UI); - if (UI->isUseOfPostIncrementedValue()) - OS << " (post-inc)"; - OS << " in "; - UI->getUser()->print(OS); - OS << '\n'; - } + // Use a default AssemblyAnnotationWriter to suppress the default info + // comments, which aren't relevant here. 
+ AssemblyAnnotationWriter Annotator; + for (ilist<IVStrideUse>::const_iterator UI = IVUses.begin(), + E = IVUses.end(); UI != E; ++UI) { + OS << " "; + WriteAsOperand(OS, UI->getOperandValToReplace(), false); + OS << " = " + << *getReplacementExpr(*UI); + if (UI->isUseOfPostIncrementedValue()) + OS << " (post-inc)"; + OS << " in "; + UI->getUser()->print(OS, &Annotator); + OS << '\n'; } } @@ -375,37 +352,12 @@ void IVUsers::dump() const { } void IVUsers::releaseMemory() { - IVUsesByStride.clear(); - StrideOrder.clear(); Processed.clear(); IVUses.clear(); } void IVStrideUse::deleted() { // Remove this user from the list. - Parent->Users.erase(this); + Parent->IVUses.erase(this); // this now dangles! } - -void IVUsersOfOneStride::print(raw_ostream &OS) const { - OS << "IV Users of one stride:\n"; - - if (Stride) - OS << " Stride: " << *Stride << '\n'; - - OS << " Users:\n"; - - unsigned Count = 1; - - for (ilist<IVStrideUse>::const_iterator - I = Users.begin(), E = Users.end(); I != E; ++I) { - const IVStrideUse &SU = *I; - OS << " " << Count++ << '\n'; - OS << " Offset: " << *SU.getOffset() << '\n'; - OS << " Instr: " << *SU << '\n'; - } -} - -void IVUsersOfOneStride::dump() const { - print(dbgs()); -} diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 972d034..ca50a17 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -84,7 +84,7 @@ unsigned InlineCostAnalyzer::FunctionInfo:: // unsigned InlineCostAnalyzer::FunctionInfo:: CountCodeReductionForAlloca(Value *V) { - if (!isa<PointerType>(V->getType())) return 0; // Not a pointer + if (!V->getType()->isPointerTy()) return 0; // Not a pointer unsigned Reduction = 0; for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ Instruction *I = cast<Instruction>(*UI); @@ -175,7 +175,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { this->usesDynamicAlloca = true; } - if (isa<ExtractElementInst>(II) || isa<VectorType>(II->getType())) + if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy()) ++NumVectorInsts; if (const CastInst *CI = dyn_cast<CastInst>(II)) { diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index b53ac13..8288e96 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -194,11 +194,10 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const Type *ITy = GetCompareTy(LHS); // icmp X, X -> true/false - if (LHS == RHS) + // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false + // because X could be 0. + if (LHS == RHS || isa<UndefValue>(RHS)) return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred)); - - if (isa<UndefValue>(RHS)) // X icmp undef -> undef - return UndefValue::get(ITy); // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value // addresses never equal each other! We already know that Op0 != Op1. @@ -283,6 +282,32 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, // True if unordered. return ConstantInt::getTrue(CFP->getContext()); } + // Check whether the constant is an infinity. + if (CFP->getValueAPF().isInfinity()) { + if (CFP->getValueAPF().isNegative()) { + switch (Pred) { + case FCmpInst::FCMP_OLT: + // No value is ordered and less than negative infinity. + return ConstantInt::getFalse(CFP->getContext()); + case FCmpInst::FCMP_UGE: + // All values are unordered with or at least negative infinity. 
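// A few concrete folds this enables, assuming %x is an arbitrary double:
//
//   fcmp olt double %x, 0xFFF0000000000000   ; %x < -inf    -->  false
//   fcmp uge double %x, 0xFFF0000000000000   ; !(%x < -inf) -->  true
//   fcmp ogt double %x, 0x7FF0000000000000   ; %x > +inf    -->  false
//   fcmp ule double %x, 0x7FF0000000000000   ; !(%x > +inf) -->  true
//
// The unordered forms fold to true because a NaN operand satisfies any
// unordered predicate, and every non-NaN value already satisfies the
// ordered half of the comparison against an infinity.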
+ return ConstantInt::getTrue(CFP->getContext()); + default: + break; + } + } else { + switch (Pred) { + case FCmpInst::FCMP_OGT: + // No value is ordered and greater than infinity. + return ConstantInt::getFalse(CFP->getContext()); + case FCmpInst::FCMP_ULE: + // All values are unordered with and at most infinity. + return ConstantInt::getTrue(CFP->getContext()); + default: + break; + } + } + } } } diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 2d74709d..2aa2f17 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -580,7 +580,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { void MemoryDependenceAnalysis:: getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB, SmallVectorImpl<NonLocalDepResult> &Result) { - assert(isa<PointerType>(Pointer->getType()) && + assert(Pointer->getType()->isPointerTy() && "Can't get pointer deps of a non-pointer!"); Result.clear(); @@ -861,7 +861,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize, // Get the PHI translated pointer in this predecessor. This can fail if // not translatable, in which case the getAddr() returns null. PHITransAddr PredPointer(Pointer); - PredPointer.PHITranslateValue(BB, Pred); + PredPointer.PHITranslateValue(BB, Pred, 0); Value *PredPtrVal = PredPointer.getAddr(); @@ -1009,13 +1009,20 @@ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) { /// in more places that cached info does not necessarily keep. void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) { // If Ptr isn't really a pointer, just ignore it. - if (!isa<PointerType>(Ptr->getType())) return; + if (!Ptr->getType()->isPointerTy()) return; // Flush store info for the pointer. RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false)); // Flush load info for the pointer. RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true)); } +/// invalidateCachedPredecessors - Clear the PredIteratorCache info. +/// This needs to be done when the CFG changes, e.g., due to splitting +/// critical edges. +void MemoryDependenceAnalysis::invalidateCachedPredecessors() { + PredCache->clear(); +} + /// removeInstruction - Remove an instruction from the dependence analysis, /// updating the dependence of instructions that previously depended on it. /// This method attempts to keep the cache coherent using the reverse map. @@ -1050,7 +1057,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { // Remove it from both the load info and the store info. The instruction // can't be in either of these maps if it is non-pointer. - if (isa<PointerType>(RemInst->getType())) { + if (RemInst->getType()->isPointerTy()) { RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false)); RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true)); } diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index 334a188..8e4fa03 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -134,7 +134,8 @@ static void RemoveInstInputs(Value *V, } Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, - BasicBlock *PredBB) { + BasicBlock *PredBB, + const DominatorTree *DT) { // If this is a non-instruction value, it can't require PHI translation. 
Instruction *Inst = dyn_cast<Instruction>(V); if (Inst == 0) return V; @@ -177,7 +178,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, // operands need to be phi translated, and if so, reconstruct it. if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) { - Value *PHIIn = PHITranslateSubExpr(BC->getOperand(0), CurBB, PredBB); + Value *PHIIn = PHITranslateSubExpr(BC->getOperand(0), CurBB, PredBB, DT); if (PHIIn == 0) return 0; if (PHIIn == BC->getOperand(0)) return BC; @@ -193,7 +194,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end(); UI != E; ++UI) { if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) - if (BCI->getType() == BC->getType()) + if (BCI->getType() == BC->getType() && + (!DT || DT->dominates(BCI->getParent(), PredBB))) return BCI; } return 0; @@ -204,7 +206,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, SmallVector<Value*, 8> GEPOps; bool AnyChanged = false; for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { - Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB); + Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT); if (GEPOp == 0) return 0; AnyChanged |= GEPOp != GEP->getOperand(i); @@ -229,7 +231,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) if (GEPI->getType() == GEP->getType() && GEPI->getNumOperands() == GEPOps.size() && - GEPI->getParent()->getParent() == CurBB->getParent()) { + GEPI->getParent()->getParent() == CurBB->getParent() && + (!DT || DT->dominates(GEPI->getParent(), PredBB))) { bool Mismatch = false; for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) if (GEPI->getOperand(i) != GEPOps[i]) { @@ -251,7 +254,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap(); bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap(); - Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB); + Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT); if (LHS == 0) return 0; // If the PHI translated LHS is an add of a constant, fold the immediates. @@ -287,7 +290,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI)) if (BO->getOpcode() == Instruction::Add && BO->getOperand(0) == LHS && BO->getOperand(1) == RHS && - BO->getParent()->getParent() == CurBB->getParent()) + BO->getParent()->getParent() == CurBB->getParent() && + (!DT || DT->dominates(BO->getParent(), PredBB))) return BO; } @@ -300,33 +304,24 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, /// PHITranslateValue - PHI translate the current address up the CFG from -/// CurBB to Pred, updating our state the reflect any needed changes. This -/// returns true on failure and sets Addr to null. -bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB) { +/// CurBB to Pred, updating our state to reflect any needed changes. If the +/// dominator tree DT is non-null, the translated value must dominate +/// PredBB. This returns true on failure and sets Addr to null. 
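// A minimal usage sketch of the updated interface, mirroring the callers in
// MemoryDependenceAnalysis and InsertPHITranslatedSubExpr (Addr, CurBB,
// PredBB, TD and DT are assumed to be in scope):
//
//   PHITransAddr Tmp(Addr, TD);
//   if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT))   // false means success
//     Addr = Tmp.getAddr();   // translated value, known to dominate PredBB
//
// Passing a null DominatorTree keeps the old behaviour: the translation is
// still attempted, but the dominance check on the result is skipped.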
+bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB, + const DominatorTree *DT) { assert(Verify() && "Invalid PHITransAddr!"); - Addr = PHITranslateSubExpr(Addr, CurBB, PredBB); + Addr = PHITranslateSubExpr(Addr, CurBB, PredBB, DT); assert(Verify() && "Invalid PHITransAddr!"); - return Addr == 0; -} -/// GetAvailablePHITranslatedSubExpr - Return the value computed by -/// PHITranslateSubExpr if it dominates PredBB, otherwise return null. -Value *PHITransAddr:: -GetAvailablePHITranslatedSubExpr(Value *V, BasicBlock *CurBB,BasicBlock *PredBB, - const DominatorTree &DT) const { - PHITransAddr Tmp(V, TD); - Tmp.PHITranslateValue(CurBB, PredBB); - - // See if PHI translation succeeds. - V = Tmp.getAddr(); - - // Make sure the value is live in the predecessor. - if (Instruction *Inst = dyn_cast_or_null<Instruction>(V)) - if (!DT.dominates(Inst->getParent(), PredBB)) - return 0; - return V; -} + if (DT) { + // Make sure the value is live in the predecessor. + if (Instruction *Inst = dyn_cast_or_null<Instruction>(Addr)) + if (!DT->dominates(Inst->getParent(), PredBB)) + Addr = 0; + } + return Addr == 0; +} /// PHITranslateWithInsertion - PHI translate this value into the specified /// predecessor block, inserting a computation of the value if it is @@ -365,8 +360,9 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, SmallVectorImpl<Instruction*> &NewInsts) { // See if we have a version of this value already available and dominating // PredBB. If so, there is no need to insert a new instance of it. - if (Value *Res = GetAvailablePHITranslatedSubExpr(InVal, CurBB, PredBB, DT)) - return Res; + PHITransAddr Tmp(InVal, TD); + if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT)) + return Tmp.getAddr(); // If we don't have an available version of this value, it must be an // instruction. diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp index 8da07e7..ce7ac89 100644 --- a/lib/Analysis/PointerTracking.cpp +++ b/lib/Analysis/PointerTracking.cpp @@ -231,7 +231,7 @@ void PointerTracking::print(raw_ostream &OS, const Module* M) const { // this should be safe for the same reason its safe for SCEV. 
PointerTracking &PT = *const_cast<PointerTracking*>(this); for (inst_iterator I=inst_begin(*FF), E=inst_end(*FF); I != E; ++I) { - if (!isa<PointerType>(I->getType())) + if (!I->getType()->isPointerTy()) continue; Value *Base; const SCEV *Limit, *Offset; diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp index 85531be..66760c6 100644 --- a/lib/Analysis/ProfileInfo.cpp +++ b/lib/Analysis/ProfileInfo.cpp @@ -246,7 +246,7 @@ const BasicBlock *ProfileInfoT<Function,BasicBlock>:: succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB); if (Succ == End) { - P[0] = BB; + P[ reinterpret_cast<const llvm::BasicBlock*>(0) ] = BB; if (Mode & GetPathToExit) { hasFoundPath = true; BB = 0; @@ -753,10 +753,10 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { Succ != End; ++Succ) { Path P; GetPath(*Succ, 0, P, GetPathToExit); - if (Dest && Dest != P[0]) { + if (Dest && Dest != P[ reinterpret_cast<const llvm::BasicBlock*>(0) ]) { AllEdgesHaveSameReturn = false; } - Dest = P[0]; + Dest = P[ reinterpret_cast<const llvm::BasicBlock*>(0) ]; } if (AllEdgesHaveSameReturn) { if(EstimateMissingEdges(BB)) { @@ -928,7 +928,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { Path P; const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges); - Dest = P[0]; + Dest = P[ reinterpret_cast<const llvm::BasicBlock*>(0) ]; if (!Dest) continue; if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) { diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 82be9cd..b979f33 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -214,8 +214,8 @@ bool SCEVCastExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeID &ID, const SCEV *op, const Type *ty) : SCEVCastExpr(ID, scTruncate, op, ty) { - assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) && - (Ty->isInteger() || isa<PointerType>(Ty)) && + assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate non-integer value!"); } @@ -226,8 +226,8 @@ void SCEVTruncateExpr::print(raw_ostream &OS) const { SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeID &ID, const SCEV *op, const Type *ty) : SCEVCastExpr(ID, scZeroExtend, op, ty) { - assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) && - (Ty->isInteger() || isa<PointerType>(Ty)) && + assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot zero extend non-integer value!"); } @@ -238,8 +238,8 @@ void SCEVZeroExtendExpr::print(raw_ostream &OS) const { SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeID &ID, const SCEV *op, const Type *ty) : SCEVCastExpr(ID, scSignExtend, op, ty) { - assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) && - (Ty->isInteger() || isa<PointerType>(Ty)) && + assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot sign extend non-integer value!"); } @@ -312,6 +312,21 @@ bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const { return true; } +bool +SCEVAddRecExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { + return DT->dominates(L->getHeader(), BB) && + SCEVNAryExpr::dominates(BB, DT); +} + +bool +SCEVAddRecExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { + // This 
uses a "dominates" query instead of "properly dominates" query because + // the instruction which produces the addrec's value is a PHI, and a PHI + // effectively properly dominates its entire containing block. + return DT->dominates(L->getHeader(), BB) && + SCEVNAryExpr::properlyDominates(BB, DT); +} + void SCEVAddRecExpr::print(raw_ostream &OS) const { OS << "{" << *Operands[0]; for (unsigned i = 1, e = Operands.size(); i != e; ++i) @@ -379,7 +394,7 @@ bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const { if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2))) if (CI->isOne() && STy->getNumElements() == 2 && - STy->getElementType(0)->isInteger(1)) { + STy->getElementType(0)->isIntegerTy(1)) { AllocTy = STy->getElementType(1); return true; } @@ -401,7 +416,7 @@ bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const { cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); // Ignore vector types here so that ScalarEvolutionExpander doesn't // emit getelementptrs that index into vectors. - if (isa<StructType>(Ty) || isa<ArrayType>(Ty)) { + if (Ty->isStructTy() || Ty->isArrayTy()) { CTy = Ty; FieldNo = CE->getOperand(2); return true; @@ -503,9 +518,9 @@ namespace { // Order pointer values after integer values. This helps SCEVExpander // form GEPs. - if (isa<PointerType>(LU->getType()) && !isa<PointerType>(RU->getType())) + if (LU->getType()->isPointerTy() && !RU->getType()->isPointerTy()) return false; - if (isa<PointerType>(RU->getType()) && !isa<PointerType>(LU->getType())) + if (RU->getType()->isPointerTy() && !LU->getType()->isPointerTy()) return true; // Compare getValueID values. @@ -601,7 +616,7 @@ namespace { /// When this routine is finished, we know that any duplicates in the vector are /// consecutive and that complexity is monotonically increasing. /// -/// Note that we go take special precautions to ensure that we get determinstic +/// Note that we go take special precautions to ensure that we get deterministic /// results from this routine. In other words, we don't want the results of /// this to depend on where the addresses of various SCEV objects happened to /// land in memory. @@ -729,7 +744,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, // We need at least W + T bits for the multiplication step unsigned CalculationBits = W + T; - // Calcuate 2^T, at width T+W. + // Calculate 2^T, at width T+W. APInt DivFactor = APInt(CalculationBits, 1).shl(T); // Calculate the multiplicative inverse of K! / 2^T; @@ -906,9 +921,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, if (MaxBECount == RecastedMaxBECount) { const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no unsigned overflow. - const SCEV *ZMul = - getMulExpr(CastedMaxBECount, - getTruncateOrZeroExtend(Step, Start->getType())); + const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step); const SCEV *Add = getAddExpr(Start, ZMul); const SCEV *OperandExtendedAdd = getAddExpr(getZeroExtendExpr(Start, WideTy), @@ -922,9 +935,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // Similar to above, only this time treat the step value as signed. // This covers loops that count down. 
- const SCEV *SMul = - getMulExpr(CastedMaxBECount, - getTruncateOrSignExtend(Step, Start->getType())); + const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); Add = getAddExpr(Start, SMul); OperandExtendedAdd = getAddExpr(getZeroExtendExpr(Start, WideTy), @@ -1045,9 +1056,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, if (MaxBECount == RecastedMaxBECount) { const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no signed overflow. - const SCEV *SMul = - getMulExpr(CastedMaxBECount, - getTruncateOrSignExtend(Step, Start->getType())); + const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); const SCEV *Add = getAddExpr(Start, SMul); const SCEV *OperandExtendedAdd = getAddExpr(getSignExtendExpr(Start, WideTy), @@ -1061,9 +1070,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // Similar to above, only this time treat the step value as unsigned. // This covers loops that count up with an unsigned step. - const SCEV *UMul = - getMulExpr(CastedMaxBECount, - getTruncateOrZeroExtend(Step, Start->getType())); + const SCEV *UMul = getMulExpr(CastedMaxBECount, Step); Add = getAddExpr(Start, UMul); OperandExtendedAdd = getAddExpr(getSignExtendExpr(Start, WideTy), @@ -1403,7 +1410,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // If we deleted at least one add, we added operands to the end of the list, // and they are not necessarily sorted. Recurse to resort and resimplify - // any operands we just aquired. + // any operands we just acquired. if (DeletedAdd) return getAddExpr(Ops); } @@ -1710,7 +1717,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // If we deleted at least one mul, we added operands to the end of the list, // and they are not necessarily sorted. Recurse to resort and resimplify - // any operands we just aquired. + // any operands we just acquired. if (DeletedMul) return getMulExpr(Ops); } @@ -1958,6 +1965,12 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, return getAddRecExpr(Operands, L, HasNUW, HasNSW); // {X,+,0} --> X } + // It's tempting to want to call getMaxBackedgeTakenCount count here and + // use that information to infer NUW and NSW flags. However, computing a + // BE count requires calling getAddRecExpr, so we may not yet have a + // meaningful BE count at this point (and if we don't, we'd be stuck + // with a SCEVCouldNotCompute as the cached BE count). + // If HasNSW is true and all the operands are non-negative, infer HasNUW. if (!HasNUW && HasNSW) { bool All = true; @@ -2293,7 +2306,7 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) { /// has access to target-specific information. bool ScalarEvolution::isSCEVable(const Type *Ty) const { // Integers and pointers are always SCEVable. - return Ty->isInteger() || isa<PointerType>(Ty); + return Ty->isIntegerTy() || Ty->isPointerTy(); } /// getTypeSizeInBits - Return the size in bits of the specified type, @@ -2306,12 +2319,12 @@ uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const { return TD->getTypeSizeInBits(Ty); // Integer types have fixed sizes. - if (Ty->isInteger()) + if (Ty->isIntegerTy()) return Ty->getPrimitiveSizeInBits(); // The only other support type is pointer. Without TargetData, conservatively // assume pointers are 64-bit. 
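// This is part of a patch-wide mechanical rewrite of type checks:
//
//   isa<PointerType>(Ty)  -->  Ty->isPointerTy()
//   Ty->isInteger()       -->  Ty->isIntegerTy()
//
// with no change in behaviour.  isSCEVable above is a typical case: a type
// is SCEVable exactly when Ty->isIntegerTy() || Ty->isPointerTy() holds.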
- assert(isa<PointerType>(Ty) && "isSCEVable permitted a non-SCEVable type!"); + assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!"); return 64; } @@ -2322,11 +2335,11 @@ uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const { const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); - if (Ty->isInteger()) + if (Ty->isIntegerTy()) return Ty; // The only other support type is pointer. - assert(isa<PointerType>(Ty) && "Unexpected non-pointer non-integer type!"); + assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!"); if (TD) return TD->getIntPtrType(getContext()); // Without TargetData, conservatively assume pointers are 64-bit. @@ -2397,8 +2410,8 @@ const SCEV * ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, const Type *Ty) { const Type *SrcTy = V->getType(); - assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) && - (Ty->isInteger() || isa<PointerType>(Ty)) && + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate or zero extend with non-integer arguments!"); if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) return V; // No conversion @@ -2414,8 +2427,8 @@ const SCEV * ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, const Type *Ty) { const Type *SrcTy = V->getType(); - assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) && - (Ty->isInteger() || isa<PointerType>(Ty)) && + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate or zero extend with non-integer arguments!"); if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) return V; // No conversion @@ -2430,8 +2443,8 @@ ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, const SCEV * ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) { const Type *SrcTy = V->getType(); - assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) && - (Ty->isInteger() || isa<PointerType>(Ty)) && + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot noop or zero extend with non-integer arguments!"); assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && "getNoopOrZeroExtend cannot truncate!"); @@ -2446,8 +2459,8 @@ ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) { const SCEV * ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) { const Type *SrcTy = V->getType(); - assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) && - (Ty->isInteger() || isa<PointerType>(Ty)) && + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot noop or sign extend with non-integer arguments!"); assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && "getNoopOrSignExtend cannot truncate!"); @@ -2463,8 +2476,8 @@ ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) { const SCEV * ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) { const Type *SrcTy = V->getType(); - assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) && - (Ty->isInteger() || isa<PointerType>(Ty)) && + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot noop or any extend with non-integer arguments!"); assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && "getNoopOrAnyExtend cannot truncate!"); @@ -2478,8 +2491,8 @@ ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, 
const Type *Ty) { const SCEV * ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) { const Type *SrcTy = V->getType(); - assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) && - (Ty->isInteger() || isa<PointerType>(Ty)) && + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate or noop with non-integer arguments!"); assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) && "getTruncateOrNoop cannot extend!"); @@ -2536,12 +2549,12 @@ PushDefUseChildren(Instruction *I, /// the Scalars map if they reference SymName. This is used during PHI /// resolution. void -ScalarEvolution::ForgetSymbolicName(Instruction *I, const SCEV *SymName) { +ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) { SmallVector<Instruction *, 16> Worklist; - PushDefUseChildren(I, Worklist); + PushDefUseChildren(PN, Worklist); SmallPtrSet<Instruction *, 8> Visited; - Visited.insert(I); + Visited.insert(PN); while (!Worklist.empty()) { Instruction *I = Worklist.pop_back_val(); if (!Visited.insert(I)) continue; @@ -2551,16 +2564,19 @@ ScalarEvolution::ForgetSymbolicName(Instruction *I, const SCEV *SymName) { if (It != Scalars.end()) { // Short-circuit the def-use traversal if the symbolic name // ceases to appear in expressions. - if (!It->second->hasOperand(SymName)) + if (It->second != SymName && !It->second->hasOperand(SymName)) continue; // SCEVUnknown for a PHI either means that it has an unrecognized - // structure, or it's a PHI that's in the progress of being computed - // by createNodeForPHI. In the former case, additional loop trip - // count information isn't going to change anything. In the later - // case, createNodeForPHI will perform the necessary updates on its - // own when it gets to that point. - if (!isa<PHINode>(I) || !isa<SCEVUnknown>(It->second)) { + // structure, it's a PHI that's in the progress of being computed + // by createNodeForPHI, or it's a single-value PHI. In the first case, + // additional loop trip count information isn't going to change anything. + // In the second case, createNodeForPHI will perform the necessary + // updates on its own when it gets to that point. In the third, we do + // want to forget the SCEVUnknown. + if (!isa<PHINode>(I) || + !isa<SCEVUnknown>(It->second) || + (I != PN && It->second == SymName)) { ValuesAtScopes.erase(It->second); Scalars.erase(It); } @@ -2683,9 +2699,21 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { return SymbolicName; } - // It's tempting to recognize PHIs with a unique incoming value, however - // this leads passes like indvars to break LCSSA form. Fortunately, such - // PHIs are rare, as instcombine zaps them. + // If the PHI has a single incoming value, follow that value, unless the + // PHI's incoming blocks are in a different loop, in which case doing so + // risks breaking LCSSA form. Instcombine would normally zap these, but + // it doesn't have DominatorTree information, so it may miss cases. + if (Value *V = PN->hasConstantValue(DT)) { + bool AllSameLoop = true; + Loop *PNLoop = LI->getLoopFor(PN->getParent()); + for (size_t i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (LI->getLoopFor(PN->getIncomingBlock(i)) != PNLoop) { + AllSameLoop = false; + break; + } + if (AllSameLoop) + return getSCEV(V); + } // If it's not a loop phi, we can't handle it yet. 
return getUnknown(PN); @@ -2718,7 +2746,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { } else { // For an array, add the element offset, explicitly scaled. const SCEV *LocalOffset = getSCEV(Index); - // Getelementptr indicies are signed. + // Getelementptr indices are signed. LocalOffset = getTruncateOrSignExtend(LocalOffset, IntPtrTy); // Lower "inbounds" GEPs to NSW arithmetic. LocalOffset = getMulExpr(LocalOffset, getSizeOfExpr(*GTI), @@ -2921,7 +2949,6 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { // For a SCEVUnknown, ask ValueTracking. - unsigned BitWidth = getTypeSizeInBits(U->getType()); APInt Mask = APInt::getAllOnesValue(BitWidth); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD); @@ -3053,7 +3080,7 @@ ScalarEvolution::getSignedRange(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { // For a SCEVUnknown, ask ValueTracking. - if (!U->getValue()->getType()->isInteger() && !TD) + if (!U->getValue()->getType()->isIntegerTy() && !TD) return ConservativeResult; unsigned NS = ComputeNumSignBits(U->getValue(), TD); if (NS == 1) @@ -3193,7 +3220,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { const Type *Z0Ty = Z0->getType(); unsigned Z0TySize = getTypeSizeInBits(Z0Ty); - // If C is a low-bits mask, the zero extend is zerving to + // If C is a low-bits mask, the zero extend is serving to // mask off the high bits. Complement the operand and // re-apply the zext. if (APIntOps::isMask(Z0TySize, CI->getValue())) @@ -3378,7 +3405,7 @@ PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) { const ScalarEvolution::BackedgeTakenInfo & ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // Initially insert a CouldNotCompute for this loop. If the insertion - // succeeds, procede to actually compute a backedge-taken count and + // succeeds, proceed to actually compute a backedge-taken count and // update the value. The temporary CouldNotCompute value tells SCEV // code elsewhere that it shouldn't attempt to request a new // backedge-taken count, which could result in infinite recursion. @@ -3470,6 +3497,35 @@ void ScalarEvolution::forgetLoop(const Loop *L) { } } +/// forgetValue - This method should be called by the client when it has +/// changed a value in a way that may effect its value, or which may +/// disconnect it from a def-use chain linking it to a loop. +void ScalarEvolution::forgetValue(Value *V) { + Instruction *I = dyn_cast<Instruction>(V); + if (!I) return; + + // Drop information about expressions based on loop-header PHIs. + SmallVector<Instruction *, 16> Worklist; + Worklist.push_back(I); + + SmallPtrSet<Instruction *, 8> Visited; + while (!Worklist.empty()) { + I = Worklist.pop_back_val(); + if (!Visited.insert(I)) continue; + + std::map<SCEVCallbackVH, const SCEV *>::iterator It = + Scalars.find(static_cast<Value *>(I)); + if (It != Scalars.end()) { + ValuesAtScopes.erase(It->second); + Scalars.erase(It); + if (PHINode *PN = dyn_cast<PHINode>(I)) + ConstantEvolutionLoopExitValue.erase(PN); + } + + PushDefUseChildren(I, Worklist); + } +} + /// ComputeBackedgeTakenCount - Compute the number of times the backedge /// of the specified loop will execute. ScalarEvolution::BackedgeTakenInfo @@ -3566,7 +3622,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L, return getCouldNotCompute(); } - // Procede to the next level to examine the exit condition expression. 
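// forgetValue gives clients a way to drop SCEV state for one value without
// discarding a whole loop, as forgetLoop does.  A minimal sketch of the
// intended use, assuming a transform that has just rewritten the operands of
// an instruction I feeding an induction variable (SE is its ScalarEvolution
// analysis):
//
//   // ... mutate I ...
//   SE->forgetValue(I);   // drop cached SCEVs for I and its users
//
// Everything reachable from I through def-use chains is recomputed on
// demand; unrelated values keep their cached expressions.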
+ // Proceed to the next level to examine the exit condition expression. return ComputeBackedgeTakenCountFromExitCond(L, ExitBr->getCondition(), ExitBr->getSuccessor(0), ExitBr->getSuccessor(1)); @@ -3655,10 +3711,23 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, } // With an icmp, it may be feasible to compute an exact backedge-taken count. - // Procede to the next level to examine the icmp. + // Proceed to the next level to examine the icmp. if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) return ComputeBackedgeTakenCountFromExitCondICmp(L, ExitCondICmp, TBB, FBB); + // Check for a constant condition. These are normally stripped out by + // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to + // preserve the CFG and is temporarily leaving constant conditions + // in place. + if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) { + if (L->contains(FBB) == !CI->getZExtValue()) + // The backedge is always taken. + return getCouldNotCompute(); + else + // The backedge is never taken. + return getIntegerSCEV(0, CI->getType()); + } + // If it's not an integer or pointer comparison then compute it the hard way. return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB)); } @@ -3682,14 +3751,10 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, // Handle common loops like: for (X = "string"; *X; ++X) if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0))) if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) { - const SCEV *ItCnt = + BackedgeTakenInfo ItCnt = ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond); - if (!isa<SCEVCouldNotCompute>(ItCnt)) { - unsigned BitWidth = getTypeSizeInBits(ItCnt->getType()); - return BackedgeTakenInfo(ItCnt, - isa<SCEVConstant>(ItCnt) ? ItCnt : - getConstant(APInt::getMaxValue(BitWidth)-1)); - } + if (ItCnt.hasAnyInfo()) + return ItCnt; } const SCEV *LHS = getSCEV(ExitCond->getOperand(0)); @@ -3723,14 +3788,14 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, switch (Cond) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) - const SCEV *TC = HowFarToZero(getMinusSCEV(LHS, RHS), L); - if (!isa<SCEVCouldNotCompute>(TC)) return TC; + BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L); + if (BTI.hasAnyInfo()) return BTI; break; } case ICmpInst::ICMP_EQ: { // while (X == Y) // Convert to: while (X-Y == 0) - const SCEV *TC = HowFarToNonZero(getMinusSCEV(LHS, RHS), L); - if (!isa<SCEVCouldNotCompute>(TC)) return TC; + BackedgeTakenInfo BTI = HowFarToNonZero(getMinusSCEV(LHS, RHS), L); + if (BTI.hasAnyInfo()) return BTI; break; } case ICmpInst::ICMP_SLT: { @@ -3817,7 +3882,7 @@ GetAddressedElementFromGlobal(GlobalVariable *GV, /// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of /// 'icmp op load X, cst', try to see if we can compute the backedge /// execution count. -const SCEV * +ScalarEvolution::BackedgeTakenInfo ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount( LoadInst *LI, Constant *RHS, @@ -3826,6 +3891,7 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount( if (LI->isVolatile()) return getCouldNotCompute(); // Check to see if the loaded pointer is a getelementptr of a global. + // TODO: Use SCEV instead of manually grubbing with GEPs. 
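// Two concrete cases for the new constant-condition handling above, assuming
// an exit branch "br i1 %c, label %body, label %exit" with %body inside the
// loop:
//
//   %c = true   -->  this exit is never taken, so the backedge count is not
//                    computable from it: CouldNotCompute.
//   %c = false  -->  the loop exits before the backedge ever runs, so the
//                    backedge-taken count is 0.
//
// Such branches survive in passes that must temporarily preserve the CFG;
// SimplifyCFG would normally have removed them.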
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)); if (!GEP) return getCouldNotCompute(); @@ -4175,14 +4241,15 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { } } - Constant *C; + Constant *C = 0; if (const CmpInst *CI = dyn_cast<CmpInst>(I)) C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], Operands[1], TD); else C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), &Operands[0], Operands.size(), TD); - return getSCEV(C); + if (C) + return getSCEV(C); } } @@ -4390,7 +4457,8 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { /// HowFarToZero - Return the number of times a backedge comparing the specified /// value to zero will execute. If not computable, return CouldNotCompute. -const SCEV *ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // If the value is a constant if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { // If the value is already zero, the branch will execute zero times. @@ -4435,7 +4503,7 @@ const SCEV *ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { -StartC->getValue()->getValue(), *this); } - } else if (AddRec->isQuadratic() && AddRec->getType()->isInteger()) { + } else if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) { // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of // the quadratic equation to solve it. std::pair<const SCEV *,const SCEV *> Roots = SolveQuadraticEquation(AddRec, @@ -4470,7 +4538,8 @@ const SCEV *ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { /// HowFarToNonZero - Return the number of times a backedge checking the /// specified value for nonzero will execute. If not computable, return /// CouldNotCompute -const SCEV *ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { // Loops that look like: while (X == 0) are very strange indeed. We don't // handle them yet except for the trivial case. This could be expanded in the // future as needed. @@ -4711,7 +4780,7 @@ bool ScalarEvolution::isImpliedCond(Value *CondValue, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, bool Inverse) { - // Recursivly handle And and Or conditions. + // Recursively handle And and Or conditions. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CondValue)) { if (BO->getOpcode() == Instruction::And) { if (!Inverse) @@ -4914,7 +4983,7 @@ bool ScalarEvolution::isImpliedCond(Value *CondValue, } /// isImpliedCondOperands - Test whether the condition described by Pred, -/// LHS, and RHS is true whenever the condition desribed by Pred, FoundLHS, +/// LHS, and RHS is true whenever the condition described by Pred, FoundLHS, /// and FoundRHS is true. bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, @@ -4929,7 +4998,7 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, } /// isImpliedCondOperandsHelper - Test whether the condition described by -/// Pred, LHS, and RHS is true whenever the condition desribed by Pred, +/// Pred, LHS, and RHS is true whenever the condition described by Pred, /// FoundLHS, and FoundRHS is true. 
bool ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, @@ -5087,7 +5156,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, // If MaxEnd is within a step of the maximum integer value in its type, // adjust it down to the minimum value which would produce the same effect. - // This allows the subsequent ceiling divison of (N+(step-1))/step to + // This allows the subsequent ceiling division of (N+(step-1))/step to // compute the correct value. const SCEV *StepMinusOne = getMinusSCEV(Step, getIntegerSCEV(1, Step->getType())); @@ -5304,8 +5373,8 @@ ScalarEvolution::ScalarEvolution() bool ScalarEvolution::runOnFunction(Function &F) { this->F = &F; LI = &getAnalysis<LoopInfo>(); - DT = &getAnalysis<DominatorTree>(); TD = getAnalysisIfAvailable<TargetData>(); + DT = &getAnalysis<DominatorTree>(); return false; } @@ -5364,7 +5433,7 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, } void ScalarEvolution::print(raw_ostream &OS, const Module *) const { - // ScalarEvolution's implementaiton of the print method is to print + // ScalarEvolution's implementation of the print method is to print // out SCEV values of all instructions that are interesting. Doing // this potentially causes it to create new SCEV objects though, // which technically conflicts with the const qualifier. This isn't diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp index 498c4a8..17b254f 100644 --- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp +++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -10,6 +10,10 @@ // This file defines the ScalarEvolutionAliasAnalysis pass, which implements a // simple alias analysis implemented in terms of ScalarEvolution queries. // +// This differs from traditional loop dependence analysis in that it tests +// for dependencies within a single iteration of a loop, rather than +// dependences between different iterations. +// // ScalarEvolution has a more complete understanding of pointer arithmetic // than BasicAliasAnalysis' collection of ad-hoc analyses. // @@ -41,7 +45,7 @@ namespace { return (AliasAnalysis*)this; return this; } - + private: virtual void getAnalysisUsage(AnalysisUsage &AU) const; virtual bool runOnFunction(Function &F); @@ -89,7 +93,7 @@ ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) { } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) { // If there's a pointer operand, it'll be sorted at the end of the list. const SCEV *Last = A->getOperand(A->getNumOperands()-1); - if (isa<PointerType>(Last->getType())) + if (Last->getType()->isPointerTy()) return GetBaseValue(Last); } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { // This is a leaf node. diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 4310e3c..e27da96 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/STLExtras.h" @@ -137,6 +138,10 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, if (IP != BlockBegin) { --IP; for (; ScanLimit; --IP, --ScanLimit) { + // Don't count dbg.value against the ScanLimit, to avoid perturbing the + // generated code. 
+ if (isa<DbgInfoIntrinsic>(IP)) + ScanLimit++; if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS && IP->getOperand(1) == RHS) return IP; @@ -144,15 +149,34 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, } } + // Save the original insertion point so we can restore it when we're done. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + + // Move the insertion point out of as many loops as we can. + while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break; + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; + + // Ok, move up a level. + Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + } + // If we haven't found this binop, insert it. Value *BO = Builder.CreateBinOp(Opcode, LHS, RHS, "tmp"); rememberInstruction(BO); + + // Restore the original insert point. + if (SaveInsertBB) + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + return BO; } /// FactorOutConstant - Test if S is divisible by Factor, using signed /// division. If so, update S with Factor divided out and return true. -/// S need not be evenly divisble if a reasonable remainder can be +/// S need not be evenly divisible if a reasonable remainder can be /// computed. /// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made /// unnecessary; in its place, just signed-divide Ops[i] by the scale and @@ -462,7 +486,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, break; } - // If none of the operands were convertable to proper GEP indices, cast + // If none of the operands were convertible to proper GEP indices, cast // the base to i8* and do an ugly getelementptr with that. It's still // better than ptrtoint+arithmetic+inttoptr at least. if (!AnyNonZeroIndices) { @@ -486,6 +510,10 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, if (IP != BlockBegin) { --IP; for (; ScanLimit; --IP, --ScanLimit) { + // Don't count dbg.value against the ScanLimit, to avoid perturbing the + // generated code. + if (isa<DbgInfoIntrinsic>(IP)) + ScanLimit++; if (IP->getOpcode() == Instruction::GetElementPtr && IP->getOperand(0) == V && IP->getOperand(1) == Idx) return IP; @@ -493,12 +521,56 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, } } + // Save the original insertion point so we can restore it when we're done. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + + // Move the insertion point out of as many loops as we can. + while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break; + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; + + // Ok, move up a level. + Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + } + // Emit a GEP. Value *GEP = Builder.CreateGEP(V, Idx, "uglygep"); rememberInstruction(GEP); + + // Restore the original insert point. + if (SaveInsertBB) + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + return GEP; } + // Save the original insertion point so we can restore it when we're done. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + + // Move the insertion point out of as many loops as we can. 
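// The effect of this hoisting, as a sketch: when expanding an address such
// as %base + 8 * %n inside a doubly nested loop, with %base and %n defined
// outside both loops, the insertion point is walked up one preheader at a
// time for as long as every operand stays loop-invariant and a preheader
// exists, so the multiply and the GEP end up in the outermost preheader
// instead of being re-emitted on every iteration.  The same idiom appears in
// InsertBinop above and in both GEP paths below.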
+ while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(V)) break; + + bool AnyIndexNotLoopInvariant = false; + for (SmallVectorImpl<Value *>::const_iterator I = GepIndices.begin(), + E = GepIndices.end(); I != E; ++I) + if (!L->isLoopInvariant(*I)) { + AnyIndexNotLoopInvariant = true; + break; + } + if (AnyIndexNotLoopInvariant) + break; + + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; + + // Ok, move up a level. + Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + } + // Insert a pretty getelementptr. Note that this GEP is not marked inbounds, // because ScalarEvolution may have changed the address arithmetic to // compute a value which is beyond the end of the allocated object. @@ -511,6 +583,11 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, "scevgep"); Ops.push_back(SE.getUnknown(GEP)); rememberInstruction(GEP); + + // Restore the original insert point. + if (SaveInsertBB) + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + return expand(SE.getAddExpr(Ops)); } @@ -528,70 +605,179 @@ static bool isNonConstantNegative(const SCEV *F) { return SC->getValue()->getValue().isNegative(); } -Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { - int NumOperands = S->getNumOperands(); - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); +/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for +/// SCEV expansion. If they are nested, this is the most nested. If they are +/// neighboring, pick the later. +static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B, + DominatorTree &DT) { + if (!A) return B; + if (!B) return A; + if (A->contains(B)) return B; + if (B->contains(A)) return A; + if (DT.dominates(A->getHeader(), B->getHeader())) return B; + if (DT.dominates(B->getHeader(), A->getHeader())) return A; + return A; // Arbitrarily break the tie. +} - // Find the index of an operand to start with. Choose the operand with - // pointer type, if there is one, or the last operand otherwise. - int PIdx = 0; - for (; PIdx != NumOperands - 1; ++PIdx) - if (isa<PointerType>(S->getOperand(PIdx)->getType())) break; +/// GetRelevantLoop - Get the most relevant loop associated with the given +/// expression, according to PickMostRelevantLoop. +static const Loop *GetRelevantLoop(const SCEV *S, LoopInfo &LI, + DominatorTree &DT) { + if (isa<SCEVConstant>(S)) + return 0; + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + if (const Instruction *I = dyn_cast<Instruction>(U->getValue())) + return LI.getLoopFor(I->getParent()); + return 0; + } + if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) { + const Loop *L = 0; + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) + L = AR->getLoop(); + for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end(); + I != E; ++I) + L = PickMostRelevantLoop(L, GetRelevantLoop(*I, LI, DT), DT); + return L; + } + if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) + return GetRelevantLoop(C->getOperand(), LI, DT); + if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) + return PickMostRelevantLoop(GetRelevantLoop(D->getLHS(), LI, DT), + GetRelevantLoop(D->getRHS(), LI, DT), + DT); + llvm_unreachable("Unexpected SCEV type!"); +} - // Expand code for the operand that we chose. - Value *V = expand(S->getOperand(PIdx)); +/// LoopCompare - Compare loops by PickMostRelevantLoop. 
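// PickMostRelevantLoop by example, assuming an outer loop O containing an
// inner loop I, and sibling loops A and B where A's header dominates B's:
//
//   PickMostRelevantLoop(O, I, DT)  -->  I   (prefer the deeper nest)
//   PickMostRelevantLoop(A, B, DT)  -->  B   (prefer the later sibling)
//   PickMostRelevantLoop(0, I, DT)  -->  I   (null means no loop)
//
// Sorting add and mul operands by this relation lets visitAddExpr and
// visitMulExpr below group operands by loop and emit the less deeply nested
// (more hoistable) parts first.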
+class LoopCompare { + DominatorTree &DT; +public: + explicit LoopCompare(DominatorTree &dt) : DT(dt) {} + + bool operator()(std::pair<const Loop *, const SCEV *> LHS, + std::pair<const Loop *, const SCEV *> RHS) const { + // Compare loops with PickMostRelevantLoop. + if (LHS.first != RHS.first) + return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first; + + // If one operand is a non-constant negative and the other is not, + // put the non-constant negative on the right so that a sub can + // be used instead of a negate and add. + if (isNonConstantNegative(LHS.second)) { + if (!isNonConstantNegative(RHS.second)) + return false; + } else if (isNonConstantNegative(RHS.second)) + return true; - // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the - // comments on expandAddToGEP for details. - if (const PointerType *PTy = dyn_cast<PointerType>(V->getType())) { - // Take the operand at PIdx out of the list. - const SmallVectorImpl<const SCEV *> &Ops = S->getOperands(); - SmallVector<const SCEV *, 8> NewOps; - NewOps.insert(NewOps.end(), Ops.begin(), Ops.begin() + PIdx); - NewOps.insert(NewOps.end(), Ops.begin() + PIdx + 1, Ops.end()); - // Make a GEP. - return expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, V); + // Otherwise they are equivalent according to this comparison. + return false; } +}; - // Otherwise, we'll expand the rest of the SCEVAddExpr as plain integer - // arithmetic. - V = InsertNoopCastOfTo(V, Ty); +Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); - // Emit a bunch of add instructions - for (int i = NumOperands-1; i >= 0; --i) { - if (i == PIdx) continue; - const SCEV *Op = S->getOperand(i); - if (isNonConstantNegative(Op)) { + // Collect all the add operands in a loop, along with their associated loops. + // Iterate in reverse so that constants are emitted last, all else equal, and + // so that pointer operands are inserted first, which the code below relies on + // to form more involved GEPs. + SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops; + for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()), + E(S->op_begin()); I != E; ++I) + OpsAndLoops.push_back(std::make_pair(GetRelevantLoop(*I, *SE.LI, *SE.DT), + *I)); + + // Sort by loop. Use a stable sort so that constants follow non-constants and + // pointer operands precede non-pointer operands. + std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT)); + + // Emit instructions to add all the operands. Hoist as much as possible + // out of loops, and form meaningful getelementptrs where possible. + Value *Sum = 0; + for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator + I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { + const Loop *CurLoop = I->first; + const SCEV *Op = I->second; + if (!Sum) { + // This is the first operand. Just expand it. + Sum = expand(Op); + ++I; + } else if (const PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) { + // The running sum expression is a pointer. Try to form a getelementptr + // at this level with that as the base. + SmallVector<const SCEV *, 4> NewOps; + for (; I != E && I->first == CurLoop; ++I) + NewOps.push_back(I->second); + Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum); + } else if (const PointerType *PTy = dyn_cast<PointerType>(Op->getType())) { + // The running sum is an integer, and there's a pointer at this level. + // Try to form a getelementptr. 
+ SmallVector<const SCEV *, 4> NewOps; + NewOps.push_back(SE.getUnknown(Sum)); + for (++I; I != E && I->first == CurLoop; ++I) + NewOps.push_back(I->second); + Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op)); + } else if (isNonConstantNegative(Op)) { + // Instead of doing a negate and add, just do a subtract. Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty); - V = InsertBinop(Instruction::Sub, V, W); + Sum = InsertNoopCastOfTo(Sum, Ty); + Sum = InsertBinop(Instruction::Sub, Sum, W); + ++I; } else { + // A simple add. Value *W = expandCodeFor(Op, Ty); - V = InsertBinop(Instruction::Add, V, W); + Sum = InsertNoopCastOfTo(Sum, Ty); + // Canonicalize a constant to the RHS. + if (isa<Constant>(Sum)) std::swap(Sum, W); + Sum = InsertBinop(Instruction::Add, Sum, W); + ++I; } } - return V; + + return Sum; } Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { const Type *Ty = SE.getEffectiveSCEVType(S->getType()); - int FirstOp = 0; // Set if we should emit a subtract. - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(0))) - if (SC->getValue()->isAllOnesValue()) - FirstOp = 1; - - int i = S->getNumOperands()-2; - Value *V = expandCodeFor(S->getOperand(i+1), Ty); - - // Emit a bunch of multiply instructions - for (; i >= FirstOp; --i) { - Value *W = expandCodeFor(S->getOperand(i), Ty); - V = InsertBinop(Instruction::Mul, V, W); + + // Collect all the mul operands in a loop, along with their associated loops. + // Iterate in reverse so that constants are emitted last, all else equal. + SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops; + for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()), + E(S->op_begin()); I != E; ++I) + OpsAndLoops.push_back(std::make_pair(GetRelevantLoop(*I, *SE.LI, *SE.DT), + *I)); + + // Sort by loop. Use a stable sort so that constants follow non-constants. + std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT)); + + // Emit instructions to mul all the operands. Hoist as much as possible + // out of loops. + Value *Prod = 0; + for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator + I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { + const SCEV *Op = I->second; + if (!Prod) { + // This is the first operand. Just expand it. + Prod = expand(Op); + ++I; + } else if (Op->isAllOnesValue()) { + // Instead of doing a multiply by negative one, just do a negate. + Prod = InsertNoopCastOfTo(Prod, Ty); + Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod); + ++I; + } else { + // A simple mul. + Value *W = expandCodeFor(Op, Ty); + Prod = InsertNoopCastOfTo(Prod, Ty); + // Canonicalize a constant to the RHS. + if (isa<Constant>(Prod)) std::swap(Prod, W); + Prod = InsertBinop(Instruction::Mul, Prod, W); + ++I; + } } - // -1 * ... ---> 0 - ... - if (FirstOp == 1) - V = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), V); - return V; + return Prod; } Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { @@ -641,8 +827,65 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Reuse a previously-inserted PHI, if present. 
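// The reuse test below is deliberately broader than the old
// isInsertedInstruction(PN) check: any header PHI whose SCEV matches the
// normalized addrec can be picked up, e.g. a canonical IV left by an
// earlier pass:
//
//   %iv      = phi i64 [ 0, %preheader ], [ %iv.next, %latch ]
//   %iv.next = add i64 %iv, 1
//
// provided the chain from the latch value back to the PHI consists of
// side-effect-free instructions whose other operands dominate the IV
// increment insertion point.  When that holds, the PHI and its increment are
// registered as inserted values rather than duplicated.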
for (BasicBlock::iterator I = L->getHeader()->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) - if (isInsertedInstruction(PN) && SE.getSCEV(PN) == Normalized) - return PN; + if (SE.isSCEVable(PN->getType()) && + (SE.getEffectiveSCEVType(PN->getType()) == + SE.getEffectiveSCEVType(Normalized->getType())) && + SE.getSCEV(PN) == Normalized) + if (BasicBlock *LatchBlock = L->getLoopLatch()) { + Instruction *IncV = + cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)); + + // Determine if this is a well-behaved chain of instructions leading + // back to the PHI. It probably will be, if we're scanning an inner + // loop already visited by LSR for example, but it wouldn't have + // to be. + do { + if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV)) { + IncV = 0; + break; + } + // If any of the operands don't dominate the insert position, bail. + // Addrec operands are always loop-invariant, so this can only happen + // if there are instructions which haven't been hoisted. + for (User::op_iterator OI = IncV->op_begin()+1, + OE = IncV->op_end(); OI != OE; ++OI) + if (Instruction *OInst = dyn_cast<Instruction>(OI)) + if (!SE.DT->dominates(OInst, IVIncInsertPos)) { + IncV = 0; + break; + } + if (!IncV) + break; + // Advance to the next instruction. + IncV = dyn_cast<Instruction>(IncV->getOperand(0)); + if (!IncV) + break; + if (IncV->mayHaveSideEffects()) { + IncV = 0; + break; + } + } while (IncV != PN); + + if (IncV) { + // Ok, the add recurrence looks usable. + // Remember this PHI, even in post-inc mode. + InsertedValues.insert(PN); + // Remember the increment. + IncV = cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)); + rememberInstruction(IncV); + if (L == IVIncInsertLoop) + do { + if (SE.DT->dominates(IncV, IVIncInsertPos)) + break; + // Make sure the increment is where we want it. But don't move it + // down past a potential existing post-inc user. + IncV->moveBefore(IVIncInsertPos); + IVIncInsertPos = IncV; + IncV = cast<Instruction>(IncV->getOperand(0)); + } while (IncV != PN); + return PN; + } + } // Save the original insertion point so we can restore it when we're done. BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); @@ -658,7 +901,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // negative, insert a sub instead of an add for the increment (unless it's a // constant, because subtracts of constants are canonicalized to adds). const SCEV *Step = Normalized->getStepRecurrence(SE); - bool isPointer = isa<PointerType>(ExpandTy); + bool isPointer = ExpandTy->isPointerTy(); bool isNegative = !isPointer && isNonConstantNegative(Step); if (isNegative) Step = SE.getNegativeSCEV(Step); @@ -713,7 +956,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Restore the original insert point. if (SaveInsertBB) - Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); + restoreInsertPoint(SaveInsertBB, SaveInsertPt); // Remember this PHI, even in post-inc mode. InsertedValues.insert(PN); @@ -763,7 +1006,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { const Type *ExpandTy = PostLoopScale ? IntTy : STy; PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy); - // Accomodate post-inc mode, if necessary. + // Accommodate post-inc mode, if necessary. Value *Result; if (L != PostIncLoop) Result = PN; @@ -776,6 +1019,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Re-apply any non-loop-dominating scale. 
if (PostLoopScale) { + Result = InsertNoopCastOfTo(Result, IntTy); Result = Builder.CreateMul(Result, expandCodeFor(PostLoopScale, IntTy)); rememberInstruction(Result); @@ -787,6 +1031,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { const SCEV *const OffsetArray[1] = { PostLoopOffset }; Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result); } else { + Result = InsertNoopCastOfTo(Result, IntTy); Result = Builder.CreateAdd(Result, expandCodeFor(PostLoopOffset, IntTy)); rememberInstruction(Result); @@ -806,7 +1051,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { PHINode *CanonicalIV = 0; if (PHINode *PN = L->getCanonicalInductionVariable()) if (SE.isSCEVable(PN->getType()) && - isa<IntegerType>(SE.getEffectiveSCEVType(PN->getType())) && + SE.getEffectiveSCEVType(PN->getType())->isIntegerTy() && SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty)) CanonicalIV = PN; @@ -827,7 +1072,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { while (isa<PHINode>(NewInsertPt)) ++NewInsertPt; V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, NewInsertPt); - Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); + restoreInsertPoint(SaveInsertBB, SaveInsertPt); return V; } @@ -1022,13 +1267,24 @@ Value *SCEVExpander::expand(const SCEV *S) { L = L->getParentLoop()) if (S->isLoopInvariant(L)) { if (!L) break; - if (BasicBlock *Preheader = L->getLoopPreheader()) + if (BasicBlock *Preheader = L->getLoopPreheader()) { InsertPt = Preheader->getTerminator(); + BasicBlock::iterator IP = InsertPt; + // Back past any debug info instructions. Sometimes we inserted + // something earlier before debug info but after any real instructions. + // This should behave the same as if debug info was not present. + while (IP != Preheader->begin()) { + --IP; + if (!isa<DbgInfoIntrinsic>(IP)) + break; + InsertPt = IP; + } + } } else { // If the SCEV is computable at this level, insert it into the header // after the PHIs (and after any other instructions that we've inserted // there) so that it is guaranteed to dominate any user inside the loop. - if (L && S->hasComputableLoopEvolution(L)) + if (L && S->hasComputableLoopEvolution(L) && L != PostIncLoop) InsertPt = L->getHeader()->getFirstNonPHI(); while (isInsertedInstruction(InsertPt)) InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); @@ -1053,10 +1309,32 @@ Value *SCEVExpander::expand(const SCEV *S) { if (!PostIncLoop) InsertedExpressions[std::make_pair(S, InsertPt)] = V; - Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); + restoreInsertPoint(SaveInsertBB, SaveInsertPt); return V; } +void SCEVExpander::rememberInstruction(Value *I) { + if (!PostIncLoop) + InsertedValues.insert(I); + + // If we just claimed an existing instruction and that instruction had + // been the insert point, adjust the insert point forward so that + // subsequently inserted code will be dominated. + if (Builder.GetInsertPoint() == I) { + BasicBlock::iterator It = cast<Instruction>(I); + do { ++It; } while (isInsertedInstruction(It)); + Builder.SetInsertPoint(Builder.GetInsertBlock(), It); + } +} + +void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) { + // If we acquired more instructions since the old insert point was saved, + // advance past them. 
+ while (isInsertedInstruction(I)) ++I; + + Builder.SetInsertPoint(BB, I); +} + /// getOrInsertCanonicalInductionVariable - This method returns the /// canonical induction variable of the specified type for the specified /// loop (inserting one if there is none). A canonical induction variable @@ -1064,13 +1342,13 @@ Value *SCEVExpander::expand(const SCEV *S) { Value * SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, const Type *Ty) { - assert(Ty->isInteger() && "Can only insert integer induction variables!"); + assert(Ty->isIntegerTy() && "Can only insert integer induction variables!"); const SCEV *H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty), SE.getIntegerSCEV(1, Ty), L); BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); Value *V = expandCodeFor(H, 0, L->getHeader()->begin()); if (SaveInsertBB) - Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); + restoreInsertPoint(SaveInsertBB, SaveInsertPt); return V; } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index f9331e7..92cbb7c 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -23,6 +23,7 @@ #include "llvm/Target/TargetData.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include <cstring> using namespace llvm; @@ -49,11 +50,11 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); unsigned BitWidth = Mask.getBitWidth(); - assert((V->getType()->isIntOrIntVector() || isa<PointerType>(V->getType())) && - "Not integer or pointer type!"); + assert((V->getType()->isIntOrIntVectorTy() || V->getType()->isPointerTy()) + && "Not integer or pointer type!"); assert((!TD || TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && - (!V->getType()->isIntOrIntVector() || + (!V->getType()->isIntOrIntVectorTy() || V->getType()->getScalarSizeInBits() == BitWidth) && KnownZero.getBitWidth() == BitWidth && KnownOne.getBitWidth() == BitWidth && @@ -249,7 +250,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, unsigned SrcBitWidth; // Note that we handle pointer operands here because of inttoptr/ptrtoint // which fall through here. 
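The mechanical one-line hunks around this point, and most of the single-line changes throughout the patch, replace isa<PointerType>(T), T->isInteger(), isa<StructType>(T) and friends with the corresponding Type member predicates. A small equivalence sketch, assuming only headers already present in this tree:

#include "llvm/DerivedTypes.h"
#include "llvm/Support/Casting.h"
#include <cassert>

// Both spellings test the same property; the member form avoids naming the
// derived Type subclass (and often an extra #include) at the call site.
static void checkTypePredicates(const llvm::Type *T) {
  assert(T->isPointerTy() == llvm::isa<llvm::PointerType>(T));
  assert(T->isIntegerTy() == llvm::isa<llvm::IntegerType>(T));
  (void)T; // keep unused-parameter warnings quiet in NDEBUG builds
}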
- if (isa<PointerType>(SrcTy)) + if (SrcTy->isPointerTy()) SrcBitWidth = TD->getTypeSizeInBits(SrcTy); else SrcBitWidth = SrcTy->getScalarSizeInBits(); @@ -269,10 +270,10 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, } case Instruction::BitCast: { const Type *SrcTy = I->getOperand(0)->getType(); - if ((SrcTy->isInteger() || isa<PointerType>(SrcTy)) && + if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) - !isa<VectorType>(I->getType())) { + !I->getType()->isVectorTy()) { ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, TD, Depth+1); return; @@ -649,7 +650,7 @@ bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, /// unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, unsigned Depth) { - assert((TD || V->getType()->isIntOrIntVector()) && + assert((TD || V->getType()->isIntOrIntVectorTy()) && "ComputeNumSignBits requires a TargetData object to operate " "on non-integer values!"); const Type *Ty = V->getType(); @@ -823,7 +824,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); - assert(V->getType()->isInteger() && "Not integer or pointer type!"); + assert(V->getType()->isIntegerTy() && "Not integer or pointer type!"); const Type *T = V->getType(); @@ -980,7 +981,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { /// may not be represented in the result. static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, const TargetData *TD, unsigned Depth) { - assert(isa<IntegerType>(V->getType()) && "Not an integer value"); + assert(V->getType()->isIntegerTy() && "Not an integer value"); // Limit our recursion depth. if (Depth == 6) { @@ -1253,7 +1254,7 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin, if (idx_begin == idx_end) return V; // We have indices, so V should have an indexable type - assert((isa<StructType>(V->getType()) || isa<ArrayType>(V->getType())) + assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) && "Not looking at a struct or array?"); assert(ExtractValueInst::getIndexedType(V->getType(), idx_begin, idx_end) && "Invalid indices for type?"); @@ -1372,7 +1373,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, // Make sure the index-ee is a pointer to array of i8. const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType()); const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType()); - if (AT == 0 || !AT->getElementType()->isInteger(8)) + if (AT == 0 || !AT->getElementType()->isIntegerTy(8)) return false; // Check to make sure that the first operand of the GEP is an integer and @@ -1411,7 +1412,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, // Must be a Constant Array ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); - if (Array == 0 || !Array->getType()->getElementType()->isInteger(8)) + if (Array == 0 || !Array->getType()->getElementType()->isIntegerTy(8)) return false; // Get the number of elements in the array @@ -1436,3 +1437,131 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, // The array isn't null terminated, but maybe this is a memcpy, not a strcpy. return true; } + +// These next two are very similar to the above, but also look through PHI +// nodes. +// TODO: See if we can integrate these two together. 
+ +/// GetStringLengthH - If we can compute the length of the string pointed to by +/// the specified pointer, return 'len+1'. If we can't, return 0. +static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) { + // Look through noop bitcast instructions. + if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) + return GetStringLengthH(BCI->getOperand(0), PHIs); + + // If this is a PHI node, there are two cases: either we have already seen it + // or we haven't. + if (PHINode *PN = dyn_cast<PHINode>(V)) { + if (!PHIs.insert(PN)) + return ~0ULL; // already in the set. + + // If it was new, see if all the input strings are the same length. + uint64_t LenSoFar = ~0ULL; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs); + if (Len == 0) return 0; // Unknown length -> unknown. + + if (Len == ~0ULL) continue; + + if (Len != LenSoFar && LenSoFar != ~0ULL) + return 0; // Disagree -> unknown. + LenSoFar = Len; + } + + // Success, all agree. + return LenSoFar; + } + + // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y) + if (SelectInst *SI = dyn_cast<SelectInst>(V)) { + uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs); + if (Len1 == 0) return 0; + uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs); + if (Len2 == 0) return 0; + if (Len1 == ~0ULL) return Len2; + if (Len2 == ~0ULL) return Len1; + if (Len1 != Len2) return 0; + return Len1; + } + + // If the value is not a GEP instruction nor a constant expression with a + // GEP instruction, then return unknown. + User *GEP = 0; + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) { + GEP = GEPI; + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + if (CE->getOpcode() != Instruction::GetElementPtr) + return 0; + GEP = CE; + } else { + return 0; + } + + // Make sure the GEP has exactly three arguments. + if (GEP->getNumOperands() != 3) + return 0; + + // Check to make sure that the first operand of the GEP is an integer and + // has value 0 so that we are sure we're indexing into the initializer. + if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) { + if (!Idx->isZero()) + return 0; + } else + return 0; + + // If the second index isn't a ConstantInt, then this is a variable index + // into the array. If this occurs, we can't say anything meaningful about + // the string. + uint64_t StartIdx = 0; + if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2))) + StartIdx = CI->getZExtValue(); + else + return 0; + + // The GEP instruction, constant or instruction, must reference a global + // variable that is a constant and is initialized. The referenced constant + // initializer is the array that we'll use for optimization. + GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)); + if (!GV || !GV->isConstant() || !GV->hasInitializer() || + GV->mayBeOverridden()) + return 0; + Constant *GlobalInit = GV->getInitializer(); + + // Handle the ConstantAggregateZero case, which is a degenerate case. The + // initializer is constant zero so the length of the string must be zero. + if (isa<ConstantAggregateZero>(GlobalInit)) + return 1; // Len = 0 offset by 1. 
+ + // Must be a Constant Array + ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); + if (!Array || !Array->getType()->getElementType()->isIntegerTy(8)) + return false; + + // Get the number of elements in the array + uint64_t NumElts = Array->getType()->getNumElements(); + + // Traverse the constant array from StartIdx (derived above) which is + // the place the GEP refers to in the array. + for (unsigned i = StartIdx; i != NumElts; ++i) { + Constant *Elt = Array->getOperand(i); + ConstantInt *CI = dyn_cast<ConstantInt>(Elt); + if (!CI) // This array isn't suitable, non-int initializer. + return 0; + if (CI->isZero()) + return i-StartIdx+1; // We found end of string, success! + } + + return 0; // The array isn't null terminated, conservatively return 'unknown'. +} + +/// GetStringLength - If we can compute the length of the string pointed to by +/// the specified pointer, return 'len+1'. If we can't, return 0. +uint64_t llvm::GetStringLength(Value *V) { + if (!V->getType()->isPointerTy()) return 0; + + SmallPtrSet<PHINode*, 32> PHIs; + uint64_t Len = GetStringLengthH(V, PHIs); + // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return + // an empty string as a length. + return Len == ~0ULL ? 1 : Len; +} diff --git a/lib/AsmParser/Android.mk b/lib/AsmParser/Android.mk new file mode 100644 index 0000000..548f719 --- /dev/null +++ b/lib/AsmParser/Android.mk @@ -0,0 +1,28 @@ +LOCAL_PATH:= $(call my-dir) + +asm_parser_SRC_FILES := \ + LLLexer.cpp \ + LLParser.cpp \ + Parser.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(asm_parser_SRC_FILES) + +LOCAL_MODULE:= libLLVMAsmParser + +include $(LOCAL_PATH)/../../llvm-host-build.mk +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(asm_parser_SRC_FILES) + +LOCAL_MODULE:= libLLVMAsmParser + +include $(LOCAL_PATH)/../../llvm-device-build.mk +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 8ad658d..46f3cbc 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -570,6 +570,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(type); KEYWORD(opaque); + KEYWORD(union); KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle); KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 5dd6569..8083a07 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -614,7 +614,7 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, Aliasee = ID.ConstantVal; } - if (!isa<PointerType>(Aliasee->getType())) + if (!Aliasee->getType()->isPointerTy()) return Error(AliaseeLoc, "alias must have pointer type"); // Okay, create the alias but do not insert it into the module yet. @@ -685,7 +685,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, return true; } - if (isa<FunctionType>(Ty) || Ty->isLabelTy()) + if (Ty->isFunctionTy() || Ty->isLabelTy()) return Error(TyLoc, "invalid type for global variable"); GlobalVariable *GV = 0; @@ -791,7 +791,7 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty, GlobalValue *FwdVal; if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) { // Function types can return opaque but functions can't. 
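Stepping back to the GetStringLength helper that the ValueTracking.cpp hunks above introduce: it returns the string length plus one (it counts the nul terminator) and 0 when the length is unknown. A hedged usage sketch, assuming the matching declaration lands in llvm/Analysis/ValueTracking.h; the strlen-folding transform itself is illustrative and not part of this patch:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"

// Fold a call to strlen whose argument points into a constant string.
// In this tree a CallInst's operand 0 is the callee; arguments follow it.
static bool foldConstantStrlen(llvm::CallInst *CI) {
  llvm::Function *Callee = CI->getCalledFunction();
  if (!Callee || Callee->getName() != "strlen" || CI->getNumOperands() != 2)
    return false;

  uint64_t LenPlusNul = llvm::GetStringLength(CI->getOperand(1));
  if (LenPlusNul == 0)
    return false;                       // Unknown length.

  // GetStringLength counts the terminating nul, so subtract one.
  CI->replaceAllUsesWith(
      llvm::ConstantInt::get(CI->getType(), LenPlusNul - 1));
  return true;
}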
- if (isa<OpaqueType>(FT->getReturnType())) { + if (FT->getReturnType()->isOpaqueTy()) { Error(Loc, "function may not return opaque type"); return 0; } @@ -836,7 +836,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) { GlobalValue *FwdVal; if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) { // Function types can return opaque but functions can't. - if (isa<OpaqueType>(FT->getReturnType())) { + if (FT->getReturnType()->isOpaqueTy()) { Error(Loc, "function may not return opaque type"); return 0; } @@ -956,6 +956,14 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break; case lltok::kw_naked: Attrs |= Attribute::Naked; break; + case lltok::kw_alignstack: { + unsigned Alignment; + if (ParseOptionalStackAlignment(Alignment)) + return true; + Attrs |= Attribute::constructStackAlignmentFromInt(Alignment); + continue; + } + case lltok::kw_align: { unsigned Alignment; if (ParseOptionalAlignment(Alignment)) @@ -963,6 +971,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { Attrs |= Attribute::constructAlignmentFromInt(Alignment); continue; } + } Lex.Lex(); } @@ -1131,6 +1140,25 @@ bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment, return false; } +/// ParseOptionalStackAlignment +/// ::= /* empty */ +/// ::= 'alignstack' '(' 4 ')' +bool LLParser::ParseOptionalStackAlignment(unsigned &Alignment) { + Alignment = 0; + if (!EatIfPresent(lltok::kw_alignstack)) + return false; + LocTy ParenLoc = Lex.getLoc(); + if (!EatIfPresent(lltok::lparen)) + return Error(ParenLoc, "expected '('"); + LocTy AlignLoc = Lex.getLoc(); + if (ParseUInt32(Alignment)) return true; + ParenLoc = Lex.getLoc(); + if (!EatIfPresent(lltok::rparen)) + return Error(ParenLoc, "expected ')'"); + if (!isPowerOf2_32(Alignment)) + return Error(AlignLoc, "stack alignment is not a power of two"); + return false; +} /// ParseIndexList - This parses the index list for an insert/extractvalue /// instruction. This sets AteExtraComma in the case where we eat an extra @@ -1267,6 +1295,11 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) { if (ParseStructType(Result, false)) return true; break; + case lltok::kw_union: + // TypeRec ::= 'union' '{' ... '}' + if (ParseUnionType(Result)) + return true; + break; case lltok::lsquare: // TypeRec ::= '[' ... ']' Lex.Lex(); // eat the lsquare. 
@@ -1482,7 +1515,7 @@ bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList, Name = ""; } - if (!ArgTy->isFirstClassType() && !isa<OpaqueType>(ArgTy)) + if (!ArgTy->isFirstClassType() && !ArgTy->isOpaqueTy()) return Error(TypeLoc, "invalid type for function argument"); ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name)); @@ -1576,6 +1609,38 @@ bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) { return false; } +/// ParseUnionType +/// TypeRec +/// ::= 'union' '{' TypeRec (',' TypeRec)* '}' +bool LLParser::ParseUnionType(PATypeHolder &Result) { + assert(Lex.getKind() == lltok::kw_union); + Lex.Lex(); // Consume the 'union' + + if (ParseToken(lltok::lbrace, "'{' expected after 'union'")) return true; + + SmallVector<PATypeHolder, 8> ParamsList; + do { + LocTy EltTyLoc = Lex.getLoc(); + if (ParseTypeRec(Result)) return true; + ParamsList.push_back(Result); + + if (Result->isVoidTy()) + return Error(EltTyLoc, "union element can not have void type"); + if (!UnionType::isValidElementType(Result)) + return Error(EltTyLoc, "invalid element type for union"); + + } while (EatIfPresent(lltok::comma)) ; + + if (ParseToken(lltok::rbrace, "expected '}' at end of union")) + return true; + + SmallVector<const Type*, 8> ParamsListTy; + for (unsigned i = 0, e = ParamsList.size(); i != e; ++i) + ParamsListTy.push_back(ParamsList[i].get()); + Result = HandleUpRefs(UnionType::get(&ParamsListTy[0], ParamsListTy.size())); + return false; +} + /// ParseArrayVectorType - Parse an array or vector type, assuming the first /// token has already been consumed. /// TypeRec @@ -1720,7 +1785,7 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name, } // Don't make placeholders with invalid type. - if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) && !Ty->isLabelTy()) { + if (!Ty->isFirstClassType() && !Ty->isOpaqueTy() && !Ty->isLabelTy()) { P.Error(Loc, "invalid use of a non-first-class type"); return 0; } @@ -1761,7 +1826,7 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty, return 0; } - if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) && !Ty->isLabelTy()) { + if (!Ty->isFirstClassType() && !Ty->isOpaqueTy() && !Ty->isLabelTy()) { P.Error(Loc, "invalid use of a non-first-class type"); return 0; } @@ -1992,8 +2057,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { if (Elts.empty()) return Error(ID.Loc, "constant vector must not be empty"); - if (!Elts[0]->getType()->isInteger() && - !Elts[0]->getType()->isFloatingPoint()) + if (!Elts[0]->getType()->isIntegerTy() && + !Elts[0]->getType()->isFloatingPointTy()) return Error(FirstEltLoc, "vector elements must have integer or floating point type"); @@ -2135,8 +2200,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { ParseToken(lltok::rparen, "expected ')' in extractvalue constantexpr")) return true; - if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType())) - return Error(ID.Loc, "extractvalue operand must be array or struct"); + if (!Val->getType()->isAggregateType()) + return Error(ID.Loc, "extractvalue operand must be aggregate type"); if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(), Indices.end())) return Error(ID.Loc, "invalid indices for extractvalue"); @@ -2156,8 +2221,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { ParseIndexList(Indices) || ParseToken(lltok::rparen, "expected ')' in insertvalue constantexpr")) return true; - if (!isa<StructType>(Val0->getType()) && !isa<ArrayType>(Val0->getType())) - return 
Error(ID.Loc, "extractvalue operand must be array or struct"); + if (!Val0->getType()->isAggregateType()) + return Error(ID.Loc, "insertvalue operand must be aggregate type"); if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(), Indices.end())) return Error(ID.Loc, "invalid indices for insertvalue"); @@ -2185,13 +2250,13 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { CmpInst::Predicate Pred = (CmpInst::Predicate)PredVal; if (Opc == Instruction::FCmp) { - if (!Val0->getType()->isFPOrFPVector()) + if (!Val0->getType()->isFPOrFPVectorTy()) return Error(ID.Loc, "fcmp requires floating point operands"); ID.ConstantVal = ConstantExpr::getFCmp(Pred, Val0, Val1); } else { assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!"); - if (!Val0->getType()->isIntOrIntVector() && - !isa<PointerType>(Val0->getType())) + if (!Val0->getType()->isIntOrIntVectorTy() && + !Val0->getType()->isPointerTy()) return Error(ID.Loc, "icmp requires pointer or integer operands"); ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1); } @@ -2241,7 +2306,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return true; if (Val0->getType() != Val1->getType()) return Error(ID.Loc, "operands of constexpr must have same type"); - if (!Val0->getType()->isIntOrIntVector()) { + if (!Val0->getType()->isIntOrIntVectorTy()) { if (NUW) return Error(ModifierLoc, "nuw only applies to integer operations"); if (NSW) @@ -2249,8 +2314,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { } // API compatibility: Accept either integer or floating-point types with // add, sub, and mul. - if (!Val0->getType()->isIntOrIntVector() && - !Val0->getType()->isFPOrFPVector()) + if (!Val0->getType()->isIntOrIntVectorTy() && + !Val0->getType()->isFPOrFPVectorTy()) return Error(ID.Loc,"constexpr requires integer, fp, or vector operands"); unsigned Flags = 0; if (NUW) Flags |= OverflowingBinaryOperator::NoUnsignedWrap; @@ -2280,7 +2345,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return true; if (Val0->getType() != Val1->getType()) return Error(ID.Loc, "operands of constexpr must have same type"); - if (!Val0->getType()->isIntOrIntVector()) + if (!Val0->getType()->isIntOrIntVectorTy()) return Error(ID.Loc, "constexpr requires integer or integer vector operands"); ID.ConstantVal = ConstantExpr::get(Opc, Val0, Val1); @@ -2305,7 +2370,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return true; if (Opc == Instruction::GetElementPtr) { - if (Elts.size() == 0 || !isa<PointerType>(Elts[0]->getType())) + if (Elts.size() == 0 || !Elts[0]->getType()->isPointerTy()) return Error(ID.Loc, "getelementptr requires pointer operand"); if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(), @@ -2405,7 +2470,7 @@ bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) { bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, PerFunctionState *PFS) { - if (isa<FunctionType>(Ty)) + if (Ty->isFunctionTy()) return Error(ID.Loc, "functions are not values, refer to them as pointers"); switch (ID.Kind) { @@ -2444,13 +2509,13 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, V = GetGlobalVal(ID.UIntVal, Ty, ID.Loc); return V == 0; case ValID::t_APSInt: - if (!isa<IntegerType>(Ty)) + if (!Ty->isIntegerTy()) return Error(ID.Loc, "integer constant must have integer type"); ID.APSIntVal.extOrTrunc(Ty->getPrimitiveSizeInBits()); V = ConstantInt::get(Context, ID.APSIntVal); return false; case 
ValID::t_APFloat: - if (!Ty->isFloatingPoint() || + if (!Ty->isFloatingPointTy() || !ConstantFP::isValueValidForType(Ty, ID.APFloatVal)) return Error(ID.Loc, "floating point constant invalid for type"); @@ -2470,19 +2535,19 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, return false; case ValID::t_Null: - if (!isa<PointerType>(Ty)) + if (!Ty->isPointerTy()) return Error(ID.Loc, "null must be a pointer type"); V = ConstantPointerNull::get(cast<PointerType>(Ty)); return false; case ValID::t_Undef: // FIXME: LabelTy should not be a first-class type. if ((!Ty->isFirstClassType() || Ty->isLabelTy()) && - !isa<OpaqueType>(Ty)) + !Ty->isOpaqueTy()) return Error(ID.Loc, "invalid type for undef constant"); V = UndefValue::get(Ty); return false; case ValID::t_EmptyArray: - if (!isa<ArrayType>(Ty) || cast<ArrayType>(Ty)->getNumElements() != 0) + if (!Ty->isArrayTy() || cast<ArrayType>(Ty)->getNumElements() != 0) return Error(ID.Loc, "invalid empty array initializer"); V = UndefValue::get(Ty); return false; @@ -2493,8 +2558,17 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, V = Constant::getNullValue(Ty); return false; case ValID::t_Constant: - if (ID.ConstantVal->getType() != Ty) + if (ID.ConstantVal->getType() != Ty) { + // Allow a constant struct with a single member to be converted + // to a union, if the union has a member which is the same type + // as the struct member. + if (const UnionType* utype = dyn_cast<UnionType>(Ty)) { + return ParseUnionValue(utype, ID, V); + } + return Error(ID.Loc, "constant expression type mismatch"); + } + V = ID.ConstantVal; return false; } @@ -2524,6 +2598,22 @@ bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc, return false; } +bool LLParser::ParseUnionValue(const UnionType* utype, ValID &ID, Value *&V) { + if (const StructType* stype = dyn_cast<StructType>(ID.ConstantVal->getType())) { + if (stype->getNumContainedTypes() != 1) + return Error(ID.Loc, "constant expression type mismatch"); + int index = utype->getElementTypeIndex(stype->getContainedType(0)); + if (index < 0) + return Error(ID.Loc, "initializer type is not a member of the union"); + + V = ConstantUnion::get( + utype, cast<Constant>(ID.ConstantVal->getOperand(0))); + return false; + } + + return Error(ID.Loc, "constant expression type mismatch"); +} + /// FunctionHeader /// ::= OptionalLinkage OptionalVisibility OptionalCallingConv OptRetAttrs @@ -2572,7 +2662,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { } if (!FunctionType::isValidReturnType(RetType) || - isa<OpaqueType>(RetType)) + RetType->isOpaqueTy()) return Error(RetTypeLoc, "invalid function return type"); LocTy NameLoc = Lex.getLoc(); @@ -2873,7 +2963,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, // API compatibility: Accept either integer or floating-point types. bool Result = ParseArithmetic(Inst, PFS, KeywordVal, 0); if (!Result) { - if (!Inst->getType()->isIntOrIntVector()) { + if (!Inst->getType()->isIntOrIntVectorTy()) { if (NUW) return Error(ModifierLoc, "nuw only applies to integer operations"); if (NSW) @@ -3096,7 +3186,7 @@ bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) { ParseToken(lltok::lsquare, "expected '[' with switch table")) return true; - if (!isa<IntegerType>(Cond->getType())) + if (!Cond->getType()->isIntegerTy()) return Error(CondLoc, "switch condition must have integer type"); // Parse the jump table pairs. 
@@ -3139,7 +3229,7 @@ bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) { ParseToken(lltok::lsquare, "expected '[' with indirectbr")) return true; - if (!isa<PointerType>(Address->getType())) + if (!Address->getType()->isPointerTy()) return Error(AddrLoc, "indirectbr address must have pointer type"); // Parse the destination list. @@ -3292,11 +3382,11 @@ bool LLParser::ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS, switch (OperandType) { default: llvm_unreachable("Unknown operand type!"); case 0: // int or FP. - Valid = LHS->getType()->isIntOrIntVector() || - LHS->getType()->isFPOrFPVector(); + Valid = LHS->getType()->isIntOrIntVectorTy() || + LHS->getType()->isFPOrFPVectorTy(); break; - case 1: Valid = LHS->getType()->isIntOrIntVector(); break; - case 2: Valid = LHS->getType()->isFPOrFPVector(); break; + case 1: Valid = LHS->getType()->isIntOrIntVectorTy(); break; + case 2: Valid = LHS->getType()->isFPOrFPVectorTy(); break; } if (!Valid) @@ -3316,7 +3406,7 @@ bool LLParser::ParseLogical(Instruction *&Inst, PerFunctionState &PFS, ParseValue(LHS->getType(), RHS, PFS)) return true; - if (!LHS->getType()->isIntOrIntVector()) + if (!LHS->getType()->isIntOrIntVectorTy()) return Error(Loc,"instruction requires integer or integer vector operands"); Inst = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); @@ -3340,13 +3430,13 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS, return true; if (Opc == Instruction::FCmp) { - if (!LHS->getType()->isFPOrFPVector()) + if (!LHS->getType()->isFPOrFPVectorTy()) return Error(Loc, "fcmp requires floating point operands"); Inst = new FCmpInst(CmpInst::Predicate(Pred), LHS, RHS); } else { assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!"); - if (!LHS->getType()->isIntOrIntVector() && - !isa<PointerType>(LHS->getType())) + if (!LHS->getType()->isIntOrIntVectorTy() && + !LHS->getType()->isPointerTy()) return Error(Loc, "icmp requires integer operands"); Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS); } @@ -3643,7 +3733,7 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS, } } - if (Size && !Size->getType()->isInteger(32)) + if (Size && !Size->getType()->isIntegerTy(32)) return Error(SizeLoc, "element count must be i32"); if (isAlloca) { @@ -3671,7 +3761,7 @@ bool LLParser::ParseFree(Instruction *&Inst, PerFunctionState &PFS, BasicBlock* BB) { Value *Val; LocTy Loc; if (ParseTypeAndValue(Val, Loc, PFS)) return true; - if (!isa<PointerType>(Val->getType())) + if (!Val->getType()->isPointerTy()) return Error(Loc, "operand to free must be a pointer"); Inst = CallInst::CreateFree(Val, BB); return false; @@ -3688,7 +3778,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS, ParseOptionalCommaAlign(Alignment, AteExtraComma)) return true; - if (!isa<PointerType>(Val->getType()) || + if (!Val->getType()->isPointerTy() || !cast<PointerType>(Val->getType())->getElementType()->isFirstClassType()) return Error(Loc, "load operand must be a pointer to a first class type"); @@ -3709,7 +3799,7 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS, ParseOptionalCommaAlign(Alignment, AteExtraComma)) return true; - if (!isa<PointerType>(Ptr->getType())) + if (!Ptr->getType()->isPointerTy()) return Error(PtrLoc, "store operand must be a pointer"); if (!Val->getType()->isFirstClassType()) return Error(Loc, "store operand must be a first class value"); @@ -3731,7 +3821,7 @@ bool LLParser::ParseGetResult(Instruction *&Inst, 
PerFunctionState &PFS) { ParseUInt32(Element, EltLoc)) return true; - if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType())) + if (!Val->getType()->isStructTy() && !Val->getType()->isArrayTy()) return Error(ValLoc, "getresult inst requires an aggregate operand"); if (!ExtractValueInst::getIndexedType(Val->getType(), Element)) return Error(EltLoc, "invalid getresult index for value"); @@ -3748,7 +3838,7 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { if (ParseTypeAndValue(Ptr, Loc, PFS)) return true; - if (!isa<PointerType>(Ptr->getType())) + if (!Ptr->getType()->isPointerTy()) return Error(Loc, "base of getelementptr must be a pointer"); SmallVector<Value*, 16> Indices; @@ -3759,7 +3849,7 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { break; } if (ParseTypeAndValue(Val, EltLoc, PFS)) return true; - if (!isa<IntegerType>(Val->getType())) + if (!Val->getType()->isIntegerTy()) return Error(EltLoc, "getelementptr index must be an integer"); Indices.push_back(Val); } @@ -3783,8 +3873,8 @@ int LLParser::ParseExtractValue(Instruction *&Inst, PerFunctionState &PFS) { ParseIndexList(Indices, AteExtraComma)) return true; - if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType())) - return Error(Loc, "extractvalue operand must be array or struct"); + if (!Val->getType()->isAggregateType()) + return Error(Loc, "extractvalue operand must be aggregate type"); if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(), Indices.end())) @@ -3805,8 +3895,8 @@ int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) { ParseIndexList(Indices, AteExtraComma)) return true; - if (!isa<StructType>(Val0->getType()) && !isa<ArrayType>(Val0->getType())) - return Error(Loc0, "extractvalue operand must be array or struct"); + if (!Val0->getType()->isAggregateType()) + return Error(Loc0, "insertvalue operand must be aggregate type"); if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(), Indices.end())) diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 85c07ff..9abe404 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -31,6 +31,7 @@ namespace llvm { class GlobalValue; class MDString; class MDNode; + class UnionType; /// ValID - Represents a reference of a definition of some sort with no type. 
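Two of the LLParser.cpp additions above define new .ll surface syntax: the alignstack(N) function attribute, where N must be a power of two, and the first-class union type, whose initializers are written as a single-element struct constant that ParseUnionValue converts to the matching member. A hedged sketch of what the parser now accepts, assuming the assembly-parsing entry point already in this tree; the IR strings are illustrative only:

#include "llvm/Assembly/Parser.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Support/SourceMgr.h"

// Returns a module exercising the new constructs, or 0 (filling Err) on error.
static llvm::Module *parseNewSyntax(llvm::LLVMContext &Ctx,
                                    llvm::SMDiagnostic &Err) {
  const char *IR =
      // 'union { ... }' is now a recognized type; void members and
      // non-first-class element types are rejected by ParseUnionType.
      "%u = type union { i32, float }\n"
      // 'alignstack(16)' is parsed as a function attribute; a value that is
      // not a power of two is diagnosed by ParseOptionalStackAlignment.
      "define void @f() alignstack(16) {\n"
      "entry:\n"
      "  ret void\n"
      "}\n";
  return llvm::ParseAssemblyString(IR, 0, Err, Ctx);
}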
/// There are several cases where we have to parse the value but where the @@ -169,6 +170,7 @@ namespace llvm { bool ParseOptionalVisibility(unsigned &Visibility); bool ParseOptionalCallingConv(CallingConv::ID &CC); bool ParseOptionalAlignment(unsigned &Alignment); + bool ParseOptionalStackAlignment(unsigned &Alignment); bool ParseInstructionMetadata(SmallVectorImpl<std::pair<unsigned, MDNode *> > &); bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma); @@ -211,6 +213,7 @@ namespace llvm { } bool ParseTypeRec(PATypeHolder &H); bool ParseStructType(PATypeHolder &H, bool Packed); + bool ParseUnionType(PATypeHolder &H); bool ParseArrayVectorType(PATypeHolder &H, bool isVector); bool ParseFunctionType(PATypeHolder &Result); PATypeHolder HandleUpRefs(const Type *Ty); @@ -279,6 +282,8 @@ namespace llvm { return ParseTypeAndBasicBlock(BB, Loc, PFS); } + bool ParseUnionValue(const UnionType* utype, ValID &ID, Value *&V); + struct ParamInfo { LocTy Loc; Value *V; diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 7f1807c..3ac9169 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -97,6 +97,7 @@ namespace lltok { kw_type, kw_opaque, + kw_union, kw_eq, kw_ne, kw_slt, kw_sgt, kw_sle, kw_sge, kw_ult, kw_ugt, kw_ule, kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno, diff --git a/lib/Bitcode/Reader/Android.mk b/lib/Bitcode/Reader/Android.mk new file mode 100644 index 0000000..165b0d0 --- /dev/null +++ b/lib/Bitcode/Reader/Android.mk @@ -0,0 +1,29 @@ +LOCAL_PATH:= $(call my-dir) + +bitcode_reader_SRC_FILES := \ + BitReader.cpp \ + BitcodeReader.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(bitcode_reader_SRC_FILES) + +LOCAL_MODULE:= libLLVMBitReader + +include $(LLVM_HOST_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(bitcode_reader_SRC_FILES) + +LOCAL_MODULE:= libLLVMBitReader + +include $(LLVM_DEVICE_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp index 1facbc3..15844c0 100644 --- a/lib/Bitcode/Reader/BitReader.cpp +++ b/lib/Bitcode/Reader/BitReader.cpp @@ -21,17 +21,8 @@ using namespace llvm; Optionally returns a human-readable error message via OutMessage. */ LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutModule, char **OutMessage) { - std::string Message; - - *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), getGlobalContext(), - &Message)); - if (!*OutModule) { - if (OutMessage) - *OutMessage = strdup(Message.c_str()); - return 1; - } - - return 0; + return LLVMParseBitcodeInContext(wrap(&getGlobalContext()), MemBuf, OutModule, + OutMessage); } LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef, @@ -54,36 +45,44 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef, /* Reads a module from the specified path, returning via the OutModule parameter a module provider which performs lazy deserialization. Returns 0 on success. Optionally returns a human-readable error message via OutMessage. 
*/ -LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf, - LLVMModuleProviderRef *OutMP, - char **OutMessage) { +LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef, + LLVMMemoryBufferRef MemBuf, + LLVMModuleRef *OutM, + char **OutMessage) { std::string Message; - - *OutMP = reinterpret_cast<LLVMModuleProviderRef>( - getLazyBitcodeModule(unwrap(MemBuf), getGlobalContext(), &Message)); - - if (!*OutMP) { + + *OutM = wrap(getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef), + &Message)); + if (!*OutM) { if (OutMessage) *OutMessage = strdup(Message.c_str()); - return 1; + return 1; } - + return 0; + } +LLVMBool LLVMGetBitcodeModule(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM, + char **OutMessage) { + return LLVMGetBitcodeModuleInContext(LLVMGetGlobalContext(), MemBuf, OutM, + OutMessage); +} + +/* Deprecated: Use LLVMGetBitcodeModuleInContext instead. */ LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef, LLVMMemoryBufferRef MemBuf, LLVMModuleProviderRef *OutMP, char **OutMessage) { - std::string Message; - - *OutMP = reinterpret_cast<LLVMModuleProviderRef>( - getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef), &Message)); - if (!*OutMP) { - if (OutMessage) - *OutMessage = strdup(Message.c_str()); - return 1; - } - - return 0; + return LLVMGetBitcodeModuleInContext(ContextRef, MemBuf, + reinterpret_cast<LLVMModuleRef*>(OutMP), + OutMessage); +} + +/* Deprecated: Use LLVMGetBitcodeModule instead. */ +LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf, + LLVMModuleProviderRef *OutMP, + char **OutMessage) { + return LLVMGetBitcodeModuleProviderInContext(LLVMGetGlobalContext(), MemBuf, + OutMP, OutMessage); } diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 4dfc6ce..a328837 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -108,17 +108,17 @@ static int GetDecodedBinaryOpcode(unsigned Val, const Type *Ty) { switch (Val) { default: return -1; case bitc::BINOP_ADD: - return Ty->isFPOrFPVector() ? Instruction::FAdd : Instruction::Add; + return Ty->isFPOrFPVectorTy() ? Instruction::FAdd : Instruction::Add; case bitc::BINOP_SUB: - return Ty->isFPOrFPVector() ? Instruction::FSub : Instruction::Sub; + return Ty->isFPOrFPVectorTy() ? Instruction::FSub : Instruction::Sub; case bitc::BINOP_MUL: - return Ty->isFPOrFPVector() ? Instruction::FMul : Instruction::Mul; + return Ty->isFPOrFPVectorTy() ? Instruction::FMul : Instruction::Mul; case bitc::BINOP_UDIV: return Instruction::UDiv; case bitc::BINOP_SDIV: - return Ty->isFPOrFPVector() ? Instruction::FDiv : Instruction::SDiv; + return Ty->isFPOrFPVectorTy() ? Instruction::FDiv : Instruction::SDiv; case bitc::BINOP_UREM: return Instruction::URem; case bitc::BINOP_SREM: - return Ty->isFPOrFPVector() ? Instruction::FRem : Instruction::SRem; + return Ty->isFPOrFPVectorTy() ? 
Instruction::FRem : Instruction::SRem; case bitc::BINOP_SHL: return Instruction::Shl; case bitc::BINOP_LSHR: return Instruction::LShr; case bitc::BINOP_ASHR: return Instruction::AShr; @@ -585,6 +585,13 @@ bool BitcodeReader::ParseTypeTable() { ResultTy = StructType::get(Context, EltTys, Record[0]); break; } + case bitc::TYPE_CODE_UNION: { // UNION: [eltty x N] + SmallVector<const Type*, 8> EltTys; + for (unsigned i = 0, e = Record.size(); i != e; ++i) + EltTys.push_back(getTypeByID(Record[i], true)); + ResultTy = UnionType::get(&EltTys[0], EltTys.size()); + break; + } case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty] if (Record.size() < 2) return Error("Invalid ARRAY type record"); @@ -956,12 +963,12 @@ bool BitcodeReader::ParseConstants() { V = Constant::getNullValue(CurTy); break; case bitc::CST_CODE_INTEGER: // INTEGER: [intval] - if (!isa<IntegerType>(CurTy) || Record.empty()) + if (!CurTy->isIntegerTy() || Record.empty()) return Error("Invalid CST_INTEGER record"); V = ConstantInt::get(CurTy, DecodeSignRotatedValue(Record[0])); break; case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval] - if (!isa<IntegerType>(CurTy) || Record.empty()) + if (!CurTy->isIntegerTy() || Record.empty()) return Error("Invalid WIDE_INTEGER record"); unsigned NumWords = Record.size(); @@ -1168,7 +1175,7 @@ bool BitcodeReader::ParseConstants() { Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); - if (OpTy->isFPOrFPVector()) + if (OpTy->isFPOrFPVectorTy()) V = ConstantExpr::getFCmp(Record[3], Op0, Op1); else V = ConstantExpr::getICmp(Record[3], Op0, Op1); @@ -1400,7 +1407,7 @@ bool BitcodeReader::ParseModule() { if (Record.size() < 6) return Error("Invalid MODULE_CODE_GLOBALVAR record"); const Type *Ty = getTypeByID(Record[0]); - if (!isa<PointerType>(Ty)) + if (!Ty->isPointerTy()) return Error("Global not a pointer type!"); unsigned AddressSpace = cast<PointerType>(Ty)->getAddressSpace(); Ty = cast<PointerType>(Ty)->getElementType(); @@ -1443,7 +1450,7 @@ bool BitcodeReader::ParseModule() { if (Record.size() < 8) return Error("Invalid MODULE_CODE_FUNCTION record"); const Type *Ty = getTypeByID(Record[0]); - if (!isa<PointerType>(Ty)) + if (!Ty->isPointerTy()) return Error("Function not a pointer type!"); const FunctionType *FTy = dyn_cast<FunctionType>(cast<PointerType>(Ty)->getElementType()); @@ -1484,7 +1491,7 @@ bool BitcodeReader::ParseModule() { if (Record.size() < 3) return Error("Invalid MODULE_ALIAS record"); const Type *Ty = getTypeByID(Record[0]); - if (!isa<PointerType>(Ty)) + if (!Ty->isPointerTy()) return Error("Function not a pointer type!"); GlobalAlias *NewGA = new GlobalAlias(Ty, GetDecodedLinkage(Record[2]), @@ -1615,6 +1622,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID)) return Error("Malformed block record"); + InstructionList.clear(); unsigned ModuleValueListSize = ValueList.size(); // Add all the function arguments to the value table. 
@@ -1885,7 +1893,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { OpNum+1 != Record.size()) return Error("Invalid CMP record"); - if (LHS->getType()->isFPOrFPVector()) + if (LHS->getType()->isFPOrFPVectorTy()) I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS); else I = new ICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS); @@ -1925,7 +1933,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { const Type *ReturnType = F->getReturnType(); if (Vs.size() > 1 || - (isa<StructType>(ReturnType) && + (ReturnType->isStructTy() && (Vs.empty() || Vs[0]->getType() != ReturnType))) { Value *RV = UndefValue::get(ReturnType); for (unsigned i = 0, e = Vs.size(); i != e; ++i) { diff --git a/lib/Bitcode/Writer/BitWriter.cpp b/lib/Bitcode/Writer/BitWriter.cpp index 7ed651b..4288422 100644 --- a/lib/Bitcode/Writer/BitWriter.cpp +++ b/lib/Bitcode/Writer/BitWriter.cpp @@ -27,20 +27,14 @@ int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) { return 0; } -#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR >= 4) -#include <ext/stdio_filebuf.h> - -int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int FileHandle) { - raw_fd_ostream OS(FileHandle, false); +int LLVMWriteBitcodeToFD(LLVMModuleRef M, int FD, int ShouldClose, + int Unbuffered) { + raw_fd_ostream OS(FD, ShouldClose, Unbuffered); WriteBitcodeToFile(unwrap(M), OS); return 0; } -#else - int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int FileHandle) { - return -1; // Not supported. + return LLVMWriteBitcodeToFD(M, FileHandle, true, false); } - -#endif diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index a5bb526..82e73b5 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -181,6 +181,14 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Log2_32_Ceil(VE.getTypes().size()+1))); unsigned StructAbbrev = Stream.EmitAbbrev(Abbv); + // Abbrev for TYPE_CODE_UNION. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_UNION)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, + Log2_32_Ceil(VE.getTypes().size()+1))); + unsigned UnionAbbrev = Stream.EmitAbbrev(Abbv); + // Abbrev for TYPE_CODE_ARRAY. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY)); @@ -250,6 +258,17 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { AbbrevToUse = StructAbbrev; break; } + case Type::UnionTyID: { + const UnionType *UT = cast<UnionType>(T); + // UNION: [eltty x N] + Code = bitc::TYPE_CODE_UNION; + // Output all of the element types. 
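The BitReader.cpp and BitWriter.cpp hunks above reshape the C bindings: LLVMParseBitcode and the module-provider entry points now forward to the context-taking variants, and LLVMWriteBitcodeToFD supersedes the glibc-only LLVMWriteBitcodeToFileHandle path. A hedged round-trip sketch using only those C entry points; the file descriptor, path, and module are assumed to be supplied by the caller:

#include "llvm-c/BitReader.h"
#include "llvm-c/BitWriter.h"
#include "llvm-c/Core.h"
#include <cstdio>

// Write M to an already-open descriptor, then parse it back from Path.
static LLVMModuleRef roundTrip(LLVMModuleRef M, int FD, const char *Path) {
  // ShouldClose=0: the caller keeps ownership of FD; Unbuffered=0.
  if (LLVMWriteBitcodeToFD(M, FD, /*ShouldClose=*/0, /*Unbuffered=*/0))
    return 0;

  LLVMMemoryBufferRef Buf;
  char *Msg = 0;
  if (LLVMCreateMemoryBufferWithContentsOfFile(Path, &Buf, &Msg)) {
    std::fprintf(stderr, "read error: %s\n", Msg);
    LLVMDisposeMessage(Msg);
    return 0;
  }

  LLVMModuleRef Parsed = 0;
  if (LLVMParseBitcodeInContext(LLVMGetGlobalContext(), Buf, &Parsed, &Msg)) {
    std::fprintf(stderr, "parse error: %s\n", Msg);
    LLVMDisposeMessage(Msg);
    Parsed = 0;
  }
  LLVMDisposeMemoryBuffer(Buf);   // Eager parsing does not take the buffer.
  return Parsed;
}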
+ for (UnionType::element_iterator I = UT->element_begin(), + E = UT->element_end(); I != E; ++I) + TypeVals.push_back(VE.getTypeID(*I)); + AbbrevToUse = UnionAbbrev; + break; + } case Type::ArrayTyID: { const ArrayType *AT = cast<ArrayType>(T); // ARRAY: [numelts, eltty] @@ -789,7 +808,7 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, else if (isCStr7) AbbrevToUse = CString7Abbrev; } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(V) || - isa<ConstantVector>(V)) { + isa<ConstantUnion>(C) || isa<ConstantVector>(V)) { Code = bitc::CST_CODE_AGGREGATE; for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) Record.push_back(VE.getValueID(C->getOperand(i))); @@ -1510,16 +1529,50 @@ enum { DarwinBCHeaderSize = 5*4 }; +/// isARMTriplet - Return true if the triplet looks like: +/// arm-*, thumb-*, armv[0-9]-*, thumbv[0-9]-*, armv5te-*, or armv6t2-*. +static bool isARMTriplet(const std::string &TT) { + size_t Pos = 0; + size_t Size = TT.size(); + if (Size >= 6 && + TT[0] == 't' && TT[1] == 'h' && TT[2] == 'u' && + TT[3] == 'm' && TT[4] == 'b') + Pos = 5; + else if (Size >= 4 && TT[0] == 'a' && TT[1] == 'r' && TT[2] == 'm') + Pos = 3; + else + return false; + + if (TT[Pos] == '-') + return true; + else if (TT[Pos] == 'v') { + if (Size >= Pos+4 && + TT[Pos+1] == '6' && TT[Pos+2] == 't' && TT[Pos+3] == '2') + return true; + else if (Size >= Pos+4 && + TT[Pos+1] == '5' && TT[Pos+2] == 't' && TT[Pos+3] == 'e') + return true; + } else + return false; + while (++Pos < Size && TT[Pos] != '-') { + if (!isdigit(TT[Pos])) + return false; + } + return true; +} + static void EmitDarwinBCHeader(BitstreamWriter &Stream, const std::string &TT) { unsigned CPUType = ~0U; - // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*. The CPUType is a - // magic number from /usr/include/mach/machine.h. It is ok to reproduce the + // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*, arm-*, thumb-*, + // armv[0-9]-*, thumbv[0-9]-*, armv5te-*, or armv6t2-*. The CPUType is a magic + // number from /usr/include/mach/machine.h. It is ok to reproduce the // specific constants here because they are implicitly part of the Darwin ABI. enum { DARWIN_CPU_ARCH_ABI64 = 0x01000000, DARWIN_CPU_TYPE_X86 = 7, + DARWIN_CPU_TYPE_ARM = 12, DARWIN_CPU_TYPE_POWERPC = 18 }; @@ -1532,6 +1585,8 @@ static void EmitDarwinBCHeader(BitstreamWriter &Stream, CPUType = DARWIN_CPU_TYPE_POWERPC; else if (TT.find("powerpc64-") == 0) CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64; + else if (isARMTriplet(TT)) + CPUType = DARWIN_CPU_TYPE_ARM; // Traditional Bitcode starts after header. unsigned BCOffset = DarwinBCHeaderSize; diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 595497f..aa4c3af 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -27,7 +27,7 @@ static bool isSingleValueType(const std::pair<const llvm::Type*, } static bool isIntegerValue(const std::pair<const Value*, unsigned> &V) { - return isa<IntegerType>(V.first->getType()); + return V.first->getType()->isIntegerTy(); } static bool CompareByFrequency(const std::pair<const llvm::Type*, @@ -39,8 +39,6 @@ static bool CompareByFrequency(const std::pair<const llvm::Type*, /// ValueEnumerator - Enumerate module-level information. ValueEnumerator::ValueEnumerator(const Module *M) { - InstructionCount = 0; - // Enumerate the global variables. 
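The isARMTriplet helper added above is file-local, so the easiest way to show what it matches is a standalone restatement exercised on a few example triples. A hedged sketch, not the patch's code, and the triple strings are illustrative only:

#include <cctype>
#include <cstdio>
#include <string>

// Accepts "arm"/"thumb", optionally followed by "v" plus digits or by the
// special cases "v5te"/"v6t2", mirroring the checks in isARMTriplet above.
static bool looksLikeARMTriple(const std::string &TT) {
  std::string::size_type Pos;
  if (TT.compare(0, 5, "thumb") == 0)
    Pos = 5;
  else if (TT.compare(0, 3, "arm") == 0)
    Pos = 3;
  else
    return false;
  if (Pos >= TT.size())
    return false;

  if (TT[Pos] == '-')                        // plain "arm-*" / "thumb-*"
    return true;
  if (TT[Pos] != 'v')
    return false;
  if (TT.compare(Pos + 1, 3, "6t2") == 0 ||  // armv6t2-*, thumbv6t2-*
      TT.compare(Pos + 1, 3, "5te") == 0)    // armv5te-*, thumbv5te-*
    return true;
  for (++Pos; Pos < TT.size() && TT[Pos] != '-'; ++Pos)
    if (!isdigit((unsigned char)TT[Pos]))    // e.g. armv7a-* is rejected here
      return false;
  return true;
}

int main() {
  std::printf("%d %d %d %d\n",
              looksLikeARMTriple("armv6-apple-darwin9"),     // 1
              looksLikeARMTriple("thumbv7-apple-darwin10"),  // 1
              looksLikeARMTriple("armv5te-unknown-eabi"),    // 1
              looksLikeARMTriple("x86_64-apple-darwin10"));  // 0
  return 0;
}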
for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); I != E; ++I) @@ -377,6 +375,7 @@ void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) { void ValueEnumerator::incorporateFunction(const Function &F) { + InstructionCount = 0; NumModuleValues = Values.size(); // Adding function arguments to the value table. diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk new file mode 100644 index 0000000..9fa2ecd --- /dev/null +++ b/lib/CodeGen/Android.mk @@ -0,0 +1,99 @@ +LOCAL_PATH:= $(call my-dir) + +codegen_SRC_FILES := \ + AggressiveAntiDepBreaker.cpp \ + BranchFolding.cpp \ + CalcSpillWeights.cpp \ + CodePlacementOpt.cpp \ + CriticalAntiDepBreaker.cpp \ + DeadMachineInstructionElim.cpp \ + DwarfEHPrepare.cpp \ + ELFCodeEmitter.cpp \ + ELFWriter.cpp \ + ExactHazardRecognizer.cpp \ + GCMetadata.cpp \ + GCMetadataPrinter.cpp \ + GCStrategy.cpp \ + IfConversion.cpp \ + IntrinsicLowering.cpp \ + LLVMTargetMachine.cpp \ + LatencyPriorityQueue.cpp \ + LiveInterval.cpp \ + LiveIntervalAnalysis.cpp \ + LiveStackAnalysis.cpp \ + LiveVariables.cpp \ + LowerSubregs.cpp \ + MachineBasicBlock.cpp \ + MachineCSE.cpp \ + MachineDominators.cpp \ + MachineFunction.cpp \ + MachineFunctionAnalysis.cpp \ + MachineFunctionPass.cpp \ + MachineInstr.cpp \ + MachineLICM.cpp \ + MachineLoopInfo.cpp \ + MachineModuleInfo.cpp \ + MachineModuleInfoImpls.cpp \ + MachinePassRegistry.cpp \ + MachineRegisterInfo.cpp \ + MachineSSAUpdater.cpp \ + MachineSink.cpp \ + MachineVerifier.cpp \ + ObjectCodeEmitter.cpp \ + OcamlGC.cpp \ + OptimizeExts.cpp \ + OptimizePHIs.cpp \ + PHIElimination.cpp \ + Passes.cpp \ + PostRASchedulerList.cpp \ + PreAllocSplitting.cpp \ + ProcessImplicitDefs.cpp \ + PrologEpilogInserter.cpp \ + PseudoSourceValue.cpp \ + RegAllocLinearScan.cpp \ + RegAllocLocal.cpp \ + RegAllocPBQP.cpp \ + RegisterCoalescer.cpp \ + RegisterScavenging.cpp \ + ScheduleDAG.cpp \ + ScheduleDAGEmit.cpp \ + ScheduleDAGInstrs.cpp \ + ScheduleDAGPrinter.cpp \ + ShadowStackGC.cpp \ + ShrinkWrapping.cpp \ + SimpleRegisterCoalescing.cpp \ + SjLjEHPrepare.cpp \ + SlotIndexes.cpp \ + Spiller.cpp \ + StackProtector.cpp \ + StackSlotColoring.cpp \ + StrongPHIElimination.cpp \ + TailDuplication.cpp \ + TargetInstrInfoImpl.cpp \ + TargetLoweringObjectFileImpl.cpp \ + TwoAddressInstructionPass.cpp \ + UnreachableBlockElim.cpp \ + VirtRegMap.cpp \ + VirtRegRewriter.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(codegen_SRC_FILES) +LOCAL_MODULE:= libLLVMCodeGen + +include $(LLVM_HOST_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(codegen_SRC_FILES) +LOCAL_MODULE:= libLLVMCodeGen + +include $(LLVM_DEVICE_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/CodeGen/AsmPrinter/Android.mk b/lib/CodeGen/AsmPrinter/Android.mk new file mode 100644 index 0000000..62601f0 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/Android.mk @@ -0,0 +1,31 @@ +LOCAL_PATH := $(call my-dir) + +codegen_asmprinter_SRC_FILES := \ + AsmPrinter.cpp \ + DIE.cpp \ + DwarfDebug.cpp \ + DwarfException.cpp \ + DwarfLabel.cpp \ + DwarfPrinter.cpp \ + DwarfWriter.cpp \ + OcamlGCPrinter.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(codegen_asmprinter_SRC_FILES) +LOCAL_MODULE:= 
libLLVMAsmPrinter + +include $(LLVM_HOST_BUILD_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(codegen_asmprinter_SRC_FILES) +LOCAL_MODULE:= libLLVMAsmPrinter + +include $(LLVM_DEVICE_BUILD_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index fc08384..bbeb026 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -50,6 +50,7 @@ #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" #include <cerrno> +#include <ctype.h> using namespace llvm; STATISTIC(EmittedInsts, "Number of machine instrs printed"); @@ -917,11 +918,10 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV, if (NumBits == 0) return; // No need to emit alignment. - unsigned FillValue = 0; if (getCurrentSection()->getKind().isText()) - FillValue = MAI->getTextAlignFillValue(); - - OutStreamer.EmitValueToAlignment(1 << NumBits, FillValue, 1, 0); + OutStreamer.EmitCodeAlignment(1 << NumBits); + else + OutStreamer.EmitValueToAlignment(1 << NumBits, 0, 1, 0); } /// LowerConstant - Lower the specified LLVM Constant to an MCExpr. @@ -1717,7 +1717,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { } // Print the main label for the block. - if (MBB->pred_empty() || MBB->isOnlyReachableByFallthrough()) { + if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) { if (VerboseAsm) { // NOTE: Want this comment at start of line. O << MAI->getCommentString() << " BB#" << MBB->getNumber() << ':'; @@ -1764,6 +1764,39 @@ void AsmPrinter::printOffset(int64_t Offset) const { O << Offset; } +/// isBlockOnlyReachableByFallthough - Return true if the basic block has +/// exactly one predecessor and the control transfer mechanism between +/// the predecessor and this block is a fall-through. +bool AsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) + const { + // If this is a landing pad, it isn't a fall through. If it has no preds, + // then nothing falls through to it. + if (MBB->isLandingPad() || MBB->pred_empty()) + return false; + + // If there isn't exactly one predecessor, it can't be a fall through. + MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI; + ++PI2; + if (PI2 != MBB->pred_end()) + return false; + + // The predecessor has to be immediately before this block. + const MachineBasicBlock *Pred = *PI; + + if (!Pred->isLayoutSuccessor(MBB)) + return false; + + // If the block is completely empty, then it definitely does fall through. + if (Pred->empty()) + return true; + + // Otherwise, check the last instruction. + const MachineInstr &LastInst = Pred->back(); + return !LastInst.getDesc().isBarrier(); +} + + + GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { if (!S->usesMetadata()) return 0; diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 349e0ac..63360c0 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -313,6 +313,7 @@ void DIESectionOffset::EmitValue(DwarfPrinter *D, unsigned Form) const { D->EmitSectionOffset(Label.getTag(), Section.getTag(), Label.getNumber(), Section.getNumber(), IsSmall, IsEH, UseSet); + D->getAsm()->O << '\n'; // FIXME: Necesssary? } /// SizeOf - Determine size of delta value in bytes. 
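The AsmPrinter.cpp hunk above adds isBlockOnlyReachableByFallthrough, which decides whether a basic block's label can be skipped: the block must not be a landing pad, it must have exactly one predecessor, that predecessor must be laid out immediately before it, and the predecessor's last instruction must not be a barrier. Below is a minimal, self-contained sketch of that same decision logic; ToyInstr, ToyBlock and onlyReachableByFallthrough are illustrative stand-ins invented for this note, not the real MachineBasicBlock or MachineInstr API.

#include <vector>

// Toy model of the fall-through test added to AsmPrinter in the hunk above.
struct ToyInstr {
  bool IsBarrier;   // true for unconditional control transfers (branch, return)
};

struct ToyBlock {
  bool IsLandingPad = false;
  std::vector<const ToyBlock*> Preds;    // CFG predecessors
  const ToyBlock *LayoutSucc = nullptr;  // block placed immediately after this one
  std::vector<ToyInstr> Insts;
};

static bool onlyReachableByFallthrough(const ToyBlock *BB) {
  // Landing pads are reached by unwinding, and blocks with no predecessors
  // are not reached by fall-through at all.
  if (BB->IsLandingPad || BB->Preds.empty())
    return false;

  // There must be exactly one predecessor.
  if (BB->Preds.size() != 1)
    return false;

  // That predecessor must be laid out immediately before this block.
  const ToyBlock *Pred = BB->Preds.front();
  if (Pred->LayoutSucc != BB)
    return false;

  // An empty predecessor trivially falls through.
  if (Pred->Insts.empty())
    return true;

  // Otherwise its last instruction must not end control flow.
  return !Pred->Insts.back().IsBarrier;
}

When this predicate holds, the printer can emit only a comment for the block instead of a label, since no branch ever targets it.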
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 5093dd9..5ad1e5e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -238,7 +238,18 @@ public: LIndex = DSI; } } - setLastInsn(LastInsn); + + unsigned CurrentLastInsnIndex = 0; + if (const MachineInstr *CL = getLastInsn()) + CurrentLastInsnIndex = MIIndexMap[CL]; + unsigned FIndex = MIIndexMap[getFirstInsn()]; + + // Set LastInsn as the last instruction for this scope only if + // it follows + // 1) this scope's first instruction and + // 2) current last instruction for this scope, if any. + if (LIndex >= CurrentLastInsnIndex && LIndex >= FIndex) + setLastInsn(LastInsn); } #ifndef NDEBUG @@ -1166,7 +1177,9 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { return SPDie; SPDie = new DIE(dwarf::DW_TAG_subprogram); - addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName()); + // Constructors and operators for anonymous aggregates do not have names. + if (!SP.getName().empty()) + addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName()); StringRef LinkageName = SP.getLinkageName(); if (!LinkageName.empty()) diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index b6801dc..2b08ba4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -50,26 +50,6 @@ DwarfException::~DwarfException() { delete ExceptionTimer; } -/// SizeOfEncodedValue - Return the size of the encoding in bytes. -unsigned DwarfException::SizeOfEncodedValue(unsigned Encoding) { - if (Encoding == dwarf::DW_EH_PE_omit) - return 0; - - switch (Encoding & 0x07) { - case dwarf::DW_EH_PE_absptr: - return TD->getPointerSize(); - case dwarf::DW_EH_PE_udata2: - return 2; - case dwarf::DW_EH_PE_udata4: - return 4; - case dwarf::DW_EH_PE_udata8: - return 8; - } - - assert(0 && "Invalid encoded value."); - return 0; -} - /// CreateLabelDiff - Emit a label and subtract it from the expression we /// already have. This is equivalent to emitting "foo - .", but we have to emit /// the label for "." directly. @@ -100,7 +80,7 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { TD->getPointerSize() : -TD->getPointerSize(); const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - + // Begin eh frame section. Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); @@ -128,30 +108,16 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { // The personality presence indicates that language specific information will // show up in the eh frame. Find out how we are supposed to lower the // personality function reference: - const MCExpr *PersonalityRef = 0; - bool IsPersonalityIndirect = false, IsPersonalityPCRel = false; - if (PersonalityFn) { - // FIXME: HANDLE STATIC CODEGEN MODEL HERE. - - // In non-static mode, ask the object file how to represent this reference. 
- PersonalityRef = - TLOF.getSymbolForDwarfGlobalReference(PersonalityFn, Asm->Mang, - Asm->MMI, - IsPersonalityIndirect, - IsPersonalityPCRel); - } - - unsigned PerEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; - if (IsPersonalityIndirect) - PerEncoding |= dwarf::DW_EH_PE_indirect; - unsigned LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; - unsigned FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + unsigned FDEEncoding = TLOF.getFDEEncoding(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); char Augmentation[6] = { 0 }; unsigned AugmentationSize = 0; char *APtr = Augmentation + 1; - if (PersonalityRef) { + if (PersonalityFn) { // There is a personality function. *APtr++ = 'P'; AugmentationSize += 1 + SizeOfEncodedValue(PerEncoding); @@ -181,20 +147,19 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true)); EOL("CIE Return Address Column"); - EmitULEB128(AugmentationSize, "Augmentation Size"); - EmitEncodingByte(PerEncoding, "Personality"); - - // If there is a personality, we need to indicate the function's location. - if (PersonalityRef) { - if (!IsPersonalityPCRel) - PersonalityRef = CreateLabelDiff(PersonalityRef, "personalityref_addr", - Index); + if (Augmentation[0]) { + EmitULEB128(AugmentationSize, "Augmentation Size"); - O << MAI->getData32bitsDirective() << *PersonalityRef; - EOL("Personality"); - - EmitEncodingByte(LSDAEncoding, "LSDA"); - EmitEncodingByte(FDEEncoding, "FDE"); + // If there is a personality, we need to indicate the function's location. + if (PersonalityFn) { + EmitEncodingByte(PerEncoding, "Personality"); + EmitReference(PersonalityFn, PerEncoding); + EOL("Personality"); + } + if (UsesLSDA[Index]) + EmitEncodingByte(LSDAEncoding, "LSDA"); + if (FDEEncoding != dwarf::DW_EH_PE_absptr) + EmitEncodingByte(FDEEncoding, "FDE"); } // Indicate locations of general callee saved registers in frame. @@ -216,8 +181,12 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { "Should not emit 'available externally' functions at all"); const Function *TheFunc = EHFrameInfo.function; + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getEHFrameSection()); + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + unsigned FDEEncoding = TLOF.getFDEEncoding(); + + Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); // Externally visible entry into the functions eh frame info. If the // corresponding function is static, this should not be externally visible. @@ -255,7 +224,8 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { // EH frame header. 
EmitDifference("eh_frame_end", EHFrameInfo.Number, - "eh_frame_begin", EHFrameInfo.Number, true); + "eh_frame_begin", EHFrameInfo.Number, + true); EOL("Length of Frame Information Entry"); EmitLabel("eh_frame_begin", EHFrameInfo.Number); @@ -266,33 +236,23 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { EOL("FDE CIE offset"); - EmitReference("eh_func_begin", EHFrameInfo.Number, true, true); + EmitReference("eh_func_begin", EHFrameInfo.Number, FDEEncoding); EOL("FDE initial location"); EmitDifference("eh_func_end", EHFrameInfo.Number, - "eh_func_begin", EHFrameInfo.Number, true); + "eh_func_begin", EHFrameInfo.Number, + SizeOfEncodedValue(FDEEncoding) == 4); EOL("FDE address range"); // If there is a personality and landing pads then point to the language // specific data area in the exception table. if (MMI->getPersonalities()[0] != NULL) { + unsigned Size = SizeOfEncodedValue(LSDAEncoding); - if (Asm->TM.getLSDAEncoding() != DwarfLSDAEncoding::EightByte) { - EmitULEB128(4, "Augmentation size"); - - if (EHFrameInfo.hasLandingPads) - EmitReference("exception", EHFrameInfo.Number, true, true); - else - Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); - } else { - EmitULEB128(TD->getPointerSize(), "Augmentation size"); - - if (EHFrameInfo.hasLandingPads) { - EmitReference("exception", EHFrameInfo.Number, true, false); - } else { - Asm->OutStreamer.EmitIntValue(0, TD->getPointerSize(), - 0/*addrspace*/); - } - } + EmitULEB128(Size, "Augmentation size"); + if (EHFrameInfo.hasLandingPads) + EmitReference("exception", EHFrameInfo.Number, LSDAEncoding); + else + Asm->OutStreamer.EmitIntValue(0, Size/*size*/, 0/*addrspace*/); EOL("Language Specific Data Area"); } else { @@ -407,20 +367,22 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, if (NumShared < TypeIds.size()) { unsigned SizeAction = 0; - ActionEntry *PrevAction = 0; + unsigned PrevAction = (unsigned)-1; if (NumShared) { const unsigned SizePrevIds = PrevLPI->TypeIds.size(); assert(Actions.size()); - PrevAction = &Actions.back(); - SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) + - MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); + PrevAction = Actions.size() - 1; + SizeAction = + MCAsmInfo::getSLEB128Size(Actions[PrevAction].NextAction) + + MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID); for (unsigned j = NumShared; j != SizePrevIds; ++j) { + assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!"); SizeAction -= - MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); - SizeAction += -PrevAction->NextAction; - PrevAction = PrevAction->Previous; + MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID); + SizeAction += -Actions[PrevAction].NextAction; + PrevAction = Actions[PrevAction].Previous; } } @@ -437,7 +399,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, ActionEntry Action = { ValueForTypeID, NextAction, PrevAction }; Actions.push_back(Action); - PrevAction = &Actions.back(); + PrevAction = Actions.size() - 1; } // Record the first action of the landing pad site. @@ -447,7 +409,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, // Information used when created the call-site table. The action record // field of the call site record is the offset of the first associated // action record, relative to the start of the actions table. 
This value is - // biased by 1 (1 in dicating the start of the actions table), and 0 + // biased by 1 (1 indicating the start of the actions table), and 0 // indicates that there are no actions. FirstActions.push_back(FirstAction); @@ -648,8 +610,7 @@ void DwarfException::EmitExceptionTable() { // landing pad site. SmallVector<ActionEntry, 32> Actions; SmallVector<unsigned, 64> FirstActions; - unsigned SizeActions = ComputeActionsTable(LandingPads, Actions, - FirstActions); + unsigned SizeActions=ComputeActionsTable(LandingPads, Actions, FirstActions); // Invokes and nounwind calls have entries in PadMap (due to being bracketed // by try-range labels when lowered). Ordinary calls do not, so appropriate @@ -677,29 +638,29 @@ void DwarfException::EmitExceptionTable() { const unsigned LandingPadSize = SizeOfEncodedValue(dwarf::DW_EH_PE_udata4); bool IsSJLJ = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true; - unsigned SizeSites; + unsigned CallSiteTableLength; if (IsSJLJ) - SizeSites = 0; + CallSiteTableLength = 0; else - SizeSites = CallSites.size() * + CallSiteTableLength = CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize); for (unsigned i = 0, e = CallSites.size(); i < e; ++i) { - SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action); + CallSiteTableLength += MCAsmInfo::getULEB128Size(CallSites[i].Action); if (IsSJLJ) - SizeSites += MCAsmInfo::getULEB128Size(i); + CallSiteTableLength += MCAsmInfo::getULEB128Size(i); } // Type infos. const MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection(); - unsigned TTypeFormat; + unsigned TTypeEncoding; unsigned TypeFormatSize; if (!HaveTTData) { // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say // that we're omitting that bit. - TTypeFormat = dwarf::DW_EH_PE_omit; + TTypeEncoding = dwarf::DW_EH_PE_omit; TypeFormatSize = SizeOfEncodedValue(dwarf::DW_EH_PE_absptr); } else { // Okay, we have actual filters or typeinfos to emit. As such, we need to @@ -729,21 +690,28 @@ void DwarfException::EmitExceptionTable() { // somewhere. This predicate should be moved to a shared location that is // in target-independent code. // - if (LSDASection->getKind().isWriteable() || - Asm->TM.getRelocationModel() == Reloc::Static) - TTypeFormat = dwarf::DW_EH_PE_absptr; - else - TTypeFormat = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | - dwarf::DW_EH_PE_sdata4; - - TypeFormatSize = SizeOfEncodedValue(TTypeFormat); + TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding(); + TypeFormatSize = SizeOfEncodedValue(TTypeEncoding); } // Begin the exception table. Asm->OutStreamer.SwitchSection(LSDASection); Asm->EmitAlignment(2, 0, 0, false); + // Emit the LSDA. O << "GCC_except_table" << SubprogramCount << ":\n"; + EmitLabel("exception", SubprogramCount); + + if (IsSJLJ) { + SmallString<16> LSDAName; + raw_svector_ostream(LSDAName) << MAI->getPrivateGlobalPrefix() << + "_LSDA_" << Asm->getFunctionNumber(); + O << LSDAName.str() << ":\n"; + } + + // Emit the LSDA header. + EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); + EmitEncodingByte(TTypeEncoding, "@TType"); // The type infos need to be aligned. GCC does this by inserting padding just // before the type infos. However, this changes the size of the exception @@ -752,7 +720,7 @@ void DwarfException::EmitExceptionTable() { // So by increasing the size by inserting padding, you may increase the number // of bytes used for writing the size. 
If it increases, say by one byte, then // you now need to output one less byte of padding to get the type infos - // aligned. However this decreases the size of the exception table. This + // aligned. However this decreases the size of the exception table. This // changes the value you have to output for the exception table size. Due to // the variable length encoding, the number of bytes used for writing the // length may decrease. If so, you then have to increase the amount of @@ -761,41 +729,35 @@ void DwarfException::EmitExceptionTable() { // We chose another solution: don't output padding inside the table like GCC // does, instead output it before the table. unsigned SizeTypes = TypeInfos.size() * TypeFormatSize; - unsigned TyOffset = sizeof(int8_t) + // Call site format - MCAsmInfo::getULEB128Size(SizeSites) + // Call site table length - SizeSites + SizeActions + SizeTypes; - unsigned TotalSize = sizeof(int8_t) + // LPStart format - sizeof(int8_t) + // TType format - (HaveTTData ? - MCAsmInfo::getULEB128Size(TyOffset) : 0) + // TType base offset - TyOffset; + unsigned CallSiteTableLengthSize = + MCAsmInfo::getULEB128Size(CallSiteTableLength); + unsigned TTypeBaseOffset = + sizeof(int8_t) + // Call site format + CallSiteTableLengthSize + // Call site table length size + CallSiteTableLength + // Call site table length + SizeActions + // Actions size + SizeTypes; + unsigned TTypeBaseOffsetSize = MCAsmInfo::getULEB128Size(TTypeBaseOffset); + unsigned TotalSize = + sizeof(int8_t) + // LPStart format + sizeof(int8_t) + // TType format + (HaveTTData ? TTypeBaseOffsetSize : 0) + // TType base offset size + TTypeBaseOffset; // TType base offset unsigned SizeAlign = (4 - TotalSize) & 3; - for (unsigned i = 0; i != SizeAlign; ++i) { - Asm->EmitInt8(0); - EOL("Padding"); - } - - EmitLabel("exception", SubprogramCount); - - if (IsSJLJ) { - SmallString<16> LSDAName; - raw_svector_ostream(LSDAName) << MAI->getPrivateGlobalPrefix() << - "_LSDA_" << Asm->getFunctionNumber(); - O << LSDAName.str() << ":\n"; + if (HaveTTData) { + // Account for any extra padding that will be added to the call site table + // length. + EmitULEB128(TTypeBaseOffset, "@TType base offset", SizeAlign); + SizeAlign = 0; } - // Emit the header. - EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); - EmitEncodingByte(TTypeFormat, "@TType"); - - if (HaveTTData) - EmitULEB128(TyOffset, "@TType base offset"); - // SjLj Exception handling if (IsSJLJ) { EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site"); - EmitULEB128(SizeSites, "Call site table length"); + + // Add extra padding if it wasn't added to the TType base offset. + EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign); // Emit the landing pad site information. unsigned idx = 0; @@ -836,7 +798,9 @@ void DwarfException::EmitExceptionTable() { // Emit the landing pad call site table. EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site"); - EmitULEB128(SizeSites, "Call site table length"); + + // Add extra padding if it wasn't added to the TType base offset. + EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign); for (SmallVectorImpl<CallSiteEntry>::const_iterator I = CallSites.begin(), E = CallSites.end(); I != E; ++I) { @@ -906,23 +870,23 @@ void DwarfException::EmitExceptionTable() { } // Emit the Catch TypeInfos. 
- if (TypeInfos.size() != 0) EOL("-- Catch TypeInfos --"); + if (!TypeInfos.empty()) EOL("-- Catch TypeInfos --"); for (std::vector<GlobalVariable *>::const_reverse_iterator I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { const GlobalVariable *GV = *I; - PrintRelDirective(); if (GV) { - O << *Asm->GetGlobalValueSymbol(GV); + EmitReference(GV, TTypeEncoding); EOL("TypeInfo"); } else { + PrintRelDirective(TTypeEncoding); O << "0x0"; EOL(""); } } // Emit the Exception Specifications. - if (FilterIds.size() != 0) EOL("-- Filter IDs --"); + if (!FilterIds.empty()) EOL("-- Filter IDs --"); for (std::vector<unsigned>::const_iterator I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) { unsigned TypeID = *I; diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index 06033a1..3db1a00 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -76,9 +76,6 @@ class DwarfException : public DwarfPrinter { /// ExceptionTimer - Timer for the Dwarf exception writer. Timer *ExceptionTimer; - /// SizeOfEncodedValue - Return the size of the encoding in bytes. - unsigned SizeOfEncodedValue(unsigned Encoding); - /// EmitCIE - Emit a Common Information Entry (CIE). This holds information /// that is shared among many Frame Description Entries. There is at least /// one CIE in every non-empty .debug_frame section. @@ -135,7 +132,7 @@ class DwarfException : public DwarfPrinter { struct ActionEntry { int ValueForTypeID; // The value to write - may not be equal to the type id. int NextAction; - struct ActionEntry *Previous; + unsigned Previous; }; /// CallSiteEntry - Structure describing an entry in the call-site table. diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp index 415390b..28ff0eb 100644 --- a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // Emit general DWARF directives. -// +// //===----------------------------------------------------------------------===// #include "DwarfPrinter.h" @@ -18,13 +18,17 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/SmallString.h" using namespace llvm; DwarfPrinter::DwarfPrinter(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T, @@ -33,6 +37,26 @@ DwarfPrinter::DwarfPrinter(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T, RI(Asm->TM.getRegisterInfo()), M(NULL), MF(NULL), MMI(NULL), SubprogramCount(0), Flavor(flavor), SetCounter(1) {} +/// SizeOfEncodedValue - Return the size of the encoding in bytes. 
+unsigned DwarfPrinter::SizeOfEncodedValue(unsigned Encoding) const { + if (Encoding == dwarf::DW_EH_PE_omit) + return 0; + + switch (Encoding & 0x07) { + case dwarf::DW_EH_PE_absptr: + return TD->getPointerSize(); + case dwarf::DW_EH_PE_udata2: + return 2; + case dwarf::DW_EH_PE_udata4: + return 4; + case dwarf::DW_EH_PE_udata8: + return 8; + } + + assert(0 && "Invalid encoded value."); + return 0; +} + void DwarfPrinter::PrintRelDirective(bool Force32Bit, bool isInSection) const { if (isInSection && MAI->getDwarfSectionOffsetDirective()) O << MAI->getDwarfSectionOffsetDirective(); @@ -42,6 +66,14 @@ void DwarfPrinter::PrintRelDirective(bool Force32Bit, bool isInSection) const { O << MAI->getData64bitsDirective(); } +void DwarfPrinter::PrintRelDirective(unsigned Encoding) const { + unsigned Size = SizeOfEncodedValue(Encoding); + assert((Size == 4 || Size == 8) && "Do not support other types or rels!"); + + O << (Size == 4 ? + MAI->getData32bitsDirective() : MAI->getData64bitsDirective()); +} + /// EOL - Print a newline character to asm stream. If a comment is present /// then it will be printed first. Comments should not contain '\n'. void DwarfPrinter::EOL(const Twine &Comment) const { @@ -127,29 +159,35 @@ void DwarfPrinter::EmitSLEB128(int Value, const char *Desc) const { Value >>= 7; IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0; if (IsMore) Byte |= 0x80; - Asm->OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0); } while (IsMore); } /// EmitULEB128 - emit the specified signed leb128 value. -void DwarfPrinter::EmitULEB128(unsigned Value, const char *Desc) const { +void DwarfPrinter::EmitULEB128(unsigned Value, const char *Desc, + unsigned PadTo) const { if (Asm->VerboseAsm && Desc) Asm->OutStreamer.AddComment(Desc); - if (MAI->hasLEB128()) { + if (MAI->hasLEB128() && PadTo == 0) { O << "\t.uleb128\t" << Value; Asm->OutStreamer.AddBlankLine(); return; } - // If we don't have .uleb128, emit as .bytes. + // If we don't have .uleb128 or we want to emit padding, emit as .bytes. do { unsigned char Byte = static_cast<unsigned char>(Value & 0x7f); Value >>= 7; - if (Value) Byte |= 0x80; + if (Value || PadTo != 0) Byte |= 0x80; Asm->OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0); } while (Value); + + if (PadTo) { + if (PadTo > 1) + Asm->OutStreamer.EmitFill(PadTo - 1, 0x80/*fillval*/, 0/*addrspace*/); + Asm->OutStreamer.EmitFill(1, 0/*fillval*/, 0/*addrspace*/); + } } @@ -195,6 +233,31 @@ void DwarfPrinter::EmitReference(const MCSymbol *Sym, bool IsPCRelative, if (IsPCRelative) O << "-" << MAI->getPCSymbol(); } +void DwarfPrinter::EmitReference(const char *Tag, unsigned Number, + unsigned Encoding) const { + SmallString<64> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() + << Tag << Number; + + MCSymbol *Sym = Asm->OutContext.GetOrCreateSymbol(Name.str()); + EmitReference(Sym, Encoding); +} + +void DwarfPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + PrintRelDirective(Encoding); + O << *TLOF.getSymbolForDwarfReference(Sym, Asm->MMI, Encoding);; +} + +void DwarfPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + PrintRelDirective(Encoding); + O << *TLOF.getSymbolForDwarfGlobalReference(GV, Asm->Mang, + Asm->MMI, Encoding);; +} + /// EmitDifference - Emit the difference between two labels. If this assembler /// supports .set, we emit a .set of a temporary and then use it in the .word. 
void DwarfPrinter::EmitDifference(const char *TagHi, unsigned NumberHi, @@ -248,7 +311,6 @@ void DwarfPrinter::EmitSectionOffset(const char* Label, const char* Section, PrintRelDirective(IsSmall); PrintLabelName("set", SetCounter, Flavor); ++SetCounter; - O << "\n"; } else { PrintRelDirective(IsSmall, true); PrintLabelName(Label, LabelNumber); @@ -257,7 +319,6 @@ void DwarfPrinter::EmitSectionOffset(const char* Label, const char* Section, O << "-"; PrintLabelName(Section, SectionNumber); } - O << "\n"; } } diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.h b/lib/CodeGen/AsmPrinter/DwarfPrinter.h index 69d9c27..bd715f2 100644 --- a/lib/CodeGen/AsmPrinter/DwarfPrinter.h +++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.h @@ -28,6 +28,7 @@ class Module; class MCAsmInfo; class TargetData; class TargetRegisterInfo; +class GlobalValue; class MCSymbol; class Twine; @@ -85,6 +86,10 @@ public: const MCAsmInfo *getMCAsmInfo() const { return MAI; } const TargetData *getTargetData() const { return TD; } + /// SizeOfEncodedValue - Return the size of the encoding in bytes. + unsigned SizeOfEncodedValue(unsigned Encoding) const; + + void PrintRelDirective(unsigned Encoding) const; void PrintRelDirective(bool Force32Bit = false, bool isInSection = false) const; @@ -106,7 +111,8 @@ public: void EmitSLEB128(int Value, const char *Desc) const; /// EmitULEB128 - emit the specified unsigned leb128 value. - void EmitULEB128(unsigned Value, const char *Desc = 0) const; + void EmitULEB128(unsigned Value, const char *Desc = 0, + unsigned PadTo = 0) const; /// PrintLabelName - Print label name in form used by Dwarf writer. @@ -140,6 +146,10 @@ public: void EmitReference(const MCSymbol *Sym, bool IsPCRelative = false, bool Force32Bit = false) const; + void EmitReference(const char *Tag, unsigned Number, unsigned Encoding) const; + void EmitReference(const MCSymbol *Sym, unsigned Encoding) const; + void EmitReference(const GlobalValue *GV, unsigned Encoding) const; + /// EmitDifference - Emit the difference between two labels. void EmitDifference(const DWLabel &LabelHi, const DWLabel &LabelLo, bool IsSmall = false) { diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 3531ed6..a9502fd 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -22,6 +22,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" +#include <ctype.h> using namespace llvm; namespace { diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index faf4d95..d94729a 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -334,7 +334,9 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, unsigned TailLen = 0; while (I1 != MBB1->begin() && I2 != MBB2->begin()) { --I1; --I2; - if (!I1->isIdenticalTo(I2) || + // Don't merge debugging pseudos. + if (I1->isDebugValue() || I2->isDebugValue() || + !I1->isIdenticalTo(I2) || // FIXME: This check is dubious. It's used to get around a problem where // people incorrectly expect inline asm directives to remain in the same // relative order. 
This is untenable because normal compiler @@ -412,6 +414,8 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) { unsigned Time = 0; for (; I != E; ++I) { + if (I->isDebugValue()) + continue; const TargetInstrDesc &TID = I->getDesc(); if (TID.isCall()) Time += 10; diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 9fcbea9..d385b86 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -22,6 +22,7 @@ add_llvm_library(LLVMCodeGen LiveVariables.cpp LowerSubregs.cpp MachineBasicBlock.cpp + MachineCSE.cpp MachineDominators.cpp MachineFunction.cpp MachineFunctionAnalysis.cpp @@ -39,6 +40,7 @@ add_llvm_library(LLVMCodeGen ObjectCodeEmitter.cpp OcamlGC.cpp OptimizeExts.cpp + OptimizePHIs.cpp PHIElimination.cpp Passes.cpp PostRASchedulerList.cpp @@ -66,6 +68,7 @@ add_llvm_library(LLVMCodeGen StrongPHIElimination.cpp TailDuplication.cpp TargetInstrInfoImpl.cpp + TargetLoweringObjectFileImpl.cpp TwoAddressInstructionPass.cpp UnreachableBlockElim.cpp VirtRegMap.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 2bedd04..a328d0e 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -131,10 +131,7 @@ bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) { if (Hint.first || Hint.second) li.weight *= 1.01F; - // Divide the weight of the interval by its size. This encourages - // spilling of intervals that are large and have few uses, and - // discourages spilling of small intervals with many uses. - li.weight /= lis->getApproximateInstructionCount(li) * SlotIndex::NUM; + lis->normalizeSpillWeight(li); } } diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 05a57d4..3ff2a04 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -102,22 +102,23 @@ bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) { // Conservatively ignore EH landing pads. if (MBB->isLandingPad()) return false; - // Ignore blocks which look like they might have EH-related control flow. - // At the time of this writing, there are blocks which AnalyzeBranch - // thinks end in single uncoditional branches, yet which have two CFG - // successors. Code in this file is not prepared to reason about such things. - if (!MBB->empty() && MBB->back().isEHLabel()) - return false; - // Aggressively handle return blocks and similar constructs. if (MBB->succ_empty()) return true; // Ask the target's AnalyzeBranch if it can handle this block. MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector<MachineOperand, 4> Cond; - // Make the terminator is understood. + // Make sure the terminator is understood. if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond)) return false; + // Ignore blocks which look like they might have EH-related control flow. + // AnalyzeBranch thinks it knows how to analyze such things, but it doesn't + // recognize the possibility of a control transfer through an unwind. + // Such blocks contain EH_LABEL instructions, however they may be in the + // middle of the block. Instead of searching for them, just check to see + // if the CFG disagrees with AnalyzeBranch. + if (1u + !Cond.empty() != MBB->succ_size()) + return false; // Make sure we have the option of reversing the condition. 
if (!Cond.empty() && TII->ReverseBranchCondition(Cond)) return false; diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 056e2d5..7d3de89 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -119,6 +119,8 @@ void CriticalAntiDepBreaker::FinishBlock() { void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex) { + if (MI->isDebugValue()) + return; assert(Count < InsertPosIndex && "Instruction index out of expected range!"); // Any register which was defined within the previous scheduling region @@ -409,6 +411,8 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits, for (MachineBasicBlock::iterator I = End, E = Begin; I != E; --Count) { MachineInstr *MI = --I; + if (MI->isDebugValue()) + continue; // Check if this instruction has a dependence on the critical path that // is an anti-dependence that we may be able to break. If it is, set diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index b0cb24d..d69c995 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -55,7 +55,7 @@ FunctionPass *llvm::createDeadMachineInstructionElimPass() { bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { // Don't delete instructions with side effects. bool SawStore = false; - if (!MI->isSafeToMove(TII, SawStore, 0)) + if (!MI->isSafeToMove(TII, 0, SawStore) && !MI->isPHI()) return false; // Examine each operand. @@ -64,8 +64,8 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { if (MO.isReg() && MO.isDef()) { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg) ? - LivePhysRegs[Reg] : !MRI->use_empty(Reg)) { - // This def has a use. Don't delete the instruction! + LivePhysRegs[Reg] : !MRI->use_nodbg_empty(Reg)) { + // This def has a non-debug use. Don't delete the instruction! return false; } } @@ -111,23 +111,31 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { MIE = MBB->rend(); MII != MIE; ) { MachineInstr *MI = &*MII; - if (MI->isDebugValue()) { - // Don't delete the DBG_VALUE itself, but if its Value operand is - // a vreg and this is the only use, substitute an undef operand; - // the former operand will then be deleted normally. - if (MI->getNumOperands()==3 && MI->getOperand(0).isReg()) { - unsigned Reg = MI->getOperand(0).getReg(); - MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg); - assert(I != MRI->use_end()); - if (++I == MRI->use_end()) - // only one use, which must be this DBG_VALUE. - MI->getOperand(0).setReg(0U); - } - } - // If the instruction is dead, delete it! if (isDead(MI)) { DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI); + // It is possible that some DBG_VALUE instructions refer to this + // instruction. Examine each def operand for such references; + // if found, mark the DBG_VALUE as undef (but don't delete it). 
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + MachineRegisterInfo::use_iterator nextI; + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), + E = MRI->use_end(); I!=E; I=nextI) { + nextI = llvm::next(I); // I is invalidated by the setReg + MachineOperand& Use = I.getOperand(); + MachineInstr *UseMI = Use.getParent(); + if (UseMI==MI) + continue; + assert(Use.isDebug()); + UseMI->getOperand(0).setReg(0U); + } + } AnyChanges = true; MI->eraseFromParent(); ++NumDeletes; diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 9997a48..87ab7ef 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -155,7 +155,7 @@ void IntrinsicLowering::AddPrototypes(Module &M) { /// LowerBSWAP - Emit the code to lower bswap of V before the specified /// instruction IP. static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { - assert(V->getType()->isInteger() && "Can't bswap a non-integer type!"); + assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!"); unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); @@ -251,7 +251,7 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { /// LowerCTPOP - Emit the code to lower ctpop of V before the specified /// instruction IP. static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) { - assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!"); + assert(V->getType()->isIntegerTy() && "Can't ctpop a non-integer type!"); static const uint64_t MaskValues[6] = { 0x5555555555555555ULL, 0x3333333333333333ULL, diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 40e0150..5e88865 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -14,6 +14,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/PassManager.h" #include "llvm/Pass.h" +#include "llvm/Analysis/Verifier.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/Passes.h" @@ -66,6 +67,9 @@ static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, cl::desc("Verify generated machine code"), cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); +static cl::opt<bool> EnableMachineCSE("enable-machine-cse", cl::Hidden, + cl::desc("Enable Machine CSE")); + static cl::opt<cl::boolOrDefault> AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), cl::init(cl::BOU_UNSET)); @@ -114,9 +118,10 @@ LLVMTargetMachine::setCodeModelForStatic() { bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel) { + CodeGenOpt::Level OptLevel, + bool DisableVerify) { // Add common CodeGen passes. - if (addCommonCodeGenPasses(PM, OptLevel)) + if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify)) return true; OwningPtr<MCContext> Context(new MCContext()); @@ -140,7 +145,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. 
- MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this); + MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Context); if (MCE == 0) return true; @@ -192,12 +197,13 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, /// bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, JITCodeEmitter &JCE, - CodeGenOpt::Level OptLevel) { + CodeGenOpt::Level OptLevel, + bool DisableVerify) { // Make sure the code model is set. setCodeModelForJIT(); // Add common CodeGen passes. - if (addCommonCodeGenPasses(PM, OptLevel)) + if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify)) return true; addCodeEmitter(PM, OptLevel, JCE); @@ -206,6 +212,12 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, return false; // success! } +static void printNoVerify(PassManagerBase &PM, + const char *Banner) { + if (PrintMachineCode) + PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); +} + static void printAndVerify(PassManagerBase &PM, const char *Banner, bool allowDoubleDefs = false) { @@ -220,13 +232,19 @@ static void printAndVerify(PassManagerBase &PM, /// emitting to assembly files or machine code output. /// bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { + CodeGenOpt::Level OptLevel, + bool DisableVerify) { // Standard LLVM-Level Passes. + // Before running any passes, run the verifier to determine if the input + // coming from the front-end and/or optimizer is valid. + if (!DisableVerify) + PM.add(createVerifierPass()); + // Optionally, tun split-GEPs and no-load GVN. if (EnableSplitGEPGVN) { PM.add(createGEPSplitterPass()); - PM.add(createGVNPass(/*NoPRE=*/false, /*NoLoads=*/true)); + PM.add(createGVNPass(/*NoLoads=*/true)); } // Run loop strength reduction before anything else. @@ -273,6 +291,11 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, "*** Final LLVM Code input to ISel ***\n", &dbgs())); + // All passes which modify the LLVM IR are now complete; run the verifier + // to ensure that the IR is valid. + if (!DisableVerify) + PM.add(createVerifierPass()); + // Standard Lower-Level Passes. // Set up a MachineFunction for the rest of CodeGen to work on. @@ -291,6 +314,10 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, printAndVerify(PM, "After Instruction Selection", /* allowDoubleDefs= */ true); + // Optimize PHIs before DCE: removing dead PHI cycles may make more + // instructions dead. + if (OptLevel != CodeGenOpt::None) + PM.add(createOptimizePHIsPass()); // Delete dead machine instructions regardless of optimization level. PM.add(createDeadMachineInstructionElimPass()); @@ -301,6 +328,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createOptimizeExtsPass()); if (!DisableMachineLICM) PM.add(createMachineLICMPass()); + if (EnableMachineCSE) + PM.add(createMachineCSEPass()); if (!DisableMachineSink) PM.add(createMachineSinkingPass()); printAndVerify(PM, "After MachineLICM and MachineSinking", @@ -355,13 +384,13 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // Branch folding must be run after regalloc and prolog/epilog insertion. if (OptLevel != CodeGenOpt::None && !DisableBranchFold) { PM.add(createBranchFoldingPass(getEnableTailMergeDefault())); - printAndVerify(PM, "After BranchFolding"); + printNoVerify(PM, "After BranchFolding"); } // Tail duplication. 
if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) { PM.add(createTailDuplicatePass(false)); - printAndVerify(PM, "After TailDuplicate"); + printNoVerify(PM, "After TailDuplicate"); } PM.add(createGCMachineCodeAnalysisPass()); @@ -371,11 +400,11 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (OptLevel != CodeGenOpt::None && !DisableCodePlace) { PM.add(createCodePlacementOptPass()); - printAndVerify(PM, "After CodePlacementOpt"); + printNoVerify(PM, "After CodePlacementOpt"); } if (addPreEmitPass(PM, OptLevel)) - printAndVerify(PM, "After PreEmit passes"); + printNoVerify(PM, "After PreEmit passes"); return false; } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 432409a..ccda66f 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -329,24 +329,43 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, DEBUG(dbgs() << " +" << NewLR); interval.addRange(NewLR); - // Iterate over all of the blocks that the variable is completely - // live in, adding [insrtIndex(begin), instrIndex(end)+4) to the - // live interval. - for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(), - E = vi.AliveBlocks.end(); I != E; ++I) { - MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I); - LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), ValNo); - interval.addRange(LR); - DEBUG(dbgs() << " +" << LR); + bool PHIJoin = lv_->isPHIJoin(interval.reg); + + if (PHIJoin) { + // A phi join register is killed at the end of the MBB and revived as a new + // valno in the killing blocks. + assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks"); + DEBUG(dbgs() << " phi-join"); + ValNo->addKill(indexes_->getTerminatorGap(mbb)); + ValNo->setHasPHIKill(true); + } else { + // Iterate over all of the blocks that the variable is completely + // live in, adding [insrtIndex(begin), instrIndex(end)+4) to the + // live interval. + for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(), + E = vi.AliveBlocks.end(); I != E; ++I) { + MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I); + LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), ValNo); + interval.addRange(LR); + DEBUG(dbgs() << " +" << LR); + } } // Finally, this virtual register is live from the start of any killing // block to the 'use' slot of the killing instruction. for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) { MachineInstr *Kill = vi.Kills[i]; - SlotIndex killIdx = - getInstructionIndex(Kill).getDefIndex(); - LiveRange LR(getMBBStartIdx(Kill->getParent()), killIdx, ValNo); + SlotIndex Start = getMBBStartIdx(Kill->getParent()); + SlotIndex killIdx = getInstructionIndex(Kill).getDefIndex(); + + // Create interval with one of a NEW value number. Note that this value + // number isn't actually defined by an instruction, weird huh? :) + if (PHIJoin) { + ValNo = interval.getNextValue(SlotIndex(Start, true), 0, false, + VNInfoAllocator); + ValNo->setIsPHIDef(true); + } + LiveRange LR(Start, killIdx, ValNo); interval.addRange(LR); ValNo->addKill(killIdx); DEBUG(dbgs() << " +" << LR); @@ -409,48 +428,11 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, interval.print(dbgs(), tri_); }); } else { - // Otherwise, this must be because of phi elimination. If this is the - // first redefinition of the vreg that we have seen, go back and change - // the live range in the PHI block to be a different value number. 
- if (interval.containsOneValue()) { - - VNInfo *VNI = interval.getValNumInfo(0); - // Phi elimination may have reused the register for multiple identical - // phi nodes. There will be a kill per phi. Remove the old ranges that - // we now know have an incorrect number. - for (unsigned ki=0, ke=vi.Kills.size(); ki != ke; ++ki) { - MachineInstr *Killer = vi.Kills[ki]; - SlotIndex Start = getMBBStartIdx(Killer->getParent()); - SlotIndex End = getInstructionIndex(Killer).getDefIndex(); - DEBUG({ - dbgs() << "\n\t\trenaming [" << Start << "," << End << "] in: "; - interval.print(dbgs(), tri_); - }); - interval.removeRange(Start, End); - - // Replace the interval with one of a NEW value number. Note that - // this value number isn't actually defined by an instruction, weird - // huh? :) - LiveRange LR(Start, End, - interval.getNextValue(SlotIndex(Start, true), - 0, false, VNInfoAllocator)); - LR.valno->setIsPHIDef(true); - interval.addRange(LR); - LR.valno->addKill(End); - } - - MachineBasicBlock *killMBB = getMBBFromIndex(VNI->def); - VNI->addKill(indexes_->getTerminatorGap(killMBB)); - VNI->setHasPHIKill(true); - DEBUG({ - dbgs() << " RESULT: "; - interval.print(dbgs(), tri_); - }); - } - + assert(lv_->isPHIJoin(interval.reg) && "Multiply defined register"); // In the case of PHI elimination, each variable definition is only // live until the end of the block. We've already taken care of the // rest of the live range. + SlotIndex defIndex = MIIdx.getDefIndex(); if (MO.isEarlyClobber()) defIndex = MIIdx.getUseIndex(); @@ -468,7 +450,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, interval.addRange(LR); ValNo->addKill(indexes_->getTerminatorGap(mbb)); ValNo->setHasPHIKill(true); - DEBUG(dbgs() << " +" << LR); + DEBUG(dbgs() << " phi-join +" << LR); } } @@ -613,6 +595,9 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, while (mi != E) { if (mi->isDebugValue()) { ++mi; + if (mi != E && !mi->isDebugValue()) { + baseIndex = indexes_->getNextNonNullIndex(baseIndex); + } continue; } if (mi->killsRegister(interval.reg, tri_)) { @@ -1355,11 +1340,9 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, MachineBasicBlock *MBB = MI->getParent(); if (ImpUse && MI != ReMatDefMI) { - // Re-matting an instruction with virtual register use. Update the - // register interval's spill weight to HUGE_VALF to prevent it from - // being spilled. - LiveInterval &ImpLi = getInterval(ImpUse); - ImpLi.weight = HUGE_VALF; + // Re-matting an instruction with virtual register use. Prevent interval + // from being spilled. + getInterval(ImpUse).markNotSpillable(); } unsigned MBBId = MBB->getNumber(); @@ -1411,7 +1394,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, LiveInterval &nI = getOrCreateInterval(NewVReg); if (!TrySplit) { // The spill weight is now infinity as it cannot be spilled again. - nI.weight = HUGE_VALF; + nI.markNotSpillable(); continue; } @@ -1559,6 +1542,28 @@ LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm, } } +float +LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { + // Limit the loop depth ridiculousness. + if (loopDepth > 200) + loopDepth = 200; + + // The loop depth is used to roughly estimate the number of times the + // instruction is executed. Something like 10^d is simple, but will quickly + // overflow a float. This expression behaves like 10^d for small d, but is + // more tempered for large d. 
At d=200 we get 6.7e33 which leaves a bit of + // headroom before overflow. + float lc = powf(1 + (100.0f / (loopDepth+10)), (float)loopDepth); + + return (isDef + isUse) * lc; +} + +void +LiveIntervals::normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) { + for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) + normalizeSpillWeight(*NewLIs[i]); +} + std::vector<LiveInterval*> LiveIntervals:: addIntervalsForSpillsFast(const LiveInterval &li, const MachineLoopInfo *loopInfo, @@ -1567,8 +1572,7 @@ addIntervalsForSpillsFast(const LiveInterval &li, std::vector<LiveInterval*> added; - assert(li.weight != HUGE_VALF && - "attempt to spill already spilled interval!"); + assert(li.isSpillable() && "attempt to spill already spilled interval!"); DEBUG({ dbgs() << "\t\t\t\tadding intervals for spills for interval: "; @@ -1604,10 +1608,7 @@ addIntervalsForSpillsFast(const LiveInterval &li, // create a new register for this spill LiveInterval &nI = getOrCreateInterval(NewVReg); - - // the spill weight is now infinity as it - // cannot be spilled again - nI.weight = HUGE_VALF; + nI.markNotSpillable(); // Rewrite register operands to use the new vreg. for (SmallVectorImpl<unsigned>::iterator I = Indices.begin(), @@ -1661,8 +1662,7 @@ addIntervalsForSpills(const LiveInterval &li, if (EnableFastSpilling) return addIntervalsForSpillsFast(li, loopInfo, vrm); - assert(li.weight != HUGE_VALF && - "attempt to spill already spilled interval!"); + assert(li.isSpillable() && "attempt to spill already spilled interval!"); DEBUG({ dbgs() << "\t\t\t\tadding intervals for spills for interval: "; @@ -1736,6 +1736,7 @@ addIntervalsForSpills(const LiveInterval &li, } handleSpilledImpDefs(li, vrm, rc, NewLIs); + normalizeSpillWeights(NewLIs); return NewLIs; } @@ -1811,6 +1812,7 @@ addIntervalsForSpills(const LiveInterval &li, // Insert spills / restores if we are splitting. if (!TrySplit) { handleSpilledImpDefs(li, vrm, rc, NewLIs); + normalizeSpillWeights(NewLIs); return NewLIs; } @@ -1927,11 +1929,10 @@ addIntervalsForSpills(const LiveInterval &li, unsigned ImpUse = getReMatImplicitUse(li, ReMatDefMI); if (ImpUse) { // Re-matting an instruction with virtual register use. Add the - // register as an implicit use on the use MI and update the register - // interval's spill weight to HUGE_VALF to prevent it from being - // spilled. + // register as an implicit use on the use MI and mark the register + // interval as unspillable. LiveInterval &ImpLi = getInterval(ImpUse); - ImpLi.weight = HUGE_VALF; + ImpLi.markNotSpillable(); MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true)); } } @@ -1970,6 +1971,7 @@ addIntervalsForSpills(const LiveInterval &li, } handleSpilledImpDefs(li, vrm, rc, RetNewLIs); + normalizeSpillWeights(RetNewLIs); return RetNewLIs; } diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 8a124dc..519990e 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -365,27 +365,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { } } - if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) { - if (LastPartDef) - // The last partial def kills the register. - LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/, - true/*IsImp*/, true/*IsKill*/)); - else { - MachineOperand *MO = - LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI); - bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg; - // If the last reference is the last def, then it's not used at all. 
- // That is, unless we are currently processing the last reference itself. - LastRefOrPartRef->addRegisterDead(Reg, TRI, true); - if (NeedEC) { - // If we are adding a subreg def and the superreg def is marked early - // clobber, add an early clobber marker to the subreg def. - MO = LastRefOrPartRef->findRegisterDefOperand(Reg); - if (MO) - MO->setIsEarlyClobber(); - } - } - } else if (!PhysRegUse[Reg]) { + if (!PhysRegUse[Reg]) { // Partial uses. Mark register def dead and add implicit def of // sub-registers which are used. // EAX<dead> = op AL<imp-def> @@ -419,6 +399,26 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) PartUses.erase(*SS); } + } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) { + if (LastPartDef) + // The last partial def kills the register. + LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/, + true/*IsImp*/, true/*IsKill*/)); + else { + MachineOperand *MO = + LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI); + bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg; + // If the last reference is the last def, then it's not used at all. + // That is, unless we are currently processing the last reference itself. + LastRefOrPartRef->addRegisterDead(Reg, TRI, true); + if (NeedEC) { + // If we are adding a subreg def and the superreg def is marked early + // clobber, add an early clobber marker to the subreg def. + MO = LastRefOrPartRef->findRegisterDefOperand(Reg); + if (MO) + MO->setIsEarlyClobber(); + } + } } else LastRefOrPartRef->addRegisterKilled(Reg, TRI, true); return true; @@ -510,6 +510,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()]; std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0); std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0); + PHIJoins.clear(); /// Get some space for a respectable number of registers. VirtRegInfo.resize(64); diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 655a0bf..64134ce 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -143,36 +143,6 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { return I; } -/// isOnlyReachableViaFallthough - Return true if this basic block has -/// exactly one predecessor and the control transfer mechanism between -/// the predecessor and this block is a fall-through. -bool MachineBasicBlock::isOnlyReachableByFallthrough() const { - // If this is a landing pad, it isn't a fall through. If it has no preds, - // then nothing falls through to it. - if (isLandingPad() || pred_empty()) - return false; - - // If there isn't exactly one predecessor, it can't be a fall through. - const_pred_iterator PI = pred_begin(), PI2 = PI; - ++PI2; - if (PI2 != pred_end()) - return false; - - // The predecessor has to be immediately before this block. - const MachineBasicBlock *Pred = *PI; - - if (!Pred->isLayoutSuccessor(this)) - return false; - - // If the block is completely empty, then it definitely does fall through. - if (Pred->empty()) - return true; - - // Otherwise, check the last instruction. 
- const MachineInstr &LastInst = Pred->back(); - return !LastInst.getDesc().isBarrier(); -} - void MachineBasicBlock::dump() const { print(dbgs()); } diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp new file mode 100644 index 0000000..b376e3d --- /dev/null +++ b/lib/CodeGen/MachineCSE.cpp @@ -0,0 +1,268 @@ +//===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs global common subexpression elimination on machine +// instructions using a scoped hash table based value numbering scheme. It +// must be run while the machine function is still in SSA form. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "machine-cse" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/ScopedHashTable.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +STATISTIC(NumCoalesces, "Number of copies coalesced"); +STATISTIC(NumCSEs, "Number of common subexpression eliminated"); + +namespace { + class MachineCSE : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + MachineDominatorTree *DT; + AliasAnalysis *AA; + public: + static char ID; // Pass identification + MachineCSE() : MachineFunctionPass(&ID), CurrVN(0) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + } + + private: + unsigned CurrVN; + ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT; + SmallVector<MachineInstr*, 64> Exps; + + bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); + bool isPhysDefTriviallyDead(unsigned Reg, + MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator E); + bool hasLivePhysRegDefUse(MachineInstr *MI, MachineBasicBlock *MBB); + bool isCSECandidate(MachineInstr *MI); + bool ProcessBlock(MachineDomTreeNode *Node); + }; +} // end anonymous namespace + +char MachineCSE::ID = 0; +static RegisterPass<MachineCSE> +X("machine-cse", "Machine Common Subexpression Elimination"); + +FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); } + +bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, + MachineBasicBlock *MBB) { + bool Changed = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (!MRI->hasOneUse(Reg)) + // Only coalesce single use copies. This ensure the copy will be + // deleted. 
+ continue; + MachineInstr *DefMI = MRI->getVRegDef(Reg); + if (DefMI->getParent() != MBB) + continue; + unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + if (TII->isMoveInstr(*DefMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && + TargetRegisterInfo::isVirtualRegister(SrcReg) && + !SrcSubIdx && !DstSubIdx) { + MO.setReg(SrcReg); + DefMI->eraseFromParent(); + ++NumCoalesces; + Changed = true; + } + } + + return Changed; +} + +bool MachineCSE::isPhysDefTriviallyDead(unsigned Reg, + MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator E) { + unsigned LookAheadLeft = 5; + while (LookAheadLeft--) { + if (I == E) + // Reached end of block, register is obviously dead. + return true; + + if (I->isDebugValue()) + continue; + bool SeenDef = false; + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = I->getOperand(i); + if (!MO.isReg() || !MO.getReg()) + continue; + if (!TRI->regsOverlap(MO.getReg(), Reg)) + continue; + if (MO.isUse()) + return false; + SeenDef = true; + } + if (SeenDef) + // See a def of Reg (or an alias) before encountering any use, it's + // trivially dead. + return true; + ++I; + } + return false; +} + +bool MachineCSE::hasLivePhysRegDefUse(MachineInstr *MI, MachineBasicBlock *MBB){ + unsigned PhysDef = 0; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (MO.isUse()) + // Can't touch anything to read a physical register. + return true; + if (MO.isDead()) + // If the def is dead, it's ok. + continue; + // Ok, this is a physical register def that's not marked "dead". That's + // common since this pass is run before livevariables. We can scan + // forward a few instructions and check if it is obviously dead. + if (PhysDef) + // Multiple physical register defs. These are rare, forget about it. + return true; + PhysDef = Reg; + } + } + + if (PhysDef) { + MachineBasicBlock::iterator I = MI; I = llvm::next(I); + if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end())) + return true; + } + return false; +} + +bool MachineCSE::isCSECandidate(MachineInstr *MI) { + // Ignore copies or instructions that read / write physical registers + // (except for dead defs of physical registers). + unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) || + MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg()) + return false; + + // Ignore stuff that we obviously can't move. + const TargetInstrDesc &TID = MI->getDesc(); + if (TID.mayStore() || TID.isCall() || TID.isTerminator() || + TID.hasUnmodeledSideEffects()) + return false; + + if (TID.mayLoad()) { + // Okay, this instruction does a load. As a refinement, we allow the target + // to decide whether the loaded value is actually a constant. If so, we can + // actually use it as a load. + if (!MI->isInvariantLoad(AA)) + // FIXME: we should be able to hoist loads with no other side effects if + // there are no other instructions which can change memory in this loop. + // This is a trivial form of alias analysis. 
+ return false; + } + return true; +} + +bool MachineCSE::ProcessBlock(MachineDomTreeNode *Node) { + bool Changed = false; + + ScopedHashTableScope<MachineInstr*, unsigned, + MachineInstrExpressionTrait> VNTS(VNT); + MachineBasicBlock *MBB = Node->getBlock(); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { + MachineInstr *MI = &*I; + ++I; + + if (!isCSECandidate(MI)) + continue; + + bool FoundCSE = VNT.count(MI); + if (!FoundCSE) { + // Look for trivial copy coalescing opportunities. + if (PerformTrivialCoalescing(MI, MBB)) + FoundCSE = VNT.count(MI); + } + // FIXME: commute commutable instructions? + + // If the instruction defines a physical register and the value *may* be + // used, then it's not safe to replace it with a common subexpression. + if (FoundCSE && hasLivePhysRegDefUse(MI, MBB)) + FoundCSE = false; + + if (!FoundCSE) { + VNT.insert(MI, CurrVN++); + Exps.push_back(MI); + continue; + } + + // Found a common subexpression, eliminate it. + unsigned CSVN = VNT.lookup(MI); + MachineInstr *CSMI = Exps[CSVN]; + DEBUG(dbgs() << "Examining: " << *MI); + DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI); + unsigned NumDefs = MI->getDesc().getNumDefs(); + for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned OldReg = MO.getReg(); + unsigned NewReg = CSMI->getOperand(i).getReg(); + if (OldReg == NewReg) + continue; + assert(TargetRegisterInfo::isVirtualRegister(OldReg) && + TargetRegisterInfo::isVirtualRegister(NewReg) && + "Do not CSE physical register defs!"); + MRI->replaceRegWith(OldReg, NewReg); + --NumDefs; + } + MI->eraseFromParent(); + ++NumCSEs; + } + + // Recursively call ProcessBlock with childred. 
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); + for (unsigned i = 0, e = Children.size(); i != e; ++i) + Changed |= ProcessBlock(Children[i]); + + return Changed; +} + +bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + DT = &getAnalysis<MachineDominatorTree>(); + AA = &getAnalysis<AliasAnalysis>(); + return ProcessBlock(DT->getRootNode()); +} diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index f141c56..4377d5b 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -95,6 +95,9 @@ MachineFunction::MachineFunction(Function *F, const TargetMachine &TM, MFInfo = 0; FrameInfo = new (Allocator.Allocate<MachineFrameInfo>()) MachineFrameInfo(*TM.getFrameInfo()); + if (Fn->hasFnAttr(Attribute::StackAlignment)) + FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs( + Fn->getAttributes().getFnAttributes())); ConstantPool = new (Allocator.Allocate<MachineConstantPool>()) MachineConstantPool(TM.getTargetData()); Alignment = TM.getTargetLowering()->getFunctionAlignment(F); diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index df61c74..e23670d 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -18,6 +18,7 @@ #include "llvm/Type.h" #include "llvm/Value.h" #include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -305,7 +306,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f, int64_t o, uint64_t s, unsigned int a) : Offset(o), Size(s), V(v), - Flags((f & 7) | ((Log2_32(a) + 1) << 3)) { + Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)) { assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); assert((isLoad() || isStore()) && "Not a load/store!"); } @@ -327,7 +328,8 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { if (MMO->getBaseAlignment() >= getBaseAlignment()) { // Update the alignment value. - Flags = (Flags & 7) | ((Log2_32(MMO->getBaseAlignment()) + 1) << 3); + Flags = (Flags & ((1 << MOMaxBits) - 1)) | + ((Log2_32(MMO->getBaseAlignment()) + 1) << MOMaxBits); // Also update the base and offset, because the new alignment may // not be applicable with the old ones. V = MMO->getValue(); @@ -700,6 +702,35 @@ void MachineInstr::addMemOperand(MachineFunction &MF, MemRefsEnd = NewMemRefsEnd; } +bool MachineInstr::isIdenticalTo(const MachineInstr *Other, + MICheckType Check) const { + // If opcodes or number of operands are not the same then the two + // instructions are obviously not identical. + if (Other->getOpcode() != getOpcode() || + Other->getNumOperands() != getNumOperands()) + return false; + + // Check operands to make sure they match. + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + const MachineOperand &OMO = Other->getOperand(i); + // Clients may or may not want to ignore defs when testing for equality. + // For example, machine CSE pass only cares about finding common + // subexpressions, so it's safe to ignore virtual register defs. 
+ if (Check != CheckDefs && MO.isReg() && MO.isDef()) { + if (Check == IgnoreDefs) + continue; + // Check == IgnoreVRegDefs + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || + TargetRegisterInfo::isPhysicalRegister(OMO.getReg())) + if (MO.getReg() != OMO.getReg()) + return false; + } else if (!MO.isIdenticalTo(OMO)) + return false; + } + return true; +} + /// removeFromParent - This method unlinks 'this' from the containing basic /// block, and returns it, but does not delete it. MachineInstr *MachineInstr::removeFromParent() { @@ -958,8 +989,8 @@ void MachineInstr::copyPredicates(const MachineInstr *MI) { /// SawStore is set to true, it means that there is a store (or call) between /// the instruction's location and its intended destination. bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, - bool &SawStore, - AliasAnalysis *AA) const { + AliasAnalysis *AA, + bool &SawStore) const { // Ignore stuff that we obviously can't move. if (TID->mayStore() || TID->isCall()) { SawStore = true; @@ -984,11 +1015,11 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, /// isSafeToReMat - Return true if it's safe to rematerialize the specified /// instruction which defined the specified register instead of copying it. bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII, - unsigned DstReg, - AliasAnalysis *AA) const { + AliasAnalysis *AA, + unsigned DstReg) const { bool SawStore = false; if (!TII->isTriviallyReMaterializable(this, AA) || - !isSafeToMove(TII, SawStore, AA)) + !isSafeToMove(TII, AA, SawStore)) return false; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); @@ -1324,3 +1355,48 @@ void MachineInstr::addRegisterDefined(unsigned IncomingReg, true /*IsDef*/, true /*IsImp*/)); } + +unsigned +MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { + unsigned Hash = MI->getOpcode() * 37; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + uint64_t Key = (uint64_t)MO.getType() << 32; + switch (MO.getType()) { + default: break; + case MachineOperand::MO_Register: + if (MO.isDef() && MO.getReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; // Skip virtual register defs. 
+ Key |= MO.getReg(); + break; + case MachineOperand::MO_Immediate: + Key |= MO.getImm(); + break; + case MachineOperand::MO_FrameIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_JumpTableIndex: + Key |= MO.getIndex(); + break; + case MachineOperand::MO_MachineBasicBlock: + Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB()); + break; + case MachineOperand::MO_GlobalAddress: + Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal()); + break; + case MachineOperand::MO_BlockAddress: + Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress()); + break; + } + Key += ~(Key << 32); + Key ^= (Key >> 22); + Key += ~(Key << 13); + Key ^= (Key >> 8); + Key += (Key << 3); + Key ^= (Key >> 15); + Key += ~(Key << 27); + Key ^= (Key >> 31); + Hash = (unsigned)Key + Hash * 37; + } + return Hash; +} diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 92c84f3..0361694 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -252,32 +252,6 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { return false; } - DEBUG({ - dbgs() << "--- Checking if we can hoist " << I; - if (I.getDesc().getImplicitUses()) { - dbgs() << " * Instruction has implicit uses:\n"; - - const TargetRegisterInfo *TRI = TM->getRegisterInfo(); - for (const unsigned *ImpUses = I.getDesc().getImplicitUses(); - *ImpUses; ++ImpUses) - dbgs() << " -> " << TRI->getName(*ImpUses) << "\n"; - } - - if (I.getDesc().getImplicitDefs()) { - dbgs() << " * Instruction has implicit defines:\n"; - - const TargetRegisterInfo *TRI = TM->getRegisterInfo(); - for (const unsigned *ImpDefs = I.getDesc().getImplicitDefs(); - *ImpDefs; ++ImpDefs) - dbgs() << " -> " << TRI->getName(*ImpDefs) << "\n"; - } - }); - - if (I.getDesc().getImplicitDefs() || I.getDesc().getImplicitUses()) { - DEBUG(dbgs() << "Cannot hoist with implicit defines or uses\n"); - return false; - } - // The instruction is loop invariant if all of its operands are. for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { const MachineOperand &MO = I.getOperand(i); @@ -311,6 +285,10 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { } else if (!MO.isDead()) { // A def that isn't dead. We can't move it. return false; + } else if (CurLoop->getHeader()->isLiveIn(Reg)) { + // If the reg is live into the loop, we can't hoist an instruction + // which would clobber it. + return false; } } @@ -467,7 +445,7 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI, std::vector<const MachineInstr*> &PrevMIs) { for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { const MachineInstr *PrevMI = PrevMIs[i]; - if (TII->isIdentical(MI, PrevMI, RegInfo)) + if (TII->produceSameValue(MI, PrevMI)) return PrevMI; } return 0; @@ -480,9 +458,20 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup); + + // Replace virtual registers defined by MI by their counterparts defined + // by Dup. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isDef()) + + // Physical registers may not differ here. 
+ assert((!MO.isReg() || MO.getReg() == 0 || + !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || + MO.getReg() == Dup->getOperand(i).getReg()) && + "Instructions with different phys regs are not identical!"); + + if (MO.isReg() && MO.isDef() && + !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg()); } MI->eraseFromParent(); diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp index 8378906..39d2c75 100644 --- a/lib/CodeGen/MachineModuleInfoImpls.cpp +++ b/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -22,7 +22,7 @@ using namespace llvm; // Out of line virtual method. void MachineModuleInfoMachO::Anchor() {} - +void MachineModuleInfoELF::Anchor() {} static int SortSymbolPair(const void *LHS, const void *RHS) { const MCSymbol *LHSS = @@ -34,10 +34,11 @@ static int SortSymbolPair(const void *LHS, const void *RHS) { /// GetSortedStubs - Return the entries from a DenseMap in a deterministic /// sorted orer. -MachineModuleInfoMachO::SymbolListTy -MachineModuleInfoMachO::GetSortedStubs(const DenseMap<MCSymbol*, - MCSymbol*> &Map) { - MachineModuleInfoMachO::SymbolListTy List(Map.begin(), Map.end()); +MachineModuleInfoImpl::SymbolListTy +MachineModuleInfoImpl::GetSortedStubs(const DenseMap<MCSymbol*, + MCSymbol*> &Map) { + MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end()); + if (!List.empty()) qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair); return List; diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index b31973e..d9ab677 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -116,6 +116,19 @@ MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { return 0; } +bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const { + use_iterator UI = use_begin(RegNo); + if (UI == use_end()) + return false; + return ++UI == use_end(); +} + +bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const { + use_nodbg_iterator UI = use_nodbg_begin(RegNo); + if (UI == use_nodbg_end()) + return false; + return ++UI == use_nodbg_end(); +} #ifndef NDEBUG void MachineRegisterInfo::dumpUses(unsigned Reg) const { diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index c391576..e47ba7c 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -72,8 +72,13 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const { assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Only makes sense for vregs"); - for (MachineRegisterInfo::use_iterator I = RegInfo->use_begin(Reg), - E = RegInfo->use_end(); I != E; ++I) { + // Ignoring debug uses is necessary so debug info doesn't affect the code. + // This may leave a referencing dbg_value in the original block, before + // the definition of the vreg. Dwarf generator handles this although the + // user might not get the right info at runtime. + for (MachineRegisterInfo::use_nodbg_iterator I = + RegInfo->use_nodbg_begin(Reg), + E = RegInfo->use_nodbg_end(); I != E; ++I) { // Determine the block of the use. 
MachineInstr *UseInst = &*I; MachineBasicBlock *UseBlock = UseInst->getParent(); @@ -135,7 +140,10 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { ProcessedBegin = I == MBB.begin(); if (!ProcessedBegin) --I; - + + if (MI->isDebugValue()) + continue; + if (SinkInstruction(MI, SawStore)) ++NumSunk, MadeChange = true; @@ -149,7 +157,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // Check if it's safe to move the instruction. - if (!MI->isSafeToMove(TII, SawStore, AA)) + if (!MI->isSafeToMove(TII, AA, SawStore)) return false; // FIXME: This should include support for sinking instructions within the diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp new file mode 100644 index 0000000..2717d4d --- /dev/null +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -0,0 +1,189 @@ +//===-- OptimizePHIs.cpp - Optimize machine instruction PHIs --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass optimizes machine instruction PHIs to take advantage of +// opportunities created during DAG legalization. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "phi-opt" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Function.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumPHICycles, "Number of PHI cycles replaced"); +STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles"); + +namespace { + class OptimizePHIs : public MachineFunctionPass { + MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + + public: + static char ID; // Pass identification + OptimizePHIs() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + typedef SmallPtrSet<MachineInstr*, 16> InstrSet; + typedef SmallPtrSetIterator<MachineInstr*> InstrSetIterator; + + bool IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg, + InstrSet &PHIsInCycle); + bool IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle); + bool OptimizeBB(MachineBasicBlock &MBB); + }; +} + +char OptimizePHIs::ID = 0; +static RegisterPass<OptimizePHIs> +X("opt-phis", "Optimize machine instruction PHIs"); + +FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); } + +bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { + MRI = &Fn.getRegInfo(); + TII = Fn.getTarget().getInstrInfo(); + + // Find dead PHI cycles and PHI cycles that can be replaced by a single + // value. InstCombine does these optimizations, but DAG legalization may + // introduce new opportunities, e.g., when i64 values are split up for + // 32-bit targets. 
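// [Sketch, not part of the patch] The comment above describes the PHI cycles the
// new OptimizePHIs pass targets. A minimal standalone model of its "single value"
// test, with all names below hypothetical, looks roughly like this:
#include <cstdio>
#include <set>
#include <vector>

struct PhiNode {
  int DstReg;                 // register defined by this toy PHI
  std::vector<int> SrcRegs;   // incoming registers
};

// True if every source of Phis[Idx] is either part of the PHI cycle itself or
// one single outside register, reported through SingleValReg.
static bool isSingleValuePhiCycle(const std::vector<PhiNode> &Phis, int Idx,
                                  int &SingleValReg, std::set<int> &Visited) {
  const PhiNode &P = Phis[Idx];
  if (!Visited.insert(P.DstReg).second)
    return true;                                   // already scanned this PHI
  for (int Src : P.SrcRegs) {
    if (Src == P.DstReg)
      continue;                                    // self-loop edge
    int PhiIdx = -1;                               // is Src defined by another PHI?
    for (size_t i = 0; i < Phis.size(); ++i)
      if (Phis[i].DstReg == Src) { PhiIdx = (int)i; break; }
    if (PhiIdx >= 0) {
      if (!isSingleValuePhiCycle(Phis, PhiIdx, SingleValReg, Visited))
        return false;
    } else if (SingleValReg == 0 || SingleValReg == Src) {
      SingleValReg = Src;                          // the one value feeding the cycle
    } else {
      return false;                                // a second distinct outside value
    }
  }
  return true;
}

int main() {
  // %2 = phi(%1, %3), %3 = phi(%2, %1): both PHIs collapse to %1,
  // the kind of cycle i64 splitting tends to leave behind on 32-bit targets.
  std::vector<PhiNode> Phis = {{2, {1, 3}}, {3, {2, 1}}};
  int SingleValReg = 0;
  std::set<int> Visited;
  if (isSingleValuePhiCycle(Phis, 0, SingleValReg, Visited))
    std::printf("PHI cycle collapses to %%%d\n", SingleValReg);
  return 0;
}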
+ bool Changed = false; + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) + Changed |= OptimizeBB(*I); + + return Changed; +} + +/// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands +/// are copies of SingleValReg, possibly via copies through other PHIs. If +/// SingleValReg is zero on entry, it is set to the register with the single +/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that +/// have been scanned. +bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, + unsigned &SingleValReg, + InstrSet &PHIsInCycle) { + assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction"); + unsigned DstReg = MI->getOperand(0).getReg(); + + // See if we already saw this register. + if (!PHIsInCycle.insert(MI)) + return true; + + // Don't scan crazily complex things. + if (PHIsInCycle.size() == 16) + return false; + + // Scan the PHI operands. + for (unsigned i = 1; i != MI->getNumOperands(); i += 2) { + unsigned SrcReg = MI->getOperand(i).getReg(); + if (SrcReg == DstReg) + continue; + MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + + // Skip over register-to-register moves. + unsigned MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx; + if (SrcMI && + TII->isMoveInstr(*SrcMI, MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx) && + SrcSubIdx == 0 && DstSubIdx == 0 && + TargetRegisterInfo::isVirtualRegister(MvSrcReg)) + SrcMI = MRI->getVRegDef(MvSrcReg); + if (!SrcMI) + return false; + + if (SrcMI->isPHI()) { + if (!IsSingleValuePHICycle(SrcMI, SingleValReg, PHIsInCycle)) + return false; + } else { + // Fail if there is more than one non-phi/non-move register. + if (SingleValReg != 0) + return false; + SingleValReg = SrcReg; + } + } + return true; +} + +/// IsDeadPHICycle - Check if the register defined by a PHI is only used by +/// other PHIs in a cycle. +bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) { + assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction"); + unsigned DstReg = MI->getOperand(0).getReg(); + assert(TargetRegisterInfo::isVirtualRegister(DstReg) && + "PHI destination is not a virtual register"); + + // See if we already saw this register. + if (!PHIsInCycle.insert(MI)) + return true; + + // Don't scan crazily complex things. + if (PHIsInCycle.size() == 16) + return false; + + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DstReg), + E = MRI->use_end(); I != E; ++I) { + MachineInstr *UseMI = &*I; + if (!UseMI->isPHI() || !IsDeadPHICycle(UseMI, PHIsInCycle)) + return false; + } + + return true; +} + +/// OptimizeBB - Remove dead PHI cycles and PHI cycles that can be replaced by +/// a single value. +bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) { + bool Changed = false; + for (MachineBasicBlock::iterator + MII = MBB.begin(), E = MBB.end(); MII != E; ) { + MachineInstr *MI = &*MII++; + if (!MI->isPHI()) + break; + + // Check for single-value PHI cycles. + unsigned SingleValReg = 0; + InstrSet PHIsInCycle; + if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) && + SingleValReg != 0) { + MRI->replaceRegWith(MI->getOperand(0).getReg(), SingleValReg); + MI->eraseFromParent(); + ++NumPHICycles; + Changed = true; + continue; + } + + // Check for dead PHI cycles. 
+ PHIsInCycle.clear(); + if (IsDeadPHICycle(MI, PHIsInCycle)) { + for (InstrSetIterator PI = PHIsInCycle.begin(), PE = PHIsInCycle.end(); + PI != PE; ++PI) { + MachineInstr *PhiMI = *PI; + if (&*MII == PhiMI) + ++MII; + PhiMI->eraseFromParent(); + } + ++NumDeadPHICycles; + Changed = true; + } + } + return Changed; +} diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h index b48f548..bd18b52 100644 --- a/lib/CodeGen/PBQP/HeuristicSolver.h +++ b/lib/CodeGen/PBQP/HeuristicSolver.h @@ -18,7 +18,6 @@ #include "Graph.h" #include "Solution.h" -#include "llvm/Support/raw_ostream.h" #include <vector> #include <limits> @@ -230,7 +229,7 @@ namespace PBQP { } /// \brief Apply rule R1. - /// @param nItr Node iterator for node to apply R1 to. + /// @param xnItr Node iterator for node to apply R1 to. /// /// Node will be automatically pushed to the solver stack. void applyR1(Graph::NodeItr xnItr) { @@ -278,7 +277,7 @@ namespace PBQP { } /// \brief Apply rule R2. - /// @param nItr Node iterator for node to apply R2 to. + /// @param xnItr Node iterator for node to apply R2 to. /// /// Node will be automatically pushed to the solver stack. void applyR2(Graph::NodeItr xnItr) { @@ -494,14 +493,23 @@ namespace PBQP { bool tryNormaliseEdgeMatrix(Graph::EdgeItr &eItr) { + const PBQPNum infinity = std::numeric_limits<PBQPNum>::infinity(); + Matrix &edgeCosts = g.getEdgeCosts(eItr); Vector &uCosts = g.getNodeCosts(g.getEdgeNode1(eItr)), &vCosts = g.getNodeCosts(g.getEdgeNode2(eItr)); for (unsigned r = 0; r < edgeCosts.getRows(); ++r) { - PBQPNum rowMin = edgeCosts.getRowMin(r); + PBQPNum rowMin = infinity; + + for (unsigned c = 0; c < edgeCosts.getCols(); ++c) { + if (vCosts[c] != infinity && edgeCosts[r][c] < rowMin) + rowMin = edgeCosts[r][c]; + } + uCosts[r] += rowMin; - if (rowMin != std::numeric_limits<PBQPNum>::infinity()) { + + if (rowMin != infinity) { edgeCosts.subFromRow(r, rowMin); } else { @@ -510,9 +518,16 @@ namespace PBQP { } for (unsigned c = 0; c < edgeCosts.getCols(); ++c) { - PBQPNum colMin = edgeCosts.getColMin(c); + PBQPNum colMin = infinity; + + for (unsigned r = 0; r < edgeCosts.getRows(); ++r) { + if (uCosts[r] != infinity && edgeCosts[r][c] < colMin) + colMin = edgeCosts[r][c]; + } + vCosts[c] += colMin; - if (colMin != std::numeric_limits<PBQPNum>::infinity()) { + + if (colMin != infinity) { edgeCosts.subFromCol(c, colMin); } else { diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h index c09ad74..30d34d9 100644 --- a/lib/CodeGen/PBQP/Heuristics/Briggs.h +++ b/lib/CodeGen/PBQP/Heuristics/Briggs.h @@ -128,14 +128,7 @@ namespace PBQP { /// selected for heuristic reduction instead. bool shouldOptimallyReduce(Graph::NodeItr nItr) { if (getSolver().getSolverDegree(nItr) < 3) { - if (getGraph().getNodeCosts(nItr)[0] != - std::numeric_limits<PBQPNum>::infinity()) { - return true; - } - // Otherwise we have an infinite spill cost node. 
- initializeNode(nItr); - NodeData &nd = getHeuristicNodeData(nItr); - return nd.isAllocable; + return true; } // else return false; diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index b740c68..8bbe0a7 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -55,8 +55,6 @@ void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) { MRI = &Fn.getRegInfo(); - PHIDefs.clear(); - PHIKills.clear(); bool Changed = false; // Split critical edges to help the coalescer @@ -215,10 +213,6 @@ void llvm::PHIElimination::LowerAtomicPHINode( TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC); } - // Record PHI def. - assert(!hasPHIDef(DestReg) && "Vreg has multiple phi-defs?"); - PHIDefs[DestReg] = &MBB; - // Update live variable information if there is any. LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>(); if (LV) { @@ -229,6 +223,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( // Increment use count of the newly created virtual register. VI.NumUses++; + LV->setPHIJoin(IncomingReg); // When we are reusing the incoming register, it may already have been // killed in this block. The old kill will also have been inserted at @@ -276,9 +271,6 @@ void llvm::PHIElimination::LowerAtomicPHINode( // path the PHI. MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB(); - // Record the kill. - PHIKills[SrcReg].insert(&opBlock); - // If source is defined by an implicit def, there is no need to insert a // copy. MachineInstr *DefMI = MRI->getVRegDef(SrcReg); @@ -451,34 +443,3 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A, return NMBB; } - -unsigned -PHIElimination::PHINodeTraits::getHashValue(const MachineInstr *MI) { - if (!MI || MI==getEmptyKey() || MI==getTombstoneKey()) - return DenseMapInfo<MachineInstr*>::getHashValue(MI); - unsigned hash = 0; - for (unsigned ni = 1, ne = MI->getNumOperands(); ni != ne; ni += 2) - hash = hash*37 + DenseMapInfo<BBVRegPair>:: - getHashValue(BBVRegPair(MI->getOperand(ni+1).getMBB()->getNumber(), - MI->getOperand(ni).getReg())); - return hash; -} - -bool PHIElimination::PHINodeTraits::isEqual(const MachineInstr *LHS, - const MachineInstr *RHS) { - const MachineInstr *EmptyKey = getEmptyKey(); - const MachineInstr *TombstoneKey = getTombstoneKey(); - if (!LHS || !RHS || LHS==EmptyKey || RHS==EmptyKey || - LHS==TombstoneKey || RHS==TombstoneKey) - return LHS==RHS; - - unsigned ne = LHS->getNumOperands(); - if (ne != RHS->getNumOperands()) - return false; - // Ignore operand 0, the defined register. - for (unsigned ni = 1; ni != ne; ni += 2) - if (LHS->getOperand(ni).getReg() != RHS->getOperand(ni).getReg() || - LHS->getOperand(ni+1).getMBB() != RHS->getOperand(ni+1).getMBB()) - return false; - return true; -} diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h index 895aaa4..7dedf03 100644 --- a/lib/CodeGen/PHIElimination.h +++ b/lib/CodeGen/PHIElimination.h @@ -22,17 +22,8 @@ namespace llvm { /// Lower PHI instructions to copies. 
class PHIElimination : public MachineFunctionPass { MachineRegisterInfo *MRI; // Machine register information - private: - - typedef SmallSet<MachineBasicBlock*, 4> PHIKillList; - typedef DenseMap<unsigned, PHIKillList> PHIKillMap; - typedef DenseMap<unsigned, MachineBasicBlock*> PHIDefMap; public: - - typedef PHIKillList::iterator phi_kill_iterator; - typedef PHIKillList::const_iterator const_phi_kill_iterator; - static char ID; // Pass identification, replacement for typeid PHIElimination() : MachineFunctionPass(&ID) {} @@ -40,38 +31,6 @@ namespace llvm { virtual void getAnalysisUsage(AnalysisUsage &AU) const; - /// Return true if the given vreg was defined by a PHI intsr prior to - /// lowering. - bool hasPHIDef(unsigned vreg) const { - return PHIDefs.count(vreg); - } - - /// Returns the block in which the PHI instruction which defined the - /// given vreg used to reside. - MachineBasicBlock* getPHIDefBlock(unsigned vreg) { - PHIDefMap::iterator phiDefItr = PHIDefs.find(vreg); - assert(phiDefItr != PHIDefs.end() && "vreg has no phi-def."); - return phiDefItr->second; - } - - /// Returns true if the given vreg was killed by a PHI instr. - bool hasPHIKills(unsigned vreg) const { - return PHIKills.count(vreg); - } - - /// Returns an iterator over the BasicBlocks which contained PHI - /// kills of this register prior to lowering. - phi_kill_iterator phiKillsBegin(unsigned vreg) { - PHIKillMap::iterator phiKillItr = PHIKills.find(vreg); - assert(phiKillItr != PHIKills.end() && "vreg has no phi-kills."); - return phiKillItr->second.begin(); - } - phi_kill_iterator phiKillsEnd(unsigned vreg) { - PHIKillMap::iterator phiKillItr = PHIKills.find(vreg); - assert(phiKillItr != PHIKills.end() && "vreg has no phi-kills."); - return phiKillItr->second.end(); - } - private: /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions /// in predecessor basic blocks. @@ -109,12 +68,29 @@ namespace llvm { // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and // also after any exception handling labels: in landing pads execution // starts at the label, so any copies placed before it won't be executed! + // We also deal with DBG_VALUEs, which are a bit tricky: + // PHI + // DBG_VALUE + // LABEL + // Here the DBG_VALUE needs to be skipped, and if it refers to a PHI it + // needs to be annulled or, better, moved to follow the label, as well. + // PHI + // DBG_VALUE + // no label + // Here it is not a good idea to skip the DBG_VALUE. + // FIXME: For now we skip and annul all DBG_VALUEs, maximally simple and + // maximally stupid. MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { // Rather than assuming that EH labels come before other kinds of labels, // just skip all labels. - while (I != MBB.end() && (I->isPHI() || I->isLabel())) + while (I != MBB.end() && + (I->isPHI() || I->isLabel() || I->isDebugValue())) { + if (I->isDebugValue() && I->getNumOperands()==3 && + I->getOperand(0).isReg()) + I->getOperand(0).setReg(0U); ++I; + } return I; } @@ -122,21 +98,13 @@ namespace llvm { typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse; VRegPHIUse VRegPHIUseCount; - PHIDefMap PHIDefs; - PHIKillMap PHIKills; // Defs of PHI sources which are implicit_def. SmallPtrSet<MachineInstr*, 4> ImpDefs; - // Lowered PHI nodes may be reused. We provide special DenseMap traits to - // match PHI nodes with identical arguments. 
- struct PHINodeTraits : public DenseMapInfo<MachineInstr*> { - static unsigned getHashValue(const MachineInstr *PtrVal); - static bool isEqual(const MachineInstr *LHS, const MachineInstr *RHS); - }; - // Map reusable lowered PHI node -> incoming join register. - typedef DenseMap<MachineInstr*, unsigned, PHINodeTraits> LoweredPHIMap; + typedef DenseMap<MachineInstr*, unsigned, + MachineInstrExpressionTrait> LoweredPHIMap; LoweredPHIMap LoweredPHIs; }; diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index f67eb79..5ea2941 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -34,7 +34,7 @@ static cl::opt<RegisterRegAlloc::FunctionPassCtor, false, RegisterPassParser<RegisterRegAlloc> > RegAlloc("regalloc", cl::init(&createLinearScanRegisterAllocator), - cl::desc("Register allocator to use: (default = linearscan)")); + cl::desc("Register allocator to use (default=linearscan)")); //===---------------------------------------------------------------------===// diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index f43395f..424181c 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -460,6 +460,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); I != E; --Count) { MachineInstr *MI = --I; + if (MI->isDebugValue()) + continue; // Update liveness. Registers that are defed but not used in this // instruction are now dead. Mark register and all subregs as they diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index e3df2e4..d7179b3 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -205,10 +205,9 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { // Process each use instruction once. for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg), UE = mri_->use_end(); UI != UE; ++UI) { - MachineInstr *RMI = &*UI; - MachineBasicBlock *RMBB = RMI->getParent(); - if (RMBB == MBB) + if (UI.getOperand().isUndef()) continue; + MachineInstr *RMI = &*UI; if (ModInsts.insert(RMI)) RUses.push_back(RMI); } diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 036f59a..138e711 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -175,9 +175,10 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { MachineBasicBlock::iterator I = *i; // If call frames are not being included as part of the stack frame, and - // there is no dynamic allocation (therefore referencing frame slots off - // sp), leave the pseudo ops alone. We'll eliminate them later. - if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn)) + // the target doesn't indicate otherwise, remove the call frame pseudos + // here. The sub/add sp instruction pairs are still inserted, but we don't + // need to track the SP adjustment for frame index elimination. + if (RegInfo->canSimplifyCallFramePseudos(Fn)) RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); } } @@ -476,8 +477,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Loop over all of the stack objects, assigning sequential addresses... MachineFrameInfo *FFI = Fn.getFrameInfo(); - unsigned MaxAlign = 1; - // Start at the beginning of the local area. // The Offset is the distance from the stack top in the direction // of stack growth -- so it's always nonnegative. 
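// [Worked example, not part of the patch] The frame layout code in the hunks
// below keeps rounding the running offset up with
// "Offset = (Offset+Align-1)/Align*Align"; a tiny standalone check of that
// arithmetic:
#include <cassert>
#include <cstdint>

static uint64_t alignTo(uint64_t Offset, uint64_t Align) {
  return (Offset + Align - 1) / Align * Align;   // round up to a multiple of Align
}

int main() {
  assert(alignTo(13, 8) == 16);   // 13-byte offset bumped to the next 8-byte boundary
  assert(alignTo(16, 8) == 16);   // already-aligned offsets are left alone
  assert(alignTo(0, 16) == 0);
  return 0;
}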
@@ -517,9 +516,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { Offset += FFI->getObjectSize(i); unsigned Align = FFI->getObjectAlignment(i); - // If the alignment of this object is greater than that of the stack, - // then increase the stack alignment to match. - MaxAlign = std::max(MaxAlign, Align); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; @@ -529,9 +525,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; for (int i = MaxCSFI; i >= MinCSFI ; --i) { unsigned Align = FFI->getObjectAlignment(i); - // If the alignment of this object is greater than that of the stack, - // then increase the stack alignment to match. - MaxAlign = std::max(MaxAlign, Align); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; @@ -540,6 +533,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { } } + unsigned MaxAlign = FFI->getMaxAlignment(); + // Make sure the special register scavenging spill slot is closest to the // frame pointer if a frame pointer is required. const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); @@ -605,11 +600,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Update frame info to pretend that this is part of the stack... FFI->setStackSize(Offset - LocalAreaOffset); - - // Remember the required stack alignment in case targets need it to perform - // dynamic stack alignment. - if (MaxAlign > FFI->getMaxAlignment()) - FFI->setMaxAlignment(MaxAlign); } diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index 7fb3e6e..5e86e5a 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -18,19 +18,38 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/System/Mutex.h" #include <map> using namespace llvm; -static ManagedStatic<PseudoSourceValue[4]> PSVs; +namespace { +struct PSVGlobalsTy { + // PseudoSourceValues are immutable so don't need locking. + const PseudoSourceValue PSVs[4]; + sys::Mutex Lock; // Guards FSValues, but not the values inside it. 
+ std::map<int, const PseudoSourceValue *> FSValues; + + PSVGlobalsTy() : PSVs() {} + ~PSVGlobalsTy() { + for (std::map<int, const PseudoSourceValue *>::iterator + I = FSValues.begin(), E = FSValues.end(); I != E; ++I) { + delete I->second; + } + } +}; + +static ManagedStatic<PSVGlobalsTy> PSVGlobals; + +} // anonymous namespace const PseudoSourceValue *PseudoSourceValue::getStack() -{ return &(*PSVs)[0]; } +{ return &PSVGlobals->PSVs[0]; } const PseudoSourceValue *PseudoSourceValue::getGOT() -{ return &(*PSVs)[1]; } +{ return &PSVGlobals->PSVs[1]; } const PseudoSourceValue *PseudoSourceValue::getJumpTable() -{ return &(*PSVs)[2]; } +{ return &PSVGlobals->PSVs[2]; } const PseudoSourceValue *PseudoSourceValue::getConstantPool() -{ return &(*PSVs)[3]; } +{ return &PSVGlobals->PSVs[3]; } static const char *const PSVNames[] = { "Stack", @@ -48,13 +67,13 @@ PseudoSourceValue::PseudoSourceValue(enum ValueTy Subclass) : Subclass) {} void PseudoSourceValue::printCustom(raw_ostream &O) const { - O << PSVNames[this - *PSVs]; + O << PSVNames[this - PSVGlobals->PSVs]; } -static ManagedStatic<std::map<int, const PseudoSourceValue *> > FSValues; - const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) { - const PseudoSourceValue *&V = (*FSValues)[FI]; + PSVGlobalsTy &PG = *PSVGlobals; + sys::ScopedLock locked(PG.Lock); + const PseudoSourceValue *&V = PG.FSValues[FI]; if (!V) V = new FixedStackPseudoSourceValue(FI); return V; diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 8e44a57..5c5a394 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -334,10 +334,6 @@ namespace { SmallVector<unsigned, 256> &inactiveCounts, bool SkipDGRegs); - /// assignVirt2StackSlot - assigns this virtual register to a - /// stack slot. returns the stack slot - int assignVirt2StackSlot(unsigned virtReg); - void ComputeRelatedRegClasses(); template <typename ItTy> diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp index 4d2e3a3..04303cf 100644 --- a/lib/CodeGen/RegAllocLocal.cpp +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -490,9 +490,12 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, // If the virtual register is already available, just update the instruction // and return. if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) { - MarkPhysRegRecentlyUsed(PR); // Already have this value available! MI->getOperand(OpNum).setReg(PR); // Assign the input register - getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); + if (!MI->isDebugValue()) { + // Do not do these for DBG_VALUE as they can affect codegen. + MarkPhysRegRecentlyUsed(PR); // Already have this value available! + getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); + } return MI; } @@ -609,6 +612,8 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { + if (I->isDebugValue()) + continue; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { MachineOperand& MO = I->getOperand(i); // Uses don't trigger any flags, but we need to save @@ -691,7 +696,13 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { bool usedOutsideBlock = isPhysReg ? 
false : UsedInMultipleBlocks.test(MO.getReg() - TargetRegisterInfo::FirstVirtualRegister); - if (!isPhysReg && !usedOutsideBlock) + if (!isPhysReg && !usedOutsideBlock) { + // DBG_VALUE complicates this: if the only refs of a register outside + // this block are DBG_VALUE, we can't keep the reg live just for that, + // as it will cause the reg to be spilled at the end of this block when + // it wouldn't have been otherwise. Nullify the DBG_VALUEs when that + // happens. + bool UsedByDebugValueOnly = false; for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()), UE = MRI.reg_end(); UI != UE; ++UI) // Two cases: @@ -699,12 +710,26 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { // - used in the same block before it is defined (loop) if (UI->getParent() != &MBB || (MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI))) { + if (UI->isDebugValue()) { + UsedByDebugValueOnly = true; + continue; + } + // A non-DBG_VALUE use means we can leave DBG_VALUE uses alone. UsedInMultipleBlocks.set(MO.getReg() - TargetRegisterInfo::FirstVirtualRegister); usedOutsideBlock = true; + UsedByDebugValueOnly = false; break; } - + if (UsedByDebugValueOnly) + for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()), + UE = MRI.reg_end(); UI != UE; ++UI) + if (UI->isDebugValue() && + (UI->getParent() != &MBB || + (MO.isDef() && precedes(&*UI, MI)))) + UI.getOperand().setReg(0U); + } + // Physical registers and those that are not live-out of the block // are killed/dead at their last use/def within this block. if (isPhysReg || !usedOutsideBlock) { diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 2701faf..81cfd8f 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -57,7 +57,7 @@ using namespace llvm; static RegisterRegAlloc -registerPBQPRepAlloc("pbqp", "PBQP register allocator.", +registerPBQPRepAlloc("pbqp", "PBQP register allocator", llvm::createPBQPRegisterAllocator); static cl::opt<bool> @@ -867,10 +867,6 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { // Find the vreg intervals in need of allocation. findVRegIntervalsToAlloc(); - // If there aren't any then we're done here. - if (vregIntervalsToAlloc.empty() && emptyVRegIntervals.empty()) - return true; - // If there are non-empty intervals allocate them using pbqp. if (!vregIntervalsToAlloc.empty()) { diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 56dd533..badf34e 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -72,7 +72,7 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) { } else { return V; } - assert(isa<IntegerType>(V->getType()) && "Unexpected operand type!"); + assert(V->getType()->isIntegerTy() && "Unexpected operand type!"); } while (1); } @@ -87,7 +87,7 @@ static const Value *getUnderlyingObject(const Value *V) { break; const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0)); // If that succeeded in finding a pointer, continue the search. 
- if (!isa<PointerType>(O->getType())) + if (!O->getType()->isPointerTy()) break; V = O; } while (1); diff --git a/lib/CodeGen/SelectionDAG/Android.mk b/lib/CodeGen/SelectionDAG/Android.mk new file mode 100644 index 0000000..eb15a18 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/Android.mk @@ -0,0 +1,48 @@ +LOCAL_PATH:= $(call my-dir) + +codegen_selectiondag_SRC_FILES := \ + CallingConvLower.cpp \ + DAGCombiner.cpp \ + FastISel.cpp \ + FunctionLoweringInfo.cpp \ + InstrEmitter.cpp \ + LegalizeDAG.cpp \ + LegalizeFloatTypes.cpp \ + LegalizeIntegerTypes.cpp \ + LegalizeTypes.cpp \ + LegalizeTypesGeneric.cpp \ + LegalizeVectorOps.cpp \ + LegalizeVectorTypes.cpp \ + ScheduleDAGFast.cpp \ + ScheduleDAGList.cpp \ + ScheduleDAGRRList.cpp \ + ScheduleDAGSDNodes.cpp \ + SelectionDAG.cpp \ + SelectionDAGBuilder.cpp \ + SelectionDAGISel.cpp \ + SelectionDAGPrinter.cpp \ + TargetLowering.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(codegen_selectiondag_SRC_FILES) + +LOCAL_MODULE:= libLLVMSelectionDAG + +include $(LLVM_HOST_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(codegen_selectiondag_SRC_FILES) + +LOCAL_MODULE:= libLLVMSelectionDAG + +include $(LLVM_DEVICE_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9189e71..3be6b43 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1064,7 +1064,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (VT.isInteger() && !VT.isVector()) { APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; - APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()); + APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { @@ -1136,7 +1136,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; - APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()); + APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { @@ -1758,7 +1758,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N1.getValueType(); - unsigned BitWidth = VT.getSizeInBits(); + unsigned BitWidth = VT.getScalarType().getSizeInBits(); // fold vector ops if (VT.isVector()) { @@ -1786,7 +1786,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1); if (RAND.getNode() != 0) return RAND; - // fold (and (or x, 0xFFFF), 0xFF) -> 0xFF + // fold (and (or x, C), D) -> D if (C & D) == D if (N1C && N0.getOpcode() == ISD::OR) if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) @@ -1872,16 +1872,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. 
- unsigned BitWidth = N1.getValueSizeInBits(); + unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getSizeInBits())) && + BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -1894,16 +1895,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. - unsigned BitWidth = N1.getValueSizeInBits(); + unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getSizeInBits())) && + BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -1935,7 +1937,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), - ExtVT, LN0->isVolatile(), LN0->getAlignment()); + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); AddToWorkList(N); CombineTo(LN0, NewLoad, NewLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -1970,7 +1973,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), - ExtVT, LN0->isVolatile(), Alignment); + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + Alignment); AddToWorkList(N); CombineTo(LN0, Load, Load.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2021,13 +2025,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (ROR.getNode() != 0) return ROR; // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) + // iff (c1 & c2) == 0. 
if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && isa<ConstantSDNode>(N0.getOperand(1))) { ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, - DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, - N0.getOperand(0), N1), - DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)); + if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, + N0.getOperand(0), N1), + DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)); } // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ @@ -2750,7 +2756,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && N0.getOpcode() == ISD::CTLZ && N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()); + APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne); // If any of the input bits are KnownOne, then the input couldn't be all @@ -3143,7 +3149,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), N0.getValueType(), - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); @@ -3185,7 +3192,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), @@ -3315,7 +3323,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), N0.getValueType(), - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); @@ -3357,7 +3366,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), @@ -3471,7 +3481,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), N0.getValueType(), - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); @@ -3513,7 +3524,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), @@ -3636,10 +3648,11 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue Load = (ExtType == ISD::NON_EXTLOAD) ? 
DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, - LN0->isVolatile(), NewAlign) + LN0->isVolatile(), LN0->isNonTemporal(), NewAlign) : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, - ExtVT, LN0->isVolatile(), NewAlign); + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + NewAlign); // Replace the old load's chain with the new load's chain. WorkListRemover DeadNodes(*this); @@ -3726,7 +3739,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -3742,7 +3756,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -3826,7 +3841,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), LD1->getBasePtr(), LD1->getSrcValue(), - LD1->getSrcValueOffset(), false, Align); + LD1->getSrcValueOffset(), false, false, Align); } return SDValue(); @@ -3896,7 +3911,8 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), - LN0->isVolatile(), OrigAlign); + LN0->isVolatile(), LN0->isNonTemporal(), + OrigAlign); AddToWorkList(N); CombineTo(N0.getNode(), DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), @@ -4492,7 +4508,8 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), N0.getValueType(), - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), @@ -4640,7 +4657,8 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { DAG.DeleteNode(Trunc); } // Replace the uses of SRL with SETCC - DAG.ReplaceAllUsesOfValueWith(N1, SetCC); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes); removeFromWorkList(N1.getNode()); DAG.DeleteNode(N1.getNode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -4648,6 +4666,56 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { } } } + + // Transform br(xor(x, y)) -> br(x != y) + // Transform br(xor(xor(x,y), 1)) -> br (x == y) + if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) { + SDNode *TheXor = N1.getNode(); + SDValue Op0 = TheXor->getOperand(0); + SDValue Op1 = TheXor->getOperand(1); + if (Op0.getOpcode() == Op1.getOpcode()) { + // Avoid missing important xor optimizations. 
+ SDValue Tmp = visitXOR(TheXor); + if (Tmp.getNode()) { + DEBUG(dbgs() << "\nReplacing.8 "; + TheXor->dump(&DAG); + dbgs() << "\nWith: "; + Tmp.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1, Tmp, &DeadNodes); + removeFromWorkList(TheXor); + DAG.DeleteNode(TheXor); + return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + MVT::Other, Chain, Tmp, N2); + } + } + + if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { + bool Equal = false; + if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0)) + if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() && + Op0.getOpcode() == ISD::XOR) { + TheXor = Op0.getNode(); + Equal = true; + } + + EVT SetCCVT = N1.getValueType(); + if (LegalTypes) + SetCCVT = TLI.getSetCCResultType(SetCCVT); + SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(), + SetCCVT, + Op0, Op1, + Equal ? ISD::SETEQ : ISD::SETNE); + // Replace the uses of XOR with SETCC + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes); + removeFromWorkList(N1.getNode()); + DAG.DeleteNode(N1.getNode()); + return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + MVT::Other, Chain, SetCC, N2); + } + } return SDValue(); } @@ -4960,7 +5028,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->getValueType(0), Chain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), - LD->isVolatile(), Align); + LD->isVolatile(), LD->isNonTemporal(), Align); } } @@ -4997,7 +5065,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); - DEBUG(dbgs() << "\nReplacing.6 "; + DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG); @@ -5042,7 +5110,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), BetterChain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); } else { ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), LD->getValueType(0), @@ -5050,6 +5119,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->getSrcValueOffset(), LD->getMemoryVT(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); } @@ -5149,13 +5219,14 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(), LD->getChain(), NewPtr, LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), NewAlign); + LD->isVolatile(), LD->isNonTemporal(), + NewAlign); SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, DAG.getConstant(NewImm, NewVT)); SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), NewVal, NewPtr, ST->getSrcValue(), ST->getSrcValueOffset(), - false, NewAlign); + false, false, NewAlign); AddToWorkList(NewPtr.getNode()); AddToWorkList(NewLD.getNode()); @@ -5184,7 +5255,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->getMemoryVT(), - ST->isVolatile(), Align); + ST->isVolatile(), ST->isNonTemporal(), Align); } } @@ -5201,7 +5272,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0), Ptr, ST->getSrcValue(), - 
ST->getSrcValueOffset(), ST->isVolatile(), OrigAlign); + ST->getSrcValueOffset(), ST->isVolatile(), + ST->isNonTemporal(), OrigAlign); } // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' @@ -5227,7 +5299,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getStore(Chain, N->getDebugLoc(), Tmp, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->isVolatile(), - ST->getAlignment()); + ST->isNonTemporal(), ST->getAlignment()); } break; case MVT::f64: @@ -5239,7 +5311,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getStore(Chain, N->getDebugLoc(), Tmp, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->isVolatile(), - ST->getAlignment()); + ST->isNonTemporal(), ST->getAlignment()); } else if (!ST->isVolatile() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { // Many FP stores are not made apparent until after legalize, e.g. for @@ -5253,18 +5325,21 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { int SVOffset = ST->getSrcValueOffset(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), - isVolatile, ST->getAlignment()); + isVolatile, isNonTemporal, + ST->getAlignment()); Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); SVOffset += 4; Alignment = MinAlign(Alignment, 4U); SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi, Ptr, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); + SVOffset, isVolatile, isNonTemporal, + Alignment); return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, St0, St1); } @@ -5286,12 +5361,13 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (ST->isTruncatingStore()) { ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, ST->getSrcValue(),ST->getSrcValueOffset(), - ST->getMemoryVT(), - ST->isVolatile(), ST->getAlignment()); + ST->getMemoryVT(), ST->isVolatile(), + ST->isNonTemporal(), ST->getAlignment()); } else { ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), - ST->isVolatile(), ST->getAlignment()); + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); } // Create token to keep both nodes around. @@ -5325,7 +5401,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->getMemoryVT(), - ST->isVolatile(), ST->getAlignment()); + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); // Otherwise, see if we can simplify the operation with // SimplifyDemandedBits, which only works if the value has a single use. 
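
Nearly every hunk in DAGCombiner.cpp above and below follows the same pattern: wherever a combine rebuilds a load or store, the call now passes LN0->isNonTemporal() / ST->isNonTemporal() next to isVolatile() and the alignment, so the non-temporal hint is not silently dropped when the node is recreated. The hint marks memory accesses whose data should not displace the cache; a minimal sketch of what such a store looks like at the source level, using SSE2 streaming intrinsics (illustration only, assuming a 16-byte-aligned destination; not code from this patch):

    #include <emmintrin.h>   // SSE2: _mm_set1_pd, _mm_stream_pd, _mm_sfence

    // Fill dst with cache-bypassing (non-temporal) stores; this is the kind
    // of access whose hint the isNonTemporal() flag carries through the DAG.
    void fill_stream(double *dst, double v, int n) {   // dst: 16-byte aligned
      __m128d x = _mm_set1_pd(v);
      int i = 0;
      for (; i + 2 <= n; i += 2)
        _mm_stream_pd(dst + i, x);      // MOVNTPD: does not pollute the cache
      for (; i < n; ++i)
        dst[i] = v;                     // scalar tail uses an ordinary store
      _mm_sfence();                     // order the streaming stores
    }
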
@@ -5358,7 +5435,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0), Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->getMemoryVT(), - ST->isVolatile(), ST->getAlignment()); + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); } return ReduceLoadOpStoreWidth(N); @@ -5503,7 +5581,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), - LN0->isVolatile(), Align); + LN0->isVolatile(), LN0->isNonTemporal(), Align); } return SDValue(); @@ -5883,6 +5961,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, LLD->getChain(), Addr, 0, 0, LLD->isVolatile(), + LLD->isNonTemporal(), LLD->getAlignment()); } else { Load = DAG.getExtLoad(LLD->getExtensionType(), @@ -5891,6 +5970,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, LLD->getChain(), Addr, 0, 0, LLD->getMemoryVT(), LLD->isVolatile(), + LLD->isNonTemporal(), LLD->getAlignment()); } @@ -5998,7 +6078,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, CstOffset); return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, false, - Alignment); + false, Alignment); } } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 35ef5b7..1d76c7c 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -350,6 +350,34 @@ bool FastISel::SelectCall(User *I) { (void)TargetSelectInstruction(cast<Instruction>(I)); return true; } + case Intrinsic::dbg_value: { + // This requires target support, but right now X86 is the only Fast target. + DbgValueInst *DI = cast<DbgValueInst>(I); + const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); + Value *V = DI->getValue(); + if (!V) { + // Currently the optimizer can produce this; insert an undef to + // help debugging. Probably the optimizer should not do this. + BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()). + addMetadata(DI->getVariable()); + } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + BuildMI(MBB, DL, II).addImm(CI->getZExtValue()).addImm(DI->getOffset()). + addMetadata(DI->getVariable()); + } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + BuildMI(MBB, DL, II).addFPImm(CF).addImm(DI->getOffset()). + addMetadata(DI->getVariable()); + } else if (unsigned Reg = lookUpRegForValue(V)) { + BuildMI(MBB, DL, II).addReg(Reg, RegState::Debug).addImm(DI->getOffset()). + addMetadata(DI->getVariable()); + } else { + // We can't yet handle anything else here because it would require + // generating code, thus altering codegen because of debug info. + // Insert an undef so we can see what we dropped. + BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()). 
+ addMetadata(DI->getVariable()); + } + return true; + } case Intrinsic::eh_exception: { EVT VT = TLI.getValueType(I->getType()); switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) { diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 02fe85d..625de11 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "instr-emitter" #include "InstrEmitter.h" +#include "SDDbgValue.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -497,6 +498,56 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, assert(isNew && "Node emitted out of order - early"); } +/// EmitDbgValue - Generate any debug info that refers to this Node. Constant +/// dbg_value is not handled here. +void +InstrEmitter::EmitDbgValue(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap, + SDDbgValue *sd) { + if (!Node->getHasDebugValue()) + return; + if (!sd) + return; + unsigned VReg = getVR(SDValue(sd->getSDNode(), sd->getResNo()), VRBaseMap); + const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); + DebugLoc DL = sd->getDebugLoc(); + MachineInstr *MI; + if (VReg) { + MI = BuildMI(*MF, DL, II).addReg(VReg, RegState::Debug). + addImm(sd->getOffset()). + addMetadata(sd->getMDPtr()); + } else { + // Insert an Undef so we can see what we dropped. + MI = BuildMI(*MF, DL, II).addReg(0U).addImm(sd->getOffset()). + addMetadata(sd->getMDPtr()); + } + MBB->insert(InsertPos, MI); +} + +/// EmitDbgValue - Generate constant debug info. No SDNode is involved. +void +InstrEmitter::EmitDbgValue(SDDbgValue *sd) { + if (!sd) + return; + const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); + DebugLoc DL = sd->getDebugLoc(); + MachineInstr *MI; + Value *V = sd->getConst(); + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + MI = BuildMI(*MF, DL, II).addImm(CI->getZExtValue()). + addImm(sd->getOffset()). + addMetadata(sd->getMDPtr()); + } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + MI = BuildMI(*MF, DL, II).addFPImm(CF).addImm(sd->getOffset()). + addMetadata(sd->getMDPtr()); + } else { + // Insert an Undef so we can see what we dropped. + MI = BuildMI(*MF, DL, II).addReg(0U).addImm(sd->getOffset()). + addMetadata(sd->getMDPtr()); + } + MBB->insert(InsertPos, MI); +} + /// EmitNode - Generate machine code for a node and needed dependencies. /// void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned, diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index 91817e4..4fe9f19 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -23,6 +23,7 @@ namespace llvm { class TargetInstrDesc; +class SDDbgValue; class InstrEmitter { MachineFunction *MF; @@ -97,6 +98,16 @@ public: /// MachineInstr. static unsigned CountOperands(SDNode *Node); + /// EmitDbgValue - Generate any debug info that refers to this Node. Constant + /// dbg_value is not handled here. + void EmitDbgValue(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap, + SDDbgValue* sd); + + + /// EmitDbgValue - Generate a constant DBG_VALUE. No node is involved. + void EmitDbgValue(SDDbgValue* sd); + /// EmitNode - Generate machine code for a node and needed dependencies. 
/// void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 78e6e4e..f498263 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -377,9 +377,10 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), - 0, VT, false, Alignment); + 0, VT, false, false, Alignment); return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, false, Alignment); + PseudoSourceValue::getConstantPool(), 0, false, false, + Alignment); } /// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. @@ -402,7 +403,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // FIXME: Does not handle truncating floating point stores! SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val); return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(), - SVOffset, ST->isVolatile(), Alignment); + SVOffset, ST->isVolatile(), ST->isNonTemporal(), + Alignment); } else { // Do a (aligned) store to a stack slot, then copy from the stack slot // to the final destination using (unaligned) integer loads and stores. @@ -418,7 +420,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Perform the original store, only redirected to the stack slot. SDValue Store = DAG.getTruncStore(Chain, dl, - Val, StackPtr, NULL, 0, StoredVT); + Val, StackPtr, NULL, 0, StoredVT, + false, false, 0); SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); SmallVector<SDValue, 8> Stores; unsigned Offset = 0; @@ -426,11 +429,12 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Do all but one copies using the full register width. for (unsigned i = 1; i < NumRegs; i++) { // Load one integer register's worth from the stack slot. - SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0); + SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0, + false, false, 0); // Store it to the final location. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, ST->getSrcValue(), SVOffset + Offset, - ST->isVolatile(), + ST->isVolatile(), ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); // Increment the pointers. Offset += RegBytes; @@ -446,11 +450,12 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Load from the stack slot. SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, - NULL, 0, MemVT); + NULL, 0, MemVT, false, false, 0); Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, ST->getSrcValue(), SVOffset + Offset, MemVT, ST->isVolatile(), + ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); // The order of the stores doesn't matter - say it with a TokenFactor. 
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], @@ -474,13 +479,14 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Store1, Store2; Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr, ST->getSrcValue(), SVOffset, NewStoredVT, - ST->isVolatile(), Alignment); + ST->isVolatile(), ST->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); Alignment = MinAlign(Alignment, IncrementSize); Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, ST->getSrcValue(), SVOffset + IncrementSize, - NewStoredVT, ST->isVolatile(), Alignment); + NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), + Alignment); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); } @@ -502,7 +508,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // then bitconvert to floating point or vector. SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, LD->isVolatile(), - LD->getAlignment()); + LD->isNonTemporal(), LD->getAlignment()); SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad); if (VT.isFloatingPoint() && LoadedVT != VT) Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); @@ -530,10 +536,11 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Load one integer register's worth from the original location. SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset + Offset, LD->isVolatile(), + LD->isNonTemporal(), MinAlign(LD->getAlignment(), Offset)); // Follow the load with a store to the stack slot. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, - NULL, 0)); + NULL, 0, false, false, 0)); // Increment the pointers. Offset += RegBytes; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); @@ -546,12 +553,13 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, LD->getSrcValue(), SVOffset + Offset, MemVT, LD->isVolatile(), + LD->isNonTemporal(), MinAlign(LD->getAlignment(), Offset)); // Follow the load with a store to the stack slot. Remember the store. // On big-endian machines this requires a truncating store to ensure // that the bits end up in the right place. Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, - NULL, 0, MemVT)); + NULL, 0, MemVT, false, false, 0)); // The order of the stores doesn't matter - say it with a TokenFactor. SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], @@ -559,7 +567,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Finally, perform the original load only redirected to the stack slot. Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, - NULL, 0, LoadedVT); + NULL, 0, LoadedVT, false, false, 0); // Callers expect a MERGE_VALUES node. 
SDValue Ops[] = { Load, TF }; @@ -588,20 +596,22 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue Lo, Hi; if (TLI.isLittleEndian()) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(), - SVOffset, NewLoadedVT, LD->isVolatile(), Alignment); + SVOffset, NewLoadedVT, LD->isVolatile(), + LD->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(), SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), - MinAlign(Alignment, IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment, IncrementSize)); } else { Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(), - SVOffset, NewLoadedVT, LD->isVolatile(), Alignment); + SVOffset, NewLoadedVT, LD->isVolatile(), + LD->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(), SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), - MinAlign(Alignment, IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment, IncrementSize)); } // aggregate the two parts @@ -643,7 +653,8 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, // Store the vector. SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0); + PseudoSourceValue::getFixedStack(SPFI), 0, + false, false, 0); // Truncate or zero extend offset to target pointer type. unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; @@ -654,10 +665,12 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); // Store the scalar value. Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, - PseudoSourceValue::getFixedStack(SPFI), 0, EltVT); + PseudoSourceValue::getFixedStack(SPFI), 0, EltVT, + false, false, 0); // Load the updated vector. return DAG.getLoad(VT, dl, Ch, StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0); + PseudoSourceValue::getFixedStack(SPFI), 0, + false, false, 0); } @@ -702,6 +715,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { int SVOffset = ST->getSrcValueOffset(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); DebugLoc dl = ST->getDebugLoc(); if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { if (CFP->getValueType(0) == MVT::f32 && @@ -710,14 +724,14 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { bitcastToAPInt().zextOrTrunc(32), MVT::i32); return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); + SVOffset, isVolatile, isNonTemporal, Alignment); } else if (CFP->getValueType(0) == MVT::f64) { // If this target supports 64-bit registers, do a single 64-bit store. if (getTypeAction(MVT::i64) == Legal) { Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). zextOrTrunc(64), MVT::i64); return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); + SVOffset, isVolatile, isNonTemporal, Alignment); } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { // Otherwise, if the target supports 32-bit registers, use 2 32-bit // stores. 
If the target supports neither 32- nor 64-bits, this @@ -728,11 +742,11 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); + SVOffset, isVolatile, isNonTemporal, Alignment); Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, DAG.getIntPtrConstant(4)); Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4, - isVolatile, MinAlign(Alignment, 4U)); + isVolatile, isNonTemporal, MinAlign(Alignment, 4U)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -1108,7 +1122,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1)); Tmp4 = LegalizeOp(Tmp1.getValue(1)); break; @@ -1125,6 +1140,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { int SVOffset = LD->getSrcValueOffset(); unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); if (SrcWidth != SrcVT.getStoreSizeInBits() && // Some targets pretend to have an i1 loading operation, and actually @@ -1150,7 +1166,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), SVOffset, - NVT, isVolatile, Alignment); + NVT, isVolatile, isNonTemporal, Alignment); Ch = Result.getValue(1); // The chain. @@ -1187,7 +1203,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), SVOffset, RoundVT, isVolatile, - Alignment); + isNonTemporal, Alignment); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -1195,7 +1211,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), SVOffset + IncrementSize, - ExtraVT, isVolatile, + ExtraVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the @@ -1215,7 +1231,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Load the top RoundWidth bits. Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), SVOffset, RoundVT, isVolatile, - Alignment); + isNonTemporal, Alignment); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -1224,7 +1240,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), SVOffset + IncrementSize, - ExtraVT, isVolatile, + ExtraVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the @@ -1284,7 +1300,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { (SrcVT == MVT::f64 && Node->getValueType(0) == MVT::f128)) { SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); Result = DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Load); Tmp1 = LegalizeOp(Result); // Relegalize new nodes. 
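
The OptimizeFloatStore path above turns a 'store f64' into two i32 stores when only 32-bit integer stores are legal: the value's bits are split into Lo and Hi, the pointer is advanced by 4, and the second store's alignment is relaxed to MinAlign(Alignment, 4U). A plain scalar sketch of the little-endian case (hypothetical helper, not the DAG builder calls):

    #include <cstdint>
    #include <cstring>

    // Store a double as two 32-bit halves, low word first (little-endian),
    // mirroring the Lo/Hi split and the +4 pointer increment above.
    void store_f64_as_two_i32(double v, unsigned char *p) {
      uint64_t bits;
      std::memcpy(&bits, &v, sizeof bits);          // bitcast f64 -> i64
      uint32_t lo = static_cast<uint32_t>(bits);
      uint32_t hi = static_cast<uint32_t>(bits >> 32);
      std::memcpy(p, &lo, 4);                       // offset 0, original alignment
      std::memcpy(p + 4, &hi, 4);                   // offset +4, MinAlign(align, 4)
    }
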
@@ -1297,7 +1314,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), LD->getSrcValueOffset(), SrcVT, - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); SDValue ValRes; if (ExtType == ISD::SEXTLOAD) ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, @@ -1325,6 +1343,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { int SVOffset = ST->getSrcValueOffset(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { @@ -1361,7 +1380,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), SVOffset, isVolatile, - Alignment); + isNonTemporal, Alignment); break; } break; @@ -1379,7 +1398,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, NVT, isVolatile, Alignment); + SVOffset, NVT, isVolatile, isNonTemporal, + Alignment); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); @@ -1399,7 +1419,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Store the bottom RoundWidth bits. Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), SVOffset, RoundVT, - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -1409,6 +1429,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset + IncrementSize, ExtraVT, isVolatile, + isNonTemporal, MinAlign(Alignment, IncrementSize)); } else { // Big endian - avoid unaligned stores. @@ -1417,7 +1438,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), - SVOffset, RoundVT, isVolatile, Alignment); + SVOffset, RoundVT, isVolatile, isNonTemporal, + Alignment); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -1425,6 +1447,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { DAG.getIntPtrConstant(IncrementSize)); Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), SVOffset + IncrementSize, ExtraVT, isVolatile, + isNonTemporal, MinAlign(Alignment, IncrementSize)); } @@ -1457,7 +1480,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { assert(isTypeLegal(StVT) && "Do not know how to expand this store!"); Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); + SVOffset, isVolatile, isNonTemporal, + Alignment); break; } } @@ -1484,7 +1508,8 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { DebugLoc dl = Op.getDebugLoc(); // Store the value to a temporary stack slot, then LOAD the returned part. 
SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); - SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0); + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0, + false, false, 0); // Add the offset to the index. unsigned EltSize = @@ -1500,10 +1525,12 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); if (Op.getValueType().isVector()) - return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0); + return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0, + false, false, 0); else return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, - NULL, 0, Vec.getValueType().getVectorElementType()); + NULL, 0, Vec.getValueType().getVectorElementType(), + false, false, 0); } SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { @@ -1512,7 +1539,6 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { // the result as a vector. // Create the stack frame object. EVT VT = Node->getValueType(0); - EVT OpVT = Node->getOperand(0).getValueType(); EVT EltVT = VT.getVectorElementType(); DebugLoc dl = Node->getDebugLoc(); SDValue FIPtr = DAG.CreateStackTemporary(VT); @@ -1532,13 +1558,16 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx); - // If EltVT smaller than OpVT, only store the bits necessary. - if (!OpVT.isVector() && EltVT.bitsLT(OpVT)) { + // If the destination vector element type is narrower than the source + // element type, only store the bits necessary. + if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) { Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, - Node->getOperand(i), Idx, SV, Offset, EltVT)); + Node->getOperand(i), Idx, SV, Offset, + EltVT, false, false, 0)); } else Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, - Node->getOperand(i), Idx, SV, Offset)); + Node->getOperand(i), Idx, SV, Offset, + false, false, 0)); } SDValue StoreChain; @@ -1549,7 +1578,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { StoreChain = DAG.getEntryNode(); // Result is a load from the stack slot. 
- return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0); + return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0, false, false, 0); } SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { @@ -1572,12 +1601,14 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { SDValue StackPtr = DAG.CreateStackTemporary(Tmp2.getValueType()); SDValue StorePtr = StackPtr, LoadPtr = StackPtr; SDValue Ch = - DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StorePtr, NULL, 0); + DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StorePtr, NULL, 0, + false, false, 0); if (Tmp2.getValueType() == MVT::f64 && TLI.isLittleEndian()) LoadPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), LoadPtr, DAG.getIntPtrConstant(4)); SignBit = DAG.getExtLoad(ISD::SEXTLOAD, dl, TLI.getPointerTy(), - Ch, LoadPtr, NULL, 0, MVT::i32); + Ch, LoadPtr, NULL, 0, MVT::i32, + false, false, 0); } SignBit = DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()), @@ -1701,20 +1732,21 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, if (SrcSize > SlotSize) Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, - SV, 0, SlotVT, false, SrcAlign); + SV, 0, SlotVT, false, false, SrcAlign); else { assert(SrcSize == SlotSize && "Invalid store"); Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, - SV, 0, false, SrcAlign); + SV, 0, false, false, SrcAlign); } // Result is a load from the stack slot. if (SlotSize == DestSize) - return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, DestAlign); + return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, false, + DestAlign); assert(SlotSize < DestSize && "Unknown extension!"); return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, SV, 0, SlotVT, - false, DestAlign); + false, false, DestAlign); } SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { @@ -1729,9 +1761,11 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr, PseudoSourceValue::getFixedStack(SPFI), 0, - Node->getValueType(0).getVectorElementType()); + Node->getValueType(0).getVectorElementType(), + false, false, 0); return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0); + PseudoSourceValue::getFixedStack(SPFI), 0, + false, false, 0); } @@ -1805,7 +1839,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, - false, Alignment); + false, false, Alignment); } if (!MoreThanTwoValues) { @@ -1865,8 +1899,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, - Node->getDebugLoc(), DAG.GetOrdering(Node)); + Callee, Args, DAG, Node->getDebugLoc()); // Legalize the call sequence, starting with the chain. 
This will advance // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that @@ -1943,13 +1976,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, } // store the lo of the constructed double - based on integer input SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, - Op0Mapped, Lo, NULL, 0); + Op0Mapped, Lo, NULL, 0, + false, false, 0); // initial hi portion of constructed double SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32); // store the hi of the constructed double - biased exponent - SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0); + SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0, + false, false, 0); // load the constructed double - SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0); + SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0, + false, false, 0); // FP constant to bias correct the final result SDValue Bias = DAG.getConstantFP(isSigned ? BitsToDouble(0x4330000080000000ULL) : @@ -1972,6 +2008,31 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, return Result; } assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); + + // Implementation of unsigned i64 to f64 following the algorithm in + // __floatundidf in compiler_rt. This implementation has the advantage + // of performing rounding correctly, both in the default rounding mode + // and in all alternate rounding modes. + // TODO: Generalize this for use with other types. + if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) { + SDValue TwoP52 = + DAG.getConstant(UINT64_C(0x4330000000000000), MVT::i64); + SDValue TwoP84PlusTwoP52 = + DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), MVT::f64); + SDValue TwoP84 = + DAG.getConstant(UINT64_C(0x4530000000000000), MVT::i64); + + SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32); + SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, + DAG.getConstant(32, MVT::i64)); + SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52); + SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84); + SDValue LoFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, LoOr); + SDValue HiFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, HiOr); + SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, TwoP84PlusTwoP52); + return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); + } + SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()), @@ -2004,13 +2065,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, if (DestVT == MVT::f32) FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, - false, Alignment); + false, false, Alignment); else { FudgeInReg = LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, - MVT::f32, false, Alignment)); + MVT::f32, false, false, Alignment)); } return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); @@ -2271,7 +2332,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, false, false, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("abort", TLI.getPointerTy()), - Args, DAG, dl, DAG.GetOrdering(Node)); + Args, DAG, dl); Results.push_back(CallResult.second); break; } @@ -2350,16 +2411,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, EVT VT = Node->getValueType(0); Tmp1 = Node->getOperand(0); Tmp2 = 
Node->getOperand(1); - SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0); + SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0, + false, false, 0); // Increment the pointer, VAList, to the next vaarg Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, DAG.getConstant(TLI.getTargetData()-> getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), TLI.getPointerTy())); // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0); + Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0, + false, false, 0); // Load the actual argument out of the pointer VAList - Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0)); + Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, + false, false, 0)); Results.push_back(Results[0].getValue(1)); break; } @@ -2369,8 +2433,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0), - Node->getOperand(2), VS, 0); - Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0); + Node->getOperand(2), VS, 0, false, false, 0); + Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0, + false, false, 0); Results.push_back(Tmp1); break; } @@ -2827,7 +2892,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, - PseudoSourceValue::getJumpTable(), 0, MemVT); + PseudoSourceValue::getJumpTable(), 0, MemVT, + false, false, 0); Addr = LD; if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) { // For PIC, the sequence is: diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 4f0fce7..35a7c7c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -444,7 +444,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { NewL = DAG.getLoad(L->getAddressingMode(), dl, L->getExtensionType(), NVT, L->getChain(), L->getBasePtr(), L->getOffset(), L->getSrcValue(), L->getSrcValueOffset(), NVT, - L->isVolatile(), L->getAlignment()); + L->isVolatile(), L->isNonTemporal(), L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -456,8 +456,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getMemoryVT(), L->getChain(), L->getBasePtr(), L->getOffset(), L->getSrcValue(), L->getSrcValueOffset(), - L->getMemoryVT(), - L->isVolatile(), L->getAlignment()); + L->getMemoryVT(), L->isVolatile(), + L->isNonTemporal(), L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. 
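
The new unsigned i64-to-f64 expansion added to ExpandLegalINT_TO_FP (the __floatundidf block a little further up) relies on two exact bit tricks: OR-ing the low 32 bits into the mantissa of 2^52 reads back as the exact double 2^52 + lo, and OR-ing the high 32 bits into 2^84 reads back as 2^84 + hi*2^32; subtracting the constant 2^84 + 2^52 and adding the two parts leaves a single FADD as the only rounding step, which is why the comment can claim correct rounding in all modes. A scalar sketch of the same computation, using the same bit patterns as the DAG constants (illustration, not the DAG code):

    #include <cstdint>
    #include <cstring>

    static double bits_to_double(uint64_t b) {   // stand-in for ISD::BIT_CONVERT
      double d;
      std::memcpy(&d, &b, sizeof d);
      return d;
    }

    double u64_to_f64(uint64_t x) {
      double lo = bits_to_double((x & 0xFFFFFFFFull) | 0x4330000000000000ull); // 2^52 + lo32
      double hi = bits_to_double((x >> 32)           | 0x4530000000000000ull); // 2^84 + hi32*2^32
      double twoP84_plus_twoP52 = bits_to_double(0x4530000000100000ull);       // 2^84 + 2^52
      return lo + (hi - twoP84_plus_twoP52);     // one rounding step: the final add
    }
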
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -755,7 +755,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(), ST->getSrcValue(), ST->getSrcValueOffset(), - ST->isVolatile(), ST->getAlignment()); + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); } @@ -1073,8 +1074,8 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), - LD->getMemoryVT(), - LD->isVolatile(), LD->getAlignment()); + LD->getMemoryVT(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); // Remember the chain. Chain = Hi.getValue(1); @@ -1382,6 +1383,6 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), - ST->getMemoryVT(), - ST->isVolatile(), ST->getAlignment()); + ST->getMemoryVT(), ST->isVolatile(), + ST->isNonTemporal(), ST->getAlignment()); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 9932cf4..81f28ad 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -359,7 +359,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), N->getMemoryVT(), N->isVolatile(), - N->getAlignment()); + N->isNonTemporal(), N->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -873,6 +873,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ int SVOffset = N->getSrcValueOffset(); unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); + bool isNonTemporal = N->isNonTemporal(); DebugLoc dl = N->getDebugLoc(); SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value. @@ -880,7 +881,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ // Truncate the value and store the result. 
return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(), SVOffset, N->getMemoryVT(), - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); } SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { @@ -1079,8 +1080,8 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Amt = N->getOperand(1); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); EVT ShTy = Amt.getValueType(); - unsigned ShBits = ShTy.getSizeInBits(); - unsigned NVTBits = NVT.getSizeInBits(); + unsigned ShBits = ShTy.getScalarType().getSizeInBits(); + unsigned NVTBits = NVT.getScalarType().getSizeInBits(); assert(isPowerOf2_32(NVTBits) && "Expanded integer type size not a power of two!"); DebugLoc dl = N->getDebugLoc(); @@ -1500,6 +1501,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, int SVOffset = N->getSrcValueOffset(); unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); + bool isNonTemporal = N->isNonTemporal(); DebugLoc dl = N->getDebugLoc(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -1508,7 +1510,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, EVT MemVT = N->getMemoryVT(); Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, - MemVT, isVolatile, Alignment); + MemVT, isVolatile, isNonTemporal, Alignment); // Remember the chain. Ch = Lo.getValue(1); @@ -1530,7 +1532,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } else if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -1542,7 +1544,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset+IncrementSize, NEVT, - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the // other one. @@ -1560,7 +1563,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -1569,7 +1572,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset+IncrementSize, EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the // other one. 
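
ExpandIntRes_LOAD above splits a too-wide integer load into two loads of the legal half type: on little-endian targets the low half sits at the original address and the high half at Ptr+IncrementSize with MinAlign'd alignment, on big-endian targets the roles are swapped, and both halves now also inherit the volatile and non-temporal flags. A conceptual little-endian version (hypothetical helper, not the DAG builder calls):

    #include <cstdint>
    #include <cstring>

    // Expand an i64 load into two i32 loads, little-endian layout:
    // low half at offset 0, high half at offset +4 (IncrementSize = NVT bits / 8).
    // A big-endian target would instead place the high half at offset 0.
    uint64_t expand_i64_load_le(const unsigned char *ptr) {
      uint32_t lo, hi;
      std::memcpy(&lo, ptr, sizeof lo);
      std::memcpy(&hi, ptr + 4, sizeof hi);
      return (static_cast<uint64_t>(hi) << 32) | lo;
    }
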
@@ -2212,6 +2216,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { int SVOffset = N->getSrcValueOffset(); unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); + bool isNonTemporal = N->isNonTemporal(); DebugLoc dl = N->getDebugLoc(); SDValue Lo, Hi; @@ -2220,13 +2225,14 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { if (N->getMemoryVT().bitsLE(NVT)) { GetExpandedInteger(N->getValue(), Lo, Hi); return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, - N->getMemoryVT(), isVolatile, Alignment); + N->getMemoryVT(), isVolatile, isNonTemporal, + Alignment); } else if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. GetExpandedInteger(N->getValue(), Lo, Hi); Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -2238,7 +2244,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset+IncrementSize, NEVT, - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } else { // Big-endian - high bits are at low addresses. Favor aligned stores at @@ -2264,7 +2271,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Store both the high bits and maybe some of the low bits. Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), - SVOffset, HiVT, isVolatile, Alignment); + SVOffset, HiVT, isVolatile, isNonTemporal, + Alignment); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -2273,7 +2281,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset+IncrementSize, EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } } @@ -2341,7 +2350,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { // FIXME: Avoid the extend by constructing the right constant pool? SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr, NULL, 0, MVT::f32, - false, Alignment); + false, false, Alignment); return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 37f36a3..f3e7ca4 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -871,9 +871,10 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, // the source and destination types. SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT); // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0, + false, false, 0); // Result is a load from the stack slot. 
- return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0); + return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0, false, false, 0); } /// CustomLowerNode - Replace the node's results with custom code provided @@ -1033,8 +1034,7 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT, TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl, - DAG.GetOrdering(DAG.getEntryNode().getNode())); + Callee, Args, DAG, dl); return CallInfo.first; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index a1b6ced..5e83b4b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -122,10 +122,11 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, const Value *SV = PseudoSourceValue::getFixedStack(SPFI); // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0, + false, false, 0); // Load the first half from the stack slot. - Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0); + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0, false, false, 0); // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; @@ -134,7 +135,7 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false, - MinAlign(Alignment, IncrementSize)); + false, MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. if (TLI.isBigEndian()) @@ -205,11 +206,12 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, int SVOffset = LD->getSrcValueOffset(); unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -217,7 +219,8 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset+IncrementSize, - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the // other one. 
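
CreateStackStoreLoad and ExpandRes_BIT_CONVERT above share one fallback strategy: store the value to a suitably aligned stack temporary, then load the result (or its two expanded halves) back out in the destination type, the second half at an IncrementSize offset with MinAlign'd alignment. In plain C++ the idea reduces to type punning through an aligned buffer (sketch only, not the DAG code):

    #include <cstdint>
    #include <cstring>

    // Bit-convert f64 -> 2 x i32 by way of an aligned stack slot, mirroring
    // the store-then-two-loads sequence in ExpandRes_BIT_CONVERT.
    void bitconvert_f64_to_2xi32(double v, uint32_t &lo, uint32_t &hi) {
      alignas(8) unsigned char slot[8];   // CreateStackTemporary
      std::memcpy(slot, &v, sizeof v);    // store the whole value
      std::memcpy(&lo, slot, 4);          // first half
      std::memcpy(&hi, slot + 4, 4);      // second half at IncrementSize = 4
    }
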
@@ -383,6 +386,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { int SVOffset = St->getSrcValueOffset(); unsigned Alignment = St->getAlignment(); bool isVolatile = St->isVolatile(); + bool isNonTemporal = St->isNonTemporal(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -394,14 +398,15 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset, - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!"); Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(), SVOffset + IncrementSize, - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index bf95bb5..8363c3a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -172,7 +172,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { DAG.getUNDEF(N->getBasePtr().getValueType()), N->getSrcValue(), N->getSrcValueOffset(), N->getMemoryVT().getVectorElementType(), - N->isVolatile(), N->getOriginalAlignment()); + N->isVolatile(), N->isNonTemporal(), + N->getOriginalAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -366,11 +367,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), N->getMemoryVT().getVectorElementType(), - N->isVolatile(), N->getAlignment()); + N->isVolatile(), N->isNonTemporal(), + N->getAlignment()); return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), - N->isVolatile(), N->getOriginalAlignment()); + N->isVolatile(), N->isNonTemporal(), + N->getOriginalAlignment()); } @@ -696,17 +699,20 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, EVT VecVT = Vec.getValueType(); EVT EltVT = VecVT.getVectorElementType(); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0, + false, false, 0); // Store the new element. This may be larger than the vector element type, // so use a truncating store. SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); unsigned Alignment = TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForEVT(*DAG.getContext())); - Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT); + Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT, + false, false, 0); // Load the Lo part from the stack slot. - Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0); + Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0, + false, false, 0); // Increment the pointer to the other part. 
unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; @@ -715,7 +721,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false, - MinAlign(Alignment, IncrementSize)); + false, MinAlign(Alignment, IncrementSize)); } void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, @@ -743,19 +749,20 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, EVT MemoryVT = LD->getMemoryVT(); unsigned Alignment = LD->getOriginalAlignment(); bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); EVT LoMemVT, HiMemVT; GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset, - SV, SVOffset, LoMemVT, isVolatile, Alignment); + SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); SVOffset += IncrementSize; Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset, - SV, SVOffset, HiMemVT, isVolatile, Alignment); + SV, SVOffset, HiMemVT, isVolatile, isNonTemporal, Alignment); // Build a factor node to remember that this load is independent of the // other one. @@ -1086,12 +1093,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue StackPtr = DAG.CreateStackTemporary(VecVT); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); const Value *SV = PseudoSourceValue::getFixedStack(SPFI); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0, + false, false, 0); // Load back the required element. StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, - SV, 0, EltVT); + SV, 0, EltVT, false, false, 0); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -1106,6 +1114,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { EVT MemoryVT = N->getMemoryVT(); unsigned Alignment = N->getOriginalAlignment(); bool isVol = N->isVolatile(); + bool isNT = N->isNonTemporal(); SDValue Lo, Hi; GetSplitVector(N->getOperand(1), Lo, Hi); @@ -1116,10 +1125,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { if (isTruncating) Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, - LoMemVT, isVol, Alignment); + LoMemVT, isVol, isNT, Alignment); else Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, - isVol, Alignment); + isVol, isNT, Alignment); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -1128,10 +1137,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { if (isTruncating) Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset, - HiMemVT, isVol, Alignment); + HiMemVT, isVol, isNT, Alignment); else Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset, - isVol, Alignment); + isVol, isNT, Alignment); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -1242,10 +1251,96 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { // Binary op widening. 
+ unsigned Opcode = N->getOpcode(); + DebugLoc dl = N->getDebugLoc(); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue InOp1 = GetWidenedVector(N->getOperand(0)); - SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp1, InOp2); + EVT WidenEltVT = WidenVT.getVectorElementType(); + EVT VT = WidenVT; + unsigned NumElts = VT.getVectorNumElements(); + while (!TLI.isTypeLegal(VT) && NumElts != 1) { + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + } + + if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) { + // Operation doesn't trap so just widen as normal. + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); + } else if (NumElts == 1) { + // No legal vector version so unroll the vector operation and then widen. + return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); + } else { + // Since the operation can trap, apply operation on the original vector. + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); + + SmallVector<SDValue, 16> ConcatOps(CurNumElts); + unsigned ConcatEnd = 0; // Current ConcatOps index. + unsigned Idx = 0; // Current Idx into input vectors. + while (CurNumElts != 0) { + while (CurNumElts >= NumElts) { + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, + DAG.getIntPtrConstant(Idx)); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, + DAG.getIntPtrConstant(Idx)); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); + Idx += NumElts; + CurNumElts -= NumElts; + } + EVT PrevVecVT = VT; + do { + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + } while (!TLI.isTypeLegal(VT) && NumElts != 1); + + if (NumElts == 1) { + // Since we are using concat vector, build a vector from the scalar ops. + SDValue VecOp = DAG.getUNDEF(PrevVecVT); + for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + InOp1, DAG.getIntPtrConstant(Idx)); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + InOp2, DAG.getIntPtrConstant(Idx)); + VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, PrevVecVT, VecOp, + DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2), + DAG.getIntPtrConstant(i)); + } + CurNumElts = 0; + ConcatOps[ConcatEnd++] = VecOp; + } + } + + // Check to see if we have a single operation with the widen type. 
+ if (ConcatEnd == 1) { + VT = ConcatOps[0].getValueType(); + if (VT == WidenVT) + return ConcatOps[0]; + } + + // Rebuild vector to one with the widen type + Idx = ConcatEnd - 1; + while (Idx != 0) { + VT = ConcatOps[Idx--].getValueType(); + while (Idx != 0 && ConcatOps[Idx].getValueType() == VT) + --Idx; + if (Idx != 0) { + VT = ConcatOps[Idx].getValueType(); + ConcatOps[Idx+1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, + &ConcatOps[Idx+1], ConcatEnd - Idx - 1); + ConcatEnd = Idx + 2; + } + } + + unsigned NumOps = WidenVT.getVectorNumElements()/VT.getVectorNumElements(); + if (NumOps != ConcatEnd ) { + SDValue UndefVal = DAG.getUNDEF(VT); + for (unsigned j = ConcatEnd; j < NumOps; ++j) + ConcatOps[j] = UndefVal; + } + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps); + } } SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { @@ -2042,6 +2137,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, int SVOffset = LD->getSrcValueOffset(); unsigned Align = LD->getAlignment(); bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); const Value *SV = LD->getSrcValue(); int LdWidth = LdVT.getSizeInBits(); @@ -2052,7 +2148,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); int NewVTWidth = NewVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset, - isVolatile, Align); + isVolatile, isNonTemporal, Align); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction @@ -2099,7 +2195,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset+Offset, isVolatile, - MinAlign(Align, Increment)); + isNonTemporal, MinAlign(Align, Increment)); LdChain.push_back(LdOp.getValue(1)); LdOps.push_back(LdOp); @@ -2173,6 +2269,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, int SVOffset = LD->getSrcValueOffset(); unsigned Align = LD->getAlignment(); bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); const Value *SV = LD->getSrcValue(); EVT EltVT = WidenVT.getVectorElementType(); @@ -2184,14 +2281,15 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, SmallVector<SDValue, 16> Ops(WidenNumElts); unsigned Increment = LdEltVT.getSizeInBits() / 8; Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset, - LdEltVT, isVolatile, Align); + LdEltVT, isVolatile, isNonTemporal, Align); LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Offset)); Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV, - SVOffset + Offset, LdEltVT, isVolatile, Align); + SVOffset + Offset, LdEltVT, isVolatile, + isNonTemporal, Align); LdChain.push_back(Ops[i].getValue(1)); } @@ -2215,6 +2313,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, int SVOffset = ST->getSrcValueOffset(); unsigned Align = ST->getAlignment(); bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); SDValue ValOp = GetWidenedVector(ST->getValue()); DebugLoc dl = ST->getDebugLoc(); @@ -2240,6 +2339,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, 
DAG.getIntPtrConstant(Idx)); StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV, SVOffset + Offset, isVolatile, + isNonTemporal, MinAlign(Align, Offset))); StWidth -= NewVTWidth; Offset += Increment; @@ -2258,8 +2358,8 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, DAG.getIntPtrConstant(Idx++)); StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV, - SVOffset + Offset, isVolatile, - MinAlign(Align, Offset))); + SVOffset + Offset, isVolatile, + isNonTemporal, MinAlign(Align, Offset))); StWidth -= NewVTWidth; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, @@ -2282,6 +2382,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, int SVOffset = ST->getSrcValueOffset(); unsigned Align = ST->getAlignment(); bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); SDValue ValOp = GetWidenedVector(ST->getValue()); DebugLoc dl = ST->getDebugLoc(); @@ -2304,7 +2405,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, DAG.getIntPtrConstant(0)); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV, SVOffset, StEltVT, - isVolatile, Align)); + isVolatile, isNonTemporal, Align)); unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), @@ -2313,7 +2414,8 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, DAG.getIntPtrConstant(0)); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV, SVOffset + Offset, StEltVT, - isVolatile, MinAlign(Align, Offset))); + isVolatile, isNonTemporal, + MinAlign(Align, Offset))); } } diff --git a/lib/CodeGen/SelectionDAG/SDDbgValue.h b/lib/CodeGen/SelectionDAG/SDDbgValue.h new file mode 100644 index 0000000..9e15fc9 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SDDbgValue.h @@ -0,0 +1,67 @@ +//===-- llvm/CodeGen/SDDbgValue.h - SD dbg_value handling--------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SDDbgValue class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SDDBGVALUE_H +#define LLVM_CODEGEN_SDDBGVALUE_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/DebugLoc.h" + +namespace llvm { + +class MDNode; +class SDNode; +class Value; + +/// SDDbgValue - Holds the information from a dbg_value node through SDISel. +/// Either Const or Node is nonzero, but not both. +/// We do not use SDValue here to avoid including its header. + +class SDDbgValue { + SDNode *Node; // valid for non-constants + unsigned ResNo; // valid for non-constants + Value *Const; // valid for constants + MDNode *mdPtr; + uint64_t Offset; + DebugLoc DL; +public: + // Constructor for non-constants. + SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl) : + Node(N), ResNo(R), Const(0), mdPtr(mdP), Offset(off), DL(dl) {} + + // Constructor for constants. + SDDbgValue(MDNode *mdP, Value *C, uint64_t off, DebugLoc dl) : Node(0), + ResNo(0), Const(C), mdPtr(mdP), Offset(off), DL(dl) {} + + // Returns the MDNode pointer. 
+ MDNode *getMDPtr() { return mdPtr; } + + // Returns the SDNode* (valid for non-constants only). + SDNode *getSDNode() { assert (!Const); return Node; } + + // Returns the ResNo (valid for non-constants only). + unsigned getResNo() { assert (!Const); return ResNo; } + + // Returns the Value* for a constant (invalid for non-constants). + Value *getConst() { assert (!Node); return Const; } + + // Returns the offset. + uint64_t getOffset() { return Offset; } + + // Returns the DebugLoc. + DebugLoc getDebugLoc() { return DL; } +}; + +} // end llvm namespace + +#endif diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index b51c61b..06e7b8c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -218,8 +218,20 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // Check to see if the scheduler cares about latencies. bool UnitLatencies = ForceUnitLatencies(); - for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), - E = DAG->allnodes_end(); NI != E; ++NI) { + // Add all nodes in depth first order. + SmallVector<SDNode*, 64> Worklist; + SmallPtrSet<SDNode*, 64> Visited; + Worklist.push_back(DAG->getRoot().getNode()); + Visited.insert(DAG->getRoot().getNode()); + + while (!Worklist.empty()) { + SDNode *NI = Worklist.pop_back_val(); + + // Add all operands to the worklist unless they've already been added. + for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i) + if (Visited.insert(NI->getOperand(i).getNode())) + Worklist.push_back(NI->getOperand(i).getNode()); + if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. continue; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 6122a2a..746d4e2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -468,18 +468,20 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { } /// encodeMemSDNodeFlags - Generic routine for computing a value for use in -/// the CSE map that carries volatility, indexing mode, and +/// the CSE map that carries volatility, temporalness, indexing mode, and /// extension/truncation information. /// static inline unsigned -encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile) { +encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile, + bool isNonTemporal) { assert((ConvType & 3) == ConvType && "ConvType may not require more than 2 bits!"); assert((AM & 7) == AM && "AM may not require more than 3 bits!"); return ConvType | (AM << 2) | - (isVolatile << 5); + (isVolatile << 5) | + (isNonTemporal << 6); } //===----------------------------------------------------------------------===// @@ -829,6 +831,7 @@ void SelectionDAG::clear() { EntryNode.UseList = 0; AllNodes.push_back(&EntryNode); Root = getEntryNode(); + delete Ordering; Ordering = new SDNodeOrdering(); } @@ -859,14 +862,14 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) { /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). /// SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, EVT VT) { - EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; + EVT EltVT = VT.getScalarType(); SDValue NegOne = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); return getNode(ISD::XOR, DL, VT, Val, NegOne); } SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) { - EVT EltVT = VT.isVector() ? 
VT.getVectorElementType() : VT; + EVT EltVT = VT.getScalarType(); assert((EltVT.getSizeInBits() >= 64 || (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && "getConstant with a uint64_t value that doesn't fit in the type!"); @@ -880,7 +883,7 @@ SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) { SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { assert(VT.isInteger() && "Cannot create FP integer constant!"); - EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; + EVT EltVT = VT.getScalarType(); assert(Val.getBitWidth() == EltVT.getSizeInBits() && "APInt size does not match type size!"); @@ -923,8 +926,7 @@ SDValue SelectionDAG::getConstantFP(const APFloat& V, EVT VT, bool isTarget) { SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ assert(VT.isFloatingPoint() && "Cannot create integer FP constant!"); - EVT EltVT = - VT.isVector() ? VT.getVectorElementType() : VT; + EVT EltVT = VT.getScalarType(); // Do the map lookup using the actual bit pattern for the floating point // value, so that we don't have problems with 0.0 comparing equal to -0.0, and @@ -958,8 +960,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ } SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { - EVT EltVT = - VT.isVector() ? VT.getVectorElementType() : VT; + EVT EltVT = VT.getScalarType(); if (EltVT==MVT::f32) return getConstantFP(APFloat((float)Val), VT, isTarget); else @@ -1344,7 +1345,7 @@ SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, EVT VT, } SDValue SelectionDAG::getSrcValue(const Value *V) { - assert((!V || isa<PointerType>(V->getType())) && + assert((!V || V->getType()->isPointerTy()) && "SrcValue is not a pointer?"); FoldingSetNodeID ID; @@ -2232,6 +2233,29 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { return false; } +bool SelectionDAG::isKnownNeverZero(SDValue Op) const { + // If the value is a constant, we can obviously see if it is a zero or not. + if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) + return !C->isZero(); + + // TODO: Recognize more cases here. + + return false; +} + +bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { + // Check the obvious case. + if (A == B) return true; + + // For for negative and positive zero. + if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A)) + if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B)) + if (CA->isZero() && CB->isZero()) return true; + + // Otherwise they may not be equal. + return false; +} + bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); if (!GA) return false; @@ -3080,8 +3104,7 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { /// operand. static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, DebugLoc dl) { - unsigned NumBits = VT.isVector() ? 
- VT.getVectorElementType().getSizeInBits() : VT.getSizeInBits(); + unsigned NumBits = VT.getScalarType().getSizeInBits(); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) { APInt Val = APInt(NumBits, C->getZExtValue() & 255); unsigned Shift = 8; @@ -3185,7 +3208,7 @@ bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps, bool isSrcConst = isa<ConstantSDNode>(Src); EVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG); bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(VT); - if (VT != MVT::iAny) { + if (VT != MVT::Other) { const Type *Ty = VT.getTypeForEVT(*DAG.getContext()); unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); // If source is a string constant, this will require an unaligned load. @@ -3193,14 +3216,14 @@ bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps, if (Dst.getOpcode() != ISD::FrameIndex) { // Can't change destination alignment. It requires a unaligned store. if (AllowUnalign) - VT = MVT::iAny; + VT = MVT::Other; } else { int FI = cast<FrameIndexSDNode>(Dst)->getIndex(); MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); if (MFI->isFixedObjectIndex(FI)) { // Can't change destination alignment. It requires a unaligned store. if (AllowUnalign) - VT = MVT::iAny; + VT = MVT::Other; } else { // Give the stack frame object a larger alignment if needed. if (MFI->getObjectAlignment(FI) < NewAlign) @@ -3211,7 +3234,7 @@ bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps, } } - if (VT == MVT::iAny) { + if (VT == MVT::Other) { if (TLI.allowsUnalignedMemoryAccesses(MVT::i64)) { VT = MVT::i64; } else { @@ -3299,7 +3322,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff); Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, false, DstAlign); + DstSV, DstSVOff + DstOff, false, false, DstAlign); } else { // The type might not be legal for the target. 
This should only happen // if the type is smaller than a legal type, as on PPC, so the right @@ -3310,10 +3333,11 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, assert(NVT.bitsGE(VT)); Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), - SrcSV, SrcSVOff + SrcOff, VT, false, Align); + SrcSV, SrcSVOff + SrcOff, VT, false, false, Align); Store = DAG.getTruncStore(Chain, dl, Value, - getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, VT, false, DstAlign); + getMemBasePlusOffset(Dst, DstOff, DAG), + DstSV, DstSVOff + DstOff, VT, false, false, + DstAlign); } OutChains.push_back(Store); SrcOff += VTSize; @@ -3358,7 +3382,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = DAG.getLoad(VT, dl, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), - SrcSV, SrcSVOff + SrcOff, false, Align); + SrcSV, SrcSVOff + SrcOff, false, false, Align); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -3373,7 +3397,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Store = DAG.getStore(Chain, dl, LoadValues[i], getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, false, DstAlign); + DstSV, DstSVOff + DstOff, false, false, DstAlign); OutChains.push_back(Store); DstOff += VTSize; } @@ -3408,7 +3432,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, SDValue Value = getMemsetValue(Src, VT, DAG, dl); SDValue Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff); + DstSV, DstSVOff + DstOff, false, false, 0); OutChains.push_back(Store); DstOff += VTSize; } @@ -3472,7 +3496,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY), TLI.getPointerTy()), - Args, *this, dl, GetOrdering(Chain.getNode())); + Args, *this, dl); return CallResult.second; } @@ -3521,7 +3545,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE), TLI.getPointerTy()), - Args, *this, dl, GetOrdering(Chain.getNode())); + Args, *this, dl); return CallResult.second; } @@ -3580,7 +3604,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), TLI.getPointerTy()), - Args, *this, dl, GetOrdering(Chain.getNode())); + Args, *this, dl); return CallResult.second; } @@ -3788,7 +3812,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, ISD::LoadExtType ExtType, EVT VT, SDValue Chain, SDValue Ptr, SDValue Offset, const Value *SV, int SVOffset, EVT MemVT, - bool isVolatile, unsigned Alignment) { + bool isVolatile, bool isNonTemporal, + unsigned Alignment) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(VT); @@ -3802,6 +3827,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, unsigned Flags = MachineMemOperand::MOLoad; if (isVolatile) Flags |= MachineMemOperand::MOVolatile; + if (isNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; MachineMemOperand *MMO = MF.getMachineMemOperand(SV, Flags, SVOffset, MemVT.getStoreSize(), Alignment); @@ -3840,7 +3867,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3); ID.AddInteger(MemVT.getRawBits()); - 
ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile())); + ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(), + MMO->isNonTemporal())); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<LoadSDNode>(E)->refineAlignment(MMO); @@ -3856,20 +3884,22 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, const Value *SV, int SVOffset, - bool isVolatile, unsigned Alignment) { + bool isVolatile, bool isNonTemporal, + unsigned Alignment) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, dl, ISD::NON_EXTLOAD, VT, Chain, Ptr, Undef, - SV, SVOffset, VT, isVolatile, Alignment); + SV, SVOffset, VT, isVolatile, isNonTemporal, Alignment); } SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, SDValue Chain, SDValue Ptr, const Value *SV, int SVOffset, EVT MemVT, - bool isVolatile, unsigned Alignment) { + bool isVolatile, bool isNonTemporal, + unsigned Alignment) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef, - SV, SVOffset, MemVT, isVolatile, Alignment); + SV, SVOffset, MemVT, isVolatile, isNonTemporal, Alignment); } SDValue @@ -3881,12 +3911,13 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, return getLoad(AM, dl, LD->getExtensionType(), OrigLoad.getValueType(), LD->getChain(), Base, Offset, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); } SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, const Value *SV, int SVOffset, - bool isVolatile, unsigned Alignment) { + bool isVolatile, bool isNonTemporal, + unsigned Alignment) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(Val.getValueType()); @@ -3900,6 +3931,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, unsigned Flags = MachineMemOperand::MOStore; if (isVolatile) Flags |= MachineMemOperand::MOVolatile; + if (isNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; MachineMemOperand *MMO = MF.getMachineMemOperand(SV, Flags, SVOffset, Val.getValueType().getStoreSize(), Alignment); @@ -3916,7 +3949,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(VT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile())); + ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal())); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); @@ -3932,7 +3966,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, const Value *SV, int SVOffset, EVT SVT, - bool isVolatile, unsigned Alignment) { + bool isVolatile, bool isNonTemporal, + unsigned Alignment) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(SVT); @@ -3946,6 +3981,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, unsigned Flags = MachineMemOperand::MOStore; if (isVolatile) Flags |= MachineMemOperand::MOVolatile; + if (isNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; 
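// A standalone sketch, not part of this commit, of the encoding this change
// extends: encodeMemSDNodeFlags (in the hunk above) packs the CSE-map key for
// a memory node as 2 bits of extension/truncation kind, 3 bits of addressing
// mode, one volatile bit, and now one non-temporal bit. The field layout is
// copied from the hunk; the function and parameter names here are illustrative.
#include <cassert>

static unsigned encodeMemFlags(int ConvType, int AM,
                               bool isVolatile, bool isNonTemporal) {
  assert((ConvType & 3) == ConvType && "ConvType needs at most 2 bits");
  assert((AM & 7) == AM && "AM needs at most 3 bits");
  return ConvType | (AM << 2) | (isVolatile << 5) | (isNonTemporal << 6);
}

int main() {
  // Two otherwise identical stores that differ only in temporality must hash
  // differently, or CSE would fold the non-temporal one into the other.
  assert(encodeMemFlags(0, 0, false, true) !=
         encodeMemFlags(0, 0, false, false));
  return 0;
}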
MachineMemOperand *MMO = MF.getMachineMemOperand(SV, Flags, SVOffset, SVT.getStoreSize(), Alignment); @@ -3976,7 +4013,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(SVT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile())); + ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal())); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); @@ -4535,91 +4573,13 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDVTList VTs, const SDValue *Ops, unsigned NumOps) { - return MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps); -} - -SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - EVT VT) { - SDVTList VTs = getVTList(VT); - return MorphNodeTo(N, Opc, VTs, 0, 0); -} - -SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - EVT VT, SDValue Op1) { - SDVTList VTs = getVTList(VT); - SDValue Ops[] = { Op1 }; - return MorphNodeTo(N, Opc, VTs, Ops, 1); -} - -SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - EVT VT, SDValue Op1, - SDValue Op2) { - SDVTList VTs = getVTList(VT); - SDValue Ops[] = { Op1, Op2 }; - return MorphNodeTo(N, Opc, VTs, Ops, 2); -} - -SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - EVT VT, SDValue Op1, - SDValue Op2, SDValue Op3) { - SDVTList VTs = getVTList(VT); - SDValue Ops[] = { Op1, Op2, Op3 }; - return MorphNodeTo(N, Opc, VTs, Ops, 3); -} - -SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - EVT VT, const SDValue *Ops, - unsigned NumOps) { - SDVTList VTs = getVTList(VT); - return MorphNodeTo(N, Opc, VTs, Ops, NumOps); -} - -SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - EVT VT1, EVT VT2, const SDValue *Ops, - unsigned NumOps) { - SDVTList VTs = getVTList(VT1, VT2); - return MorphNodeTo(N, Opc, VTs, Ops, NumOps); -} - -SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - EVT VT1, EVT VT2) { - SDVTList VTs = getVTList(VT1, VT2); - return MorphNodeTo(N, Opc, VTs, (SDValue *)0, 0); -} - -SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - EVT VT1, EVT VT2, EVT VT3, - const SDValue *Ops, unsigned NumOps) { - SDVTList VTs = getVTList(VT1, VT2, VT3); - return MorphNodeTo(N, Opc, VTs, Ops, NumOps); -} - -SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - EVT VT1, EVT VT2, - SDValue Op1) { - SDVTList VTs = getVTList(VT1, VT2); - SDValue Ops[] = { Op1 }; - return MorphNodeTo(N, Opc, VTs, Ops, 1); -} - -SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - EVT VT1, EVT VT2, - SDValue Op1, SDValue Op2) { - SDVTList VTs = getVTList(VT1, VT2); - SDValue Ops[] = { Op1, Op2 }; - return MorphNodeTo(N, Opc, VTs, Ops, 2); -} - -SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - EVT VT1, EVT VT2, - SDValue Op1, SDValue Op2, - SDValue Op3) { - SDVTList VTs = getVTList(VT1, VT2); - SDValue Ops[] = { Op1, Op2, Op3 }; - return MorphNodeTo(N, Opc, VTs, Ops, 3); + N = MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps); + // Reset the NodeID to -1. + N->setNodeId(-1); + return N; } -/// MorphNodeTo - These *mutate* the specified node to have the specified +/// MorphNodeTo - This *mutates* the specified node to have the specified /// return type, opcode, and operands. /// /// Note that MorphNodeTo returns the resultant node. 
If there is already a @@ -4695,12 +4655,14 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, // Delete any nodes that are still dead after adding the uses for the // new operands. - SmallVector<SDNode *, 16> DeadNodes; - for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(), - E = DeadNodeSet.end(); I != E; ++I) - if ((*I)->use_empty()) - DeadNodes.push_back(*I); - RemoveDeadNodes(DeadNodes); + if (!DeadNodeSet.empty()) { + SmallVector<SDNode *, 16> DeadNodes; + for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(), + E = DeadNodeSet.end(); I != E; ++I) + if ((*I)->use_empty()) + DeadNodes.push_back(*I); + RemoveDeadNodes(DeadNodes); + } if (IP) CSEMap.InsertNode(N, IP); // Memoize the new node. @@ -4907,6 +4869,43 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, return NULL; } +namespace { + +/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node +/// pointed to by a use iterator is deleted, increment the use iterator +/// so that it doesn't dangle. +/// +/// This class also manages a "downlink" DAGUpdateListener, to forward +/// messages to ReplaceAllUsesWith's callers. +/// +class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener { + SelectionDAG::DAGUpdateListener *DownLink; + SDNode::use_iterator &UI; + SDNode::use_iterator &UE; + + virtual void NodeDeleted(SDNode *N, SDNode *E) { + // Increment the iterator as needed. + while (UI != UE && N == *UI) + ++UI; + + // Then forward the message. + if (DownLink) DownLink->NodeDeleted(N, E); + } + + virtual void NodeUpdated(SDNode *N) { + // Just forward the message. + if (DownLink) DownLink->NodeUpdated(N); + } + +public: + RAUWUpdateListener(SelectionDAG::DAGUpdateListener *dl, + SDNode::use_iterator &ui, + SDNode::use_iterator &ue) + : DownLink(dl), UI(ui), UE(ue) {} +}; + +} + /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. /// This can cause recursive merging of nodes in the DAG. /// @@ -4927,6 +4926,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, // is replaced by To, we don't want to replace of all its users with To // too. See PR3018 for more info. SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + RAUWUpdateListener Listener(UpdateListener, UI, UE); while (UI != UE) { SDNode *User = *UI; @@ -4945,7 +4945,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. - AddModifiedNodeToCSEMaps(User, UpdateListener); + AddModifiedNodeToCSEMaps(User, &Listener); } } @@ -4971,6 +4971,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + RAUWUpdateListener Listener(UpdateListener, UI, UE); while (UI != UE) { SDNode *User = *UI; @@ -4989,7 +4990,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. - AddModifiedNodeToCSEMaps(User, UpdateListener); + AddModifiedNodeToCSEMaps(User, &Listener); } } @@ -5007,6 +5008,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. 
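// A standalone sketch, not part of this commit, of the pattern
// RAUWUpdateListener (above) introduces: while ReplaceAllUsesWith walks a use
// list, replacing a use may delete the very user the iterator points at, so
// the listener's NodeDeleted callback steps the iterator past the dying node
// before it can dangle. The same idea over a std::list, with illustrative
// names:
#include <cassert>
#include <list>

struct DeletionListener {
  std::list<int>::iterator &It; // the walker's iterator, advanced on demand

  explicit DeletionListener(std::list<int>::iterator &it) : It(it) {}

  // Called just before the element at Dying is erased.
  void nodeDeleted(std::list<int>::iterator Dying) {
    if (It == Dying)
      ++It;
  }
};

int main() {
  std::list<int> Users;
  Users.push_back(1); Users.push_back(2); Users.push_back(3);

  std::list<int>::iterator It = Users.begin();
  DeletionListener Listener(It);

  std::list<int>::iterator Dying = It; // the current user is about to die
  Listener.nodeDeleted(Dying);
  Users.erase(Dying);

  assert(It != Users.end() && *It == 2); // the walk resumes safely
  return 0;
}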
SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + RAUWUpdateListener Listener(UpdateListener, UI, UE); while (UI != UE) { SDNode *User = *UI; @@ -5026,7 +5028,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. - AddModifiedNodeToCSEMaps(User, UpdateListener); + AddModifiedNodeToCSEMaps(User, &Listener); } } @@ -5048,6 +5050,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, // the ReplaceAllUsesWith above. SDNode::use_iterator UI = From.getNode()->use_begin(), UE = From.getNode()->use_end(); + RAUWUpdateListener Listener(UpdateListener, UI, UE); while (UI != UE) { SDNode *User = *UI; bool UserRemovedFromCSEMaps = false; @@ -5083,7 +5086,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. - AddModifiedNodeToCSEMaps(User, UpdateListener); + AddModifiedNodeToCSEMaps(User, &Listener); } } @@ -5280,8 +5283,11 @@ GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA, MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, MachineMemOperand *mmo) : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) { - SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile()); + SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal()); assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); + assert(isNonTemporal() == MMO->isNonTemporal() && + "Non-temporal encoding error!"); assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); } @@ -5290,7 +5296,8 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, MachineMemOperand *mmo) : SDNode(Opc, dl, VTs, Ops, NumOps), MemoryVT(memvt), MMO(mmo) { - SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile()); + SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal()); assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); } @@ -5459,15 +5466,15 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) if (getMachineOpcode() < TII->getNumOpcodes()) return TII->get(getMachineOpcode()).getName(); - return "<<Unknown Machine Node>>"; + return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>"; } if (G) { const TargetLowering &TLI = G->getTargetLoweringInfo(); const char *Name = TLI.getTargetNodeName(getOpcode()); if (Name) return Name; - return "<<Unknown Target Node>>"; + return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>"; } - return "<<Unknown Node>>"; + return "<<Unknown Node #" + utostr(getOpcode()) + ">>"; #ifndef NDEBUG case ISD::DELETED_NODE: @@ -5904,6 +5911,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (G) if (unsigned Order = G->GetOrdering(this)) OS << " [ORD=" << Order << ']'; + + if (getNodeId() != -1) + OS << " [ID=" << getNodeId() << ']'; } void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { @@ -6292,31 +6302,37 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { return true; } +#ifdef XDEBUG static void checkForCyclesHelper(const SDNode *N, - std::set<const SDNode *> &visited) { - if 
(visited.find(N) != visited.end()) { + SmallPtrSet<const SDNode*, 32> &Visited, + SmallPtrSet<const SDNode*, 32> &Checked) { + // If this node has already been checked, don't check it again. + if (Checked.count(N)) + return; + + // If a node has already been visited on this depth-first walk, reject it as + // a cycle. + if (!Visited.insert(N)) { dbgs() << "Offending node:\n"; N->dumprFull(); - assert(0 && "Detected cycle in SelectionDAG"); + errs() << "Detected cycle in SelectionDAG\n"; + abort(); } - - std::set<const SDNode*>::iterator i; - bool inserted; - - tie(i, inserted) = visited.insert(N); - assert(inserted && "Missed cycle"); - - for(unsigned i = 0; i < N->getNumOperands(); ++i) { - checkForCyclesHelper(N->getOperand(i).getNode(), visited); - } - visited.erase(i); + + for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked); + + Checked.insert(N); + Visited.erase(N); } +#endif void llvm::checkForCycles(const llvm::SDNode *N) { #ifdef XDEBUG assert(N && "Checking nonexistant SDNode"); - std::set<const SDNode *> visited; - checkForCyclesHelper(N, visited); + SmallPtrSet<const SDNode*, 32> visited; + SmallPtrSet<const SDNode*, 32> checked; + checkForCyclesHelper(N, visited, checked); #endif } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index de17f90..05be9a1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -155,7 +155,7 @@ namespace { /// this value and returns the result as a ValueVTs value. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. - SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, unsigned Order, + SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, SDValue &Chain, SDValue *Flag) const; /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the @@ -163,14 +163,14 @@ namespace { /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, - unsigned Order, SDValue &Chain, SDValue *Flag) const; + SDValue &Chain, SDValue *Flag) const; /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker, matching input operand index /// (if applicable), and includes the number of values added into it. void AddInlineAsmOperands(unsigned Code, bool HasMatching, unsigned MatchingIdx, - SelectionDAG &DAG, unsigned Order, + SelectionDAG &DAG, std::vector<SDValue> &Ops) const; }; } @@ -180,7 +180,7 @@ namespace { /// larger then ValueVT then AssertOp can be used to specify whether the extra /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT /// (ISD::AssertSext). 
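// A standalone sketch, not part of this commit, of the scheme the rewritten
// checkForCyclesHelper (above) uses: one set for nodes on the current
// depth-first path (a repeat there is a cycle) and one for nodes whose entire
// operand subgraph has already been cleared, so each node is examined once
// rather than once per path. The graph representation and names here are
// illustrative:
#include <cassert>
#include <set>
#include <vector>

typedef std::vector<std::vector<int> > Graph; // node index -> operand indices

static bool hasCycleFrom(const Graph &G, int N,
                         std::set<int> &OnPath, std::set<int> &Checked) {
  if (Checked.count(N))
    return false;                 // already proven cycle-free
  if (!OnPath.insert(N).second)
    return true;                  // back-edge: N is already on this path
  for (size_t i = 0; i != G[N].size(); ++i)
    if (hasCycleFrom(G, G[N][i], OnPath, Checked))
      return true;
  OnPath.erase(N);
  Checked.insert(N);
  return false;
}

int main() {
  Graph Acyclic(3), Cyclic(3);
  Acyclic[0].push_back(1); Acyclic[0].push_back(2); Acyclic[1].push_back(2);
  Cyclic[0].push_back(1);  Cyclic[1].push_back(2);  Cyclic[2].push_back(0);

  std::set<int> P1, C1, P2, C2;
  bool A = hasCycleFrom(Acyclic, 0, P1, C1);
  bool B = hasCycleFrom(Cyclic, 0, P2, C2);
  assert(!A && B);
  (void)A; (void)B;
  return 0;
}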
-static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, +static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, const SDValue *Parts, unsigned NumParts, EVT PartVT, EVT ValueVT, ISD::NodeType AssertOp = ISD::DELETED_NODE) { @@ -205,9 +205,9 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2); if (RoundParts > 2) { - Lo = getCopyFromParts(DAG, dl, Order, Parts, RoundParts / 2, + Lo = getCopyFromParts(DAG, dl, Parts, RoundParts / 2, PartVT, HalfVT); - Hi = getCopyFromParts(DAG, dl, Order, Parts + RoundParts / 2, + Hi = getCopyFromParts(DAG, dl, Parts + RoundParts / 2, RoundParts / 2, PartVT, HalfVT); } else { Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]); @@ -223,7 +223,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, // Assemble the trailing non-power-of-2 part. unsigned OddParts = NumParts - RoundParts; EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); - Hi = getCopyFromParts(DAG, dl, Order, + Hi = getCopyFromParts(DAG, dl, Parts + RoundParts, OddParts, PartVT, OddVT); // Combine the round and odd parts. @@ -259,7 +259,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, // If the register was not expanded, truncate or copy the value, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) - Ops[i] = getCopyFromParts(DAG, dl, Order, &Parts[i], 1, + Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1, PartVT, IntermediateVT); } else if (NumParts > 0) { // If the intermediate type was expanded, build the intermediate @@ -268,7 +268,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) - Ops[i] = getCopyFromParts(DAG, dl, Order, &Parts[i * Factor], Factor, + Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor, PartVT, IntermediateVT); } @@ -292,7 +292,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && !PartVT.isVector() && "Unexpected split"); EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); - Val = getCopyFromParts(DAG, dl, Order, Parts, NumParts, PartVT, IntVT); + Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT); } } @@ -349,7 +349,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. 
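// A standalone sketch, not part of this commit, of the part-splitting
// arithmetic getCopyFromParts (above) and getCopyToParts (whose hunk follows)
// share: when the number of parts is not a power of two, the value is handled
// as the largest power-of-two prefix of parts plus an odd tail. Plain
// arithmetic, no SelectionDAG types:
#include <cassert>

static unsigned largestPowerOfTwoNotAbove(unsigned N) {
  unsigned P = 1;
  while (P * 2 <= N)
    P *= 2;
  return P;
}

int main() {
  unsigned NumParts = 6; // e.g. an i192 value carried in six i32 registers
  unsigned RoundParts = largestPowerOfTwoNotAbove(NumParts); // 4
  unsigned OddParts = NumParts - RoundParts;                 // 2
  assert(RoundParts == 4 && OddParts == 2);
  return 0;
}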
-static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, +static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, SDValue *Parts, unsigned NumParts, EVT PartVT, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { @@ -417,7 +417,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val, DAG.getConstant(RoundBits, TLI.getPointerTy())); - getCopyToParts(DAG, dl, Order, OddVal, Parts + RoundParts, + getCopyToParts(DAG, dl, OddVal, Parts + RoundParts, OddParts, PartVT); if (TLI.isBigEndian()) @@ -514,7 +514,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, // If the register was not expanded, promote or copy the value, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) - getCopyToParts(DAG, dl, Order, Ops[i], &Parts[i], 1, PartVT); + getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT); } else if (NumParts > 0) { // If the intermediate type was expanded, split each the value into // legal parts. @@ -522,7 +522,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) - getCopyToParts(DAG, dl, Order, Ops[i], &Parts[i*Factor], Factor, PartVT); + getCopyToParts(DAG, dl, Ops[i], &Parts[i*Factor], Factor, PartVT); } } @@ -680,7 +680,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { getCurDebugLoc()); } - if (isa<StructType>(C->getType()) || isa<ArrayType>(C->getType())) { + if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && "Unknown struct or array constant!"); @@ -747,8 +747,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); - return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), - SDNodeOrder, Chain, NULL); + return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL); } /// Get the EVTs and ArgFlags collections that represent the legalized return @@ -844,19 +843,17 @@ void SelectionDAGBuilder::visitRet(ReturnInst &I) { Chains[i] = DAG.getStore(Chain, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), - Add, NULL, Offsets[i], false, 0); + Add, NULL, Offsets[i], false, false, 0); } Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, &Chains[0], NumValues); - } else { - for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { - SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs); - unsigned NumValues = ValueVTs.size(); - if (NumValues == 0) continue; - - SDValue RetOp = getValue(I.getOperand(i)); + } else if (I.getNumOperands() != 0) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues) { + SDValue RetOp = getValue(I.getOperand(0)); for (unsigned j = 0, f = NumValues; j != f; ++j) { EVT VT = ValueVTs[j]; @@ -881,7 +878,7 @@ void SelectionDAGBuilder::visitRet(ReturnInst &I) { unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); SmallVector<SDValue, 4> Parts(NumParts); - getCopyToParts(DAG, getCurDebugLoc(), SDNodeOrder, + getCopyToParts(DAG, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), &Parts[0], NumParts, PartVT, ExtendKind); @@ -1973,7 
+1970,7 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, if (Cases.size() >= 2) // Must recompute end() each iteration because it may be // invalidated by erase if we hold on to it - for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) { + for (CaseItr TmpBegin = Cases.begin(), I = TmpBegin, J = ++TmpBegin; J != Cases.end(); ) { const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); MachineBasicBlock* nextBB = J->BB; @@ -2062,9 +2059,15 @@ void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) { } void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) { - // Update machine-CFG edges. + // Update machine-CFG edges with unique successors. + SmallVector<BasicBlock*, 32> succs; + succs.reserve(I.getNumSuccessors()); for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) - CurMBB->addSuccessor(FuncInfo.MBBMap[I.getSuccessor(i)]); + succs.push_back(I.getSuccessor(i)); + array_pod_sort(succs.begin(), succs.end()); + succs.erase(std::unique(succs.begin(), succs.end()), succs.end()); + for (unsigned i = 0, e = succs.size(); i != e; ++i) + CurMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]); DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -2074,7 +2077,7 @@ void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) { void SelectionDAGBuilder::visitFSub(User &I) { // -0.0 - X --> fneg const Type *Ty = I.getType(); - if (isa<VectorType>(Ty)) { + if (Ty->isVectorTy()) { if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) { const VectorType *DestTy = cast<VectorType>(I.getType()); const Type *ElTy = DestTy->getElementType(); @@ -2111,7 +2114,7 @@ void SelectionDAGBuilder::visitBinary(User &I, unsigned OpCode) { void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - if (!isa<VectorType>(I.getType()) && + if (!I.getType()->isVectorTy() && Op2.getValueType() != TLI.getShiftAmountTy()) { // If the operand is smaller than the shift count type, promote it. 
EVT PTy = TLI.getPointerTy(); @@ -2699,7 +2702,9 @@ void SelectionDAGBuilder::visitLoad(LoadInst &I) { SDValue Ptr = getValue(SV); const Type *Ty = I.getType(); + bool isVolatile = I.isVolatile(); + bool isNonTemporal = I.getMetadata("nontemporal") != 0; unsigned Alignment = I.getAlignment(); SmallVector<EVT, 4> ValueVTs; @@ -2731,7 +2736,8 @@ void SelectionDAGBuilder::visitLoad(LoadInst &I) { PtrVT, Ptr, DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, - A, SV, Offsets[i], isVolatile, Alignment); + A, SV, Offsets[i], isVolatile, + isNonTemporal, Alignment); Values[i] = L; Chains[i] = L.getValue(1); @@ -2772,6 +2778,7 @@ void SelectionDAGBuilder::visitStore(StoreInst &I) { SmallVector<SDValue, 4> Chains(NumValues); EVT PtrVT = Ptr.getValueType(); bool isVolatile = I.isVolatile(); + bool isNonTemporal = I.getMetadata("nontemporal") != 0; unsigned Alignment = I.getAlignment(); for (unsigned i = 0; i != NumValues; ++i) { @@ -2779,7 +2786,8 @@ void SelectionDAGBuilder::visitStore(StoreInst &I) { DAG.getConstant(Offsets[i], PtrVT)); Chains[i] = DAG.getStore(Root, getCurDebugLoc(), SDValue(Src.getNode(), Src.getResNo() + i), - Add, PtrV, Offsets[i], isVolatile, Alignment); + Add, PtrV, Offsets[i], isVolatile, + isNonTemporal, Alignment); } DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), @@ -2879,7 +2887,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I, /// /// where Op is the hexidecimal representation of floating point value. static SDValue -GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl, unsigned Order) { +GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) { SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, DAG.getConstant(0x007fffff, MVT::i32)); SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, @@ -2894,7 +2902,7 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl, unsigned Order) { /// where Op is the hexidecimal representation of floating point value. static SDValue GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, - DebugLoc dl, unsigned Order) { + DebugLoc dl) { SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, DAG.getConstant(0x7f800000, MVT::i32)); SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0, @@ -3078,13 +3086,13 @@ SelectionDAGBuilder::visitLog(CallInst &I) { SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); // Scale the exponent by log(2) [0.69314718f]. - SDValue Exp = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder); + SDValue Exp = GetExponent(DAG, Op1, TLI, dl); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, getF32Constant(DAG, 0x3f317218)); // Get the significand and build it into a floating-point number with // exponent of 1. - SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder); + SDValue X = GetSignificand(DAG, Op1, dl); if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: @@ -3188,11 +3196,11 @@ SelectionDAGBuilder::visitLog2(CallInst &I) { SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); // Get the exponent. - SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder); + SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl); // Get the significand and build it into a floating-point number with // exponent of 1. - SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder); + SDValue X = GetSignificand(DAG, Op1, dl); // Different possible minimax approximations of significand in // floating-point for various degrees of accuracy over [1,2]. 
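// A standalone sketch, not part of this commit, of the bit extractions the
// log/log2/log10 lowerings above rely on: GetExponent masks out the f32
// exponent field and removes the IEEE-754 bias, and GetSignificand keeps the
// fraction bits while splicing in the exponent of 1.0, yielding a value in
// [1,2) for the minimax polynomial. Same masks as the hunks, in plain C++:
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  float F = 12.0f; // 1.5 * 2^3
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));

  // Exponent field, minus the standard bias of 127.
  int Exponent = (int)((Bits & 0x7f800000u) >> 23) - 127;

  // Fraction bits with the exponent forced to that of 1.0f (0x3f800000).
  uint32_t SigBits = (Bits & 0x007fffffu) | 0x3f800000u;
  float Significand;
  std::memcpy(&Significand, &SigBits, sizeof(Significand));

  assert(Exponent == 3);
  assert(Significand == 1.5f);
  return 0;
}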
@@ -3297,13 +3305,13 @@ SelectionDAGBuilder::visitLog10(CallInst &I) { SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); // Scale the exponent by log10(2) [0.30102999f]. - SDValue Exp = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder); + SDValue Exp = GetExponent(DAG, Op1, TLI, dl); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, getF32Constant(DAG, 0x3e9a209a)); // Get the significand and build it into a floating-point number with // exponent of 1. - SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder); + SDValue X = GetSignificand(DAG, Op1, dl); if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: @@ -4058,7 +4066,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { // Store the stack protector onto the stack. Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN, PseudoSourceValue::getFixedStack(FI), - 0, true); + 0, true, false, 0); setValue(&I, Res); DAG.setRoot(Res); return 0; @@ -4276,8 +4284,8 @@ isInTailCallPosition(CallSite CS, Attributes CalleeRetAttr, // Check for a truly no-op bitcast. if (isa<BitCastInst>(U) && (U->getOperand(0)->getType() == U->getType() || - (isa<PointerType>(U->getOperand(0)->getType()) && - isa<PointerType>(U->getType())))) + (U->getOperand(0)->getType()->isPointerTy() && + U->getType()->isPointerTy()))) continue; // Otherwise it's not a true no-op. return false; @@ -4385,7 +4393,7 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, CS.getCallingConv(), isTailCall, !CS.getInstruction()->use_empty(), - Callee, Args, DAG, getCurDebugLoc(), SDNodeOrder); + Callee, Args, DAG, getCurDebugLoc()); assert((isTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); assert((Result.second.getNode() || !Result.first.getNode()) && @@ -4410,7 +4418,7 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, DemoteStackSlot, DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second, - Add, NULL, Offsets[i], false, 1); + Add, NULL, Offsets[i], false, false, 1); Values[i] = L; Chains[i] = L.getValue(1); } @@ -4433,7 +4441,7 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT); SDValue ReturnValue = - getCopyFromParts(DAG, getCurDebugLoc(), SDNodeOrder, &Values[CurReg], NumRegs, + getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs, RegisterVT, VT, AssertOp); ReturnValues.push_back(ReturnValue); CurReg += NumRegs; @@ -4512,7 +4520,8 @@ static SDValue getMemCmpLoad(Value *PtrVal, MVT LoadVT, const Type *LoadTy, SDValue Ptr = Builder.getValue(PtrVal); SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/, - false /*volatile*/, 1 /* align=1 */); + false /*volatile*/, + false /*nontemporal*/, 1 /* align=1 */); if (!ConstantMemory) Builder.PendingLoads.push_back(LoadVal.getValue(1)); @@ -4529,9 +4538,9 @@ bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) { return false; Value *LHS = I.getOperand(1), *RHS = I.getOperand(2); - if (!isa<PointerType>(LHS->getType()) || !isa<PointerType>(RHS->getType()) || - !isa<IntegerType>(I.getOperand(3)->getType()) || - !isa<IntegerType>(I.getType())) + if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() || + !I.getOperand(3)->getType()->isIntegerTy() || + !I.getType()->isIntegerTy()) return false; ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3)); @@ -4625,7 +4634,7 @@ void 
SelectionDAGBuilder::visitCall(CallInst &I) { StringRef Name = F->getName(); if (Name == "copysign" || Name == "copysignf") { if (I.getNumOperands() == 3 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPoint() && + I.getOperand(1)->getType()->isFloatingPointTy() && I.getType() == I.getOperand(1)->getType() && I.getType() == I.getOperand(2)->getType()) { SDValue LHS = getValue(I.getOperand(1)); @@ -4636,7 +4645,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { } } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") { if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPoint() && + I.getOperand(1)->getType()->isFloatingPointTy() && I.getType() == I.getOperand(1)->getType()) { SDValue Tmp = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(), @@ -4645,7 +4654,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { } } else if (Name == "sin" || Name == "sinf" || Name == "sinl") { if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPoint() && + I.getOperand(1)->getType()->isFloatingPointTy() && I.getType() == I.getOperand(1)->getType() && I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getOperand(1)); @@ -4655,7 +4664,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { } } else if (Name == "cos" || Name == "cosf" || Name == "cosl") { if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPoint() && + I.getOperand(1)->getType()->isFloatingPointTy() && I.getType() == I.getOperand(1)->getType() && I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getOperand(1)); @@ -4665,7 +4674,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { } } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") { if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPoint() && + I.getOperand(1)->getType()->isFloatingPointTy() && I.getType() == I.getOperand(1)->getType() && I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getOperand(1)); @@ -4699,8 +4708,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, - unsigned Order, SDValue &Chain, - SDValue *Flag) const { + SDValue &Chain, SDValue *Flag) const { // Assemble the legal parts into the final values. SmallVector<SDValue, 4> Values(ValueVTs.size()); SmallVector<SDValue, 8> Parts; @@ -4765,7 +4773,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, Parts[i] = P; } - Values[Value] = getCopyFromParts(DAG, dl, Order, Parts.begin(), + Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs, RegisterVT, ValueVT); Part += NumRegs; Parts.clear(); @@ -4781,8 +4789,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, - unsigned Order, SDValue &Chain, - SDValue *Flag) const { + SDValue &Chain, SDValue *Flag) const { // Get the list of the values's legal parts. 
unsigned NumRegs = Regs.size(); SmallVector<SDValue, 8> Parts(NumRegs); @@ -4791,7 +4798,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), ValueVT); EVT RegisterVT = RegVTs[Value]; - getCopyToParts(DAG, dl, Order, + getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part], NumParts, RegisterVT); Part += NumParts; @@ -4832,7 +4839,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, /// values added into it. void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,unsigned MatchingIdx, - SelectionDAG &DAG, unsigned Order, + SelectionDAG &DAG, std::vector<SDValue> &Ops) const { assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!"); unsigned Flag = Code | (Regs.size() << 3); @@ -5330,7 +5337,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); Chain = DAG.getStore(Chain, getCurDebugLoc(), - OpInfo.CallOperand, StackSlot, NULL, 0); + OpInfo.CallOperand, StackSlot, NULL, 0, + false, false, 0); OpInfo.CallOperand = StackSlot; } @@ -5421,7 +5429,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { 2 /* REGDEF */ , false, 0, - DAG, SDNodeOrder, + DAG, AsmNodeOperands); break; } @@ -5469,10 +5477,10 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), - SDNodeOrder, Chain, &Flag); + Chain, &Flag); MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/, true, OpInfo.getMatchedOperand(), - DAG, SDNodeOrder, AsmNodeOperands); + DAG, AsmNodeOperands); break; } else { assert(((OpFlag & 7) == 4) && "Unknown matching constraint!"); @@ -5533,11 +5541,10 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { } OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), - SDNodeOrder, Chain, &Flag); + Chain, &Flag); OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0, - DAG, SDNodeOrder, - AsmNodeOperands); + DAG, AsmNodeOperands); break; } case InlineAsm::isClobber: { @@ -5545,7 +5552,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // allocator is aware that the physreg got clobbered. if (!OpInfo.AssignedRegs.Regs.empty()) OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */, - false, 0, DAG, SDNodeOrder, + false, 0, DAG, AsmNodeOperands); break; } @@ -5565,7 +5572,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // and set it as the value of the call. if (!RetValRegs.Regs.empty()) { SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(), - SDNodeOrder, Chain, &Flag); + Chain, &Flag); // FIXME: Why don't we do this for inline asms with MRVs? 
if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { @@ -5605,7 +5612,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { RegsForValue &OutRegs = IndirectStoresToEmit[i].first; Value *Ptr = IndirectStoresToEmit[i].second; SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(), - SDNodeOrder, Chain, &Flag); + Chain, &Flag); StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); } @@ -5616,7 +5623,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { SDValue Val = DAG.getStore(Chain, getCurDebugLoc(), StoresToEmit[i].first, getValue(StoresToEmit[i].second), - StoresToEmit[i].second, 0); + StoresToEmit[i].second, 0, + false, false, 0); OutChains.push_back(Val); } @@ -5669,8 +5677,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, CallingConv::ID CallConv, bool isTailCall, bool isReturnValueUsed, SDValue Callee, - ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl, - unsigned Order) { + ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) { // Handle all of the outgoing arguments. SmallVector<ISD::OutputArg, 32> Outs; for (unsigned i = 0, e = Args.size(); i != e; ++i) { @@ -5721,7 +5728,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, else if (Args[i].isZExt) ExtendKind = ISD::ZERO_EXTEND; - getCopyToParts(DAG, dl, Order, Op, &Parts[0], NumParts, + getCopyToParts(DAG, dl, Op, &Parts[0], NumParts, PartVT, ExtendKind); for (unsigned j = 0; j != NumParts; ++j) { @@ -5800,7 +5807,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, EVT RegisterVT = getRegisterType(RetTy->getContext(), VT); unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT); - ReturnValues.push_back(getCopyFromParts(DAG, dl, Order, &InVals[CurReg], + ReturnValues.push_back(getCopyFromParts(DAG, dl, &InVals[CurReg], NumRegs, RegisterVT, VT, AssertOp)); CurReg += NumRegs; @@ -5840,7 +5847,7 @@ void SelectionDAGBuilder::CopyValueToVirtualRegister(Value *V, unsigned Reg) { RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); - RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), SDNodeOrder, Chain, 0); + RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0); PendingExports.push_back(Chain); } @@ -5966,7 +5973,7 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { EVT VT = ValueVTs[0]; EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT); ISD::NodeType AssertOp = ISD::DELETED_NODE; - SDValue ArgValue = getCopyFromParts(DAG, dl, 0, &InVals[0], 1, + SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, AssertOp); MachineFunction& MF = SDB->DAG.getMachineFunction(); @@ -6000,7 +6007,7 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { else if (F.paramHasAttr(Idx, Attribute::ZExt)) AssertOp = ISD::AssertZext; - ArgValues.push_back(getCopyFromParts(DAG, dl, 0, &InVals[i], + ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, PartVT, VT, AssertOp)); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index da2e6e4..05f9f1f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -56,9 +56,12 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" #include <algorithm> using namespace llvm; +STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on"); + static cl::opt<bool> EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, cl::desc("Enable 
verbose messages in the \"fast\" " @@ -723,9 +726,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // code to the MachineBasicBlock. if (TimePassesIsEnabled) { NamedRegionTimer T("Instruction Selection", GroupName); - InstructionSelect(); + DoInstructionSelection(); } else { - InstructionSelect(); + DoInstructionSelection(); } DEBUG(dbgs() << "Selected selection DAG:\n"); @@ -765,6 +768,66 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(BB->dump()); } +void SelectionDAGISel::DoInstructionSelection() { + DEBUG(errs() << "===== Instruction selection begins:\n"); + + PreprocessISelDAG(); + + // Select target instructions for the DAG. + { + // Number all nodes with a topological order and set DAGSize. + DAGSize = CurDAG->AssignTopologicalOrder(); + + // Create a dummy node (which is not added to allnodes), that adds + // a reference to the root node, preventing it from being deleted, + // and tracking any changes of the root. + HandleSDNode Dummy(CurDAG->getRoot()); + ISelPosition = SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode()); + ++ISelPosition; + + // The AllNodes list is now topological-sorted. Visit the + // nodes by starting at the end of the list (the root of the + // graph) and preceding back toward the beginning (the entry + // node). + while (ISelPosition != CurDAG->allnodes_begin()) { + SDNode *Node = --ISelPosition; + // Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes, + // but there are currently some corner cases that it misses. Also, this + // makes it theoretically possible to disable the DAGCombiner. + if (Node->use_empty()) + continue; + + SDNode *ResNode = Select(Node); + + // FIXME: This is pretty gross. 'Select' should be changed to not return + // anything at all and this code should be nuked with a tactical strike. + + // If node should not be replaced, continue with the next one. + if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE) + continue; + // Replace node. + if (ResNode) + ReplaceUses(Node, ResNode); + + // If after the replacement this node is not used any more, + // remove this dead node. + if (Node->use_empty()) { // Don't delete EntryToken, etc. + ISelUpdater ISU(ISelPosition); + CurDAG->RemoveDeadNode(Node, &ISU); + } + } + + CurDAG->setRoot(Dummy.getValue()); + } + DEBUG(errs() << "===== Instruction selection ends:\n"); + + PostprocessISelDAG(); + + // FIXME: This shouldn't be needed, remove it. + CurDAG->RemoveDeadNodes(); +} + + void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, MachineFunction &MF, MachineModuleInfo *MMI, @@ -870,6 +933,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // feed PHI nodes in successor blocks. if (isa<TerminatorInst>(BI)) if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) { + ++NumFastIselFailures; ResetDebugLoc(SDB, FastIS); if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel miss: "; @@ -894,6 +958,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // Then handle certain instructions as single-LLVM-Instruction blocks. if (isa<CallInst>(BI)) { + ++NumFastIselFailures; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel missed call: "; BI->dump(); @@ -923,6 +988,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // Otherwise, give up on FastISel for the rest of the block. // For now, be a little lenient about non-branch terminators. 
if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) { + ++NumFastIselFailures; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel miss: "; BI->dump(); @@ -972,6 +1038,8 @@ SelectionDAGISel::FinishBasicBlock() { MachineInstr *PHI = SDB->PHINodesToUpdate[i].first; assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); + if (!BB->isSuccessor(PHI->getParent())) + continue; PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second, false)); PHI->addOperand(MachineOperand::CreateMBB(BB)); @@ -1316,13 +1384,29 @@ static SDNode *findFlagUse(SDNode *N) { /// This function recursively traverses up the operand chain, ignoring /// certain nodes. static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, - SDNode *Root, - SmallPtrSet<SDNode*, 16> &Visited) { - if (Use->getNodeId() < Def->getNodeId() || - !Visited.insert(Use)) + SDNode *Root, SmallPtrSet<SDNode*, 16> &Visited, + bool IgnoreChains) { + // The NodeID's are given uniques ID's where a node ID is guaranteed to be + // greater than all of its (recursive) operands. If we scan to a point where + // 'use' is smaller than the node we're scanning for, then we know we will + // never find it. + // + // The Use may be -1 (unassigned) if it is a newly allocated node. This can + // happen because we scan down to newly selected nodes in the case of flag + // uses. + if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1)) + return false; + + // Don't revisit nodes if we already scanned it and didn't fail, we know we + // won't fail if we scan it again. + if (!Visited.insert(Use)) return false; for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) { + // Ignore chain uses, they are validated by HandleMergeInputChains. + if (Use->getOperand(i).getValueType() == MVT::Other && IgnoreChains) + continue; + SDNode *N = Use->getOperand(i).getNode(); if (N == Def) { if (Use == ImmedUse || Use == Root) @@ -1332,32 +1416,24 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, } // Traverse up the operand chain. - if (findNonImmUse(N, Def, ImmedUse, Root, Visited)) + if (findNonImmUse(N, Def, ImmedUse, Root, Visited, IgnoreChains)) return true; } return false; } -/// isNonImmUse - Start searching from Root up the DAG to check is Def can -/// be reached. Return true if that's the case. However, ignore direct uses -/// by ImmedUse (which would be U in the example illustrated in -/// IsLegalAndProfitableToFold) and by Root (which can happen in the store -/// case). -/// FIXME: to be really generic, we should allow direct use by any node -/// that is being folded. But realisticly since we only fold loads which -/// have one non-chain use, we only need to watch out for load/op/store -/// and load/op/cmp case where the root (store / cmp) may reach the load via -/// its chain operand. -static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse) { - SmallPtrSet<SDNode*, 16> Visited; - return findNonImmUse(Root, Def, ImmedUse, Root, Visited); +/// IsProfitableToFold - Returns true if it's profitable to fold the specific +/// operand node N of U during instruction selection that starts at Root. +bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U, + SDNode *Root) const { + if (OptLevel == CodeGenOpt::None) return false; + return N.hasOneUse(); } -/// IsLegalAndProfitableToFold - Returns true if the specific operand node N of -/// U can be folded during instruction selection that starts at Root and -/// folding N is profitable. 
-bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, - SDNode *Root) const { +/// IsLegalToFold - Returns true if the specific operand node N of +/// U can be folded during instruction selection that starts at Root. +bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, + bool IgnoreChains) const { if (OptLevel == CodeGenOpt::None) return false; // If Root use can somehow reach N through a path that that doesn't contain @@ -1402,6 +1478,8 @@ bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, // Fold. But since Fold and FU are flagged together, this will create // a cycle in the scheduling graph. + // If the node has flags, walk down the graph to the "lowest" node in the + // flagged set. EVT VT = Root->getValueType(Root->getNumValues()-1); while (VT == MVT::Flag) { SDNode *FU = findFlagUse(Root); @@ -1409,9 +1487,17 @@ bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, break; Root = FU; VT = Root->getValueType(Root->getNumValues()-1); + + // If our query node has a flag result with a use, we've walked up it. If + // the user (which has already been selected) has a chain or indirectly uses + // the chain, our WalkChainUsers predicate will not consider it. Because of + // this, we cannot ignore chains in this predicate. + IgnoreChains = false; } + - return !isNonImmUse(Root, N, U); + SmallPtrSet<SDNode*, 16> Visited; + return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains); } SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { @@ -1423,6 +1509,7 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { VTs.push_back(MVT::Flag); SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), VTs, &Ops[0], Ops.size()); + New->setNodeId(-1); return New.getNode(); } @@ -1438,25 +1525,1219 @@ SDNode *SelectionDAGISel::Select_EH_LABEL(SDNode *N) { MVT::Other, Tmp, Chain); } +/// GetVBR - decode a vbr encoding whose top bit is set. +ALWAYS_INLINE static uint64_t +GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { + assert(Val >= 128 && "Not a VBR"); + Val &= 127; // Remove first vbr bit. + + unsigned Shift = 7; + uint64_t NextBits; + do { + NextBits = MatcherTable[Idx++]; + Val |= (NextBits&127) << Shift; + Shift += 7; + } while (NextBits & 128); + + return Val; +} + + +/// UpdateChainsAndFlags - When a match is complete, this method updates uses of +/// interior flag and chain results to use the new flag and chain results. +void SelectionDAGISel:: +UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain, + const SmallVectorImpl<SDNode*> &ChainNodesMatched, + SDValue InputFlag, + const SmallVectorImpl<SDNode*> &FlagResultNodesMatched, + bool isMorphNodeTo) { + SmallVector<SDNode*, 4> NowDeadNodes; + + ISelUpdater ISU(ISelPosition); + + // Now that all the normal results are replaced, we replace the chain and + // flag results if present. + if (!ChainNodesMatched.empty()) { + assert(InputChain.getNode() != 0 && + "Matched input chains but didn't produce a chain"); + // Loop over all of the nodes we matched that produced a chain result. + // Replace all the chain results with the final chain we ended up with. + for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { + SDNode *ChainNode = ChainNodesMatched[i]; + + // If this node was already deleted, don't look at it. + if (ChainNode->getOpcode() == ISD::DELETED_NODE) + continue; + + // Don't replace the results of the root node if we're doing a + // MorphNodeTo. 
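// (Annotation, not part of this patch: when morphing, NodeToMatch itself is
// reused as the result node, so its chain result is the output chain of the
// generated code and must not be rewired back to InputChain.)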
+ if (ChainNode == NodeToMatch && isMorphNodeTo) + continue; + + SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1); + if (ChainVal.getValueType() == MVT::Flag) + ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2); + assert(ChainVal.getValueType() == MVT::Other && "Not a chain?"); + CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU); + + // If the node became dead, delete it. + if (ChainNode->use_empty()) + NowDeadNodes.push_back(ChainNode); + } + } + + // If the result produces a flag, update any flag results in the matched + // pattern with the flag result. + if (InputFlag.getNode() != 0) { + // Handle any interior nodes explicitly marked. + for (unsigned i = 0, e = FlagResultNodesMatched.size(); i != e; ++i) { + SDNode *FRN = FlagResultNodesMatched[i]; + + // If this node was already deleted, don't look at it. + if (FRN->getOpcode() == ISD::DELETED_NODE) + continue; + + assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Flag && + "Doesn't have a flag result"); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1), + InputFlag, &ISU); + + // If the node became dead, delete it. + if (FRN->use_empty()) + NowDeadNodes.push_back(FRN); + } + } + + if (!NowDeadNodes.empty()) + CurDAG->RemoveDeadNodes(NowDeadNodes, &ISU); + + DEBUG(errs() << "ISEL: Match complete!\n"); +} + +enum ChainResult { + CR_Simple, + CR_InducesCycle, + CR_LeadsToInteriorNode +}; + +/// WalkChainUsers - Walk down the users of the specified chained node that is +/// part of the pattern we're matching, looking at all of the users we find. +/// This determines whether something is an interior node, whether we have a +/// non-pattern node in between two pattern nodes (which prevent folding because +/// it would induce a cycle) and whether we have a TokenFactor node sandwiched +/// between pattern nodes (in which case the TF becomes part of the pattern). +/// +/// The walk we do here is guaranteed to be small because we quickly get down to +/// already selected nodes "below" us. +static ChainResult +WalkChainUsers(SDNode *ChainedNode, + SmallVectorImpl<SDNode*> &ChainedNodesInPattern, + SmallVectorImpl<SDNode*> &InteriorChainedNodes) { + ChainResult Result = CR_Simple; + + for (SDNode::use_iterator UI = ChainedNode->use_begin(), + E = ChainedNode->use_end(); UI != E; ++UI) { + // Make sure the use is of the chain, not some other value we produce. + if (UI.getUse().getValueType() != MVT::Other) continue; + + SDNode *User = *UI; + + // If we see an already-selected machine node, then we've gone beyond the + // pattern that we're selecting down into the already selected chunk of the + // DAG. + if (User->isMachineOpcode() || + User->getOpcode() == ISD::HANDLENODE) // Root of the graph. + continue; + + if (User->getOpcode() == ISD::CopyToReg || + User->getOpcode() == ISD::CopyFromReg || + User->getOpcode() == ISD::INLINEASM) { + // If their node ID got reset to -1 then they've already been selected. + // Treat them like a MachineOpcode. + if (User->getNodeId() == -1) + continue; + } + + // If we have a TokenFactor, we handle it specially. + if (User->getOpcode() != ISD::TokenFactor) { + // If the node isn't a token factor and isn't part of our pattern, then it + // must be a random chained node in between two nodes we're selecting. + // This happens when we have something like: + // x = load ptr + // call + // y = x+4 + // store y -> ptr + // Because we structurally match the load/store as a read/modify/write, + // but the call is chained between them. 
We cannot fold in this case + // because it would induce a cycle in the graph. + if (!std::count(ChainedNodesInPattern.begin(), + ChainedNodesInPattern.end(), User)) + return CR_InducesCycle; + + // Otherwise we found a node that is part of our pattern. For example in: + // x = load ptr + // y = x+4 + // store y -> ptr + // This would happen when we're scanning down from the load and see the + // store as a user. Record that there is a use of ChainedNode that is + // part of the pattern and keep scanning uses. + Result = CR_LeadsToInteriorNode; + InteriorChainedNodes.push_back(User); + continue; + } + + // If we found a TokenFactor, there are two cases to consider: first if the + // TokenFactor is just hanging "below" the pattern we're matching (i.e. no + // uses of the TF are in our pattern) we just want to ignore it. Second, + // the TokenFactor can be sandwiched in between two chained nodes, like so: + // [Load chain] + // ^ + // | + // [Load] + // ^ ^ + // | \ DAG's like cheese + // / \ do you? + // / | + // [TokenFactor] [Op] + // ^ ^ + // | | + // \ / + // \ / + // [Store] + // + // In this case, the TokenFactor becomes part of our match and we rewrite it + // as a new TokenFactor. + // + // To distinguish these two cases, do a recursive walk down the uses. + switch (WalkChainUsers(User, ChainedNodesInPattern, InteriorChainedNodes)) { + case CR_Simple: + // If the uses of the TokenFactor are just already-selected nodes, ignore + // it, it is "below" our pattern. + continue; + case CR_InducesCycle: + // If the uses of the TokenFactor lead to nodes that are not part of our + // pattern that are not selected, folding would turn this into a cycle, + // bail out now. + return CR_InducesCycle; + case CR_LeadsToInteriorNode: + break; // Otherwise, keep processing. + } + + // Okay, we know we're in the interesting interior case. The TokenFactor + // is now going to be considered part of the pattern so that we rewrite its + // uses (it may have uses that are not part of the pattern) with the + // ultimate chain result of the generated code. We will also add its chain + // inputs as inputs to the ultimate TokenFactor we create. + Result = CR_LeadsToInteriorNode; + ChainedNodesInPattern.push_back(User); + InteriorChainedNodes.push_back(User); + continue; + } + + return Result; +} + +/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains +/// operation for when the pattern matched at least one node with a chains. The +/// input vector contains a list of all of the chained nodes that we match. We +/// must determine if this is a valid thing to cover (i.e. matching it won't +/// induce cycles in the DAG) and if so, creating a TokenFactor node. that will +/// be used as the input node chain for the generated nodes. +static SDValue +HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, + SelectionDAG *CurDAG) { + // Walk all of the chained nodes we've matched, recursively scanning down the + // users of the chain result. This adds any TokenFactor nodes that are caught + // in between chained nodes to the chained and interior nodes list. + SmallVector<SDNode*, 3> InteriorChainedNodes; + for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { + if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched, + InteriorChainedNodes) == CR_InducesCycle) + return SDValue(); // Would induce a cycle. + } + + // Okay, we have walked all the matched nodes and collected TokenFactor nodes + // that we are interested in. Form our input TokenFactor node. 
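// Illustrative example (hypothetical pattern, not part of this patch): when
// matching a read-modify-write such as "x = load p; store x+4 -> p", both the
// load and the store end up in ChainNodesMatched. The store consumes the
// load's chain, so it is an interior node and contributes nothing here; only
// the load's incoming chain is collected, and with a single input chain no
// TokenFactor needs to be created.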
+ SmallVector<SDValue, 3> InputChains; + for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { + // Add the input chain of this node to the InputChains list (which will be + // the operands of the generated TokenFactor) if it's not an interior node. + SDNode *N = ChainNodesMatched[i]; + if (N->getOpcode() != ISD::TokenFactor) { + if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N)) + continue; + + // Otherwise, add the input chain. + SDValue InChain = ChainNodesMatched[i]->getOperand(0); + assert(InChain.getValueType() == MVT::Other && "Not a chain"); + InputChains.push_back(InChain); + continue; + } + + // If we have a token factor, we want to add all inputs of the token factor + // that are not part of the pattern we're matching. + for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) { + if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(), + N->getOperand(op).getNode())) + InputChains.push_back(N->getOperand(op)); + } + } + + SDValue Res; + if (InputChains.size() == 1) + return InputChains[0]; + return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(), + MVT::Other, &InputChains[0], InputChains.size()); +} + +/// MorphNode - Handle morphing a node in place for the selector. +SDNode *SelectionDAGISel:: +MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, + const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) { + // It is possible we're using MorphNodeTo to replace a node with no + // normal results with one that has a normal result (or we could be + // adding a chain) and the input could have flags and chains as well. + // In this case we need to shifting the operands down. + // FIXME: This is a horrible hack and broken in obscure cases, no worse + // than the old isel though. We should sink this into MorphNodeTo. + int OldFlagResultNo = -1, OldChainResultNo = -1; + + unsigned NTMNumResults = Node->getNumValues(); + if (Node->getValueType(NTMNumResults-1) == MVT::Flag) { + OldFlagResultNo = NTMNumResults-1; + if (NTMNumResults != 1 && + Node->getValueType(NTMNumResults-2) == MVT::Other) + OldChainResultNo = NTMNumResults-2; + } else if (Node->getValueType(NTMNumResults-1) == MVT::Other) + OldChainResultNo = NTMNumResults-1; + + // Call the underlying SelectionDAG routine to do the transmogrification. Note + // that this deletes operands of the old node that become dead. + SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops, NumOps); + + // MorphNodeTo can operate in two ways: if an existing node with the + // specified operands exists, it can just return it. Otherwise, it + // updates the node in place to have the requested operands. + if (Res == Node) { + // If we updated the node in place, reset the node ID. To the isel, + // this should be just like a newly allocated machine node. + Res->setNodeId(-1); + } + + unsigned ResNumResults = Res->getNumValues(); + // Move the flag if needed. + if ((EmitNodeInfo & OPFL_FlagOutput) && OldFlagResultNo != -1 && + (unsigned)OldFlagResultNo != ResNumResults-1) + CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldFlagResultNo), + SDValue(Res, ResNumResults-1)); + + if ((EmitNodeInfo & OPFL_FlagOutput) != 0) + --ResNumResults; + + // Move the chain reference if needed. 
+ if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 && + (unsigned)OldChainResultNo != ResNumResults-1) + CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo), + SDValue(Res, ResNumResults-1)); + + // Otherwise, no replacement happened because the node already exists. Replace + // Uses of the old node with the new one. + if (Res != Node) + CurDAG->ReplaceAllUsesWith(Node, Res); + + return Res; +} + +/// CheckPatternPredicate - Implements OP_CheckPatternPredicate. +ALWAYS_INLINE static bool +CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, const SmallVectorImpl<SDValue> &RecordedNodes) { + // Accept if it is exactly the same as a previously recorded node. + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + return N == RecordedNodes[RecNo]; +} + +/// CheckPatternPredicate - Implements OP_CheckPatternPredicate. +ALWAYS_INLINE static bool +CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SelectionDAGISel &SDISel) { + return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]); +} + +/// CheckNodePredicate - Implements OP_CheckNodePredicate. +ALWAYS_INLINE static bool +CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SelectionDAGISel &SDISel, SDNode *N) { + return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]); +} + +ALWAYS_INLINE static bool +CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDNode *N) { + return N->getOpcode() == MatcherTable[MatcherIndex++]; +} + +ALWAYS_INLINE static bool +CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, const TargetLowering &TLI) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (N.getValueType() == VT) return true; + + // Handle the case when VT is iPTR. + return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy(); +} + +ALWAYS_INLINE static bool +CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, const TargetLowering &TLI, + unsigned ChildNo) { + if (ChildNo >= N.getNumOperands()) + return false; // Match fails if out of range child #. + return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI); +} + + +ALWAYS_INLINE static bool +CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N) { + return cast<CondCodeSDNode>(N)->get() == + (ISD::CondCode)MatcherTable[MatcherIndex++]; +} + +ALWAYS_INLINE static bool +CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, const TargetLowering &TLI) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (cast<VTSDNode>(N)->getVT() == VT) + return true; + + // Handle the case when VT is iPTR. 
+ return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy(); +} + +ALWAYS_INLINE static bool +CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N) { + int64_t Val = MatcherTable[MatcherIndex++]; + if (Val & 128) + Val = GetVBR(Val, MatcherTable, MatcherIndex); + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); + return C != 0 && C->getSExtValue() == Val; +} + +ALWAYS_INLINE static bool +CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, SelectionDAGISel &SDISel) { + int64_t Val = MatcherTable[MatcherIndex++]; + if (Val & 128) + Val = GetVBR(Val, MatcherTable, MatcherIndex); + + if (N->getOpcode() != ISD::AND) return false; + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); + return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val); +} + +ALWAYS_INLINE static bool +CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, SelectionDAGISel &SDISel) { + int64_t Val = MatcherTable[MatcherIndex++]; + if (Val & 128) + Val = GetVBR(Val, MatcherTable, MatcherIndex); + + if (N->getOpcode() != ISD::OR) return false; + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); + return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val); +} + +/// IsPredicateKnownToFail - If we know how and can do so without pushing a +/// scope, evaluate the current node. If the current predicate is known to +/// fail, set Result=true and return anything. If the current predicate is +/// known to pass, set Result=false and return the MatcherIndex to continue +/// with. If the current predicate is unknown, set Result=false and return the +/// MatcherIndex to continue with. +static unsigned IsPredicateKnownToFail(const unsigned char *Table, + unsigned Index, SDValue N, + bool &Result, SelectionDAGISel &SDISel, + SmallVectorImpl<SDValue> &RecordedNodes){ + switch (Table[Index++]) { + default: + Result = false; + return Index-1; // Could not evaluate this predicate. 
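// Each case below advances Index past the predicate's operands and reports
// through Result whether the check failed. Illustrative example (hypothetical
// bytes, not from a real table): OPC_CheckInteger reads a VBR-encoded
// immediate, so the bytes {0xAC, 0x02} decode to
// (0xAC & 127) | (0x02 << 7) = 44 + 256 = 300, which is then compared against
// the node's ConstantSDNode value.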
+ case SelectionDAGISel::OPC_CheckSame: + Result = !::CheckSame(Table, Index, N, RecordedNodes); + return Index; + case SelectionDAGISel::OPC_CheckPatternPredicate: + Result = !::CheckPatternPredicate(Table, Index, SDISel); + return Index; + case SelectionDAGISel::OPC_CheckPredicate: + Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode()); + return Index; + case SelectionDAGISel::OPC_CheckOpcode: + Result = !::CheckOpcode(Table, Index, N.getNode()); + return Index; + case SelectionDAGISel::OPC_CheckType: + Result = !::CheckType(Table, Index, N, SDISel.TLI); + return Index; + case SelectionDAGISel::OPC_CheckChild0Type: + case SelectionDAGISel::OPC_CheckChild1Type: + case SelectionDAGISel::OPC_CheckChild2Type: + case SelectionDAGISel::OPC_CheckChild3Type: + case SelectionDAGISel::OPC_CheckChild4Type: + case SelectionDAGISel::OPC_CheckChild5Type: + case SelectionDAGISel::OPC_CheckChild6Type: + case SelectionDAGISel::OPC_CheckChild7Type: + Result = !::CheckChildType(Table, Index, N, SDISel.TLI, + Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Type); + return Index; + case SelectionDAGISel::OPC_CheckCondCode: + Result = !::CheckCondCode(Table, Index, N); + return Index; + case SelectionDAGISel::OPC_CheckValueType: + Result = !::CheckValueType(Table, Index, N, SDISel.TLI); + return Index; + case SelectionDAGISel::OPC_CheckInteger: + Result = !::CheckInteger(Table, Index, N); + return Index; + case SelectionDAGISel::OPC_CheckAndImm: + Result = !::CheckAndImm(Table, Index, N, SDISel); + return Index; + case SelectionDAGISel::OPC_CheckOrImm: + Result = !::CheckOrImm(Table, Index, N, SDISel); + return Index; + } +} + + +struct MatchScope { + /// FailIndex - If this match fails, this is the index to continue with. + unsigned FailIndex; + + /// NodeStack - The node stack when the scope was formed. + SmallVector<SDValue, 4> NodeStack; + + /// NumRecordedNodes - The number of recorded nodes when the scope was formed. + unsigned NumRecordedNodes; + + /// NumMatchedMemRefs - The number of matched memref entries. + unsigned NumMatchedMemRefs; + + /// InputChain/InputFlag - The current chain/flag + SDValue InputChain, InputFlag; + + /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty. + bool HasChainNodesMatched, HasFlagResultNodesMatched; +}; + +SDNode *SelectionDAGISel:: +SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, + unsigned TableSize) { + // FIXME: Should these even be selected? Handle these cases in the caller? + switch (NodeToMatch->getOpcode()) { + default: + break; + case ISD::EntryToken: // These nodes remain the same. + case ISD::BasicBlock: + case ISD::Register: + case ISD::HANDLENODE: + case ISD::TargetConstant: + case ISD::TargetConstantFP: + case ISD::TargetConstantPool: + case ISD::TargetFrameIndex: + case ISD::TargetExternalSymbol: + case ISD::TargetBlockAddress: + case ISD::TargetJumpTable: + case ISD::TargetGlobalTLSAddress: + case ISD::TargetGlobalAddress: + case ISD::TokenFactor: + case ISD::CopyFromReg: + case ISD::CopyToReg: + NodeToMatch->setNodeId(-1); // Mark selected. 
+ return 0; + case ISD::AssertSext: + case ISD::AssertZext: + CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0), + NodeToMatch->getOperand(0)); + return 0; + case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch); + case ISD::EH_LABEL: return Select_EH_LABEL(NodeToMatch); + case ISD::UNDEF: return Select_UNDEF(NodeToMatch); + } + + assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); + + // Set up the node stack with NodeToMatch as the only node on the stack. + SmallVector<SDValue, 8> NodeStack; + SDValue N = SDValue(NodeToMatch, 0); + NodeStack.push_back(N); + + // MatchScopes - Scopes used when matching, if a match failure happens, this + // indicates where to continue checking. + SmallVector<MatchScope, 8> MatchScopes; + + // RecordedNodes - This is the set of nodes that have been recorded by the + // state machine. + SmallVector<SDValue, 8> RecordedNodes; + + // MatchedMemRefs - This is the set of MemRef's we've seen in the input + // pattern. + SmallVector<MachineMemOperand*, 2> MatchedMemRefs; + + // These are the current input chain and flag for use when generating nodes. + // Various Emit operations change these. For example, emitting a copytoreg + // uses and updates these. + SDValue InputChain, InputFlag; + + // ChainNodesMatched - If a pattern matches nodes that have input/output + // chains, the OPC_EmitMergeInputChains operation is emitted which indicates + // which ones they are. The result is captured into this list so that we can + // update the chain results when the pattern is complete. + SmallVector<SDNode*, 3> ChainNodesMatched; + SmallVector<SDNode*, 3> FlagResultNodesMatched; + + DEBUG(errs() << "ISEL: Starting pattern match on root node: "; + NodeToMatch->dump(CurDAG); + errs() << '\n'); + + // Determine where to start the interpreter. Normally we start at opcode #0, + // but if the state machine starts with an OPC_SwitchOpcode, then we + // accelerate the first lookup (which is guaranteed to be hot) with the + // OpcodeOffset table. + unsigned MatcherIndex = 0; + + if (!OpcodeOffset.empty()) { + // Already computed the OpcodeOffset table, just index into it. + if (N.getOpcode() < OpcodeOffset.size()) + MatcherIndex = OpcodeOffset[N.getOpcode()]; + DEBUG(errs() << " Initial Opcode index to " << MatcherIndex << "\n"); + + } else if (MatcherTable[0] == OPC_SwitchOpcode) { + // Otherwise, the table isn't computed, but the state machine does start + // with an OPC_SwitchOpcode instruction. Populate the table now, since this + // is the first time we're selecting an instruction. + unsigned Idx = 1; + while (1) { + // Get the size of this case. + unsigned CaseSize = MatcherTable[Idx++]; + if (CaseSize & 128) + CaseSize = GetVBR(CaseSize, MatcherTable, Idx); + if (CaseSize == 0) break; + + // Get the opcode, add the index to the table. + unsigned Opc = MatcherTable[Idx++]; + if (Opc >= OpcodeOffset.size()) + OpcodeOffset.resize((Opc+1)*2); + OpcodeOffset[Opc] = Idx; + Idx += CaseSize; + } + + // Okay, do the lookup for the first opcode. + if (N.getOpcode() < OpcodeOffset.size()) + MatcherIndex = OpcodeOffset[N.getOpcode()]; + } + + while (1) { + assert(MatcherIndex < TableSize && "Invalid index"); + BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++]; + switch (Opcode) { + case OPC_Scope: { + // Okay, the semantics of this operation are that we should push a scope + // then evaluate the first child. However, pushing a scope only to have + // the first check fail (which then pops it) is inefficient. 
If we can + // determine immediately that the first check (or first several) will + // immediately fail, don't even bother pushing a scope for them. + unsigned FailIndex; + + while (1) { + unsigned NumToSkip = MatcherTable[MatcherIndex++]; + if (NumToSkip & 128) + NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex); + // Found the end of the scope with no match. + if (NumToSkip == 0) { + FailIndex = 0; + break; + } + + FailIndex = MatcherIndex+NumToSkip; + + // If we can't evaluate this predicate without pushing a scope (e.g. if + // it is a 'MoveParent') or if the predicate succeeds on this node, we + // push the scope and evaluate the full predicate chain. + bool Result; + MatcherIndex = IsPredicateKnownToFail(MatcherTable, MatcherIndex, N, + Result, *this, RecordedNodes); + if (!Result) + break; + + DEBUG(errs() << " Skipped scope entry at index " << MatcherIndex + << " continuing at " << FailIndex << "\n"); + + + // Otherwise, we know that this case of the Scope is guaranteed to fail, + // move to the next case. + MatcherIndex = FailIndex; + } + + // If the whole scope failed to match, bail. + if (FailIndex == 0) break; + + // Push a MatchScope which indicates where to go if the first child fails + // to match. + MatchScope NewEntry; + NewEntry.FailIndex = FailIndex; + NewEntry.NodeStack.append(NodeStack.begin(), NodeStack.end()); + NewEntry.NumRecordedNodes = RecordedNodes.size(); + NewEntry.NumMatchedMemRefs = MatchedMemRefs.size(); + NewEntry.InputChain = InputChain; + NewEntry.InputFlag = InputFlag; + NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty(); + NewEntry.HasFlagResultNodesMatched = !FlagResultNodesMatched.empty(); + MatchScopes.push_back(NewEntry); + continue; + } + case OPC_RecordNode: + // Remember this node, it may end up being an operand in the pattern. + RecordedNodes.push_back(N); + continue; + + case OPC_RecordChild0: case OPC_RecordChild1: + case OPC_RecordChild2: case OPC_RecordChild3: + case OPC_RecordChild4: case OPC_RecordChild5: + case OPC_RecordChild6: case OPC_RecordChild7: { + unsigned ChildNo = Opcode-OPC_RecordChild0; + if (ChildNo >= N.getNumOperands()) + break; // Match fails if out of range child #. + + RecordedNodes.push_back(N->getOperand(ChildNo)); + continue; + } + case OPC_RecordMemRef: + MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand()); + continue; + + case OPC_CaptureFlagInput: + // If the current node has an input flag, capture it in InputFlag. + if (N->getNumOperands() != 0 && + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) + InputFlag = N->getOperand(N->getNumOperands()-1); + continue; + + case OPC_MoveChild: { + unsigned ChildNo = MatcherTable[MatcherIndex++]; + if (ChildNo >= N.getNumOperands()) + break; // Match fails if out of range child #. + N = N.getOperand(ChildNo); + NodeStack.push_back(N); + continue; + } + + case OPC_MoveParent: + // Pop the current node off the NodeStack. 
+ NodeStack.pop_back(); + assert(!NodeStack.empty() && "Node stack imbalance!"); + N = NodeStack.back(); + continue; + + case OPC_CheckSame: + if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break; + continue; + case OPC_CheckPatternPredicate: + if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break; + continue; + case OPC_CheckPredicate: + if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this, + N.getNode())) + break; + continue; + case OPC_CheckComplexPat: { + unsigned CPNum = MatcherTable[MatcherIndex++]; + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat"); + if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo], CPNum, + RecordedNodes)) + break; + continue; + } + case OPC_CheckOpcode: + if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break; + continue; + + case OPC_CheckType: + if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break; + continue; + + case OPC_SwitchOpcode: { + unsigned CurNodeOpcode = N.getOpcode(); + unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; + unsigned CaseSize; + while (1) { + // Get the size of this case. + CaseSize = MatcherTable[MatcherIndex++]; + if (CaseSize & 128) + CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex); + if (CaseSize == 0) break; + + // If the opcode matches, then we will execute this case. + if (CurNodeOpcode == MatcherTable[MatcherIndex++]) + break; + + // Otherwise, skip over this case. + MatcherIndex += CaseSize; + } + + // If no cases matched, bail out. + if (CaseSize == 0) break; + + // Otherwise, execute the case we found. + DEBUG(errs() << " OpcodeSwitch from " << SwitchStart + << " to " << MatcherIndex << "\n"); + continue; + } + + case OPC_SwitchType: { + MVT::SimpleValueType CurNodeVT = N.getValueType().getSimpleVT().SimpleTy; + unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; + unsigned CaseSize; + while (1) { + // Get the size of this case. + CaseSize = MatcherTable[MatcherIndex++]; + if (CaseSize & 128) + CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex); + if (CaseSize == 0) break; + + MVT::SimpleValueType CaseVT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (CaseVT == MVT::iPTR) + CaseVT = TLI.getPointerTy().SimpleTy; + + // If the VT matches, then we will execute this case. + if (CurNodeVT == CaseVT) + break; + + // Otherwise, skip over this case. + MatcherIndex += CaseSize; + } + + // If no cases matched, bail out. + if (CaseSize == 0) break; + + // Otherwise, execute the case we found. 
+ DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() + << "] from " << SwitchStart << " to " << MatcherIndex<<'\n'); + continue; + } + case OPC_CheckChild0Type: case OPC_CheckChild1Type: + case OPC_CheckChild2Type: case OPC_CheckChild3Type: + case OPC_CheckChild4Type: case OPC_CheckChild5Type: + case OPC_CheckChild6Type: case OPC_CheckChild7Type: + if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI, + Opcode-OPC_CheckChild0Type)) + break; + continue; + case OPC_CheckCondCode: + if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break; + continue; + case OPC_CheckValueType: + if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI)) break; + continue; + case OPC_CheckInteger: + if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break; + continue; + case OPC_CheckAndImm: + if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break; + continue; + case OPC_CheckOrImm: + if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break; + continue; + + case OPC_CheckFoldableChainNode: { + assert(NodeStack.size() != 1 && "No parent node"); + // Verify that all intermediate nodes between the root and this one have + // a single use. + bool HasMultipleUses = false; + for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i) + if (!NodeStack[i].hasOneUse()) { + HasMultipleUses = true; + break; + } + if (HasMultipleUses) break; + + // Check to see that the target thinks this is profitable to fold and that + // we can fold it without inducing cycles in the graph. + if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(), + NodeToMatch) || + !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(), + NodeToMatch, true/*We validate our own chains*/)) + break; + + continue; + } + case OPC_EmitInteger: { + MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + int64_t Val = MatcherTable[MatcherIndex++]; + if (Val & 128) + Val = GetVBR(Val, MatcherTable, MatcherIndex); + RecordedNodes.push_back(CurDAG->getTargetConstant(Val, VT)); + continue; + } + case OPC_EmitRegister: { + MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + unsigned RegNo = MatcherTable[MatcherIndex++]; + RecordedNodes.push_back(CurDAG->getRegister(RegNo, VT)); + continue; + } + + case OPC_EmitConvertToTarget: { + // Convert from IMM/FPIMM to target version. + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + SDValue Imm = RecordedNodes[RecNo]; + + if (Imm->getOpcode() == ISD::Constant) { + int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue(); + Imm = CurDAG->getTargetConstant(Val, Imm.getValueType()); + } else if (Imm->getOpcode() == ISD::ConstantFP) { + const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue(); + Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType()); + } + + RecordedNodes.push_back(Imm); + continue; + } + + case OPC_EmitMergeInputChains: { + assert(InputChain.getNode() == 0 && + "EmitMergeInputChains should be the first chain producing node"); + // This node gets a list of nodes we matched in the input that have + // chains. We want to token factor all of the input chains to these nodes + // together. However, if any of the input chains is actually one of the + // nodes matched in this pattern, then we have an intra-match reference. + // Ignore these because the newly token factored chain should not refer to + // the old nodes. 
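// Table layout for this opcode, as read below: a count byte followed by that
// many RecordedNodes indices. A hypothetical fragment (illustrative values
// only) such as
//   OPC_EmitMergeInputChains, 2, /*load*/ 0, /*store*/ 2
// names two previously recorded chained nodes whose input chains get merged.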
+ unsigned NumChains = MatcherTable[MatcherIndex++]; + assert(NumChains != 0 && "Can't TF zero chains"); + + assert(ChainNodesMatched.empty() && + "Should only have one EmitMergeInputChains per match"); + + // Read all of the chained nodes. + for (unsigned i = 0; i != NumChains; ++i) { + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode()); + + // FIXME: What if other value results of the node have uses not matched + // by this pattern? + if (ChainNodesMatched.back() != NodeToMatch && + !RecordedNodes[RecNo].hasOneUse()) { + ChainNodesMatched.clear(); + break; + } + } + + // If the inner loop broke out, the match fails. + if (ChainNodesMatched.empty()) + break; + + // Merge the input chains if they are not intra-pattern references. + InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); + + if (InputChain.getNode() == 0) + break; // Failed to merge. + + continue; + } + + case OPC_EmitCopyToReg: { + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + unsigned DestPhysReg = MatcherTable[MatcherIndex++]; + + if (InputChain.getNode() == 0) + InputChain = CurDAG->getEntryNode(); + + InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(), + DestPhysReg, RecordedNodes[RecNo], + InputFlag); + + InputFlag = InputChain.getValue(1); + continue; + } + + case OPC_EmitNodeXForm: { + unsigned XFormNo = MatcherTable[MatcherIndex++]; + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + RecordedNodes.push_back(RunSDNodeXForm(RecordedNodes[RecNo], XFormNo)); + continue; + } + + case OPC_EmitNode: + case OPC_MorphNodeTo: { + uint16_t TargetOpc = MatcherTable[MatcherIndex++]; + TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; + unsigned EmitNodeInfo = MatcherTable[MatcherIndex++]; + // Get the result VT list. + unsigned NumVTs = MatcherTable[MatcherIndex++]; + SmallVector<EVT, 4> VTs; + for (unsigned i = 0; i != NumVTs; ++i) { + MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy; + VTs.push_back(VT); + } + + if (EmitNodeInfo & OPFL_Chain) + VTs.push_back(MVT::Other); + if (EmitNodeInfo & OPFL_FlagOutput) + VTs.push_back(MVT::Flag); + + // This is hot code, so optimize the two most common cases of 1 and 2 + // results. + SDVTList VTList; + if (VTs.size() == 1) + VTList = CurDAG->getVTList(VTs[0]); + else if (VTs.size() == 2) + VTList = CurDAG->getVTList(VTs[0], VTs[1]); + else + VTList = CurDAG->getVTList(VTs.data(), VTs.size()); + + // Get the operand list. + unsigned NumOps = MatcherTable[MatcherIndex++]; + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i != NumOps; ++i) { + unsigned RecNo = MatcherTable[MatcherIndex++]; + if (RecNo & 128) + RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); + + assert(RecNo < RecordedNodes.size() && "Invalid EmitNode"); + Ops.push_back(RecordedNodes[RecNo]); + } + + // If there are variadic operands to add, handle them now. + if (EmitNodeInfo & OPFL_VariadicInfo) { + // Determine the start index to copy from. + unsigned FirstOpToCopy = getNumFixedFromVariadicInfo(EmitNodeInfo); + FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0; + assert(NodeToMatch->getNumOperands() >= FirstOpToCopy && + "Invalid variadic node"); + // Copy all of the variadic operands, not including a potential flag + // input. 
+ for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands(); + i != e; ++i) { + SDValue V = NodeToMatch->getOperand(i); + if (V.getValueType() == MVT::Flag) break; + Ops.push_back(V); + } + } + + // If this has chain/flag inputs, add them. + if (EmitNodeInfo & OPFL_Chain) + Ops.push_back(InputChain); + if ((EmitNodeInfo & OPFL_FlagInput) && InputFlag.getNode() != 0) + Ops.push_back(InputFlag); + + // Create the node. + SDNode *Res = 0; + if (Opcode != OPC_MorphNodeTo) { + // If this is a normal EmitNode command, just create the new node and + // add the results to the RecordedNodes list. + Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(), + VTList, Ops.data(), Ops.size()); + + // Add all the non-flag/non-chain results to the RecordedNodes list. + for (unsigned i = 0, e = VTs.size(); i != e; ++i) { + if (VTs[i] == MVT::Other || VTs[i] == MVT::Flag) break; + RecordedNodes.push_back(SDValue(Res, i)); + } + + } else { + Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(), + EmitNodeInfo); + } + + // If the node had chain/flag results, update our notion of the current + // chain and flag. + if (EmitNodeInfo & OPFL_FlagOutput) { + InputFlag = SDValue(Res, VTs.size()-1); + if (EmitNodeInfo & OPFL_Chain) + InputChain = SDValue(Res, VTs.size()-2); + } else if (EmitNodeInfo & OPFL_Chain) + InputChain = SDValue(Res, VTs.size()-1); + + // If the OPFL_MemRefs flag is set on this node, slap all of the + // accumulated memrefs onto it. + // + // FIXME: This is vastly incorrect for patterns with multiple outputs + // instructions that access memory and for ComplexPatterns that match + // loads. + if (EmitNodeInfo & OPFL_MemRefs) { + MachineSDNode::mmo_iterator MemRefs = + MF->allocateMemRefsArray(MatchedMemRefs.size()); + std::copy(MatchedMemRefs.begin(), MatchedMemRefs.end(), MemRefs); + cast<MachineSDNode>(Res) + ->setMemRefs(MemRefs, MemRefs + MatchedMemRefs.size()); + } + + DEBUG(errs() << " " + << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created") + << " node: "; Res->dump(CurDAG); errs() << "\n"); + + // If this was a MorphNodeTo then we're completely done! + if (Opcode == OPC_MorphNodeTo) { + // Update chain and flag uses. + UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched, + InputFlag, FlagResultNodesMatched, true); + return Res; + } + + continue; + } + + case OPC_MarkFlagResults: { + unsigned NumNodes = MatcherTable[MatcherIndex++]; + + // Read and remember all the flag-result nodes. + for (unsigned i = 0; i != NumNodes; ++i) { + unsigned RecNo = MatcherTable[MatcherIndex++]; + if (RecNo & 128) + RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); + + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + FlagResultNodesMatched.push_back(RecordedNodes[RecNo].getNode()); + } + continue; + } + + case OPC_CompleteMatch: { + // The match has been completed, and any new nodes (if any) have been + // created. Patch up references to the matched dag to use the newly + // created nodes. + unsigned NumResults = MatcherTable[MatcherIndex++]; + + for (unsigned i = 0; i != NumResults; ++i) { + unsigned ResSlot = MatcherTable[MatcherIndex++]; + if (ResSlot & 128) + ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex); + + assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame"); + SDValue Res = RecordedNodes[ResSlot]; + + // FIXME2: Eliminate this horrible hack by fixing the 'Gen' program + // after (parallel) on input patterns are removed. This would also + // allow us to stop encoding #results in OPC_CompleteMatch's table + // entry. 
+ if (NodeToMatch->getNumValues() <= i || + NodeToMatch->getValueType(i) == MVT::Other || + NodeToMatch->getValueType(i) == MVT::Flag) + break; + assert((NodeToMatch->getValueType(i) == Res.getValueType() || + NodeToMatch->getValueType(i) == MVT::iPTR || + Res.getValueType() == MVT::iPTR || + NodeToMatch->getValueType(i).getSizeInBits() == + Res.getValueType().getSizeInBits()) && + "invalid replacement"); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res); + } + + // If the root node defines a flag, add it to the flag nodes to update + // list. + if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Flag) + FlagResultNodesMatched.push_back(NodeToMatch); + + // Update chain and flag uses. + UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched, + InputFlag, FlagResultNodesMatched, false); + + assert(NodeToMatch->use_empty() && + "Didn't replace all uses of the node?"); + + // FIXME: We just return here, which interacts correctly with SelectRoot + // above. We should fix this to not return an SDNode* anymore. + return 0; + } + } + + // If the code reached this point, then the match failed. See if there is + // another child to try in the current 'Scope', otherwise pop it until we + // find a case to check. + while (1) { + if (MatchScopes.empty()) { + CannotYetSelect(NodeToMatch); + return 0; + } + + // Restore the interpreter state back to the point where the scope was + // formed. + MatchScope &LastScope = MatchScopes.back(); + RecordedNodes.resize(LastScope.NumRecordedNodes); + NodeStack.clear(); + NodeStack.append(LastScope.NodeStack.begin(), LastScope.NodeStack.end()); + N = NodeStack.back(); + + DEBUG(errs() << " Match failed at index " << MatcherIndex + << " continuing at " << LastScope.FailIndex << "\n"); + + if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size()) + MatchedMemRefs.resize(LastScope.NumMatchedMemRefs); + MatcherIndex = LastScope.FailIndex; + + InputChain = LastScope.InputChain; + InputFlag = LastScope.InputFlag; + if (!LastScope.HasChainNodesMatched) + ChainNodesMatched.clear(); + if (!LastScope.HasFlagResultNodesMatched) + FlagResultNodesMatched.clear(); + + // Check to see what the offset is at the new MatcherIndex. If it is zero + // we have reached the end of this scope, otherwise we have another child + // in the current scope to try. + unsigned NumToSkip = MatcherTable[MatcherIndex++]; + if (NumToSkip & 128) + NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex); + + // If we have another child in this scope to match, update FailIndex and + // try it. + if (NumToSkip != 0) { + LastScope.FailIndex = MatcherIndex+NumToSkip; + break; + } + + // End of this scope, pop it and try the next child in the containing + // scope. 
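// (RecordedNodes, NodeStack, MatchedMemRefs and the chain/flag state were
// already restored from LastScope above, so popping simply discards the
// exhausted scope and the loop retries in the enclosing one.)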
+ MatchScopes.pop_back(); + } + } +} + + + void SelectionDAGISel::CannotYetSelect(SDNode *N) { std::string msg; raw_string_ostream Msg(msg); Msg << "Cannot yet select: "; - N->printrFull(Msg, CurDAG); + + if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN && + N->getOpcode() != ISD::INTRINSIC_WO_CHAIN && + N->getOpcode() != ISD::INTRINSIC_VOID) { + N->printrFull(Msg, CurDAG); + } else { + bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other; + unsigned iid = + cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue(); + if (iid < Intrinsic::num_intrinsics) + Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid); + else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo()) + Msg << "target intrinsic %" << TII->getName(iid); + else + Msg << "unknown intrinsic #" << iid; + } llvm_report_error(Msg.str()); } -void SelectionDAGISel::CannotYetSelectIntrinsic(SDNode *N) { - dbgs() << "Cannot yet select: "; - unsigned iid = - cast<ConstantSDNode>(N->getOperand(N->getOperand(0).getValueType() == - MVT::Other))->getZExtValue(); - if (iid < Intrinsic::num_intrinsics) - llvm_report_error("Cannot yet select: intrinsic %" + - Intrinsic::getName((Intrinsic::ID)iid)); - else if (const TargetIntrinsicInfo *tii = TM.getIntrinsicInfo()) - llvm_report_error(Twine("Cannot yet select: target intrinsic %") + - tii->getName(iid)); -} - char SelectionDAGISel::ID = 0; diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index d74ec7e..8d0d884 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include <ctype.h> using namespace llvm; namespace llvm { @@ -540,6 +541,24 @@ TargetLowering::~TargetLowering() { delete &TLOF; } +/// canOpTrap - Returns true if the operation can trap for the value type. +/// VT must be a legal type. +bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const { + assert(isTypeLegal(VT)); + switch (Op) { + default: + return false; + case ISD::FDIV: + case ISD::FREM: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + return true; + } +} + + static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, unsigned &NumIntermediates, EVT &RegisterVT, @@ -1423,8 +1442,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::TRUNCATE: { // Simplify the input, using demanded bit information, and compute the known // zero/one bits live out. + unsigned OperandBitWidth = + Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); APInt TruncMask = NewMask; - TruncMask.zext(Op.getOperand(0).getValueSizeInBits()); + TruncMask.zext(OperandBitWidth); if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, KnownZero, KnownOne, TLO, Depth+1)) return true; @@ -1435,15 +1456,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // on the known demanded bits. if (Op.getOperand(0).getNode()->hasOneUse()) { SDValue In = Op.getOperand(0); - unsigned InBitWidth = In.getValueSizeInBits(); switch (In.getOpcode()) { default: break; case ISD::SRL: // Shrink SRL by a constant if none of the high bits shifted in are // demanded. 
if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){ - APInt HighBits = APInt::getHighBitsSet(InBitWidth, - InBitWidth - BitWidth); + APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, + OperandBitWidth - BitWidth); HighBits = HighBits.lshr(ShAmt->getZExtValue()); HighBits.trunc(BitWidth); @@ -1589,7 +1609,7 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { // Fall back to ComputeMaskedBits to catch other known cases. EVT OpVT = Val.getValueType(); - unsigned BitWidth = OpVT.getSizeInBits(); + unsigned BitWidth = OpVT.getScalarType().getSizeInBits(); APInt Mask = APInt::getAllOnesValue(BitWidth); APInt KnownZero, KnownOne; DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne); @@ -1698,7 +1718,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, Lod->getSrcValue(), Lod->getSrcValueOffset() + bestOffset, - false, NewAlign); + false, false, NewAlign); return DAG.getSetCC(dl, VT, DAG.getNode(ISD::AND, dl, newVT, NewLoad, DAG.getConstant(bestMask.trunc(bestWidth), @@ -1757,7 +1777,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, break; // todo, be more careful with signed comparisons } } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT(); unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits(); EVT ExtDstTy = N0.getValueType(); @@ -1791,22 +1811,21 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Cond); } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { - // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC - if (N0.getOpcode() == ISD::SETCC) { + if (N0.getOpcode() == ISD::SETCC && + isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) { bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1); if (TrueWhenTrue) - return N0; - + return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); // Invert the condition. ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType().isInteger()); return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); } - + if ((N0.getOpcode() == ISD::XOR || - (N0.getOpcode() == ISD::AND && + (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::XOR && N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && isa<ConstantSDNode>(N0.getOperand(1)) && @@ -1829,9 +1848,36 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOperand(0).getOperand(0), N0.getOperand(1)); } + return DAG.getSetCC(dl, VT, Val, N1, Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); } + } else if (N1C->getAPIntValue() == 1 && + (VT == MVT::i1 || + getBooleanContents() == ZeroOrOneBooleanContent)) { + SDValue Op0 = N0; + if (Op0.getOpcode() == ISD::TRUNCATE) + Op0 = Op0.getOperand(0); + + if ((Op0.getOpcode() == ISD::XOR) && + Op0.getOperand(0).getOpcode() == ISD::SETCC && + Op0.getOperand(1).getOpcode() == ISD::SETCC) { + // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc) + Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ; + return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1), + Cond); + } else if (Op0.getOpcode() == ISD::AND && + isa<ConstantSDNode>(Op0.getOperand(1)) && + cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) { + // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0. 
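The TRUNCATE/SRL rewrite above is easier to see with concrete widths. With a 64-bit operand, a 32-bit result and a shift of 8, the only wide-operand bits a narrowed shift would lose are the high 32 source bits that the original srl moves into the low 32 result bits; if the demanded-bits mask does not cover them, the shift can be performed in the narrow type. A small numeric check of that condition (plain integers in place of APInt, widths hardcoded for the example):

    #include <cstdint>
    #include <cstdio>

    int main() {
      const unsigned BitWidth = 32, ShAmt = 8;

      // High bits of the wide operand that truncation would normally discard.
      uint64_t HighBits = ~uint64_t(0) << BitWidth;      // bits 32..63
      // After the wide srl, some of those bits land inside the low 32 bits.
      uint64_t ShiftedHigh = HighBits >> ShAmt;          // bits 24..55
      uint32_t InResult = uint32_t(ShiftedHigh);         // bits 24..31 of the result

      // Demanded bits of the truncated value; here the consumer only looks at
      // the low 16 bits, so none of the shifted-in high bits are demanded.
      uint32_t DemandedMask = 0xFFFF;

      if ((InResult & DemandedMask) == 0)
        std::printf("safe: (trunc (srl x, %u)) -> (srl (trunc x), %u)\n",
                    ShAmt, ShAmt);
      else
        std::printf("not safe to narrow the shift\n");
      return 0;
    }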
+ if (Op0.getValueType() != VT) + Op0 = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)), + DAG.getConstant(1, VT)); + return DAG.getSetCC(dl, VT, Op0, + DAG.getConstant(0, Op0.getValueType()), + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); + } } } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 1d9bda4..ce72b2f 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -662,7 +662,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, if (!tii_->isTriviallyReMaterializable(DefMI, AA)) return false; bool SawStore = false; - if (!DefMI->isSafeToMove(tii_, SawStore, AA)) + if (!DefMI->isSafeToMove(tii_, AA, SawStore)) return false; if (TID.getNumDefs() != 1) return false; @@ -702,7 +702,8 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, for (const unsigned* SR = tri_->getSubRegisters(DstReg); *SR; ++SR) { if (!li_->hasInterval(*SR)) continue; - DLR = li_->getInterval(*SR).getLiveRangeContaining(DefIdx); + const LiveRange *DLR = + li_->getInterval(*SR).getLiveRangeContaining(DefIdx); if (DLR && DLR->valno->getCopy() == CopyMI) DLR->valno->setCopy(0); } @@ -741,9 +742,21 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, NewMI->addOperand(MO); if (MO.isDef() && li_->hasInterval(MO.getReg())) { unsigned Reg = MO.getReg(); - DLR = li_->getInterval(Reg).getLiveRangeContaining(DefIdx); + const LiveRange *DLR = + li_->getInterval(Reg).getLiveRangeContaining(DefIdx); if (DLR && DLR->valno->getCopy() == CopyMI) DLR->valno->setCopy(0); + // Handle subregs as well + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (const unsigned* SR = tri_->getSubRegisters(Reg); *SR; ++SR) { + if (!li_->hasInterval(*SR)) + continue; + const LiveRange *DLR = + li_->getInterval(*SR).getLiveRangeContaining(DefIdx); + if (DLR && DLR->valno->getCopy() == CopyMI) + DLR->valno->setCopy(0); + } + } } } @@ -752,6 +765,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, CopyMI->eraseFromParent(); ReMatCopies.insert(CopyMI); ReMatDefs.insert(DefMI); + DEBUG(dbgs() << "Remat: " << *NewMI); ++NumReMats; return true; } @@ -771,11 +785,16 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, SubIdx = 0; } + // Copy the register use-list before traversing it. We may be adding operands + // and invalidating pointers. + SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist; for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg), - E = mri_->reg_end(); I != E; ) { - MachineOperand &O = I.getOperand(); - MachineInstr *UseMI = &*I; - ++I; + E = mri_->reg_end(); I != E; ++I) + reglist.push_back(std::make_pair(&*I, I.getOperandNo())); + + for (unsigned N=0; N != reglist.size(); ++N) { + MachineInstr *UseMI = reglist[N].first; + MachineOperand &O = UseMI->getOperand(reglist[N].second); unsigned OldSubIdx = O.getSubReg(); if (DstIsPhys) { unsigned UseDstReg = DstReg; @@ -796,6 +815,19 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, O.setReg(UseDstReg); O.setSubReg(0); + if (OldSubIdx) { + // Def and kill of subregister of a virtual register actually defs and + // kills the whole register. Add imp-defs and imp-kills as needed. 
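The UpdateRegDefsUses hunk above snapshots the register use-list into a vector of (instruction, operand index) pairs before rewriting, because adding implicit operands while walking the list would invalidate the operand references it hands out. The same idiom in miniature, with std::vector standing in for the operand list (hypothetical types, not the MachineInstr API):

    #include <cstdio>
    #include <utility>
    #include <vector>

    struct Operand { int Reg; };
    struct Instr { std::vector<Operand> Ops; };

    int main() {
      Instr MI;
      MI.Ops.push_back({1024});
      MI.Ops.push_back({1024});

      // Unsafe: holding 'Operand &O' would dangle once push_back reallocates.
      // Safe: record (instruction, operand index) pairs first, then mutate.
      std::vector<std::pair<Instr*, unsigned>> Uses;
      for (unsigned i = 0; i != MI.Ops.size(); ++i)
        if (MI.Ops[i].Reg == 1024)
          Uses.push_back({&MI, i});

      for (auto &U : Uses) {
        U.first->Ops[U.second].Reg = 7;   // rewrite vreg 1024 -> physreg 7
        U.first->Ops.push_back({8});      // adding an implicit operand is now fine
      }
      std::printf("%zu operands, first reg = %d\n", MI.Ops.size(), MI.Ops[0].Reg);
      return 0;
    }

Indexing back into the operand vector stays valid across reallocation, which is exactly why the pair stores an index rather than a pointer to the operand.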
+ if (O.isDef()) { + if(O.isDead()) + UseMI->addRegisterDead(DstReg, tri_, true); + else + UseMI->addRegisterDefined(DstReg, tri_); + } else if (!O.isUndef() && + (O.isKill() || + UseMI->isRegTiedToDefOperand(&O-&UseMI->getOperand(0)))) + UseMI->addRegisterKilled(DstReg, tri_, true); + } continue; } @@ -1148,12 +1180,14 @@ SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned LargeReg, LiveInterval &SmallInt = li_->getInterval(SmallReg); unsigned LargeSize = li_->getApproximateInstructionCount(LargeInt); unsigned SmallSize = li_->getApproximateInstructionCount(SmallInt); - if (SmallSize > Threshold || LargeSize > Threshold) - if ((float)std::distance(mri_->use_nodbg_begin(SmallReg), - mri_->use_nodbg_end()) / SmallSize < - (float)std::distance(mri_->use_nodbg_begin(LargeReg), - mri_->use_nodbg_end()) / LargeSize) + if (LargeSize > Threshold) { + unsigned SmallUses = std::distance(mri_->use_nodbg_begin(SmallReg), + mri_->use_nodbg_end()); + unsigned LargeUses = std::distance(mri_->use_nodbg_begin(LargeReg), + mri_->use_nodbg_end()); + if (SmallUses*LargeSize < LargeUses*SmallSize) return false; + } return true; } @@ -1173,6 +1207,8 @@ SimpleRegisterCoalescing::HasIncompatibleSubRegDefUse(MachineInstr *CopyMI, for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(VirtReg), E = mri_->reg_end(); I != E; ++I) { MachineOperand &O = I.getOperand(); + if (O.isDebug()) + continue; MachineInstr *MI = &*I; if (MI == CopyMI || JoinedCopies.count(MI)) continue; @@ -1559,7 +1595,10 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { (isExtSubReg || DstRC->isASubClass()) && !isWinToJoinCrossClass(LargeReg, SmallReg, allocatableRCRegs_[NewRC].count())) { - DEBUG(dbgs() << "\tSrc/Dest are different register classes.\n"); + DEBUG(dbgs() << "\tSrc/Dest are different register classes: " + << SrcRC->getName() << "/" + << DstRC->getName() << " -> " + << NewRC->getName() << ".\n"); // Allow the coalescer to try again in case either side gets coalesced to // a physical register that's compatible with the other side. e.g. // r1024 = MOV32to32_ r1025 @@ -1680,6 +1719,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) || RemoveCopyByCommutingDef(SrcInt, DstInt, CopyMI))) { JoinedCopies.insert(CopyMI); + DEBUG(dbgs() << "Trivial!\n"); return true; } @@ -1839,7 +1879,7 @@ static unsigned ComputeUltimateVN(VNInfo *VNI, // If the VN has already been computed, just return it. if (ThisValNoAssignments[VN] >= 0) return ThisValNoAssignments[VN]; -// assert(ThisValNoAssignments[VN] != -2 && "Cyclic case?"); + assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers"); // If this val is not a copy from the other val, then it must be a new value // number in the destination. 
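The isWinToJoinCrossClass change above compares use densities with an integer cross-multiplication instead of two float divisions: SmallUses/SmallSize < LargeUses/LargeSize exactly when SmallUses*LargeSize < LargeUses*SmallSize, given positive sizes. A minimal standalone check of that equivalence:

    #include <cstdio>

    // Compare use densities Uses1/Size1 < Uses2/Size2 without floating point,
    // assuming both sizes are positive (interval sizes here always are).
    static bool denserSecond(unsigned Uses1, unsigned Size1,
                             unsigned Uses2, unsigned Size2) {
      // Uses1/Size1 < Uses2/Size2  <=>  Uses1*Size2 < Uses2*Size1
      return (unsigned long long)Uses1 * Size2 <
             (unsigned long long)Uses2 * Size1;
    }

    int main() {
      // Small interval: 3 uses over 10 instrs; large: 50 uses over 100 instrs.
      std::printf("%d\n", denserSecond(3, 10, 50, 100)); // 0.3 < 0.5 -> prints 1
      return 0;
    }

Widening to 64 bits before multiplying avoids overflow, and the result is exact where the old floating-point ratio comparison could round.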
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 8d4d1b2..059e8d6 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -44,7 +44,6 @@ namespace { const Type *FunctionContextTy; Constant *RegisterFn; Constant *UnregisterFn; - Constant *ResumeFn; Constant *BuiltinSetjmpFn; Constant *FrameAddrFn; Constant *LSDAAddrFn; @@ -67,8 +66,8 @@ namespace { } private: - void markInvokeCallSite(InvokeInst *II, unsigned InvokeNo, - Value *CallSite, + void insertCallSiteStore(Instruction *I, int Number, Value *CallSite); + void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite, SwitchInst *CatchSwitch); void splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes); bool insertSjLjEHSupport(Function &F); @@ -107,11 +106,6 @@ bool SjLjEHPass::doInitialization(Module &M) { Type::getVoidTy(M.getContext()), PointerType::getUnqual(FunctionContextTy), (Type *)0); - ResumeFn = - M.getOrInsertFunction("_Unwind_SjLj_Resume", - Type::getVoidTy(M.getContext()), - VoidPtrTy, - (Type *)0); FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp); LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); @@ -123,12 +117,22 @@ bool SjLjEHPass::doInitialization(Module &M) { return true; } +/// insertCallSiteStore - Insert a store of the call-site value to the +/// function context +void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number, + Value *CallSite) { + ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()), + Number); + // Insert a store of the call-site number + new StoreInst(CallSiteNoC, CallSite, true, I); // volatile +} + /// markInvokeCallSite - Insert code to mark the call_site for this invoke -void SjLjEHPass::markInvokeCallSite(InvokeInst *II, unsigned InvokeNo, +void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite, SwitchInst *CatchSwitch) { ConstantInt *CallSiteNoC= ConstantInt::get(Type::getInt32Ty(II->getContext()), - InvokeNo); + InvokeNo); // The runtime comes back to the dispatcher with the call_site - 1 in // the context. Odd, but there it is. ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()), @@ -145,8 +149,11 @@ void SjLjEHPass::markInvokeCallSite(InvokeInst *II, unsigned InvokeNo, } } - // Insert a store of the invoke num before the invoke - new StoreInst(CallSiteNoC, CallSite, true, II); // volatile + // Insert the store of the call site value + insertCallSiteStore(II, InvokeNo, CallSite); + + // Record the call site value for the back end so it stays associated with + // the invoke. CallInst::Create(CallSiteFn, CallSiteNoC, "", II); // Add a switch case to our unwind block. @@ -272,8 +279,8 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { SmallVector<InvokeInst*,16> Invokes; // Look through the terminators of the basic blocks to find invokes, returns - // and unwinds - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + // and unwinds. + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { // Remember all return instructions in case we insert an invoke into this // function. @@ -283,6 +290,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { Unwinds.push_back(UI); } + } // If we don't have any invokes or unwinds, there's nothing to do. 
if (Unwinds.empty() && Invokes.empty()) return false; @@ -478,24 +486,21 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { for (unsigned i = 0, e = Invokes.size(); i != e; ++i) markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch); - // The front end has likely added calls to _Unwind_Resume. We need - // to find those calls and mark the call_site as -1 immediately prior. - // resume is a noreturn function, so any block that has a call to it - // should end in an 'unreachable' instruction with the call immediately - // prior. That's how we'll search. - // ??? There's got to be a better way. this is fugly. - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - if ((dyn_cast<UnreachableInst>(BB->getTerminator()))) { - BasicBlock::iterator I = BB->getTerminator(); - // Check the previous instruction and see if it's a resume call - if (I == BB->begin()) continue; - if (CallInst *CI = dyn_cast<CallInst>(--I)) { - if (CI->getCalledFunction() == ResumeFn) { - Value *NegativeOne = Constant::getAllOnesValue(Int32Ty); - new StoreInst(NegativeOne, CallSite, true, I); // volatile - } + // Mark call instructions that aren't nounwind as no-action + // (call_site == -1). Skip the entry block, as prior to then, no function + // context has been created for this function and any unexpected exceptions + // thrown will go directly to the caller's context, which is what we want + // anyway, so no need to do anything here. + for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) { + for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I) + if (CallInst *CI = dyn_cast<CallInst>(I)) { + // Ignore calls to the EH builtins (eh.selector, eh.exception) + Constant *Callee = CI->getCalledFunction(); + if (Callee != SelectorFn && Callee != ExceptionFn + && !CI->doesNotThrow()) + insertCallSiteStore(CI, -1, CallSite); } - } + } // Replace all unwinds with a branch to the unwind handler. // ??? Should this ever happen with sjlj exceptions? diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 48bb5af..8a6a727 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -113,7 +113,7 @@ bool StackProtector::RequiresStackProtector() const { if (const ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) { // We apparently only care about character arrays. - if (!AT->getElementType()->isInteger(8)) + if (!AT->getElementType()->isIntegerTy(8)) continue; // If an array has more than SSPBufferSize bytes of allocated space, diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 9ab4058..3223e53 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -403,26 +403,45 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, II->RemoveOperand(i); } } - II->RemoveOperand(Idx+1); - II->RemoveOperand(Idx); - } + } else + Idx = 0; + + // If Idx is set, the operands at Idx and Idx+1 must be removed. + // We reuse the location to avoid expensive RemoveOperand calls. + DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg); if (LI != SSAUpdateVals.end()) { // This register is defined in the tail block. 
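Taken together, the SjLj hunks number the landing-pad call sites 1..N for invokes and store -1 before ordinary calls that may throw, while the EH builtins, nounwind calls and the entry block are left alone. A small standalone model of that numbering policy (hypothetical stand-ins for IR call sites, not the pass's real data structures):

    #include <cstdio>
    #include <string>
    #include <vector>

    // Hypothetical stand-ins for call sites seen while walking a function.
    struct Call {
      std::string Callee;
      bool IsInvoke;   // has an EH landing pad
      bool NoUnwind;   // declared nounwind
    };

    int main() {
      std::vector<Call> Calls = {
        {"foo", true,  false},                // invoke -> call_site 1
        {"bar", true,  false},                // invoke -> call_site 2
        {"llvm.eh.selector", false, false},   // EH builtin: no store
        {"baz", false, false},                // may-throw call -> call_site -1
        {"qux", false, true},                 // nounwind call: no store
      };

      int NextInvokeNo = 1;
      for (const Call &C : Calls) {
        if (C.Callee == "llvm.eh.selector" || C.Callee == "llvm.eh.exception")
          continue;                           // skip the EH intrinsics
        if (C.IsInvoke)
          std::printf("%s: store call_site = %d\n", C.Callee.c_str(),
                      NextInvokeNo++);
        else if (!C.NoUnwind)
          std::printf("%s: store call_site = -1\n", C.Callee.c_str());
      }
      return 0;
    }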
for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { MachineBasicBlock *SrcBB = LI->second[j].first; unsigned SrcReg = LI->second[j].second; - II->addOperand(MachineOperand::CreateReg(SrcReg, false)); - II->addOperand(MachineOperand::CreateMBB(SrcBB)); + if (Idx != 0) { + II->getOperand(Idx).setReg(SrcReg); + II->getOperand(Idx+1).setMBB(SrcBB); + Idx = 0; + } else { + II->addOperand(MachineOperand::CreateReg(SrcReg, false)); + II->addOperand(MachineOperand::CreateMBB(SrcBB)); + } } } else { // Live in tail block, must also be live in predecessors. for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { MachineBasicBlock *SrcBB = TDBBs[j]; - II->addOperand(MachineOperand::CreateReg(Reg, false)); - II->addOperand(MachineOperand::CreateMBB(SrcBB)); + if (Idx != 0) { + II->getOperand(Idx).setReg(Reg); + II->getOperand(Idx+1).setMBB(SrcBB); + Idx = 0; + } else { + II->addOperand(MachineOperand::CreateReg(Reg, false)); + II->addOperand(MachineOperand::CreateMBB(SrcBB)); + } } } + if (Idx != 0) { + II->RemoveOperand(Idx+1); + II->RemoveOperand(Idx); + } } } } diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index a0fccab..e9e998f 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -150,6 +150,11 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB, MBB.insert(I, MI); } +bool TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0, + const MachineInstr *MI1) const { + return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); +} + MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig, MachineFunction &MF) const { assert(!Orig->getDesc().isNotDuplicable() && @@ -157,37 +162,6 @@ MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig, return MF.CloneMachineInstr(Orig); } -bool -TargetInstrInfoImpl::isIdentical(const MachineInstr *MI, - const MachineInstr *Other, - const MachineRegisterInfo *MRI) const { - if (MI->getOpcode() != Other->getOpcode() || - MI->getNumOperands() != Other->getNumOperands()) - return false; - - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - const MachineOperand &OMO = Other->getOperand(i); - if (MO.isReg() && MO.isDef()) { - assert(OMO.isReg() && OMO.isDef()); - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - if (Reg != OMO.getReg()) - return false; - } else if (MRI->getRegClass(MO.getReg()) != - MRI->getRegClass(OMO.getReg())) - return false; - - continue; - } - - if (!MO.isIdenticalTo(OMO)) - return false; - } - - return true; -} - unsigned TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const { unsigned FnSize = 0; diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp new file mode 100644 index 0000000..d127f53 --- /dev/null +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -0,0 +1,902 @@ +//===-- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements classes used to handle lowerings specific to common +// object file formats. 
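The tail-duplication hunk above avoids RemoveOperand churn by reusing the PHI's existing (register, block) slot for the first replacement, appending any further entries, and erasing the slot only when there was nothing to put in it. The same reuse-then-trim idea on a plain vector of pairs (an illustrative model, not MachineInstr):

    #include <cstdio>
    #include <utility>
    #include <vector>

    // A PHI modelled as a list of (incoming value, predecessor id) pairs.
    using Phi = std::vector<std::pair<int, int>>;

    // Replace the pair at 'Idx' with the entries in 'New': reuse the existing
    // slot for the first entry, append the rest, and only erase the slot when
    // there is nothing to put in it.
    static void replaceIncoming(Phi &P, size_t Idx,
                                const std::vector<std::pair<int, int>> &New) {
      bool Reused = false;
      for (const auto &E : New) {
        if (!Reused) { P[Idx] = E; Reused = true; }
        else         P.push_back(E);
      }
      if (!Reused)
        P.erase(P.begin() + Idx);   // the old slot had no replacement
    }

    int main() {
      Phi P = {{100, 1}, {200, 2}};
      replaceIncoming(P, 0, {{300, 3}, {400, 4}});  // slot 0 reused, one appended
      for (auto &E : P)
        std::printf("(%d, bb%d) ", E.first, E.second);
      std::printf("\n");  // (300, bb3) (200, bb2) (400, bb4)
      return 0;
    }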
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +using namespace llvm; +using namespace dwarf; + +//===----------------------------------------------------------------------===// +// ELF +//===----------------------------------------------------------------------===// +typedef StringMap<const MCSectionELF*> ELFUniqueMapTy; + +TargetLoweringObjectFileELF::~TargetLoweringObjectFileELF() { + // If we have the section uniquing map, free it. + delete (ELFUniqueMapTy*)UniquingMap; +} + +const MCSection *TargetLoweringObjectFileELF:: +getELFSection(StringRef Section, unsigned Type, unsigned Flags, + SectionKind Kind, bool IsExplicit) const { + if (UniquingMap == 0) + UniquingMap = new ELFUniqueMapTy(); + ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)UniquingMap; + + // Do the lookup, if we have a hit, return it. + const MCSectionELF *&Entry = Map[Section]; + if (Entry) return Entry; + + return Entry = MCSectionELF::Create(Section, Type, Flags, Kind, IsExplicit, + getContext()); +} + +void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + if (UniquingMap != 0) + ((ELFUniqueMapTy*)UniquingMap)->clear(); + TargetLoweringObjectFile::Initialize(Ctx, TM); + + BSSSection = + getELFSection(".bss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, + SectionKind::getBSS()); + + TextSection = + getELFSection(".text", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_EXECINSTR | MCSectionELF::SHF_ALLOC, + SectionKind::getText()); + + DataSection = + getELFSection(".data", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); + + ReadOnlySection = + getELFSection(".rodata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC, + SectionKind::getReadOnly()); + + TLSDataSection = + getELFSection(".tdata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | + MCSectionELF::SHF_WRITE, SectionKind::getThreadData()); + + TLSBSSSection = + getELFSection(".tbss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | + MCSectionELF::SHF_WRITE, SectionKind::getThreadBSS()); + + DataRelSection = + getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + DataRelLocalSection = + getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRelLocal()); + + DataRelROSection = + getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getReadOnlyWithRel()); + + DataRelROLocalSection = + getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | 
MCSectionELF::SHF_WRITE, + SectionKind::getReadOnlyWithRelLocal()); + + MergeableConst4Section = + getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst4()); + + MergeableConst8Section = + getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst8()); + + MergeableConst16Section = + getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst16()); + + StaticCtorSection = + getELFSection(".ctors", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + StaticDtorSection = + getELFSection(".dtors", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + // Exception Handling Sections. + + // FIXME: We're emitting LSDA info into a readonly section on ELF, even though + // it contains relocatable pointers. In PIC mode, this is probably a big + // runtime hit for C++ apps. Either the contents of the LSDA need to be + // adjusted or this should be a data section. + LSDASection = + getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC, SectionKind::getReadOnly()); + EHFrameSection = + getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + // Debug Info Sections. + DwarfAbbrevSection = + getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfInfoSection = + getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfLineSection = + getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfFrameSection = + getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfPubNamesSection = + getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfPubTypesSection = + getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfStrSection = + getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfLocSection = + getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfARangesSection = + getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfRangesSection = + getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfMacroInfoSection = + getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); +} + + +static SectionKind +getELFKindForNamedSection(StringRef Name, SectionKind K) { + if (Name.empty() || Name[0] != '.') return K; + + // Some lame default implementation based on some magic section names. 
+ if (Name == ".bss" || + Name.startswith(".bss.") || + Name.startswith(".gnu.linkonce.b.") || + Name.startswith(".llvm.linkonce.b.") || + Name == ".sbss" || + Name.startswith(".sbss.") || + Name.startswith(".gnu.linkonce.sb.") || + Name.startswith(".llvm.linkonce.sb.")) + return SectionKind::getBSS(); + + if (Name == ".tdata" || + Name.startswith(".tdata.") || + Name.startswith(".gnu.linkonce.td.") || + Name.startswith(".llvm.linkonce.td.")) + return SectionKind::getThreadData(); + + if (Name == ".tbss" || + Name.startswith(".tbss.") || + Name.startswith(".gnu.linkonce.tb.") || + Name.startswith(".llvm.linkonce.tb.")) + return SectionKind::getThreadBSS(); + + return K; +} + + +static unsigned getELFSectionType(StringRef Name, SectionKind K) { + + if (Name == ".init_array") + return MCSectionELF::SHT_INIT_ARRAY; + + if (Name == ".fini_array") + return MCSectionELF::SHT_FINI_ARRAY; + + if (Name == ".preinit_array") + return MCSectionELF::SHT_PREINIT_ARRAY; + + if (K.isBSS() || K.isThreadBSS()) + return MCSectionELF::SHT_NOBITS; + + return MCSectionELF::SHT_PROGBITS; +} + + +static unsigned +getELFSectionFlags(SectionKind K) { + unsigned Flags = 0; + + if (!K.isMetadata()) + Flags |= MCSectionELF::SHF_ALLOC; + + if (K.isText()) + Flags |= MCSectionELF::SHF_EXECINSTR; + + if (K.isWriteable()) + Flags |= MCSectionELF::SHF_WRITE; + + if (K.isThreadLocal()) + Flags |= MCSectionELF::SHF_TLS; + + // K.isMergeableConst() is left out to honour PR4650 + if (K.isMergeableCString() || K.isMergeableConst4() || + K.isMergeableConst8() || K.isMergeableConst16()) + Flags |= MCSectionELF::SHF_MERGE; + + if (K.isMergeableCString()) + Flags |= MCSectionELF::SHF_STRINGS; + + return Flags; +} + + +const MCSection *TargetLoweringObjectFileELF:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + StringRef SectionName = GV->getSection(); + + // Infer section flags from the section name if we can. + Kind = getELFKindForNamedSection(SectionName, Kind); + + return getELFSection(SectionName, + getELFSectionType(SectionName, Kind), + getELFSectionFlags(Kind), Kind, true); +} + +static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) { + if (Kind.isText()) return ".gnu.linkonce.t."; + if (Kind.isReadOnly()) return ".gnu.linkonce.r."; + + if (Kind.isThreadData()) return ".gnu.linkonce.td."; + if (Kind.isThreadBSS()) return ".gnu.linkonce.tb."; + + if (Kind.isDataNoRel()) return ".gnu.linkonce.d."; + if (Kind.isDataRelLocal()) return ".gnu.linkonce.d.rel.local."; + if (Kind.isDataRel()) return ".gnu.linkonce.d.rel."; + if (Kind.isReadOnlyWithRelLocal()) return ".gnu.linkonce.d.rel.ro.local."; + + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return ".gnu.linkonce.d.rel.ro."; +} + +const MCSection *TargetLoweringObjectFileELF:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + + // If this global is linkonce/weak and the target handles this by emitting it + // into a 'uniqued' section name, create and return the section now. 
+ if (GV->isWeakForLinker() && !Kind.isCommon() && !Kind.isBSS()) { + const char *Prefix = getSectionPrefixForUniqueGlobal(Kind); + SmallString<128> Name; + Name.append(Prefix, Prefix+strlen(Prefix)); + Mang->getNameWithPrefix(Name, GV, false); + return getELFSection(Name.str(), getELFSectionType(Name.str(), Kind), + getELFSectionFlags(Kind), Kind); + } + + if (Kind.isText()) return TextSection; + + if (Kind.isMergeable1ByteCString() || + Kind.isMergeable2ByteCString() || + Kind.isMergeable4ByteCString()) { + + // We also need alignment here. + // FIXME: this is getting the alignment of the character, not the + // alignment of the global! + unsigned Align = + TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)); + + const char *SizeSpec = ".rodata.str1."; + if (Kind.isMergeable2ByteCString()) + SizeSpec = ".rodata.str2."; + else if (Kind.isMergeable4ByteCString()) + SizeSpec = ".rodata.str4."; + else + assert(Kind.isMergeable1ByteCString() && "unknown string width"); + + + std::string Name = SizeSpec + utostr(Align); + return getELFSection(Name, MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::SHF_MERGE | + MCSectionELF::SHF_STRINGS, + Kind); + } + + if (Kind.isMergeableConst()) { + if (Kind.isMergeableConst4() && MergeableConst4Section) + return MergeableConst4Section; + if (Kind.isMergeableConst8() && MergeableConst8Section) + return MergeableConst8Section; + if (Kind.isMergeableConst16() && MergeableConst16Section) + return MergeableConst16Section; + return ReadOnlySection; // .const + } + + if (Kind.isReadOnly()) return ReadOnlySection; + + if (Kind.isThreadData()) return TLSDataSection; + if (Kind.isThreadBSS()) return TLSBSSSection; + + // Note: we claim that common symbols are put in BSSSection, but they are + // really emitted with the magic .comm directive, which creates a symbol table + // entry but not a section. + if (Kind.isBSS() || Kind.isCommon()) return BSSSection; + + if (Kind.isDataNoRel()) return DataSection; + if (Kind.isDataRelLocal()) return DataRelLocalSection; + if (Kind.isDataRel()) return DataRelSection; + if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; + + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return DataRelROSection; +} + +/// getSectionForConstant - Given a mergeable constant with the +/// specified size and relocation information, return a section that it +/// should be placed in. +const MCSection *TargetLoweringObjectFileELF:: +getSectionForConstant(SectionKind Kind) const { + if (Kind.isMergeableConst4() && MergeableConst4Section) + return MergeableConst4Section; + if (Kind.isMergeableConst8() && MergeableConst8Section) + return MergeableConst8Section; + if (Kind.isMergeableConst16() && MergeableConst16Section) + return MergeableConst16Section; + if (Kind.isReadOnly()) + return ReadOnlySection; + + if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return DataRelROSection; +} + +const MCExpr *TargetLoweringObjectFileELF:: +getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, unsigned Encoding) const { + + if (Encoding & dwarf::DW_EH_PE_indirect) { + MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += ".DW.stub"; + + // Add information about the stub reference to ELFMMI so that the stub + // gets emitted by the asmprinter. 
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); + MCSymbol *&StubSym = ELFMMI.getGVStubEntry(Sym); + if (StubSym == 0) { + Name.clear(); + Mang->getNameWithPrefix(Name, GV, false); + StubSym = getContext().GetOrCreateSymbol(Name.str()); + } + + return TargetLoweringObjectFile:: + getSymbolForDwarfReference(Sym, MMI, + Encoding & ~dwarf::DW_EH_PE_indirect); + } + + return TargetLoweringObjectFile:: + getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding); +} + +//===----------------------------------------------------------------------===// +// MachO +//===----------------------------------------------------------------------===// + +typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy; + +TargetLoweringObjectFileMachO::~TargetLoweringObjectFileMachO() { + // If we have the MachO uniquing map, free it. + delete (MachOUniqueMapTy*)UniquingMap; +} + + +const MCSectionMachO *TargetLoweringObjectFileMachO:: +getMachOSection(StringRef Segment, StringRef Section, + unsigned TypeAndAttributes, + unsigned Reserved2, SectionKind Kind) const { + // We unique sections by their segment/section pair. The returned section + // may not have the same flags as the requested section, if so this should be + // diagnosed by the client as an error. + + // Create the map if it doesn't already exist. + if (UniquingMap == 0) + UniquingMap = new MachOUniqueMapTy(); + MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)UniquingMap; + + // Form the name to look up. + SmallString<64> Name; + Name += Segment; + Name.push_back(','); + Name += Section; + + // Do the lookup, if we have a hit, return it. + const MCSectionMachO *&Entry = Map[Name.str()]; + if (Entry) return Entry; + + // Otherwise, return a new section. + return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes, + Reserved2, Kind, getContext()); +} + + +void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + if (UniquingMap != 0) + ((MachOUniqueMapTy*)UniquingMap)->clear(); + TargetLoweringObjectFile::Initialize(Ctx, TM); + + TextSection // .text + = getMachOSection("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); + DataSection // .data + = getMachOSection("__DATA", "__data", 0, SectionKind::getDataRel()); + + CStringSection // .cstring + = getMachOSection("__TEXT", "__cstring", MCSectionMachO::S_CSTRING_LITERALS, + SectionKind::getMergeable1ByteCString()); + UStringSection + = getMachOSection("__TEXT","__ustring", 0, + SectionKind::getMergeable2ByteCString()); + FourByteConstantSection // .literal4 + = getMachOSection("__TEXT", "__literal4", MCSectionMachO::S_4BYTE_LITERALS, + SectionKind::getMergeableConst4()); + EightByteConstantSection // .literal8 + = getMachOSection("__TEXT", "__literal8", MCSectionMachO::S_8BYTE_LITERALS, + SectionKind::getMergeableConst8()); + + // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back + // to using it in -static mode. 
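getMachOSection above uniques sections by their "segment,section" string, so repeated requests return the same object even when later callers pass different attributes; as the code's own comment notes, such mismatches are left for the client to diagnose. A minimal standalone version of that uniquing cache, using std::map in place of StringMap (illustrative only):

    #include <cstdio>
    #include <map>
    #include <string>

    struct Section {
      std::string Segment, Name;
      unsigned TypeAndAttributes;
    };

    // Cache keyed by "segment,section"; repeated lookups return the same object.
    static const Section *getMachOSection(std::map<std::string, Section> &Map,
                                          const std::string &Segment,
                                          const std::string &Name,
                                          unsigned TypeAndAttributes) {
      std::string Key = Segment + "," + Name;
      auto It = Map.find(Key);
      if (It != Map.end())
        return &It->second;          // hit: reuse; attributes are NOT rechecked
      Section S{Segment, Name, TypeAndAttributes};
      return &Map.emplace(Key, S).first->second;
    }

    int main() {
      std::map<std::string, Section> Map;
      const Section *A = getMachOSection(Map, "__TEXT", "__text", 0x80000000);
      const Section *B = getMachOSection(Map, "__TEXT", "__text", 0); // differing bits
      std::printf("same object: %d, attrs = 0x%x\n", A == B, A->TypeAndAttributes);
      return 0;
    }

std::map node addresses are stable across insertions, which is what makes handing out pointers into the cache safe here; the attribute values in the example are arbitrary bits, not named MachO constants.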
+ SixteenByteConstantSection = 0; + if (TM.getRelocationModel() != Reloc::Static && + TM.getTargetData()->getPointerSize() == 32) + SixteenByteConstantSection = // .literal16 + getMachOSection("__TEXT", "__literal16",MCSectionMachO::S_16BYTE_LITERALS, + SectionKind::getMergeableConst16()); + + ReadOnlySection // .const + = getMachOSection("__TEXT", "__const", 0, SectionKind::getReadOnly()); + + TextCoalSection + = getMachOSection("__TEXT", "__textcoal_nt", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); + ConstTextCoalSection + = getMachOSection("__TEXT", "__const_coal", MCSectionMachO::S_COALESCED, + SectionKind::getText()); + ConstDataCoalSection + = getMachOSection("__DATA","__const_coal", MCSectionMachO::S_COALESCED, + SectionKind::getText()); + ConstDataSection // .const_data + = getMachOSection("__DATA", "__const", 0, + SectionKind::getReadOnlyWithRel()); + DataCoalSection + = getMachOSection("__DATA","__datacoal_nt", MCSectionMachO::S_COALESCED, + SectionKind::getDataRel()); + DataCommonSection + = getMachOSection("__DATA","__common", MCSectionMachO::S_ZEROFILL, + SectionKind::getBSS()); + DataBSSSection + = getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL, + SectionKind::getBSS()); + + + LazySymbolPointerSection + = getMachOSection("__DATA", "__la_symbol_ptr", + MCSectionMachO::S_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); + NonLazySymbolPointerSection + = getMachOSection("__DATA", "__nl_symbol_ptr", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); + + if (TM.getRelocationModel() == Reloc::Static) { + StaticCtorSection + = getMachOSection("__TEXT", "__constructor", 0,SectionKind::getDataRel()); + StaticDtorSection + = getMachOSection("__TEXT", "__destructor", 0, SectionKind::getDataRel()); + } else { + StaticCtorSection + = getMachOSection("__DATA", "__mod_init_func", + MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, + SectionKind::getDataRel()); + StaticDtorSection + = getMachOSection("__DATA", "__mod_term_func", + MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, + SectionKind::getDataRel()); + } + + // Exception Handling. + LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0, + SectionKind::getDataRel()); + EHFrameSection = + getMachOSection("__TEXT", "__eh_frame", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_NO_TOC | + MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | + MCSectionMachO::S_ATTR_LIVE_SUPPORT, + SectionKind::getReadOnly()); + + // Debug Information. 
+ DwarfAbbrevSection = + getMachOSection("__DWARF", "__debug_abbrev", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfInfoSection = + getMachOSection("__DWARF", "__debug_info", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfLineSection = + getMachOSection("__DWARF", "__debug_line", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfFrameSection = + getMachOSection("__DWARF", "__debug_frame", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfPubNamesSection = + getMachOSection("__DWARF", "__debug_pubnames", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfPubTypesSection = + getMachOSection("__DWARF", "__debug_pubtypes", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfStrSection = + getMachOSection("__DWARF", "__debug_str", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfLocSection = + getMachOSection("__DWARF", "__debug_loc", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfARangesSection = + getMachOSection("__DWARF", "__debug_aranges", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfRangesSection = + getMachOSection("__DWARF", "__debug_ranges", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfMacroInfoSection = + getMachOSection("__DWARF", "__debug_macinfo", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfDebugInlineSection = + getMachOSection("__DWARF", "__debug_inlined", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); +} + +const MCSection *TargetLoweringObjectFileMachO:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + // Parse the section specifier and create it if valid. + StringRef Segment, Section; + unsigned TAA, StubSize; + std::string ErrorCode = + MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, + TAA, StubSize); + if (!ErrorCode.empty()) { + // If invalid, report the error with llvm_report_error. + llvm_report_error("Global variable '" + GV->getNameStr() + + "' has an invalid section specifier '" + GV->getSection()+ + "': " + ErrorCode + "."); + // Fall back to dropping it into the data section. + return DataSection; + } + + // Get the section. + const MCSectionMachO *S = + getMachOSection(Segment, Section, TAA, StubSize, Kind); + + // Okay, now that we got the section, verify that the TAA & StubSize agree. + // If the user declared multiple globals with different section flags, we need + // to reject it here. + if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { + // If invalid, report the error with llvm_report_error. + llvm_report_error("Global variable '" + GV->getNameStr() + + "' section type or attributes does not match previous" + " section specifier"); + } + + return S; +} + +const MCSection *TargetLoweringObjectFileMachO:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + assert(!Kind.isThreadLocal() && "Darwin doesn't support TLS"); + + if (Kind.isText()) + return GV->isWeakForLinker() ? TextCoalSection : TextSection; + + // If this is weak/linkonce, put this in a coalescable section, either in text + // or data depending on if it is writable. + if (GV->isWeakForLinker()) { + if (Kind.isReadOnly()) + return ConstTextCoalSection; + return DataCoalSection; + } + + // FIXME: Alignment check should be handled by section classifier. 
+ if (Kind.isMergeable1ByteCString() && + TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) + return CStringSection; + + // Do not put 16-bit arrays in the UString section if they have an + // externally visible label, this runs into issues with certain linker + // versions. + if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() && + TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) + return UStringSection; + + if (Kind.isMergeableConst()) { + if (Kind.isMergeableConst4()) + return FourByteConstantSection; + if (Kind.isMergeableConst8()) + return EightByteConstantSection; + if (Kind.isMergeableConst16() && SixteenByteConstantSection) + return SixteenByteConstantSection; + } + + // Otherwise, if it is readonly, but not something we can specially optimize, + // just drop it in .const. + if (Kind.isReadOnly()) + return ReadOnlySection; + + // If this is marked const, put it into a const section. But if the dynamic + // linker needs to write to it, put it in the data segment. + if (Kind.isReadOnlyWithRel()) + return ConstDataSection; + + // Put zero initialized globals with strong external linkage in the + // DATA, __common section with the .zerofill directive. + if (Kind.isBSSExtern()) + return DataCommonSection; + + // Put zero initialized globals with local linkage in __DATA,__bss directive + // with the .zerofill directive (aka .lcomm). + if (Kind.isBSSLocal()) + return DataBSSSection; + + // Otherwise, just drop the variable in the normal data section. + return DataSection; +} + +const MCSection * +TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const { + // If this constant requires a relocation, we have to put it in the data + // segment, not in the text segment. + if (Kind.isDataRel() || Kind.isReadOnlyWithRel()) + return ConstDataSection; + + if (Kind.isMergeableConst4()) + return FourByteConstantSection; + if (Kind.isMergeableConst8()) + return EightByteConstantSection; + if (Kind.isMergeableConst16() && SixteenByteConstantSection) + return SixteenByteConstantSection; + return ReadOnlySection; // .const +} + +/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide +/// not to emit the UsedDirective for some symbols in llvm.used. +// FIXME: REMOVE this (rdar://7071300) +bool TargetLoweringObjectFileMachO:: +shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const { + /// On Darwin, internally linked data beginning with "L" or "l" does not have + /// the directive emitted (this occurs in ObjC metadata). + if (!GV) return false; + + // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix. + if (GV->hasLocalLinkage() && !isa<Function>(GV)) { + // FIXME: ObjC metadata is currently emitted as internal symbols that have + // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and + // this horrible hack can go away. + SmallString<64> Name; + Mang->getNameWithPrefix(Name, GV, false); + if (Name[0] == 'L' || Name[0] == 'l') + return false; + } + + return true; +} + +const MCExpr *TargetLoweringObjectFileMachO:: +getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, unsigned Encoding) const { + // The mach-o version of this method defaults to returning a stub reference. 
+ + if (Encoding & DW_EH_PE_indirect) { + MachineModuleInfoMachO &MachOMMI = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += "$non_lazy_ptr"; + + // Add information about the stub reference to MachOMMI so that the stub + // gets emitted by the asmprinter. + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); + MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym); + if (StubSym == 0) { + Name.clear(); + Mang->getNameWithPrefix(Name, GV, false); + StubSym = getContext().GetOrCreateSymbol(Name.str()); + } + + return TargetLoweringObjectFile:: + getSymbolForDwarfReference(Sym, MMI, + Encoding & ~dwarf::DW_EH_PE_indirect); + } + + return TargetLoweringObjectFile:: + getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding); +} + +unsigned TargetLoweringObjectFileMachO::getPersonalityEncoding() const { + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned TargetLoweringObjectFileMachO::getLSDAEncoding() const { + return DW_EH_PE_pcrel; +} + +unsigned TargetLoweringObjectFileMachO::getFDEEncoding() const { + return DW_EH_PE_pcrel; +} + +unsigned TargetLoweringObjectFileMachO::getTTypeEncoding() const { + return DW_EH_PE_absptr; +} + +//===----------------------------------------------------------------------===// +// COFF +//===----------------------------------------------------------------------===// + +typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy; + +TargetLoweringObjectFileCOFF::~TargetLoweringObjectFileCOFF() { + delete (COFFUniqueMapTy*)UniquingMap; +} + + +const MCSection *TargetLoweringObjectFileCOFF:: +getCOFFSection(StringRef Name, bool isDirective, SectionKind Kind) const { + // Create the map if it doesn't already exist. + if (UniquingMap == 0) + UniquingMap = new MachOUniqueMapTy(); + COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)UniquingMap; + + // Do the lookup, if we have a hit, return it. + const MCSectionCOFF *&Entry = Map[Name]; + if (Entry) return Entry; + + return Entry = MCSectionCOFF::Create(Name, isDirective, Kind, getContext()); +} + +void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + if (UniquingMap != 0) + ((COFFUniqueMapTy*)UniquingMap)->clear(); + TargetLoweringObjectFile::Initialize(Ctx, TM); + TextSection = getCOFFSection("\t.text", true, SectionKind::getText()); + DataSection = getCOFFSection("\t.data", true, SectionKind::getDataRel()); + StaticCtorSection = + getCOFFSection(".ctors", false, SectionKind::getDataRel()); + StaticDtorSection = + getCOFFSection(".dtors", false, SectionKind::getDataRel()); + + // FIXME: We're emitting LSDA info into a readonly section on COFF, even + // though it contains relocatable pointers. In PIC mode, this is probably a + // big runtime hit for C++ apps. Either the contents of the LSDA need to be + // adjusted or this should be a data section. + LSDASection = + getCOFFSection(".gcc_except_table", false, SectionKind::getReadOnly()); + EHFrameSection = + getCOFFSection(".eh_frame", false, SectionKind::getDataRel()); + + // Debug info. + // FIXME: Don't use 'directive' mode here. 
+ DwarfAbbrevSection = + getCOFFSection("\t.section\t.debug_abbrev,\"dr\"", + true, SectionKind::getMetadata()); + DwarfInfoSection = + getCOFFSection("\t.section\t.debug_info,\"dr\"", + true, SectionKind::getMetadata()); + DwarfLineSection = + getCOFFSection("\t.section\t.debug_line,\"dr\"", + true, SectionKind::getMetadata()); + DwarfFrameSection = + getCOFFSection("\t.section\t.debug_frame,\"dr\"", + true, SectionKind::getMetadata()); + DwarfPubNamesSection = + getCOFFSection("\t.section\t.debug_pubnames,\"dr\"", + true, SectionKind::getMetadata()); + DwarfPubTypesSection = + getCOFFSection("\t.section\t.debug_pubtypes,\"dr\"", + true, SectionKind::getMetadata()); + DwarfStrSection = + getCOFFSection("\t.section\t.debug_str,\"dr\"", + true, SectionKind::getMetadata()); + DwarfLocSection = + getCOFFSection("\t.section\t.debug_loc,\"dr\"", + true, SectionKind::getMetadata()); + DwarfARangesSection = + getCOFFSection("\t.section\t.debug_aranges,\"dr\"", + true, SectionKind::getMetadata()); + DwarfRangesSection = + getCOFFSection("\t.section\t.debug_ranges,\"dr\"", + true, SectionKind::getMetadata()); + DwarfMacroInfoSection = + getCOFFSection("\t.section\t.debug_macinfo,\"dr\"", + true, SectionKind::getMetadata()); +} + +const MCSection *TargetLoweringObjectFileCOFF:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + return getCOFFSection(GV->getSection(), false, Kind); +} + +static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { + if (Kind.isText()) + return ".text$linkonce"; + if (Kind.isWriteable()) + return ".data$linkonce"; + return ".rdata$linkonce"; +} + + +const MCSection *TargetLoweringObjectFileCOFF:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + assert(!Kind.isThreadLocal() && "Doesn't support TLS"); + + // If this global is linkonce/weak and the target handles this by emitting it + // into a 'uniqued' section name, create and return the section now. + if (GV->isWeakForLinker()) { + const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind); + SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); + Mang->getNameWithPrefix(Name, GV, false); + return getCOFFSection(Name.str(), false, Kind); + } + + if (Kind.isText()) + return getTextSection(); + + return getDataSection(); +} + diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 6c7c1a1..c840b39 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -160,7 +160,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, MachineBasicBlock::iterator OldPos) { // Check if it's safe to move this instruction. bool SeenStore = true; // Be conservative. - if (!MI->isSafeToMove(TII, SeenStore, AA)) + if (!MI->isSafeToMove(TII, AA, SeenStore)) return false; unsigned DefReg = 0; @@ -213,6 +213,9 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, unsigned NumVisited = 0; for (MachineBasicBlock::iterator I = llvm::next(OldPos); I != KillPos; ++I) { MachineInstr *OtherMI = I; + // DBG_VALUE cannot be counted against the limit. + if (OtherMI->isDebugValue()) + continue; if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost. 
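The DBG_VALUE change above, like the related two-address hunks that follow, makes debug instructions invisible to the pass's heuristics: they are skipped when counting instructions against the sinking limit and when counting register uses, so building with debug info cannot change the generated code. A tiny standalone illustration of that counting rule (hypothetical Instr type):

    #include <cstdio>
    #include <vector>

    struct Instr { bool IsDebugValue; };

    // Walk instructions up to a fixed budget, but let DBG_VALUEs ride for free
    // so that compiling with -g cannot change the decision.
    static bool withinBudget(const std::vector<Instr> &Range, unsigned Limit) {
      unsigned NumVisited = 0;
      for (const Instr &I : Range) {
        if (I.IsDebugValue)
          continue;                 // debug instructions don't count
        if (NumVisited > Limit)
          return false;
        ++NumVisited;
      }
      return true;
    }

    int main() {
      std::vector<Instr> Range(40, Instr{true});    // 40 DBG_VALUEs...
      Range.push_back(Instr{false});                // ...and one real instruction
      std::printf("%d\n", withinBudget(Range, 30)); // prints 1: still within budget
      return 0;
    }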
return false; ++NumVisited; @@ -451,13 +454,10 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, const TargetInstrInfo *TII, bool &IsCopy, unsigned &DstReg, bool &IsDstPhys) { - MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg); - if (UI == MRI->use_end()) - return 0; - MachineInstr &UseMI = *UI; - if (++UI != MRI->use_end()) - // More than one use. + if (!MRI->hasOneNonDBGUse(Reg)) + // None or more than one use. return 0; + MachineInstr &UseMI = *MRI->use_nodbg_begin(Reg); if (UseMI.getParent() != MBB) return 0; unsigned SrcReg; @@ -923,6 +923,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); mi != me; ) { MachineBasicBlock::iterator nmi = llvm::next(mi); + if (mi->isDebugValue()) { + mi = nmi; + continue; + } const TargetInstrDesc &TID = mi->getDesc(); bool FirstTied = true; @@ -1021,7 +1025,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { // copying it. if (DefMI && DefMI->getDesc().isAsCheapAsAMove() && - DefMI->isSafeToReMat(TII, regB, AA) && + DefMI->isSafeToReMat(TII, AA, regB) && isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n"); unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 5956b61..ed02696 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -261,19 +261,21 @@ bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) { void VirtRegMap::print(raw_ostream &OS, const Module* M) const { const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); OS << "********** REGISTER MAP **********\n"; for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) { if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG) OS << "[reg" << i << " -> " << TRI->getName(Virt2PhysMap[i]) - << "]\n"; + << "] " << MRI.getRegClass(i)->getName() << "\n"; } for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) if (Virt2StackSlotMap[i] != VirtRegMap::NO_STACK_SLOT) - OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i] << "]\n"; + OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i] + << "] " << MRI.getRegClass(i)->getName() << "\n"; OS << '\n'; } diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index ce62594..7aa0a91 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -46,7 +46,7 @@ namespace { static cl::opt<RewriterName> RewriterOpt("rewriter", - cl::desc("Rewriter to use: (default: local)"), + cl::desc("Rewriter to use (default=local)"), cl::Prefix, cl::values(clEnumVal(local, "local rewriter"), clEnumVal(trivial, "trivial rewriter"), @@ -62,6 +62,7 @@ VirtRegRewriter::~VirtRegRewriter() {} /// substitutePhysReg - Replace virtual register in MachineOperand with a /// physical register. Do the right thing with the sub-register index. +/// Note that operands may be added, so the MO reference is no longer valid. 
static void substitutePhysReg(MachineOperand &MO, unsigned Reg, const TargetRegisterInfo &TRI) { if (unsigned SubIdx = MO.getSubReg()) { @@ -123,14 +124,15 @@ struct TrivialRewriter : public VirtRegRewriter { continue; unsigned pReg = VRM.getPhys(reg); mri->setPhysRegUsed(pReg); - for (MachineRegisterInfo::reg_iterator regItr = mri->reg_begin(reg), - regEnd = mri->reg_end(); regItr != regEnd;) { - MachineOperand &mop = regItr.getOperand(); - assert(mop.isReg() && mop.getReg() == reg && "reg_iterator broken?"); - ++regItr; - substitutePhysReg(mop, pReg, *tri); - changed = true; - } + // Copy the register use-list before traversing it. + SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist; + for (MachineRegisterInfo::reg_iterator I = mri->reg_begin(reg), + E = mri->reg_end(); I != E; ++I) + reglist.push_back(std::make_pair(&*I, I.getOperandNo())); + for (unsigned N=0; N != reglist.size(); ++N) + substitutePhysReg(reglist[N].first->getOperand(reglist[N].second), + pReg, *tri); + changed |= !reglist.empty(); } } @@ -1850,19 +1852,18 @@ private: KilledMIRegs.clear(); for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) { unsigned i = VirtUseOps[j]; - MachineOperand &MO = MI.getOperand(i); - unsigned VirtReg = MO.getReg(); + unsigned VirtReg = MI.getOperand(i).getReg(); assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register?"); - unsigned SubIdx = MO.getSubReg(); + unsigned SubIdx = MI.getOperand(i).getSubReg(); if (VRM.isAssignedReg(VirtReg)) { // This virtual register was assigned a physreg! unsigned Phys = VRM.getPhys(VirtReg); RegInfo->setPhysRegUsed(Phys); - if (MO.isDef()) + if (MI.getOperand(i).isDef()) ReusedOperands.markClobbered(Phys); - substitutePhysReg(MO, Phys, *TRI); + substitutePhysReg(MI.getOperand(i), Phys, *TRI); if (VRM.isImplicitlyDefined(VirtReg)) // FIXME: Is this needed? BuildMI(MBB, &MI, MI.getDebugLoc(), @@ -1871,10 +1872,10 @@ private: } // This virtual register is now known to be a spilled value. - if (!MO.isUse()) + if (!MI.getOperand(i).isUse()) continue; // Handle defs in the loop below (handle use&def here though) - bool AvoidReload = MO.isUndef(); + bool AvoidReload = MI.getOperand(i).isUndef(); // Check if it is defined by an implicit def. It should not be spilled. // Note, this is for correctness reason. e.g. 
// 8 %reg1024<def> = IMPLICIT_DEF diff --git a/lib/CompilerDriver/Action.cpp b/lib/CompilerDriver/Action.cpp index 7bcd30a..9d07811 100644 --- a/lib/CompilerDriver/Action.cpp +++ b/lib/CompilerDriver/Action.cpp @@ -15,6 +15,7 @@ #include "llvm/CompilerDriver/BuiltinOptions.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/SystemUtils.h" #include "llvm/System/Program.h" #include "llvm/System/TimeValue.h" @@ -24,13 +25,23 @@ using namespace llvm; using namespace llvmc; +namespace llvmc { + +extern int Main(int argc, char** argv); +extern const char* ProgramName; + +} + namespace { int ExecuteProgram(const std::string& name, const StrVector& args) { sys::Path prog = sys::Program::FindProgramByName(name); - if (prog.isEmpty()) - throw std::runtime_error("Can't find program '" + name + "'"); + if (prog.isEmpty()) { + prog = FindExecutable(name, ProgramName, (void *)(intptr_t)&Main); + if (prog.isEmpty()) + throw std::runtime_error("Can't find program '" + name + "'"); + } if (!prog.canExecute()) throw std::runtime_error("Program '" + name + "' is not executable."); diff --git a/lib/CompilerDriver/CompilationGraph.cpp b/lib/CompilerDriver/CompilationGraph.cpp index 524607b..7d1c7fe 100644 --- a/lib/CompilerDriver/CompilationGraph.cpp +++ b/lib/CompilerDriver/CompilationGraph.cpp @@ -34,7 +34,8 @@ namespace llvmc { const std::string& LanguageMap::GetLanguage(const sys::Path& File) const { StringRef suf = File.getSuffix(); - LanguageMap::const_iterator Lang = this->find(suf); + LanguageMap::const_iterator Lang = + this->find(suf.empty() ? "*empty*" : suf); if (Lang == this->end()) throw std::runtime_error("File '" + File.str() + "' has unknown suffix '" + suf.str() + '\''); @@ -313,7 +314,7 @@ int CompilationGraph::Build (const sys::Path& TempDir, JoinTool* JT = &dynamic_cast<JoinTool&>(*CurNode->ToolPtr.getPtr()); // Are there any files in the join list? - if (JT->JoinListEmpty()) + if (JT->JoinListEmpty() && !(JT->WorksOnEmpty() && InputFilenames.empty())) continue; Action CurAction = JT->GenerateAction(CurNode->HasChildren(), diff --git a/lib/CompilerDriver/Main.cpp b/lib/CompilerDriver/Main.cpp index 3a3487a..b5e507d 100644 --- a/lib/CompilerDriver/Main.cpp +++ b/lib/CompilerDriver/Main.cpp @@ -100,7 +100,8 @@ int Main(int argc, char** argv) { ProgramName = argv[0]; cl::ParseCommandLineOptions - (argc, argv, "LLVM Compiler Driver (Work In Progress)", true); + (argc, argv, "LLVM Compiler Driver (Work In Progress)", + /* ReadResponseFiles = */ false); PluginLoader Plugins; Plugins.RunInitialization(langMap, graph); @@ -126,10 +127,6 @@ int Main(int argc, char** argv) { return 0; } - if (InputFilenames.empty()) { - throw std::runtime_error("no input files"); - } - if (Time) { GlobalTimeLog = new std::stringstream; GlobalTimeLog->precision(2); diff --git a/lib/CompilerDriver/Tool.cpp b/lib/CompilerDriver/Tool.cpp index 9f4ab49..5e558ca 100644 --- a/lib/CompilerDriver/Tool.cpp +++ b/lib/CompilerDriver/Tool.cpp @@ -17,6 +17,8 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/System/Path.h" +#include <algorithm> + using namespace llvm; using namespace llvmc; @@ -71,3 +73,22 @@ sys::Path Tool::OutFilename(const sys::Path& In, } return Out; } + +namespace { + template <class A, class B> + bool CompareFirst (std::pair<A,B> p1, std::pair<A,B> p2) { + return std::less<A>()(p1.first, p2.first); + } +} + +StrVector Tool::SortArgs(ArgsVector& Args) const { + StrVector Out; + + // HACK: this won't be needed when we'll migrate away from CommandLine. 
+ std::stable_sort(Args.begin(), Args.end(), &CompareFirst<unsigned, std::string>); + for (ArgsVector::iterator B = Args.begin(), E = Args.end(); B != E; ++B) { + Out.push_back(B->second); + } + + return Out; +} diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 3e684e1..b2e2a04 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -339,12 +339,12 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn, } // FALLS THROUGH case 1: - if (!FTy->getParamType(0)->isInteger(32)) { + if (!FTy->getParamType(0)->isIntegerTy(32)) { llvm_report_error("Invalid type for first argument of main() supplied"); } // FALLS THROUGH case 0: - if (!isa<IntegerType>(FTy->getReturnType()) && + if (!FTy->getReturnType()->isIntegerTy() && !FTy->getReturnType()->isVoidTy()) { llvm_report_error("Invalid return type of main() supplied"); } @@ -599,22 +599,22 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { switch (Op0->getType()->getTypeID()) { default: llvm_unreachable("Invalid bitcast operand"); case Type::IntegerTyID: - assert(DestTy->isFloatingPoint() && "invalid bitcast"); + assert(DestTy->isFloatingPointTy() && "invalid bitcast"); if (DestTy->isFloatTy()) GV.FloatVal = GV.IntVal.bitsToFloat(); else if (DestTy->isDoubleTy()) GV.DoubleVal = GV.IntVal.bitsToDouble(); break; case Type::FloatTyID: - assert(DestTy->isInteger(32) && "Invalid bitcast"); + assert(DestTy->isIntegerTy(32) && "Invalid bitcast"); GV.IntVal.floatToBits(GV.FloatVal); break; case Type::DoubleTyID: - assert(DestTy->isInteger(64) && "Invalid bitcast"); + assert(DestTy->isIntegerTy(64) && "Invalid bitcast"); GV.IntVal.doubleToBits(GV.DoubleVal); break; case Type::PointerTyID: - assert(isa<PointerType>(DestTy) && "Invalid bitcast"); + assert(DestTy->isPointerTy() && "Invalid bitcast"); break; // getConstantValue(Op0) above already converted it } return GV; diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp index 141cb27..c7495d4 100644 --- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -87,11 +87,11 @@ void LLVMDisposeGenericValue(LLVMGenericValueRef GenVal) { /*===-- Operations on execution engines -----------------------------------===*/ -LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, - LLVMModuleProviderRef MP, - char **OutError) { +LLVMBool LLVMCreateExecutionEngineForModule(LLVMExecutionEngineRef *OutEE, + LLVMModuleRef M, + char **OutError) { std::string Error; - EngineBuilder builder(unwrap(MP)); + EngineBuilder builder(unwrap(M)); builder.setEngineKind(EngineKind::Either) .setErrorStr(&Error); if (ExecutionEngine *EE = builder.create()){ @@ -102,11 +102,11 @@ LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, return 1; } -LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp, - LLVMModuleProviderRef MP, - char **OutError) { +LLVMBool LLVMCreateInterpreterForModule(LLVMExecutionEngineRef *OutInterp, + LLVMModuleRef M, + char **OutError) { std::string Error; - EngineBuilder builder(unwrap(MP)); + EngineBuilder builder(unwrap(M)); builder.setEngineKind(EngineKind::Interpreter) .setErrorStr(&Error); if (ExecutionEngine *Interp = builder.create()) { @@ -117,12 +117,12 @@ LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp, return 1; } -LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT, - LLVMModuleProviderRef MP, - unsigned OptLevel, - char 
**OutError) { +LLVMBool LLVMCreateJITCompilerForModule(LLVMExecutionEngineRef *OutJIT, + LLVMModuleRef M, + unsigned OptLevel, + char **OutError) { std::string Error; - EngineBuilder builder(unwrap(MP)); + EngineBuilder builder(unwrap(M)); builder.setEngineKind(EngineKind::JIT) .setErrorStr(&Error) .setOptLevel((CodeGenOpt::Level)OptLevel); @@ -134,6 +134,35 @@ LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT, return 1; } +LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, + LLVMModuleProviderRef MP, + char **OutError) { + /* The module provider is now actually a module. */ + return LLVMCreateExecutionEngineForModule(OutEE, + reinterpret_cast<LLVMModuleRef>(MP), + OutError); +} + +LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp, + LLVMModuleProviderRef MP, + char **OutError) { + /* The module provider is now actually a module. */ + return LLVMCreateInterpreterForModule(OutInterp, + reinterpret_cast<LLVMModuleRef>(MP), + OutError); +} + +LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT, + LLVMModuleProviderRef MP, + unsigned OptLevel, + char **OutError) { + /* The module provider is now actually a module. */ + return LLVMCreateJITCompilerForModule(OutJIT, + reinterpret_cast<LLVMModuleRef>(MP), + OptLevel, OutError); +} + + void LLVMDisposeExecutionEngine(LLVMExecutionEngineRef EE) { delete unwrap(EE); } @@ -173,17 +202,29 @@ void LLVMFreeMachineCodeForFunction(LLVMExecutionEngineRef EE, LLVMValueRef F) { unwrap(EE)->freeMachineCodeForFunction(unwrap<Function>(F)); } +void LLVMAddModule(LLVMExecutionEngineRef EE, LLVMModuleRef M){ + unwrap(EE)->addModule(unwrap(M)); +} + void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP){ - unwrap(EE)->addModule(unwrap(MP)); + /* The module provider is now actually a module. */ + LLVMAddModule(EE, reinterpret_cast<LLVMModuleRef>(MP)); +} + +LLVMBool LLVMRemoveModule(LLVMExecutionEngineRef EE, LLVMModuleRef M, + LLVMModuleRef *OutMod, char **OutError) { + Module *Mod = unwrap(M); + unwrap(EE)->removeModule(Mod); + *OutMod = wrap(Mod); + return 0; } LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP, LLVMModuleRef *OutMod, char **OutError) { - Module *M = unwrap(MP); - unwrap(EE)->removeModule(M); - *OutMod = wrap(M); - return 0; + /* The module provider is now actually a module. */ + return LLVMRemoveModule(EE, reinterpret_cast<LLVMModuleRef>(MP), OutMod, + OutError); } LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index 73f5558..a2aad5a 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -591,7 +591,7 @@ void Interpreter::popStackAndReturnValueToCaller(const Type *RetTy, ECStack.pop_back(); if (ECStack.empty()) { // Finished main. Put result into exit code... - if (RetTy && RetTy->isInteger()) { // Nonvoid return type? + if (RetTy && RetTy->isIntegerTy()) { // Nonvoid return type? 
ExitValue = Result; // Capture the exit value of the program } else { memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped)); @@ -761,7 +761,7 @@ void Interpreter::visitAllocaInst(AllocaInst &I) { GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I, gep_type_iterator E, ExecutionContext &SF) { - assert(isa<PointerType>(Ptr->getType()) && + assert(Ptr->getType()->isPointerTy() && "Cannot getElementOffset of a nonpointer type!"); uint64_t Total = 0; @@ -979,7 +979,7 @@ GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, const Type *DstTy, const Type *SrcTy = SrcVal->getType(); uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(SrcTy->isFloatingPoint() && "Invalid FPToUI instruction"); + assert(SrcTy->isFloatingPointTy() && "Invalid FPToUI instruction"); if (SrcTy->getTypeID() == Type::FloatTyID) Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth); @@ -993,7 +993,7 @@ GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, const Type *DstTy, const Type *SrcTy = SrcVal->getType(); uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(SrcTy->isFloatingPoint() && "Invalid FPToSI instruction"); + assert(SrcTy->isFloatingPointTy() && "Invalid FPToSI instruction"); if (SrcTy->getTypeID() == Type::FloatTyID) Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth); @@ -1005,7 +1005,7 @@ GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, const Type *DstTy, GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, const Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(DstTy->isFloatingPoint() && "Invalid UIToFP instruction"); + assert(DstTy->isFloatingPointTy() && "Invalid UIToFP instruction"); if (DstTy->getTypeID() == Type::FloatTyID) Dest.FloatVal = APIntOps::RoundAPIntToFloat(Src.IntVal); @@ -1017,7 +1017,7 @@ GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, const Type *DstTy, GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, const Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(DstTy->isFloatingPoint() && "Invalid SIToFP instruction"); + assert(DstTy->isFloatingPointTy() && "Invalid SIToFP instruction"); if (DstTy->getTypeID() == Type::FloatTyID) Dest.FloatVal = APIntOps::RoundSignedAPIntToFloat(Src.IntVal); @@ -1031,7 +1031,7 @@ GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, const Type *DstTy, ExecutionContext &SF) { uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(isa<PointerType>(SrcVal->getType()) && "Invalid PtrToInt instruction"); + assert(SrcVal->getType()->isPointerTy() && "Invalid PtrToInt instruction"); Dest.IntVal = APInt(DBitWidth, (intptr_t) Src.PointerVal); return Dest; @@ -1040,7 +1040,7 @@ GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, const Type *DstTy, GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, const Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(isa<PointerType>(DstTy) && "Invalid PtrToInt instruction"); + assert(DstTy->isPointerTy() && "Invalid PtrToInt instruction"); uint32_t PtrSize = TD.getPointerSizeInBits(); if (PtrSize != Src.IntVal.getBitWidth()) @@ -1055,27 +1055,27 @@ GenericValue Interpreter::executeBitCastInst(Value *SrcVal, const Type *DstTy, const Type *SrcTy 
= SrcVal->getType(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); - if (isa<PointerType>(DstTy)) { - assert(isa<PointerType>(SrcTy) && "Invalid BitCast"); + if (DstTy->isPointerTy()) { + assert(SrcTy->isPointerTy() && "Invalid BitCast"); Dest.PointerVal = Src.PointerVal; - } else if (DstTy->isInteger()) { + } else if (DstTy->isIntegerTy()) { if (SrcTy->isFloatTy()) { Dest.IntVal.zext(sizeof(Src.FloatVal) * CHAR_BIT); Dest.IntVal.floatToBits(Src.FloatVal); } else if (SrcTy->isDoubleTy()) { Dest.IntVal.zext(sizeof(Src.DoubleVal) * CHAR_BIT); Dest.IntVal.doubleToBits(Src.DoubleVal); - } else if (SrcTy->isInteger()) { + } else if (SrcTy->isIntegerTy()) { Dest.IntVal = Src.IntVal; } else llvm_unreachable("Invalid BitCast"); } else if (DstTy->isFloatTy()) { - if (SrcTy->isInteger()) + if (SrcTy->isIntegerTy()) Dest.FloatVal = Src.IntVal.bitsToFloat(); else Dest.FloatVal = Src.FloatVal; } else if (DstTy->isDoubleTy()) { - if (SrcTy->isInteger()) + if (SrcTy->isIntegerTy()) Dest.DoubleVal = Src.IntVal.bitsToDouble(); else Dest.DoubleVal = Src.DoubleVal; diff --git a/lib/ExecutionEngine/JIT/Android.mk b/lib/ExecutionEngine/JIT/Android.mk new file mode 100644 index 0000000..1c7e27f --- /dev/null +++ b/lib/ExecutionEngine/JIT/Android.mk @@ -0,0 +1,32 @@ +LOCAL_PATH:= $(call my-dir) + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + Intercept.cpp \ + JIT.cpp \ + JITDebugRegisterer.cpp \ + JITDwarfEmitter.cpp \ + JITEmitter.cpp \ + JITMemoryManager.cpp \ + OProfileJITEventListener.cpp \ + TargetSelect.cpp + +LOCAL_MODULE:= libLLVMJIT + +include $(LLVM_HOST_BUILD_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + JITMemoryManager.cpp + +LOCAL_MODULE:= libLLVMJIT + +include $(LLVM_DEVICE_BUILD_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index 616a66e..dd74d73 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -18,6 +18,7 @@ #include "llvm/Function.h" #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/JITCodeEmitter.h" #include "llvm/CodeGen/MachineCodeInfo.h" #include "llvm/ExecutionEngine/GenericValue.h" @@ -27,6 +28,7 @@ #include "llvm/Target/TargetJITInfo.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MutexGuard.h" #include "llvm/System/DynamicLibrary.h" #include "llvm/Config/config.h" @@ -237,9 +239,53 @@ ExecutionEngine *JIT::createJIT(Module *M, } } +namespace { +/// This class supports the global getPointerToNamedFunction(), which allows +/// bugpoint or gdb users to search for a function by name without any context. +class JitPool { + SmallPtrSet<JIT*, 1> JITs; // Optimize for process containing just 1 JIT. 
+ mutable sys::Mutex Lock; +public: + void Add(JIT *jit) { + MutexGuard guard(Lock); + JITs.insert(jit); + } + void Remove(JIT *jit) { + MutexGuard guard(Lock); + JITs.erase(jit); + } + void *getPointerToNamedFunction(const char *Name) const { + MutexGuard guard(Lock); + assert(JITs.size() != 0 && "No Jit registered"); + //search function in every instance of JIT + for (SmallPtrSet<JIT*, 1>::const_iterator Jit = JITs.begin(), + end = JITs.end(); + Jit != end; ++Jit) { + if (Function *F = (*Jit)->FindFunctionNamed(Name)) + return (*Jit)->getPointerToFunction(F); + } + // The function is not available : fallback on the first created (will + // search in symbol of the current program/library) + return (*JITs.begin())->getPointerToNamedFunction(Name); + } +}; +ManagedStatic<JitPool> AllJits; +} +extern "C" { + // getPointerToNamedFunction - This function is used as a global wrapper to + // JIT::getPointerToNamedFunction for the purpose of resolving symbols when + // bugpoint is debugging the JIT. In that scenario, we are loading an .so and + // need to resolve function(s) that are being mis-codegenerated, so we need to + // resolve their addresses at runtime, and this is the way to do it. + void *getPointerToNamedFunction(const char *Name) { + return AllJits->getPointerToNamedFunction(Name); + } +} + JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode) - : ExecutionEngine(M), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode) { + : ExecutionEngine(M), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode), + isAlreadyCodeGenerating(false) { setTargetData(TM.getTargetData()); jitstate = new JITState(M); @@ -247,6 +293,9 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, // Initialize JCE JCE = createEmitter(*this, JMM, TM); + // Register in global list of all JITs. + AllJits->Add(this); + // Add target data MutexGuard locked(lock); FunctionPassManager &PM = jitstate->getPM(locked); @@ -281,6 +330,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, } JIT::~JIT() { + AllJits->Remove(this); delete jitstate; delete JCE; delete &TM; @@ -361,12 +411,12 @@ GenericValue JIT::runFunction(Function *F, // Handle some common cases first. These cases correspond to common `main' // prototypes. - if (RetTy->isInteger(32) || RetTy->isVoidTy()) { + if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) { switch (ArgValues.size()) { case 3: - if (FTy->getParamType(0)->isInteger(32) && - isa<PointerType>(FTy->getParamType(1)) && - isa<PointerType>(FTy->getParamType(2))) { + if (FTy->getParamType(0)->isIntegerTy(32) && + FTy->getParamType(1)->isPointerTy() && + FTy->getParamType(2)->isPointerTy()) { int (*PF)(int, char **, const char **) = (int(*)(int, char **, const char **))(intptr_t)FPtr; @@ -379,8 +429,8 @@ GenericValue JIT::runFunction(Function *F, } break; case 2: - if (FTy->getParamType(0)->isInteger(32) && - isa<PointerType>(FTy->getParamType(1))) { + if (FTy->getParamType(0)->isIntegerTy(32) && + FTy->getParamType(1)->isPointerTy()) { int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr; // Call the function. 
@@ -392,7 +442,7 @@ GenericValue JIT::runFunction(Function *F, break; case 1: if (FTy->getNumParams() == 1 && - FTy->getParamType(0)->isInteger(32)) { + FTy->getParamType(0)->isIntegerTy(32)) { GenericValue rv; int (*PF)(int) = (int(*)(int))(intptr_t)FPtr; rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue())); @@ -503,8 +553,12 @@ GenericValue JIT::runFunction(Function *F, else ReturnInst::Create(F->getContext(), StubBB); // Just return void. - // Finally, return the value returned by our nullary stub function. - return runFunction(Stub, std::vector<GenericValue>()); + // Finally, call our nullary stub function. + GenericValue Result = runFunction(Stub, std::vector<GenericValue>()); + // Erase it, since no other function can have a reference to it. + Stub->eraseFromParent(); + // And return the result. + return Result; } void JIT::RegisterJITEventListener(JITEventListener *L) { @@ -570,7 +624,6 @@ void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) { } void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) { - static bool isAlreadyCodeGenerating = false; assert(!isAlreadyCodeGenerating && "Error: Recursive compilation detected!"); // JIT the function diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h index bb8f851..edae719 100644 --- a/lib/ExecutionEngine/JIT/JIT.h +++ b/lib/ExecutionEngine/JIT/JIT.h @@ -61,6 +61,10 @@ class JIT : public ExecutionEngine { /// should be set to true. Doing so breaks freeMachineCodeForFunction. bool AllocateGVsWithCode; + /// True while the JIT is generating code. Used to assert against recursive + /// entry. + bool isAlreadyCodeGenerating; + JITState *jitstate; JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp index c1051a9..946351b 100644 --- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp @@ -522,7 +522,11 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const { JCE->emitInt64(((intptr_t)Jit.getPointerToGlobal(Personality))); } - JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); + // LSDA encoding: This must match the encoding used in EmitEHFrame () + if (PointerSize == 4) + JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); + else + JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8); JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); } else { JCE->emitULEB128Bytes(1); diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index 34a9938..783ebb4 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -37,6 +37,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MutexGuard.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" @@ -57,7 +58,6 @@ using namespace llvm; STATISTIC(NumBytes, "Number of bytes of machine code compiled"); STATISTIC(NumRelos, "Number of relocations applied"); STATISTIC(NumRetries, "Number of retries with more memory"); -static JIT *TheJIT = 0; // A declaration may stop being a declaration once it's fully read from bitcode. @@ -109,9 +109,13 @@ namespace { /// particular GlobalVariable so that we can reuse them if necessary. 
GlobalToIndirectSymMapTy GlobalToIndirectSymMap; + /// Instance of the JIT this ResolverState serves. + JIT *TheJIT; + public: - JITResolverState() : FunctionToLazyStubMap(this), - FunctionToCallSitesMap(this) {} + JITResolverState(JIT *jit) : FunctionToLazyStubMap(this), + FunctionToCallSitesMap(this), + TheJIT(jit) {} FunctionToLazyStubMapTy& getFunctionToLazyStubMap( const MutexGuard& locked) { @@ -152,53 +156,18 @@ namespace { // was no stub. This function uses the call-site->function map to find a // relevant function, but asserts that only stubs and not other call sites // will be passed in. - Function *EraseStub(const MutexGuard &locked, void *Stub) { - CallSiteToFunctionMapTy::iterator C2F_I = - CallSiteToFunctionMap.find(Stub); - if (C2F_I == CallSiteToFunctionMap.end()) { - // Not a stub. - return NULL; - } - - Function *const F = C2F_I->second; -#ifndef NDEBUG - void *RealStub = FunctionToLazyStubMap.lookup(F); - assert(RealStub == Stub && - "Call-site that wasn't a stub pass in to EraseStub"); -#endif - FunctionToLazyStubMap.erase(F); - CallSiteToFunctionMap.erase(C2F_I); - - // Remove the stub from the function->call-sites map, and remove the whole - // entry from the map if that was the last call site. - FunctionToCallSitesMapTy::iterator F2C_I = FunctionToCallSitesMap.find(F); - assert(F2C_I != FunctionToCallSitesMap.end() && - "FunctionToCallSitesMap broken"); - bool Erased = F2C_I->second.erase(Stub); - (void)Erased; - assert(Erased && "FunctionToCallSitesMap broken"); - if (F2C_I->second.empty()) - FunctionToCallSitesMap.erase(F2C_I); - - return F; - } + Function *EraseStub(const MutexGuard &locked, void *Stub); - void EraseAllCallSites(const MutexGuard &locked, Function *F) { + void EraseAllCallSitesFor(const MutexGuard &locked, Function *F) { assert(locked.holds(TheJIT->lock)); - EraseAllCallSitesPrelocked(F); - } - void EraseAllCallSitesPrelocked(Function *F) { - FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F); - if (F2C == FunctionToCallSitesMap.end()) - return; - for (SmallPtrSet<void*, 1>::const_iterator I = F2C->second.begin(), - E = F2C->second.end(); I != E; ++I) { - bool Erased = CallSiteToFunctionMap.erase(*I); - (void)Erased; - assert(Erased && "Missing call site->function mapping"); - } - FunctionToCallSitesMap.erase(F2C); + EraseAllCallSitesForPrelocked(F); } + void EraseAllCallSitesForPrelocked(Function *F); + + // Erases _all_ call sites regardless of their function. This is used to + // unregister the stub addresses from the StubToResolverMap in + // ~JITResolver(). + void EraseAllCallSitesPrelocked(); }; /// JITResolver - Keep track of, and resolve, call sites for functions that @@ -227,19 +196,16 @@ namespace { JITEmitter &JE; - static JITResolver *TheJITResolver; - public: - explicit JITResolver(JIT &jit, JITEmitter &je) : nextGOTIndex(0), JE(je) { - TheJIT = &jit; + /// Instance of JIT corresponding to this Resolver. + JIT *TheJIT; + public: + explicit JITResolver(JIT &jit, JITEmitter &je) + : state(&jit), nextGOTIndex(0), JE(je), TheJIT(&jit) { LazyResolverFn = jit.getJITInfo().getLazyResolverFunction(JITCompilerFn); - assert(TheJITResolver == 0 && "Multiple JIT resolvers?"); - TheJITResolver = this; } - ~JITResolver() { - TheJITResolver = 0; - } + ~JITResolver(); /// getLazyFunctionStubIfAvailable - This returns a pointer to a function's /// lazy-compilation stub if it has already been created. 
@@ -260,8 +226,6 @@ namespace { void getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs, SmallVectorImpl<void*> &Ptrs); - GlobalValue *invalidateStub(void *Stub); - /// getGOTIndexForAddress - Return a new or existing index in the GOT for /// an address. This function only manages slots, it does not manage the /// contents of the slots or the memory associated with the GOT. @@ -273,6 +237,55 @@ namespace { static void *JITCompilerFn(void *Stub); }; + class StubToResolverMapTy { + /// Map a stub address to a specific instance of a JITResolver so that + /// lazily-compiled functions can find the right resolver to use. + /// + /// Guarded by Lock. + std::map<void*, JITResolver*> Map; + + /// Guards Map from concurrent accesses. + mutable sys::Mutex Lock; + + public: + /// Registers a Stub to be resolved by Resolver. + void RegisterStubResolver(void *Stub, JITResolver *Resolver) { + MutexGuard guard(Lock); + Map.insert(std::make_pair(Stub, Resolver)); + } + /// Unregisters the Stub when it's invalidated. + void UnregisterStubResolver(void *Stub) { + MutexGuard guard(Lock); + Map.erase(Stub); + } + /// Returns the JITResolver instance that owns the Stub. + JITResolver *getResolverFromStub(void *Stub) const { + MutexGuard guard(Lock); + // The address given to us for the stub may not be exactly right, it might + // be a little bit after the stub. As such, use upper_bound to find it. + // This is the same trick as in LookupFunctionFromCallSite from + // JITResolverState. + std::map<void*, JITResolver*>::const_iterator I = Map.upper_bound(Stub); + assert(I != Map.begin() && "This is not a known stub!"); + --I; + return I->second; + } + /// True if any stubs refer to the given resolver. Only used in an assert(). + /// O(N) + bool ResolverHasStubs(JITResolver* Resolver) const { + MutexGuard guard(Lock); + for (std::map<void*, JITResolver*>::const_iterator I = Map.begin(), + E = Map.end(); I != E; ++I) { + if (I->second == Resolver) + return true; + } + return false; + } + }; + /// This needs to be static so that a lazy call stub can access it with no + /// context except the address of the stub. + ManagedStatic<StubToResolverMapTy> StubToResolverMap; + /// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is /// used to output functions to memory for execution. class JITEmitter : public JITCodeEmitter { @@ -333,9 +346,6 @@ namespace { /// MMI - Machine module info for exception informations MachineModuleInfo* MMI; - // GVSet - a set to keep track of which globals have been seen - SmallPtrSet<const GlobalVariable*, 8> GVSet; - // CurFn - The llvm function being emitted. Only valid during // finishFunction(). const Function *CurFn; @@ -359,22 +369,15 @@ namespace { ValueMap<const Function *, EmittedCode, EmittedFunctionConfig> EmittedFunctions; - // CurFnStubUses - For a given Function, a vector of stubs that it - // references. This facilitates the JIT detecting that a stub is no - // longer used, so that it may be deallocated. - DenseMap<AssertingVH<const Function>, SmallVector<void*, 1> > CurFnStubUses; - - // StubFnRefs - For a given pointer to a stub, a set of Functions which - // reference the stub. When the count of a stub's references drops to zero, - // the stub is unused. 
- DenseMap<void *, SmallPtrSet<const Function*, 1> > StubFnRefs; - DILocation PrevDLT; + /// Instance of the JIT + JIT *TheJIT; + public: JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM) : SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0), - EmittedFunctions(this), PrevDLT(NULL) { + EmittedFunctions(this), PrevDLT(NULL), TheJIT(&jit) { MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager(); if (jit.getJITInfo().needsGOT()) { MemMgr->AllocateGOT(); @@ -454,11 +457,6 @@ namespace { /// function body. void deallocateMemForFunction(const Function *F); - /// AddStubToCurrentFunction - Mark the current function being JIT'd as - /// using the stub at the specified address. Allows - /// deallocateMemForFunction to also remove stubs no longer referenced. - void AddStubToCurrentFunction(void *Stub); - virtual void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn); virtual void emitLabel(uint64_t LabelID) { @@ -489,16 +487,86 @@ namespace { bool MayNeedFarStub); void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference); unsigned addSizeOfGlobal(const GlobalVariable *GV, unsigned Size); - unsigned addSizeOfGlobalsInConstantVal(const Constant *C, unsigned Size); - unsigned addSizeOfGlobalsInInitializer(const Constant *Init, unsigned Size); + unsigned addSizeOfGlobalsInConstantVal( + const Constant *C, unsigned Size, + SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals, + SmallVectorImpl<const GlobalVariable*> &Worklist); + unsigned addSizeOfGlobalsInInitializer( + const Constant *Init, unsigned Size, + SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals, + SmallVectorImpl<const GlobalVariable*> &Worklist); unsigned GetSizeOfGlobalsInBytes(MachineFunction &MF); }; } -JITResolver *JITResolver::TheJITResolver = 0; - void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) { - JRS->EraseAllCallSitesPrelocked(F); + JRS->EraseAllCallSitesForPrelocked(F); +} + +Function *JITResolverState::EraseStub(const MutexGuard &locked, void *Stub) { + CallSiteToFunctionMapTy::iterator C2F_I = + CallSiteToFunctionMap.find(Stub); + if (C2F_I == CallSiteToFunctionMap.end()) { + // Not a stub. + return NULL; + } + + StubToResolverMap->UnregisterStubResolver(Stub); + + Function *const F = C2F_I->second; +#ifndef NDEBUG + void *RealStub = FunctionToLazyStubMap.lookup(F); + assert(RealStub == Stub && + "Call-site that wasn't a stub passed in to EraseStub"); +#endif + FunctionToLazyStubMap.erase(F); + CallSiteToFunctionMap.erase(C2F_I); + + // Remove the stub from the function->call-sites map, and remove the whole + // entry from the map if that was the last call site. 
+ FunctionToCallSitesMapTy::iterator F2C_I = FunctionToCallSitesMap.find(F); + assert(F2C_I != FunctionToCallSitesMap.end() && + "FunctionToCallSitesMap broken"); + bool Erased = F2C_I->second.erase(Stub); + (void)Erased; + assert(Erased && "FunctionToCallSitesMap broken"); + if (F2C_I->second.empty()) + FunctionToCallSitesMap.erase(F2C_I); + + return F; +} + +void JITResolverState::EraseAllCallSitesForPrelocked(Function *F) { + FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F); + if (F2C == FunctionToCallSitesMap.end()) + return; + StubToResolverMapTy &S2RMap = *StubToResolverMap; + for (SmallPtrSet<void*, 1>::const_iterator I = F2C->second.begin(), + E = F2C->second.end(); I != E; ++I) { + S2RMap.UnregisterStubResolver(*I); + bool Erased = CallSiteToFunctionMap.erase(*I); + (void)Erased; + assert(Erased && "Missing call site->function mapping"); + } + FunctionToCallSitesMap.erase(F2C); +} + +void JITResolverState::EraseAllCallSitesPrelocked() { + StubToResolverMapTy &S2RMap = *StubToResolverMap; + for (CallSiteToFunctionMapTy::const_iterator + I = CallSiteToFunctionMap.begin(), + E = CallSiteToFunctionMap.end(); I != E; ++I) { + S2RMap.UnregisterStubResolver(I->first); + } + CallSiteToFunctionMap.clear(); + FunctionToCallSitesMap.clear(); +} + +JITResolver::~JITResolver() { + // No need to lock because we're in the destructor, and state isn't shared. + state.EraseAllCallSitesPrelocked(); + assert(!StubToResolverMap->ResolverHasStubs(this) && + "Resolver destroyed with stubs still alive."); } /// getLazyFunctionStubIfAvailable - This returns a pointer to a function stub @@ -551,16 +619,22 @@ void *JITResolver::getLazyFunctionStub(Function *F) { DEBUG(dbgs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '" << F->getName() << "'\n"); - // Finally, keep track of the stub-to-Function mapping so that the - // JITCompilerFn knows which function to compile! - state.AddCallSite(locked, Stub, F); - - // If we are JIT'ing non-lazily but need to call a function that does not - // exist yet, add it to the JIT's work list so that we can fill in the stub - // address later. - if (!Actual && !TheJIT->isCompilingLazily()) - if (!isNonGhostDeclaration(F) && !F->hasAvailableExternallyLinkage()) - TheJIT->addPendingFunction(F); + if (TheJIT->isCompilingLazily()) { + // Register this JITResolver as the one corresponding to this call site so + // JITCompilerFn will be able to find it. + StubToResolverMap->RegisterStubResolver(Stub, this); + + // Finally, keep track of the stub-to-Function mapping so that the + // JITCompilerFn knows which function to compile! + state.AddCallSite(locked, Stub, F); + } else if (!Actual) { + // If we are JIT'ing non-lazily but need to call a function that does not + // exist yet, add it to the JIT's work list so that we can fill in the + // stub address later. + assert(!isNonGhostDeclaration(F) && !F->hasAvailableExternallyLinkage() && + "'Actual' should have been set above."); + TheJIT->addPendingFunction(F); + } return Stub; } @@ -634,44 +708,12 @@ void JITResolver::getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs, } } -GlobalValue *JITResolver::invalidateStub(void *Stub) { - MutexGuard locked(TheJIT->lock); - - GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked); - - // Look up the cheap way first, to see if it's a function stub we are - // invalidating. If so, remove it from both the forward and reverse maps. 
- if (Function *F = state.EraseStub(locked, Stub)) { - return F; - } - - // Otherwise, it might be an indirect symbol stub. Find it and remove it. - for (GlobalToIndirectSymMapTy::iterator i = GM.begin(), e = GM.end(); - i != e; ++i) { - if (i->second != Stub) - continue; - GlobalValue *GV = i->first; - GM.erase(i); - return GV; - } - - // Lastly, check to see if it's in the ExternalFnToStubMap. - for (std::map<void *, void *>::iterator i = ExternalFnToStubMap.begin(), - e = ExternalFnToStubMap.end(); i != e; ++i) { - if (i->second != Stub) - continue; - ExternalFnToStubMap.erase(i); - break; - } - - return 0; -} - /// JITCompilerFn - This function is called when a lazy compilation stub has /// been entered. It looks up which function this stub corresponds to, compiles /// it if necessary, then returns the resultant function pointer. void *JITResolver::JITCompilerFn(void *Stub) { - JITResolver &JR = *TheJITResolver; + JITResolver *JR = StubToResolverMap->getResolverFromStub(Stub); + assert(JR && "Unable to find the corresponding JITResolver to the call site"); Function* F = 0; void* ActualPtr = 0; @@ -680,24 +722,24 @@ void *JITResolver::JITCompilerFn(void *Stub) { // Only lock for getting the Function. The call getPointerToFunction made // in this function might trigger function materializing, which requires // JIT lock to be unlocked. - MutexGuard locked(TheJIT->lock); + MutexGuard locked(JR->TheJIT->lock); // The address given to us for the stub may not be exactly right, it might // be a little bit after the stub. As such, use upper_bound to find it. pair<void*, Function*> I = - JR.state.LookupFunctionFromCallSite(locked, Stub); + JR->state.LookupFunctionFromCallSite(locked, Stub); F = I.second; ActualPtr = I.first; } // If we have already code generated the function, just return the address. - void *Result = TheJIT->getPointerToGlobalIfAvailable(F); + void *Result = JR->TheJIT->getPointerToGlobalIfAvailable(F); if (!Result) { // Otherwise we don't have it, do lazy compilation now. // If lazy compilation is disabled, emit a useful error message and abort. - if (!TheJIT->isCompilingLazily()) { + if (!JR->TheJIT->isCompilingLazily()) { llvm_report_error("LLVM JIT requested to do lazy compilation of function '" + F->getName() + "' when lazy compiles are disabled!"); } @@ -706,11 +748,11 @@ void *JITResolver::JITCompilerFn(void *Stub) { << "' In stub ptr = " << Stub << " actual ptr = " << ActualPtr << "\n"); - Result = TheJIT->getPointerToFunction(F); + Result = JR->TheJIT->getPointerToFunction(F); } // Reacquire the lock to update the GOT map. - MutexGuard locked(TheJIT->lock); + MutexGuard locked(JR->TheJIT->lock); // We might like to remove the call site from the CallSiteToFunction map, but // we can't do that! Multiple threads could be stuck, waiting to acquire the @@ -725,8 +767,8 @@ void *JITResolver::JITCompilerFn(void *Stub) { // if they see it still using the stub address. // Note: this is done so the Resolver doesn't have to manage GOT memory // Do this without allocating map space if the target isn't using a GOT - if(JR.revGOTMap.find(Stub) != JR.revGOTMap.end()) - JR.revGOTMap[Result] = JR.revGOTMap[Stub]; + if(JR->revGOTMap.find(Stub) != JR->revGOTMap.end()) + JR->revGOTMap[Result] = JR->revGOTMap[Stub]; return Result; } @@ -751,7 +793,6 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference, // that we're returning the same address for the function as any previous // call. TODO: Yes, this is wrong. The lazy stub isn't guaranteed to be // close enough to call. 
- AddStubToCurrentFunction(FnStub); return FnStub; } @@ -768,18 +809,10 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference, return TheJIT->getPointerToFunction(F); } - // Otherwise, we may need a to emit a stub, and, conservatively, we - // always do so. - void *StubAddr = Resolver.getLazyFunctionStub(F); - - // Add the stub to the current function's list of referenced stubs, so we can - // deallocate them if the current function is ever freed. It's possible to - // return null from getLazyFunctionStub in the case of a weak extern that - // fails to resolve. - if (StubAddr) - AddStubToCurrentFunction(StubAddr); - - return StubAddr; + // Otherwise, we may need a to emit a stub, and, conservatively, we always do + // so. Note that it's possible to return null from getLazyFunctionStub in the + // case of a weak extern that fails to resolve. + return Resolver.getLazyFunctionStub(F); } void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) { @@ -787,24 +820,9 @@ void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) { // resolved address. void *GVAddress = getPointerToGlobal(V, Reference, false); void *StubAddr = Resolver.getGlobalValueIndirectSym(V, GVAddress); - - // Add the stub to the current function's list of referenced stubs, so we can - // deallocate them if the current function is ever freed. - AddStubToCurrentFunction(StubAddr); - return StubAddr; } -void JITEmitter::AddStubToCurrentFunction(void *StubAddr) { - assert(CurFn && "Stub added to current function, but current function is 0!"); - - SmallVectorImpl<void*> &StubsUsed = CurFnStubUses[CurFn]; - StubsUsed.push_back(StubAddr); - - SmallPtrSet<const Function *, 1> &FnRefs = StubFnRefs[StubAddr]; - FnRefs.insert(CurFn); -} - void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) { if (!DL.isUnknown()) { DILocation CurDLT = EmissionDetails.MF->getDILocation(DL); @@ -839,7 +857,7 @@ static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP, return Size; } -static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI) { +static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI, JIT *jit) { const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); if (JT.empty()) return 0; @@ -847,7 +865,7 @@ static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI) { for (unsigned i = 0, e = JT.size(); i != e; ++i) NumEntries += JT[i].MBBs.size(); - return NumEntries * MJTI->getEntrySize(*TheJIT->getTargetData()); + return NumEntries * MJTI->getEntrySize(*jit->getTargetData()); } static uintptr_t RoundUpToAlign(uintptr_t Size, unsigned Alignment) { @@ -876,11 +894,14 @@ unsigned JITEmitter::addSizeOfGlobal(const GlobalVariable *GV, unsigned Size) { } /// addSizeOfGlobalsInConstantVal - find any globals that we haven't seen yet -/// but are referenced from the constant; put them in GVSet and add their -/// size into the running total Size. - -unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C, - unsigned Size) { +/// but are referenced from the constant; put them in SeenGlobals and the +/// Worklist, and add their size into the running total Size. + +unsigned JITEmitter::addSizeOfGlobalsInConstantVal( + const Constant *C, + unsigned Size, + SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals, + SmallVectorImpl<const GlobalVariable*> &Worklist) { // If its undefined, return the garbage. 
if (isa<UndefValue>(C)) return Size; @@ -902,7 +923,7 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C, case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::BitCast: { - Size = addSizeOfGlobalsInConstantVal(Op0, Size); + Size = addSizeOfGlobalsInConstantVal(Op0, Size, SeenGlobals, Worklist); break; } case Instruction::Add: @@ -918,8 +939,9 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C, case Instruction::And: case Instruction::Or: case Instruction::Xor: { - Size = addSizeOfGlobalsInConstantVal(Op0, Size); - Size = addSizeOfGlobalsInConstantVal(CE->getOperand(1), Size); + Size = addSizeOfGlobalsInConstantVal(Op0, Size, SeenGlobals, Worklist); + Size = addSizeOfGlobalsInConstantVal(CE->getOperand(1), Size, + SeenGlobals, Worklist); break; } default: { @@ -933,8 +955,10 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C, if (C->getType()->getTypeID() == Type::PointerTyID) if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(C)) - if (GVSet.insert(GV)) + if (SeenGlobals.insert(GV)) { + Worklist.push_back(GV); Size = addSizeOfGlobal(GV, Size); + } return Size; } @@ -942,15 +966,18 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C, /// addSizeOfGLobalsInInitializer - handle any globals that we haven't seen yet /// but are referenced from the given initializer. -unsigned JITEmitter::addSizeOfGlobalsInInitializer(const Constant *Init, - unsigned Size) { +unsigned JITEmitter::addSizeOfGlobalsInInitializer( + const Constant *Init, + unsigned Size, + SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals, + SmallVectorImpl<const GlobalVariable*> &Worklist) { if (!isa<UndefValue>(Init) && !isa<ConstantVector>(Init) && !isa<ConstantAggregateZero>(Init) && !isa<ConstantArray>(Init) && !isa<ConstantStruct>(Init) && Init->getType()->isFirstClassType()) - Size = addSizeOfGlobalsInConstantVal(Init, Size); + Size = addSizeOfGlobalsInConstantVal(Init, Size, SeenGlobals, Worklist); return Size; } @@ -961,7 +988,7 @@ unsigned JITEmitter::addSizeOfGlobalsInInitializer(const Constant *Init, unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) { unsigned Size = 0; - GVSet.clear(); + SmallPtrSet<const GlobalVariable*, 8> SeenGlobals; for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { @@ -985,7 +1012,7 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) { // assuming the addresses of the new globals in this module // start at 0 (or something) and adjusting them after codegen // complete. Another possibility is to grab a marker bit in GV. - if (GVSet.insert(GV)) + if (SeenGlobals.insert(GV)) // A variable as yet unseen. Add in its size. Size = addSizeOfGlobal(GV, Size); } @@ -994,12 +1021,14 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) { } DEBUG(dbgs() << "JIT: About to look through initializers\n"); // Look for more globals that are referenced only from initializers. - // GVSet.end is computed each time because the set can grow as we go. 
- for (SmallPtrSet<const GlobalVariable *, 8>::iterator I = GVSet.begin(); - I != GVSet.end(); I++) { - const GlobalVariable* GV = *I; + SmallVector<const GlobalVariable*, 8> Worklist( + SeenGlobals.begin(), SeenGlobals.end()); + while (!Worklist.empty()) { + const GlobalVariable* GV = Worklist.back(); + Worklist.pop_back(); if (GV->hasInitializer()) - Size = addSizeOfGlobalsInInitializer(GV->getInitializer(), Size); + Size = addSizeOfGlobalsInInitializer(GV->getInitializer(), Size, + SeenGlobals, Worklist); } return Size; @@ -1032,7 +1061,7 @@ void JITEmitter::startFunction(MachineFunction &F) { MJTI->getEntryAlignment(*TheJIT->getTargetData())); // Add the jump table size - ActualSize += GetJumpTableSizeInBytes(MJTI); + ActualSize += GetJumpTableSizeInBytes(MJTI, TheJIT); } // Add the alignment for the function @@ -1301,40 +1330,6 @@ void JITEmitter::deallocateMemForFunction(const Function *F) { if (JITEmitDebugInfo) { DR->UnregisterFunction(F); } - - // If the function did not reference any stubs, return. - if (CurFnStubUses.find(F) == CurFnStubUses.end()) - return; - - // For each referenced stub, erase the reference to this function, and then - // erase the list of referenced stubs. - SmallVectorImpl<void *> &StubList = CurFnStubUses[F]; - for (unsigned i = 0, e = StubList.size(); i != e; ++i) { - void *Stub = StubList[i]; - - // If we already invalidated this stub for this function, continue. - if (StubFnRefs.count(Stub) == 0) - continue; - - SmallPtrSet<const Function *, 1> &FnRefs = StubFnRefs[Stub]; - FnRefs.erase(F); - - // If this function was the last reference to the stub, invalidate the stub - // in the JITResolver. Were there a memory manager deallocateStub routine, - // we could call that at this point too. - if (FnRefs.empty()) { - DEBUG(dbgs() << "\nJIT: Invalidated Stub at [" << Stub << "]\n"); - StubFnRefs.erase(Stub); - - // Invalidate the stub. If it is a GV stub, update the JIT's global - // mapping for that GV to zero. - GlobalValue *GV = Resolver.invalidateStub(Stub); - if (GV) { - TheJIT->updateGlobalMapping(GV, 0); - } - } - } - CurFnStubUses.erase(F); } @@ -1552,19 +1547,6 @@ JITCodeEmitter *JIT::createEmitter(JIT &jit, JITMemoryManager *JMM, return new JITEmitter(jit, JMM, tm); } -// getPointerToNamedFunction - This function is used as a global wrapper to -// JIT::getPointerToNamedFunction for the purpose of resolving symbols when -// bugpoint is debugging the JIT. In that scenario, we are loading an .so and -// need to resolve function(s) that are being mis-codegenerated, so we need to -// resolve their addresses at runtime, and this is the way to do it. -extern "C" { - void *getPointerToNamedFunction(const char *Name) { - if (Function *F = TheJIT->FindFunctionNamed(Name)) - return TheJIT->getPointerToFunction(F); - return TheJIT->getPointerToNamedFunction(Name); - } -} - // getPointerToFunctionOrStub - If the specified function has been // code-gen'd, return a pointer to the function. If not, compile it, or use // a stub to implement lazy compilation if available. diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 7f441b0..8487c83 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -159,7 +159,7 @@ static bool RecursiveResolveTypesI(const Type *DstTy, const Type *SrcTy, if (DstTy == SrcTy) return false; // If already equal, noop // If we found our opaque type, resolve it now! 
- if (isa<OpaqueType>(DstTy) || isa<OpaqueType>(SrcTy)) + if (DstTy->isOpaqueTy() || SrcTy->isOpaqueTy()) return ResolveTypes(DstTy, SrcTy); // Two types cannot be resolved together if they are of different primitive diff --git a/lib/MC/Android.mk b/lib/MC/Android.mk new file mode 100644 index 0000000..dac5a54 --- /dev/null +++ b/lib/MC/Android.mk @@ -0,0 +1,45 @@ +LOCAL_PATH:= $(call my-dir) + +mc_SRC_FILES := \ + MCAsmInfo.cpp \ + MCAsmInfoCOFF.cpp \ + MCAsmInfoDarwin.cpp \ + MCAsmStreamer.cpp \ + MCAssembler.cpp \ + MCCodeEmitter.cpp \ + MCContext.cpp \ + MCDisassembler.cpp \ + MCExpr.cpp \ + MCInst.cpp \ + MCInstPrinter.cpp \ + MCMachOStreamer.cpp \ + MCNullStreamer.cpp \ + MCSection.cpp \ + MCSectionELF.cpp \ + MCSectionMachO.cpp \ + MCStreamer.cpp \ + MCSymbol.cpp \ + MCValue.cpp \ + TargetAsmBackend.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(mc_SRC_FILES) + +LOCAL_MODULE:= libLLVMMC + +include $(LLVM_HOST_BUILD_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(mc_SRC_FILES) + +LOCAL_MODULE:= libLLVMMC + +include $(LLVM_DEVICE_BUILD_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index 9ead33b..4cf71dc 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -18,4 +18,5 @@ add_llvm_library(LLVMMC MCStreamer.cpp MCSymbol.cpp MCValue.cpp + TargetAsmBackend.cpp ) diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 828377f..1b66900 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -22,6 +22,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" +#include <ctype.h> using namespace llvm; namespace { @@ -134,6 +135,9 @@ public: unsigned ValueSize = 1, unsigned MaxBytesToEmit = 0); + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0); + virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value = 0); @@ -513,6 +517,13 @@ void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, EmitEOL(); } +void MCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit) { + // Emit with a text fill value. + EmitValueToAlignment(ByteAlignment, MAI.getTextAlignFillValue(), + 1, MaxBytesToEmit); +} + void MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset, unsigned char Value) { // FIXME: Verify that Offset is associated with the current section. 
@@ -552,7 +563,7 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) { for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { MCFixup &F = Fixups[i]; - MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind()); + const MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind()); for (unsigned j = 0; j != Info.TargetSize; ++j) { unsigned Index = F.getOffset() * 8 + Info.TargetOffset + j; assert(Index < Code.size() * 8 && "Invalid offset in fixup!"); @@ -599,7 +610,7 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) { for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { MCFixup &F = Fixups[i]; - MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind()); + const MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind()); OS << " fixup " << char('A' + i) << " - " << "offset: " << F.getOffset() << ", value: " << *F.getValue() << ", kind: " << Info.Name << "\n"; } @@ -617,6 +628,12 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst) { raw_ostream &OS = GetCommentOS(); OS << "<MCInst #" << Inst.getOpcode(); + StringRef InstName; + if (InstPrinter) + InstName = InstPrinter->getOpcodeName(Inst.getOpcode()); + if (!InstName.empty()) + OS << ' ' << InstName; + for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) { OS << "\n "; Inst.getOperand(i).print(OS, &MAI); diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index bdc886b..96227db 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -16,11 +16,17 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachO.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" + +// FIXME: Gross. +#include "../Target/X86/X86FixupKinds.h" + #include <vector> using namespace llvm; @@ -36,6 +42,8 @@ STATISTIC(EmittedFragments, "Number of emitted assembler fragments"); static void WriteFileData(raw_ostream &OS, const MCSectionData &SD, MachObjectWriter &MOW); +static uint64_t WriteNopData(uint64_t Count, MachObjectWriter &MOW); + /// isVirtualSection - Check if this is a section which does not actually exist /// in the object file. static bool isVirtualSection(const MCSection &Section) { @@ -45,6 +53,30 @@ static bool isVirtualSection(const MCSection &Section) { return (Type == MCSectionMachO::S_ZEROFILL); } +static unsigned getFixupKindLog2Size(unsigned Kind) { + switch (Kind) { + default: llvm_unreachable("invalid fixup kind!"); + case X86::reloc_pcrel_1byte: + case FK_Data_1: return 0; + case FK_Data_2: return 1; + case X86::reloc_pcrel_4byte: + case X86::reloc_riprel_4byte: + case FK_Data_4: return 2; + case FK_Data_8: return 3; + } +} + +static bool isFixupKindPCRel(unsigned Kind) { + switch (Kind) { + default: + return false; + case X86::reloc_pcrel_1byte: + case X86::reloc_pcrel_4byte: + case X86::reloc_riprel_4byte: + return true; + } +} + class MachObjectWriter { // See <mach-o/loader.h>. 
enum { @@ -402,13 +434,14 @@ public: uint32_t Word0; uint32_t Word1; }; - void ComputeScatteredRelocationInfo(MCAssembler &Asm, - MCSectionData::Fixup &Fixup, + void ComputeScatteredRelocationInfo(MCAssembler &Asm, MCFragment &Fragment, + MCAsmFixup &Fixup, const MCValue &Target, DenseMap<const MCSymbol*,MCSymbolData*> &SymbolMap, std::vector<MachRelocationEntry> &Relocs) { - uint32_t Address = Fixup.Fragment->getOffset() + Fixup.Offset; + uint32_t Address = Fragment.getOffset() + Fixup.Offset; unsigned IsPCRel = 0; + unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); unsigned Type = RIT_Vanilla; // See <reloc.h>. @@ -424,11 +457,12 @@ public: Value2 = SD->getFragment()->getAddress() + SD->getOffset(); } - unsigned Log2Size = Log2_32(Fixup.Size); - assert((1U << Log2Size) == Fixup.Size && "Invalid fixup size!"); - // The value which goes in the fixup is current value of the expression. Fixup.FixedValue = Value - Value2 + Target.getConstant(); + if (isFixupKindPCRel(Fixup.Kind)) { + Fixup.FixedValue -= Address; + IsPCRel = 1; + } MachRelocationEntry MRE; MRE.Word0 = ((Address << 0) | @@ -453,8 +487,8 @@ public: } } - void ComputeRelocationInfo(MCAssembler &Asm, - MCSectionData::Fixup &Fixup, + void ComputeRelocationInfo(MCAssembler &Asm, MCDataFragment &Fragment, + MCAsmFixup &Fixup, DenseMap<const MCSymbol*,MCSymbolData*> &SymbolMap, std::vector<MachRelocationEntry> &Relocs) { MCValue Target; @@ -466,19 +500,22 @@ public: if (Target.getSymB() || (Target.getSymA() && !Target.getSymA()->isUndefined() && Target.getConstant())) - return ComputeScatteredRelocationInfo(Asm, Fixup, Target, + return ComputeScatteredRelocationInfo(Asm, Fragment, Fixup, Target, SymbolMap, Relocs); // See <reloc.h>. - uint32_t Address = Fixup.Fragment->getOffset() + Fixup.Offset; + uint32_t Address = Fragment.getOffset() + Fixup.Offset; uint32_t Value = 0; unsigned Index = 0; unsigned IsPCRel = 0; + unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); unsigned IsExtern = 0; unsigned Type = 0; if (Target.isAbsolute()) { // constant // SymbolNum of 0 indicates the absolute section. + // + // FIXME: When is this generated? Type = RIT_Vanilla; Value = 0; llvm_unreachable("FIXME: Not yet implemented!"); @@ -495,10 +532,11 @@ public: // // FIXME: O(N) Index = 1; - for (MCAssembler::iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it, ++Index) + MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); + for (; it != ie; ++it, ++Index) if (&*it == SD->getFragment()->getParent()) break; + assert(it != ie && "Unable to find section index!"); Value = SD->getFragment()->getAddress() + SD->getOffset(); } @@ -508,8 +546,10 @@ public: // The value which goes in the fixup is current value of the expression. Fixup.FixedValue = Value + Target.getConstant(); - unsigned Log2Size = Log2_32(Fixup.Size); - assert((1U << Log2Size) == Fixup.Size && "Invalid fixup size!"); + if (isFixupKindPCRel(Fixup.Kind)) { + Fixup.FixedValue -= Address; + IsPCRel = 1; + } // struct relocation_info (8 bytes) MachRelocationEntry MRE; @@ -766,17 +806,20 @@ public: // is written. std::vector<MachRelocationEntry> RelocInfos; uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; - for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; - ++it) { + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { MCSectionData &SD = *it; // The assembler writes relocations in the reverse order they were seen. // // FIXME: It is probably more complicated than this. 
unsigned NumRelocsStart = RelocInfos.size(); - for (unsigned i = 0, e = SD.fixup_size(); i != e; ++i) - ComputeRelocationInfo(Asm, SD.getFixups()[e - i - 1], SymbolMap, - RelocInfos); + for (MCSectionData::reverse_iterator it2 = SD.rbegin(), + ie2 = SD.rend(); it2 != ie2; ++it2) + if (MCDataFragment *DF = dyn_cast<MCDataFragment>(&*it2)) + for (unsigned i = 0, e = DF->fixup_size(); i != e; ++i) + ComputeRelocationInfo(Asm, *DF, DF->getFixups()[e - i - 1], + SymbolMap, RelocInfos); unsigned NumRelocs = RelocInfos.size() - NumRelocsStart; uint64_t SectionStart = SectionDataStart + SD.getAddress(); @@ -871,6 +914,16 @@ public: OS << StringTable.str(); } } + + void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &DF) { + unsigned Size = 1 << getFixupKindLog2Size(Fixup.Kind); + + // FIXME: Endianness assumption. + assert(Fixup.Offset + Size <= DF.getContents().size() && + "Invalid fixup offset!"); + for (unsigned i = 0; i != Size; ++i) + DF.getContents()[Fixup.Offset + i] = uint8_t(Fixup.FixedValue >> (i * 8)); + } }; /* *** */ @@ -905,35 +958,12 @@ MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A) Address(~UINT64_C(0)), Size(~UINT64_C(0)), FileSize(~UINT64_C(0)), - LastFixupLookup(~0), HasInstructions(false) { if (A) A->getSectionList().push_back(this); } -const MCSectionData::Fixup * -MCSectionData::LookupFixup(const MCFragment *Fragment, uint64_t Offset) const { - // Use a one level cache to turn the common case of accessing the fixups in - // order into O(1) instead of O(N). - unsigned i = LastFixupLookup, Count = Fixups.size(), End = Fixups.size(); - if (i >= End) - i = 0; - while (Count--) { - const Fixup &F = Fixups[i]; - if (F.Fragment == Fragment && F.Offset == Offset) { - LastFixupLookup = i; - return &F; - } - - ++i; - if (i == End) - i = 0; - } - - return 0; -} - /* *** */ MCSymbolData::MCSymbolData() : Symbol(0) {} @@ -980,31 +1010,10 @@ void MCAssembler::LayoutSection(MCSectionData &SD) { } case MCFragment::FT_Data: + case MCFragment::FT_Fill: F.setFileSize(F.getMaxFileSize()); break; - case MCFragment::FT_Fill: { - MCFillFragment &FF = cast<MCFillFragment>(F); - - F.setFileSize(F.getMaxFileSize()); - - MCValue Target; - if (!FF.getValue().EvaluateAsRelocatable(Target)) - llvm_report_error("expected relocatable expression"); - - // If the fill value is constant, thats it. - if (Target.isAbsolute()) - break; - - // Otherwise, add fixups for the values. - for (uint64_t i = 0, e = FF.getCount(); i != e; ++i) { - MCSectionData::Fixup Fix(F, i * FF.getValueSize(), - FF.getValue(),FF.getValueSize()); - SD.getFixups().push_back(Fix); - } - break; - } - case MCFragment::FT_Org: { MCOrgFragment &OF = cast<MCOrgFragment>(F); @@ -1051,6 +1060,64 @@ void MCAssembler::LayoutSection(MCSectionData &SD) { SD.setFileSize(Address - SD.getAddress()); } +/// WriteNopData - Write optimal nops to the output file for the \arg Count +/// bytes. This returns the number of bytes written. It may return 0 if +/// the \arg Count is more than the maximum optimal nops. +/// +/// FIXME this is X86 32-bit specific and should move to a better place. 
+static uint64_t WriteNopData(uint64_t Count, MachObjectWriter &MOW) { + static const uint8_t Nops[16][16] = { + // nop + {0x90}, + // xchg %ax,%ax + {0x66, 0x90}, + // nopl (%[re]ax) + {0x0f, 0x1f, 0x00}, + // nopl 0(%[re]ax) + {0x0f, 0x1f, 0x40, 0x00}, + // nopl 0(%[re]ax,%[re]ax,1) + {0x0f, 0x1f, 0x44, 0x00, 0x00}, + // nopw 0(%[re]ax,%[re]ax,1) + {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, + // nopl 0L(%[re]ax) + {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}, + // nopl 0L(%[re]ax,%[re]ax,1) + {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + // nopw 0L(%[re]ax,%[re]ax,1) + {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + // nopw %cs:0L(%[re]ax,%[re]ax,1) + {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + // nopl 0(%[re]ax,%[re]ax,1) + // nopw 0(%[re]ax,%[re]ax,1) + {0x0f, 0x1f, 0x44, 0x00, 0x00, + 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, + // nopw 0(%[re]ax,%[re]ax,1) + // nopw 0(%[re]ax,%[re]ax,1) + {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, + 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, + // nopw 0(%[re]ax,%[re]ax,1) + // nopl 0L(%[re]ax) */ + {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, + 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}, + // nopl 0L(%[re]ax) + // nopl 0L(%[re]ax) + {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}, + // nopl 0L(%[re]ax) + // nopl 0L(%[re]ax,%[re]ax,1) + {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00} + }; + + if (Count > 15) + return 0; + + for (uint64_t i = 0; i < Count; i++) + MOW.Write8 (uint8_t(Nops[Count - 1][i])); + + return Count; +} + /// WriteFileData - Write the \arg F data to the output file. static void WriteFileData(raw_ostream &OS, const MCFragment &F, MachObjectWriter &MOW) { @@ -1074,6 +1141,14 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F, "' is not a divisor of padding size '" + Twine(AF.getFileSize()) + "'"); + // See if we are aligning with nops, and if so do that first to try to fill + // the Count bytes. Then if that did not fill any bytes or there are any + // bytes left to fill use the the Value and ValueSize to fill the rest. + if (AF.getEmitNops()) { + uint64_t NopByteCount = WriteNopData(Count, MOW); + Count -= NopByteCount; + } + for (uint64_t i = 0; i != Count; ++i) { switch (AF.getValueSize()) { default: @@ -1087,39 +1162,30 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F, break; } - case MCFragment::FT_Data: + case MCFragment::FT_Data: { + MCDataFragment &DF = cast<MCDataFragment>(F); + + // Apply the fixups. + // + // FIXME: Move elsewhere. + for (MCDataFragment::const_fixup_iterator it = DF.fixup_begin(), + ie = DF.fixup_end(); it != ie; ++it) + MOW.ApplyFixup(*it, DF); + OS << cast<MCDataFragment>(F).getContents().str(); break; + } case MCFragment::FT_Fill: { MCFillFragment &FF = cast<MCFillFragment>(F); - - int64_t Value = 0; - - MCValue Target; - if (!FF.getValue().EvaluateAsRelocatable(Target)) - llvm_report_error("expected relocatable expression"); - - if (Target.isAbsolute()) - Value = Target.getConstant(); for (uint64_t i = 0, e = FF.getCount(); i != e; ++i) { - if (!Target.isAbsolute()) { - // Find the fixup. - // - // FIXME: Find a better way to write in the fixes. 
- const MCSectionData::Fixup *Fixup = - F.getParent()->LookupFixup(&F, i * FF.getValueSize()); - assert(Fixup && "Missing fixup for fill value!"); - Value = Fixup->FixedValue; - } - switch (FF.getValueSize()) { default: assert(0 && "Invalid size!"); - case 1: MOW.Write8 (uint8_t (Value)); break; - case 2: MOW.Write16(uint16_t(Value)); break; - case 4: MOW.Write32(uint32_t(Value)); break; - case 8: MOW.Write64(uint64_t(Value)); break; + case 1: MOW.Write8 (uint8_t (FF.getValue())); break; + case 2: MOW.Write16(uint16_t(FF.getValue())); break; + case 4: MOW.Write32(uint32_t(FF.getValue())); break; + case 8: MOW.Write64(uint64_t(FF.getValue())); break; } } break; @@ -1167,6 +1233,10 @@ static void WriteFileData(raw_ostream &OS, const MCSectionData &SD, } void MCAssembler::Finish() { + DEBUG_WITH_TYPE("mc-dump", { + llvm::errs() << "assembler backend - pre-layout\n--\n"; + dump(); }); + // Layout the concrete sections and fragments. uint64_t Address = 0; MCSectionData *Prev = 0; @@ -1205,9 +1275,149 @@ void MCAssembler::Finish() { Address += SD.getSize(); } + DEBUG_WITH_TYPE("mc-dump", { + llvm::errs() << "assembler backend - post-layout\n--\n"; + dump(); }); + // Write the object file. MachObjectWriter MOW(OS); MOW.WriteObject(*this); OS.flush(); } + + +// Debugging methods + +namespace llvm { + +raw_ostream &operator<<(raw_ostream &OS, const MCAsmFixup &AF) { + OS << "<MCAsmFixup" << " Offset:" << AF.Offset << " Value:" << *AF.Value + << " Kind:" << AF.Kind << ">"; + return OS; +} + +} + +void MCFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCFragment " << (void*) this << " Offset:" << Offset + << " FileSize:" << FileSize; + + OS << ">"; +} + +void MCAlignFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCAlignFragment "; + this->MCFragment::dump(); + OS << "\n "; + OS << " Alignment:" << getAlignment() + << " Value:" << getValue() << " ValueSize:" << getValueSize() + << " MaxBytesToEmit:" << getMaxBytesToEmit() << ">"; +} + +void MCDataFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCDataFragment "; + this->MCFragment::dump(); + OS << "\n "; + OS << " Contents:["; + for (unsigned i = 0, e = getContents().size(); i != e; ++i) { + if (i) OS << ","; + OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF); + } + OS << "] (" << getContents().size() << " bytes)"; + + if (!getFixups().empty()) { + OS << ",\n "; + OS << " Fixups:["; + for (fixup_iterator it = fixup_begin(), ie = fixup_end(); it != ie; ++it) { + if (it != fixup_begin()) OS << ",\n "; + OS << *it; + } + OS << "]"; + } + + OS << ">"; +} + +void MCFillFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCFillFragment "; + this->MCFragment::dump(); + OS << "\n "; + OS << " Value:" << getValue() << " ValueSize:" << getValueSize() + << " Count:" << getCount() << ">"; +} + +void MCOrgFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCOrgFragment "; + this->MCFragment::dump(); + OS << "\n "; + OS << " Offset:" << getOffset() << " Value:" << getValue() << ">"; +} + +void MCZeroFillFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCZeroFillFragment "; + this->MCFragment::dump(); + OS << "\n "; + OS << " Size:" << getSize() << " Alignment:" << getAlignment() << ">"; +} + +void MCSectionData::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCSectionData"; + OS << " Alignment:" << getAlignment() << " Address:" << Address + << " Size:" << Size << " FileSize:" << FileSize + << " Fragments:["; + for (iterator it = begin(), ie = 
end(); it != ie; ++it) { + if (it != begin()) OS << ",\n "; + it->dump(); + } + OS << "]>"; +} + +void MCSymbolData::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCSymbolData Symbol:" << getSymbol() + << " Fragment:" << getFragment() << " Offset:" << getOffset() + << " Flags:" << getFlags() << " Index:" << getIndex(); + if (isCommon()) + OS << " (common, size:" << getCommonSize() + << " align: " << getCommonAlignment() << ")"; + if (isExternal()) + OS << " (external)"; + if (isPrivateExtern()) + OS << " (private extern)"; + OS << ">"; +} + +void MCAssembler::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCAssembler\n"; + OS << " Sections:["; + for (iterator it = begin(), ie = end(); it != ie; ++it) { + if (it != begin()) OS << ",\n "; + it->dump(); + } + OS << "],\n"; + OS << " Symbols:["; + + for (symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) { + if (it != symbol_begin()) OS << ",\n "; + it->dump(); + } + OS << "]>\n"; +} diff --git a/lib/MC/MCCodeEmitter.cpp b/lib/MC/MCCodeEmitter.cpp index c122763..accb06c 100644 --- a/lib/MC/MCCodeEmitter.cpp +++ b/lib/MC/MCCodeEmitter.cpp @@ -16,3 +16,15 @@ MCCodeEmitter::MCCodeEmitter() { MCCodeEmitter::~MCCodeEmitter() { } + +const MCFixupKindInfo &MCCodeEmitter::getFixupKindInfo(MCFixupKind Kind) const { + static const MCFixupKindInfo Builtins[] = { + { "FK_Data_1", 0, 8 }, + { "FK_Data_2", 0, 16 }, + { "FK_Data_4", 0, 32 }, + { "FK_Data_8", 0, 64 } + }; + + assert(Kind <= 3 && "Unknown fixup kind"); + return Builtins[Kind]; +} diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp index e90c03c..92a7154 100644 --- a/lib/MC/MCInstPrinter.cpp +++ b/lib/MC/MCInstPrinter.cpp @@ -8,7 +8,14 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCInstPrinter.h" +#include "llvm/ADT/StringRef.h" using namespace llvm; MCInstPrinter::~MCInstPrinter() { } + +/// getOpcodeName - Return the name of the specified opcode enum (e.g. +/// "MOV32ri") or empty if we can't resolve it. +StringRef MCInstPrinter::getOpcodeName(unsigned Opcode) const { + return ""; +} diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 99a819f..a7a8a5d 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -137,6 +137,8 @@ public: virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, unsigned ValueSize = 1, unsigned MaxBytesToEmit = 0); + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0); virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value = 0); @@ -333,7 +335,22 @@ void MCMachOStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size, unsigned AddrSpace) { - new MCFillFragment(*AddValueSymbols(Value), Size, 1, CurSectionData); + MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); + if (!DF) + DF = new MCDataFragment(CurSectionData); + + // Avoid fixups when possible. + int64_t AbsValue; + if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue)) { + // FIXME: Endianness assumption. 
+ for (unsigned i = 0; i != Size; ++i) + DF->getContents().push_back(uint8_t(AbsValue >> (i * 8))); + } else { + DF->getFixups().push_back(MCAsmFixup(DF->getContents().size(), + *AddValueSymbols(Value), + MCFixup::getKindForSize(Size))); + DF->getContents().resize(DF->getContents().size() + Size, 0); + } } void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment, @@ -342,7 +359,20 @@ void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment, if (MaxBytesToEmit == 0) MaxBytesToEmit = ByteAlignment; new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit, - CurSectionData); + false /* EmitNops */, CurSectionData); + + // Update the maximum alignment on the current section if necessary. + if (ByteAlignment > CurSectionData->getAlignment()) + CurSectionData->setAlignment(ByteAlignment); +} + +void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit) { + if (MaxBytesToEmit == 0) + MaxBytesToEmit = ByteAlignment; + // FIXME the 0x90 is the default x86 1 byte nop opcode. + new MCAlignFragment(ByteAlignment, 0x90, 1, MaxBytesToEmit, + true /* EmitNops */, CurSectionData); // Update the maximum alignment on the current section if necessary. if (ByteAlignment > CurSectionData->getAlignment()) @@ -365,12 +395,23 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) { CurSectionData->setHasInstructions(true); - // FIXME: Relocations! SmallVector<MCFixup, 4> Fixups; SmallString<256> Code; raw_svector_ostream VecOS(Code); Emitter->EncodeInstruction(Inst, VecOS, Fixups); - EmitBytes(VecOS.str(), 0); + VecOS.flush(); + + // Add the fixups and data. + MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); + if (!DF) + DF = new MCDataFragment(CurSectionData); + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + MCFixup &F = Fixups[i]; + DF->getFixups().push_back(MCAsmFixup(DF->getContents().size()+F.getOffset(), + *F.getValue(), + F.getKind())); + } + DF->getContents().append(Code.begin(), Code.end()); } void MCMachOStreamer::Finish() { diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index 46e9ebf..ab61799 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -55,6 +55,9 @@ namespace { unsigned ValueSize = 1, unsigned MaxBytesToEmit = 0) {} + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0) {} + virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value = 0) {} diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index d5bc396..6185c30 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -146,7 +146,7 @@ bool AsmParser::Run() { // FIXME: Target hook & command line option for initial section. Out.SwitchSection(getMachOSection("__TEXT", "__text", MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 0, SectionKind())); + 0, SectionKind::getText())); // Prime the lexer. @@ -325,9 +325,17 @@ bool AsmParser::ParseExpression(const MCExpr *&Res) { /// expr ::= primaryexpr /// bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) { + // Parse the expression. Res = 0; - return ParsePrimaryExpr(Res, EndLoc) || - ParseBinOpRHS(1, Res, EndLoc); + if (ParsePrimaryExpr(Res, EndLoc) || ParseBinOpRHS(1, Res, EndLoc)) + return true; + + // Try to constant fold it up front, if possible. 
+ int64_t Value; + if (Res->EvaluateAsAbsolute(Value)) + Res = MCConstantExpr::Create(Value, getContext()); + + return false; } bool AsmParser::ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) { @@ -906,8 +914,10 @@ bool AsmParser::ParseDirectiveDarwinSection() { return Error(Loc, ErrorStr.c_str()); // FIXME: Arch specific. + bool isText = Segment == "__TEXT"; // FIXME: Hack. Out.SwitchSection(getMachOSection(Segment, Section, TAA, StubSize, - SectionKind())); + isText ? SectionKind::getText() + : SectionKind::getDataRel())); return false; } @@ -921,8 +931,10 @@ bool AsmParser::ParseDirectiveSectionSwitch(const char *Segment, Lex(); // FIXME: Arch specific. + bool isText = StringRef(Segment) == "__TEXT"; // FIXME: Hack. Out.SwitchSection(getMachOSection(Segment, Section, TAA, StubSize, - SectionKind())); + isText ? SectionKind::getText() + : SectionKind::getDataRel())); // Set the implicit alignment, if any. // @@ -1229,8 +1241,14 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { } } - // FIXME: Target specific behavior about how the "extra" bytes are filled. - Out.EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill); + // FIXME: hard code the parser to use EmitCodeAlignment for text when using + // the TextAlignFillValue. + if(Out.getCurrentSection()->getKind().isText() && + Lexer.getMAI().getTextAlignFillValue() == FillExpr) + Out.EmitCodeAlignment(Alignment, MaxBytesToFill); + else + // FIXME: Target specific behavior about how the "extra" bytes are filled. + Out.EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill); return false; } @@ -1354,7 +1372,7 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) { if (IsLocal) { Out.EmitZerofill(getMachOSection("__DATA", "__bss", MCSectionMachO::S_ZEROFILL, 0, - SectionKind()), + SectionKind::getBSS()), Sym, Size, 1 << Pow2Alignment); return false; } @@ -1390,7 +1408,7 @@ bool AsmParser::ParseDirectiveDarwinZerofill() { // Create the zerofill section but no symbol Out.EmitZerofill(getMachOSection(Segment, Section, MCSectionMachO::S_ZEROFILL, 0, - SectionKind())); + SectionKind::getBSS())); return false; } @@ -1448,7 +1466,7 @@ bool AsmParser::ParseDirectiveDarwinZerofill() { // FIXME: Arch specific. Out.EmitZerofill(getMachOSection(Segment, Section, MCSectionMachO::S_ZEROFILL, 0, - SectionKind()), + SectionKind::getBSS()), Sym, Size, 1 << Pow2Alignment); return false; diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp index 6cc67a2..370aad1 100644 --- a/lib/MC/MCSectionMachO.cpp +++ b/lib/MC/MCSectionMachO.cpp @@ -10,6 +10,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCContext.h" #include "llvm/Support/raw_ostream.h" +#include <ctype.h> using namespace llvm; /// SectionTypeDescriptors - These are strings that describe the various section diff --git a/lib/MC/TargetAsmBackend.cpp b/lib/MC/TargetAsmBackend.cpp new file mode 100644 index 0000000..918d272 --- /dev/null +++ b/lib/MC/TargetAsmBackend.cpp @@ -0,0 +1,19 @@ +//===-- TargetAsmBackend.cpp - Target Assembly Backend ---------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetAsmBackend.h" +using namespace llvm; + +TargetAsmBackend::TargetAsmBackend(const Target &T) + : TheTarget(T) +{ +} + +TargetAsmBackend::~TargetAsmBackend() { +} diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 1e6d22f..8f860a6 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/FoldingSet.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include <limits.h> #include <cstring> using namespace llvm; @@ -625,17 +626,58 @@ APFloat::copySignificand(const APFloat &rhs) /* Make this number a NaN, with an arbitrary but deterministic value for the significand. If double or longer, this is a signalling NaN, which may not be ideal. If float, this is QNaN(0). */ -void -APFloat::makeNaN(unsigned type) +void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { category = fcNaN; - // FIXME: Add double and long double support for QNaN(0). - if (semantics->precision == 24 && semantics->maxExponent == 127) { - type |= 0x7fc00000U; - type &= ~0x80000000U; - } else - type = ~0U; - APInt::tcSet(significandParts(), type, partCount()); + sign = Negative; + + integerPart *significand = significandParts(); + unsigned numParts = partCount(); + + // Set the significand bits to the fill. + if (!fill || fill->getNumWords() < numParts) + APInt::tcSet(significand, 0, numParts); + if (fill) { + APInt::tcAssign(significand, fill->getRawData(), + std::min(fill->getNumWords(), numParts)); + + // Zero out the excess bits of the significand. + unsigned bitsToPreserve = semantics->precision - 1; + unsigned part = bitsToPreserve / 64; + bitsToPreserve %= 64; + significand[part] &= ((1ULL << bitsToPreserve) - 1); + for (part++; part != numParts; ++part) + significand[part] = 0; + } + + unsigned QNaNBit = semantics->precision - 2; + + if (SNaN) { + // We always have to clear the QNaN bit to make it an SNaN. + APInt::tcClearBit(significand, QNaNBit); + + // If there are no bits set in the payload, we have to set + // *something* to make it a NaN instead of an infinity; + // conventionally, this is the next bit down from the QNaN bit. + if (APInt::tcIsZero(significand, numParts)) + APInt::tcSetBit(significand, QNaNBit - 1); + } else { + // We always have to set the QNaN bit to make it a QNaN. + APInt::tcSetBit(significand, QNaNBit); + } + + // For x87 extended precision, we want to make a NaN, not a + // pseudo-NaN. Maybe we should expose the ability to make + // pseudo-NaNs? + if (semantics == &APFloat::x87DoubleExtended) + APInt::tcSetBit(significand, QNaNBit + 1); +} + +APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative, + const APInt *fill) { + APFloat value(Sem, uninitialized); + value.makeNaN(SNaN, Negative, fill); + return value; } APFloat & @@ -700,9 +742,14 @@ APFloat::APFloat(const fltSemantics &ourSemantics) { sign = false; } +APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) { + assertArithmeticOK(ourSemantics); + // Allocates storage if necessary but does not initialize it. 
+ initialize(&ourSemantics); +} APFloat::APFloat(const fltSemantics &ourSemantics, - fltCategory ourCategory, bool negative, unsigned type) + fltCategory ourCategory, bool negative) { assertArithmeticOK(ourSemantics); initialize(&ourSemantics); @@ -711,7 +758,7 @@ APFloat::APFloat(const fltSemantics &ourSemantics, if (category == fcNormal) category = fcZero; else if (ourCategory == fcNaN) - makeNaN(type); + makeNaN(); } APFloat::APFloat(const fltSemantics &ourSemantics, const StringRef& text) @@ -2345,11 +2392,24 @@ APFloat::convertFromDecimalString(const StringRef &str, roundingMode rounding_mo if (decDigitValue(*D.firstSigDigit) >= 10U) { category = fcZero; fs = opOK; - } else if ((D.normalizedExponent + 1) * 28738 - <= 8651 * (semantics->minExponent - (int) semantics->precision)) { + + /* Check whether the normalized exponent is high enough to overflow + max during the log-rebasing in the max-exponent check below. */ + } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { + fs = handleOverflow(rounding_mode); + + /* If it wasn't, then it also wasn't high enough to overflow max + during the log-rebasing in the min-exponent check. Check that it + won't overflow min in either check, then perform the min-exponent + check. */ + } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || + (D.normalizedExponent + 1) * 28738 <= + 8651 * (semantics->minExponent - (int) semantics->precision)) { /* Underflow to zero and round. */ zeroSignificand(); fs = normalize(rounding_mode, lfLessThanHalf); + + /* We can finally safely perform the max-exponent check. */ } else if ((D.normalizedExponent - 1) * 42039 >= 12655 * semantics->maxExponent) { /* Overflow and round. */ @@ -3306,7 +3366,7 @@ namespace { void APFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, - unsigned FormatMaxPadding) { + unsigned FormatMaxPadding) const { switch (category) { case fcInfinity: if (isNegative()) diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 3bce3f3..6a6384a 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -2344,13 +2344,21 @@ APInt::tcExtractBit(const integerPart *parts, unsigned int bit) & ((integerPart) 1 << bit % integerPartWidth)) != 0; } -/* Set the given bit of a bignum. */ +/* Set the given bit of a bignum. */ void APInt::tcSetBit(integerPart *parts, unsigned int bit) { parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth); } +/* Clears the given bit of a bignum. */ +void +APInt::tcClearBit(integerPart *parts, unsigned int bit) +{ + parts[bit / integerPartWidth] &= + ~((integerPart) 1 << (bit % integerPartWidth)); +} + /* Returns the bit number of the least significant set bit of a number. If the input number has no bits set -1U is returned. 
*/ unsigned int diff --git a/lib/Support/Android.mk b/lib/Support/Android.mk new file mode 100644 index 0000000..e972753 --- /dev/null +++ b/lib/Support/Android.mk @@ -0,0 +1,72 @@ +LOCAL_PATH:= $(call my-dir) + +support_SRC_FILES := \ + APFloat.cpp \ + APInt.cpp \ + APSInt.cpp \ + Allocator.cpp \ + CommandLine.cpp \ + ConstantRange.cpp \ + Debug.cpp \ + DeltaAlgorithm.cpp \ + Dwarf.cpp \ + ErrorHandling.cpp \ + FileUtilities.cpp \ + FoldingSet.cpp \ + FormattedStream.cpp \ + GraphWriter.cpp \ + IsInf.cpp \ + IsNAN.cpp \ + ManagedStatic.cpp \ + MemoryBuffer.cpp \ + MemoryObject.cpp \ + PluginLoader.cpp \ + PrettyStackTrace.cpp \ + Regex.cpp \ + SlowOperationInformer.cpp \ + SmallPtrSet.cpp \ + SmallVector.cpp \ + SourceMgr.cpp \ + Statistic.cpp \ + StringExtras.cpp \ + StringMap.cpp \ + StringPool.cpp \ + StringRef.cpp \ + SystemUtils.cpp \ + TargetRegistry.cpp \ + Timer.cpp \ + Triple.cpp \ + Twine.cpp \ + circular_raw_ostream.cpp \ + raw_os_ostream.cpp \ + raw_ostream.cpp \ + regcomp.c \ + regerror.c \ + regexec.c \ + regfree.c \ + regstrlcpy.c + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +# FIXME: This only requires RTTI because tblgen uses it. Fix that. +REQUIRES_RTTI := 1 + +LOCAL_SRC_FILES := $(support_SRC_FILES) + +LOCAL_MODULE:= libLLVMSupport + +include $(LLVM_HOST_BUILD_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(support_SRC_FILES) + +LOCAL_MODULE:= libLLVMSupport + +include $(LLVM_DEVICE_BUILD_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 961dc1f..2ab4103 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -650,7 +650,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv, if (Handler == 0) { if (SinkOpts.empty()) { errs() << ProgramName << ": Unknown command line argument '" - << argv[i] << "'. Try: '" << argv[0] << " --help'\n"; + << argv[i] << "'. 
Try: '" << argv[0] << " -help'\n"; ErrorParsing = true; } else { for (SmallVectorImpl<Option*>::iterator I = SinkOpts.begin(), @@ -673,7 +673,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv, errs() << ProgramName << ": Not enough positional command line arguments specified!\n" << "Must specify at least " << NumPositionalRequired - << " positional arguments: See: " << argv[0] << " --help\n"; + << " positional arguments: See: " << argv[0] << " -help\n"; ErrorParsing = true; } else if (!HasUnlimitedPositionals @@ -681,7 +681,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv, errs() << ProgramName << ": Too many positional arguments specified!\n" << "Can specify at most " << PositionalOpts.size() - << " positional arguments: See: " << argv[0] << " --help\n"; + << " positional arguments: See: " << argv[0] << " -help\n"; ErrorParsing = true; } else if (ConsumeAfterOpt == 0) { @@ -1029,7 +1029,7 @@ void generic_parser_base::printOptionInfo(const Option &O, //===----------------------------------------------------------------------===// -// --help and --help-hidden option implementation +// -help and -help-hidden option implementation // static int OptNameCompare(const void *LHS, const void *RHS) { @@ -1134,7 +1134,7 @@ static HelpPrinter NormalPrinter(false); static HelpPrinter HiddenPrinter(true); static cl::opt<HelpPrinter, true, parser<bool> > -HOp("help", cl::desc("Display available options (--help-hidden for more)"), +HOp("help", cl::desc("Display available options (-help-hidden for more)"), cl::location(NormalPrinter), cl::ValueDisallowed); static cl::opt<HelpPrinter, true, parser<bool> > @@ -1222,8 +1222,8 @@ void cl::PrintHelpMessage() { // NormalPrinter variable is a HelpPrinter and the help gets printed when // its operator= is invoked. That's because the "normal" usages of the // help printer is to be assigned true/false depending on whether the - // --help option was given or not. Since we're circumventing that we have - // to make it look like --help was given, so we assign true. + // -help option was given or not. Since we're circumventing that we have + // to make it look like -help was given, so we assign true. NormalPrinter = true; } diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp index 9ab3666..c72b5a1 100644 --- a/lib/Support/FormattedStream.cpp +++ b/lib/Support/FormattedStream.cpp @@ -56,15 +56,14 @@ void formatted_raw_ostream::ComputeColumn(const char *Ptr, size_t Size) { /// PadToColumn - Align the output to some column number. /// /// \param NewCol - The column to move to. -/// \param MinPad - The minimum space to give after the most recent -/// I/O, even if the current column + minpad > newcol. /// -void formatted_raw_ostream::PadToColumn(unsigned NewCol) { +formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) { // Figure out what's in the buffer and add it to the column count. ComputeColumn(getBufferStart(), GetNumBytesInBuffer()); // Output spaces until we reach the desired column. 
indent(std::max(int(NewCol - ColumnScanned), 1)); + return *this; } void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) { diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp index c8bca6e..ec84f9b 100644 --- a/lib/Support/GraphWriter.cpp +++ b/lib/Support/GraphWriter.cpp @@ -137,7 +137,7 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait, args.clear(); args.push_back(gv.c_str()); args.push_back(PSFilename.c_str()); - args.push_back("-spartan"); + args.push_back("--spartan"); args.push_back(0); ErrMsg.clear(); diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index 9253b01..eb046d0 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -174,7 +174,8 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr, #ifdef O_BINARY OpenFlags |= O_BINARY; // Open input file in binary mode on win32. #endif - int FD = ::open(Filename.str().c_str(), O_RDONLY|OpenFlags); + SmallString<256> PathBuf(Filename.begin(), Filename.end()); + int FD = ::open(PathBuf.c_str(), O_RDONLY|OpenFlags); if (FD == -1) { if (ErrStr) *ErrStr = strerror(errno); return 0; diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp index 618ca05..a7631de 100644 --- a/lib/Support/Regex.cpp +++ b/lib/Support/Regex.cpp @@ -90,3 +90,79 @@ bool Regex::match(const StringRef &String, SmallVectorImpl<StringRef> *Matches){ return true; } + +std::string Regex::sub(StringRef Repl, StringRef String, + std::string *Error) { + SmallVector<StringRef, 8> Matches; + + // Reset error, if given. + if (Error && !Error->empty()) *Error = ""; + + // Return the input if there was no match. + if (!match(String, &Matches)) + return String; + + // Otherwise splice in the replacement string, starting with the prefix before + // the match. + std::string Res(String.begin(), Matches[0].begin()); + + // Then the replacement string, honoring possible substitutions. + while (!Repl.empty()) { + // Skip to the next escape. + std::pair<StringRef, StringRef> Split = Repl.split('\\'); + + // Add the skipped substring. + Res += Split.first; + + // Check for terminimation and trailing backslash. + if (Split.second.empty()) { + if (Repl.size() != Split.first.size() && + Error && Error->empty()) + *Error = "replacement string contained trailing backslash"; + break; + } + + // Otherwise update the replacement string and interpret escapes. + Repl = Split.second; + + // FIXME: We should have a StringExtras function for mapping C99 escapes. + switch (Repl[0]) { + // Treat all unrecognized characters as self-quoting. + default: + Res += Repl[0]; + Repl = Repl.substr(1); + break; + + // Single character escapes. + case 't': + Res += '\t'; + Repl = Repl.substr(1); + break; + case 'n': + Res += '\n'; + Repl = Repl.substr(1); + break; + + // Decimal escapes are backreferences. + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': { + // Extract the backreference number. + StringRef Ref = Repl.slice(0, Repl.find_first_not_of("0123456789")); + Repl = Repl.substr(Ref.size()); + + unsigned RefValue; + if (!Ref.getAsInteger(10, RefValue) && + RefValue < Matches.size()) + Res += Matches[RefValue]; + else if (Error && Error->empty()) + *Error = "invalid backreference string '" + Ref.str() + "'"; + break; + } + } + } + + // And finally the suffix. 
+ Res += StringRef(Matches[0].end(), String.end() - Matches[0].end()); + + return Res; +} diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index ae2640b..2b262dc 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/APInt.h" using namespace llvm; @@ -172,23 +173,28 @@ size_t StringRef::count(StringRef Str) const { return Count; } +static unsigned GetAutoSenseRadix(StringRef &Str) { + if (Str.startswith("0x")) { + Str = Str.substr(2); + return 16; + } else if (Str.startswith("0b")) { + Str = Str.substr(2); + return 2; + } else if (Str.startswith("0")) { + return 8; + } else { + return 10; + } +} + + /// GetAsUnsignedInteger - Workhorse method that converts a integer character /// sequence of radix up to 36 to an unsigned long long value. static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix, unsigned long long &Result) { // Autosense radix if not specified. - if (Radix == 0) { - if (Str.startswith("0x")) { - Str = Str.substr(2); - Radix = 16; - } else if (Str.startswith("0b")) { - Str = Str.substr(2); - Radix = 2; - } else if (Str.startswith("0")) - Radix = 8; - else - Radix = 10; - } + if (Radix == 0) + Radix = GetAutoSenseRadix(Str); // Empty strings (after the radix autosense) are invalid. if (Str.empty()) return true; @@ -272,3 +278,78 @@ bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const { Result = Val; return false; } + +bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { + StringRef Str = *this; + + // Autosense radix if not specified. + if (Radix == 0) + Radix = GetAutoSenseRadix(Str); + + assert(Radix > 1 && Radix <= 36); + + // Empty strings (after the radix autosense) are invalid. + if (Str.empty()) return true; + + // Skip leading zeroes. This can be a significant improvement if + // it means we don't need > 64 bits. + while (!Str.empty() && Str.front() == '0') + Str = Str.substr(1); + + // If it was nothing but zeroes.... + if (Str.empty()) { + Result = APInt(64, 0); + return false; + } + + // (Over-)estimate the required number of bits. + unsigned Log2Radix = 0; + while ((1U << Log2Radix) < Radix) Log2Radix++; + bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix); + + unsigned BitWidth = Log2Radix * Str.size(); + if (BitWidth < Result.getBitWidth()) + BitWidth = Result.getBitWidth(); // don't shrink the result + else + Result.zext(BitWidth); + + APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix + if (!IsPowerOf2Radix) { + // These must have the same bit-width as Result. + RadixAP = APInt(BitWidth, Radix); + CharAP = APInt(BitWidth, 0); + } + + // Parse all the bytes of the string given this radix. + Result = 0; + while (!Str.empty()) { + unsigned CharVal; + if (Str[0] >= '0' && Str[0] <= '9') + CharVal = Str[0]-'0'; + else if (Str[0] >= 'a' && Str[0] <= 'z') + CharVal = Str[0]-'a'+10; + else if (Str[0] >= 'A' && Str[0] <= 'Z') + CharVal = Str[0]-'A'+10; + else + return true; + + // If the parsed value is larger than the integer radix, the string is + // invalid. + if (CharVal >= Radix) + return true; + + // Add in this character. 
+ if (IsPowerOf2Radix) { + Result <<= Log2Radix; + Result |= CharVal; + } else { + Result *= RadixAP; + CharAP = CharVal; + Result += CharAP; + } + + Str = Str.substr(1); + } + + return false; +} diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 5a76184..61bf0a7 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -40,6 +40,7 @@ const char *Triple::getArchTypeName(ArchType Kind) { case x86: return "i386"; case x86_64: return "x86_64"; case xcore: return "xcore"; + case mblaze: return "mblaze"; } return "<invalid>"; @@ -62,6 +63,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case ppc64: case ppc: return "ppc"; + case mblaze: return "mblaze"; + case sparcv9: case sparc: return "sparc"; @@ -127,6 +130,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { return ppc64; if (Name == "ppc") return ppc; + if (Name == "mblaze") + return mblaze; if (Name == "sparc") return sparc; if (Name == "sparcv9") @@ -198,6 +203,8 @@ const char *Triple::getArchNameForAssembler() { return "ppc"; if (Str == "powerpc64") return "ppc64"; + if (Str == "mblaze" || Str == "microblaze") + return "mblaze"; if (Str == "arm") return "arm"; if (Str == "armv4t" || Str == "thumbv4t") @@ -234,6 +241,8 @@ void Triple::Parse() const { Arch = ppc; else if ((ArchName == "powerpc64") || (ArchName == "ppu")) Arch = ppc64; + else if (ArchName == "mblaze") + Arch = mblaze; else if (ArchName == "arm" || ArchName.startswith("armv") || ArchName == "xscale") diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index af6dc7c..071c924 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -368,6 +368,7 @@ void format_object_base::home() { /// if no error occurred. raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo, unsigned Flags) : pos(0) { + assert(Filename != 0 && "Filename is null"); // Verify that we don't have both "append" and "excl". assert((!(Flags & F_Excl) || !(Flags & F_Append)) && "Cannot specify both 'excl' and 'append' file creation flags!"); @@ -574,12 +575,18 @@ void raw_svector_ostream::resync() { } void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) { - assert(Ptr == OS.end() && OS.size() + Size <= OS.capacity() && - "Invalid write_impl() call!"); - - // We don't need to copy the bytes, just commit the bytes to the - // SmallVector. - OS.set_size(OS.size() + Size); + // If we're writing bytes from the end of the buffer into the smallvector, we + // don't need to copy the bytes, just commit the bytes because they are + // already in the right place. + if (Ptr == OS.end()) { + assert(OS.size() + Size <= OS.capacity() && "Invalid write_impl() call!"); + OS.set_size(OS.size() + Size); + } else { + assert(GetNumBytesInBuffer() == 0 && + "Should be writing from buffer if some bytes in it"); + // Otherwise, do copy the bytes. + OS.append(Ptr, Ptr+Size); + } // Grow the vector if necessary. 
if (OS.capacity() - OS.size() < 64) diff --git a/lib/System/Android.mk b/lib/System/Android.mk new file mode 100644 index 0000000..3f11fc7 --- /dev/null +++ b/lib/System/Android.mk @@ -0,0 +1,46 @@ +LOCAL_PATH:= $(call my-dir) + +system_SRC_FILES := \ + Alarm.cpp \ + Atomic.cpp \ + Disassembler.cpp \ + Errno.cpp \ + Host.cpp \ + IncludeFile.cpp \ + Memory.cpp \ + Mutex.cpp \ + Path.cpp \ + Process.cpp \ + Program.cpp \ + RWMutex.cpp \ + Signals.cpp \ + ThreadLocal.cpp \ + Threading.cpp \ + TimeValue.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +REQUIRES_RTTI := 1 + +LOCAL_SRC_FILES := $(system_SRC_FILES) +LOCAL_CFLAGS += -march=i686 + +LOCAL_MODULE:= libLLVMSystem + +include $(LLVM_HOST_BUILD_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +REQUIRES_RTTI := 1 + +LOCAL_SRC_FILES := $(system_SRC_FILES) + +LOCAL_MODULE:= libLLVMSystem + +include $(LLVM_DEVICE_BUILD_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/System/Unix/Host.inc b/lib/System/Unix/Host.inc index c76d6a4..5b11876 100644 --- a/lib/System/Unix/Host.inc +++ b/lib/System/Unix/Host.inc @@ -21,6 +21,7 @@ #include "Unix.h" #include <sys/utsname.h> #include <string> +#include <ctype.h> using namespace llvm; diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc index e8c2806..c10498a 100644 --- a/lib/System/Unix/Program.inc +++ b/lib/System/Unix/Program.inc @@ -126,7 +126,7 @@ static void TimeOutHandler(int Sig) { static void SetMemoryLimits (unsigned size) { -#if HAVE_SYS_RESOURCE_H +#if HAVE_SYS_RESOURCE_H && HAVE_GETRLIMIT && HAVE_SETRLIMIT struct rlimit r; __typeof__ (r.rlim_cur) limit = (__typeof__ (r.rlim_cur)) (size) * 1048576; diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc index 676e1e5..c8ec68a 100644 --- a/lib/System/Unix/Signals.inc +++ b/lib/System/Unix/Signals.inc @@ -52,7 +52,16 @@ static const int *const IntSigsEnd = // KillSigs - Signals that are synchronous with the program that will cause it // to die. static const int KillSigs[] = { - SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV, SIGSYS, SIGXCPU, SIGXFSZ + SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV +#ifdef SIGSYS + , SIGSYS +#endif +#ifdef SIGXCPU + , SIGXCPU +#endif +#ifdef SIGXFSZ + , SIGXFSZ +#endif #ifdef SIGEMT , SIGEMT #endif diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 6fe7c2c..8e537d8 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -643,6 +643,13 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); + // tGPR is used sometimes in ARM instructions that need to avoid using + // certain registers. Just treat it as GPR here. + if (DestRC == ARM::tGPRRegisterClass) + DestRC = ARM::GPRRegisterClass; + if (SrcRC == ARM::tGPRRegisterClass) + SrcRC = ARM::GPRRegisterClass; + if (DestRC != SrcRC) { if (DestRC->getSize() != SrcRC->getSize()) return false; @@ -697,6 +704,11 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MFI.getObjectSize(FI), Align); + // tGPR is used sometimes in ARM instructions that need to avoid using + // certain registers. Just treat it as GPR here. 
+ if (RC == ARM::tGPRRegisterClass) + RC = ARM::GPRRegisterClass; + if (RC == ARM::GPRRegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR)) .addReg(SrcReg, getKillRegState(isKill)) @@ -745,6 +757,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MFI.getObjectSize(FI), Align); + // tGPR is used sometimes in ARM instructions that need to avoid using + // certain registers. Just treat it as GPR here. + if (RC == ARM::tGPRRegisterClass) + RC = ARM::GPRRegisterClass; + if (RC == ARM::GPRRegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg) .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)); @@ -1020,9 +1037,8 @@ ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const { return MI; } -bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0, - const MachineInstr *MI1, - const MachineRegisterInfo *MRI) const { +bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, + const MachineInstr *MI1) const { int Opcode = MI0->getOpcode(); if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic || @@ -1051,7 +1067,7 @@ bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0, return ACPV0->hasSameValue(ACPV1); } - return TargetInstrInfoImpl::isIdentical(MI0, MI1, MRI); + return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); } /// getInstrPredicate - If instruction is predicated, returns its predicate diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 0d9d4a7..0194231 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -289,8 +289,8 @@ public: MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const; - virtual bool isIdentical(const MachineInstr *MI, const MachineInstr *Other, - const MachineRegisterInfo *MRI) const; + virtual bool produceSameValue(const MachineInstr *MI0, + const MachineInstr *MI1) const; }; static inline @@ -332,7 +332,7 @@ bool isJumpTableBranchOpcode(int Opc) { static inline bool isIndirectBranchOpcode(int Opc) { - return Opc == ARM::BRIND || Opc == ARM::tBRIND; + return Opc == ARM::BRIND || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND; } /// getInstrPredicate - If instruction is predicated, returns its predicate diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index cb0bd1d..577c363 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -513,7 +513,7 @@ cannotEliminateFrame(const MachineFunction &MF) const { } /// estimateStackSize - Estimate and return the size of the frame. -static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) { +static unsigned estimateStackSize(MachineFunction &MF) { const MachineFrameInfo *FFI = MF.getFrameInfo(); int Offset = 0; for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { @@ -583,14 +583,6 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, SmallVector<unsigned, 4> UnspilledCS2GPRs; ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - - // Calculate and set max stack object alignment early, so we can decide - // whether we will need stack realignment (and thus FP). - if (RealignStack) { - MachineFrameInfo *MFI = MF.getFrameInfo(); - MFI->calculateMaxStackAlignment(); - } - // Spill R4 if Thumb2 function requires stack realignment - it will be used as // scratch register. // FIXME: It will be better just to find spare register here. 
@@ -679,8 +671,16 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } } + // If any of the stack slot references may be out of range of an immediate + // offset, make sure a register (or a spill slot) is available for the + // register scavenger. Note that if we're indexing off the frame pointer, the + // effective stack size is 4 bytes larger since the FP points to the stack + // slot of the previous FP. + bool BigStack = RS && + estimateStackSize(MF) + (hasFP(MF) ? 4 : 0) >= estimateRSStackSizeLimit(MF); + bool ExtraCSSpill = false; - if (!CanEliminateFrame || cannotEliminateFrame(MF)) { + if (BigStack || !CanEliminateFrame || cannotEliminateFrame(MF)) { AFI->setHasStackFrame(true); // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. @@ -735,51 +735,43 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // callee-saved register or reserve a special spill slot to facilitate // register scavenging. Thumb1 needs a spill slot for stack pointer // adjustments also, even when the frame itself is small. - if (RS && !ExtraCSSpill) { - MachineFrameInfo *MFI = MF.getFrameInfo(); - // If any of the stack slot references may be out of range of an - // immediate offset, make sure a register (or a spill slot) is - // available for the register scavenger. Note that if we're indexing - // off the frame pointer, the effective stack size is 4 bytes larger - // since the FP points to the stack slot of the previous FP. - if (estimateStackSize(MF, MFI) + (hasFP(MF) ? 4 : 0) - >= estimateRSStackSizeLimit(MF)) { - // If any non-reserved CS register isn't spilled, just spill one or two - // extra. That should take care of it! - unsigned NumExtras = TargetAlign / 4; - SmallVector<unsigned, 2> Extras; - while (NumExtras && !UnspilledCS1GPRs.empty()) { - unsigned Reg = UnspilledCS1GPRs.back(); - UnspilledCS1GPRs.pop_back(); + if (BigStack && !ExtraCSSpill) { + // If any non-reserved CS register isn't spilled, just spill one or two + // extra. That should take care of it! + unsigned NumExtras = TargetAlign / 4; + SmallVector<unsigned, 2> Extras; + while (NumExtras && !UnspilledCS1GPRs.empty()) { + unsigned Reg = UnspilledCS1GPRs.back(); + UnspilledCS1GPRs.pop_back(); + if (!isReservedReg(MF, Reg)) { + Extras.push_back(Reg); + NumExtras--; + } + } + // For non-Thumb1 functions, also check for hi-reg CS registers + if (!AFI->isThumb1OnlyFunction()) { + while (NumExtras && !UnspilledCS2GPRs.empty()) { + unsigned Reg = UnspilledCS2GPRs.back(); + UnspilledCS2GPRs.pop_back(); if (!isReservedReg(MF, Reg)) { Extras.push_back(Reg); NumExtras--; } } - // For non-Thumb1 functions, also check for hi-reg CS registers - if (!AFI->isThumb1OnlyFunction()) { - while (NumExtras && !UnspilledCS2GPRs.empty()) { - unsigned Reg = UnspilledCS2GPRs.back(); - UnspilledCS2GPRs.pop_back(); - if (!isReservedReg(MF, Reg)) { - Extras.push_back(Reg); - NumExtras--; - } - } - } - if (Extras.size() && NumExtras == 0) { - for (unsigned i = 0, e = Extras.size(); i != e; ++i) { - MF.getRegInfo().setPhysRegUsed(Extras[i]); - AFI->setCSRegisterIsSpilled(Extras[i]); - } - } else if (!AFI->isThumb1OnlyFunction()) { - // note: Thumb1 functions spill to R12, not the stack. - // Reserve a slot closest to SP or frame pointer. 
- const TargetRegisterClass *RC = ARM::GPRRegisterClass; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); + } + if (Extras.size() && NumExtras == 0) { + for (unsigned i = 0, e = Extras.size(); i != e; ++i) { + MF.getRegInfo().setPhysRegUsed(Extras[i]); + AFI->setCSRegisterIsSpilled(Extras[i]); } + } else if (!AFI->isThumb1OnlyFunction()) { + // note: Thumb1 functions spill to R12, not the stack. Reserve a slot + // closest to SP or frame pointer. + const TargetRegisterClass *RC = ARM::GPRRegisterClass; + MachineFrameInfo *MFI = MF.getFrameInfo(); + RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); } } } @@ -1093,6 +1085,15 @@ hasReservedCallFrame(MachineFunction &MF) const { return !MF.getFrameInfo()->hasVarSizedObjects(); } +// canSimplifyCallFramePseudos - If there is a reserved call frame, the +// call frame pseudos can be simplified. Unlike most targets, having a FP +// is not sufficient here since we still may reference some objects via SP +// even when FP is available in Thumb2 mode. +bool ARMBaseRegisterInfo:: +canSimplifyCallFramePseudos(MachineFunction &MF) const { + return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); +} + static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, @@ -1127,13 +1128,14 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); assert(!AFI->isThumb1OnlyFunction() && - "This eliminateCallFramePseudoInstr does not suppor Thumb1!"); + "This eliminateCallFramePseudoInstr does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); // Replace the pseudo instruction with a new instruction... unsigned Opc = Old->getOpcode(); - ARMCC::CondCodes Pred = (ARMCC::CondCodes)Old->getOperand(1).getImm(); - // FIXME: Thumb2 version of ADJCALLSTACKUP and ADJCALLSTACKDOWN? + int PIdx = Old->findFirstPredOperandIdx(); + ARMCC::CondCodes Pred = (PIdx == -1) + ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm(); if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. 
unsigned PredReg = Old->getOperand(2).getReg(); @@ -1157,7 +1159,6 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); - const MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); assert(!AFI->isThumb1OnlyFunction() && "This eliminateFrameIndex does not support Thumb1!"); @@ -1168,12 +1169,12 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } int FrameIndex = MI.getOperand(i).getIndex(); - int Offset = MFI->getObjectOffset(FrameIndex) + MFI->getStackSize() + SPAdj; unsigned FrameReg; - Offset = getFrameIndexReference(MF, FrameIndex, FrameReg); + int Offset = getFrameIndexReference(MF, FrameIndex, FrameReg); if (FrameReg != ARM::SP) SPAdj = 0; + Offset += SPAdj; // Modify MI as necessary to handle as much of 'Offset' as possible bool Done = false; @@ -1264,7 +1265,7 @@ emitPrologue(MachineFunction &MF) const { MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); assert(!AFI->isThumb1OnlyFunction() && - "This emitPrologue does not suppor Thumb1!"); + "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); unsigned NumBytes = MFI->getStackSize(); @@ -1349,7 +1350,9 @@ emitPrologue(MachineFunction &MF) const { unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; - AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); + if (STI.isTargetDarwin() || hasFP(MF)) + AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + + NumBytes); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); @@ -1425,7 +1428,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); assert(!AFI->isThumb1OnlyFunction() && - "This emitEpilogue does not suppor Thumb1!"); + "This emitEpilogue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 33ba21d..64f6ff1 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -138,6 +138,7 @@ public: virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const; virtual bool hasReservedCallFrame(MachineFunction &MF) const; + virtual bool canSimplifyCallFramePseudos(MachineFunction &MF) const; virtual void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index bd703f4..21c6cb3 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -86,6 +86,7 @@ namespace { void emitWordLE(unsigned Binary); void emitDWordLE(uint64_t Binary); void emitConstPoolInstruction(const MachineInstr &MI); + void emitMOVi32immInstruction(const MachineInstr &MI); void emitMOVi2piecesInstruction(const MachineInstr &MI); void emitLEApcrelJTInstruction(const MachineInstr &MI); void emitPseudoMoveInstruction(const MachineInstr &MI); @@ -143,6 +144,15 @@ namespace { return getMachineOpValue(MI, 
MI.getOperand(OpIdx)); } + /// getMovi32Value - Return binary encoding of operand for movw/movt. If the + /// machine operand requires relocation, record the relocation and return zero. + unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO, + unsigned Reloc); + unsigned getMovi32Value(const MachineInstr &MI, unsigned OpIdx, + unsigned Reloc) { + return getMovi32Value(MI, MI.getOperand(OpIdx), Reloc); + } + /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. /// unsigned getShiftOp(unsigned Imm) const ; @@ -214,6 +224,54 @@ unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const { return 0; } +/// getMovi32Value - Return binary encoding of operand for movw/movt. If the +/// machine operand requires relocation, record the relocation and return zero. +unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI, + const MachineOperand &MO, + unsigned Reloc) { + assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw)) + && "Relocation to this function should be for movt or movw"); + switch(MO.getType()) { + case MachineOperand::MO_Register: + return ARMRegisterInfo::getRegisterNumbering(MO.getReg()); + break; + + case MachineOperand::MO_Immediate: + return static_cast<unsigned>(MO.getImm()); + break; + + case MachineOperand::MO_FPImmediate: + return static_cast<unsigned>( + MO.getFPImm()->getValueAPF().bitcastToAPInt().getLimitedValue()); + break; + + case MachineOperand::MO_MachineBasicBlock: + emitMachineBasicBlock(MO.getMBB(), Reloc); + break; + + case MachineOperand::MO_ConstantPoolIndex: + emitConstPoolAddress(MO.getIndex(), Reloc); + break; + + case MachineOperand::MO_JumpTableIndex: + emitJumpTableAddress(MO.getIndex(), Reloc); + break; + + case MachineOperand::MO_ExternalSymbol: + emitExternalSymbolAddress(MO.getSymbolName(), Reloc); + break; + + case MachineOperand::MO_GlobalAddress: + emitGlobalAddress(MO.getGlobal(), Reloc, true, false); + break; + + default: + llvm_unreachable("Unsupported immediate operand type for movw/movt"); + break; + } + return 0; +} + /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI, @@ -433,6 +491,41 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { } } +void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) { + const MachineOperand &MO0 = MI.getOperand(0); + const MachineOperand &MO1 = MI.getOperand(1); + + unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF; + + // Emit the 'mov' instruction. + unsigned Binary = 0x30 << 20; // mov: Insts{27-20} = 0b00110000 + + // Set the conditional execution predicate. + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; + + // Encode imm. + Binary |= Lo16 & 0xFFF; + Binary |= ((Lo16 >> 12) & 0xF) << 16; // imm4:imm12, Insts[19-16] = imm4, Insts[11-0] = imm12 + emitWordLE(Binary); + + unsigned Hi16 = (getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16) & 0xFFFF; + // Emit the 'mov' instruction. + Binary = 0x34 << 20; // movt: Insts[27-20] = 0b00110100 + + // Set the conditional execution predicate. + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd. 
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; + + Binary |= Hi16 & 0xFFF; + Binary |= ((Hi16 >> 12) & 0xF) << 16; + emitWordLE(Binary); +} + void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) { const MachineOperand &MO0 = MI.getOperand(0); const MachineOperand &MO1 = MI.getOperand(1); @@ -552,7 +645,6 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { switch (Opcode) { default: llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction"); - // FIXME: Add support for MOVimm32. case TargetOpcode::INLINEASM: { // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. @@ -599,6 +691,11 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { emitMiscLoadStoreInstruction(MI, ARM::PC); break; } + + case ARM::MOVi32imm: + emitMOVi32immInstruction(MI); + break; + case ARM::MOVi2pieces: // Two instructions to materialize a constant. emitMOVi2piecesInstruction(MI); @@ -1138,7 +1235,7 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { // Set the conditional execution predicate Binary |= II->getPredicate(&MI) << ARMII::CondShift; - if (TID.Opcode == ARM::BX_RET) + if (TID.Opcode == ARM::BX_RET || TID.Opcode == ARM::MOVPCLR) // The return register is LR. Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::LR); else diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index a458269..013e00a 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -65,7 +65,7 @@ public: } SDNode *Select(SDNode *N); - virtual void InstructionSelect(); + bool SelectShifterOperandReg(SDNode *Op, SDValue N, SDValue &A, SDValue &B, SDValue &C); bool SelectAddrMode2(SDNode *Op, SDValue N, SDValue &Base, @@ -201,11 +201,6 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { } -void ARMDAGToDAGISel::InstructionSelect() { - SelectRoot(*CurDAG); - CurDAG->RemoveDeadNodes(); -} - bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op, SDValue N, SDValue &BaseReg, diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 614e684..6a2c6bb 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -294,6 +294,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); + setTargetDAGCombine(ISD::SELECT_CC); } computeRegisterProperties(); @@ -544,6 +545,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VZIP: return "ARMISD::VZIP"; case ARMISD::VUZP: return "ARMISD::VUZP"; case ARMISD::VTRN: return "ARMISD::VTRN"; + case ARMISD::FMAX: return "ARMISD::FMAX"; + case ARMISD::FMIN: return "ARMISD::FMIN"; } } @@ -863,7 +866,8 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); } return DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), LocMemOffset); + PseudoSourceValue::getStack(), LocMemOffset, + false, false, 0); } void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, @@ -920,7 +924,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // These operations are automatically eliminated by the prolog/epilog pass Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); - SDValue StackPtr = DAG.getRegister(ARM::SP, MVT::i32); + SDValue 
StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); RegsToPassVector RegsToPass; SmallVector<SDValue, 8> MemOpChains; @@ -969,8 +973,6 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); } else { assert(VA.isMemLoc()); - if (StackPtr.getNode() == 0) - StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1, dl, DAG, VA, Flags)); @@ -983,8 +985,6 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else { assert(VA.isMemLoc()); - if (StackPtr.getNode() == 0) - StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, dl, DAG, VA, Flags)); @@ -1031,7 +1031,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); @@ -1052,7 +1053,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); @@ -1238,7 +1240,8 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { } CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); if (RelocM == Reloc::Static) return Result; SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); @@ -1261,7 +1264,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue Chain = Argument.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); @@ -1278,8 +1282,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl, - DAG.GetOrdering(Chain.getNode())); + DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); return CallResult.first; } @@ -1308,21 +1311,24 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); Chain = Offset.getValue(1); SDValue PICLabel = 
DAG.getConstant(ARMPCLabelIndex, MVT::i32); Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); } else { // local exec model ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff"); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); } // The address of the thread local variable is the add of the thread @@ -1358,13 +1364,15 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue Chain = Result.getValue(1); SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); if (!UseGOTOFF) Result = DAG.getLoad(PtrVT, dl, Chain, Result, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, + false, false, 0); return Result; } else { // If we have T2 ops, we can materialize the address directly via movt/movw @@ -1376,7 +1384,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); } } } @@ -1403,7 +1412,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue Chain = Result.getValue(1); if (RelocM == Reloc::PIC_) { @@ -1413,7 +1423,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) Result = DAG.getLoad(PtrVT, dl, Chain, Result, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, + false, false, 0); return Result; } @@ -1434,7 +1445,8 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } @@ -1467,7 +1479,8 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue Chain = Result.getValue(1); if (RelocM == Reloc::PIC_) { @@ -1515,7 +1528,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, 
PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, + false, false, 0); } SDValue @@ -1592,7 +1606,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, // Create load node to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, - PseudoSourceValue::getFixedStack(FI), 0); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); } else { Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); @@ -1707,7 +1722,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0)); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0)); } } @@ -1745,7 +1761,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0); + PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0, + false, false, 0); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getConstant(4, getPointerTy())); @@ -1939,13 +1956,14 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { } if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, - PseudoSourceValue::getJumpTable(), 0); + PseudoSourceValue::getJumpTable(), 0, + false, false, 0); Chain = Addr.getValue(1); Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); } else { Addr = DAG.getLoad(PTy, dl, Chain, Addr, - PseudoSourceValue::getJumpTable(), 0); + PseudoSourceValue::getJumpTable(), 0, false, false, 0); Chain = Addr.getValue(1); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); } @@ -1993,7 +2011,8 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { ? 
ARM::R7 : ARM::R11; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, + false, false, 0); return FrameAddr; } @@ -2038,7 +2057,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, Loads[i] = DAG.getLoad(VT, dl, Chain, DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), - SrcSV, SrcSVOff + SrcOff); + SrcSV, SrcSVOff + SrcOff, false, false, 0); TFOps[i] = Loads[i].getValue(1); SrcOff += VTSize; } @@ -2047,9 +2066,9 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, for (i = 0; i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { TFOps[i] = DAG.getStore(Chain, dl, Loads[i], - DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, - DAG.getConstant(DstOff, MVT::i32)), - DstSV, DstSVOff + DstOff); + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, + DAG.getConstant(DstOff, MVT::i32)), + DstSV, DstSVOff + DstOff, false, false, 0); DstOff += VTSize; } Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); @@ -2075,7 +2094,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, Loads[i] = DAG.getLoad(VT, dl, Chain, DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), - SrcSV, SrcSVOff + SrcOff); + SrcSV, SrcSVOff + SrcOff, false, false, 0); TFOps[i] = Loads[i].getValue(1); ++i; SrcOff += VTSize; @@ -2097,7 +2116,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, TFOps[i] = DAG.getStore(Chain, dl, Loads[i], DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, DAG.getConstant(DstOff, MVT::i32)), - DstSV, DstSVOff + DstOff); + DstSV, DstSVOff + DstOff, false, false, 0); ++i; DstOff += VTSize; BytesLeft -= VTSize; @@ -3835,23 +3854,106 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC +/// to match f32 max/min patterns to use NEON vmax/vmin instructions. +static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *ST) { + // If the target supports NEON, try to use vmax/vmin instructions for f32 + // selects like "x < y ? x : y". Unless the FiniteOnlyFPMath option is set, + // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is + // a NaN; only do the transformation when it matches that behavior. + + // For now only do this when using NEON for FP operations; if using VFP, it + // is not obvious that the benefit outweighs the cost of switching to the + // NEON pipeline. + if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() || + N->getValueType(0) != MVT::f32) + return SDValue(); + + SDValue CondLHS = N->getOperand(0); + SDValue CondRHS = N->getOperand(1); + SDValue LHS = N->getOperand(2); + SDValue RHS = N->getOperand(3); + ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); + + unsigned Opcode = 0; + bool IsReversed; + if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) { + IsReversed = false; // x CC y ? x : y + } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) { + IsReversed = true ; // x CC y ? 
y : x + } else { + return SDValue(); + } + + bool IsUnordered; + switch (CC) { + default: break; + case ISD::SETOLT: + case ISD::SETOLE: + case ISD::SETLT: + case ISD::SETLE: + case ISD::SETULT: + case ISD::SETULE: + // If LHS is NaN, an ordered comparison will be false and the result will + // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS + // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. + IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE); + if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) + break; + // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin + // will return -0, so vmin can only be used for unsafe math or if one of + // the operands is known to be nonzero. + if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && + !UnsafeFPMath && + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) + break; + Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; + break; + + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETGT: + case ISD::SETGE: + case ISD::SETUGT: + case ISD::SETUGE: + // If LHS is NaN, an ordered comparison will be false and the result will + // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS + // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. + IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE); + if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) + break; + // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax + // will return +0, so vmax can only be used for unsafe math or if one of + // the operands is known to be nonzero. + if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && + !UnsafeFPMath && + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) + break; + Opcode = IsReversed ? 
ARMISD::FMIN : ARMISD::FMAX; + break; + } + + if (!Opcode) + return SDValue(); + return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS); +} + SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; - case ISD::ADD: return PerformADDCombine(N, DCI); - case ISD::SUB: return PerformSUBCombine(N, DCI); + case ISD::ADD: return PerformADDCombine(N, DCI); + case ISD::SUB: return PerformSUBCombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); - case ISD::INTRINSIC_WO_CHAIN: - return PerformIntrinsicCombine(N, DCI.DAG); + case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); case ISD::SHL: case ISD::SRA: - case ISD::SRL: - return PerformShiftCombine(N, DCI.DAG, Subtarget); + case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget); case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: - return PerformExtendCombine(N, DCI.DAG, Subtarget); + case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); + case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); } return SDValue(); } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 3c5df45..f8f8adc 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -131,7 +131,11 @@ namespace llvm { VREV16, // reverse elements within 16-bit halfwords VZIP, // zip (interleave) VUZP, // unzip (deinterleave) - VTRN // transpose + VTRN, // transpose + + // Floating-point max and min: + FMAX, + FMIN }; } diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 169eeed..76595fa 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -56,6 +56,9 @@ def NEONGetLnFrm : Format<25>; def NEONSetLnFrm : Format<26>; def NEONDupFrm : Format<27>; +def MiscFrm : Format<29>; +def ThumbMiscFrm : Format<30>; + // Misc flags. // the instruction has a Rn register operand. 
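
Note on the PerformSELECT_CCCombine hunk above: it only rewrites f32 selects of the form "x CC y ? x : y" into NEON vmin/vmax (and only when NEON is used for single-precision FP), because NEON's vmin/vmax return NaN if either operand is NaN and order +0/-0, while the original select does not. A minimal C++ sketch of that semantic gap (illustration only; vmin_like and select_lt are made-up names modelling the behaviour, not LLVM or NEON APIs):

    #include <cmath>
    #include <cstdio>

    // Models NEON vmin.f32: NaN if either operand is NaN.
    static float vmin_like(float a, float b) {
      if (std::isnan(a) || std::isnan(b)) return NAN;
      return a < b ? a : b;
    }

    // Models the IR select "a < b ? a : b".
    static float select_lt(float a, float b) { return a < b ? a : b; }

    int main() {
      // The compare is false for NaN, so the select yields 1.0f,
      // but a vmin would yield NaN -- hence the isKnownNeverNaN checks.
      std::printf("%f vs %f\n", select_lt(NAN, 1.0f), vmin_like(NAN, 1.0f));
      return 0;
    }
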
@@ -705,6 +708,20 @@ class AI3ldsbpr<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{24} = 1; // P bit let Inst{27-25} = 0b000; } +class AI3lddpr<dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin, + opc, asm, cstr, pattern> { + let Inst{4} = 1; + let Inst{5} = 0; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 1; // P bit + let Inst{27-25} = 0b000; +} + // Pre-indexed stores class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin, @@ -720,6 +737,19 @@ class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{24} = 1; // P bit let Inst{27-25} = 0b000; } +class AI3stdpr<dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin, + opc, asm, cstr, pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 1; // P bit + let Inst{27-25} = 0b000; +} // Post-indexed loads class AI3ldhpo<dag oops, dag iops, Format f, InstrItinClass itin, @@ -731,7 +761,7 @@ class AI3ldhpo<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{6} = 0; // S bit let Inst{7} = 1; let Inst{20} = 1; // L bit - let Inst{21} = 1; // W bit + let Inst{21} = 0; // W bit let Inst{24} = 0; // P bit let Inst{27-25} = 0b000; } @@ -744,7 +774,7 @@ class AI3ldshpo<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{6} = 1; // S bit let Inst{7} = 1; let Inst{20} = 1; // L bit - let Inst{21} = 1; // W bit + let Inst{21} = 0; // W bit let Inst{24} = 0; // P bit let Inst{27-25} = 0b000; } @@ -757,7 +787,20 @@ class AI3ldsbpo<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{6} = 1; // S bit let Inst{7} = 1; let Inst{20} = 1; // L bit - let Inst{21} = 1; // W bit + let Inst{21} = 0; // W bit + let Inst{24} = 0; // P bit + let Inst{27-25} = 0b000; +} +class AI3lddpo<dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin, + opc, asm, cstr, pattern> { + let Inst{4} = 1; + let Inst{5} = 0; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit let Inst{24} = 0; // P bit let Inst{27-25} = 0b000; } @@ -772,7 +815,20 @@ class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{6} = 0; // S bit let Inst{7} = 1; let Inst{20} = 0; // L bit - let Inst{21} = 1; // W bit + let Inst{21} = 0; // W bit + let Inst{24} = 0; // P bit + let Inst{27-25} = 0b000; +} +class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin, + opc, asm, cstr, pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit let Inst{24} = 0; // P bit let Inst{27-25} = 0b000; } @@ -1147,6 +1203,19 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre, let Inst{8} = 1; // The W bit. } +// Helper class for disassembly only +// A6.3.16 & A6.3.17 +// T2Imac - Thumb2 multiply [accumulate, and absolute difference] instructions. 
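
The AI3lddpr/AI3stdpr and AI3lddpo/AI3stdpo classes added above differ mainly in the P (pre/post-indexed) and W (writeback) bits, and the existing post-indexed load classes have their W bit corrected from 1 to 0: in a post-indexed encoding W=1 selects the unprivileged LDRT/STRT-style forms added elsewhere in this patch. A rough sketch of the addressing semantics those bits encode (illustration only, not LLVM code):

    #include <cstdint>

    struct Result { uint32_t EffAddr; uint32_t NewBase; };

    // P=1, W=1: pre-indexed -- access at base+offset and write the sum back.
    static Result preIndexed(uint32_t Base, int32_t Offset) {
      uint32_t EA = Base + Offset;
      return {EA, EA};
    }

    // P=0, W=0: post-indexed -- access at the old base, then update it.
    static Result postIndexed(uint32_t Base, int32_t Offset) {
      return {Base, Base + Offset};
    }
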
+class T2I_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + let Inst{31-27} = 0b11111; + let Inst{26-24} = 0b011; + let Inst{23} = long; + let Inst{22-20} = op22_20; + let Inst{7-4} = op7_4; +} + // Tv5Pat - Same as Pat<>, but requires V5T Thumb mode. class Tv5Pat<dag pattern, dag result> : Pat<pattern, result> { list<Predicate> Predicates = [IsThumb1Only, HasV5T]; @@ -1324,6 +1393,15 @@ class AVConv1I<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, let Inst{4} = 0; } +// VFP conversion between floating-point and fixed-point +class AVConv1XI<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, + dag oops, dag iops, InstrItinClass itin, string opc, string asm, + list<dag> pattern> + : AVConv1I<op1, op2, op3, op4, oops, iops, itin, opc, asm, pattern> { + // size (fixed-point number): sx == 0 ? 16 : 32 + let Inst{7} = op5; // sx +} + // VFP conversion instructions, if no NEON class AVConv1In<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, dag oops, dag iops, InstrItinClass itin, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 852c74e..3812aba 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -113,6 +113,8 @@ def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>; //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. // +def HasV4T : Predicate<"Subtarget->hasV4TOps()">; +def NoV4T : Predicate<"!Subtarget->hasV4TOps()">; def HasV5T : Predicate<"Subtarget->hasV5TOps()">; def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">; def HasV6 : Predicate<"Subtarget->hasV6Ops()">; @@ -130,8 +132,6 @@ def IsThumb2 : Predicate<"Subtarget->isThumb2()">; def IsARM : Predicate<"!Subtarget->isThumb()">; def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">; -def CarryDefIsUnused : Predicate<"!N->hasAnyUseOfValue(1)">; -def CarryDefIsUsed : Predicate<"N->hasAnyUseOfValue(1)">; // FIXME: Eventually this will be just "hasV6T2Ops". def UseMovt : Predicate<"Subtarget->useMovt()">; @@ -176,7 +176,7 @@ def imm16_31 : PatLeaf<(i32 imm), [{ return (int32_t)N->getZExtValue() >= 16 && (int32_t)N->getZExtValue() < 32; }]>; -def so_imm_neg : +def so_imm_neg : PatLeaf<(imm), [{ return ARM_AM::getSOImmVal(-(int)N->getZExtValue()) != -1; }], so_imm_neg_XFORM>; @@ -194,7 +194,7 @@ def sext_16_node : PatLeaf<(i32 GPR:$a), [{ /// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield /// e.g., 0xf000ffff def bf_inv_mask_imm : Operand<i32>, - PatLeaf<(imm), [{ + PatLeaf<(imm), [{ uint32_t v = (uint32_t)N->getZExtValue(); if (v == 0xffffffff) return 0; @@ -227,7 +227,7 @@ def lo16AllZero : PatLeaf<(i32 imm), [{ return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0; }], hi16>; -/// imm0_65535 predicate - True if the 32-bit immediate is in the range +/// imm0_65535 predicate - True if the 32-bit immediate is in the range /// [0.65535]. def imm0_65535 : PatLeaf<(i32 imm), [{ return (uint32_t)N->getZExtValue() < 65536; @@ -236,6 +236,21 @@ def imm0_65535 : PatLeaf<(i32 imm), [{ class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>; class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>; +/// adde and sube predicates - True based on whether the carry flag output +/// will be needed or not. 
+def adde_dead_carry : + PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS), + [{return !N->hasAnyUseOfValue(1);}]>; +def sube_dead_carry : + PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS), + [{return !N->hasAnyUseOfValue(1);}]>; +def adde_live_carry : + PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS), + [{return N->hasAnyUseOfValue(1);}]>; +def sube_live_carry : + PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS), + [{return N->hasAnyUseOfValue(1);}]>; + //===----------------------------------------------------------------------===// // Operand Definitions. // @@ -501,6 +516,22 @@ multiclass AI_unary_rrot<bits<8> opcod, string opc, PatFrag opnode> { } } +multiclass AI_unary_rrot_np<bits<8> opcod, string opc> { + def r : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src), + IIC_iUNAr, opc, "\t$dst, $src", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]> { + let Inst{11-10} = 0b00; + let Inst{19-16} = 0b1111; + } + def r_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src, i32imm:$rot), + IIC_iUNAsi, opc, "\t$dst, $src, ror $rot", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]> { + let Inst{19-16} = 0b1111; + } +} + /// AI_bin_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> { @@ -510,13 +541,29 @@ multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> { Requires<[IsARM, HasV6]> { let Inst{11-10} = 0b00; } - def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), + def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, + i32imm:$rot), IIC_iALUsi, opc, "\t$dst, $LHS, $RHS, ror $rot", [(set GPR:$dst, (opnode GPR:$LHS, (rotr GPR:$RHS, rot_imm:$rot)))]>, Requires<[IsARM, HasV6]>; } +// For disassembly only. +multiclass AI_bin_rrot_np<bits<8> opcod, string opc> { + def rr : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), + IIC_iALUr, opc, "\t$dst, $LHS, $RHS", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]> { + let Inst{11-10} = 0b00; + } + def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, + i32imm:$rot), + IIC_iALUsi, opc, "\t$dst, $LHS, $RHS, ror $rot", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]>; +} + /// AI1_adde_sube_irs - Define instructions and patterns for adde and sube. 
let Uses = [CPSR] in { multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, @@ -524,13 +571,13 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iALUi, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>, - Requires<[IsARM, CarryDefIsUnused]> { + Requires<[IsARM]> { let Inst{25} = 1; } def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>, - Requires<[IsARM, CarryDefIsUnused]> { + Requires<[IsARM]> { let isCommutable = Commutable; let Inst{11-4} = 0b00000000; let Inst{25} = 0; @@ -538,7 +585,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>, - Requires<[IsARM, CarryDefIsUnused]> { + Requires<[IsARM]> { let Inst{25} = 0; } } @@ -549,16 +596,14 @@ multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, def Sri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iALUi, !strconcat(opc, "\t$dst, $a, $b"), [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>, - Requires<[IsARM, CarryDefIsUsed]> { - let Defs = [CPSR]; + Requires<[IsARM]> { let Inst{20} = 1; let Inst{25} = 1; } def Srr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr, !strconcat(opc, "\t$dst, $a, $b"), [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>, - Requires<[IsARM, CarryDefIsUsed]> { - let Defs = [CPSR]; + Requires<[IsARM]> { let Inst{11-4} = 0b00000000; let Inst{20} = 1; let Inst{25} = 0; @@ -566,8 +611,7 @@ multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, def Srs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$dst, $a, $b"), [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>, - Requires<[IsARM, CarryDefIsUsed]> { - let Defs = [CPSR]; + Requires<[IsARM]> { let Inst{20} = 1; let Inst{25} = 0; } @@ -593,18 +637,153 @@ PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, i32imm:$size), NoItinerary, "${instid:label} ${cpidx:cpentry}", []>; -let Defs = [SP], Uses = [SP] in { +// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE +// from removing one half of the matched pairs. That breaks PEI, which assumes +// these will always be in pairs, and asserts if it finds otherwise. Better way? 
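
The adde_dead_carry/adde_live_carry PatFrags above replace the old CarryDefIsUnused/CarryDefIsUsed predicates: whether a plain ADC or a flag-setting ADCS is selected is now decided per node, by asking whether that adde/sube's carry result (value #1) has any uses. A small C++ illustration of why the distinction matters (sketch only, not SelectionDAG code):

    #include <stdint.h>

    // 64-bit add built from 32-bit pieces: the low add's carry-out is live
    // (it feeds the high add, so flag-setting ADDS/ADCS forms are needed),
    // while the high add's carry-out is dead and a plain ADC suffices.
    uint64_t add64(uint32_t alo, uint32_t ahi, uint32_t blo, uint32_t bhi) {
      uint32_t lo = alo + blo;
      uint32_t carry = lo < alo;        // carry-out of the low add
      uint32_t hi = ahi + bhi + carry;  // consumes it; its own carry is unused
      return ((uint64_t)hi << 32) | lo;
    }
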
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { def ADJCALLSTACKUP : PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary, "@ ADJCALLSTACKUP $amt1", [(ARMcallseq_end timm:$amt1, timm:$amt2)]>; -def ADJCALLSTACKDOWN : +def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, "@ ADJCALLSTACKDOWN $amt", [(ARMcallseq_start timm:$amt)]>; } +def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000000; +} + +def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000001; +} + +def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000010; +} + +def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000011; +} + +def SEL : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, NoItinerary, "sel", + "\t$dst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]> { + let Inst{27-20} = 0b01101000; + let Inst{7-4} = 0b1011; +} + +def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000100; +} + +// The i32imm operand $val can be used by a debugger to store more information +// about the breakpoint. +def BKPT : AI<(outs), (ins i32imm:$val), MiscFrm, NoItinerary, "bkpt", "\t$val", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{27-20} = 0b00010010; + let Inst{7-4} = 0b0111; +} + +// Change Processor State is a system instruction -- for disassembly only. +// The singleton $opt operand contains the following information: +// opt{4-0} = mode from Inst{4-0} +// opt{5} = changemode from Inst{17} +// opt{8-6} = AIF from Inst{8-6} +// opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable +def CPS : AXI<(outs),(ins i32imm:$opt), MiscFrm, NoItinerary, "cps${opt:cps}", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{31-28} = 0b1111; + let Inst{27-20} = 0b00010000; + let Inst{16} = 0; + let Inst{5} = 0; +} + +// Preload signals the memory system of possible future data/instruction access. +// These are for disassembly only. 
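
The PLD/PLDW/PLI definitions that follow are cache-preload hints: they never fault and only suggest that data (or, for PLI, instructions) will be touched soon. For context, a hedged user-level example of what typically lowers to a pld, assuming GCC or Clang where __builtin_prefetch is available:

    // Prefetch a few iterations ahead; on ARM the hint can become a pld.
    void scale(float *a, int n, float k) {
      for (int i = 0; i < n; ++i) {
        __builtin_prefetch(&a[i + 16], /*rw=*/0, /*locality=*/1);
        a[i] *= k;
      }
    }
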
+multiclass APreLoad<bit data, bit read, string opc> { + + def i : AXI<(outs), (ins GPR:$base, i32imm:$imm), MiscFrm, NoItinerary, + !strconcat(opc, "\t[$base, $imm]"), []> { + let Inst{31-26} = 0b111101; + let Inst{25} = 0; // 0 for immediate form + let Inst{24} = data; + let Inst{22} = read; + let Inst{21-20} = 0b01; + } + + def r : AXI<(outs), (ins addrmode2:$addr), MiscFrm, NoItinerary, + !strconcat(opc, "\t$addr"), []> { + let Inst{31-26} = 0b111101; + let Inst{25} = 1; // 1 for register form + let Inst{24} = data; + let Inst{22} = read; + let Inst{21-20} = 0b01; + let Inst{4} = 0; + } +} + +defm PLD : APreLoad<1, 1, "pld">; +defm PLDW : APreLoad<1, 0, "pldw">; +defm PLI : APreLoad<0, 1, "pli">; + +def SETENDBE : AXI<(outs),(ins), MiscFrm, NoItinerary, "setend\tbe", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{31-28} = 0b1111; + let Inst{27-20} = 0b00010000; + let Inst{16} = 1; + let Inst{9} = 1; + let Inst{7-4} = 0b0000; +} + +def SETENDLE : AXI<(outs),(ins), MiscFrm, NoItinerary, "setend\tle", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{31-28} = 0b1111; + let Inst{27-20} = 0b00010000; + let Inst{16} = 1; + let Inst{9} = 0; + let Inst{7-4} = 0b0000; +} + +def DBG : AI<(outs), (ins i32imm:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV7]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-4} = 0b1111; +} + +// A5.4 Permanently UNDEFINED instructions. +def TRAP : AI<(outs), (ins), MiscFrm, NoItinerary, "trap", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{27-25} = 0b011; + let Inst{24-20} = 0b11111; + let Inst{7-5} = 0b111; + let Inst{4} = 0b1; +} + // Address computation and loads and stores in PIC mode. let isNotDuplicable = 1 in { def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), @@ -665,7 +844,7 @@ def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), "(${label}_${id}-(", "${:private}PCRELL${:uid}+8))\n"), !strconcat("${:private}PCRELL${:uid}:\n\t", - "add$p\t$dst, pc, #${:private}PCRELV${:uid}")), + "add$p\t$dst, pc, #${:private}PCRELV${:uid}")), []> { let Inst{25} = 1; } @@ -674,24 +853,50 @@ def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), // Control Flow Instructions. 
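
The LEApcrel/LEApcrelJT asm strings above compute the label offset against "pc_label + 8" because an ARM-state instruction that reads PC observes its own address plus 8. A one-line sketch of the value an "add Rd, pc, #imm" produces (illustration only):

    #include <cstdint>

    // Value left in Rd by "add Rd, pc, #Imm" executed at address InsnAddr
    // (ARM state: PC reads as the instruction's own address + 8).
    static uint32_t pcrelValue(uint32_t InsnAddr, int32_t Imm) {
      return InsnAddr + 8 + Imm;
    }
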
// -let isReturn = 1, isTerminator = 1, isBarrier = 1 in - def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br, - "bx", "\tlr", [(ARMretflag)]> { - let Inst{3-0} = 0b1110; - let Inst{7-4} = 0b0001; - let Inst{19-8} = 0b111111111111; - let Inst{27-20} = 0b00010010; +let isReturn = 1, isTerminator = 1, isBarrier = 1 in { + // ARMV4T and above + def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br, + "bx", "\tlr", [(ARMretflag)]>, + Requires<[IsARM, HasV4T]> { + let Inst{3-0} = 0b1110; + let Inst{7-4} = 0b0001; + let Inst{19-8} = 0b111111111111; + let Inst{27-20} = 0b00010010; + } + + // ARMV4 only + def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br, + "mov", "\tpc, lr", [(ARMretflag)]>, + Requires<[IsARM, NoV4T]> { + let Inst{11-0} = 0b000000001110; + let Inst{15-12} = 0b1111; + let Inst{19-16} = 0b0000; + let Inst{27-20} = 0b00011010; + } } // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { + // ARMV4T and above def BRIND : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst", - [(brind GPR:$dst)]> { + [(brind GPR:$dst)]>, + Requires<[IsARM, HasV4T]> { let Inst{7-4} = 0b0001; let Inst{19-8} = 0b111111111111; let Inst{27-20} = 0b00010010; let Inst{31-28} = 0b1110; } + + // ARMV4 only + def MOVPCRX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "mov\tpc, $dst", + [(brind GPR:$dst)]>, + Requires<[IsARM, NoV4T]> { + let Inst{11-4} = 0b00000000; + let Inst{15-12} = 0b1111; + let Inst{19-16} = 0b0000; + let Inst{27-20} = 0b00011010; + let Inst{31-28} = 0b1110; + } } // FIXME: remove when we have a way to marking a MI with these properties. @@ -732,14 +937,26 @@ let isCall = 1, } // ARMv4T - def BX : ABXIx2<(outs), (ins GPR:$func, variable_ops), + // Note: Restrict $func to the tGPR regclass to prevent it being in LR. + def BX : ABXIx2<(outs), (ins tGPR:$func, variable_ops), IIC_Br, "mov\tlr, pc\n\tbx\t$func", - [(ARMcall_nolink GPR:$func)]>, - Requires<[IsARM, IsNotDarwin]> { + [(ARMcall_nolink tGPR:$func)]>, + Requires<[IsARM, HasV4T, IsNotDarwin]> { let Inst{7-4} = 0b0001; let Inst{19-8} = 0b111111111111; let Inst{27-20} = 0b00010010; } + + // ARMv4 + def BMOVPCRX : ABXIx2<(outs), (ins tGPR:$func, variable_ops), + IIC_Br, "mov\tlr, pc\n\tmov\tpc, $func", + [(ARMcall_nolink tGPR:$func)]>, + Requires<[IsARM, NoV4T, IsNotDarwin]> { + let Inst{11-4} = 0b00000000; + let Inst{15-12} = 0b1111; + let Inst{19-16} = 0b0000; + let Inst{27-20} = 0b00011010; + } } // On Darwin R9 is call-clobbered. @@ -769,13 +986,26 @@ let isCall = 1, } // ARMv4T - def BXr9 : ABXIx2<(outs), (ins GPR:$func, variable_ops), + // Note: Restrict $func to the tGPR regclass to prevent it being in LR. + def BXr9 : ABXIx2<(outs), (ins tGPR:$func, variable_ops), IIC_Br, "mov\tlr, pc\n\tbx\t$func", - [(ARMcall_nolink GPR:$func)]>, Requires<[IsARM, IsDarwin]> { + [(ARMcall_nolink tGPR:$func)]>, + Requires<[IsARM, HasV4T, IsDarwin]> { let Inst{7-4} = 0b0001; let Inst{19-8} = 0b111111111111; let Inst{27-20} = 0b00010010; } + + // ARMv4 + def BMOVPCRXr9 : ABXIx2<(outs), (ins tGPR:$func, variable_ops), + IIC_Br, "mov\tlr, pc\n\tmov\tpc, $func", + [(ARMcall_nolink tGPR:$func)]>, + Requires<[IsARM, NoV4T, IsDarwin]> { + let Inst{11-4} = 0b00000000; + let Inst{15-12} = 0b1111; + let Inst{19-16} = 0b0000; + let Inst{27-20} = 0b00011010; + } } let isBranch = 1, isTerminator = 1 in { @@ -821,25 +1051,75 @@ let isBranch = 1, isTerminator = 1 in { } // isBarrier = 1 // FIXME: should be able to write a pattern for ARMBrcond, but can't use - // a two-value operand where a dag node expects two operands. 
:( + // a two-value operand where a dag node expects two operands. :( def Bcc : ABI<0b1010, (outs), (ins brtarget:$target), IIC_Br, "b", "\t$target", [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>; } +// Branch and Exchange Jazelle -- for disassembly only +def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0010; + //let Inst{19-8} = 0xfff; + let Inst{7-4} = 0b0010; +} + +// Secure Monitor Call is a system instruction -- for disassembly only +def SMC : ABI<0b0001, (outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0110; + let Inst{7-4} = 0b0111; +} + +// Supervisor Call (Software Interrupt) -- for disassembly only +let isCall = 1 in { +def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", + [/* For disassembly only; pattern left blank */]>; +} + +// Store Return State is a system instruction -- for disassembly only +def SRSW : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, i32imm:$mode), + NoItinerary, "srs${addr:submode}\tsp!, $mode", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{22-20} = 0b110; // W = 1 +} + +def SRS : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, i32imm:$mode), + NoItinerary, "srs${addr:submode}\tsp, $mode", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{22-20} = 0b100; // W = 0 +} + +// Return From Exception is a system instruction -- for disassembly only +def RFEW : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, GPR:$base), + NoItinerary, "rfe${addr:submode}\t$base!", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{22-20} = 0b011; // W = 1 +} + +def RFE : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, GPR:$base), + NoItinerary, "rfe${addr:submode}\t$base", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{22-20} = 0b001; // W = 0 +} + //===----------------------------------------------------------------------===// // Load / store Instructions. // // Load -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, "ldr", "\t$dst, $addr", [(set GPR:$dst, (load addrmode2:$addr))]>; // Special LDR for loads from non-pc-relative constpools. 
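
The HasV4T/NoV4T split introduced above exists because BX only appeared with ARMv4T; plain ARMv4 must write returns, indirect branches and indirect calls as moves into PC instead (MOVPCLR, MOVPCRX, BMOVPCRX). A compressed view of the choice (illustration only; the strings are the asm the defs print, not an LLVM API):

    static const char *returnInsn(bool HasV4T) {
      return HasV4T ? "bx lr" : "mov pc, lr";
    }
    static const char *indirectBr(bool HasV4T) {
      return HasV4T ? "bx $dst" : "mov pc, $dst";
    }
    static const char *callNoLink(bool HasV4T) {
      return HasV4T ? "mov lr, pc; bx $func" : "mov lr, pc; mov pc, $func";
    }
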
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, "ldr", "\t$dst, $addr", []>; @@ -848,7 +1128,7 @@ def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadr, "ldrh", "\t$dst, $addr", [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>; -def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, +def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, "ldrb", "\t$dst, $addr", [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>; @@ -907,6 +1187,51 @@ def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb), def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, "ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb", []>; + +// For disassembly only +def LDRD_PRE : AI3lddpr<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb), + (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadr, + "ldrd", "\t$dst1, $dst2, $addr!", "$addr.base = $base_wb", []>, + Requires<[IsARM, HasV5TE]>; + +// For disassembly only +def LDRD_POST : AI3lddpo<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb), + (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadr, + "ldrd", "\t$dst1, $dst2, [$base], $offset", "$base = $base_wb", []>, + Requires<[IsARM, HasV5TE]>; + +} + +// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only. + +def LDRT : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoadru, + "ldrt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite +} + +def LDRBT : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru, + "ldrbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite +} + +def LDRSBT : AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am2offset:$offset), LdMiscFrm, IIC_iLoadru, + "ldrsbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite +} + +def LDRHT : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, am3offset:$offset), LdMiscFrm, IIC_iLoadru, + "ldrht", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite +} + +def LDRSHT : AI3ldshpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, + "ldrsht", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite } // Store @@ -915,8 +1240,8 @@ def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, [(store GPR:$src, addrmode2:$addr)]>; // Stores with truncate -def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, IIC_iStorer, - "strh", "\t$src, $addr", +def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, + IIC_iStorer, "strh", "\t$src, $addr", [(truncstorei16 GPR:$src, addrmode3:$addr)]>; def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, @@ -931,47 +1256,87 @@ def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr), // Indexed stores def STR_PRE : AI2stwpr<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base, am2offset:$offset), + (ins GPR:$src, GPR:$base, am2offset:$offset), StFrm, IIC_iStoreru, "str", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_store GPR:$src, GPR:$base, am2offset:$offset))]>; def STR_POST : AI2stwpo<(outs GPR:$base_wb), - (ins GPR:$src, 
GPR:$base,am2offset:$offset), + (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, IIC_iStoreru, "str", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_store GPR:$src, GPR:$base, am2offset:$offset))]>; def STRH_PRE : AI3sthpr<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am3offset:$offset), + (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm, IIC_iStoreru, "strh", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>; def STRH_POST: AI3sthpo<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am3offset:$offset), + (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm, IIC_iStoreru, "strh", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti16 GPR:$src, GPR:$base, am3offset:$offset))]>; def STRB_PRE : AI2stbpr<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am2offset:$offset), + (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, IIC_iStoreru, "strb", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti8 GPR:$src, GPR:$base, am2offset:$offset))]>; def STRB_POST: AI2stbpo<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am2offset:$offset), + (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, IIC_iStoreru, "strb", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti8 GPR:$src, GPR:$base, am2offset:$offset))]>; +// For disassembly only +def STRD_PRE : AI3stdpr<(outs GPR:$base_wb), + (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset), + StMiscFrm, IIC_iStoreru, + "strd", "\t$src1, $src2, [$base, $offset]!", + "$base = $base_wb", []>; + +// For disassembly only +def STRD_POST: AI3stdpo<(outs GPR:$base_wb), + (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset), + StMiscFrm, IIC_iStoreru, + "strd", "\t$src1, $src2, [$base], $offset", + "$base = $base_wb", []>; + +// STRT, STRBT, and STRHT are for disassembly only. + +def STRT : AI2stwpo<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am2offset:$offset), + StFrm, IIC_iStoreru, + "strt", "\t$src, [$base], $offset", "$base = $base_wb", + [/* For disassembly only; pattern left blank */]> { + let Inst{21} = 1; // overwrite +} + +def STRBT : AI2stbpo<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am2offset:$offset), + StFrm, IIC_iStoreru, + "strbt", "\t$src, [$base], $offset", "$base = $base_wb", + [/* For disassembly only; pattern left blank */]> { + let Inst{21} = 1; // overwrite +} + +def STRHT: AI3sthpo<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am3offset:$offset), + StMiscFrm, IIC_iStoreru, + "strht", "\t$src, [$base], $offset", "$base = $base_wb", + [/* For disassembly only; pattern left blank */]> { + let Inst{21} = 1; // overwrite +} + //===----------------------------------------------------------------------===// // Load / store multiple Instructions. 
// @@ -999,7 +1364,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr, let Inst{25} = 0; } -def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), +def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, IIC_iMOVsr, "mov", "\t$dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP { let Inst{25} = 0; @@ -1012,7 +1377,7 @@ def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm, IIC_iMOVi, } let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src), +def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src), DPFrm, IIC_iMOVi, "movw", "\t$dst, $src", [(set GPR:$dst, imm0_65535:$src)]>, @@ -1026,7 +1391,7 @@ def MOVTi16 : AI1<0b1010, (outs GPR:$dst), (ins GPR:$src, i32imm:$imm), DPFrm, IIC_iMOVi, "movt", "\t$dst, $imm", [(set GPR:$dst, - (or (and GPR:$src, 0xffff), + (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>, UnaryDP, Requires<[IsARM, HasV6T2]> { let Inst{20} = 0; @@ -1045,7 +1410,7 @@ def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi, // due to flag operands. let Defs = [CPSR] in { -def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, +def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi, "movs", "\t$dst, $src, lsr #1", [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP; def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, @@ -1069,7 +1434,11 @@ defm SXTAB : AI_bin_rrot<0b01101010, defm SXTAH : AI_bin_rrot<0b01101011, "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; -// TODO: SXT(A){B|H}16 +// For disassembly only +defm SXTB16 : AI_unary_rrot_np<0b01101000, "sxtb16">; + +// For disassembly only +defm SXTAB16 : AI_bin_rrot_np<0b01101000, "sxtab16">; // Zero extenders @@ -1093,9 +1462,9 @@ defm UXTAH : AI_bin_rrot<0b01101111, "uxtah", } // This isn't safe in general, the add is two 16-bit units, not a 32-bit add. -//defm UXTAB16 : xxx<"uxtab16", 0xff00ff>; +// For disassembly only +defm UXTAB16 : AI_bin_rrot_np<0b01101100, "uxtab16">; -// TODO: UXT(A){B|H}16 def SBFX : I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), @@ -1131,13 +1500,13 @@ defm SUBS : AI1_bin_s_irs<0b0010, "subs", BinOpFrag<(subc node:$LHS, node:$RHS)>>; defm ADC : AI1_adde_sube_irs<0b0101, "adc", - BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>; defm SBC : AI1_adde_sube_irs<0b0110, "sbc", - BinOpFrag<(sube node:$LHS, node:$RHS)>>; + BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>; defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs", - BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>; defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs", - BinOpFrag<(sube node:$LHS, node:$RHS)>>; + BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>; // These don't define reg/reg forms, because they are handled above. 
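
MOVi16/MOVTi16 above (and the MOVi32imm expansion in ARMCodeEmitter earlier in this patch) materialize a 32-bit constant as a movw/movt pair: movw writes the low half-word and clears the top, movt overwrites only the top half-word, and in the A1 encodings each 16-bit value is split into imm4:imm12 fields. A small sketch of both facts (illustration only, not the emitter's code):

    #include <cstdint>

    // imm4:imm12 packing used by the movw/movt encodings:
    // Inst[19:16] = imm4, Inst[11:0] = imm12.
    static uint32_t packImm16(uint32_t Inst, uint32_t Imm16) {
      Inst |= Imm16 & 0xFFF;
      Inst |= ((Imm16 >> 12) & 0xF) << 16;
      return Inst;
    }

    // Register value after "movw Rd,#lo16" followed by "movt Rd,#hi16".
    static uint32_t movwMovt(uint32_t Imm) {
      uint32_t Rd = Imm & 0xFFFF;                 // movw: upper half cleared
      Rd = (Rd & 0xFFFF) | (Imm & 0xFFFF0000u);   // movt: replace upper half
      return Rd;
    }
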
def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, @@ -1171,14 +1540,14 @@ def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, let Uses = [CPSR] in { def RSCri : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iALUi, "rsc", "\t$dst, $a, $b", - [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>, - Requires<[IsARM, CarryDefIsUnused]> { + [(set GPR:$dst, (sube_dead_carry so_imm:$b, GPR:$a))]>, + Requires<[IsARM]> { let Inst{25} = 1; } def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b", - [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>, - Requires<[IsARM, CarryDefIsUnused]> { + [(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>, + Requires<[IsARM]> { let Inst{25} = 0; } } @@ -1187,15 +1556,15 @@ def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), let Defs = [CPSR], Uses = [CPSR] in { def RSCSri : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iALUi, "rscs\t$dst, $a, $b", - [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>, - Requires<[IsARM, CarryDefIsUnused]> { + [(set GPR:$dst, (sube_dead_carry so_imm:$b, GPR:$a))]>, + Requires<[IsARM]> { let Inst{20} = 1; let Inst{25} = 1; } def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, "rscs\t$dst, $a, $b", - [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>, - Requires<[IsARM, CarryDefIsUnused]> { + [(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>, + Requires<[IsARM]> { let Inst{20} = 1; let Inst{25} = 0; } @@ -1216,6 +1585,126 @@ def : ARMPat<(add GPR:$src, so_imm_neg:$imm), // (mul X, 2^n+1) -> (add (X << n), X) // (mul X, 2^n-1) -> (rsb X, (X << n)) +// ARM Arithmetic Instruction -- for disassembly only +// GPR:$dst = GPR:$a op GPR:$b +class AAI<bits<8> op27_20, bits<4> op7_4, string opc> + : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr, + opc, "\t$dst, $a, $b", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-20} = op27_20; + let Inst{7-4} = op7_4; +} + +// Saturating add/subtract -- for disassembly only + +def QADD : AAI<0b00010000, 0b0101, "qadd">; +def QADD16 : AAI<0b01100010, 0b0001, "qadd16">; +def QADD8 : AAI<0b01100010, 0b1001, "qadd8">; +def QASX : AAI<0b01100010, 0b0011, "qasx">; +def QDADD : AAI<0b00010100, 0b0101, "qdadd">; +def QDSUB : AAI<0b00010110, 0b0101, "qdsub">; +def QSAX : AAI<0b01100010, 0b0101, "qsax">; +def QSUB : AAI<0b00010010, 0b0101, "qsub">; +def QSUB16 : AAI<0b01100010, 0b0111, "qsub16">; +def QSUB8 : AAI<0b01100010, 0b1111, "qsub8">; +def UQADD16 : AAI<0b01100110, 0b0001, "uqadd16">; +def UQADD8 : AAI<0b01100110, 0b1001, "uqadd8">; +def UQASX : AAI<0b01100110, 0b0011, "uqasx">; +def UQSAX : AAI<0b01100110, 0b0101, "uqsax">; +def UQSUB16 : AAI<0b01100110, 0b0111, "uqsub16">; +def UQSUB8 : AAI<0b01100110, 0b1111, "uqsub8">; + +// Signed/Unsigned add/subtract -- for disassembly only + +def SASX : AAI<0b01100001, 0b0011, "sasx">; +def SADD16 : AAI<0b01100001, 0b0001, "sadd16">; +def SADD8 : AAI<0b01100001, 0b1001, "sadd8">; +def SSAX : AAI<0b01100001, 0b0101, "ssax">; +def SSUB16 : AAI<0b01100001, 0b0111, "ssub16">; +def SSUB8 : AAI<0b01100001, 0b1111, "ssub8">; +def UASX : AAI<0b01100101, 0b0011, "uasx">; +def UADD16 : AAI<0b01100101, 0b0001, "uadd16">; +def UADD8 : AAI<0b01100101, 0b1001, "uadd8">; +def USAX : AAI<0b01100101, 0b0101, "usax">; +def USUB16 : AAI<0b01100101, 0b0111, "usub16">; +def USUB8 : AAI<0b01100101, 0b1111, "usub8">; + +// Signed/Unsigned halving add/subtract -- for 
disassembly only + +def SHASX : AAI<0b01100011, 0b0011, "shasx">; +def SHADD16 : AAI<0b01100011, 0b0001, "shadd16">; +def SHADD8 : AAI<0b01100011, 0b1001, "shadd8">; +def SHSAX : AAI<0b01100011, 0b0101, "shsax">; +def SHSUB16 : AAI<0b01100011, 0b0111, "shsub16">; +def SHSUB8 : AAI<0b01100011, 0b1111, "shsub8">; +def UHASX : AAI<0b01100111, 0b0011, "uhasx">; +def UHADD16 : AAI<0b01100111, 0b0001, "uhadd16">; +def UHADD8 : AAI<0b01100111, 0b1001, "uhadd8">; +def UHSAX : AAI<0b01100111, 0b0101, "uhsax">; +def UHSUB16 : AAI<0b01100111, 0b0111, "uhsub16">; +def UHSUB8 : AAI<0b01100111, 0b1111, "uhsub8">; + +// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only + +def USAD8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), + MulFrm /* for convenience */, NoItinerary, "usad8", + "\t$dst, $a, $b", []>, + Requires<[IsARM, HasV6]> { + let Inst{27-20} = 0b01111000; + let Inst{15-12} = 0b1111; + let Inst{7-4} = 0b0001; +} +def USADA8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + MulFrm /* for convenience */, NoItinerary, "usada8", + "\t$dst, $a, $b, $acc", []>, + Requires<[IsARM, HasV6]> { + let Inst{27-20} = 0b01111000; + let Inst{7-4} = 0b0001; +} + +// Signed/Unsigned saturate -- for disassembly only + +def SSATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), + DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-21} = 0b0110101; + let Inst{6-4} = 0b001; +} + +def SSATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), + DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-21} = 0b0110101; + let Inst{6-4} = 0b101; +} + +def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm, + NoItinerary, "ssat16", "\t$dst, $bit_pos, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-20} = 0b01101010; + let Inst{7-4} = 0b0011; +} + +def USATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), + DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-21} = 0b0110111; + let Inst{6-4} = 0b001; +} + +def USATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), + DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-21} = 0b0110111; + let Inst{6-4} = 0b101; +} + +def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm, + NoItinerary, "usat16", "\t$dst, $bit_pos, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-20} = 0b01101110; + let Inst{7-4} = 0b0011; +} //===----------------------------------------------------------------------===// // Bitwise Instructions. @@ -1239,6 +1728,17 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), let Inst{6-0} = 0b0011111; } +// A8.6.18 BFI - Bitfield insert (Encoding A1) +// Added for disassembler with the pattern field purposely left blank. 
+def BFI : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), + AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi, + "bfi", "\t$dst, $src, $imm", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-21} = 0b0111110; + let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15 +} + def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr, "mvn", "\t$dst, $src", [(set GPR:$dst, (not GPR:$src))]>, UnaryDP { @@ -1251,7 +1751,7 @@ def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, let Inst{25} = 0; } let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm, +def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm, IIC_iMOVi, "mvn", "\t$dst, $imm", [(set GPR:$dst, so_imm_not:$imm)]>,UnaryDP { let Inst{25} = 1; @@ -1314,6 +1814,14 @@ def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b), let Inst{15-12} = 0b1111; } +def SMMULR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + IIC_iMUL32, "smmulr", "\t$dst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b0011; // R = 1 + let Inst{15-12} = 0b1111; +} + def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, "smmla", "\t$dst, $a, $b, $c", [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>, @@ -1321,6 +1829,12 @@ def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), let Inst{7-4} = 0b0001; } +def SMMLAR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), + IIC_iMAC32, "smmlar", "\t$dst, $a, $b, $c", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b0011; // R = 1 +} def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, "smmls", "\t$dst, $a, $b, $c", @@ -1329,6 +1843,13 @@ def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), let Inst{7-4} = 0b1101; } +def SMMLSR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), + IIC_iMAC32, "smmlsr", "\t$dst, $a, $b, $c", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b1111; // R = 1 +} + multiclass AI_smul<string opc, PatFrag opnode> { def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, !strconcat(opc, "bb"), "\t$dst, $a, $b", @@ -1400,7 +1921,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), - (sra GPR:$b, (i32 16)))))]>, + (sra GPR:$b, (i32 16)))))]>, Requires<[IsARM, HasV5TE]> { let Inst{5} = 0; let Inst{6} = 1; @@ -1446,8 +1967,87 @@ multiclass AI_smla<string opc, PatFrag opnode> { defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; -// TODO: Halfword multiple accumulate long: SMLAL<x><y> -// TODO: Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD +// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only +def SMLALBB : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 0; +} + +def 
SMLALBT : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 1; +} + +def SMLALTB : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 0; +} + +def SMLALTT : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 1; +} + +// Helper class for AI_smld -- for disassembly only +class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops, + InstrItinClass itin, string opc, string asm> + : AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> { + let Inst{4} = 1; + let Inst{5} = swap; + let Inst{6} = sub; + let Inst{7} = 0; + let Inst{21-20} = 0b00; + let Inst{22} = long; + let Inst{27-23} = 0b01110; +} + +multiclass AI_smld<bit sub, string opc> { + + def D : AMulDualI<0, sub, 0, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + NoItinerary, !strconcat(opc, "d"), "\t$dst, $a, $b, $acc">; + + def DX : AMulDualI<0, sub, 1, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + NoItinerary, !strconcat(opc, "dx"), "\t$dst, $a, $b, $acc">; + + def LD : AMulDualI<1, sub, 0, (outs GPR:$ldst,GPR:$hdst), (ins GPR:$a,GPR:$b), + NoItinerary, !strconcat(opc, "ld"), "\t$ldst, $hdst, $a, $b">; + + def LDX : AMulDualI<1, sub, 1, (outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + NoItinerary, !strconcat(opc, "ldx"),"\t$ldst, $hdst, $a, $b">; + +} + +defm SMLA : AI_smld<0, "smla">; +defm SMLS : AI_smld<1, "smls">; + +multiclass AI_sdml<bit sub, string opc> { + + def D : AMulDualI<0, sub, 0, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + NoItinerary, !strconcat(opc, "d"), "\t$dst, $a, $b"> { + let Inst{15-12} = 0b1111; + } + + def DX : AMulDualI<0, sub, 1, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + NoItinerary, !strconcat(opc, "dx"), "\t$dst, $a, $b"> { + let Inst{15-12} = 0b1111; + } + +} + +defm SMUA : AI_sdml<0, "smua">; +defm SMUS : AI_sdml<1, "smus">; //===----------------------------------------------------------------------===// // Misc. Arithmetic Instructions. @@ -1505,7 +2105,7 @@ def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, LSL $shamt", + IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), (and (shl GPR:$src2, (i32 imm:$shamt)), 0xFFFF0000)))]>, @@ -1522,7 +2122,7 @@ def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, ASR $shamt", + IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), (and (sra GPR:$src2, imm16_31:$shamt), 0xFFFF)))]>, Requires<[IsARM, HasV6]> { @@ -1568,7 +2168,7 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm), // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use -// a two-value operand where a dag node expects two operands. 
:( +// a two-value operand where a dag node expects two operands. :( def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm, IIC_iCMOVr, "mov", "\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, @@ -1606,6 +2206,7 @@ def Int_MemBarrierV7 : AInoP<(outs), (ins), Requires<[IsARM, HasV7]> { let Inst{31-4} = 0xf57ff05; // FIXME: add support for options other than a full system DMB + // See DMB disassembly-only variants below. let Inst{3-0} = 0b1111; } @@ -1616,6 +2217,7 @@ def Int_SyncBarrierV7 : AInoP<(outs), (ins), Requires<[IsARM, HasV7]> { let Inst{31-4} = 0xf57ff04; // FIXME: add support for options other than a full system DSB + // See DSB disassembly-only variants below. let Inst{3-0} = 0b1111; } @@ -1638,6 +2240,64 @@ def Int_SyncBarrierV6 : AInoP<(outs), (ins GPR:$zero), } } +// Helper class for multiclass MemB -- for disassembly only +class AMBI<string opc, string asm> + : AInoP<(outs), (ins), MiscFrm, NoItinerary, opc, asm, + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV7]> { + let Inst{31-20} = 0xf57; +} + +multiclass MemB<bits<4> op7_4, string opc> { + + def st : AMBI<opc, "\tst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b1110; + } + + def ish : AMBI<opc, "\tish"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b1011; + } + + def ishst : AMBI<opc, "\tishst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b1010; + } + + def nsh : AMBI<opc, "\tnsh"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0111; + } + + def nshst : AMBI<opc, "\tnshst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0110; + } + + def osh : AMBI<opc, "\tosh"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0011; + } + + def oshst : AMBI<opc, "\toshst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0010; + } +} + +// These DMB variants are for disassembly only. +defm DMB : MemB<0b0101, "dmb">; + +// These DSB variants are for disassembly only. +defm DSB : MemB<0b0100, "dsb">; + +// ISB has only full system option -- for disassembly only +def ISBsy : AMBI<"isb", ""> { + let Inst{7-4} = 0b0110; + let Inst{3-0} = 0b1111; +} + let usesCustomInserter = 1 in { let Uses = [CPSR] in { def ATOMIC_LOAD_ADD_I8 : PseudoInst< @@ -1777,6 +2437,35 @@ def STREXD : AIstrex<0b01, (outs GPR:$success), []>; } +// Clear-Exclusive is for disassembly only. +def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV7]> { + let Inst{31-20} = 0xf57; + let Inst{7-4} = 0b0001; +} + +// SWP/SWPB are deprecated in V6/V7 and for disassembly only. +let mayLoad = 1 in { +def SWP : AI<(outs GPR:$dst), (ins GPR:$src, GPR:$ptr), LdStExFrm, NoItinerary, + "swp", "\t$dst, $src, [$ptr]", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-23} = 0b00010; + let Inst{22} = 0; // B = 0 + let Inst{21-20} = 0b00; + let Inst{7-4} = 0b1001; +} + +def SWPB : AI<(outs GPR:$dst), (ins GPR:$src, GPR:$ptr), LdStExFrm, NoItinerary, + "swpb", "\t$dst, $src, [$ptr]", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-23} = 0b00010; + let Inst{22} = 1; // B = 1 + let Inst{21-20} = 0b00; + let Inst{7-4} = 0b1001; +} +} + //===----------------------------------------------------------------------===// // TLS Instructions // @@ -1827,7 +2516,7 @@ let Defs = // Two piece so_imms. 
let isReMaterializable = 1 in -def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src), +def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src), Pseudo, IIC_iMOVi, "mov", "\t$dst, $src", [(set GPR:$dst, so_imm2part:$src)]>, @@ -1852,7 +2541,7 @@ def : ARMPat<(add GPR:$LHS, so_neg_imm2part:$RHS), // FIXME: Remove this when we can do generalized remat. let isReMaterializable = 1 in def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi, - "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}", + "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}", [(set GPR:$dst, (i32 imm:$src))]>, Requires<[IsARM, HasV6T2]>; @@ -1959,3 +2648,226 @@ include "ARMInstrVFP.td" // include "ARMInstrNEON.td" + +//===----------------------------------------------------------------------===// +// Coprocessor Instructions. For disassembly only. +// + +def CDP : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + nohash_imm:$CRd, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "cdp", "\tp$cop, $opc1, cr$CRd, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{4} = 0; +} + +def CDP2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + nohash_imm:$CRd, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "cdp2\tp$cop, $opc1, cr$CRd, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{4} = 0; +} + +class ACI<dag oops, dag iops, string opc, string asm> + : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, NoItinerary, + opc, asm, "", [/* For disassembly only; pattern left blank */]> { + let Inst{27-25} = 0b110; +} + +multiclass LdStCop<bits<4> op31_28, bit load, string opc> { + + def _OFFSET : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), + opc, "\tp$cop, cr$CRd, $addr"> { + let Inst{31-28} = op31_28; + let Inst{24} = 1; // P = 1 + let Inst{21} = 0; // W = 0 + let Inst{22} = 0; // D = 0 + let Inst{20} = load; + } + + def _PRE : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), + opc, "\tp$cop, cr$CRd, $addr!"> { + let Inst{31-28} = op31_28; + let Inst{24} = 1; // P = 1 + let Inst{21} = 1; // W = 1 + let Inst{22} = 0; // D = 0 + let Inst{20} = load; + } + + def _POST : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset), + opc, "\tp$cop, cr$CRd, [$base], $offset"> { + let Inst{31-28} = op31_28; + let Inst{24} = 0; // P = 0 + let Inst{21} = 1; // W = 1 + let Inst{22} = 0; // D = 0 + let Inst{20} = load; + } + + def _OPTION : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, i32imm:$option), + opc, "\tp$cop, cr$CRd, [$base], $option"> { + let Inst{31-28} = op31_28; + let Inst{24} = 0; // P = 0 + let Inst{23} = 1; // U = 1 + let Inst{21} = 0; // W = 0 + let Inst{22} = 0; // D = 0 + let Inst{20} = load; + } + + def L_OFFSET : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), + opc, "l\tp$cop, cr$CRd, $addr"> { + let Inst{31-28} = op31_28; + let Inst{24} = 1; // P = 1 + let Inst{21} = 0; // W = 0 + let Inst{22} = 1; // D = 1 + let Inst{20} = load; + } + + def L_PRE : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), + opc, "l\tp$cop, cr$CRd, $addr!"> { + let Inst{31-28} = op31_28; + let Inst{24} = 1; // P = 1 + let Inst{21} = 1; // W = 1 + let Inst{22} = 1; // D = 1 + let Inst{20} = load; + } + + def L_POST : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset), + 
opc, "l\tp$cop, cr$CRd, [$base], $offset"> { + let Inst{31-28} = op31_28; + let Inst{24} = 0; // P = 0 + let Inst{21} = 1; // W = 1 + let Inst{22} = 1; // D = 1 + let Inst{20} = load; + } + + def L_OPTION : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, nohash_imm:$option), + opc, "l\tp$cop, cr$CRd, [$base], $option"> { + let Inst{31-28} = op31_28; + let Inst{24} = 0; // P = 0 + let Inst{23} = 1; // U = 1 + let Inst{21} = 0; // W = 0 + let Inst{22} = 1; // D = 1 + let Inst{20} = load; + } +} + +defm LDC : LdStCop<{?,?,?,?}, 1, "ldc">; +defm LDC2 : LdStCop<0b1111, 1, "ldc2">; +defm STC : LdStCop<{?,?,?,?}, 0, "stc">; +defm STC2 : LdStCop<0b1111, 0, "stc2">; + +def MCR : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "mcr", "\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{20} = 0; + let Inst{4} = 1; +} + +def MCR2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "mcr2\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{20} = 0; + let Inst{4} = 1; +} + +def MRC : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "mrc", "\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{20} = 1; + let Inst{4} = 1; +} + +def MRC2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "mrc2\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{20} = 1; + let Inst{4} = 1; +} + +def MCRR : ABI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, + GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), + NoItinerary, "mcrr", "\tp$cop, $opc, $Rt, $Rt2, cr$CRm", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0100; +} + +def MCRR2 : ABXI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, + GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), + NoItinerary, "mcrr2\tp$cop, $opc, $Rt, $Rt2, cr$CRm", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{23-20} = 0b0100; +} + +def MRRC : ABI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, + GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), + NoItinerary, "mrrc", "\tp$cop, $opc, $Rt, $Rt2, cr$CRm", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0101; +} + +def MRRC2 : ABXI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, + GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), + NoItinerary, "mrrc2\tp$cop, $opc, $Rt, $Rt2, cr$CRm", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{23-20} = 0b0101; +} + +//===----------------------------------------------------------------------===// +// Move between special register and ARM core register -- for disassembly only +// + +def MRS : ABI<0b0001,(outs GPR:$dst),(ins), NoItinerary, "mrs", "\t$dst, cpsr", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0000; + let Inst{7-4} = 0b0000; +} + +def MRSsys : ABI<0b0001,(outs GPR:$dst),(ins), NoItinerary,"mrs","\t$dst, spsr", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0100; + let Inst{7-4} = 0b0000; +} + +// FIXME: mask is 
ignored for the time being. +def MSR : ABI<0b0001,(outs),(ins GPR:$src), NoItinerary, "msr", "\tcpsr, $src", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0010; + let Inst{7-4} = 0b0000; +} + +// FIXME: mask is ignored for the time being. +def MSRi : ABI<0b0011,(outs),(ins so_imm:$a), NoItinerary, "msr", "\tcpsr, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0010; + let Inst{7-4} = 0b0000; +} + +// FIXME: mask is ignored for the time being. +def MSRsys : ABI<0b0001,(outs),(ins GPR:$src),NoItinerary,"msr","\tspsr, $src", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0110; + let Inst{7-4} = 0b0000; +} + +// FIXME: mask is ignored for the time being. +def MSRsysi : ABI<0b0011,(outs),(ins so_imm:$a),NoItinerary,"msr","\tspsr, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0110; + let Inst{7-4} = 0b0000; +} diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index e2be7ba..3aa0810 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -83,11 +83,17 @@ def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>; def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>; def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>; + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>]>; def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>; def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>; def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>; +def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>]>; +def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>; +def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>; + //===----------------------------------------------------------------------===// // NEON operand definitions //===----------------------------------------------------------------------===// @@ -123,9 +129,7 @@ def h64imm : Operand<i64> { let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { def VLDMD : NI<(outs), (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops), - IIC_fpLoadm, - "vldm", "${addr:submode} ${addr:base}, $dst1", - []> { + IIC_fpLoadm, "vldm", "${addr:submode} ${addr:base}, $dst1", []> { let Inst{27-25} = 0b110; let Inst{20} = 1; let Inst{11-9} = 0b101; @@ -133,9 +137,7 @@ def VLDMD : NI<(outs), def VLDMS : NI<(outs), (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops), - IIC_fpLoadm, - "vldm", "${addr:submode} ${addr:base}, $dst1", - []> { + IIC_fpLoadm, "vldm", "${addr:submode} ${addr:base}, $dst1", []> { let Inst{27-25} = 0b110; let Inst{20} = 1; let Inst{11-9} = 0b101; @@ -144,10 +146,9 @@ def VLDMS : NI<(outs), */ // Use vldmia to load a Q register as a D register pair. -def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), - IIC_fpLoadm, - "vldmia", "$addr, ${dst:dregpair}", - [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> { +def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), IIC_fpLoadm, + "vldmia", "$addr, ${dst:dregpair}", + [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> { let Inst{27-25} = 0b110; let Inst{24} = 0; // P bit let Inst{23} = 1; // U bit @@ -156,10 +157,9 @@ def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), } // Use vstmia to store a Q register as a D register pair. 
-def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), - IIC_fpStorem, - "vstmia", "$addr, ${src:dregpair}", - [(store (v2f64 QPR:$src), addrmode4:$addr)]> { +def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem, + "vstmia", "$addr, ${src:dregpair}", + [(store (v2f64 QPR:$src), addrmode4:$addr)]> { let Inst{27-25} = 0b110; let Inst{24} = 0; // P bit let Inst{23} = 1; // U bit @@ -191,6 +191,29 @@ def VLD1q32 : VLD1Q<0b1000, "vld1", "32", v4i32, int_arm_neon_vld1>; def VLD1qf : VLD1Q<0b1000, "vld1", "32", v4f32, int_arm_neon_vld1>; def VLD1q64 : VLD1Q<0b1100, "vld1", "64", v2i64, int_arm_neon_vld1>; +// These (dreg triple/quadruple) are for disassembly only. +class VLD1D3<bits<4> op7_4, string OpcodeStr, string Dt> + : NLdSt<0, 0b10, 0b0110, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$addr), IIC_VLD1, OpcodeStr, Dt, + "\\{$dst1, $dst2, $dst3\\}, $addr", "", + [/* For disassembly only; pattern left blank */]>; +class VLD1D4<bits<4> op7_4, string OpcodeStr, string Dt> + : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$addr), IIC_VLD1, OpcodeStr, Dt, + "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", + [/* For disassembly only; pattern left blank */]>; + +def VLD1d8T : VLD1D3<0b0000, "vld1", "8">; +def VLD1d16T : VLD1D3<0b0100, "vld1", "16">; +def VLD1d32T : VLD1D3<0b1000, "vld1", "32">; +//def VLD1d64T : VLD1D3<0b1100, "vld1", "64">; + +def VLD1d8Q : VLD1D4<0b0000, "vld1", "8">; +def VLD1d16Q : VLD1D4<0b0100, "vld1", "16">; +def VLD1d32Q : VLD1D4<0b1000, "vld1", "32">; +//def VLD1d64Q : VLD1D4<0b1100, "vld1", "64">; + + let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { // VLD2 : Vector Load (multiple 2-element structures) @@ -216,6 +239,16 @@ def VLD2q8 : VLD2Q<0b0000, "vld2", "8">; def VLD2q16 : VLD2Q<0b0100, "vld2", "16">; def VLD2q32 : VLD2Q<0b1000, "vld2", "32">; +// These (double-spaced dreg pair) are for disassembly only. +class VLD2Ddbl<bits<4> op7_4, string OpcodeStr, string Dt> + : NLdSt<0,0b10,0b1001,op7_4, (outs DPR:$dst1, DPR:$dst2), + (ins addrmode6:$addr), IIC_VLD2, + OpcodeStr, Dt, "\\{$dst1, $dst2\\}, $addr", "", []>; + +def VLD2d8D : VLD2Ddbl<0b0000, "vld2", "8">; +def VLD2d16D : VLD2Ddbl<0b0100, "vld2", "16">; +def VLD2d32D : VLD2Ddbl<0b1000, "vld2", "32">; + // VLD3 : Vector Load (multiple 3-element structures) class VLD3D<bits<4> op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b10,0b0100,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), @@ -285,105 +318,64 @@ def VLD4q32b : VLD4WB<0b1000, "vld4", "32">; class VLD2LN<bits<4> op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), - IIC_VLD2, - OpcodeStr, Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr", + IIC_VLD2, OpcodeStr, Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr", "$src1 = $dst1, $src2 = $dst2", []>; // vld2 to single-spaced registers. def VLD2LNd8 : VLD2LN<0b0001, "vld2", "8">; -def VLD2LNd16 : VLD2LN<0b0101, "vld2", "16"> { - let Inst{5} = 0; -} -def VLD2LNd32 : VLD2LN<0b1001, "vld2", "32"> { - let Inst{6} = 0; -} +def VLD2LNd16 : VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 0; } +def VLD2LNd32 : VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 0; } // vld2 to double-spaced even registers. 
-def VLD2LNq16a: VLD2LN<0b0101, "vld2", "16"> { - let Inst{5} = 1; -} -def VLD2LNq32a: VLD2LN<0b1001, "vld2", "32"> { - let Inst{6} = 1; -} +def VLD2LNq16a: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; } +def VLD2LNq32a: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; } // vld2 to double-spaced odd registers. -def VLD2LNq16b: VLD2LN<0b0101, "vld2", "16"> { - let Inst{5} = 1; -} -def VLD2LNq32b: VLD2LN<0b1001, "vld2", "32"> { - let Inst{6} = 1; -} +def VLD2LNq16b: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; } +def VLD2LNq32b: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; } // VLD3LN : Vector Load (single 3-element structure to one lane) class VLD3LN<bits<4> op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VLD3, - OpcodeStr, Dt, + nohash_imm:$lane), IIC_VLD3, OpcodeStr, Dt, "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr", "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>; // vld3 to single-spaced registers. -def VLD3LNd8 : VLD3LN<0b0010, "vld3", "8"> { - let Inst{4} = 0; -} -def VLD3LNd16 : VLD3LN<0b0110, "vld3", "16"> { - let Inst{5-4} = 0b00; -} -def VLD3LNd32 : VLD3LN<0b1010, "vld3", "32"> { - let Inst{6-4} = 0b000; -} +def VLD3LNd8 : VLD3LN<0b0010, "vld3", "8"> { let Inst{4} = 0; } +def VLD3LNd16 : VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b00; } +def VLD3LNd32 : VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b000; } // vld3 to double-spaced even registers. -def VLD3LNq16a: VLD3LN<0b0110, "vld3", "16"> { - let Inst{5-4} = 0b10; -} -def VLD3LNq32a: VLD3LN<0b1010, "vld3", "32"> { - let Inst{6-4} = 0b100; -} +def VLD3LNq16a: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; } +def VLD3LNq32a: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; } // vld3 to double-spaced odd registers. -def VLD3LNq16b: VLD3LN<0b0110, "vld3", "16"> { - let Inst{5-4} = 0b10; -} -def VLD3LNq32b: VLD3LN<0b1010, "vld3", "32"> { - let Inst{6-4} = 0b100; -} +def VLD3LNq16b: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; } +def VLD3LNq32b: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; } // VLD4LN : Vector Load (single 4-element structure to one lane) class VLD4LN<bits<4> op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VLD4, - OpcodeStr, Dt, + nohash_imm:$lane), IIC_VLD4, OpcodeStr, Dt, "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr", "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>; // vld4 to single-spaced registers. def VLD4LNd8 : VLD4LN<0b0011, "vld4", "8">; -def VLD4LNd16 : VLD4LN<0b0111, "vld4", "16"> { - let Inst{5} = 0; -} -def VLD4LNd32 : VLD4LN<0b1011, "vld4", "32"> { - let Inst{6} = 0; -} +def VLD4LNd16 : VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 0; } +def VLD4LNd32 : VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 0; } // vld4 to double-spaced even registers. -def VLD4LNq16a: VLD4LN<0b0111, "vld4", "16"> { - let Inst{5} = 1; -} -def VLD4LNq32a: VLD4LN<0b1011, "vld4", "32"> { - let Inst{6} = 1; -} +def VLD4LNq16a: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; } +def VLD4LNq32a: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; } // vld4 to double-spaced odd registers. 
-def VLD4LNq16b: VLD4LN<0b0111, "vld4", "16"> { - let Inst{5} = 1; -} -def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> { - let Inst{6} = 1; -} +def VLD4LNq16b: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; } +def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; } // VLD1DUP : Vector Load (single element to all lanes) // VLD2DUP : Vector Load (single 2-element structure to all lanes) @@ -418,6 +410,31 @@ def VST1qf : VST1Q<0b1000, "vst1", "32", v4f32, int_arm_neon_vst1>; def VST1q64 : VST1Q<0b1100, "vst1", "64", v2i64, int_arm_neon_vst1>; } // hasExtraSrcRegAllocReq +// These (dreg triple/quadruple) are for disassembly only. +class VST1D3<bits<4> op7_4, string OpcodeStr, string Dt> + : NLdSt<0, 0b00, 0b0110, op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, + OpcodeStr, Dt, + "\\{$src1, $src2, $src3\\}, $addr", "", + [/* For disassembly only; pattern left blank */]>; +class VST1D4<bits<4> op7_4, string OpcodeStr, string Dt> + : NLdSt<0, 0b00, 0b0010, op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST, OpcodeStr, Dt, + "\\{$src1, $src2, $src3, $src4\\}, $addr", "", + [/* For disassembly only; pattern left blank */]>; + +def VST1d8T : VST1D3<0b0000, "vst1", "8">; +def VST1d16T : VST1D3<0b0100, "vst1", "16">; +def VST1d32T : VST1D3<0b1000, "vst1", "32">; +//def VST1d64T : VST1D3<0b1100, "vst1", "64">; + +def VST1d8Q : VST1D4<0b0000, "vst1", "8">; +def VST1d16Q : VST1D4<0b0100, "vst1", "16">; +def VST1d32Q : VST1D4<0b1000, "vst1", "32">; +//def VST1d64Q : VST1D4<0b1100, "vst1", "64">; + + let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { // VST2 : Vector Store (multiple 2-element structures) @@ -428,8 +445,7 @@ class VST2D<bits<4> op7_4, string OpcodeStr, string Dt> class VST2Q<bits<4> op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0011,op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", + IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", []>; def VST2d8 : VST2D<0b0000, "vst2", "8">; @@ -443,6 +459,16 @@ def VST2q8 : VST2Q<0b0000, "vst2", "8">; def VST2q16 : VST2Q<0b0100, "vst2", "16">; def VST2q32 : VST2Q<0b1000, "vst2", "32">; +// These (double-spaced dreg pair) are for disassembly only. 
+class VST2Ddbl<bits<4> op7_4, string OpcodeStr, string Dt> + : NLdSt<0, 0b00, 0b1001, op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, + OpcodeStr, Dt, "\\{$src1, $src2\\}, $addr", "", []>; + +def VST2d8D : VST2Ddbl<0b0000, "vst2", "8">; +def VST2d16D : VST2Ddbl<0b0100, "vst2", "16">; +def VST2d32D : VST2Ddbl<0b1000, "vst2", "32">; + // VST3 : Vector Store (multiple 3-element structures) class VST3D<bits<4> op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0100,op7_4, (outs), @@ -476,14 +502,12 @@ def VST3q32b : VST3WB<0b1000, "vst3", "32">; class VST4D<bits<4> op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0000,op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", + IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", []>; class VST4WB<bits<4> op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0001,op7_4, (outs GPR:$wb), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", + IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "$addr.addr = $wb", []>; def VST4d8 : VST4D<0b0000, "vst4", "8">; @@ -511,104 +535,63 @@ def VST4q32b : VST4WB<0b1000, "vst4", "32">; // VST2LN : Vector Store (single 2-element structure from one lane) class VST2LN<bits<4> op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), - IIC_VST, - OpcodeStr, Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr", - "", []>; + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), + IIC_VST, OpcodeStr, Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr", + "", []>; // vst2 to single-spaced registers. def VST2LNd8 : VST2LN<0b0001, "vst2", "8">; -def VST2LNd16 : VST2LN<0b0101, "vst2", "16"> { - let Inst{5} = 0; -} -def VST2LNd32 : VST2LN<0b1001, "vst2", "32"> { - let Inst{6} = 0; -} +def VST2LNd16 : VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 0; } +def VST2LNd32 : VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 0; } // vst2 to double-spaced even registers. -def VST2LNq16a: VST2LN<0b0101, "vst2", "16"> { - let Inst{5} = 1; -} -def VST2LNq32a: VST2LN<0b1001, "vst2", "32"> { - let Inst{6} = 1; -} +def VST2LNq16a: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; } +def VST2LNq32a: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; } // vst2 to double-spaced odd registers. -def VST2LNq16b: VST2LN<0b0101, "vst2", "16"> { - let Inst{5} = 1; -} -def VST2LNq32b: VST2LN<0b1001, "vst2", "32"> { - let Inst{6} = 1; -} +def VST2LNq16b: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; } +def VST2LNq32b: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; } // VST3LN : Vector Store (single 3-element structure from one lane) class VST3LN<bits<4> op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VST, - OpcodeStr, Dt, - "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>; + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, + nohash_imm:$lane), IIC_VST, OpcodeStr, Dt, + "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>; // vst3 to single-spaced registers. 
-def VST3LNd8 : VST3LN<0b0010, "vst3", "8"> { - let Inst{4} = 0; -} -def VST3LNd16 : VST3LN<0b0110, "vst3", "16"> { - let Inst{5-4} = 0b00; -} -def VST3LNd32 : VST3LN<0b1010, "vst3", "32"> { - let Inst{6-4} = 0b000; -} +def VST3LNd8 : VST3LN<0b0010, "vst3", "8"> { let Inst{4} = 0; } +def VST3LNd16 : VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b00; } +def VST3LNd32 : VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b000; } // vst3 to double-spaced even registers. -def VST3LNq16a: VST3LN<0b0110, "vst3", "16"> { - let Inst{5-4} = 0b10; -} -def VST3LNq32a: VST3LN<0b1010, "vst3", "32"> { - let Inst{6-4} = 0b100; -} +def VST3LNq16a: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; } +def VST3LNq32a: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; } // vst3 to double-spaced odd registers. -def VST3LNq16b: VST3LN<0b0110, "vst3", "16"> { - let Inst{5-4} = 0b10; -} -def VST3LNq32b: VST3LN<0b1010, "vst3", "32"> { - let Inst{6-4} = 0b100; -} +def VST3LNq16b: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; } +def VST3LNq32b: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; } // VST4LN : Vector Store (single 4-element structure from one lane) class VST4LN<bits<4> op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VST, - OpcodeStr, Dt, + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, + nohash_imm:$lane), IIC_VST, OpcodeStr, Dt, "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr", - "", []>; + "", []>; // vst4 to single-spaced registers. def VST4LNd8 : VST4LN<0b0011, "vst4", "8">; -def VST4LNd16 : VST4LN<0b0111, "vst4", "16"> { - let Inst{5} = 0; -} -def VST4LNd32 : VST4LN<0b1011, "vst4", "32"> { - let Inst{6} = 0; -} +def VST4LNd16 : VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 0; } +def VST4LNd32 : VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 0; } // vst4 to double-spaced even registers. -def VST4LNq16a: VST4LN<0b0111, "vst4", "16"> { - let Inst{5} = 1; -} -def VST4LNq32a: VST4LN<0b1011, "vst4", "32"> { - let Inst{6} = 1; -} +def VST4LNq16a: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; } +def VST4LNq32a: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; } // vst4 to double-spaced odd registers. -def VST4LNq16b: VST4LN<0b0111, "vst4", "16"> { - let Inst{5} = 1; -} -def VST4LNq32b: VST4LN<0b1011, "vst4", "32"> { - let Inst{6} = 1; -} +def VST4LNq16b: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; } +def VST4LNq32b: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; } } // mayStore = 1, hasExtraSrcRegAllocReq = 1 @@ -656,34 +639,26 @@ def SubReg_i32_lane : SDNodeXForm<imm, [{ // Instruction Classes //===----------------------------------------------------------------------===// -// Basic 2-register operations, both double- and quad-register. +// Basic 2-register operations: single-, double- and quad-register. 
+class N2VS<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> + : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, + (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), + IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>; class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode> + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>; class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode> + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>; -// Basic 2-register operations, scalar single-precision. -class N2VDs<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode> - : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, - (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), - IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>; - -class N2VDsPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst> - : NEONFPPat<(ResTy (OpNode SPR:$a)), - (EXTRACT_SUBREG - (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)), - arm_ssubreg_0)>; - // Basic 2-register intrinsics, both double- and quad-register. class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, @@ -700,21 +675,6 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>; -// Basic 2-register intrinsics, scalar single-precision -class N2VDInts<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, - (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), itin, - OpcodeStr, Dt, "$dst, $src", "", []>; - -class N2VDIntsPat<SDNode OpNode, NeonI Inst> - : NEONFPPat<(f32 (OpNode SPR:$a)), - (EXTRACT_SUBREG - (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)), - arm_ssubreg_0)>; - // Narrow 2-register intrinsics. 
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, @@ -742,15 +702,22 @@ class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt> class N2VQShuffle<bits<2> op19_18, bits<5> op11_7, InstrItinClass itin, string OpcodeStr, string Dt> : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$dst1, QPR:$dst2), - (ins QPR:$src1, QPR:$src2), itin, - OpcodeStr, Dt, "$dst1, $dst2", + (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$dst1, $dst2", "$src1 = $dst1, $src2 = $dst2", []>; -// Basic 3-register operations, both double- and quad-register. +// Basic 3-register operations: single-, double- and quad-register. +class N3VS<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, + SDNode OpNode, bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, + OpcodeStr, Dt, "$dst, $src1, $src2", "", []> { + let isCommutable = Commutable; +} + class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, - SDNode OpNode, bit Commutable> + ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin, OpcodeStr, Dt, "$dst, $src1, $src2", "", @@ -763,9 +730,9 @@ class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3VX<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin, - OpcodeStr, "$dst, $src1, $src2", "", - [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> { + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin, + OpcodeStr, "$dst, $src1, $src2", "", + [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]>{ let isCommutable = Commutable; } class N3VDSL<bits<2> op21_20, bits<4> op11_8, @@ -776,27 +743,23 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), - (Ty (NEONvduplane (Ty DPR_VFP2:$src2), - imm:$lane)))))]> { + (Ty (NEONvduplane (Ty DPR_VFP2:$src2), imm:$lane)))))]>{ let isCommutable = 0; } class N3VDSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), - IIC_VMULi16D, - OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + IIC_VMULi16D, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), - (Ty (NEONvduplane (Ty DPR_8:$src2), - imm:$lane)))))]> { + (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> { let isCommutable = 0; } class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, - SDNode OpNode, bit Commutable> + ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 1, op4, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$dst, $src1, $src2", "", @@ -805,12 +768,11 @@ class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, } class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, - ValueType ResTy, ValueType 
OpTy, - SDNode OpNode, bit Commutable> + ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3VX<op24, op23, op21_20, op11_8, 1, op4, - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin, - OpcodeStr, "$dst, $src1, $src2", "", - [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> { + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin, + OpcodeStr, "$dst, $src1, $src2", "", + [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]>{ let isCommutable = Commutable; } class N3VQSL<bits<2> op21_20, bits<4> op11_8, @@ -825,13 +787,11 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8, imm:$lane)))))]> { let isCommutable = 0; } -class N3VQSL16<bits<2> op21_20, bits<4> op11_8, - string OpcodeStr, string Dt, +class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane), - IIC_VMULi16Q, - OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + IIC_VMULi16Q, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_8:$src2), @@ -839,27 +799,10 @@ class N3VQSL16<bits<2> op21_20, bits<4> op11_8, let isCommutable = 0; } -// Basic 3-register operations, scalar single-precision -class N3VDs<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - SDNode OpNode, bit Commutable> - : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, - OpcodeStr, Dt, "$dst, $src1, $src2", "", []> { - let isCommutable = Commutable; -} -class N3VDsPat<SDNode OpNode, NeonI Inst> - : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), - (EXTRACT_SUBREG - (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)), - arm_ssubreg_0)>; - // Basic 3-register intrinsics, both double- and quad-register. class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, - Intrinsic IntOp, bit Commutable> + ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin, OpcodeStr, Dt, "$dst, $src1, $src2", "", @@ -891,8 +834,7 @@ class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, - Intrinsic IntOp, bit Commutable> + ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 1, op4, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$dst, $src1, $src2", "", @@ -924,7 +866,15 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, let isCommutable = 0; } -// Multiply-Add/Sub operations, both double- and quad-register. +// Multiply-Add/Sub operations: single-, double- and quad-register. 
+class N3VSMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, SDNode MulOp, SDNode OpNode> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs DPR_VFP2:$dst), + (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin, + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>; + class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode MulOp, SDNode OpNode> @@ -976,8 +926,8 @@ class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (MulOp QPR:$src2, - (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3), - imm:$lane)))))))]>; + (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3), + imm:$lane)))))))]>; class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, @@ -989,25 +939,8 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (MulOp QPR:$src2, - (ResTy (NEONvduplane (OpTy DPR_8:$src3), - imm:$lane)))))))]>; - -// Multiply-Add/Sub operations, scalar single-precision -class N3VDMulOps<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, SDNode MulOp, SDNode OpNode> - : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR_VFP2:$dst), - (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin, - OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>; - -class N3VDMulOpsPat<SDNode MulNode, SDNode OpNode, NeonI Inst> - : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), - (EXTRACT_SUBREG - (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$acc, arm_ssubreg_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)), - arm_ssubreg_0)>; + (ResTy (NEONvduplane (OpTy DPR_8:$src3), + imm:$lane)))))))]>; // Neon 3-argument intrinsics, both double- and quad-register. // The destination register is also used as the first source operand register. @@ -1050,9 +983,9 @@ class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, (OpTy DPR:$src2), (OpTy (NEONvduplane (OpTy DPR_VFP2:$src3), imm:$lane)))))]>; -class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - Intrinsic IntOp> +class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin, @@ -1063,7 +996,6 @@ class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass iti (OpTy (NEONvduplane (OpTy DPR_8:$src3), imm:$lane)))))]>; - // Narrowing 3-register intrinsics. 
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ, @@ -1095,9 +1027,9 @@ class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, (ResTy (IntOp (OpTy DPR:$src1), (OpTy (NEONvduplane (OpTy DPR_VFP2:$src2), imm:$lane)))))]>; -class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - Intrinsic IntOp> +class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", @@ -1249,6 +1181,45 @@ class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, // S = single int (32 bit) elements // D = double int (64 bit) elements +// Neon 2-register vector operations -- for disassembly only. + +// First with only element sizes of 8, 16 and 32 bits: +multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, + bits<5> op11_7, bit op4, string opc, string Dt, + string asm> { + // 64-bit vector types. + def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4, + (outs DPR:$dst), (ins DPR:$src), NoItinerary, + opc, !strconcat(Dt, "8"), asm, "", []>; + def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4, + (outs DPR:$dst), (ins DPR:$src), NoItinerary, + opc, !strconcat(Dt, "16"), asm, "", []>; + def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4, + (outs DPR:$dst), (ins DPR:$src), NoItinerary, + opc, !strconcat(Dt, "32"), asm, "", []>; + def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4, + (outs DPR:$dst), (ins DPR:$src), NoItinerary, + opc, "f32", asm, "", []> { + let Inst{10} = 1; // overwrite F = 1 + } + + // 128-bit vector types. + def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4, + (outs QPR:$dst), (ins QPR:$src), NoItinerary, + opc, !strconcat(Dt, "8"), asm, "", []>; + def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4, + (outs QPR:$dst), (ins QPR:$src), NoItinerary, + opc, !strconcat(Dt, "16"), asm, "", []>; + def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, + (outs QPR:$dst), (ins QPR:$src), NoItinerary, + opc, !strconcat(Dt, "32"), asm, "", []>; + def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, + (outs QPR:$dst), (ins QPR:$src), NoItinerary, + opc, "f32", asm, "", []> { + let Inst{10} = 1; // overwrite F = 1 + } +} + // Neon 3-register vector operations. // First with only element sizes of 8, 16 and 32 bits: @@ -1262,22 +1233,22 @@ multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, OpNode, Commutable>; def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16, - OpcodeStr, !strconcat(Dt, "16"), - v4i16, v4i16, OpNode, Commutable>; + OpcodeStr, !strconcat(Dt, "16"), + v4i16, v4i16, OpNode, Commutable>; def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32, - OpcodeStr, !strconcat(Dt, "32"), - v2i32, v2i32, OpNode, Commutable>; + OpcodeStr, !strconcat(Dt, "32"), + v2i32, v2i32, OpNode, Commutable>; // 128-bit vector types. 
def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16, - OpcodeStr, !strconcat(Dt, "8"), - v16i8, v16i8, OpNode, Commutable>; + OpcodeStr, !strconcat(Dt, "8"), + v16i8, v16i8, OpNode, Commutable>; def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16, - OpcodeStr, !strconcat(Dt, "16"), - v8i16, v8i16, OpNode, Commutable>; + OpcodeStr, !strconcat(Dt, "16"), + v8i16, v8i16, OpNode, Commutable>; def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32, - OpcodeStr, !strconcat(Dt, "32"), - v4i32, v4i32, OpNode, Commutable>; + OpcodeStr, !strconcat(Dt, "32"), + v4i32, v4i32, OpNode, Commutable>; } multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> { @@ -1372,7 +1343,7 @@ multiclass N3VIntSL_HS<bits<4> op11_8, def v2i32 : N3VDIntSL<0b10, op11_8, itinD32, OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>; def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16, - OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>; + OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>; def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32, OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>; } @@ -1386,8 +1357,8 @@ multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, : N3VInt_HS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32, OpcodeStr, Dt, IntOp, Commutable> { def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, itinD16, - OpcodeStr, !strconcat(Dt, "8"), - v8i8, v8i8, IntOp, Commutable>; + OpcodeStr, !strconcat(Dt, "8"), + v8i8, v8i8, IntOp, Commutable>; def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, itinQ16, OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp, Commutable>; @@ -1402,11 +1373,11 @@ multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, : N3VInt_QHS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32, OpcodeStr, Dt, IntOp, Commutable> { def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, itinD32, - OpcodeStr, !strconcat(Dt, "64"), - v1i64, v1i64, IntOp, Commutable>; + OpcodeStr, !strconcat(Dt, "64"), + v1i64, v1i64, IntOp, Commutable>; def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, itinQ32, - OpcodeStr, !strconcat(Dt, "64"), - v2i64, v2i64, IntOp, Commutable>; + OpcodeStr, !strconcat(Dt, "64"), + v2i64, v2i64, IntOp, Commutable>; } @@ -1511,9 +1482,11 @@ multiclass N3VMulOpSL_HS<bits<4> op11_8, def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32, OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>; def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16, - OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, mul, ShOp>; + OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, + mul, ShOp>; def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32, - OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, mul, ShOp>; + OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, + mul, ShOp>; } // Neon 3-argument intrinsics, @@ -1522,19 +1495,19 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, string OpcodeStr, string Dt, Intrinsic IntOp> { // 64-bit vector types. def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D, - OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; + OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D, - OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; + OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32D, - OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; + OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; // 128-bit vector types. 
def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16Q, - OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; + OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16Q, - OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; + OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32Q, - OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; + OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; } @@ -1576,17 +1549,17 @@ multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, - itinD, OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; + itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>; def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, - itinD, OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; + itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>; // 128-bit vector types. def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, - itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; + itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>; def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, - itinQ, OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; + itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>; def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, - itinQ, OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; + itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>; } @@ -1846,29 +1819,31 @@ def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul", "p8", def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul", "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>; def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul", "f32", - v2f32, v2f32, fmul, 1>; + v2f32, v2f32, fmul, 1>; def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul", "f32", - v4f32, v4f32, fmul, 1>; -defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>; -def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; -def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, v2f32, fmul>; + v4f32, v4f32, fmul, 1>; +defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>; +def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; +def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, + v2f32, fmul>; + def : Pat<(v8i16 (mul (v8i16 QPR:$src1), (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), (v8i16 (VMULslv8i16 (v8i16 QPR:$src1), (v4i16 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i16_reg imm:$lane))), + (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; def : Pat<(v4i32 (mul (v4i32 QPR:$src1), (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))), (v4i32 (VMULslv4i32 (v4i32 QPR:$src1), (v2i32 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))), (v4f32 (VMULslfq (v4f32 QPR:$src1), (v2f32 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; // VQDMULH : Vector Saturating Doubling Multiply Returning High Half @@ -1883,14 +1858,14 @@ def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 
QPR:$src1), imm:$lane)))), (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1), (v4i16 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i16_reg imm:$lane))), + (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1), (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))), (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1), (v2i32 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; // VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half @@ -1905,14 +1880,14 @@ def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1), imm:$lane)))), (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1), (v4i16 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i16_reg imm:$lane))), + (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))), (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1), (v2i32 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) @@ -1950,30 +1925,28 @@ def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32", v4f32, v2f32, fmul, fadd>; def : Pat<(v8i16 (add (v8i16 QPR:$src1), - (mul (v8i16 QPR:$src2), - (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), - (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), - (v8i16 QPR:$src2), + (mul (v8i16 QPR:$src2), + (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), + (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), (v4i16 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i16_reg imm:$lane))), + (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; def : Pat<(v4i32 (add (v4i32 QPR:$src1), - (mul (v4i32 QPR:$src2), - (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), - (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), - (v4i32 QPR:$src2), + (mul (v4i32 QPR:$src2), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), + (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), (v2i32 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; def : Pat<(v4f32 (fadd (v4f32 QPR:$src1), - (fmul (v4f32 QPR:$src2), - (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), + (fmul (v4f32 QPR:$src2), + (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), (v4f32 (VMLAslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2), (v2f32 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; // VMLAL : Vector Multiply Accumulate Long (Q += D * D) @@ -2003,30 +1976,27 @@ def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32", v4f32, v2f32, fmul, fsub>; def : Pat<(v8i16 (sub (v8i16 QPR:$src1), - (mul (v8i16 QPR:$src2), - (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), - (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), - (v8i16 QPR:$src2), + (mul (v8i16 QPR:$src2), + (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), + (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), (v4i16 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i16_reg imm:$lane))), + (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; def : Pat<(v4i32 (sub (v4i32 QPR:$src1), - (mul (v4i32 QPR:$src2), - (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), - (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), - (v4i32 QPR:$src2), + (mul (v4i32 QPR:$src2), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), + 
(v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), (v2i32 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; def : Pat<(v4f32 (fsub (v4f32 QPR:$src1), - (fmul (v4f32 QPR:$src2), - (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), - (v4f32 (VMLSslfq (v4f32 QPR:$src1), - (v4f32 QPR:$src2), + (fmul (v4f32 QPR:$src2), + (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), + (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2), (v2f32 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; // VMLSL : Vector Multiply Subtract Long (Q -= D * D) @@ -2088,6 +2058,10 @@ def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, NEONvceq, 1>; def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, NEONvceq, 1>; +// For disassembly only. +defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", + "$dst, $src, #0">; + // VCGE : Vector Compare Greater Than or Equal defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vcge", "s", NEONvcge, 0>; @@ -2097,6 +2071,13 @@ def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, NEONvcge, 0>; def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, NEONvcge, 0>; +// For disassembly only. +defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", + "$dst, $src, #0">; +// For disassembly only. +defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", + "$dst, $src, #0">; + // VCGT : Vector Compare Greater Than defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vcgt", "s", NEONvcgt, 0>; @@ -2106,6 +2087,13 @@ def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, NEONvcgt, 0>; def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, NEONvcgt, 0>; +// For disassembly only. +defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", + "$dst, $src, #0">; +// For disassembly only. +defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", + "$dst, $src, #0">; + // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge", "f32", v2i32, v2f32, int_arm_neon_vacged, 0>; @@ -2247,9 +2235,9 @@ defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal", "u", int_arm_neon_vabalu>; // Vector Maximum and Minimum. 
// VMAX : Vector Maximum -defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, +defm VMAXs : N3VInt_QHS<0,0,0b0110,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vmax", "s", int_arm_neon_vmaxs, 1>; -defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, +defm VMAXu : N3VInt_QHS<1,0,0b0110,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vmax", "u", int_arm_neon_vmaxu, 1>; def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax", "f32", v2f32, v2f32, int_arm_neon_vmaxs, 1>; @@ -2257,9 +2245,9 @@ def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax", "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>; // VMIN : Vector Minimum -defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, +defm VMINs : N3VInt_QHS<0,0,0b0110,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vmin", "s", int_arm_neon_vmins, 1>; -defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, +defm VMINu : N3VInt_QHS<1,0,0b0110,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vmin", "u", int_arm_neon_vminu, 1>; def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin", "f32", v2f32, v2f32, int_arm_neon_vmins, 1>; @@ -2401,16 +2389,17 @@ def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32", v2i64, v2i32, NEONvshlli>; // VSHRN : Vector Shift Right and Narrow -defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", NEONvshrn>; +defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", + NEONvshrn>; // VRSHL : Vector Rounding Shift defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vrshl", "s", int_arm_neon_vrshifts, 0>; + IIC_VSHLi4Q, "vrshl", "s", int_arm_neon_vrshifts,0>; defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu, 0>; + IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu,0>; // VRSHR : Vector Rounding Shift Right -defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs>; -defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru>; +defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs>; +defm VRSHRu : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru>; // VRSHRN : Vector Rounding Shift Right and Narrow defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", @@ -2418,14 +2407,14 @@ defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", // VQSHL : Vector Saturating Shift defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqshl", "s", int_arm_neon_vqshifts, 0>; + IIC_VSHLi4Q, "vqshl", "s", int_arm_neon_vqshifts,0>; defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqshl", "u", int_arm_neon_vqshiftu, 0>; + IIC_VSHLi4Q, "vqshl", "u", int_arm_neon_vqshiftu,0>; // VQSHL : Vector Saturating Shift Left (Immediate) -defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls>; -defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu>; +defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls>; +defm VQSHLui : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu>; // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned) -defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu", "s", NEONvqshlsu>; +defm VQSHLsu : N2VSh_QHSD<1,1,0b0110,1, 
IIC_VSHLi4D, "vqshlu","s",NEONvqshlsu>; // VQSHRN : Vector Saturating Shift Right and Narrow defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s", @@ -2438,10 +2427,10 @@ defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s", NEONvqshrnsu>; // VQRSHL : Vector Saturating Rounding Shift -defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, +defm VQRSHLs : N3VInt_QHSD<0,0,0b0101,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, "vqrshl", "s", int_arm_neon_vqrshifts, 0>; -defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, +defm VQRSHLu : N3VInt_QHSD<1,0,0b0101,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, "vqrshl", "u", int_arm_neon_vqrshiftu, 0>; @@ -2508,7 +2497,7 @@ def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>; def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>; // VNEG : Vector Negate (floating-point) -def VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, +def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD, "vneg", "f32", "$dst, $src", "", [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>; @@ -2547,6 +2536,14 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiQ, "vcnt", "8", v16i8, v16i8, int_arm_neon_vcnt>; +// Vector Swap -- for disassembly only. +def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0, + (outs DPR:$dst), (ins DPR:$src), NoItinerary, + "vswp", "$dst, $src", "", []>; +def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, + (outs QPR:$dst), (ins QPR:$src), NoItinerary, + "vswp", "$dst, $src", "", []>; + // Vector Move Operations. // VMOV : Vector Move (Register) @@ -2678,10 +2675,10 @@ def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane))>; def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2), - (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1), DPR_VFP2)), + (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)), (SSubReg_f32_reg imm:$src2))>; def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2), - (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1), QPR_VFP2)), + (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)), (SSubReg_f32_reg imm:$src2))>; //def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2), // (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>; @@ -2849,11 +2846,13 @@ def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0, def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)), (INSERT_SUBREG QPR:$src, - (i64 (EXTRACT_SUBREG QPR:$src, (DSubReg_f64_reg imm:$lane))), + (i64 (EXTRACT_SUBREG QPR:$src, + (DSubReg_f64_reg imm:$lane))), (DSubReg_f64_other_reg imm:$lane))>; def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)), (INSERT_SUBREG QPR:$src, - (f64 (EXTRACT_SUBREG QPR:$src, (DSubReg_f64_reg imm:$lane))), + (f64 (EXTRACT_SUBREG QPR:$src, + (DSubReg_f64_reg imm:$lane))), (DSubReg_f64_other_reg imm:$lane))>; // VMOVN : Vector Narrowing Move @@ -3092,70 +3091,110 @@ def VTBX4 // NEON instructions for single-precision FP math //===----------------------------------------------------------------------===// +class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst> + : NEONFPPat<(ResTy (OpNode SPR:$a)), + (EXTRACT_SUBREG (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), + SPR:$a, arm_ssubreg_0)), + arm_ssubreg_0)>; + +class N3VSPat<SDNode OpNode, NeonI Inst> + : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), + (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 
(IMPLICIT_DEF)), + SPR:$a, arm_ssubreg_0), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$b, arm_ssubreg_0)), + arm_ssubreg_0)>; + +class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst> + : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), + (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$acc, arm_ssubreg_0), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$a, arm_ssubreg_0), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$b, arm_ssubreg_0)), + arm_ssubreg_0)>; + // These need separate instructions because they must use DPR_VFP2 register // class which have SPR sub-registers. // Vector Add Operations used for single-precision FP let neverHasSideEffects = 1 in -def VADDfd_sfp : N3VDs<0, 0, 0b00, 0b1101, 0, "vadd", "f32", v2f32, v2f32, fadd,1>; -def : N3VDsPat<fadd, VADDfd_sfp>; +def VADDfd_sfp : N3VS<0,0,0b00,0b1101,0, "vadd", "f32", v2f32, v2f32, fadd, 1>; +def : N3VSPat<fadd, VADDfd_sfp>; // Vector Sub Operations used for single-precision FP let neverHasSideEffects = 1 in -def VSUBfd_sfp : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub", "f32", v2f32, v2f32, fsub,0>; -def : N3VDsPat<fsub, VSUBfd_sfp>; +def VSUBfd_sfp : N3VS<0,0,0b10,0b1101,0, "vsub", "f32", v2f32, v2f32, fsub, 0>; +def : N3VSPat<fsub, VSUBfd_sfp>; // Vector Multiply Operations used for single-precision FP let neverHasSideEffects = 1 in -def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul", "f32", v2f32, v2f32, fmul,1>; -def : N3VDsPat<fmul, VMULfd_sfp>; +def VMULfd_sfp : N3VS<1,0,0b00,0b1101,1, "vmul", "f32", v2f32, v2f32, fmul, 1>; +def : N3VSPat<fmul, VMULfd_sfp>; // Vector Multiply-Accumulate/Subtract used for single-precision FP // vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so // we want to avoid them for now. e.g., alternating vmla/vadd instructions. 
//let neverHasSideEffects = 1 in -//def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32,fmul,fadd>; -//def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>; +//def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32", +// v2f32, fmul, fadd>; +//def : N3VSMulOpPat<fmul, fadd, VMLAfd_sfp>; //let neverHasSideEffects = 1 in -//def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", v2f32,fmul,fsub>; -//def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>; +//def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32", +// v2f32, fmul, fsub>; +//def : N3VSMulOpPat<fmul, fsub, VMLSfd_sfp>; // Vector Absolute used for single-precision FP let neverHasSideEffects = 1 in -def VABSfd_sfp : N2VDInts<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - IIC_VUNAD, "vabs", "f32", - v2f32, v2f32, int_arm_neon_vabs>; -def : N2VDIntsPat<fabs, VABSfd_sfp>; +def VABSfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 0, + (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD, + "vabs", "f32", "$dst, $src", "", []>; +def : N2VSPat<fabs, f32, v2f32, VABSfd_sfp>; // Vector Negate used for single-precision FP let neverHasSideEffects = 1 in -def VNEGf32d_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, - (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD, - "vneg", "f32", "$dst, $src", "", []>; -def : N2VDIntsPat<fneg, VNEGf32d_sfp>; +def VNEGfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, + (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD, + "vneg", "f32", "$dst, $src", "", []>; +def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>; + +// Vector Maximum used for single-precision FP +let neverHasSideEffects = 1 in +def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst), + (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, + "vmax", "f32", "$dst, $src1, $src2", "", []>; +def : N3VSPat<NEONfmax, VMAXfd_sfp>; + +// Vector Minimum used for single-precision FP +let neverHasSideEffects = 1 in +def VMINfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst), + (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, + "vmin", "f32", "$dst, $src1, $src2", "", []>; +def : N3VSPat<NEONfmin, VMINfd_sfp>; // Vector Convert between single-precision FP and integer let neverHasSideEffects = 1 in -def VCVTf2sd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", - v2i32, v2f32, fp_to_sint>; -def : N2VDsPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>; +def VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", + v2i32, v2f32, fp_to_sint>; +def : N2VSPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>; let neverHasSideEffects = 1 in -def VCVTf2ud_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", - v2i32, v2f32, fp_to_uint>; -def : N2VDsPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>; +def VCVTf2ud_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", + v2i32, v2f32, fp_to_uint>; +def : N2VSPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>; let neverHasSideEffects = 1 in -def VCVTs2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", - v2f32, v2i32, sint_to_fp>; -def : N2VDsPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>; +def VCVTs2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", + v2f32, v2i32, sint_to_fp>; +def : N2VSPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>; let neverHasSideEffects = 1 in -def VCVTu2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", - v2f32, v2i32, uint_to_fp>; -def : N2VDsPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>; +def VCVTu2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", 
"f32.u32", + v2f32, v2i32, uint_to_fp>; +def : N2VSPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 1dcea26..786dd65 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -120,7 +120,10 @@ def t_addrmode_sp : Operand<i32>, // Miscellaneous Instructions. // -let Defs = [SP], Uses = [SP] in { +// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE +// from removing one half of the matched pairs. That breaks PEI, which assumes +// these will always be in pairs, and asserts if it finds otherwise. Better way? +let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { def tADJCALLSTACKUP : PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary, "@ tADJCALLSTACKUP $amt1", @@ -132,6 +135,76 @@ PseudoInst<(outs), (ins i32imm:$amt), NoItinerary, [(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb1Only]>; } +def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101111> { + let Inst{9-8} = 0b11; + let Inst{7-0} = 0b00000000; +} + +def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101111> { + let Inst{9-8} = 0b11; + let Inst{7-0} = 0b00010000; +} + +def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101111> { + let Inst{9-8} = 0b11; + let Inst{7-0} = 0b00100000; +} + +def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101111> { + let Inst{9-8} = 0b11; + let Inst{7-0} = 0b00110000; +} + +def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101111> { + let Inst{9-8} = 0b11; + let Inst{7-0} = 0b01000000; +} + +def tSETENDBE : T1I<(outs), (ins), NoItinerary, "setend\tbe", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101101> { + let Inst{9-5} = 0b10010; + let Inst{3} = 1; +} + +def tSETENDLE : T1I<(outs), (ins), NoItinerary, "setend\tle", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101101> { + let Inst{9-5} = 0b10010; + let Inst{3} = 0; +} + +// The i32imm operand $val can be used by a debugger to store more information +// about the breakpoint. +def tBKPT : T1I<(outs), (ins i32imm:$val), NoItinerary, "bkpt\t$val", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101111> { + let Inst{9-8} = 0b10; +} + +// Change Processor State is a system instruction -- for disassembly only. +// The singleton $opt operand contains the following information: +// opt{4-0} = mode ==> don't care +// opt{5} = changemode ==> 0 (false for 16-bit Thumb instr) +// opt{8-6} = AIF from Inst{2-0} +// opt{10-9} = 1:imod from Inst{4} with 0b10 as enable and 0b11 as disable +// +// The opt{4-0} and opt{5} sub-fields are to accommodate 32-bit Thumb and ARM +// CPS which has more options. +def tCPS : T1I<(outs), (ins i32imm:$opt), NoItinerary, "cps${opt:cps}", + [/* For disassembly only; pattern left blank */]>, + T1Misc<0b0110011>; + // For both thumb1 and thumb2. 
let isNotDuplicable = 1 in def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, @@ -200,7 +273,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { let Inst{6-3} = 0b1110; // Rm = lr } // Alternative return instruction used by vararg functions. - def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), IIC_Br, "bx\t$target", []>, + def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), IIC_Br, "bx\t$target",[]>, T1Special<{1,1,0,?}>; // A6.2.3 & A8.6.25 } @@ -228,20 +301,20 @@ let isCall = 1, D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in { // Also used for Thumb2 def tBL : TIx2<0b11110, 0b11, 1, - (outs), (ins i32imm:$func, variable_ops), IIC_Br, + (outs), (ins i32imm:$func, variable_ops), IIC_Br, "bl\t${func:call}", [(ARMtcall tglobaladdr:$func)]>, Requires<[IsThumb, IsNotDarwin]>; // ARMv5T and above, also used for Thumb2 def tBLXi : TIx2<0b11110, 0b11, 0, - (outs), (ins i32imm:$func, variable_ops), IIC_Br, + (outs), (ins i32imm:$func, variable_ops), IIC_Br, "blx\t${func:call}", [(ARMcall tglobaladdr:$func)]>, Requires<[IsThumb, HasV5T, IsNotDarwin]>; // Also used for Thumb2 - def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br, + def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br, "blx\t$func", [(ARMtcall GPR:$func)]>, Requires<[IsThumb, HasV5T, IsNotDarwin]>, @@ -249,7 +322,7 @@ let isCall = 1, // ARMv4T def tBX : TIx2<{?,?,?,?,?}, {?,?}, ?, - (outs), (ins tGPR:$func, variable_ops), IIC_Br, + (outs), (ins tGPR:$func, variable_ops), IIC_Br, "mov\tlr, pc\n\tbx\t$func", [(ARMcall_nolink tGPR:$func)]>, Requires<[IsThumb1Only, IsNotDarwin]>; @@ -263,20 +336,20 @@ let isCall = 1, D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in { // Also used for Thumb2 def tBLr9 : TIx2<0b11110, 0b11, 1, - (outs), (ins i32imm:$func, variable_ops), IIC_Br, + (outs), (ins i32imm:$func, variable_ops), IIC_Br, "bl\t${func:call}", [(ARMtcall tglobaladdr:$func)]>, Requires<[IsThumb, IsDarwin]>; // ARMv5T and above, also used for Thumb2 def tBLXi_r9 : TIx2<0b11110, 0b11, 0, - (outs), (ins i32imm:$func, variable_ops), IIC_Br, + (outs), (ins i32imm:$func, variable_ops), IIC_Br, "blx\t${func:call}", [(ARMcall tglobaladdr:$func)]>, Requires<[IsThumb, HasV5T, IsDarwin]>; // Also used for Thumb2 - def tBLXr_r9 : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br, + def tBLXr_r9 : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br, "blx\t$func", [(ARMtcall GPR:$func)]>, Requires<[IsThumb, HasV5T, IsDarwin]>, @@ -284,7 +357,7 @@ let isCall = 1, // ARMv4T def tBXr9 : TIx2<{?,?,?,?,?}, {?,?}, ?, - (outs), (ins tGPR:$func, variable_ops), IIC_Br, + (outs), (ins tGPR:$func, variable_ops), IIC_Br, "mov\tlr, pc\n\tbx\t$func", [(ARMcall_nolink tGPR:$func)]>, Requires<[IsThumb1Only, IsDarwin]>; @@ -299,7 +372,7 @@ let isBranch = 1, isTerminator = 1 in { // Far jump let Defs = [LR] in - def tBfar : TIx2<0b11110, 0b11, 1, (outs), (ins brtarget:$target), IIC_Br, + def tBfar : TIx2<0b11110, 0b11, 1, (outs), (ins brtarget:$target), IIC_Br, "bl\t$target\t@ far jump",[]>; def tBR_JTr : T1JTI<(outs), @@ -332,16 +405,34 @@ let isBranch = 1, isTerminator = 1 in { T1Misc<{1,0,?,1,?,?,?}>; } +// A8.6.218 Supervisor Call (Software Interrupt) -- for disassembly only +// A8.6.16 B: Encoding T1 +// If Inst{11-8} == 0b1111 then SEE SVC +let isCall = 1 in { +def tSVC : T1pI<(outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", []>, + Encoding16 { + let Inst{15-12} = 0b1101; + let Inst{11-8} = 0b1111; +} +} + +// A8.6.16 B: Encoding T1 -- for disassembly only +// If Inst{11-8} == 0b1110 then 
UNDEFINED +def tTRAP : T1I<(outs), (ins), IIC_Br, "trap", []>, Encoding16 { + let Inst{15-12} = 0b1101; + let Inst{11-8} = 0b1110; +} + //===----------------------------------------------------------------------===// // Load Store Instructions. // -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in -def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, +let canFoldAsLoad = 1, isReMaterializable = 1 in +def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, "ldr", "\t$dst, $addr", [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>, T1LdSt<0b100>; -def tLDRi: T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, +def tLDRi: T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, "ldr", "\t$dst, $addr", []>, T1LdSt4Imm<{1,?,?}>; @@ -391,15 +482,14 @@ def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, // Load tconstpool // FIXME: Use ldr.n to work around a Darwin assembler bug. -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, "ldr", ".n\t$dst, $addr", [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>, T1Encoding<{0,1,0,0,1,?}>; // A6.2 & A8.6.59 // Special LDR for loads from non-pc-relative constpools. -let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, "ldr", "\t$dst, $addr", []>, T1LdStSP<{1,?,?}>; @@ -644,7 +734,7 @@ def tMOVgpr2gpr : T1I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, // multiply register let isCommutable = 1 in def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMUL32, - "mul", "\t$dst, $rhs", + "mul", "\t$dst, $rhs, $dst", /* A8.6.105 MUL Encoding T1 */ [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>, T1DataProcessing<0b1101>; @@ -761,7 +851,7 @@ def tUXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, T1Misc<{0,0,1,0,1,0,?}>; -// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC DAG operation. +// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC operation. // Expanded after instruction selection into a branch sequence. let usesCustomInserter = 1 in // Expanded after instruction selection. def tMOVCCr_pseudo : diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 55c7aa2..6241766 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -13,7 +13,7 @@ // IT block predicate field def it_pred : Operand<i32> { - let PrintMethod = "printPredicateOperand"; + let PrintMethod = "printMandatoryPredicateOperand"; } // IT block condition mask @@ -53,10 +53,10 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{ // bits [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11]. def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ - return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1; + return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1; }]>; -// t2_so_imm_not - Match an immediate that is a complement +// t2_so_imm_not - Match an immediate that is a complement // of a t2_so_imm. 
def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{ @@ -114,13 +114,13 @@ def imm0_4095 : Operand<i32>, return (uint32_t)N->getZExtValue() < 4096; }]>; -def imm0_4095_neg : PatLeaf<(i32 imm), [{ - return (uint32_t)(-N->getZExtValue()) < 4096; -}], imm_neg_XFORM>; +def imm0_4095_neg : PatLeaf<(i32 imm), [{ + return (uint32_t)(-N->getZExtValue()) < 4096; +}], imm_neg_XFORM>; def imm0_255_neg : PatLeaf<(i32 imm), [{ return (uint32_t)(-N->getZExtValue()) < 255; -}], imm_neg_XFORM>; +}], imm_neg_XFORM>; // Define Thumb2 specific addressing modes. @@ -131,7 +131,7 @@ def t2addrmode_imm12 : Operand<i32>, let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } -// t2addrmode_imm8 := reg - imm8 +// t2addrmode_imm8 := reg +/- imm8 def t2addrmode_imm8 : Operand<i32>, ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { let PrintMethod = "printT2AddrModeImm8Operand"; @@ -208,7 +208,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, /// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a // binary operation that produces a value. These are predicable and can be /// changed to modify CPSR. -multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode, +multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0, string wide =""> { // shifted imm def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, @@ -368,15 +368,16 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, } /// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns -/// for a binary operation that produces a value and use and define the carry +/// for a binary operation that produces a value and use the carry /// bit. It's not predicable. let Uses = [CPSR] in { -multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { +multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, + bit Commutable = 0> { // shifted imm def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, opc, "\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUnused]> { + Requires<[IsThumb2]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -387,7 +388,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, opc, ".w\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUnused]> { + Requires<[IsThumb2]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -401,19 +402,23 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, opc, ".w\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUnused]> { + Requires<[IsThumb2]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; let Inst{20} = 0; // The S bit. 
} - // Carry setting variants +} + +// Carry setting variants +let Defs = [CPSR] in { +multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, + bit Commutable = 0> { // shifted imm - def Sri : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, - !strconcat(opc, "s\t$dst, $lhs, $rhs"), - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUsed]> { - let Defs = [CPSR]; + def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, + opc, "\t$dst, $lhs, $rhs", + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, + Requires<[IsThumb2]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -421,11 +426,10 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm let Inst{15} = 0; } // register - def Srr : T2XI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, - !strconcat(opc, "s.w\t$dst, $lhs, $rhs"), - [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUsed]> { - let Defs = [CPSR]; + def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + opc, ".w\t$dst, $lhs, $rhs", + [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, + Requires<[IsThumb2]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -436,11 +440,10 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm let Inst{5-4} = 0b00; // type } // shifted register - def Srs : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, - !strconcat(opc, "s.w\t$dst, $lhs, $rhs"), - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUsed]> { - let Defs = [CPSR]; + def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, + opc, ".w\t$dst, $lhs, $rhs", + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, + Requires<[IsThumb2]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -448,6 +451,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm } } } +} /// T2I_rbin_s_is - Same as T2I_rbin_is except sets 's' bit. let Defs = [CPSR] in { @@ -626,19 +630,6 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> { } } -/// T2I_picld - Defines the PIC load pattern. -class T2I_picld<string opc, PatFrag opnode> : - T2I<(outs GPR:$dst), (ins addrmodepc:$addr), IIC_iLoadi, - !strconcat("\n${addr:label}:\n\t", opc), "\t$dst, $addr", - [(set GPR:$dst, (opnode addrmodepc:$addr))]>; - -/// T2I_picst - Defines the PIC store pattern. -class T2I_picst<string opc, PatFrag opnode> : - T2I<(outs), (ins GPR:$src, addrmodepc:$addr), IIC_iStorer, - !strconcat("\n${addr:label}:\n\t", opc), "\t$src, $addr", - [(opnode GPR:$src, addrmodepc:$addr)]>; - - /// T2I_unary_rrot - A unary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { @@ -666,6 +657,57 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { } } +// SXTB16 and UXTB16 do not need the .w qualifier. 
+multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> { + def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + opc, "\t$dst, $src", + [(set GPR:$dst, (opnode GPR:$src))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = 0b00; // rotate + } + def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, + opc, "\t$dst, $src, ror $rot", + [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = {?,?}; // rotate + } +} + +// DO variant - disassembly only, no pattern + +multiclass T2I_unary_rrot_DO<bits<3> opcod, string opc> { + def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + opc, "\t$dst, $src", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = 0b00; // rotate + } + def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, + opc, "\t$dst, $src, ror $rot", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = {?,?}; // rotate + } +} + /// T2I_bin_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { @@ -692,6 +734,29 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { } } +// DO variant - disassembly only, no pattern + +multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> { + def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr, + opc, "\t$dst, $LHS, $RHS", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = 0b00; // rotate + } + def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), + IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = {?,?}; // rotate + } +} + //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// @@ -734,7 +799,7 @@ def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), let Inst{19-16} = 0b1101; // Rn = sp let Inst{15} = 0; } -def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), +def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), IIC_iALUi, "addw", "\t$dst, $sp, $imm", []> { let Inst{31-27} = 0b11110; let Inst{25} = 1; @@ -787,6 +852,25 @@ def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), let Inst{15} = 0; } +// Signed and unsigned division, for disassembly only +def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, + "sdiv", "\t$dst, $a, $b", []> { + let Inst{31-27} = 0b11111; + let Inst{26-21} = 0b011100; + let Inst{20} = 0b1; + let Inst{15-12} = 0b1111; + let Inst{7-4} = 0b1111; +} + +def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, + "udiv", "\t$dst, $a, $b", 
[]> { + let Inst{31-27} = 0b11111; + let Inst{26-21} = 0b011101; + let Inst{20} = 0b1; + let Inst{15-12} = 0b1111; + let Inst{7-4} = 0b1111; +} + // Pseudo instruction that will expand into a t2SUBrSPi + a copy. let usesCustomInserter = 1 in { // Expanded after instruction selection. def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), @@ -803,7 +887,7 @@ def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), // // Load -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in defm t2LDR : T2I_ld<0, 0b10, "ldr", UnOpFrag<(load node:$Src)>>; // Loads with zero extension @@ -925,10 +1009,32 @@ def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb), []>; } +// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are +// for disassembly only. +// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4 +class T2IldT<bit signed, bits<2> type, string opc> + : T2Ii8<(outs GPR:$dst), (ins t2addrmode_imm8:$addr), IIC_iLoadi, opc, + "\t$dst, $addr", []> { + let Inst{31-27} = 0b11111; + let Inst{26-25} = 0b00; + let Inst{24} = signed; + let Inst{23} = 0; + let Inst{22-21} = type; + let Inst{20} = 1; // load + let Inst{11} = 1; + let Inst{10-8} = 0b110; // PUW. +} + +def t2LDRT : T2IldT<0, 0b10, "ldrt">; +def t2LDRBT : T2IldT<0, 0b00, "ldrbt">; +def t2LDRHT : T2IldT<0, 0b01, "ldrht">; +def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt">; +def t2LDRSHT : T2IldT<1, 0b01, "ldrsht">; + // Store -defm t2STR : T2I_st<0b10, "str", BinOpFrag<(store node:$LHS, node:$RHS)>>; -defm t2STRB : T2I_st<0b00, "strb", BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; -defm t2STRH : T2I_st<0b01, "strh", BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; +defm t2STR :T2I_st<0b10,"str", BinOpFrag<(store node:$LHS, node:$RHS)>>; +defm t2STRB:T2I_st<0b00,"strb",BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; +defm t2STRH:T2I_st<0b01,"strh",BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; // Store doubleword let mayLoad = 1, hasExtraSrcRegAllocReq = 1 in @@ -979,9 +1085,98 @@ def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb), [(set GPR:$base_wb, (post_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; +// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly +// only. +// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4 +class T2IstT<bits<2> type, string opc> + : T2Ii8<(outs GPR:$src), (ins t2addrmode_imm8:$addr), IIC_iStorei, opc, + "\t$src, $addr", []> { + let Inst{31-27} = 0b11111; + let Inst{26-25} = 0b00; + let Inst{24} = 0; // not signed + let Inst{23} = 0; + let Inst{22-21} = type; + let Inst{20} = 0; // store + let Inst{11} = 1; + let Inst{10-8} = 0b110; // PUW +} + +def t2STRT : T2IstT<0b10, "strt">; +def t2STRBT : T2IstT<0b00, "strbt">; +def t2STRHT : T2IstT<0b01, "strht">; // FIXME: ldrd / strd pre / post variants +// T2Ipl (Preload Data/Instruction) signals the memory system of possible future +// data/instruction access. These are for disassembly only. 
+multiclass T2Ipl<bit instr, bit write, string opc> { + + def i12 : T2I<(outs), (ins t2addrmode_imm12:$addr), IIC_iLoadi, opc, + "\t$addr", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = 1; // U = 1 + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{15-12} = 0b1111; + } + + def i8 : T2I<(outs), (ins t2addrmode_imm8:$addr), IIC_iLoadi, opc, + "\t$addr", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = 0; // U = 0 + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{15-12} = 0b1111; + let Inst{11-8} = 0b1100; + } + + // A8.6.118 #0 and #-0 differs. Translates -0 to -1, -1 to -2, ..., etc. + def pci : T2I<(outs), (ins GPR:$base, i32imm:$imm), IIC_iLoadi, opc, + "\t[pc, ${imm:negzero}]", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = ?; // add = (U == 1) + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{19-16} = 0b1111; // Rn = 0b1111 + let Inst{15-12} = 0b1111; + } + + def r : T2I<(outs), (ins GPR:$base, GPR:$a), IIC_iLoadi, opc, + "\t[$base, $a]", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = 0; // add = TRUE for T1 + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{15-12} = 0b1111; + let Inst{11-6} = 0000000; + let Inst{5-4} = 0b00; // no shift is applied + } + + def s : T2I<(outs), (ins GPR:$base, GPR:$a, i32imm:$shamt), IIC_iLoadi, opc, + "\t[$base, $a, lsl $shamt]", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = 0; // add = TRUE for T1 + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{15-12} = 0b1111; + let Inst{11-6} = 0000000; + } +} + +defm t2PLD : T2Ipl<0, 0, "pld">; +defm t2PLDW : T2Ipl<0, 1, "pldw">; +defm t2PLI : T2Ipl<1, 0, "pli">; + //===----------------------------------------------------------------------===// // Load / store multiple Instructions. 
// @@ -989,7 +1184,7 @@ def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb), let mayLoad = 1, hasExtraDefRegAllocReq = 1 in def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> { + IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; let Inst{24-23} = {?, ?}; // IA: '01', DB: '10' @@ -1001,7 +1196,7 @@ def t2LDM : T2XI<(outs), let mayStore = 1, hasExtraSrcRegAllocReq = 1 in def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - IIC_iStorem, "stm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> { + IIC_iStorem, "stm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; let Inst{24-23} = {?, ?}; // IA: '01', DB: '10' @@ -1074,13 +1269,15 @@ defm t2SXTB : T2I_unary_rrot<0b100, "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; defm t2SXTH : T2I_unary_rrot<0b000, "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; +defm t2SXTB16 : T2I_unary_rrot_DO<0b010, "sxtb16">; defm t2SXTAB : T2I_bin_rrot<0b100, "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; defm t2SXTAH : T2I_bin_rrot<0b000, "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; +defm t2SXTAB16 : T2I_bin_rrot_DO<0b010, "sxtab16">; -// TODO: SXT(A){B|H}16 +// TODO: SXT(A){B|H}16 - done for disassembly only // Zero extenders @@ -1089,7 +1286,7 @@ defm t2UXTB : T2I_unary_rrot<0b101, "uxtb", UnOpFrag<(and node:$Src, 0x000000FF)>>; defm t2UXTH : T2I_unary_rrot<0b001, "uxth", UnOpFrag<(and node:$Src, 0x0000FFFF)>>; -defm t2UXTB16 : T2I_unary_rrot<0b011, "uxtb16", +defm t2UXTB16 : T2I_unary_rrot_nw<0b011, "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), @@ -1101,6 +1298,7 @@ defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab", BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; defm t2UXTAH : T2I_bin_rrot<0b001, "uxtah", BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; +defm t2UXTAB16 : T2I_bin_rrot_DO<0b011, "uxtab16">; } //===----------------------------------------------------------------------===// @@ -1119,9 +1317,13 @@ defm t2SUBS : T2I_bin_s_irs <0b1101, "sub", BinOpFrag<(subc node:$LHS, node:$RHS)>>; defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", - BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>; defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", - BinOpFrag<(sube node:$LHS, node:$RHS)>>; + BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>; +defm t2ADCS : T2I_adde_sube_s_irs<0b1010, "adc", + BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>; +defm t2SBCS : T2I_adde_sube_s_irs<0b1011, "sbc", + BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>; // RSB defm t2RSB : T2I_rbin_is <0b1110, "rsb", @@ -1138,6 +1340,155 @@ def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm), def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm), (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>; +// Select Bytes -- for disassembly only + +def t2SEL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, "sel", + "\t$dst, $a, $b", []> { + let Inst{31-27} = 0b11111; + let Inst{26-24} = 0b010; + let Inst{23} = 0b1; + let Inst{22-20} = 0b010; + let Inst{15-12} = 0b1111; + let Inst{7} = 0b1; + let Inst{6-4} = 0b000; +} + +// A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned) +// And Miscellaneous operations -- for disassembly only +class T2I_pam<bits<3> op22_20, bits<4> op7_4, string 
opc> + : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, opc, + "\t$dst, $a, $b", [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0101; + let Inst{22-20} = op22_20; + let Inst{15-12} = 0b1111; + let Inst{7-4} = op7_4; +} + +// Saturating add/subtract -- for disassembly only + +def t2QADD : T2I_pam<0b000, 0b1000, "qadd">; +def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">; +def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">; +def t2QASX : T2I_pam<0b010, 0b0001, "qasx">; +def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd">; +def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub">; +def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">; +def t2QSUB : T2I_pam<0b000, 0b1010, "qsub">; +def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">; +def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">; +def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">; +def t2UQADD8 : T2I_pam<0b000, 0b0101, "uqadd8">; +def t2UQASX : T2I_pam<0b010, 0b0101, "uqasx">; +def t2UQSAX : T2I_pam<0b110, 0b0101, "uqsax">; +def t2UQSUB16 : T2I_pam<0b101, 0b0101, "uqsub16">; +def t2UQSUB8 : T2I_pam<0b100, 0b0101, "uqsub8">; + +// Signed/Unsigned add/subtract -- for disassembly only + +def t2SASX : T2I_pam<0b010, 0b0000, "sasx">; +def t2SADD16 : T2I_pam<0b001, 0b0000, "sadd16">; +def t2SADD8 : T2I_pam<0b000, 0b0000, "sadd8">; +def t2SSAX : T2I_pam<0b110, 0b0000, "ssax">; +def t2SSUB16 : T2I_pam<0b101, 0b0000, "ssub16">; +def t2SSUB8 : T2I_pam<0b100, 0b0000, "ssub8">; +def t2UASX : T2I_pam<0b010, 0b0100, "uasx">; +def t2UADD16 : T2I_pam<0b001, 0b0100, "uadd16">; +def t2UADD8 : T2I_pam<0b000, 0b0100, "uadd8">; +def t2USAX : T2I_pam<0b110, 0b0100, "usax">; +def t2USUB16 : T2I_pam<0b101, 0b0100, "usub16">; +def t2USUB8 : T2I_pam<0b100, 0b0100, "usub8">; + +// Signed/Unsigned halving add/subtract -- for disassembly only + +def t2SHASX : T2I_pam<0b010, 0b0010, "shasx">; +def t2SHADD16 : T2I_pam<0b001, 0b0010, "shadd16">; +def t2SHADD8 : T2I_pam<0b000, 0b0010, "shadd8">; +def t2SHSAX : T2I_pam<0b110, 0b0010, "shsax">; +def t2SHSUB16 : T2I_pam<0b101, 0b0010, "shsub16">; +def t2SHSUB8 : T2I_pam<0b100, 0b0010, "shsub8">; +def t2UHASX : T2I_pam<0b010, 0b0110, "uhasx">; +def t2UHADD16 : T2I_pam<0b001, 0b0110, "uhadd16">; +def t2UHADD8 : T2I_pam<0b000, 0b0110, "uhadd8">; +def t2UHSAX : T2I_pam<0b110, 0b0110, "uhsax">; +def t2UHSUB16 : T2I_pam<0b101, 0b0110, "uhsub16">; +def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">; + +// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only + +def t2USAD8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + NoItinerary, "usad8", "\t$dst, $a, $b", []> { + let Inst{15-12} = 0b1111; +} +def t2USADA8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst), + (ins GPR:$a, GPR:$b, GPR:$acc), NoItinerary, "usada8", + "\t$dst, $a, $b, $acc", []>; + +// Signed/Unsigned saturate -- for disassembly only + +def t2SSATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), + NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{25-22} = 0b1100; + let Inst{20} = 0; + let Inst{15} = 0; + let Inst{21} = 0; // sh = '0' +} + +def t2SSATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), + NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{25-22} = 0b1100; + let Inst{20} = 0; + let Inst{15} = 0; + let Inst{21} = 1; // sh = '1' +} + +def t2SSAT16 : T2I<(outs 
GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary, + "ssat16", "\t$dst, $bit_pos, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{25-22} = 0b1100; + let Inst{20} = 0; + let Inst{15} = 0; + let Inst{21} = 1; // sh = '1' + let Inst{14-12} = 0b000; // imm3 = '000' + let Inst{7-6} = 0b00; // imm2 = '00' +} + +def t2USATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), + NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{25-22} = 0b1110; + let Inst{20} = 0; + let Inst{15} = 0; + let Inst{21} = 0; // sh = '0' +} + +def t2USATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), + NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{25-22} = 0b1110; + let Inst{20} = 0; + let Inst{15} = 0; + let Inst{21} = 1; // sh = '1' +} + +def t2USAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary, + "usat16", "\t$dst, $bit_pos, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{25-22} = 0b1110; + let Inst{20} = 0; + let Inst{15} = 0; + let Inst{21} = 1; // sh = '1' + let Inst{14-12} = 0b000; // imm3 = '000' + let Inst{7-6} = 0b00; // imm2 = '00' +} //===----------------------------------------------------------------------===// // Shift and rotate Instructions. @@ -1342,6 +1693,8 @@ def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, } } // neverHasSideEffects +// Rounding variants of the below included for disassembly only + // Most significant word multiply def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, "smmul", "\t$dst, $a, $b", @@ -1353,6 +1706,15 @@ def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) } +def t2SMMULR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, + "smmulr", "\t$dst, $a, $b", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b101; + let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate) + let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1) +} + def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, "smmla", "\t$dst, $a, $b, $c", [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]> { @@ -1363,6 +1725,14 @@ def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) } +def t2SMMLAR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, + "smmlar", "\t$dst, $a, $b, $c", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b101; + let Inst{15-12} = {?, ?, ?, ?}; // Ra + let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1) +} def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, "smmls", "\t$dst, $a, $b, $c", @@ -1374,6 +1744,15 @@ def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) } +def t2SMMLSR : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, + "smmlsr", "\t$dst, $a, $b, $c", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b110; + let Inst{15-12} = {?, ?, ?, ?}; // Ra + let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1) +} + multiclass T2I_smul<string opc, PatFrag opnode> { def BB : T2I<(outs 
GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, !strconcat(opc, "bb"), "\t$dst, $a, $b", @@ -1466,7 +1845,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), - (sra GPR:$b, (i32 16)))))]> { + (sra GPR:$b, (i32 16)))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1490,7 +1869,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, !strconcat(opc, "tt"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), - (sra GPR:$b, (i32 16)))))]> { + (sra GPR:$b, (i32 16)))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1502,7 +1881,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, !strconcat(opc, "wb"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, - (sext_inreg GPR:$b, i16)), (i32 16))))]> { + (sext_inreg GPR:$b, i16)), (i32 16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -1514,7 +1893,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, !strconcat(opc, "wt"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, - (sra GPR:$b, (i32 16))), (i32 16))))]> { + (sra GPR:$b, (i32 16))), (i32 16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -1527,16 +1906,70 @@ multiclass T2I_smla<string opc, PatFrag opnode> { defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; -// TODO: Halfword multiple accumulate long: SMLAL<x><y> -// TODO: Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD - +// Halfword multiple accumulate long: SMLAL<x><y> -- for disassembly only +def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>; +def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>; +def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>; +def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>; + +// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD +// These are for disassembly only. 
+ +def t2SMUAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> { + let Inst{15-12} = 0b1111; +} +def t2SMUADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> { + let Inst{15-12} = 0b1111; +} +def t2SMUSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> { + let Inst{15-12} = 0b1111; +} +def t2SMUSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> { + let Inst{15-12} = 0b1111; +} +def t2SMLAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst), + (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlad", + "\t$dst, $a, $b, $acc", []>; +def t2SMLADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst), + (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smladx", + "\t$dst, $a, $b, $acc", []>; +def t2SMLSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst), + (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsd", + "\t$dst, $a, $b, $acc", []>; +def t2SMLSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst), + (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsdx", + "\t$dst, $a, $b, $acc", []>; +def t2SMLALD : T2I_mac<1, 0b100, 0b1100, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlald", + "\t$ldst, $hdst, $a, $b", []>; +def t2SMLALDX : T2I_mac<1, 0b100, 0b1101, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaldx", + "\t$ldst, $hdst, $a, $b", []>; +def t2SMLSLD : T2I_mac<1, 0b101, 0b1100, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsld", + "\t$ldst, $hdst, $a, $b", []>; +def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsldx", + "\t$ldst, $hdst, $a, $b", []>; //===----------------------------------------------------------------------===// // Misc. Arithmetic Instructions. // -class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> +class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list<dag> pattern> : T2I<oops, iops, itin, opc, asm, pattern> { let Inst{31-27} = 0b11111; let Inst{26-22} = 0b01010; @@ -1572,7 +2005,7 @@ def t2REVSH : T2I_misc<0b01, 0b11, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, (shl GPR:$src, (i32 8))), i16))]>; def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, LSL $shamt", + IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), (and (shl GPR:$src2, (i32 imm:$shamt)), 0xFFFF0000)))]> { @@ -1590,7 +2023,7 @@ def : T2Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>; def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, ASR $shamt", + IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), (and (sra GPR:$src2, imm16_31:$shamt), 0xFFFF)))]> { @@ -1643,7 +2076,7 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq", // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use -// a two-value operand where a dag node expects two operands. :( +// a two-value operand where a dag node expects two operands. 
:( def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr, "mov", ".w\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, @@ -1723,6 +2156,66 @@ def t2Int_SyncBarrierV7 : AInoP<(outs), (ins), } } +// Helper class for multiclass T2MemB -- for disassembly only +class T2I_memb<string opc, string asm> + : T2I<(outs), (ins), NoItinerary, opc, asm, + [/* For disassembly only; pattern left blank */]>, + Requires<[IsThumb2, HasV7]> { + let Inst{31-20} = 0xf3b; + let Inst{15-14} = 0b10; + let Inst{12} = 0; +} + +multiclass T2MemB<bits<4> op7_4, string opc> { + + def st : T2I_memb<opc, "\tst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b1110; + } + + def ish : T2I_memb<opc, "\tish"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b1011; + } + + def ishst : T2I_memb<opc, "\tishst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b1010; + } + + def nsh : T2I_memb<opc, "\tnsh"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0111; + } + + def nshst : T2I_memb<opc, "\tnshst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0110; + } + + def osh : T2I_memb<opc, "\tosh"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0011; + } + + def oshst : T2I_memb<opc, "\toshst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0010; + } +} + +// These DMB variants are for disassembly only. +defm t2DMB : T2MemB<0b0101, "dmb">; + +// These DSB variants are for disassembly only. +defm t2DSB : T2MemB<0b0100, "dsb">; + +// ISB has only full system option -- for disassembly only +def t2ISBsy : T2I_memb<"isb", ""> { + let Inst{7-4} = 0b0110; + let Inst{3-0} = 0b1111; +} + class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern, bits<4> rt2 = 0b1111> @@ -1789,6 +2282,16 @@ def t2STREXD : T2I_strex<0b11, (outs GPR:$success), {?, ?, ?, ?}>; } +// Clear-Exclusive is for disassembly only. +def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV7]> { + let Inst{31-20} = 0xf3b; + let Inst{15-14} = 0b10; + let Inst{12} = 0; + let Inst{7-4} = 0b0010; +} + //===----------------------------------------------------------------------===// // TLS Instructions // @@ -1906,6 +2409,24 @@ def t2TBH : let Inst{15-8} = 0b11110000; let Inst{7-4} = 0b0001; // H form } + +// Generic versions of the above two instructions, for disassembly only + +def t2TBBgen : T2I<(outs), (ins GPR:$a, GPR:$b), IIC_Br, + "tbb", "\t[$a, $b]", []>{ + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0001101; + let Inst{15-8} = 0b11110000; + let Inst{7-4} = 0b0000; // B form +} + +def t2TBHgen : T2I<(outs), (ins GPR:$a, GPR:$b), IIC_Br, + "tbh", "\t[$a, $b, lsl #1]", []> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0001101; + let Inst{15-8} = 0b11110000; + let Inst{7-4} = 0b0001; // H form +} } // isNotDuplicable, isIndirectBranch } // isBranch, isTerminator, isBarrier @@ -1931,6 +2452,119 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), let Inst{15-8} = 0b10111111; } +// Branch and Exchange Jazelle -- for disassembly only +// Rm = Inst{19-16} +def t2BXJ : T2I<(outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{26} = 0; + let Inst{25-20} = 0b111100; + let Inst{15-14} = 0b10; + let Inst{12} = 0; +} + +// Change Processor State is a system instruction -- for disassembly only. 
+// The singleton $opt operand contains the following information: +// opt{4-0} = mode from Inst{4-0} +// opt{5} = changemode from Inst{17} +// opt{8-6} = AIF from Inst{8-6} +// opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable +def t2CPS : T2XI<(outs),(ins i32imm:$opt), NoItinerary, "cps${opt:cps}", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{26} = 0; + let Inst{25-20} = 0b111010; + let Inst{15-14} = 0b10; + let Inst{12} = 0; +} + +// A6.3.4 Branches and miscellaneous control +// Table A6-14 Change Processor State, and hint instructions +// Helper class for disassembly only. +class T2I_hint<bits<8> op7_0, string opc, string asm> + : T2I<(outs), (ins), NoItinerary, opc, asm, + [/* For disassembly only; pattern left blank */]> { + let Inst{31-20} = 0xf3a; + let Inst{15-14} = 0b10; + let Inst{12} = 0; + let Inst{10-8} = 0b000; + let Inst{7-0} = op7_0; +} + +def t2NOP : T2I_hint<0b00000000, "nop", ".w">; +def t2YIELD : T2I_hint<0b00000001, "yield", ".w">; +def t2WFE : T2I_hint<0b00000010, "wfe", ".w">; +def t2WFI : T2I_hint<0b00000011, "wfi", ".w">; +def t2SEV : T2I_hint<0b00000100, "sev", ".w">; + +def t2DBG : T2I<(outs),(ins i32imm:$opt), NoItinerary, "dbg", "\t$opt", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-20} = 0xf3a; + let Inst{15-14} = 0b10; + let Inst{12} = 0; + let Inst{10-8} = 0b000; + let Inst{7-4} = 0b1111; +} + +// Secure Monitor Call is a system instruction -- for disassembly only +// Option = Inst{19-16} +def t2SMC : T2I<(outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{26-20} = 0b1111111; + let Inst{15-12} = 0b1000; +} + +// Store Return State is a system instruction -- for disassembly only +def t2SRSDBW : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp!, $mode", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0000010; // W = 1 +} + +def t2SRSDB : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp, $mode", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0000000; // W = 0 +} + +def t2SRSIAW : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsia","\tsp!, $mode", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0011010; // W = 1 +} + +def t2SRSIA : T2I<(outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0011000; // W = 0 +} + +// Return From Exception is a system instruction -- for disassembly only +def t2RFEDBW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfedb", "\t$base!", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0000011; // W = 1 +} + +def t2RFEDB : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeab", "\t$base", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0000001; // W = 0 +} + +def t2RFEIAW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base!", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0011011; // W = 1 +} + +def t2RFEIA : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0011001; 
// W = 0 +} + //===----------------------------------------------------------------------===// // Non-Instruction Patterns // @@ -1970,9 +2604,59 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), // Pseudo instruction that combines ldr from constpool and add pc. This should // be expanded into two instructions late to allow if-conversion and // scheduling. -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), imm:$cp))]>, Requires<[IsThumb2]>; + +//===----------------------------------------------------------------------===// +// Move between special register and ARM core register -- for disassembly only +// + +// Rd = Instr{11-8} +def t2MRS : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{26} = 0; + let Inst{25-21} = 0b11111; + let Inst{20} = 0; // The R bit. + let Inst{15-14} = 0b10; + let Inst{12} = 0; +} + +// Rd = Instr{11-8} +def t2MRSsys : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{26} = 0; + let Inst{25-21} = 0b11111; + let Inst{20} = 1; // The R bit. + let Inst{15-14} = 0b10; + let Inst{12} = 0; +} + +// FIXME: mask is ignored for the time being. +// Rn = Inst{19-16} +def t2MSR : T2I<(outs), (ins GPR:$src), NoItinerary, "msr", "\tcpsr, $src", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{26} = 0; + let Inst{25-21} = 0b11100; + let Inst{20} = 0; // The R bit. + let Inst{15-14} = 0b10; + let Inst{12} = 0; +} + +// FIXME: mask is ignored for the time being. +// Rn = Inst{19-16} +def t2MSRsys : T2I<(outs), (ins GPR:$src), NoItinerary, "msr", "\tspsr, $src", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{26} = 0; + let Inst{25-21} = 0b11100; + let Inst{20} = 1; // The R bit. + let Inst{15-14} = 0b10; + let Inst{12} = 0; +} diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 365e1e3..7c117ed 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -54,7 +54,7 @@ def vfp_f64imm : Operand<f64>, // Load / store Instructions. 
// -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1 in { def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr), IIC_fpLoad64, "vldr", ".64\t$dst, $addr", [(set DPR:$dst, (load addrmode5:$addr))]>; @@ -412,6 +412,101 @@ def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010, let Inst{7} = 0; // Z bit } +// Convert between floating-point and fixed-point +// Data type for fixed-point naming convention: +// S16 (U=0, sx=0) -> SH +// U16 (U=1, sx=0) -> UH +// S32 (U=0, sx=1) -> SL +// U32 (U=1, sx=1) -> UL + +let Constraints = "$a = $dst" in { + +// FP to Fixed-Point: + +def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOUHD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 0, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOSLD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +// Fixed-Point to FP: + +def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VUHTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 0, + (outs DPR:$dst), (ins 
DPR:$a, i32imm:$fbits), + IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VSLTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +} // End of 'let Constraints = "$src = $dst" in' + //===----------------------------------------------------------------------===// // FP FMA Operations. // diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index bef5a06..6db6ba4 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -48,7 +48,13 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction; // write our own wrapper, which does things our way, so we have complete // control over register saving and restoring. extern "C" { -#if defined(__arm__) + // We don't need this on Android (generally on hand-held devices). This + // function is for the purpose of supporting "lazy symbol lookup" (lookup + // undefined symbol at runtime) (Actually, if you tried to remove the + // !defined(ANDROID) guard, you'll get compilation error since Android's + // toolchain choose armv5te as its CPU architecture which does not support + // instruction 'stmdb' and 'ldmia' within the function) +#if defined(__arm__) && !defined(ANDROID) void ARMCompilationCallback(); asm( ".text\n" @@ -60,7 +66,7 @@ extern "C" { // whole compilation callback doesn't exist as far as the caller is // concerned, so we can't just preserve the callee saved regs. "stmdb sp!, {r0, r1, r2, r3, lr}\n" -#ifndef __SOFTFP__ +#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) "fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" #endif // The LR contains the address of the stub function on entry. @@ -83,7 +89,7 @@ extern "C" { // 6-20 | D0..D7 | Saved VFP registers // +--------+ // -#ifndef __SOFTFP__ +#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) // Restore VFP caller-saved registers. "fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" #endif @@ -318,6 +324,18 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, *((intptr_t*)RelocPos) |= ResultPtr; break; } + case ARM::reloc_arm_movw: { + ResultPtr = ResultPtr & 0xFFFF; + *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; + *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; // imm4:imm12, Insts[19-16] = imm4, Insts[11-0] = imm12 + break; + } + case ARM::reloc_arm_movt: { + ResultPtr = (ResultPtr >> 16) & 0xFFFF; + *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; + *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; // imm4:imm12, Insts[19-16] = imm4, Insts[11-0] = imm12 + break; + } } } } diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index b78b95b..19f1e3b 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -350,7 +350,8 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, : ARMRegisterInfo::getRegisterNumbering(Reg); // AM4 - register numbers in ascending order. // AM5 - consecutive register numbers in ascending order. 
- if (NewOffset == Offset + (int)Size && + if (Reg != ARM::SP && + NewOffset == Offset + (int)Size && ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) { Offset += Size; PRegNum = RegNum; @@ -747,11 +748,24 @@ static bool isMemoryOp(const MachineInstr *MI) { if (MMO->isVolatile()) return false; - // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is not. + // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is + // not. if (MMO->getAlignment() < 4) return false; } + // str <undef> could probably be eliminated entirely, but for now we just want + // to avoid making a mess of it. + // FIXME: Use str <undef> as a wildcard to enable better stm folding. + if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() && + MI->getOperand(0).isUndef()) + return false; + + // Likewise don't mess with references to undefined addresses. + if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() && + MI->getOperand(1).isUndef()) + return false; + int Opcode = MI->getOpcode(); switch (Opcode) { default: break; diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h index 2cc2950..86e7206 100644 --- a/lib/Target/ARM/ARMRelocations.h +++ b/lib/Target/ARM/ARMRelocations.h @@ -47,7 +47,13 @@ namespace llvm { reloc_arm_pic_jt, // reloc_arm_branch - Branch address relocation. - reloc_arm_branch + reloc_arm_branch, + + // reloc_arm_movt - MOVT immediate relocation. + reloc_arm_movt, + + // reloc_arm_movw - MOVW immediate relocation. + reloc_arm_movw }; } } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 426862c..622034b 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -33,7 +33,7 @@ UseMOVT("arm-use-movt", ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, bool isT) - : ARMArchVersion(V4T) + : ARMArchVersion(V4) , ARMFPUType(None) , UseNEONForSinglePrecisionFP(UseNEONFP) , IsThumb(isT) @@ -54,6 +54,11 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, // Parse features string. CPUString = ParseSubtargetFeatures(FS, CPUString); + // When no arch is specified either by CPU or by attributes, make the default + // ARMv4T. + if (CPUString == "generic" && (FS.empty() || FS == "generic")) + ARMArchVersion = V4T; + // Set the boolean corresponding to the current target triple, or the default // if one cannot be determined, to true. unsigned Len = TT.length(); @@ -68,25 +73,28 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, } if (Idx) { unsigned SubVer = TT[Idx]; - if (SubVer > '4' && SubVer <= '9') { - if (SubVer >= '7') { - ARMArchVersion = V7A; - } else if (SubVer == '6') { - ARMArchVersion = V6; - if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') - ARMArchVersion = V6T2; - } else if (SubVer == '5') { - ARMArchVersion = V5T; - if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') - ARMArchVersion = V5TE; - } - if (ARMArchVersion >= V6T2) - ThumbMode = Thumb2; + if (SubVer >= '7' && SubVer <= '9') { + ARMArchVersion = V7A; + } else if (SubVer == '6') { + ARMArchVersion = V6; + if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') + ARMArchVersion = V6T2; + } else if (SubVer == '5') { + ARMArchVersion = V5T; + if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') + ARMArchVersion = V5TE; + } else if (SubVer == '4') { + if (Len >= Idx+2 && TT[Idx+1] == 't') + ARMArchVersion = V4T; + else + ARMArchVersion = V4; } } // Thumb2 implies at least V6T2. 
- if (ARMArchVersion < V6T2 && ThumbMode >= Thumb2) + if (ARMArchVersion >= V6T2) + ThumbMode = Thumb2; + else if (ThumbMode >= Thumb2) ARMArchVersion = V6T2; if (Len >= 10) { diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 3f06b7b..6980851 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -26,7 +26,7 @@ class GlobalValue; class ARMSubtarget : public TargetSubtarget { protected: enum ARMArchEnum { - V4T, V5T, V5TE, V6, V6T2, V7A + V4, V4T, V5T, V5TE, V6, V6T2, V7A }; enum ARMFPEnum { @@ -38,7 +38,7 @@ protected: Thumb2 }; - /// ARMArchVersion - ARM architecture version: V4T (base), V5T, V5TE, + /// ARMArchVersion - ARM architecture version: V4, V4T (base), V5T, V5TE, /// V6, V6T2, V7A. ARMArchEnum ARMArchVersion; diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h index 9703403..a488c0a 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.h +++ b/lib/Target/ARM/ARMTargetObjectFile.h @@ -10,7 +10,7 @@ #ifndef LLVM_TARGET_ARM_TARGETOBJECTFILE_H #define LLVM_TARGET_ARM_TARGETOBJECTFILE_H -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/MC/MCSectionELF.h" namespace llvm { @@ -24,7 +24,7 @@ namespace llvm { if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) { StaticCtorSection = - getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY, + getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY, MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, SectionKind::getDataRel()); StaticDtorSection = diff --git a/lib/Target/ARM/Android.mk b/lib/Target/ARM/Android.mk new file mode 100644 index 0000000..ea796af --- /dev/null +++ b/lib/Target/ARM/Android.mk @@ -0,0 +1,49 @@ +LOCAL_PATH := $(call my-dir) + +# For the device only +# ===================================================== +include $(CLEAR_VARS) +include $(CLEAR_TBLGEN_VARS) + +TBLGEN_TABLES := \ + ARMGenRegisterInfo.h.inc \ + ARMGenRegisterNames.inc \ + ARMGenRegisterInfo.inc \ + ARMGenInstrNames.inc \ + ARMGenInstrInfo.inc \ + ARMGenDAGISel.inc \ + ARMGenSubtarget.inc \ + ARMGenCodeEmitter.inc \ + ARMGenCallingConv.inc + +LOCAL_SRC_FILES := \ + ARMBaseInstrInfo.cpp \ + ARMBaseRegisterInfo.cpp \ + ARMCodeEmitter.cpp \ + ARMConstantIslandPass.cpp \ + ARMConstantPoolValue.cpp \ + ARMExpandPseudoInsts.cpp \ + ARMISelDAGToDAG.cpp \ + ARMISelLowering.cpp \ + ARMInstrInfo.cpp \ + ARMJITInfo.cpp \ + ARMLoadStoreOptimizer.cpp \ + ARMMCAsmInfo.cpp \ + ARMRegisterInfo.cpp \ + ARMSubtarget.cpp \ + ARMTargetMachine.cpp \ + NEONMoveFix.cpp \ + NEONPreAllocPass.cpp \ + Thumb1InstrInfo.cpp \ + Thumb1RegisterInfo.cpp \ + Thumb2ITBlockPass.cpp \ + Thumb2InstrInfo.cpp \ + Thumb2RegisterInfo.cpp \ + Thumb2SizeReduction.cpp + +LOCAL_MODULE:= libLLVMARMCodeGen + +include $(LLVM_DEVICE_BUILD_MK) +include $(LLVM_TBLGEN_RULES_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 0a75c09..d6d595c 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" @@ -37,7 +38,6 @@ #include "llvm/MC/MCStreamer.h" #include 
"llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" @@ -122,6 +122,7 @@ namespace { void printT2AddrModeSoRegOperand(const MachineInstr *MI, int OpNum); void printPredicateOperand(const MachineInstr *MI, int OpNum); + void printMandatoryPredicateOperand(const MachineInstr *MI, int OpNum); void printSBitModifierOperand(const MachineInstr *MI, int OpNum); void printPCLabel(const MachineInstr *MI, int OpNum); void printRegisterList(const MachineInstr *MI, int OpNum); @@ -786,6 +787,12 @@ void ARMAsmPrinter::printPredicateOperand(const MachineInstr *MI, int OpNum) { O << ARMCondCodeToString(CC); } +void ARMAsmPrinter::printMandatoryPredicateOperand(const MachineInstr *MI, + int OpNum) { + ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); + O << ARMCondCodeToString(CC); +} + void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int OpNum){ unsigned Reg = MI->getOperand(OpNum).getReg(); if (Reg) { diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index d7d8e09..a2084b0 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -325,6 +325,12 @@ void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum) { O << ARMCondCodeToString(CC); } +void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI, + unsigned OpNum) { + ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); + O << ARMCondCodeToString(CC); +} + void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum){ if (MI->getOperand(OpNum).getReg()) { assert(MI->getOperand(OpNum).getReg() == ARM::CPSR && diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index 23a7f05..b7964c9 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -71,6 +71,7 @@ public: void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum) {} void printPredicateOperand(const MCInst *MI, unsigned OpNum); + void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum); void printSBitModifierOperand(const MCInst *MI, unsigned OpNum); void printRegisterList(const MCInst *MI, unsigned OpNum); void printCPInstOperand(const MCInst *MI, unsigned OpNum, diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 9efb5a1..57b65cf 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -10,6 +10,8 @@ Reimplement 'select' in terms of 'SEL'. * Implement pre/post increment support. (e.g. PR935) * Implement smarter constant generation for binops with large immediates. +A few ARMv6T2 ops should be pattern matched: BFI, SBFX, and UBFX + //===---------------------------------------------------------------------===// Crazy idea: Consider code that uses lots of 8-bit or 16-bit values. By the diff --git a/lib/Target/ARM/TargetInfo/Android.mk b/lib/Target/ARM/TargetInfo/Android.mk new file mode 100644 index 0000000..c1998a1 --- /dev/null +++ b/lib/Target/ARM/TargetInfo/Android.mk @@ -0,0 +1,24 @@ +LOCAL_PATH := $(call my-dir) + +# For the device only +# ===================================================== +include $(CLEAR_VARS) +include $(CLEAR_TBLGEN_VARS) + +TBLGEN_TABLES := \ + ARMGenRegisterNames.inc \ + ARMGenInstrNames.inc + +TBLGEN_TD_DIR := $(LOCAL_PATH)/.. 
+ +LOCAL_SRC_FILES := \ + ARMTargetInfo.cpp + +LOCAL_C_INCLUDES += \ + $(LOCAL_PATH)/.. + +LOCAL_MODULE:= libLLVMARMInfo + +include $(LLVM_DEVICE_BUILD_MK) +include $(LLVM_TBLGEN_RULES_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index d6630ce..163d1e9 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -450,9 +450,9 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset -= AFI->getGPRCalleeSavedArea1Offset(); else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex)) Offset -= AFI->getGPRCalleeSavedArea2Offset(); - else if (hasFP(MF)) { - assert(SPAdj == 0 && "Unexpected"); - // There is alloca()'s in this function, must reference off the frame + else if (MF.getFrameInfo()->hasVarSizedObjects()) { + assert(SPAdj == 0 && hasFP(MF) && "Unexpected"); + // There are alloca()'s in this function, must reference off the frame // pointer instead. FrameReg = getFrameRegister(MF); Offset -= AFI->getFramePtrSpillOffset(); @@ -778,9 +778,19 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { } static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { - return (MI->getOpcode() == ARM::tRestore && - MI->getOperand(1).isFI() && - isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)); + if (MI->getOpcode() == ARM::tRestore && + MI->getOperand(1).isFI() && + isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)) + return true; + else if (MI->getOpcode() == ARM::tPOP) { + // The first three operands are predicates and such. The last two are + // imp-def and imp-use of SP. Check everything in between. + for (int i = 3, e = MI->getNumOperands() - 2; i != e; ++i) + if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs)) + return false; + return true; + } + return false; } void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, @@ -794,13 +804,13 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); int NumBytes = (int)MFI->getStackSize(); + const unsigned *CSRegs = getCalleeSavedRegs(); if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); } else { // Unwind MBBI to point to first LDR / VLDRD. - const unsigned *CSRegs = getCalleeSavedRegs(); if (MBBI != MBB.begin()) { do --MBBI; @@ -836,6 +846,9 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, } if (VARegSaveSize) { + // Move back past the callee-saved register restoration + while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs)) + ++MBBI; // Epilogue for vararg functions: pop LR to R3 and branch off it. AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) .addReg(0) // No write back. 
@@ -845,6 +858,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) .addReg(ARM::R3, RegState::Kill); + // erase the old tBX_RET instruction MBB.erase(MBBI); } } diff --git a/lib/Target/Alpha/AlphaCallingConv.td b/lib/Target/Alpha/AlphaCallingConv.td index 38ada69..bde8819 100644 --- a/lib/Target/Alpha/AlphaCallingConv.td +++ b/lib/Target/Alpha/AlphaCallingConv.td @@ -14,7 +14,8 @@ //===----------------------------------------------------------------------===// def RetCC_Alpha : CallingConv<[ // i64 is returned in register R0 - CCIfType<[i64], CCAssignToReg<[R0]>>, + // R1 is an llvm extension, I don't know what gcc does + CCIfType<[i64], CCAssignToReg<[R0,R1]>>, // f32 / f64 are returned in F0/F1 CCIfType<[f32, f64], CCAssignToReg<[F0, F1]>> diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp index eaefef9..5303d85 100644 --- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp +++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp @@ -64,7 +64,7 @@ namespace { /// that the bits 1-7 of LHS are already zero. If LHS is non-null, we are /// in checking mode. If LHS is null, we assume that the mask has already /// been validated before. - uint64_t get_zapImm(SDValue LHS, uint64_t Constant) { + uint64_t get_zapImm(SDValue LHS, uint64_t Constant) const { uint64_t BitsToCheck = 0; unsigned Result = 0; for (unsigned i = 0; i != 8; ++i) { @@ -159,10 +159,6 @@ namespace { // target-specific node if it hasn't already been changed. SDNode *Select(SDNode *N); - /// InstructionSelect - This callback is invoked by - /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. - virtual void InstructionSelect(); - virtual const char *getPassName() const { return "Alpha DAG->DAG Pattern Instruction Selection"; } @@ -222,20 +218,11 @@ SDNode *AlphaDAGToDAGISel::getGlobalRetAddr() { return CurDAG->getRegister(GlobalRetAddr, TLI.getPointerTy()).getNode(); } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void AlphaDAGToDAGISel::InstructionSelect() { - // Select target instructions for the DAG. - SelectRoot(*CurDAG); - CurDAG->RemoveDeadNodes(); -} - // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. SDNode *AlphaDAGToDAGISel::Select(SDNode *N) { - if (N->isMachineOpcode()) { + if (N->isMachineOpcode()) return NULL; // Already selected. 
- } DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 0bbe567..5d8310e 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -21,7 +21,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Module.h" @@ -282,7 +282,8 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee, DAG.getIntPtrConstant(VA.getLocMemOffset())); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), 0)); + PseudoSourceValue::getStack(), 0, + false, false, 0)); } } @@ -426,7 +427,8 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i64); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, + false, false, 0); } InVals.push_back(ArgVal); } @@ -442,14 +444,16 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true, false); if (i == 0) VarArgsBase = FI; SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64); - LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0)); + LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0, + false, false, 0)); if (TargetRegisterInfo::isPhysicalRegister(args_float[i])) args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass); argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64); FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true, false); SDFI = DAG.getFrameIndex(FI, MVT::i64); - LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0)); + LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0, + false, false, 0)); } //Set up a token factor with all the stack traffic @@ -528,11 +532,12 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, const Value *VAListS = cast<SrcValueSDNode>(N->getOperand(2))->getValue(); DebugLoc dl = N->getDebugLoc(); - SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP, VAListS, 0); + SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP, VAListS, 0, + false, false, 0); SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, DAG.getConstant(8, MVT::i64)); SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1), - Tmp, NULL, 0, MVT::i32); + Tmp, NULL, 0, MVT::i32, false, false, 0); DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset); if (N->getValueType(0).isFloatingPoint()) { @@ -547,7 +552,7 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, SDValue NewOffset = DAG.getNode(ISD::ADD, dl, MVT::i64, Offset, DAG.getConstant(8, MVT::i64)); Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp, NULL, 0, - MVT::i32); + MVT::i32, false, false, 0); } /// LowerOperation - Provide custom lowering hooks for some operations. 
@@ -694,9 +699,10 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { SDValue Result; if (Op.getValueType() == MVT::i32) Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr, - NULL, 0, MVT::i32); + NULL, 0, MVT::i32, false, false, 0); else - Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0); + Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0, + false, false, 0); return Result; } case ISD::VACOPY: { @@ -706,15 +712,18 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { const Value *DestS = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); const Value *SrcS = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); - SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, SrcS, 0); - SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, DestS, 0); + SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, SrcS, 0, + false, false, 0); + SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, DestS, 0, + false, false, 0); SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP, DAG.getConstant(8, MVT::i64)); Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result, - NP, NULL,0, MVT::i32); + NP, NULL,0, MVT::i32, false, false, 0); SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP, DAG.getConstant(8, MVT::i64)); - return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, NULL, 0, MVT::i32); + return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, NULL, 0, MVT::i32, + false, false, 0); } case ISD::VASTART: { SDValue Chain = Op.getOperand(0); @@ -723,11 +732,12 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { // vastart stores the address of the VarArgsBase and VarArgsOffset SDValue FR = DAG.getFrameIndex(VarArgsBase, MVT::i64); - SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0); + SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0, + false, false, 0); SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, DAG.getConstant(8, MVT::i64)); return DAG.getTruncStore(S1, dl, DAG.getConstant(VarArgsOffset, MVT::i64), - SA2, NULL, 0, MVT::i32); + SA2, NULL, 0, MVT::i32, false, false, 0); } case ISD::RETURNADDR: return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc::getUnknownLoc(), @@ -749,7 +759,8 @@ void AlphaTargetLowering::ReplaceNodeResults(SDNode *N, SDValue Chain, DataPtr; LowerVAARG(N, Chain, DataPtr, DAG); - SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, NULL, 0); + SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, NULL, 0, + false, false, 0); Results.push_back(Res); Results.push_back(SDValue(Res.getNode(), 1)); } diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td index 8917e86..341c4a7 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.td +++ b/lib/Target/Alpha/AlphaInstrInfo.td @@ -92,7 +92,7 @@ def immSExt16int : PatLeaf<(imm), [{ //(int)imm fits in a 16 bit sign extended ((int64_t)N->getZExtValue() << 32) >> 32; }], SExt16>; -def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm:$L), [{ +def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm), [{ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1)); if (!RHS) return 0; uint64_t build = get_zapImm(N->getOperand(0), (uint64_t)RHS->getZExtValue()); @@ -602,9 +602,9 @@ def RPCC : MfcForm<0x18, 0xC000, "rpcc $RA", s_rpcc>; //Read process cycle count def MB : MfcPForm<0x18, 0x4000, "mb", s_imisc>; //memory barrier def WMB : MfcPForm<0x18, 0x4400, "wmb", s_imisc>; //write memory 
barrier -def : Pat<(membarrier (i64 imm:$ll), (i64 imm:$ls), (i64 imm:$sl), (i64 1), (i64 imm:$dev)), +def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 1), (i64 imm)), (WMB)>; -def : Pat<(membarrier (i64 imm:$ll), (i64 imm:$ls), (i64 imm:$sl), (i64 imm:$ss), (i64 imm:$dev)), +def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 imm), (i64 imm)), (MB)>; //Basic Floating point ops diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp index 64bdd62..ba662fb 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.cpp +++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp @@ -251,7 +251,7 @@ void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const { } else { std::string msg; raw_string_ostream Msg(msg); - Msg << "Too big a stack frame at " + NumBytes; + Msg << "Too big a stack frame at " << NumBytes; llvm_report_error(Msg.str()); } @@ -303,15 +303,14 @@ void AlphaRegisterInfo::emitEpilogue(MachineFunction &MF, } else { std::string msg; raw_string_ostream Msg(msg); - Msg << "Too big a stack frame at " + NumBytes; + Msg << "Too big a stack frame at " << NumBytes; llvm_report_error(Msg.str()); } } } unsigned AlphaRegisterInfo::getRARegister() const { - llvm_unreachable("What is the return address register"); - return 0; + return Alpha::R26; } unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const { diff --git a/lib/Target/Android.mk b/lib/Target/Android.mk new file mode 100644 index 0000000..8bf4340 --- /dev/null +++ b/lib/Target/Android.mk @@ -0,0 +1,38 @@ +LOCAL_PATH:= $(call my-dir) + +target_SRC_FILES := \ + Mangler.cpp \ + SubtargetFeature.cpp \ + Target.cpp \ + TargetAsmLexer.cpp \ + TargetData.cpp \ + TargetELFWriterInfo.cpp \ + TargetFrameInfo.cpp \ + TargetInstrInfo.cpp \ + TargetIntrinsicInfo.cpp \ + TargetLoweringObjectFile.cpp \ + TargetMachine.cpp \ + TargetRegisterInfo.cpp \ + TargetSubtarget.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(target_SRC_FILES) + +LOCAL_MODULE:= libLLVMTarget + +include $(LLVM_HOST_BUILD_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(target_SRC_FILES) + +LOCAL_MODULE:= libLLVMTarget + +include $(LLVM_DEVICE_BUILD_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp index 2c9cc60..c8d71aa 100644 --- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp +++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp @@ -41,7 +41,7 @@ namespace { BlackfinDAGToDAGISel(BlackfinTargetMachine &TM, CodeGenOpt::Level OptLevel) : SelectionDAGISel(TM, OptLevel) {} - virtual void InstructionSelect(); + virtual void PostprocessISelDAG(); virtual const char *getPassName() const { return "Blackfin DAG->DAG Pattern Instruction Selection"; @@ -72,13 +72,7 @@ FunctionPass *llvm::createBlackfinISelDag(BlackfinTargetMachine &TM, return new BlackfinDAGToDAGISel(TM, OptLevel); } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void BlackfinDAGToDAGISel::InstructionSelect() { - // Select target instructions for the DAG. 
- SelectRoot(*CurDAG); - DEBUG(errs() << "Selected selection DAG before regclass fixup:\n"); - DEBUG(CurDAG->dump()); +void BlackfinDAGToDAGISel::PostprocessISelDAG() { FixRegisterClasses(*CurDAG); } diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp index 269707a..5ce2013 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -22,7 +22,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -206,7 +206,8 @@ BlackfinTargetLowering::LowerFormalArguments(SDValue Chain, int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true, false); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); } } @@ -329,7 +330,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, OffsetN = DAG.getNode(ISD::ADD, dl, MVT::i32, SPN, OffsetN); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, OffsetN, PseudoSourceValue::getStack(), - Offset)); + Offset, false, false, 0)); } } diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index fd4c4e7..10f873f 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -49,7 +49,6 @@ #include "llvm/System/Host.h" #include "llvm/Config/config.h" #include <algorithm> -#include <sstream> using namespace llvm; extern "C" void LLVMInitializeCBackendTarget() { @@ -153,26 +152,16 @@ namespace { return false; } - raw_ostream &printType(formatted_raw_ostream &Out, - const Type *Ty, + raw_ostream &printType(raw_ostream &Out, const Type *Ty, bool isSigned = false, const std::string &VariableName = "", bool IgnoreName = false, const AttrListPtr &PAL = AttrListPtr()); - std::ostream &printType(std::ostream &Out, const Type *Ty, - bool isSigned = false, - const std::string &VariableName = "", - bool IgnoreName = false, - const AttrListPtr &PAL = AttrListPtr()); - raw_ostream &printSimpleType(formatted_raw_ostream &Out, - const Type *Ty, - bool isSigned, - const std::string &NameSoFar = ""); - std::ostream &printSimpleType(std::ostream &Out, const Type *Ty, - bool isSigned, + raw_ostream &printSimpleType(raw_ostream &Out, const Type *Ty, + bool isSigned, const std::string &NameSoFar = ""); - void printStructReturnPointerFunctionType(formatted_raw_ostream &Out, + void printStructReturnPointerFunctionType(raw_ostream &Out, const AttrListPtr &PAL, const PointerType *Ty); @@ -385,8 +374,8 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) { // If this isn't a struct or array type, remove it from our set of types // to name. This simplifies emission later. - if (!isa<StructType>(I->second) && !isa<OpaqueType>(I->second) && - !isa<ArrayType>(I->second)) { + if (!I->second->isStructTy() && !I->second->isOpaqueTy() && + !I->second->isArrayTy()) { TST.remove(I); } else { // If this is not used, remove it from the symbol table. 
@@ -405,7 +394,7 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) { unsigned RenameCounter = 0; for (std::set<const Type *>::const_iterator I = UT.begin(), E = UT.end(); I != E; ++I) - if (isa<StructType>(*I) || isa<ArrayType>(*I)) { + if ((*I)->isStructTy() || (*I)->isArrayTy()) { while (M.addTypeName("unnamed"+utostr(RenameCounter), *I)) ++RenameCounter; Changed = true; @@ -454,11 +443,12 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) { /// printStructReturnPointerFunctionType - This is like printType for a struct /// return type, except, instead of printing the type as void (*)(Struct*, ...) /// print it as "Struct (*)(...)", for struct return functions. -void CWriter::printStructReturnPointerFunctionType(formatted_raw_ostream &Out, +void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out, const AttrListPtr &PAL, const PointerType *TheTy) { const FunctionType *FTy = cast<FunctionType>(TheTy->getElementType()); - std::stringstream FunctionInnards; + std::string tstr; + raw_string_ostream FunctionInnards(tstr); FunctionInnards << " (*) ("; bool PrintedType = false; @@ -470,7 +460,7 @@ void CWriter::printStructReturnPointerFunctionType(formatted_raw_ostream &Out, FunctionInnards << ", "; const Type *ArgTy = *I; if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { - assert(isa<PointerType>(ArgTy)); + assert(ArgTy->isPointerTy()); ArgTy = cast<PointerType>(ArgTy)->getElementType(); } printType(FunctionInnards, ArgTy, @@ -484,63 +474,14 @@ void CWriter::printStructReturnPointerFunctionType(formatted_raw_ostream &Out, FunctionInnards << "void"; } FunctionInnards << ')'; - std::string tstr = FunctionInnards.str(); printType(Out, RetTy, - /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr); + /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str()); } raw_ostream & -CWriter::printSimpleType(formatted_raw_ostream &Out, const Type *Ty, - bool isSigned, +CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned, const std::string &NameSoFar) { - assert((Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) && - "Invalid type for printSimpleType"); - switch (Ty->getTypeID()) { - case Type::VoidTyID: return Out << "void " << NameSoFar; - case Type::IntegerTyID: { - unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); - if (NumBits == 1) - return Out << "bool " << NameSoFar; - else if (NumBits <= 8) - return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar; - else if (NumBits <= 16) - return Out << (isSigned?"signed":"unsigned") << " short " << NameSoFar; - else if (NumBits <= 32) - return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar; - else if (NumBits <= 64) - return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar; - else { - assert(NumBits <= 128 && "Bit widths > 128 not implemented yet"); - return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar; - } - } - case Type::FloatTyID: return Out << "float " << NameSoFar; - case Type::DoubleTyID: return Out << "double " << NameSoFar; - // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is - // present matches host 'long double'. 
- case Type::X86_FP80TyID: - case Type::PPC_FP128TyID: - case Type::FP128TyID: return Out << "long double " << NameSoFar; - - case Type::VectorTyID: { - const VectorType *VTy = cast<VectorType>(Ty); - return printSimpleType(Out, VTy->getElementType(), isSigned, - " __attribute__((vector_size(" + - utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar); - } - - default: -#ifndef NDEBUG - errs() << "Unknown primitive type: " << *Ty << "\n"; -#endif - llvm_unreachable(0); - } -} - -std::ostream & -CWriter::printSimpleType(std::ostream &Out, const Type *Ty, bool isSigned, - const std::string &NameSoFar) { - assert((Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) && + assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) && "Invalid type for printSimpleType"); switch (Ty->getTypeID()) { case Type::VoidTyID: return Out << "void " << NameSoFar; @@ -587,120 +528,16 @@ CWriter::printSimpleType(std::ostream &Out, const Type *Ty, bool isSigned, // Pass the Type* and the variable name and this prints out the variable // declaration. // -raw_ostream &CWriter::printType(formatted_raw_ostream &Out, - const Type *Ty, +raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty, bool isSigned, const std::string &NameSoFar, bool IgnoreName, const AttrListPtr &PAL) { - if (Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) { - printSimpleType(Out, Ty, isSigned, NameSoFar); - return Out; - } - - // Check to see if the type is named. - if (!IgnoreName || isa<OpaqueType>(Ty)) { - std::map<const Type *, std::string>::iterator I = TypeNames.find(Ty); - if (I != TypeNames.end()) return Out << I->second << ' ' << NameSoFar; - } - - switch (Ty->getTypeID()) { - case Type::FunctionTyID: { - const FunctionType *FTy = cast<FunctionType>(Ty); - std::stringstream FunctionInnards; - FunctionInnards << " (" << NameSoFar << ") ("; - unsigned Idx = 1; - for (FunctionType::param_iterator I = FTy->param_begin(), - E = FTy->param_end(); I != E; ++I) { - const Type *ArgTy = *I; - if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { - assert(isa<PointerType>(ArgTy)); - ArgTy = cast<PointerType>(ArgTy)->getElementType(); - } - if (I != FTy->param_begin()) - FunctionInnards << ", "; - printType(FunctionInnards, ArgTy, - /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), ""); - ++Idx; - } - if (FTy->isVarArg()) { - if (FTy->getNumParams()) - FunctionInnards << ", ..."; - } else if (!FTy->getNumParams()) { - FunctionInnards << "void"; - } - FunctionInnards << ')'; - std::string tstr = FunctionInnards.str(); - printType(Out, FTy->getReturnType(), - /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr); - return Out; - } - case Type::StructTyID: { - const StructType *STy = cast<StructType>(Ty); - Out << NameSoFar + " {\n"; - unsigned Idx = 0; - for (StructType::element_iterator I = STy->element_begin(), - E = STy->element_end(); I != E; ++I) { - Out << " "; - printType(Out, *I, false, "field" + utostr(Idx++)); - Out << ";\n"; - } - Out << '}'; - if (STy->isPacked()) - Out << " __attribute__ ((packed))"; - return Out; - } - - case Type::PointerTyID: { - const PointerType *PTy = cast<PointerType>(Ty); - std::string ptrName = "*" + NameSoFar; - - if (isa<ArrayType>(PTy->getElementType()) || - isa<VectorType>(PTy->getElementType())) - ptrName = "(" + ptrName + ")"; - - if (!PAL.isEmpty()) - // Must be a function ptr cast! 
- return printType(Out, PTy->getElementType(), false, ptrName, true, PAL); - return printType(Out, PTy->getElementType(), false, ptrName); - } - - case Type::ArrayTyID: { - const ArrayType *ATy = cast<ArrayType>(Ty); - unsigned NumElements = ATy->getNumElements(); - if (NumElements == 0) NumElements = 1; - // Arrays are wrapped in structs to allow them to have normal - // value semantics (avoiding the array "decay"). - Out << NameSoFar << " { "; - printType(Out, ATy->getElementType(), false, - "array[" + utostr(NumElements) + "]"); - return Out << "; }"; - } - - case Type::OpaqueTyID: { - std::string TyName = "struct opaque_" + itostr(OpaqueCounter++); - assert(TypeNames.find(Ty) == TypeNames.end()); - TypeNames[Ty] = TyName; - return Out << TyName << ' ' << NameSoFar; - } - default: - llvm_unreachable("Unhandled case in getTypeProps!"); - } - - return Out; -} - -// Pass the Type* and the variable name and this prints out the variable -// declaration. -// -std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty, - bool isSigned, const std::string &NameSoFar, - bool IgnoreName, const AttrListPtr &PAL) { - if (Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) { + if (Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) { printSimpleType(Out, Ty, isSigned, NameSoFar); return Out; } // Check to see if the type is named. - if (!IgnoreName || isa<OpaqueType>(Ty)) { + if (!IgnoreName || Ty->isOpaqueTy()) { std::map<const Type *, std::string>::iterator I = TypeNames.find(Ty); if (I != TypeNames.end()) return Out << I->second << ' ' << NameSoFar; } @@ -708,14 +545,15 @@ std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty, switch (Ty->getTypeID()) { case Type::FunctionTyID: { const FunctionType *FTy = cast<FunctionType>(Ty); - std::stringstream FunctionInnards; + std::string tstr; + raw_string_ostream FunctionInnards(tstr); FunctionInnards << " (" << NameSoFar << ") ("; unsigned Idx = 1; for (FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); I != E; ++I) { const Type *ArgTy = *I; if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { - assert(isa<PointerType>(ArgTy)); + assert(ArgTy->isPointerTy()); ArgTy = cast<PointerType>(ArgTy)->getElementType(); } if (I != FTy->param_begin()) @@ -731,9 +569,8 @@ std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty, FunctionInnards << "void"; } FunctionInnards << ')'; - std::string tstr = FunctionInnards.str(); printType(Out, FTy->getReturnType(), - /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr); + /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str()); return Out; } case Type::StructTyID: { @@ -756,8 +593,8 @@ std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty, const PointerType *PTy = cast<PointerType>(Ty); std::string ptrName = "*" + NameSoFar; - if (isa<ArrayType>(PTy->getElementType()) || - isa<VectorType>(PTy->getElementType())) + if (PTy->getElementType()->isArrayTy() || + PTy->getElementType()->isVectorTy()) ptrName = "(" + ptrName + ")"; if (!PAL.isEmpty()) @@ -1144,7 +981,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) { Out << "(("; printType(Out, CPV->getType()); // sign doesn't matter Out << ")/*UNDEF*/"; - if (!isa<VectorType>(CPV->getType())) { + if (!CPV->getType()->isVectorTy()) { Out << "0)"; } else { Out << "{})"; @@ -1396,7 +1233,7 @@ bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) { } if (NeedsExplicitCast) { Out << "(("; - if (Ty->isInteger() && Ty != 
Type::getInt1Ty(Ty->getContext())) + if (Ty->isIntegerTy() && Ty != Type::getInt1Ty(Ty->getContext())) printSimpleType(Out, Ty, TypeIsSigned); else printType(Out, Ty); // not integer, sign doesn't matter @@ -1497,7 +1334,7 @@ void CWriter::writeInstComputationInline(Instruction &I) { // We can't currently support integer types other than 1, 8, 16, 32, 64. // Validate this. const Type *Ty = I.getType(); - if (Ty->isInteger() && (Ty!=Type::getInt1Ty(I.getContext()) && + if (Ty->isIntegerTy() && (Ty!=Type::getInt1Ty(I.getContext()) && Ty!=Type::getInt8Ty(I.getContext()) && Ty!=Type::getInt16Ty(I.getContext()) && Ty!=Type::getInt32Ty(I.getContext()) && @@ -1660,7 +1497,7 @@ void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) { // If the operand was a pointer, convert to a large integer type. const Type* OpTy = Operand->getType(); - if (isa<PointerType>(OpTy)) + if (OpTy->isPointerTy()) OpTy = TD->getIntPtrType(Operand->getContext()); Out << "(("; @@ -2102,10 +1939,10 @@ bool CWriter::doInitialization(Module &M) { // complete. If the value is an aggregate, print out { 0 }, and let // the compiler figure out the rest of the zeros. Out << " = " ; - if (isa<StructType>(I->getInitializer()->getType()) || - isa<VectorType>(I->getInitializer()->getType())) { + if (I->getInitializer()->getType()->isStructTy() || + I->getInitializer()->getType()->isVectorTy()) { Out << "{ 0 }"; - } else if (isa<ArrayType>(I->getInitializer()->getType())) { + } else if (I->getInitializer()->getType()->isArrayTy()) { // As with structs and vectors, but with an extra set of braces // because arrays are wrapped in structs. Out << "{ { 0 } }"; @@ -2274,7 +2111,7 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) { // Out << "/* Structure contents */\n"; for (I = TST.begin(); I != End; ++I) - if (isa<StructType>(I->second) || isa<ArrayType>(I->second)) + if (I->second->isStructTy() || I->second->isArrayTy()) // Only print out used types! printContainedStructs(I->second, StructPrinted); } @@ -2287,14 +2124,15 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) { void CWriter::printContainedStructs(const Type *Ty, std::set<const Type*> &StructPrinted) { // Don't walk through pointers. - if (isa<PointerType>(Ty) || Ty->isPrimitiveType() || Ty->isInteger()) return; + if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy()) + return; // Print all contained types first. for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end(); I != E; ++I) printContainedStructs(*I, StructPrinted); - if (isa<StructType>(Ty) || isa<ArrayType>(Ty)) { + if (Ty->isStructTy() || Ty->isArrayTy()) { // Check to see if we have already printed this struct. if (StructPrinted.insert(Ty).second) { // Print structure type out. @@ -2327,7 +2165,8 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { const FunctionType *FT = cast<FunctionType>(F->getFunctionType()); const AttrListPtr &PAL = F->getAttributes(); - std::stringstream FunctionInnards; + std::string tstr; + raw_string_ostream FunctionInnards(tstr); // Print out the name... 
FunctionInnards << GetValueName(F) << '('; @@ -2382,7 +2221,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { if (PrintedArg) FunctionInnards << ", "; const Type *ArgTy = *I; if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { - assert(isa<PointerType>(ArgTy)); + assert(ArgTy->isPointerTy()); ArgTy = cast<PointerType>(ArgTy)->getElementType(); } printType(FunctionInnards, ArgTy, @@ -2423,8 +2262,8 @@ static inline bool isFPIntBitCast(const Instruction &I) { return false; const Type *SrcTy = I.getOperand(0)->getType(); const Type *DstTy = I.getType(); - return (SrcTy->isFloatingPoint() && DstTy->isInteger()) || - (DstTy->isFloatingPoint() && SrcTy->isInteger()); + return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) || + (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy()); } void CWriter::printFunction(Function &F) { @@ -2713,7 +2552,7 @@ void CWriter::visitPHINode(PHINode &I) { void CWriter::visitBinaryOperator(Instruction &I) { // binary instructions, shift instructions, setCond instructions. - assert(!isa<PointerType>(I.getType())); + assert(!I.getType()->isPointerTy()); // We must cast the results of binary operations which might be promoted. bool needsCast = false; @@ -3489,7 +3328,7 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I, // exposed, like a global, avoid emitting (&foo)[0], just emit foo instead. if (isAddressExposed(Ptr)) { writeOperandInternal(Ptr, Static); - } else if (I != E && isa<StructType>(*I)) { + } else if (I != E && (*I)->isStructTy()) { // If we didn't already emit the first operand, see if we can print it as // P->f instead of "P[0].f" writeOperand(Ptr); @@ -3504,13 +3343,13 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I, } for (; I != E; ++I) { - if (isa<StructType>(*I)) { + if ((*I)->isStructTy()) { Out << ".field" << cast<ConstantInt>(I.getOperand())->getZExtValue(); - } else if (isa<ArrayType>(*I)) { + } else if ((*I)->isArrayTy()) { Out << ".array["; writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr); Out << ']'; - } else if (!isa<VectorType>(*I)) { + } else if (!(*I)->isVectorTy()) { Out << '['; writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr); Out << ']'; @@ -3668,7 +3507,7 @@ void CWriter::visitInsertValueInst(InsertValueInst &IVI) { i != e; ++i) { const Type *IndexedTy = ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(), b, i+1); - if (isa<ArrayType>(IndexedTy)) + if (IndexedTy->isArrayTy()) Out << ".array[" << *i << "]"; else Out << ".field" << *i; @@ -3689,7 +3528,7 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) { i != e; ++i) { const Type *IndexedTy = ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(), b, i+1); - if (isa<ArrayType>(IndexedTy)) + if (IndexedTy->isArrayTy()) Out << ".array[" << *i << "]"; else Out << ".field" << *i; @@ -3705,7 +3544,8 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) { bool CTargetMachine::addPassesToEmitWholeFile(PassManager &PM, formatted_raw_ostream &o, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel) { + CodeGenOpt::Level OptLevel, + bool DisableVerify) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; PM.add(createGCLoweringPass()); diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h index 715bbda..d178e7f 100644 --- a/lib/Target/CBackend/CTargetMachine.h +++ b/lib/Target/CBackend/CTargetMachine.h @@ -27,7 +27,8 @@ struct CTargetMachine : public TargetMachine { virtual bool 
addPassesToEmitWholeFile(PassManager &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel); + CodeGenOpt::Level OptLevel, + bool DisableVerify); virtual const TargetData *getTargetData() const { return 0; } }; diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td index 06eb149..47cb579 100644 --- a/lib/Target/CellSPU/SPU64InstrInfo.td +++ b/lib/Target/CellSPU/SPU64InstrInfo.td @@ -123,8 +123,8 @@ multiclass CompareLogicalGreaterThan64 { defm I64LGT: CompareLogicalGreaterThan64; def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>; -def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), - I64LGTv2i64.Fragment>; +//def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), +// I64LGTv2i64.Fragment>; // i64 setult: def : I64SETCCNegCond<setule, I64LGTr64>; @@ -201,8 +201,8 @@ multiclass CompareGreaterThan64 { defm I64GT: CompareLogicalGreaterThan64; def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>; -def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), - I64GTv2i64.Fragment>; +//def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), +// I64GTv2i64.Fragment>; // i64 setult: def : I64SETCCNegCond<setle, I64GTr64>; diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 80693e1..396a921 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -294,15 +294,19 @@ namespace { ((vecVT == MVT::v2i64) && ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || - (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) - return Select(bvNode); + (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) { + HandleSDNode Dummy(SDValue(bvNode, 0)); + if (SDNode *N = Select(bvNode)) + return N; + return Dummy.getValue().getNode(); + } // No, need to emit a constant pool spill: std::vector<Constant*> CV; for (size_t i = 0; i < bvNode->getNumOperands(); ++i) { ConstantSDNode *V = dyn_cast<ConstantSDNode > (bvNode->getOperand(i)); - CV.push_back(const_cast<ConstantInt *> (V->getConstantIntValue())); + CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue())); } Constant *CP = ConstantVector::get(CV); @@ -311,10 +315,15 @@ namespace { SDValue CGPoolOffset = SPU::LowerConstantPool(CPIdx, *CurDAG, SPUtli.getSPUTargetMachine()); - return SelectCode(CurDAG->getLoad(vecVT, dl, - CurDAG->getEntryNode(), CGPoolOffset, - PseudoSourceValue::getConstantPool(), 0, - false, Alignment).getNode()); + + HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl, + CurDAG->getEntryNode(), CGPoolOffset, + PseudoSourceValue::getConstantPool(),0, + false, false, Alignment)); + CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue()); + if (SDNode *N = SelectCode(Dummy.getValue().getNode())) + return N; + return Dummy.getValue().getNode(); } /// Select - Convert the specified operand from a target-independent to a @@ -390,10 +399,6 @@ namespace { return false; } - /// InstructionSelect - This callback is invoked by - /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. - virtual void InstructionSelect(); - virtual const char *getPassName() const { return "Cell SPU DAG->DAG Pattern Instruction Selection"; } @@ -411,16 +416,6 @@ namespace { }; } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. 
-void -SPUDAGToDAGISel::InstructionSelect() -{ - // Select target instructions for the DAG. - SelectRoot(*CurDAG); - CurDAG->RemoveDeadNodes(); -} - /*! \arg Op The ISD instruction operand \arg N The address to be tested @@ -692,9 +687,8 @@ SPUDAGToDAGISel::Select(SDNode *N) { SDValue Ops[8]; DebugLoc dl = N->getDebugLoc(); - if (N->isMachineOpcode()) { + if (N->isMachineOpcode()) return NULL; // Already selected. - } if (Opc == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); @@ -759,43 +753,67 @@ SPUDAGToDAGISel::Select(SDNode *N) { } SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode()); - SDNode *PromoteScalar = - SelectCode(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl, - Op0VecVT, Op0).getNode()); - + + HandleSDNode PromoteScalar(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl, + Op0VecVT, Op0)); + + SDValue PromScalar; + if (SDNode *N = SelectCode(PromoteScalar.getValue().getNode())) + PromScalar = SDValue(N, 0); + else + PromScalar = PromoteScalar.getValue(); + SDValue zextShuffle = CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT, - SDValue(PromoteScalar, 0), - SDValue(PromoteScalar, 0), + PromScalar, PromScalar, SDValue(shufMaskLoad, 0)); - // N.B.: BIT_CONVERT replaces and updates the zextShuffle node, so we - // re-use it in the VEC2PREFSLOT selection without needing to explicitly - // call SelectCode (it's already done for us.) - SelectCode(CurDAG->getNode(ISD::BIT_CONVERT, dl, OpVecVT, zextShuffle).getNode()); - return SelectCode(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT, - zextShuffle).getNode()); + HandleSDNode Dummy2(zextShuffle); + if (SDNode *N = SelectCode(Dummy2.getValue().getNode())) + zextShuffle = SDValue(N, 0); + else + zextShuffle = Dummy2.getValue(); + HandleSDNode Dummy(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT, + zextShuffle)); + + CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); + SelectCode(Dummy.getValue().getNode()); + return Dummy.getValue().getNode(); } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode()); - return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT, - N->getOperand(0), N->getOperand(1), - SDValue(CGLoad, 0)).getNode()); + HandleSDNode Dummy(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT, + N->getOperand(0), N->getOperand(1), + SDValue(CGLoad, 0))); + + CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); + if (SDNode *N = SelectCode(Dummy.getValue().getNode())) + return N; + return Dummy.getValue().getNode(); } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl).getNode()); - return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT, - N->getOperand(0), N->getOperand(1), - SDValue(CGLoad, 0)).getNode()); + HandleSDNode Dummy(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT, + N->getOperand(0), N->getOperand(1), + SDValue(CGLoad, 0))); + + CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); + if (SDNode *N = SelectCode(Dummy.getValue().getNode())) + return N; + return Dummy.getValue().getNode(); } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode()); - return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT, - N->getOperand(0), N->getOperand(1), - SDValue(CGLoad, 0)).getNode()); + HandleSDNode Dummy(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT, + N->getOperand(0), N->getOperand(1), + 
SDValue(CGLoad, 0))); + CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); + if (SDNode *N = SelectCode(Dummy.getValue().getNode())) + return N; + return Dummy.getValue().getNode(); } else if (Opc == ISD::TRUNCATE) { SDValue Op0 = N->getOperand(0); if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL) @@ -832,17 +850,14 @@ SPUDAGToDAGISel::Select(SDNode *N) { } } } else if (Opc == ISD::SHL) { - if (OpVT == MVT::i64) { + if (OpVT == MVT::i64) return SelectSHLi64(N, OpVT); - } } else if (Opc == ISD::SRL) { - if (OpVT == MVT::i64) { + if (OpVT == MVT::i64) return SelectSRLi64(N, OpVT); - } } else if (Opc == ISD::SRA) { - if (OpVT == MVT::i64) { + if (OpVT == MVT::i64) return SelectSRAi64(N, OpVT); - } } else if (Opc == ISD::FNEG && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) { DebugLoc dl = N->getDebugLoc(); @@ -1224,13 +1239,15 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, ? shufmask.getNode() : emitBuildVector(shufmask.getNode())); - SDNode *shufNode = - Select(CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT, + SDValue shufNode = + CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT, SDValue(lhsNode, 0), SDValue(rhsNode, 0), - SDValue(shufMaskNode, 0)).getNode()); - - return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, - SDValue(shufNode, 0)); + SDValue(shufMaskNode, 0)); + HandleSDNode Dummy(shufNode); + SDNode *SN = SelectCode(Dummy.getValue().getNode()); + if (SN == 0) SN = Dummy.getValue().getNode(); + + return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(SN, 0)); } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) { return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(emitBuildVector(i64vec.getNode()), 0)); diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index fe0f019..e863ee3 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -25,7 +25,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/Support/Debug.h" @@ -118,8 +118,7 @@ namespace { TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, Op.getDebugLoc(), - DAG.GetOrdering(InChain.getNode())); + Callee, Args, DAG, Op.getDebugLoc()); return CallInfo.first; } @@ -669,7 +668,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // Re-emit as a v16i8 vector load result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr, LN->getSrcValue(), LN->getSrcValueOffset(), - LN->isVolatile(), 16); + LN->isVolatile(), LN->isNonTemporal(), 16); // Update the chain the_chain = result.getValue(1); @@ -820,7 +819,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // Re-emit as a v16i8 vector load alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr, SN->getSrcValue(), SN->getSrcValueOffset(), - SN->isVolatile(), 16); + SN->isVolatile(), SN->isNonTemporal(), 16); // Update the chain the_chain = alignLoadVec.getValue(1); @@ -861,7 +860,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { result = DAG.getStore(the_chain, dl, result, basePtr, LN->getSrcValue(), LN->getSrcValueOffset(), - LN->isVolatile(), LN->getAlignment()); + LN->isVolatile(), 
LN->isNonTemporal(), + LN->getAlignment()); #if 0 && !defined(NDEBUG) if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { @@ -1086,7 +1086,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, // or we're forced to do vararg int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0); ArgOffset += StackSlotSize; } @@ -1108,7 +1108,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, true, false); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8); - SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0, + false, false, 0); Chain = Store.getOperand(0); MemOps.push_back(Store); @@ -1190,7 +1191,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (ArgRegIdx != NumArgRegs) { RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); ArgOffset += StackSlotSize; } break; @@ -1199,7 +1201,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (ArgRegIdx != NumArgRegs) { RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); ArgOffset += StackSlotSize; } break; @@ -1212,7 +1215,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (ArgRegIdx != NumArgRegs) { RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); ArgOffset += StackSlotSize; } break; diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/SPUMCAsmInfo.cpp index 5ef3c6b..3e17a51 100644 --- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp +++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp @@ -34,5 +34,8 @@ SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, const StringRef &TT) { // Exception handling is not supported on CellSPU (think about it: you only // have 256K for code+data. Would you support exception handling?) ExceptionsType = ExceptionHandling::None; + + // SPU assembly requires ".section" before ".bss" + UsesELFSectionDirectiveForBSS = true; } diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 3dd8ca7..9c5893c 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -221,7 +221,7 @@ namespace { APFloat APF = APFloat(CFP->getValueAPF()); // copy if (CFP->getType() == Type::getFloatTy(CFP->getContext())) APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); - Out << "ConstantFP::get(getGlobalContext(), "; + Out << "ConstantFP::get(mod->getContext(), "; Out << "APFloat("; #if HAVE_PRINTF_A char Buffer[100]; @@ -344,23 +344,23 @@ namespace { std::string CppWriter::getCppName(const Type* Ty) { // First, handle the primitive types .. 
easy - if (Ty->isPrimitiveType() || Ty->isInteger()) { + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) { switch (Ty->getTypeID()) { - case Type::VoidTyID: return "Type::getVoidTy(getGlobalContext())"; + case Type::VoidTyID: return "Type::getVoidTy(mod->getContext())"; case Type::IntegerTyID: { unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth(); - return "IntegerType::get(getGlobalContext(), " + utostr(BitWidth) + ")"; + return "IntegerType::get(mod->getContext(), " + utostr(BitWidth) + ")"; } - case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(getGlobalContext())"; - case Type::FloatTyID: return "Type::getFloatTy(getGlobalContext())"; - case Type::DoubleTyID: return "Type::getDoubleTy(getGlobalContext())"; - case Type::LabelTyID: return "Type::getLabelTy(getGlobalContext())"; + case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(mod->getContext())"; + case Type::FloatTyID: return "Type::getFloatTy(mod->getContext())"; + case Type::DoubleTyID: return "Type::getDoubleTy(mod->getContext())"; + case Type::LabelTyID: return "Type::getLabelTy(mod->getContext())"; default: error("Invalid primitive type"); break; } // shouldn't be returned, but make it sensible - return "Type::getVoidTy(getGlobalContext())"; + return "Type::getVoidTy(mod->getContext())"; } // Now, see if we've seen the type before and return that @@ -493,7 +493,7 @@ namespace { bool CppWriter::printTypeInternal(const Type* Ty) { // We don't print definitions for primitive types - if (Ty->isPrimitiveType() || Ty->isInteger()) + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) return false; // If we already defined this type, we don't need to define it again. @@ -514,7 +514,7 @@ namespace { TypeMap::const_iterator I = UnresolvedTypes.find(Ty); if (I == UnresolvedTypes.end()) { Out << "PATypeHolder " << typeName; - Out << "_fwd = OpaqueType::get(getGlobalContext());"; + Out << "_fwd = OpaqueType::get(mod->getContext());"; nl(Out); UnresolvedTypes[Ty] = typeName; } @@ -615,7 +615,7 @@ namespace { } case Type::OpaqueTyID: { Out << "OpaqueType* " << typeName; - Out << " = OpaqueType::get(getGlobalContext());"; + Out << " = OpaqueType::get(mod->getContext());"; nl(Out); break; } @@ -686,7 +686,7 @@ namespace { // For primitive types and types already defined, just add a name TypeMap::const_iterator TNI = TypeNames.find(TI->second); - if (TI->second->isInteger() || TI->second->isPrimitiveType() || + if (TI->second->isIntegerTy() || TI->second->isPrimitiveType() || TNI != TypeNames.end()) { Out << "mod->addTypeName(\""; printEscapedString(TI->first); @@ -751,7 +751,7 @@ namespace { if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { std::string constValue = CI->getValue().toString(10, true); Out << "ConstantInt* " << constName - << " = ConstantInt::get(getGlobalContext(), APInt(" + << " = ConstantInt::get(mod->getContext(), APInt(" << cast<IntegerType>(CI->getType())->getBitWidth() << ", StringRef(\"" << constValue << "\"), 10));"; } else if (isa<ConstantAggregateZero>(CV)) { @@ -769,7 +769,7 @@ namespace { CA->getType()->getElementType() == Type::getInt8Ty(CA->getContext())) { Out << "Constant* " << constName << - " = ConstantArray::get(getGlobalContext(), \""; + " = ConstantArray::get(mod->getContext(), \""; std::string tmp = CA->getAsString(); bool nullTerminate = false; if (tmp[tmp.length()-1] == 0) { @@ -995,7 +995,7 @@ namespace { void CppWriter::printVariableHead(const GlobalVariable *GV) { nl(Out) << "GlobalVariable* " << getCppName(GV); if (is_inline) { - Out << " = mod->getGlobalVariable(getGlobalContext(), "; + 
Out << " = mod->getGlobalVariable(mod->getContext(), "; printEscapedString(GV->getName()); Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)"; nl(Out) << "if (!" << getCppName(GV) << ") {"; @@ -1094,7 +1094,7 @@ namespace { case Instruction::Ret: { const ReturnInst* ret = cast<ReturnInst>(I); - Out << "ReturnInst::Create(getGlobalContext(), " + Out << "ReturnInst::Create(mod->getContext(), " << (ret->getReturnValue() ? opNames[0] + ", " : "") << bbname << ");"; break; } @@ -1171,7 +1171,7 @@ namespace { } case Instruction::Unreachable: { Out << "new UnreachableInst(" - << "getGlobalContext(), " + << "mod->getContext(), " << bbname << ");"; break; } @@ -1673,7 +1673,7 @@ namespace { BI != BE; ++BI) { std::string bbname(getCppName(BI)); Out << "BasicBlock* " << bbname << - " = BasicBlock::Create(getGlobalContext(), \""; + " = BasicBlock::Create(mod->getContext(), \""; if (BI->hasName()) printEscapedString(BI->getName()); Out << "\"," << getCppName(BI->getParent()) << ",0);"; @@ -2009,7 +2009,8 @@ char CppWriter::ID = 0; bool CPPTargetMachine::addPassesToEmitWholeFile(PassManager &PM, formatted_raw_ostream &o, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel) { + CodeGenOpt::Level OptLevel, + bool DisableVerify) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; PM.add(new CppWriter(o)); return false; diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h index 1f74f76..b7aae91 100644 --- a/lib/Target/CppBackend/CPPTargetMachine.h +++ b/lib/Target/CppBackend/CPPTargetMachine.h @@ -30,7 +30,8 @@ struct CPPTargetMachine : public TargetMachine { virtual bool addPassesToEmitWholeFile(PassManager &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel); + CodeGenOpt::Level OptLevel, + bool DisableVerify); virtual const TargetData *getTargetData() const { return 0; } }; diff --git a/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt b/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt new file mode 100644 index 0000000..cfb2fc8 --- /dev/null +++ b/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt @@ -0,0 +1,9 @@ +include_directories(
+ ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/..
+ )
+
+add_llvm_library(LLVMMBlazeAsmPrinter
+ MBlazeAsmPrinter.cpp
+ )
+add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen)
\ No newline at end of file diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp new file mode 100644 index 0000000..6fe1026 --- /dev/null +++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp @@ -0,0 +1,302 @@ +//===-- MBlazeAsmPrinter.cpp - MBlaze LLVM assembly writer ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to GAS-format MBlaze assembly language. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mblaze-asm-printer" + +#include "MBlaze.h" +#include "MBlazeSubtarget.h" +#include "MBlazeInstrInfo.h" +#include "MBlazeTargetMachine.h" +#include "MBlazeMachineFunction.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/MathExtras.h" +#include <cctype> + +using namespace llvm; + +namespace { + class MBlazeAsmPrinter : public AsmPrinter { + const MBlazeSubtarget *Subtarget; + public: + explicit MBlazeAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T ) + : AsmPrinter(O, TM, Ctx, Streamer, T) { + Subtarget = &TM.getSubtarget<MBlazeSubtarget>(); + } + + virtual const char *getPassName() const { + return "MBlaze Assembly Printer"; + } + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode); + void printOperand(const MachineInstr *MI, int opNum); + void printUnsignedImm(const MachineInstr *MI, int opNum); + void printFSLImm(const MachineInstr *MI, int opNum); + void printMemOperand(const MachineInstr *MI, int opNum, + const char *Modifier = 0); + void printFCCOperand(const MachineInstr *MI, int opNum, + const char *Modifier = 0); + void printSavedRegsBitmask(); + void printHex32(unsigned int Value); + + const char *emitCurrentABIString(); + void emitFrameDirective(); + + void printInstruction(const MachineInstr *MI); // autogenerated. 
+ void EmitInstruction(const MachineInstr *MI) { + printInstruction(MI); + O << '\n'; + } + virtual void EmitFunctionBodyStart(); + virtual void EmitFunctionBodyEnd(); + static const char *getRegisterName(unsigned RegNo); + + virtual void EmitFunctionEntryLabel(); + void EmitStartOfAsmFile(Module &M); + }; +} // end of anonymous namespace + +#include "MBlazeGenAsmWriter.inc" + +//===----------------------------------------------------------------------===// +// +// MBlaze Asm Directives +// +// -- Frame directive "frame Stackpointer, Stacksize, RARegister" +// Describe the stack frame. +// +// -- Mask directives "mask bitmask, offset" +// Tells the assembler which registers are saved and where. +// bitmask - contains a little endian bitset indicating which registers are +// saved on function prologue (e.g. with a 0x80000000 mask, the +// assembler knows the register 31 (RA) is saved at prologue.) +// offset - the position before stack pointer subtraction indicating where +// the first saved register on prologue is located. (e.g. with a +// +// Consider the following function prologue: +// +// .frame R19,48,R15 +// .mask 0xc0000000,-8 +// addiu R1, R1, -48 +// sw R15, 40(R1) +// sw R19, 36(R1) +// +// With a 0xc0000000 mask, the assembler knows the register 15 (R15) and +// 19 (R19) are saved at prologue. As the save order on prologue is from +// left to right, R15 is saved first. A -8 offset means that after the +// stack pointer subtraction, the first register in the mask (R15) will be +// saved at address 48-8=40. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Mask directives +//===----------------------------------------------------------------------===// + +// Create a bitmask with all callee saved registers for CPU or Floating Point +// registers. For CPU registers consider RA, GP and FP for saving if necessary. +void MBlazeAsmPrinter::printSavedRegsBitmask() { + const TargetRegisterInfo &RI = *TM.getRegisterInfo(); + const MBlazeFunctionInfo *MBlazeFI = MF->getInfo<MBlazeFunctionInfo>(); + + // CPU Saved Registers Bitmasks + unsigned int CPUBitmask = 0; + + // Set the CPU Bitmasks + const MachineFrameInfo *MFI = MF->getFrameInfo(); + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(CSI[i].getReg()); + if (CSI[i].getRegClass() == MBlaze::CPURegsRegisterClass) + CPUBitmask |= (1 << RegNum); + } + + // Return Address and Frame registers must also be set in CPUBitmask. + if (RI.hasFP(*MF)) + CPUBitmask |= (1 << MBlazeRegisterInfo:: + getRegisterNumbering(RI.getFrameRegister(*MF))); + + if (MFI->hasCalls()) + CPUBitmask |= (1 << MBlazeRegisterInfo:: + getRegisterNumbering(RI.getRARegister())); + + // Print CPUBitmask + O << "\t.mask \t"; printHex32(CPUBitmask); O << ',' + << MBlazeFI->getCPUTopSavedRegOff() << '\n'; +} + +// Print a 32 bit hex number with all 8 hex digits.
+void MBlazeAsmPrinter::printHex32(unsigned int Value) { + O << "0x"; + for (int i = 7; i >= 0; i--) + O << utohexstr( (Value & (0xF << (i*4))) >> (i*4) ); +} + +//===----------------------------------------------------------------------===// +// Frame and Set directives +//===----------------------------------------------------------------------===// + +/// Frame Directive +void MBlazeAsmPrinter::emitFrameDirective() { + const TargetRegisterInfo &RI = *TM.getRegisterInfo(); + + unsigned stackReg = RI.getFrameRegister(*MF); + unsigned returnReg = RI.getRARegister(); + unsigned stackSize = MF->getFrameInfo()->getStackSize(); + + + O << "\t.frame\t" << getRegisterName(stackReg) + << ',' << stackSize << ',' + << getRegisterName(returnReg) + << '\n'; +} + +void MBlazeAsmPrinter::EmitFunctionEntryLabel() { + O << "\t.ent\t" << *CurrentFnSym << '\n'; + OutStreamer.EmitLabel(CurrentFnSym); +} + +/// EmitFunctionBodyStart - Targets can override this to emit stuff before +/// the first basic block in the function. +void MBlazeAsmPrinter::EmitFunctionBodyStart() { + emitFrameDirective(); + printSavedRegsBitmask(); +} + +/// EmitFunctionBodyEnd - Targets can override this to emit stuff after +/// the last basic block in the function. +void MBlazeAsmPrinter::EmitFunctionBodyEnd() { + O << "\t.end\t" << *CurrentFnSym << '\n'; +} + +// Print out an operand for an inline asm expression. +bool MBlazeAsmPrinter:: +PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant,const char *ExtraCode){ + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + + printOperand(MI, OpNo); + return false; +} + +void MBlazeAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { + const MachineOperand &MO = MI->getOperand(opNum); + + switch (MO.getType()) { + case MachineOperand::MO_Register: + O << getRegisterName(MO.getReg()); + break; + + case MachineOperand::MO_Immediate: + O << (int)MO.getImm(); + break; + + case MachineOperand::MO_FPImmediate: { + const ConstantFP* fp = MO.getFPImm(); + printHex32(fp->getValueAPF().bitcastToAPInt().getZExtValue()); + O << ";\t# immediate = " << *fp; + break; + } + + case MachineOperand::MO_MachineBasicBlock: + O << *MO.getMBB()->getSymbol(OutContext); + return; + + case MachineOperand::MO_GlobalAddress: + O << *GetGlobalValueSymbol(MO.getGlobal()); + break; + + case MachineOperand::MO_ExternalSymbol: + O << *GetExternalSymbolSymbol(MO.getSymbolName()); + break; + + case MachineOperand::MO_JumpTableIndex: + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << MO.getIndex(); + break; + + case MachineOperand::MO_ConstantPoolIndex: + O << MAI->getPrivateGlobalPrefix() << "CPI" + << getFunctionNumber() << "_" << MO.getIndex(); + if (MO.getOffset()) + O << "+" << MO.getOffset(); + break; + + default: + llvm_unreachable("<unknown operand type>"); + } +} + +void MBlazeAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum) { + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.getType() == MachineOperand::MO_Immediate) + O << (unsigned int)MO.getImm(); + else + printOperand(MI, opNum); +} + +void MBlazeAsmPrinter::printFSLImm(const MachineInstr *MI, int opNum) { + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.getType() == MachineOperand::MO_Immediate) + O << "rfsl" << (unsigned int)MO.getImm(); + else + printOperand(MI, opNum); +} + +void MBlazeAsmPrinter:: +printMemOperand(const MachineInstr *MI, int opNum, const char *Modifier) { + 
printOperand(MI, opNum+1); + O << ", "; + printOperand(MI, opNum); +} + +void MBlazeAsmPrinter:: +printFCCOperand(const MachineInstr *MI, int opNum, const char *Modifier) { + const MachineOperand& MO = MI->getOperand(opNum); + O << MBlaze::MBlazeFCCToString((MBlaze::CondCode)MO.getImm()); +} + +void MBlazeAsmPrinter::EmitStartOfAsmFile(Module &M) { +} + +// Force static initialization. +extern "C" void LLVMInitializeMBlazeAsmPrinter() { + RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget); +} diff --git a/lib/Target/MBlaze/AsmPrinter/Makefile b/lib/Target/MBlaze/AsmPrinter/Makefile new file mode 100644 index 0000000..c8e4d8f --- /dev/null +++ b/lib/Target/MBlaze/AsmPrinter/Makefile @@ -0,0 +1,17 @@ +##===- lib/Target/MBlaze/AsmPrinter/Makefile ---------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMMBlazeAsmPrinter + +# Hack: we need to include 'main' MBlaze target directory to grab +# private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt new file mode 100644 index 0000000..c93e3df --- /dev/null +++ b/lib/Target/MBlaze/CMakeLists.txt @@ -0,0 +1,27 @@ +set(LLVM_TARGET_DEFINITIONS MBlaze.td) + +tablegen(MBlazeGenRegisterInfo.h.inc -gen-register-desc-header) +tablegen(MBlazeGenRegisterNames.inc -gen-register-enums) +tablegen(MBlazeGenRegisterInfo.inc -gen-register-desc) +tablegen(MBlazeGenInstrNames.inc -gen-instr-enums) +tablegen(MBlazeGenInstrInfo.inc -gen-instr-desc) +tablegen(MBlazeGenAsmWriter.inc -gen-asm-writer) +tablegen(MBlazeGenDAGISel.inc -gen-dag-isel) +tablegen(MBlazeGenCallingConv.inc -gen-callingconv) +tablegen(MBlazeGenSubtarget.inc -gen-subtarget) +tablegen(MBlazeGenIntrinsics.inc -gen-tgt-intrinsic) + +add_llvm_target(MBlazeCodeGen + MBlazeDelaySlotFiller.cpp + MBlazeInstrInfo.cpp + MBlazeISelDAGToDAG.cpp + MBlazeISelLowering.cpp + MBlazeMCAsmInfo.cpp + MBlazeRegisterInfo.cpp + MBlazeSubtarget.cpp + MBlazeTargetMachine.cpp + MBlazeTargetObjectFile.cpp + MBlazeIntrinsicInfo.cpp + ) + +target_link_libraries (LLVMMBlazeCodeGen LLVMSelectionDAG) diff --git a/lib/Target/MBlaze/MBlaze.h b/lib/Target/MBlaze/MBlaze.h new file mode 100644 index 0000000..f9d828b --- /dev/null +++ b/lib/Target/MBlaze/MBlaze.h @@ -0,0 +1,39 @@ +//===-- MBlaze.h - Top-level interface for MBlaze ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in +// the LLVM MBlaze back-end. 
+// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_MBLAZE_H +#define TARGET_MBLAZE_H + +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + class MBlazeTargetMachine; + class FunctionPass; + class MachineCodeEmitter; + class formatted_raw_ostream; + + FunctionPass *createMBlazeISelDag(MBlazeTargetMachine &TM); + FunctionPass *createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &TM); + + extern Target TheMBlazeTarget; +} // end namespace llvm; + +// Defines symbolic names for MBlaze registers. This defines a mapping from +// register name to register number. +#include "MBlazeGenRegisterNames.inc" + +// Defines symbolic names for the MBlaze instructions. +#include "MBlazeGenInstrNames.inc" + +#endif diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td new file mode 100644 index 0000000..1679752 --- /dev/null +++ b/lib/Target/MBlaze/MBlaze.td @@ -0,0 +1,85 @@ +//===- MBlaze.td - Describe the MBlaze Target Machine -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is the top level entry point for the MBlaze target. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// Register File, Calling Conv, Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "MBlazeRegisterInfo.td" +include "MBlazeSchedule.td" +include "MBlazeIntrinsics.td" +include "MBlazeInstrInfo.td" +include "MBlazeCallingConv.td" + +def MBlazeInstrInfo : InstrInfo { + let TSFlagsFields = []; + let TSFlagsShifts = []; +} + + +//===----------------------------------------------------------------------===// +// Microblaze Subtarget features // +//===----------------------------------------------------------------------===// + +def FeaturePipe3 : SubtargetFeature<"pipe3", "HasPipe3", "true", + "Implements 3-stage pipeline.">; +def FeatureBarrel : SubtargetFeature<"barrel", "HasBarrel", "true", + "Implements barrel shifter.">; +def FeatureDiv : SubtargetFeature<"div", "HasDiv", "true", + "Implements hardware divider.">; +def FeatureMul : SubtargetFeature<"mul", "HasMul", "true", + "Implements hardware multiplier.">; +def FeatureFSL : SubtargetFeature<"fsl", "HasFSL", "true", + "Implements FSL instructions.">; +def FeatureEFSL : SubtargetFeature<"efsl", "HasEFSL", "true", + "Implements extended FSL instructions.">; +def FeatureMSRSet : SubtargetFeature<"msrset", "HasMSRSet", "true", + "Implements MSR register set and clear.">; +def FeatureException : SubtargetFeature<"exception", "HasException", "true", + "Implements hardware exception support.">; +def FeaturePatCmp : SubtargetFeature<"patcmp", "HasPatCmp", "true", + "Implements pattern compare instruction.">; +def FeatureFPU : SubtargetFeature<"fpu", "HasFPU", "true", + "Implements floating point unit.">; +def FeatureESR : SubtargetFeature<"esr", "HasESR", "true", + "Implements ESR and EAR registers">; +def FeaturePVR : SubtargetFeature<"pvr", "HasPVR", "true", + 
"Implements processor version register.">; +def FeatureMul64 : SubtargetFeature<"mul64", "HasMul64", "true", + "Implements multiplier with 64-bit result">; +def FeatureSqrt : SubtargetFeature<"sqrt", "HasSqrt", "true", + "Implements sqrt and floating point convert.">; +def FeatureMMU : SubtargetFeature<"mmu", "HasMMU", "true", + "Implements memory management unit.">; + +//===----------------------------------------------------------------------===// +// MBlaze processors supported. +//===----------------------------------------------------------------------===// + +class Proc<string Name, list<SubtargetFeature> Features> + : Processor<Name, MBlazeGenericItineraries, Features>; + + +def : Proc<"v400", []>; +def : Proc<"v500", []>; +def : Proc<"v600", []>; +def : Proc<"v700", []>; +def : Proc<"v710", []>; + +def MBlaze : Target { + let InstructionSet = MBlazeInstrInfo; +} diff --git a/lib/Target/MBlaze/MBlazeCallingConv.td b/lib/Target/MBlaze/MBlazeCallingConv.td new file mode 100644 index 0000000..ddd4998 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeCallingConv.td @@ -0,0 +1,26 @@ +//===- MBlazeCallingConv.td - Calling Conventions for MBlaze ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This describes the calling conventions for MBlaze architecture. +//===----------------------------------------------------------------------===// + +/// CCIfSubtarget - Match if the current subtarget has a feature F. +class CCIfSubtarget<string F, CCAction A>: + CCIf<!strconcat("State.getTarget().getSubtarget<MBlazeSubtarget>().", F), A>; + +//===----------------------------------------------------------------------===// +// MBlaze ABI Calling Convention +//===----------------------------------------------------------------------===// + +def RetCC_MBlaze : CallingConv<[ + // i32 are returned in registers R3, R4 + CCIfType<[i32], CCAssignToReg<[R3, R4]>>, + + // f32 are returned in registers F3, F4 + CCIfType<[f32], CCAssignToReg<[F3, F4]>> +]>; diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp new file mode 100644 index 0000000..42fea25 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp @@ -0,0 +1,75 @@ +//===-- DelaySlotFiller.cpp - MBlaze delay slot filler --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Simple pass to fills delay slots with NOPs. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "delay-slot-filler" + +#include "MBlaze.h" +#include "MBlazeTargetMachine.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/Statistic.h" + +using namespace llvm; + +STATISTIC(FilledSlots, "Number of delay slots filled"); + +namespace { + struct Filler : public MachineFunctionPass { + + TargetMachine &TM; + const TargetInstrInfo *TII; + + static char ID; + Filler(TargetMachine &tm) + : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { } + + virtual const char *getPassName() const { + return "MBlaze Delay Slot Filler"; + } + + bool runOnMachineBasicBlock(MachineBasicBlock &MBB); + bool runOnMachineFunction(MachineFunction &F) { + bool Changed = false; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); + FI != FE; ++FI) + Changed |= runOnMachineBasicBlock(*FI); + return Changed; + } + + }; + char Filler::ID = 0; +} // end of anonymous namespace + +/// runOnMachineBasicBlock - Fill in delay slots for the given basic block. +/// Currently, we fill delay slots with NOPs. We assume there is only one +/// delay slot per delayed instruction. +bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { + bool Changed = false; + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) + if (I->getDesc().hasDelaySlot()) { + MachineBasicBlock::iterator J = I; + ++J; + BuildMI(MBB, J, I->getDebugLoc(), TII->get(MBlaze::NOP)); + ++FilledSlots; + Changed = true; + } + return Changed; +} + +/// createMBlazeDelaySlotFillerPass - Returns a pass that fills in delay +/// slots in MBlaze MachineFunctions +FunctionPass *llvm::createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &tm) { + return new Filler(tm); +} + diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp new file mode 100644 index 0000000..7e59c4a --- /dev/null +++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp @@ -0,0 +1,339 @@ +//===-- MBlazeISelDAGToDAG.cpp - A dag to dag inst selector for MBlaze ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the MBlaze target. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mblaze-isel" +#include "MBlaze.h" +#include "MBlazeISelLowering.h" +#include "MBlazeMachineFunction.h" +#include "MBlazeRegisterInfo.h" +#include "MBlazeSubtarget.h" +#include "MBlazeTargetMachine.h" +#include "llvm/GlobalValue.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Support/CFG.h" +#include "llvm/Type.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Instruction Selector Implementation +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MBlazeDAGToDAGISel - MBlaze specific code to select MBlaze machine +// instructions for SelectionDAG operations. +//===----------------------------------------------------------------------===// +namespace { + +class MBlazeDAGToDAGISel : public SelectionDAGISel { + + /// TM - Keep a reference to MBlazeTargetMachine. + MBlazeTargetMachine &TM; + + /// Subtarget - Keep a pointer to the MBlazeSubtarget around so that we can + /// make the right decision when generating code for different targets. + const MBlazeSubtarget &Subtarget; + +public: + explicit MBlazeDAGToDAGISel(MBlazeTargetMachine &tm) : + SelectionDAGISel(tm), + TM(tm), Subtarget(tm.getSubtarget<MBlazeSubtarget>()) {} + + // Pass Name + virtual const char *getPassName() const { + return "MBlaze DAG->DAG Pattern Instruction Selection"; + } +private: + // Include the pieces autogenerated from the target description. + #include "MBlazeGenDAGISel.inc" + + /// getTargetMachine - Return a reference to the TargetMachine, casted + /// to the target-specific type. + const MBlazeTargetMachine &getTargetMachine() { + return static_cast<const MBlazeTargetMachine &>(TM); + } + + /// getInstrInfo - Return a reference to the TargetInstrInfo, casted + /// to the target-specific type. + const MBlazeInstrInfo *getInstrInfo() { + return getTargetMachine().getInstrInfo(); + } + + SDNode *getGlobalBaseReg(); + SDNode *Select(SDNode *N); + + // Complex Pattern. + bool SelectAddr(SDNode *Op, SDValue N, + SDValue &Base, SDValue &Offset); + + // Address Selection + bool SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index); + bool SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base); + + // getI32Imm - Return a target constant with the specified value, of type i32. + inline SDValue getI32Imm(unsigned Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i32); + } +}; + +} + +/// isIntS32Immediate - This method tests to see if the node is either a 32-bit +/// or 64-bit immediate, and if the value can be accurately represented as a +/// sign extension from a 32-bit value. If so, this returns true and the +/// immediate. 
+static bool isIntS32Immediate(SDNode *N, int32_t &Imm) { + unsigned Opc = N->getOpcode(); + if (Opc != ISD::Constant) + return false; + + Imm = (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); + if (N->getValueType(0) == MVT::i32) + return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); + else + return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); +} + +static bool isIntS32Immediate(SDValue Op, int32_t &Imm) { + return isIntS32Immediate(Op.getNode(), Imm); +} + + +/// SelectAddressRegReg - Given the specified address, check to see if it +/// can be represented as an indexed [r+r] operation. Returns false if it +/// can be more efficiently represented with [r+imm]. +bool MBlazeDAGToDAGISel:: +SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) { + if (N.getOpcode() == ISD::FrameIndex) return false; + if (N.getOpcode() == ISD::TargetExternalSymbol || + N.getOpcode() == ISD::TargetGlobalAddress) + return false; // direct calls. + + int32_t imm = 0; + if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) { + if (isIntS32Immediate(N.getOperand(1), imm)) + return false; // r+i + + if (N.getOperand(0).getOpcode() == ISD::TargetJumpTable || + N.getOperand(1).getOpcode() == ISD::TargetJumpTable) + return false; // jump tables. + + Base = N.getOperand(1); + Index = N.getOperand(0); + return true; + } + + return false; +} + +/// Returns true if the address N can be represented by a base register plus +/// a signed 32-bit displacement [r+imm], and if it is not better +/// represented as reg+reg. +bool MBlazeDAGToDAGISel:: +SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base) { + // If this can be more profitably realized as r+r, fail. + if (SelectAddrRegReg(Op, N, Disp, Base)) + return false; + + if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) { + int32_t imm = 0; + if (isIntS32Immediate(N.getOperand(1), imm)) { + Disp = CurDAG->getTargetConstant(imm, MVT::i32); + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { + Base = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType()); + } else { + Base = N.getOperand(0); + } + DEBUG( errs() << "WESLEY: Using Operand Immediate\n" ); + return true; // [r+i] + } + } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { + // Loading from a constant address. + uint32_t Imm = CN->getZExtValue(); + Disp = CurDAG->getTargetConstant(Imm, CN->getValueType(0)); + Base = CurDAG->getRegister(MBlaze::R0, CN->getValueType(0)); + DEBUG( errs() << "WESLEY: Using Constant Node\n" ); + return true; + } + + Disp = CurDAG->getTargetConstant(0, TM.getTargetLowering()->getPointerTy()); + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) + Base = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType()); + else + Base = N; + return true; // [r+0] +} + +/// getGlobalBaseReg - Output the instructions required to put the +/// GOT address into a register. +SDNode *MBlazeDAGToDAGISel::getGlobalBaseReg() { + unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); + return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); +} + +/// ComplexPattern used on MBlazeInstrInfo +/// Used on MBlaze Load/Store instructions +bool MBlazeDAGToDAGISel:: +SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base) { + // if Address is FI, get the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + // on PIC code Load GA + if (TM.getRelocationModel() == Reloc::PIC_) { + if ((Addr.getOpcode() == ISD::TargetGlobalAddress) || + (Addr.getOpcode() == ISD::TargetConstantPool) || + (Addr.getOpcode() == ISD::TargetJumpTable)){ + Base = CurDAG->getRegister(MBlaze::R15, MVT::i32); + Offset = Addr; + return true; + } + } else { + if ((Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress)) + return false; + } + + // Operand is a result from an ADD. + if (Addr.getOpcode() == ISD::ADD) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { + if (Predicate_immSExt16(CN)) { + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> + (Addr.getOperand(0))) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + } else { + Base = Addr.getOperand(0); + } + + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32); + return true; + } + } + } + + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + +/// Select instructions not customized! Used for +/// expanded, promoted and normal instructions +SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) { + unsigned Opcode = Node->getOpcode(); + DebugLoc dl = Node->getDebugLoc(); + + // Dump information about the Node being selected + DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); + + // If we have a custom node, we already have selected! + if (Node->isMachineOpcode()) { + DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); + return NULL; + } + + /// + // Instruction Selection not handled by the auto-generated + // tablegen selection should be handled here. + /// + switch(Opcode) { + default: break; + + // Get target GOT address. + case ISD::GLOBAL_OFFSET_TABLE: + return getGlobalBaseReg(); + + case ISD::FrameIndex: { + SDValue imm = CurDAG->getTargetConstant(0, MVT::i32); + int FI = dyn_cast<FrameIndexSDNode>(Node)->getIndex(); + EVT VT = Node->getValueType(0); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT); + unsigned Opc = MBlaze::ADDI; + if (Node->hasOneUse()) + return CurDAG->SelectNodeTo(Node, Opc, VT, TFI, imm); + return CurDAG->getMachineNode(Opc, dl, VT, TFI, imm); + } + + + /// Handle direct and indirect calls when using PIC. On PIC, when + /// GOT is smaller than about 64k (small code) the GA target is + /// loaded with only one instruction. Otherwise GA's target must + /// be loaded with 3 instructions. 
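+ /// Note: only the one-instruction (small code) form is implemented below: the + /// callee address is fetched from the GOT with a single LW and moved to R20 + /// for BRLID; the three-instruction large-GOT sequence is not emitted.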
+ case MBlazeISD::JmpLink: { + if (TM.getRelocationModel() == Reloc::PIC_) { + SDValue Chain = Node->getOperand(0); + SDValue Callee = Node->getOperand(1); + SDValue R20Reg = CurDAG->getRegister(MBlaze::R20, MVT::i32); + SDValue InFlag(0, 0); + + if ( (isa<GlobalAddressSDNode>(Callee)) || + (isa<ExternalSymbolSDNode>(Callee)) ) + { + /// Direct call for global addresses and external symbols + SDValue GPReg = CurDAG->getRegister(MBlaze::R15, MVT::i32); + + // Use load to get GOT target + SDValue Ops[] = { Callee, GPReg, Chain }; + SDValue Load = SDValue(CurDAG->getMachineNode(MBlaze::LW, dl, + MVT::i32, MVT::Other, Ops, 3), 0); + Chain = Load.getValue(1); + + // The call target must be placed in R20 + Chain = CurDAG->getCopyToReg(Chain, dl, R20Reg, Load, InFlag); + } else + /// Indirect call + Chain = CurDAG->getCopyToReg(Chain, dl, R20Reg, Callee, InFlag); + + // Emit Jump and Link Register + SDNode *ResNode = CurDAG->getMachineNode(MBlaze::BRLID, dl, MVT::Other, + MVT::Flag, R20Reg, Chain); + Chain = SDValue(ResNode, 0); + InFlag = SDValue(ResNode, 1); + ReplaceUses(SDValue(Node, 0), Chain); + ReplaceUses(SDValue(Node, 1), InFlag); + return ResNode; + } + } + } + + // Select the default instruction + SDNode *ResNode = SelectCode(Node); + + DEBUG(errs() << "=> "); + if (ResNode == NULL || ResNode == Node) + DEBUG(Node->dump(CurDAG)); + else + DEBUG(ResNode->dump(CurDAG)); + DEBUG(errs() << "\n"); + return ResNode; +} + +/// createMBlazeISelDag - This pass converts a legalized DAG into a +/// MBlaze-specific DAG, ready for instruction scheduling. +FunctionPass *llvm::createMBlazeISelDag(MBlazeTargetMachine &TM) { + return new MBlazeDAGToDAGISel(TM); +} diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp new file mode 100644 index 0000000..f0864d0 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -0,0 +1,975 @@ +//===-- MBlazeISelLowering.cpp - MBlaze DAG Lowering Implementation -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that MBlaze uses to lower LLVM code into a +// selection DAG.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mblaze-lower" +#include "MBlazeISelLowering.h" +#include "MBlazeMachineFunction.h" +#include "MBlazeTargetMachine.h" +#include "MBlazeTargetObjectFile.h" +#include "MBlazeSubtarget.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Intrinsics.h" +#include "llvm/CallingConv.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +const char *MBlazeTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + case MBlazeISD::JmpLink : return "MBlazeISD::JmpLink"; + case MBlazeISD::GPRel : return "MBlazeISD::GPRel"; + case MBlazeISD::Wrap : return "MBlazeISD::Wrap"; + case MBlazeISD::ICmp : return "MBlazeISD::ICmp"; + case MBlazeISD::Ret : return "MBlazeISD::Ret"; + case MBlazeISD::Select_CC : return "MBlazeISD::Select_CC"; + default : return NULL; + } +} + +MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM) + : TargetLowering(TM, new MBlazeTargetObjectFile()) { + Subtarget = &TM.getSubtarget<MBlazeSubtarget>(); + + // MBlaze does not have i1 type, so use i32 for + // setcc operations results (slt, sgt, ...). + setBooleanContents(ZeroOrOneBooleanContent); + + // Set up the register classes + addRegisterClass(MVT::i32, MBlaze::CPURegsRegisterClass); + if (Subtarget->hasFPU()) { + addRegisterClass(MVT::f32, MBlaze::FGR32RegisterClass); + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + } + + // Floating point operations which are not supported + setOperationAction(ISD::FREM, MVT::f32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i8, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i16, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f32, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FPOWI, MVT::f32, Expand); + setOperationAction(ISD::FPOW, MVT::f32, Expand); + setOperationAction(ISD::FLOG, MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG10, MVT::f32, Expand); + setOperationAction(ISD::FEXP, MVT::f32, Expand); + + // Load extended operations for i1 types must be promoted + setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + + // MBlaze has no REM or DIVREM operations.
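+ // (With these marked Expand, the generic DAG legalizer is expected to turn + // an i32 srem/urem into a div/mul/sub sequence when a divider is available, + // or into a runtime library call otherwise; nothing MBlaze-specific is + // required here.)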
+ setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + + // If the processor doesn't support multiply then expand it + if (!Subtarget->hasMul()) { + setOperationAction(ISD::MUL, MVT::i32, Expand); + } + + // If the processor doesn't support 64-bit multiply then expand + if (!Subtarget->hasMul() || !Subtarget->hasMul64()) { + setOperationAction(ISD::MULHS, MVT::i32, Expand); + setOperationAction(ISD::MULHS, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); + setOperationAction(ISD::MULHU, MVT::i64, Expand); + } + + // If the processor doesn't support division then expand + if (!Subtarget->hasDiv()) { + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::SDIV, MVT::i32, Expand); + } + + // Expand unsupported conversions + setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand); + + // Expand SELECT_CC + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + + // MBlaze doesn't have MUL_LOHI + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); + + // Used by legalize types to correctly generate the setcc result. + // Without this, every float setcc comes with a AND/OR with the result, + // we don't want this, since the fpcmp result goes to a flag register, + // which is used implicitly by brcond and select operations. + AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32); + AddPromotedToType(ISD::SELECT, MVT::i1, MVT::i32); + AddPromotedToType(ISD::SELECT_CC, MVT::i1, MVT::i32); + + // MBlaze Custom Operations + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); + setOperationAction(ISD::JumpTable, MVT::i32, Custom); + setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + + // Variable Argument support + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + + + // Operations not directly supported by MBlaze. + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::Other, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTR, MVT::i32, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); + setOperationAction(ISD::CTLZ, MVT::i32, Expand); + setOperationAction(ISD::CTTZ, MVT::i32, Expand); + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + setOperationAction(ISD::BSWAP, MVT::i32, Expand); + + // We don't have line number support yet. 
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); + + // Use the default for now + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + + // MBlaze doesn't have extending float->double load/store + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + + setStackPointerRegisterToSaveRestore(MBlaze::R1); + computeRegisterProperties(); +} + +MVT::SimpleValueType MBlazeTargetLowering::getSetCCResultType(EVT VT) const { + return MVT::i32; +} + +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned MBlazeTargetLowering::getFunctionAlignment(const Function *) const { + return 2; +} + +SDValue MBlazeTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { + switch (Op.getOpcode()) + { + case ISD::ConstantPool: return LowerConstantPool(Op, DAG); + case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::JumpTable: return LowerJumpTable(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::VASTART: return LowerVASTART(Op, DAG); + } + return SDValue(); +} + +//===----------------------------------------------------------------------===// +// Lower helper functions +//===----------------------------------------------------------------------===// +MachineBasicBlock* MBlazeTargetLowering:: +EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB, + DenseMap<MachineBasicBlock*, + MachineBasicBlock*> *EM) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + + switch (MI->getOpcode()) { + default: assert(false && "Unexpected instr type to insert"); + case MBlaze::ShiftRL: + case MBlaze::ShiftRA: + case MBlaze::ShiftL: { + // To "insert" a shift left instruction, we actually have to insert a + // simple loop. The incoming instruction knows the destination vreg to + // set, the source vreg to operate over and the shift amount. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // start: + // andi samt, samt, 31 + // beqid samt, finish + // add dst, src, r0 + // loop: + // addik samt, samt, -1 + // sra dst, dst + // bneid samt, loop + // nop + // finish: + MachineFunction *F = BB->getParent(); + MachineRegisterInfo &R = F->getRegInfo(); + MachineBasicBlock *loop = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *finish = F->CreateMachineBasicBlock(LLVM_BB); + + unsigned IAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); + BuildMI(BB, dl, TII->get(MBlaze::ANDI), IAMT) + .addReg(MI->getOperand(2).getReg()) + .addImm(31); + + unsigned IVAL = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); + BuildMI(BB, dl, TII->get(MBlaze::ADDI), IVAL) + .addReg(MI->getOperand(1).getReg()) + .addImm(0); + + BuildMI(BB, dl, TII->get(MBlaze::BEQID)) + .addReg(IAMT) + .addMBB(finish); + + F->insert(It, loop); + F->insert(It, finish); + + // Update machine-CFG edges by first adding all successors of the current + // block to the new block which will contain the Phi node for the select. + // Also inform sdisel of the edge changes. 
+ for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), + e = BB->succ_end(); i != e; ++i) { + EM->insert(std::make_pair(*i, finish)); + finish->addSuccessor(*i); + } + + // Next, remove all successors of the current block, and add the true + // and fallthrough blocks as its successors. + while(!BB->succ_empty()) + BB->removeSuccessor(BB->succ_begin()); + BB->addSuccessor(loop); + BB->addSuccessor(finish); + + // Next, add the finish block as a successor of the loop block + loop->addSuccessor(finish); + loop->addSuccessor(loop); + + unsigned DST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); + unsigned NDST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); + BuildMI(loop, dl, TII->get(MBlaze::PHI), DST) + .addReg(IVAL).addMBB(BB) + .addReg(NDST).addMBB(loop); + + unsigned SAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); + unsigned NAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); + BuildMI(loop, dl, TII->get(MBlaze::PHI), SAMT) + .addReg(IAMT).addMBB(BB) + .addReg(NAMT).addMBB(loop); + + if (MI->getOpcode() == MBlaze::ShiftL) + BuildMI(loop, dl, TII->get(MBlaze::ADD), NDST).addReg(DST).addReg(DST); + else if (MI->getOpcode() == MBlaze::ShiftRA) + BuildMI(loop, dl, TII->get(MBlaze::SRA), NDST).addReg(DST); + else if (MI->getOpcode() == MBlaze::ShiftRL) + BuildMI(loop, dl, TII->get(MBlaze::SRL), NDST).addReg(DST); + else + llvm_unreachable( "Cannot lower unknown shift instruction" ); + + BuildMI(loop, dl, TII->get(MBlaze::ADDI), NAMT) + .addReg(SAMT) + .addImm(-1); + + BuildMI(loop, dl, TII->get(MBlaze::BNEID)) + .addReg(NAMT) + .addMBB(loop); + + BuildMI(finish, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg()) + .addReg(IVAL).addMBB(BB) + .addReg(NDST).addMBB(loop); + + // The pseudo instruction is no longer needed so remove it + F->DeleteMachineInstr(MI); + return finish; + } + + case MBlaze::Select_FCC: + case MBlaze::Select_CC: { + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineFunction *F = BB->getParent(); + MachineBasicBlock *flsBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *dneBB = F->CreateMachineBasicBlock(LLVM_BB); + + unsigned Opc; + switch (MI->getOperand(4).getImm()) { + default: llvm_unreachable( "Unknown branch condition" ); + case MBlazeCC::EQ: Opc = MBlaze::BNEID; break; + case MBlazeCC::NE: Opc = MBlaze::BEQID; break; + case MBlazeCC::GT: Opc = MBlaze::BLEID; break; + case MBlazeCC::LT: Opc = MBlaze::BGEID; break; + case MBlazeCC::GE: Opc = MBlaze::BLTID; break; + case MBlazeCC::LE: Opc = MBlaze::BGTID; break; + } + + BuildMI(BB, dl, TII->get(Opc)) + .addReg(MI->getOperand(3).getReg()) + .addMBB(dneBB); + + F->insert(It, flsBB); + F->insert(It, dneBB); + + // Update machine-CFG edges by first adding all successors of the current + // block to the new block which will contain the Phi node for the select. + // Also inform sdisel of the edge changes. 
+ for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), + e = BB->succ_end(); i != e; ++i) { + EM->insert(std::make_pair(*i, dneBB)); + dneBB->addSuccessor(*i); + } + + // Next, remove all successors of the current block, and add the true + // and fallthrough blocks as its successors. + while(!BB->succ_empty()) + BB->removeSuccessor(BB->succ_begin()); + BB->addSuccessor(flsBB); + BB->addSuccessor(dneBB); + flsBB->addSuccessor(dneBB); + + // sinkMBB: + // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] + // ... + //BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg()) + // .addReg(MI->getOperand(1).getReg()).addMBB(flsBB) + // .addReg(MI->getOperand(2).getReg()).addMBB(BB); + + BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(2).getReg()).addMBB(flsBB) + .addReg(MI->getOperand(1).getReg()).addMBB(BB); + + F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + return dneBB; + } + } +} + +//===----------------------------------------------------------------------===// +// Misc Lower Operation implementation +//===----------------------------------------------------------------------===// +// + +SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue TrueVal = Op.getOperand(2); + SDValue FalseVal = Op.getOperand(3); + DebugLoc dl = Op.getDebugLoc(); + unsigned Opc; + + SDValue CompareFlag; + if (LHS.getValueType() == MVT::i32) { + Opc = MBlazeISD::Select_CC; + CompareFlag = DAG.getNode(MBlazeISD::ICmp, dl, MVT::i32, LHS, RHS) + .getValue(1); + } else { + llvm_unreachable( "Cannot lower select_cc with unknown type" ); + } + + return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal, + CompareFlag); +} + +SDValue MBlazeTargetLowering:: +LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { + // FIXME there isn't actually debug info here + DebugLoc dl = Op.getDebugLoc(); + GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); + + return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, GA); +} + +SDValue MBlazeTargetLowering:: +LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { + llvm_unreachable("TLS not implemented for MicroBlaze."); + return SDValue(); // Not reached +} + +SDValue MBlazeTargetLowering:: +LowerJumpTable(SDValue Op, SelectionDAG &DAG) { + SDValue ResNode; + SDValue HiPart; + // FIXME there isn't actually debug info here + DebugLoc dl = Op.getDebugLoc(); + bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; + unsigned char OpFlag = IsPIC ? 
MBlazeII::MO_GOT : MBlazeII::MO_ABS_HILO; + + EVT PtrVT = Op.getValueType(); + JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + + SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag); + return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, JTI); + //return JTI; +} + +SDValue MBlazeTargetLowering:: +LowerConstantPool(SDValue Op, SelectionDAG &DAG) { + SDValue ResNode; + EVT PtrVT = Op.getValueType(); + ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); + Constant *C = N->getConstVal(); + SDValue Zero = DAG.getConstant(0, PtrVT); + DebugLoc dl = Op.getDebugLoc(); + + SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), + N->getOffset(), MBlazeII::MO_ABS_HILO); + return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, CP); +} + +SDValue MBlazeTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + SDValue FI = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + + // vastart just stores the address of the VarArgsFrameIndex slot into the + // memory location argument. + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0, + false, false, 0); +} + +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +//===----------------------------------------------------------------------===// + +#include "MBlazeGenCallingConv.inc" + +static bool CC_MBlaze2(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + static const unsigned RegsSize=6; + static const unsigned IntRegs[] = { + MBlaze::R5, MBlaze::R6, MBlaze::R7, + MBlaze::R8, MBlaze::R9, MBlaze::R10 + }; + + static const unsigned FltRegs[] = { + MBlaze::F5, MBlaze::F6, MBlaze::F7, + MBlaze::F8, MBlaze::F9, MBlaze::F10 + }; + + unsigned Reg=0; + + // Promote i8 and i16 + if (LocVT == MVT::i8 || LocVT == MVT::i16) { + LocVT = MVT::i32; + if (ArgFlags.isSExt()) + LocInfo = CCValAssign::SExt; + else if (ArgFlags.isZExt()) + LocInfo = CCValAssign::ZExt; + else + LocInfo = CCValAssign::AExt; + } + + if (ValVT == MVT::i32) { + Reg = State.AllocateReg(IntRegs, RegsSize); + LocVT = MVT::i32; + } else if (ValVT == MVT::f32) { + Reg = State.AllocateReg(FltRegs, RegsSize); + LocVT = MVT::f32; + } + + if (!Reg) { + unsigned SizeInBytes = ValVT.getSizeInBits() >> 3; + unsigned Offset = State.AllocateStack(SizeInBytes, SizeInBytes); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + } else { + unsigned SizeInBytes = ValVT.getSizeInBits() >> 3; + State.AllocateStack(SizeInBytes, SizeInBytes); + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + } + + return false; // CC must always match +} + +//===----------------------------------------------------------------------===// +// Call Calling Convention Implementation +//===----------------------------------------------------------------------===// + +/// LowerCall - functions arguments are copied from virtual regs to +/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. +/// TODO: isVarArg, isTailCall. 
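+/// Rough shape of the DAG built below for a simple, non-vararg call (this is a +/// summary of the code that follows, not an extra guarantee): +/// CALLSEQ_START -> argument stores / CopyToReg -> MBlazeISD::JmpLink -> +/// CALLSEQ_END -> LowerCallResult copies the return values out of physregs.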
+SDValue MBlazeTargetLowering:: +LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, + bool isVarArg, bool &isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { + // MBlaze does not yet support tail call optimization + isTailCall = false; + + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, + *DAG.getContext()); + CCInfo.AnalyzeCallOperands(Outs, CC_MBlaze2); + + // Get a count of how many bytes are to be pushed on the stack. + unsigned NumBytes = CCInfo.getNextStackOffset(); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + SmallVector<SDValue, 8> MemOpChains; + + // First/LastArgStackLoc contains the first/last + // "at stack" argument location. + int LastArgStackLoc = 0; + unsigned FirstStackArgLoc = 0; + + // Walk the register/memloc assignments, inserting copies/loads. + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + EVT RegVT = VA.getLocVT(); + SDValue Arg = Outs[i].Val; + + // Promote the value if needed. + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg); + break; + } + + // Arguments that can be passed on register must be kept at + // RegsToPass vector + if (VA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else { + // Register can't get to this point... + assert(VA.isMemLoc()); + + // Create the frame index object for this incoming parameter + LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset()); + int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, + LastArgStackLoc, true, false); + + SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy()); + + // emit ISD::STORE which stores the + // parameter value to a stack location + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); + } + } + + // Transform all store nodes into one single node because all store + // nodes are independent of each other. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Build a sequence of copy-to-reg nodes chained together with token + // chain and flag operands which copy the outgoing args into registers. + // The InFlag is necessary since all emitted instructions must be + // stuck together. + SDValue InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every + // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol + // node so that legalize doesn't hack it.
+ unsigned char OpFlag = MBlazeII::MO_NO_FLAG; + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), + getPointerTy(), 0, OpFlag); + else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), + getPointerTy(), OpFlag); + + // MBlazeJmpLink = #chain, #target_address, #opt_in_flags... + // = Chain, Callee, Reg#1, Reg#2, ... + // + // Returns a chain & a flag for retval copy to use. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + SmallVector<SDValue, 8> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are + // known live into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + } + + if (InFlag.getNode()) + Ops.push_back(InFlag); + + Chain = DAG.getNode(MBlazeISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + // Create the CALLSEQ_END node. + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), InFlag); + if (!Ins.empty()) + InFlag = Chain.getValue(1); + + // Handle result values, copying them out of physregs into vregs that we + // return. + return LowerCallResult(Chain, InFlag, CallConv, isVarArg, + Ins, dl, DAG, InVals); +} + +/// LowerCallResult - Lower the result values of a call into the +/// appropriate copies out of appropriate physical registers. +SDValue MBlazeTargetLowering:: +LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, + bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); + + CCInfo.AnalyzeCallResult(Ins, RetCC_MBlaze); + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(), + RVLocs[i].getValVT(), InFlag).getValue(1); + InFlag = Chain.getValue(2); + InVals.push_back(Chain.getValue(0)); + } + + return Chain; +} + +//===----------------------------------------------------------------------===// +// Formal Arguments Calling Convention Implementation +//===----------------------------------------------------------------------===// + +/// LowerFormalArguments - transform physical registers into +/// virtual registers and generate load operations for +/// arguments placed on the stack. +SDValue MBlazeTargetLowering:: +LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); + + unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF); + VarArgsFrameIndex = 0; + + // Used with varargs to accumulate store chains. + std::vector<SDValue> OutChains; + + // Keep track of the last register used for arguments + unsigned ArgRegEnd = 0; + + // Assign locations to all of the incoming arguments.
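+ // Per CC_MBlaze2 above, the first six i32/f32 arguments land in R5-R10 + // (or F5-F10) and any further arguments are assigned stack slots.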
+ SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + ArgLocs, *DAG.getContext()); + + CCInfo.AnalyzeFormalArguments(Ins, CC_MBlaze2); + SDValue StackPtr; + + unsigned FirstStackArgLoc = 0; + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + + // Arguments stored on registers + if (VA.isRegLoc()) { + EVT RegVT = VA.getLocVT(); + ArgRegEnd = VA.getLocReg(); + TargetRegisterClass *RC = 0; + + if (RegVT == MVT::i32) + RC = MBlaze::CPURegsRegisterClass; + else if (RegVT == MVT::f32) + RC = MBlaze::FGR32RegisterClass; + else + llvm_unreachable("RegVT not supported by LowerFormalArguments"); + + // Transform the arguments stored on + // physical registers into virtual ones + unsigned Reg = MF.addLiveIn(ArgRegEnd, RC); + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); + + // If this is an 8 or 16-bit value, it has been passed promoted + // to 32 bits. Insert an assert[sz]ext to capture this, then + // truncate to the right size. If it is a floating point value + // then convert to the correct type. + if (VA.getLocInfo() != CCValAssign::Full) { + unsigned Opcode = 0; + if (VA.getLocInfo() == CCValAssign::SExt) + Opcode = ISD::AssertSext; + else if (VA.getLocInfo() == CCValAssign::ZExt) + Opcode = ISD::AssertZext; + if (Opcode) + ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); + } + + InVals.push_back(ArgValue); + + } else { // VA.isRegLoc() + + // sanity check + assert(VA.isMemLoc()); + + // The last argument is not a register + ArgRegEnd = 0; + + // The stack pointer offset is relative to the caller stack frame. + // Since the real stack size is unknown here, a negative SPOffset + // is used so there's a way to adjust these offsets when the stack + // size gets known (on EliminateFrameIndex). A dummy SPOffset is + // used instead of a direct negative address (which is recorded to + // be used on emitPrologue) to avoid mis-calc of the first stack + // offset on PEI::calculateFrameObjectOffsets. + // Arguments are always 32-bit. + unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; + int FI = MFI->CreateFixedObject(ArgSize, 0, true, false); + MBlazeFI->recordLoadArgsFI(FI, -(ArgSize+ + (FirstStackArgLoc + VA.getLocMemOffset()))); + + // Create load nodes to retrieve arguments from the stack + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); + } + } + + // To meet ABI, when VARARGS are passed on registers, the registers + // must have their values written to the caller stack frame. If the last + // argument was placed in the stack, there's no need to save any register.
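+ // Illustration (not from the original source): for a vararg function with a + // single named i32 argument the named value arrives in R5, so the loop below + // spills R6-R10 to fixed stack slots and va_arg can then walk all remaining + // arguments contiguously in memory.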
+ if ((isVarArg) && ArgRegEnd) { + if (StackPtr.getNode() == 0) + StackPtr = DAG.getRegister(StackReg, getPointerTy()); + + // The last register argument that must be saved is MBlaze::R10 + TargetRegisterClass *RC = MBlaze::CPURegsRegisterClass; + + unsigned Begin = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R5); + unsigned Start = MBlazeRegisterInfo::getRegisterNumbering(ArgRegEnd+1); + unsigned End = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R10); + unsigned StackLoc = ArgLocs.size()-1 + (Start - Begin); + + for (; Start <= End; ++Start, ++StackLoc) { + unsigned Reg = MBlazeRegisterInfo::getRegisterFromNumbering(Start); + unsigned LiveReg = MF.addLiveIn(Reg, RC); + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32); + + int FI = MFI->CreateFixedObject(4, 0, true, false); + MBlazeFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4))); + SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); + OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0, + false, false, 0)); + + // Record the frame index of the first variable argument + // which is a value necessary to VASTART. + if (!VarArgsFrameIndex) + VarArgsFrameIndex = FI; + } + } + + // All stores are grouped in one node to allow the matching between + // the size of Ins and InVals. This only happens for vararg functions. + if (!OutChains.empty()) { + OutChains.push_back(Chain); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], OutChains.size()); + } + + return Chain; +} + +//===----------------------------------------------------------------------===// +// Return Value Calling Convention Implementation +//===----------------------------------------------------------------------===// + +SDValue MBlazeTargetLowering:: +LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + DebugLoc dl, SelectionDAG &DAG) { + // CCValAssign - represent the assignment of + // the return value to a location + SmallVector<CCValAssign, 16> RVLocs; + + // CCState - Info about the registers and stack slot. + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); + + // Analyze return values. + CCInfo.AnalyzeReturn(Outs, RetCC_MBlaze); + + // If this is the first return lowered for this function, add + // the regs to the liveout set for the function. + if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { + for (unsigned i = 0; i != RVLocs.size(); ++i) + if (RVLocs[i].isRegLoc()) + DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); + } + + SDValue Flag; + + // Copy the result values into the output registers.
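+ // Which registers carry return values is decided by RetCC_MBlaze, generated + // from the calling-convention .td file (not shown here); on MicroBlaze the + // integer result is conventionally returned in R3.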
+ for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + Outs[i].Val, Flag); + + // guarantee that all emitted copies are + // stuck together, avoiding something bad + Flag = Chain.getValue(1); + } + + // Return on MBlaze is always a "rtsd R15, 8" + if (Flag.getNode()) + return DAG.getNode(MBlazeISD::Ret, dl, MVT::Other, + Chain, DAG.getRegister(MBlaze::R15, MVT::i32), Flag); + else // Return Void + return DAG.getNode(MBlazeISD::Ret, dl, MVT::Other, + Chain, DAG.getRegister(MBlaze::R15, MVT::i32)); +} + +//===----------------------------------------------------------------------===// +// MBlaze Inline Assembly Support +//===----------------------------------------------------------------------===// + +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. +MBlazeTargetLowering::ConstraintType MBlazeTargetLowering:: +getConstraintType(const std::string &Constraint) const +{ + // MBlaze specific constraints + // + // 'd' : An address register. Equivalent to r. + // 'y' : Equivalent to r; retained for + // backwards compatibility. + // 'f' : Floating Point registers. + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default : break; + case 'd': + case 'y': + case 'f': + return C_RegisterClass; + break; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +/// getRegForInlineAsmConstraint - Given a constraint letter (e.g. "r"), +/// return a register and register class that can be used to satisfy the +/// constraint. This should only be used for C_RegisterClass constraints. +std::pair<unsigned, const TargetRegisterClass*> MBlazeTargetLowering:: +getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': + return std::make_pair(0U, MBlaze::CPURegsRegisterClass); + case 'f': + if (VT == MVT::f32) + return std::make_pair(0U, MBlaze::FGR32RegisterClass); + } + } + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); +} + +/// getRegClassForInlineAsmConstraint - Given a register class constraint, like +/// 'r', return a list of registers in that class that can be used to satisfy +/// the constraint. +std::vector<unsigned> MBlazeTargetLowering:: +getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { + if (Constraint.size() != 1) + return std::vector<unsigned>(); + + switch (Constraint[0]) { + default : break; + case 'r': + // GCC MBlaze Constraint Letters + case 'd': + case 'y': + return make_vector<unsigned>( + MBlaze::R3, MBlaze::R4, MBlaze::R5, MBlaze::R6, + MBlaze::R7, MBlaze::R9, MBlaze::R10, MBlaze::R11, + MBlaze::R12, MBlaze::R19, MBlaze::R20, MBlaze::R21, + MBlaze::R22, MBlaze::R23, MBlaze::R24, MBlaze::R25, + MBlaze::R26, MBlaze::R27, MBlaze::R28, MBlaze::R29, + MBlaze::R30, MBlaze::R31, 0); + + case 'f': + return make_vector<unsigned>( + MBlaze::F3, MBlaze::F4, MBlaze::F5, MBlaze::F6, + MBlaze::F7, MBlaze::F9, MBlaze::F10, MBlaze::F11, + MBlaze::F12, MBlaze::F19, MBlaze::F20, MBlaze::F21, + MBlaze::F22, MBlaze::F23, MBlaze::F24, MBlaze::F25, + MBlaze::F26, MBlaze::F27, MBlaze::F28, MBlaze::F29, + MBlaze::F30, MBlaze::F31, 0); + } + return std::vector<unsigned>(); +} + +bool MBlazeTargetLowering:: +isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { + // The MBlaze target isn't yet aware of offsets.
+ return false; +} + +bool MBlazeTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { + return VT != MVT::f32; +} diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h new file mode 100644 index 0000000..f8b1470 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeISelLowering.h @@ -0,0 +1,149 @@ +//===-- MBlazeISelLowering.h - MBlaze DAG Lowering Interface ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that MBlaze uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef MBlazeISELLOWERING_H +#define MBlazeISELLOWERING_H + +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" +#include "MBlaze.h" +#include "MBlazeSubtarget.h" + +namespace llvm { + namespace MBlazeCC { + enum CC { + FIRST = 0, + EQ, + NE, + GT, + LT, + GE, + LE + }; + } + + namespace MBlazeISD { + enum NodeType { + // Start the numbering from where ISD NodeType finishes. + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + // Jump and link (call) + JmpLink, + + // Handle gp_rel (small data/bss sections) relocation. + GPRel, + + // Select CC Pseudo Instruction + Select_CC, + + // Wrap up multiple types of instructions + Wrap, + + // Integer Compare + ICmp, + + // Return + Ret + }; + } + + //===--------------------------------------------------------------------===// + // TargetLowering Implementation + //===--------------------------------------------------------------------===// + + class MBlazeTargetLowering : public TargetLowering { + int VarArgsFrameIndex; // FrameIndex for start of varargs area. + + public: + + explicit MBlazeTargetLowering(MBlazeTargetMachine &TM); + + /// LowerOperation - Provide custom lowering hooks for some operations. + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + + /// getTargetNodeName - This method returns the name of a target specific + // DAG node. 
+ virtual const char *getTargetNodeName(unsigned Opcode) const; + + /// getSetCCResultType - get the ISD::SETCC result ValueType + MVT::SimpleValueType getSetCCResultType(EVT VT) const; + + virtual unsigned getFunctionAlignment(const Function *F) const; + private: + // Subtarget Info + const MBlazeSubtarget *Subtarget; + + + // Lower Operand helpers + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals); + + // Lower Operand specifics + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); + + virtual SDValue + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals); + + virtual SDValue + LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool &isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals); + + virtual SDValue + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + DebugLoc dl, SelectionDAG &DAG); + + virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + + // Inline asm support + ConstraintType getConstraintType(const std::string &Constraint) const; + + std::pair<unsigned, const TargetRegisterClass*> + getRegForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const; + + std::vector<unsigned> + getRegClassForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const; + + virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + + /// isFPImmLegal - Returns true if the target can instruction select the + /// specified FP immediate natively. If false, the legalizer will + /// materialize the FP immediate as a load from a constant pool. + virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; + }; +} + +#endif // MBlazeISELLOWERING_H diff --git a/lib/Target/MBlaze/MBlazeInstrFPU.td b/lib/Target/MBlaze/MBlazeInstrFPU.td new file mode 100644 index 0000000..a48a8c9 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeInstrFPU.td @@ -0,0 +1,223 @@ +//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MBlaze profiles and nodes +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MBlaze Operand, Complex Patterns and Transformations Definitions. 
+//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Memory Access Instructions +//===----------------------------------------------------------------------===// +class LoadFM<bits<6> op, string instr_asm, PatFrag OpNode> : + TA<op, 0x000, (outs FGR32:$dst), (ins memrr:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(set FGR32:$dst, (OpNode xaddr:$addr))], IILoad>; + +class LoadFMI<bits<6> op, string instr_asm, PatFrag OpNode> : + TAI<op, (outs FGR32:$dst), (ins memri:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(set FGR32:$dst, (OpNode iaddr:$addr))], IILoad>; + +class StoreFM<bits<6> op, string instr_asm, PatFrag OpNode> : + TA<op, 0x000, (outs), (ins FGR32:$dst, memrr:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(OpNode FGR32:$dst, xaddr:$addr)], IIStore>; + +class StoreFMI<bits<6> op, string instr_asm, PatFrag OpNode> : + TAI<op, (outs), (ins FGR32:$dst, memrr:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(OpNode FGR32:$dst, iaddr:$addr)], IIStore>; + +class ArithF<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode, + InstrItinClass itin> : + TA<op, flags, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [(set FGR32:$dst, (OpNode FGR32:$b, FGR32:$c))], itin>; + +class CmpFN<bits<6> op, bits<11> flags, string instr_asm, + InstrItinClass itin> : + TA<op, flags, (outs CPURegs:$dst), (ins FGR32:$b, FGR32:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [], itin>; + +class ArithFR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode, + InstrItinClass itin> : + TA<op, flags, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c), + !strconcat(instr_asm, " $dst, $c, $b"), + [(set FGR32:$dst, (OpNode FGR32:$b, FGR32:$c))], itin>; + +class ArithF2<bits<6> op, bits<11> flags, string instr_asm, + InstrItinClass itin> : + TF<op, flags, (outs FGR32:$dst), (ins FGR32:$b), + !strconcat(instr_asm, " $dst, $b"), + [], itin>; + +class ArithIF<bits<6> op, bits<11> flags, string instr_asm, + InstrItinClass itin> : + TF<op, flags, (outs FGR32:$dst), (ins CPURegs:$b), + !strconcat(instr_asm, " $dst, $b"), + [], itin>; + +class ArithFI<bits<6> op, bits<11> flags, string instr_asm, + InstrItinClass itin> : + TF<op, flags, (outs CPURegs:$dst), (ins FGR32:$b), + !strconcat(instr_asm, " $dst, $b"), + [], itin>; + +class LogicF<bits<6> op, string instr_asm> : + TAI<op, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [], + IIAlu>; + +class LogicFI<bits<6> op, string instr_asm> : + TAI<op, (outs FGR32:$dst), (ins FGR32:$b, fimm:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [], + IIAlu>; + +//===----------------------------------------------------------------------===// +// Pseudo instructions +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// FPU Arithmetic Instructions +//===----------------------------------------------------------------------===// +let Predicates=[HasFPU] in { + def FOR : LogicF<0x28, "or ">; + def FORI : LogicFI<0x28, "ori ">; + def FADD : ArithF<0x16, 0x000, "fadd ", fadd, IIAlu>; + def FRSUB : ArithFR<0x16, 0x080, "frsub ", fsub, IIAlu>; + def FMUL : ArithF<0x16, 0x100, "fmul ", fmul, IIAlu>; + def FDIV : ArithF<0x16, 0x180, "fdiv ", fdiv, IIAlu>; + + def LWF : LoadFM<0x32, "lw ", load>; + def LWFI : LoadFMI<0x32, "lwi ", load>; + + def SWF : 
StoreFM<0x32, "sw ", store>; + def SWFI : StoreFMI<0x32, "swi ", store>; +} + +let Predicates=[HasFPU,HasSqrt] in { + def FLT : ArithIF<0x16, 0x280, "flt ", IIAlu>; + def FINT : ArithFI<0x16, 0x300, "fint ", IIAlu>; + def FSQRT : ArithF2<0x16, 0x300, "fsqrt ", IIAlu>; +} + +let isAsCheapAsAMove = 1 in { + def FCMP_UN : CmpFN<0x16, 0x200, "fcmp.un", IIAlu>; + def FCMP_LT : CmpFN<0x16, 0x210, "fcmp.lt", IIAlu>; + def FCMP_EQ : CmpFN<0x16, 0x220, "fcmp.eq", IIAlu>; + def FCMP_LE : CmpFN<0x16, 0x230, "fcmp.le", IIAlu>; + def FCMP_GT : CmpFN<0x16, 0x240, "fcmp.gt", IIAlu>; + def FCMP_NE : CmpFN<0x16, 0x250, "fcmp.ne", IIAlu>; + def FCMP_GE : CmpFN<0x16, 0x260, "fcmp.ge", IIAlu>; +} + + +let usesCustomInserter = 1 in { + def Select_FCC : MBlazePseudo<(outs FGR32:$dst), + (ins FGR32:$T, FGR32:$F, CPURegs:$CMP, i32imm:$CC), + "; SELECT_FCC PSEUDO!", + []>; +} + +// Floating point conversions +let Predicates=[HasFPU] in { + def : Pat<(sint_to_fp CPURegs:$V), (FLT CPURegs:$V)>; + def : Pat<(fp_to_sint FGR32:$V), (FINT FGR32:$V)>; + def : Pat<(fsqrt FGR32:$V), (FSQRT FGR32:$V)>; +} + +// SET_CC operations +let Predicates=[HasFPU] in { + def : Pat<(setcc FGR32:$L, FGR32:$R, SETEQ), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_EQ FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETNE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_EQ FGR32:$L, FGR32:$R), 1)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETOEQ), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_EQ FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETONE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (XOR (FCMP_UN FGR32:$L, FGR32:$R), + (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETONE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (OR (FCMP_UN FGR32:$L, FGR32:$R), + (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETGT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_GT FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETLT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_LT FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETGE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_GE FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETLE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_LE FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETOGT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_GT FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETOLT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_LT FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETOGE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_GE FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETOLE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_LE FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETUEQ), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (OR (FCMP_UN FGR32:$L, FGR32:$R), + (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETUNE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_NE FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETUGT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (OR (FCMP_UN FGR32:$L, FGR32:$R), + (FCMP_GT FGR32:$L, FGR32:$R)), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETULT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (OR (FCMP_UN FGR32:$L, FGR32:$R), + (FCMP_LT FGR32:$L, FGR32:$R)), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETUGE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + 
(OR (FCMP_UN FGR32:$L, FGR32:$R), + (FCMP_GE FGR32:$L, FGR32:$R)), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETULE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (OR (FCMP_UN FGR32:$L, FGR32:$R), + (FCMP_LE FGR32:$L, FGR32:$R)), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETO), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_UN FGR32:$L, FGR32:$R), 1)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETUO), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_UN FGR32:$L, FGR32:$R), 2)>; +} + +// SELECT operations +def : Pat<(select CPURegs:$C, FGR32:$T, FGR32:$F), + (Select_FCC FGR32:$T, FGR32:$F, CPURegs:$C, 2)>; + +//===----------------------------------------------------------------------===// +// Patterns for Floating Point Instructions +//===----------------------------------------------------------------------===// +def : Pat<(f32 fpimm:$imm), (FORI F0, fpimm:$imm)>; diff --git a/lib/Target/MBlaze/MBlazeInstrFSL.td b/lib/Target/MBlaze/MBlazeInstrFSL.td new file mode 100644 index 0000000..b59999e --- /dev/null +++ b/lib/Target/MBlaze/MBlazeInstrFSL.td @@ -0,0 +1,153 @@ +//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// FSL Instruction Formats +//===----------------------------------------------------------------------===// +class FSLGetD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> : + TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b), + !strconcat(instr_asm, " $dst, $b"), + [(set CPURegs:$dst, (OpNode CPURegs:$b))], IIAlu>; + +class FSLGet<bits<6> op, string instr_asm, Intrinsic OpNode> : + TAI<op, (outs CPURegs:$dst), (ins fslimm:$b), + !strconcat(instr_asm, " $dst, $b"), + [(set CPURegs:$dst, (OpNode immZExt4:$b))], IIAlu>; + +class FSLPutD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> : + TA<op, flags, (outs), (ins CPURegs:$v, CPURegs:$b), + !strconcat(instr_asm, " $v, $b"), + [(OpNode CPURegs:$v, CPURegs:$b)], IIAlu>; + +class FSLPut<bits<6> op, string instr_asm, Intrinsic OpNode> : + TAI<op, (outs), (ins CPURegs:$v, fslimm:$b), + !strconcat(instr_asm, " $v, $b"), + [(OpNode CPURegs:$v, immZExt4:$b)], IIAlu>; + +class FSLPutTD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> : + TA<op, flags, (outs), (ins CPURegs:$b), + !strconcat(instr_asm, " $b"), + [(OpNode CPURegs:$b)], IIAlu>; + +class FSLPutT<bits<6> op, string instr_asm, Intrinsic OpNode> : + TAI<op, (outs), (ins fslimm:$b), + !strconcat(instr_asm, " $b"), + [(OpNode immZExt4:$b)], IIAlu>; + +//===----------------------------------------------------------------------===// +// FSL Get Instructions +//===----------------------------------------------------------------------===// +def GET : FSLGet<0x1B, "get ", int_mblaze_fsl_get>; +def AGET : FSLGet<0x1B, "aget ", int_mblaze_fsl_aget>; +def CGET : FSLGet<0x1B, "cget ", int_mblaze_fsl_cget>; +def CAGET : FSLGet<0x1B, "caget ", int_mblaze_fsl_caget>; +def EGET : FSLGet<0x1B, "eget ", int_mblaze_fsl_eget>; +def EAGET : FSLGet<0x1B, "eaget ", int_mblaze_fsl_eaget>; +def ECGET : FSLGet<0x1B, "ecget ", int_mblaze_fsl_ecget>; +def ECAGET : FSLGet<0x1B, "ecaget ", int_mblaze_fsl_ecaget>; +def NGET : FSLGet<0x1B, "nget ", int_mblaze_fsl_nget>; +def NAGET : FSLGet<0x1B, "naget ", 
int_mblaze_fsl_naget>; +def NCGET : FSLGet<0x1B, "ncget ", int_mblaze_fsl_ncget>; +def NCAGET : FSLGet<0x1B, "ncaget ", int_mblaze_fsl_ncaget>; +def NEGET : FSLGet<0x1B, "neget ", int_mblaze_fsl_neget>; +def NEAGET : FSLGet<0x1B, "neaget ", int_mblaze_fsl_neaget>; +def NECGET : FSLGet<0x1B, "necget ", int_mblaze_fsl_necget>; +def NECAGET : FSLGet<0x1B, "necaget ", int_mblaze_fsl_necaget>; +def TGET : FSLGet<0x1B, "tget ", int_mblaze_fsl_tget>; +def TAGET : FSLGet<0x1B, "taget ", int_mblaze_fsl_taget>; +def TCGET : FSLGet<0x1B, "tcget ", int_mblaze_fsl_tcget>; +def TCAGET : FSLGet<0x1B, "tcaget ", int_mblaze_fsl_tcaget>; +def TEGET : FSLGet<0x1B, "teget ", int_mblaze_fsl_teget>; +def TEAGET : FSLGet<0x1B, "teaget ", int_mblaze_fsl_teaget>; +def TECGET : FSLGet<0x1B, "tecget ", int_mblaze_fsl_tecget>; +def TECAGET : FSLGet<0x1B, "tecaget ", int_mblaze_fsl_tecaget>; +def TNGET : FSLGet<0x1B, "tnget ", int_mblaze_fsl_tnget>; +def TNAGET : FSLGet<0x1B, "tnaget ", int_mblaze_fsl_tnaget>; +def TNCGET : FSLGet<0x1B, "tncget ", int_mblaze_fsl_tncget>; +def TNCAGET : FSLGet<0x1B, "tncaget ", int_mblaze_fsl_tncaget>; +def TNEGET : FSLGet<0x1B, "tneget ", int_mblaze_fsl_tneget>; +def TNEAGET : FSLGet<0x1B, "tneaget ", int_mblaze_fsl_tneaget>; +def TNECGET : FSLGet<0x1B, "tnecget ", int_mblaze_fsl_tnecget>; +def TNECAGET : FSLGet<0x1B, "tnecaget ", int_mblaze_fsl_tnecaget>; + +//===----------------------------------------------------------------------===// +// FSL Dynamic Get Instructions +//===----------------------------------------------------------------------===// +def GETD : FSLGetD<0x1B, 0x00, "getd ", int_mblaze_fsl_get>; +def AGETD : FSLGetD<0x1B, 0x00, "agetd ", int_mblaze_fsl_aget>; +def CGETD : FSLGetD<0x1B, 0x00, "cgetd ", int_mblaze_fsl_cget>; +def CAGETD : FSLGetD<0x1B, 0x00, "cagetd ", int_mblaze_fsl_caget>; +def EGETD : FSLGetD<0x1B, 0x00, "egetd ", int_mblaze_fsl_eget>; +def EAGETD : FSLGetD<0x1B, 0x00, "eagetd ", int_mblaze_fsl_eaget>; +def ECGETD : FSLGetD<0x1B, 0x00, "ecgetd ", int_mblaze_fsl_ecget>; +def ECAGETD : FSLGetD<0x1B, 0x00, "ecagetd ", int_mblaze_fsl_ecaget>; +def NGETD : FSLGetD<0x1B, 0x00, "ngetd ", int_mblaze_fsl_nget>; +def NAGETD : FSLGetD<0x1B, 0x00, "nagetd ", int_mblaze_fsl_naget>; +def NCGETD : FSLGetD<0x1B, 0x00, "ncgetd ", int_mblaze_fsl_ncget>; +def NCAGETD : FSLGetD<0x1B, 0x00, "ncagetd ", int_mblaze_fsl_ncaget>; +def NEGETD : FSLGetD<0x1B, 0x00, "negetd ", int_mblaze_fsl_neget>; +def NEAGETD : FSLGetD<0x1B, 0x00, "neagetd ", int_mblaze_fsl_neaget>; +def NECGETD : FSLGetD<0x1B, 0x00, "necgetd ", int_mblaze_fsl_necget>; +def NECAGETD : FSLGetD<0x1B, 0x00, "necagetd ", int_mblaze_fsl_necaget>; +def TGETD : FSLGetD<0x1B, 0x00, "tgetd ", int_mblaze_fsl_tget>; +def TAGETD : FSLGetD<0x1B, 0x00, "tagetd ", int_mblaze_fsl_taget>; +def TCGETD : FSLGetD<0x1B, 0x00, "tcgetd ", int_mblaze_fsl_tcget>; +def TCAGETD : FSLGetD<0x1B, 0x00, "tcagetd ", int_mblaze_fsl_tcaget>; +def TEGETD : FSLGetD<0x1B, 0x00, "tegetd ", int_mblaze_fsl_teget>; +def TEAGETD : FSLGetD<0x1B, 0x00, "teagetd ", int_mblaze_fsl_teaget>; +def TECGETD : FSLGetD<0x1B, 0x00, "tecgetd ", int_mblaze_fsl_tecget>; +def TECAGETD : FSLGetD<0x1B, 0x00, "tecagetd ", int_mblaze_fsl_tecaget>; +def TNGETD : FSLGetD<0x1B, 0x00, "tngetd ", int_mblaze_fsl_tnget>; +def TNAGETD : FSLGetD<0x1B, 0x00, "tnagetd ", int_mblaze_fsl_tnaget>; +def TNCGETD : FSLGetD<0x1B, 0x00, "tncgetd ", int_mblaze_fsl_tncget>; +def TNCAGETD : FSLGetD<0x1B, 0x00, "tncagetd ", int_mblaze_fsl_tncaget>; +def TNEGETD : FSLGetD<0x1B, 0x00, "tnegetd 
", int_mblaze_fsl_tneget>; +def TNEAGETD : FSLGetD<0x1B, 0x00, "tneagetd ", int_mblaze_fsl_tneaget>; +def TNECGETD : FSLGetD<0x1B, 0x00, "tnecgetd ", int_mblaze_fsl_tnecget>; +def TNECAGETD : FSLGetD<0x1B, 0x00, "tnecagetd", int_mblaze_fsl_tnecaget>; + +//===----------------------------------------------------------------------===// +// FSL Put Instructions +//===----------------------------------------------------------------------===// +def PUT : FSLPut<0x1B, "put ", int_mblaze_fsl_put>; +def APUT : FSLPut<0x1B, "aput ", int_mblaze_fsl_aput>; +def CPUT : FSLPut<0x1B, "cput ", int_mblaze_fsl_cput>; +def CAPUT : FSLPut<0x1B, "caput ", int_mblaze_fsl_caput>; +def NPUT : FSLPut<0x1B, "nput ", int_mblaze_fsl_nput>; +def NAPUT : FSLPut<0x1B, "naput ", int_mblaze_fsl_naput>; +def NCPUT : FSLPut<0x1B, "ncput ", int_mblaze_fsl_ncput>; +def NCAPUT : FSLPut<0x1B, "ncaput ", int_mblaze_fsl_ncaput>; +def TPUT : FSLPutT<0x1B, "tput ", int_mblaze_fsl_tput>; +def TAPUT : FSLPutT<0x1B, "taput ", int_mblaze_fsl_taput>; +def TCPUT : FSLPutT<0x1B, "tcput ", int_mblaze_fsl_tcput>; +def TCAPUT : FSLPutT<0x1B, "tcaput ", int_mblaze_fsl_tcaput>; +def TNPUT : FSLPutT<0x1B, "tnput ", int_mblaze_fsl_tnput>; +def TNAPUT : FSLPutT<0x1B, "tnaput ", int_mblaze_fsl_tnaput>; +def TNCPUT : FSLPutT<0x1B, "tncput ", int_mblaze_fsl_tncput>; +def TNCAPUT : FSLPutT<0x1B, "tncaput ", int_mblaze_fsl_tncaput>; + +//===----------------------------------------------------------------------===// +// FSL Dynamic Put Instructions +//===----------------------------------------------------------------------===// +def PUTD : FSLPutD<0x1B, 0x00, "putd ", int_mblaze_fsl_put>; +def APUTD : FSLPutD<0x1B, 0x00, "aputd ", int_mblaze_fsl_aput>; +def CPUTD : FSLPutD<0x1B, 0x00, "cputd ", int_mblaze_fsl_cput>; +def CAPUTD : FSLPutD<0x1B, 0x00, "caputd ", int_mblaze_fsl_caput>; +def NPUTD : FSLPutD<0x1B, 0x00, "nputd ", int_mblaze_fsl_nput>; +def NAPUTD : FSLPutD<0x1B, 0x00, "naputd ", int_mblaze_fsl_naput>; +def NCPUTD : FSLPutD<0x1B, 0x00, "ncputd ", int_mblaze_fsl_ncput>; +def NCAPUTD : FSLPutD<0x1B, 0x00, "ncaputd ", int_mblaze_fsl_ncaput>; +def TPUTD : FSLPutTD<0x1B, 0x00, "tputd ", int_mblaze_fsl_tput>; +def TAPUTD : FSLPutTD<0x1B, 0x00, "taputd ", int_mblaze_fsl_taput>; +def TCPUTD : FSLPutTD<0x1B, 0x00, "tcputd ", int_mblaze_fsl_tcput>; +def TCAPUTD : FSLPutTD<0x1B, 0x00, "tcaputd ", int_mblaze_fsl_tcaput>; +def TNPUTD : FSLPutTD<0x1B, 0x00, "tnputd ", int_mblaze_fsl_tnput>; +def TNAPUTD : FSLPutTD<0x1B, 0x00, "tnaputd ", int_mblaze_fsl_tnaput>; +def TNCPUTD : FSLPutTD<0x1B, 0x00, "tncputd ", int_mblaze_fsl_tncput>; +def TNCAPUTD : FSLPutTD<0x1B, 0x00, "tncaputd ", int_mblaze_fsl_tncaput>; diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td new file mode 100644 index 0000000..7d65543 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeInstrFormats.td @@ -0,0 +1,246 @@ +//===- MBlazeInstrFormats.td - MB Instruction defs --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Describe MBlaze instructions format +// +// CPU INSTRUCTION FORMATS +// +// opcode - operation code. +// rd - dst reg. +// ra - first src. reg. +// rb - second src. reg. +// imm16 - 16-bit immediate value. 
+// +//===----------------------------------------------------------------------===// + +// Generic MBlaze Format +class MBlazeInst<dag outs, dag ins, string asmstr, list<dag> pattern, + InstrItinClass itin> : Instruction +{ + field bits<32> Inst; + + let Namespace = "MBlaze"; + + bits<6> opcode; + + // Top 6 bits are the 'opcode' field + let Inst{0-5} = opcode; + + dag OutOperandList = outs; + dag InOperandList = ins; + + let AsmString = asmstr; + let Pattern = pattern; + let Itinerary = itin; +} + +//===----------------------------------------------------------------------===// +// Pseudo instruction class +//===----------------------------------------------------------------------===// +class MBlazePseudo<dag outs, dag ins, string asmstr, list<dag> pattern>: + MBlazeInst<outs, ins, asmstr, pattern, IIPseudo>; + +//===----------------------------------------------------------------------===// +// Type A instruction class in MBlaze : <|opcode|rd|ra|rb|flags|> +//===----------------------------------------------------------------------===// + +class TA<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> rd; + bits<5> ra; + bits<5> rb; + + let opcode = op; + + let Inst{6-10} = rd; + let Inst{11-15} = ra; + let Inst{16-20} = rb; + let Inst{21-31} = flags; +} + +class TAI<bits<6> op, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> rd; + bits<5> ra; + bits<16> imm16; + + let opcode = op; + + let Inst{6-10} = rd; + let Inst{11-15} = ra; + let Inst{16-31} = imm16; +} + +class TIMM<bits<6> op, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> ra; + bits<16> imm16; + + let opcode = op; + + let Inst{6-15} = 0; + let Inst{16-31} = imm16; +} + +class TADDR<bits<6> op, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<26> addr; + + let opcode = op; + + let Inst{6-31} = addr; +} + +//===----------------------------------------------------------------------===// +// Type B instruction class in MBlaze : <|opcode|rd|ra|immediate|> +//===----------------------------------------------------------------------===// + +class TB<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, + InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> rd; + bits<5> ra; + bits<16> imm16; + + let opcode = op; + + let Inst{6-10} = rd; + let Inst{11-15} = ra; + let Inst{16-31} = imm16; +} + +//===----------------------------------------------------------------------===// +// Float instruction class in MBlaze : <|opcode|rd|ra|flags|> +//===----------------------------------------------------------------------===// + +class TF<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> rd; + bits<5> ra; + + let opcode = op; + + let Inst{6-10} = rd; + let Inst{11-15} = ra; + let Inst{16-20} = 0; + let Inst{21-31} = flags; +} + +//===----------------------------------------------------------------------===// +// Branch instruction class in MBlaze : <|opcode|rd|br|ra|flags|> +//===----------------------------------------------------------------------===// + +class TBR<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag 
ins, + string asmstr, list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> ra; + + let opcode = op; + + let Inst{6-10} = 0; + let Inst{11-15} = br; + let Inst{16-20} = ra; + let Inst{21-31} = flags; +} + +class TBRC<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins, + string asmstr, list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> ra; + bits<5> rb; + + let opcode = op; + + let Inst{6-10} = br; + let Inst{11-15} = ra; + let Inst{16-20} = rb; + let Inst{21-31} = flags; +} + +class TBRL<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins, + string asmstr, list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> ra; + + let opcode = op; + + let Inst{6-10} = 0xF; + let Inst{11-15} = br; + let Inst{16-20} = ra; + let Inst{21-31} = flags; +} + +class TBRI<bits<6> op, bits<5> br, dag outs, dag ins, + string asmstr, list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<16> imm16; + + let opcode = op; + + let Inst{6-10} = 0; + let Inst{11-15} = br; + let Inst{16-31} = imm16; +} + +class TBRLI<bits<6> op, bits<5> br, dag outs, dag ins, + string asmstr, list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<16> imm16; + + let opcode = op; + + let Inst{6-10} = 0xF; + let Inst{11-15} = br; + let Inst{16-31} = imm16; +} + +class TBRCI<bits<6> op, bits<5> br, dag outs, dag ins, + string asmstr, list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> ra; + bits<16> imm16; + + let opcode = op; + + let Inst{6-10} = br; + let Inst{11-15} = ra; + let Inst{16-31} = imm16; +} + +class TRET<bits<6> op, dag outs, dag ins, + string asmstr, list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> ra; + bits<16> imm16; + + let opcode = op; + + let Inst{6-10} = 0x10; + let Inst{11-15} = ra; + let Inst{16-31} = imm16; +} diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp new file mode 100644 index 0000000..a7e8eb7 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp @@ -0,0 +1,222 @@ +//===- MBlazeInstrInfo.cpp - MBlaze Instruction Information -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MBlaze implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "MBlazeInstrInfo.h" +#include "MBlazeTargetMachine.h" +#include "MBlazeMachineFunction.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "MBlazeGenInstrInfo.inc" + +using namespace llvm; + +MBlazeInstrInfo::MBlazeInstrInfo(MBlazeTargetMachine &tm) + : TargetInstrInfoImpl(MBlazeInsts, array_lengthof(MBlazeInsts)), + TM(tm), RI(*TM.getSubtargetImpl(), *this) {} + +static bool isZeroImm(const MachineOperand &op) { + return op.isImm() && op.getImm() == 0; +} + +/// Return true if the instruction is a register to register move and +/// leave the source and dest operands in the passed parameters. 
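A brief usage sketch before the implementation that follows: the helper function and the way the result is consumed are hypothetical, only the isMoveInstr signature comes from the MBlazeInstrInfo.h declared in this patch.

#include "MBlazeInstrInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"

using namespace llvm;

// Hypothetical helper: count register-to-register moves in a block by
// asking the target hook which instructions are plain copies.
static unsigned countRegisterMoves(const MachineBasicBlock &MBB,
                                   const MBlazeInstrInfo &TII) {
  unsigned NumMoves = 0;
  for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
       I != E; ++I) {
    unsigned SrcReg, DstReg, SrcSub, DstSub;
    if (TII.isMoveInstr(*I, SrcReg, DstReg, SrcSub, DstSub))
      ++NumMoves; // *I copies SrcReg into DstReg
  }
  return NumMoves;
}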
+bool MBlazeInstrInfo:: +isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const { + SrcSubIdx = DstSubIdx = 0; // No sub-registers. + + // add $dst, $src, $zero || addu $dst, $zero, $src + // or $dst, $src, $zero || or $dst, $zero, $src + if ((MI.getOpcode() == MBlaze::ADD) || (MI.getOpcode() == MBlaze::OR)) { + if (MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == MBlaze::R0) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(2).getReg(); + return true; + } else if (MI.getOperand(2).isReg() && + MI.getOperand(2).getReg() == MBlaze::R0) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + return true; + } + } + + // addi $dst, $src, 0 + // ori $dst, $src, 0 + if ((MI.getOpcode() == MBlaze::ADDI) || (MI.getOpcode() == MBlaze::ORI)) { + if ((MI.getOperand(1).isReg()) && (isZeroImm(MI.getOperand(2)))) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + return true; + } + } + + return false; +} + +/// isLoadFromStackSlot - If the specified machine instruction is a direct +/// load from a stack slot, return the virtual or physical register number of +/// the destination along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than loading from the stack slot. +unsigned MBlazeInstrInfo:: +isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { + if (MI->getOpcode() == MBlaze::LWI) { + if ((MI->getOperand(2).isFI()) && // is a stack slot + (MI->getOperand(1).isImm()) && // the imm is zero + (isZeroImm(MI->getOperand(1)))) { + FrameIndex = MI->getOperand(2).getIndex(); + return MI->getOperand(0).getReg(); + } + } + + return 0; +} + +/// isStoreToStackSlot - If the specified machine instruction is a direct +/// store to a stack slot, return the virtual or physical register number of +/// the source reg along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than storing to the stack slot. +unsigned MBlazeInstrInfo:: +isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { + if (MI->getOpcode() == MBlaze::SWI) { + if ((MI->getOperand(2).isFI()) && // is a stack slot + (MI->getOperand(1).isImm()) && // the imm is zero + (isZeroImm(MI->getOperand(1)))) { + FrameIndex = MI->getOperand(2).getIndex(); + return MI->getOperand(0).getReg(); + } + } + return 0; +} + +/// insertNoop - If data hazard condition is found insert the target nop +/// instruction. 
+void MBlazeInstrInfo:: +insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + BuildMI(MBB, MI, DL, get(MBlaze::NOP)); +} + +bool MBlazeInstrInfo:: +copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const { + DebugLoc dl = DebugLoc::getUnknownLoc(); + llvm::BuildMI(MBB, I, dl, get(MBlaze::ADD), DestReg) + .addReg(SrcReg).addReg(MBlaze::R0); + return true; +} + +void MBlazeInstrInfo:: +storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC) const { + DebugLoc dl = DebugLoc::getUnknownLoc(); + BuildMI(MBB, I, dl, get(MBlaze::SWI)).addReg(SrcReg,getKillRegState(isKill)) + .addImm(0).addFrameIndex(FI); +} + +void MBlazeInstrInfo:: +loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, + const TargetRegisterClass *RC) const { + DebugLoc dl = DebugLoc::getUnknownLoc(); + BuildMI(MBB, I, dl, get(MBlaze::LWI), DestReg) + .addImm(0).addFrameIndex(FI); +} + +MachineInstr *MBlazeInstrInfo:: +foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, int FI) const { + if (Ops.size() != 1) return NULL; + + MachineInstr *NewMI = NULL; + + switch (MI->getOpcode()) { + case MBlaze::OR: + case MBlaze::ADD: + if ((MI->getOperand(0).isReg()) && + (MI->getOperand(2).isReg()) && + (MI->getOperand(2).getReg() == MBlaze::R0) && + (MI->getOperand(1).isReg())) { + if (Ops[0] == 0) { // COPY -> STORE + unsigned SrcReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(MBlaze::SW)) + .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) + .addImm(0).addFrameIndex(FI); + } else { // COPY -> LOAD + unsigned DstReg = MI->getOperand(0).getReg(); + bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(MBlaze::LW)) + .addReg(DstReg, RegState::Define | getDeadRegState(isDead) | + getUndefRegState(isUndef)) + .addImm(0).addFrameIndex(FI); + } + } + break; + } + + return NewMI; +} + +//===----------------------------------------------------------------------===// +// Branch Analysis +//===----------------------------------------------------------------------===// +unsigned MBlazeInstrInfo:: +InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond) const { + DebugLoc dl = DebugLoc::getUnknownLoc(); + + // Can only insert uncond branches so far. + assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!"); + BuildMI(&MBB, dl, get(MBlaze::BRI)).addMBB(TBB); + return 1; +} + +/// getGlobalBaseReg - Return a virtual register initialized with the +/// the global base register value. Output instructions required to +/// initialize the register in the function entry block, if necessary. 
+/// +unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { + MBlazeFunctionInfo *MBlazeFI = MF->getInfo<MBlazeFunctionInfo>(); + unsigned GlobalBaseReg = MBlazeFI->getGlobalBaseReg(); + if (GlobalBaseReg != 0) + return GlobalBaseReg; + + // Insert the set of GlobalBaseReg into the first MBB of the function + MachineBasicBlock &FirstMBB = MF->front(); + MachineBasicBlock::iterator MBBI = FirstMBB.begin(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + + GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::CPURegsRegisterClass); + bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, MBlaze::R20, + MBlaze::CPURegsRegisterClass, + MBlaze::CPURegsRegisterClass); + assert(Ok && "Couldn't assign to global base register!"); + Ok = Ok; // Silence warning when assertions are turned off. + RegInfo.addLiveIn(MBlaze::R20); + + MBlazeFI->setGlobalBaseReg(GlobalBaseReg); + return GlobalBaseReg; +} diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h new file mode 100644 index 0000000..4f79f1c --- /dev/null +++ b/lib/Target/MBlaze/MBlazeInstrInfo.h @@ -0,0 +1,242 @@ +//===- MBlazeInstrInfo.h - MBlaze Instruction Information -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MBlaze implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZEINSTRUCTIONINFO_H +#define MBLAZEINSTRUCTIONINFO_H + +#include "MBlaze.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "MBlazeRegisterInfo.h" + +namespace llvm { + +namespace MBlaze { + + // MBlaze Branch Codes + enum FPBranchCode { + BRANCH_F, + BRANCH_T, + BRANCH_FL, + BRANCH_TL, + BRANCH_INVALID + }; + + // MBlaze Condition Codes + enum CondCode { + // To be used with float branch True + FCOND_F, + FCOND_UN, + FCOND_EQ, + FCOND_UEQ, + FCOND_OLT, + FCOND_ULT, + FCOND_OLE, + FCOND_ULE, + FCOND_SF, + FCOND_NGLE, + FCOND_SEQ, + FCOND_NGL, + FCOND_LT, + FCOND_NGE, + FCOND_LE, + FCOND_NGT, + + // To be used with float branch False + // This conditions have the same mnemonic as the + // above ones, but are used with a branch False; + FCOND_T, + FCOND_OR, + FCOND_NEQ, + FCOND_OGL, + FCOND_UGE, + FCOND_OGE, + FCOND_UGT, + FCOND_OGT, + FCOND_ST, + FCOND_GLE, + FCOND_SNE, + FCOND_GL, + FCOND_NLT, + FCOND_GE, + FCOND_NLE, + FCOND_GT, + + // Only integer conditions + COND_E, + COND_GZ, + COND_GEZ, + COND_LZ, + COND_LEZ, + COND_NE, + COND_INVALID + }; + + // Turn condition code into conditional branch opcode. + unsigned GetCondBranchFromCond(CondCode CC); + + /// GetOppositeBranchCondition - Return the inverse of the specified cond, + /// e.g. turning COND_E to COND_NE. 
+ CondCode GetOppositeBranchCondition(MBlaze::CondCode CC); + + /// MBlazeCCToString - Map each FP condition code to its string + inline static const char *MBlazeFCCToString(MBlaze::CondCode CC) + { + switch (CC) { + default: llvm_unreachable("Unknown condition code"); + case FCOND_F: + case FCOND_T: return "f"; + case FCOND_UN: + case FCOND_OR: return "un"; + case FCOND_EQ: + case FCOND_NEQ: return "eq"; + case FCOND_UEQ: + case FCOND_OGL: return "ueq"; + case FCOND_OLT: + case FCOND_UGE: return "olt"; + case FCOND_ULT: + case FCOND_OGE: return "ult"; + case FCOND_OLE: + case FCOND_UGT: return "ole"; + case FCOND_ULE: + case FCOND_OGT: return "ule"; + case FCOND_SF: + case FCOND_ST: return "sf"; + case FCOND_NGLE: + case FCOND_GLE: return "ngle"; + case FCOND_SEQ: + case FCOND_SNE: return "seq"; + case FCOND_NGL: + case FCOND_GL: return "ngl"; + case FCOND_LT: + case FCOND_NLT: return "lt"; + case FCOND_NGE: + case FCOND_GE: return "ge"; + case FCOND_LE: + case FCOND_NLE: return "nle"; + case FCOND_NGT: + case FCOND_GT: return "gt"; + } + } +} + +/// MBlazeII - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace MBlazeII { + /// Target Operand Flag enum. + enum TOF { + //===------------------------------------------------------------------===// + // MBlaze Specific MachineOperand flags. + MO_NO_FLAG, + + /// MO_GOT - Represents the offset into the global offset table at which + /// the address the relocation entry symbol resides during execution. + MO_GOT, + + /// MO_GOT_CALL - Represents the offset into the global offset table at + /// which the address of a call site relocation entry symbol resides + /// during execution. This is different from the above since this flag + /// can only be present in call instructions. + MO_GOT_CALL, + + /// MO_GPREL - Represents the offset from the current gp value to be used + /// for the relocatable object file being produced. + MO_GPREL, + + /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol + /// address. + MO_ABS_HILO + + }; +} + +class MBlazeInstrInfo : public TargetInstrInfoImpl { + MBlazeTargetMachine &TM; + const MBlazeRegisterInfo RI; +public: + explicit MBlazeInstrInfo(MBlazeTargetMachine &TM); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + virtual const MBlazeRegisterInfo &getRegisterInfo() const { return RI; } + + /// Return true if the instruction is a register to register move and return + /// the source and dest operands and their sub-register indices by reference. + virtual bool isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const; + + /// isLoadFromStackSlot - If the specified machine instruction is a direct + /// load from a stack slot, return the virtual or physical register number of + /// the destination along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than loading from the stack slot. + virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + /// isStoreToStackSlot - If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. 
If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. + virtual unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + /// Branch Analysis + virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond) const; + virtual bool copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const; + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC) const; + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC) const; + + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex) const; + + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr* LoadMI) const { + return 0; + } + + /// Insert a nop instruction when a hazard condition is found + virtual void insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; + + /// getGlobalBaseReg - Return a virtual register initialized with the + /// global base register value. Output instructions required to + /// initialize the register in the function entry block, if necessary. + /// + unsigned getGlobalBaseReg(MachineFunction *MF) const; +}; + +} + +#endif diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td new file mode 100644 index 0000000..3c406dd --- /dev/null +++ b/lib/Target/MBlaze/MBlazeInstrInfo.td @@ -0,0 +1,672 @@ +//===- MBlazeInstrInfo.td - MBlaze Instruction defs -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Instruction format superclass +//===----------------------------------------------------------------------===// +include "MBlazeInstrFormats.td" + +//===----------------------------------------------------------------------===// +// MBlaze profiles and nodes +//===----------------------------------------------------------------------===// +def SDT_MBlazeRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_MBlazeJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; + +// Call +def MBlazeJmpLink : SDNode<"MBlazeISD::JmpLink",SDT_MBlazeJmpLink, + [SDNPHasChain,SDNPOptInFlag,SDNPOutFlag]>; + +// Return +def MBlazeRet : SDNode<"MBlazeISD::Ret", SDT_MBlazeRet, + [SDNPHasChain, SDNPOptInFlag]>; + +// Hi and Lo nodes are used to handle global addresses. They are used in +// MBlazeISelLowering to lower constructs such as GlobalAddress and +// ExternalSymbol for the static code model.
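MBlazeISelLowering.cpp itself is not part of this hunk, so purely as a hedged illustration: a lowering hook would typically wrap the target global address in the MBWrapper node defined just below, roughly as in this sketch. The function name is illustrative, and the wrap opcode is passed in as a parameter because MBlazeISD::Wrap, the C++ enum behind the "MBlazeISD::Wrap" string, lives in a header that is not shown here.

#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// Sketch only: wrap a global address node so that the
// "(MBWrapper tglobaladdr) -> (ORI R0, tglobaladdr)" pattern near the end
// of this file can select it.  WrapOpc would be MBlazeISD::Wrap.
static SDValue wrapGlobalAddress(unsigned WrapOpc, SDValue Op,
                                 SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();
  SDValue TGA = DAG.getTargetGlobalAddress(
      cast<GlobalAddressSDNode>(Op)->getGlobal(), MVT::i32);
  return DAG.getNode(WrapOpc, dl, MVT::i32, TGA);
}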
+def MBWrapper : SDNode<"MBlazeISD::Wrap", SDTIntUnaryOp>; +def MBlazeGPRel : SDNode<"MBlazeISD::GPRel", SDTIntUnaryOp>; + +def SDT_MBCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>; +def SDT_MBCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +// These are target-independent nodes, but have target-specific formats. +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MBCallSeqStart, + [SDNPHasChain, SDNPOutFlag]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MBCallSeqEnd, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + +def SDTMBlazeSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>]>; + +//===----------------------------------------------------------------------===// +// MBlaze Instruction Predicate Definitions. +//===----------------------------------------------------------------------===// +def HasPipe3 : Predicate<"Subtarget.hasPipe3()">; +def HasBarrel : Predicate<"Subtarget.hasBarrel()">; +def NoBarrel : Predicate<"!Subtarget.hasBarrel()">; +def HasDiv : Predicate<"Subtarget.hasDiv()">; +def HasMul : Predicate<"Subtarget.hasMul()">; +def HasFSL : Predicate<"Subtarget.hasFSL()">; +def HasEFSL : Predicate<"Subtarget.hasEFSL()">; +def HasMSRSet : Predicate<"Subtarget.hasMSRSet()">; +def HasException : Predicate<"Subtarget.hasException()">; +def HasPatCmp : Predicate<"Subtarget.hasPatCmp()">; +def HasFPU : Predicate<"Subtarget.hasFPU()">; +def HasESR : Predicate<"Subtarget.hasESR()">; +def HasPVR : Predicate<"Subtarget.hasPVR()">; +def HasMul64 : Predicate<"Subtarget.hasMul64()">; +def HasSqrt : Predicate<"Subtarget.hasSqrt()">; +def HasMMU : Predicate<"Subtarget.hasMMU()">; + +//===----------------------------------------------------------------------===// +// MBlaze Operand, Complex Patterns and Transformations Definitions. +//===----------------------------------------------------------------------===// + +// Instruction operand types +def brtarget : Operand<OtherVT>; +def calltarget : Operand<i32>; +def simm16 : Operand<i32>; +def uimm5 : Operand<i32>; +def fimm : Operand<f32>; + +// Unsigned Operand +def uimm16 : Operand<i32> { + let PrintMethod = "printUnsignedImm"; +} + +// FSL Operand +def fslimm : Operand<i32> { + let PrintMethod = "printFSLImm"; +} + +// Address operand +def memri : Operand<i32> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops simm16, CPURegs); +} + +def memrr : Operand<i32> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops CPURegs, CPURegs); +} + +// Transformation Function - get the lower 16 bits. +def LO16 : SDNodeXForm<imm, [{ + return getI32Imm((unsigned)N->getZExtValue() & 0xFFFF); +}]>; + +// Transformation Function - get the higher 16 bits. +def HI16 : SDNodeXForm<imm, [{ + return getI32Imm((unsigned)N->getZExtValue() >> 16); +}]>; + +// Node immediate fits as 16-bit sign extended on target immediate. +// e.g. addi, andi +def immSExt16 : PatLeaf<(imm), [{ + return (N->getZExtValue() >> 16) == 0; +}]>; + +// Node immediate fits as 16-bit zero extended on target immediate. +// The LO16 param means that only the lower 16 bits of the node +// immediate are caught. +// e.g. addiu, sltiu +def immZExt16 : PatLeaf<(imm), [{ + return (N->getZExtValue() >> 16) == 0; +}], LO16>; + +// FSL immediate field must fit in 4 bits. +def immZExt4 : PatLeaf<(imm), [{ + return N->getZExtValue() == ((N->getZExtValue()) & 0xf) ; +}]>; + +// shamt field must fit in 5 bits. +def immZExt5 : PatLeaf<(imm), [{ + return N->getZExtValue() == ((N->getZExtValue()) & 0x1f) ; +}]>; + +// MBlaze Address Mode! 
SDNode frameindex could possibly be a match +// since load and store instructions from the stack use it. +def iaddr : ComplexPattern<i32, 2, "SelectAddrRegImm", [frameindex], []>; +def xaddr : ComplexPattern<i32, 2, "SelectAddrRegReg", [], []>; + +//===----------------------------------------------------------------------===// +// Pseudo instructions +//===----------------------------------------------------------------------===// + +// As stack alignment is always done with addi, we need a 16-bit immediate +let Defs = [R1], Uses = [R1] in { +def ADJCALLSTACKDOWN : MBlazePseudo<(outs), (ins simm16:$amt), + "${:comment} ADJCALLSTACKDOWN $amt", + [(callseq_start timm:$amt)]>; +def ADJCALLSTACKUP : MBlazePseudo<(outs), + (ins uimm16:$amt1, simm16:$amt2), + "${:comment} ADJCALLSTACKUP $amt1", + [(callseq_end timm:$amt1, timm:$amt2)]>; +} + +// Some assembly macros need to avoid pseudoinstructions and assembler +// automatic reordering; we should reorder ourselves. +def MACRO : MBlazePseudo<(outs), (ins), ".set macro", []>; +def REORDER : MBlazePseudo<(outs), (ins), ".set reorder", []>; +def NOMACRO : MBlazePseudo<(outs), (ins), ".set nomacro", []>; +def NOREORDER : MBlazePseudo<(outs), (ins), ".set noreorder", []>; + +// When handling PIC code the assembler needs .cpload and .cprestore +// directives. If the real instructions corresponding to these directives +// are used, we have the same behavior, but also get a bunch of warnings +// from the assembler. +def CPLOAD : MBlazePseudo<(outs), (ins CPURegs:$reg), ".cpload $reg", []>; +def CPRESTORE : MBlazePseudo<(outs), (ins uimm16:$l), ".cprestore $l\n", []>; + +//===----------------------------------------------------------------------===// +// Instruction specific formats +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions +//===----------------------------------------------------------------------===// +class Arith<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode, + InstrItinClass itin> : + TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>; + +class ArithI<bits<6> op, string instr_asm, SDNode OpNode, + Operand Od, PatLeaf imm_type> : + TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>; + +class ArithR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode, + InstrItinClass itin> : + TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b), + !strconcat(instr_asm, " $dst, $c, $b"), + [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>; + +class ArithRI<bits<6> op, string instr_asm, SDNode OpNode, + Operand Od, PatLeaf imm_type> : + TAI<op, (outs CPURegs:$dst), (ins Od:$b, CPURegs:$c), + !strconcat(instr_asm, " $dst, $c, $b"), + [(set CPURegs:$dst, (OpNode imm_type:$b, CPURegs:$c))], IIAlu>; + +class ArithN<bits<6> op, bits<11> flags, string instr_asm, + InstrItinClass itin> : + TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [], itin>; + +class ArithNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> : + TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [], IIAlu>; + +class ArithRN<bits<6> op, bits<11> flags, string instr_asm, +
InstrItinClass itin> : + TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b), + !strconcat(instr_asm, " $dst, $b, $c"), + [], itin>; + +class ArithRNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> : + TAI<op, (outs CPURegs:$dst), (ins Od:$c, CPURegs:$b), + !strconcat(instr_asm, " $dst, $b, $c"), + [], IIAlu>; + +//===----------------------------------------------------------------------===// +// Misc Arithmetic Instructions +//===----------------------------------------------------------------------===// + +class Logic<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode> : + TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>; + +class LogicI<bits<6> op, string instr_asm, SDNode OpNode> : + TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, uimm16:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))], + IIAlu>; + +class EffectiveAddress<string instr_asm> : + TAI<0x08, (outs CPURegs:$dst), (ins memri:$addr), + instr_asm, [(set CPURegs:$dst, iaddr:$addr)], IIAlu>; + +//===----------------------------------------------------------------------===// +// Memory Access Instructions +//===----------------------------------------------------------------------===// +class LoadM<bits<6> op, string instr_asm, PatFrag OpNode> : + TA<op, 0x000, (outs CPURegs:$dst), (ins memrr:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(set CPURegs:$dst, (OpNode xaddr:$addr))], IILoad>; + +class LoadMI<bits<6> op, string instr_asm, PatFrag OpNode> : + TAI<op, (outs CPURegs:$dst), (ins memri:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(set CPURegs:$dst, (OpNode iaddr:$addr))], IILoad>; + +class StoreM<bits<6> op, string instr_asm, PatFrag OpNode> : + TA<op, 0x000, (outs), (ins CPURegs:$dst, memrr:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(OpNode CPURegs:$dst, xaddr:$addr)], IIStore>; + +class StoreMI<bits<6> op, string instr_asm, PatFrag OpNode> : + TAI<op, (outs), (ins CPURegs:$dst, memri:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(OpNode CPURegs:$dst, iaddr:$addr)], IIStore>; + +//===----------------------------------------------------------------------===// +// Branch Instructions +//===----------------------------------------------------------------------===// +class Branch<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> : + TBR<op, br, flags, (outs), (ins CPURegs:$target), + !strconcat(instr_asm, " $target"), + [(brind CPURegs:$target)], IIBranch>; + +class BranchI<bits<6> op, bits<5> brf, string instr_asm> : + TBRI<op, brf, (outs), (ins brtarget:$target), + !strconcat(instr_asm, " $target"), + [(br bb:$target)], IIBranch>; + +//===----------------------------------------------------------------------===// +// Branch and Link Instructions +//===----------------------------------------------------------------------===// +class BranchL<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> : + TBRL<op, br, flags, (outs), (ins CPURegs:$target), + !strconcat(instr_asm, " r15, $target"), + [], IIBranch>; + +class BranchLI<bits<6> op, bits<5> br, string instr_asm> : + TBRLI<op, br, (outs), (ins calltarget:$target), + !strconcat(instr_asm, " r15, $target"), + [], IIBranch>; + +//===----------------------------------------------------------------------===// +// Conditional Branch Instructions +//===----------------------------------------------------------------------===// +class 
BranchC<bits<6> op, bits<5> br, bits<11> flags, string instr_asm, + PatFrag cond_op> : + TBRC<op, br, flags, (outs), + (ins CPURegs:$a, CPURegs:$b, brtarget:$offset), + !strconcat(instr_asm, " $a, $b, $offset"), + [], IIBranch>; + //(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)], + //IIBranch>; + +class BranchCI<bits<6> op, bits<5> br, string instr_asm, PatFrag cond_op> : + TBRCI<op, br, (outs), (ins CPURegs:$a, brtarget:$offset), + !strconcat(instr_asm, " $a, $offset"), + [], IIBranch>; + +//===----------------------------------------------------------------------===// +// MBlaze arithmetic instructions +//===----------------------------------------------------------------------===// + +let isCommutable = 1, isAsCheapAsAMove = 1 in { + def ADD : Arith<0x00, 0x000, "add ", add, IIAlu>; + def ADDC : Arith<0x02, 0x000, "addc ", adde, IIAlu>; + def ADDK : Arith<0x04, 0x000, "addk ", addc, IIAlu>; + def ADDKC : ArithN<0x06, 0x000, "addkc ", IIAlu>; + def AND : Logic<0x21, 0x000, "and ", and>; + def OR : Logic<0x20, 0x000, "or ", or>; + def XOR : Logic<0x22, 0x000, "xor ", xor>; +} + +let isAsCheapAsAMove = 1 in { + def ANDN : ArithN<0x23, 0x000, "andn ", IIAlu>; + def CMP : ArithN<0x05, 0x001, "cmp ", IIAlu>; + def CMPU : ArithN<0x05, 0x003, "cmpu ", IIAlu>; + def RSUB : ArithR<0x01, 0x000, "rsub ", sub, IIAlu>; + def RSUBC : ArithR<0x03, 0x000, "rsubc ", sube, IIAlu>; + def RSUBK : ArithR<0x05, 0x000, "rsubk ", subc, IIAlu>; + def RSUBKC : ArithRN<0x07, 0x000, "rsubkc ", IIAlu>; +} + +let isCommutable = 1, Predicates=[HasMul] in { + def MUL : Arith<0x10, 0x000, "mul ", mul, IIAlu>; +} + +let isCommutable = 1, Predicates=[HasMul,HasMul64] in { + def MULH : Arith<0x10, 0x001, "mulh ", mulhs, IIAlu>; + def MULHU : Arith<0x10, 0x003, "mulhu ", mulhu, IIAlu>; +} + +let Predicates=[HasMul,HasMul64] in { + def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIAlu>; +} + +let Predicates=[HasBarrel] in { + def BSRL : Arith<0x11, 0x000, "bsrl ", srl, IIAlu>; + def BSRA : Arith<0x11, 0x200, "bsra ", sra, IIAlu>; + def BSLL : Arith<0x11, 0x400, "bsll ", shl, IIAlu>; + def BSRLI : ArithI<0x11, "bsrli ", srl, uimm5, immZExt5>; + def BSRAI : ArithI<0x11, "bsrai ", sra, uimm5, immZExt5>; + def BSLLI : ArithI<0x11, "bslli ", shl, uimm5, immZExt5>; +} + +let Predicates=[HasDiv] in { + def IDIV : Arith<0x12, 0x000, "idiv ", sdiv, IIAlu>; + def IDIVU : Arith<0x12, 0x002, "idivu ", udiv, IIAlu>; +} + +//===----------------------------------------------------------------------===// +// MBlaze immediate mode arithmetic instructions +//===----------------------------------------------------------------------===// + +let isAsCheapAsAMove = 1 in { + def ADDI : ArithI<0x08, "addi ", add, simm16, immSExt16>; + def ADDIC : ArithNI<0x0A, "addic ", simm16, immSExt16>; + def ADDIK : ArithNI<0x0C, "addik ", simm16, immSExt16>; + def ADDIKC : ArithI<0x0E, "addikc ", addc, simm16, immSExt16>; + def RSUBI : ArithRI<0x09, "rsubi ", sub, simm16, immSExt16>; + def RSUBIC : ArithRNI<0x0B, "rsubi ", simm16, immSExt16>; + def RSUBIK : ArithRNI<0x0E, "rsubic ", simm16, immSExt16>; + def RSUBIKC : ArithRI<0x0F, "rsubikc", subc, simm16, immSExt16>; + def ANDNI : ArithNI<0x2B, "andni ", uimm16, immZExt16>; + def ANDI : LogicI<0x29, "andi ", and>; + def ORI : LogicI<0x28, "ori ", or>; + def XORI : LogicI<0x2A, "xori ", xor>; +} + +let Predicates=[HasMul] in { + def MULI : ArithI<0x18, "muli ", mul, simm16, immSExt16>; +} + +//===----------------------------------------------------------------------===// +// MBlaze memory access 
instructions +//===----------------------------------------------------------------------===// + +let canFoldAsLoad = 1, isReMaterializable = 1 in { + def LBU : LoadM<0x30, "lbu ", zextloadi8>; + def LHU : LoadM<0x31, "lhu ", zextloadi16>; + def LW : LoadM<0x32, "lw ", load>; + + def LBUI : LoadMI<0x30, "lbui ", zextloadi8>; + def LHUI : LoadMI<0x31, "lhui ", zextloadi16>; + def LWI : LoadMI<0x32, "lwi ", load>; +} + + def SB : StoreM<0x34, "sb ", truncstorei8>; + def SH : StoreM<0x35, "sh ", truncstorei16>; + def SW : StoreM<0x36, "sw ", store>; + + def SBI : StoreMI<0x34, "sbi ", truncstorei8>; + def SHI : StoreMI<0x35, "shi ", truncstorei16>; + def SWI : StoreMI<0x36, "swi ", store>; + +//===----------------------------------------------------------------------===// +// MBlaze branch instructions +//===----------------------------------------------------------------------===// + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { + def BRI : BranchI<0x2E, 0x00, "bri ">; + def BRAI : BranchI<0x2E, 0x08, "brai ">; + def BEQI : BranchCI<0x2F, 0x00, "beqi ", seteq>; + def BNEI : BranchCI<0x2F, 0x01, "bnei ", setne>; + def BLTI : BranchCI<0x2F, 0x02, "blti ", setlt>; + def BLEI : BranchCI<0x2F, 0x03, "blei ", setle>; + def BGTI : BranchCI<0x2F, 0x04, "bgti ", setgt>; + def BGEI : BranchCI<0x2F, 0x05, "bgei ", setge>; +} + +let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { + def BR : Branch<0x26, 0x00, 0x000, "br ">; + def BRA : Branch<0x26, 0x08, 0x000, "bra ">; + def BEQ : BranchC<0x27, 0x00, 0x000, "beq ", seteq>; + def BNE : BranchC<0x27, 0x01, 0x000, "bne ", setne>; + def BLT : BranchC<0x27, 0x02, 0x000, "blt ", setlt>; + def BLE : BranchC<0x27, 0x03, 0x000, "ble ", setle>; + def BGT : BranchC<0x27, 0x04, 0x000, "bgt ", setgt>; + def BGE : BranchC<0x27, 0x05, 0x000, "bge ", setge>; +} + +let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1 in { + def BRID : BranchI<0x2E, 0x10, "brid ">; + def BRAID : BranchI<0x2E, 0x18, "braid ">; + def BEQID : BranchCI<0x2F, 0x10, "beqid ", seteq>; + def BNEID : BranchCI<0x2F, 0x11, "bneid ", setne>; + def BLTID : BranchCI<0x2F, 0x12, "bltid ", setlt>; + def BLEID : BranchCI<0x2F, 0x13, "bleid ", setle>; + def BGTID : BranchCI<0x2F, 0x14, "bgtid ", setgt>; + def BGEID : BranchCI<0x2F, 0x15, "bgeid ", setge>; +} + +let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, + hasDelaySlot = 1, hasCtrlDep = 1 in { + def BRD : Branch<0x26, 0x10, 0x000, "brd ">; + def BRAD : Branch<0x26, 0x18, 0x000, "brad ">; + def BEQD : BranchC<0x27, 0x10, 0x000, "beqd ", seteq>; + def BNED : BranchC<0x27, 0x11, 0x000, "bned ", setne>; + def BLTD : BranchC<0x27, 0x12, 0x000, "bltd ", setlt>; + def BLED : BranchC<0x27, 0x13, 0x000, "bled ", setle>; + def BGTD : BranchC<0x27, 0x14, 0x000, "bgtd ", setgt>; + def BGED : BranchC<0x27, 0x15, 0x000, "bged ", setge>; +} + +let isCall = 1, hasCtrlDep = 1, isIndirectBranch = 1, + Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12], + Uses = [R1,R5,R6,R7,R8,R9,R10] in { + def BRL : BranchL<0x26, 0x04, 0x000, "brl ">; + def BRAL : BranchL<0x26, 0x0C, 0x000, "bral ">; +} + +let isCall = 1, hasDelaySlot = 1, hasCtrlDep = 1, + Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12], + Uses = [R1,R5,R6,R7,R8,R9,R10] in { + def BRLID : BranchLI<0x2E, 0x14, "brlid ">; + def BRALID : BranchLI<0x2E, 0x1C, "bralid ">; +} + +let isCall = 1, hasDelaySlot = 1, hasCtrlDep = 1, isIndirectBranch = 1, + Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12], + Uses = [R1,R5,R6,R7,R8,R9,R10] in { + def BRLD : BranchL<0x26, 0x14, 0x000, 
"brld ">; + def BRALD : BranchL<0x26, 0x1C, 0x000, "brald ">; +} + +let isReturn=1, isTerminator=1, hasDelaySlot=1, + isBarrier=1, hasCtrlDep=1, imm16=0x8 in { + def RTSD : TRET<0x2D, (outs), (ins CPURegs:$target), + "rtsd $target, 8", + [(MBlazeRet CPURegs:$target)], + IIBranch>; +} + +//===----------------------------------------------------------------------===// +// MBlaze misc instructions +//===----------------------------------------------------------------------===// + +let addr = 0 in { + def NOP : TADDR<0x00, (outs), (ins), "nop ", [], IIAlu>; +} + +let usesCustomInserter = 1 in { + //class PseudoSelCC<RegisterClass RC, string asmstr>: + // MBlazePseudo<(outs RC:$D), (ins RC:$T, RC:$F, CPURegs:$CMP), asmstr, + // [(set RC:$D, (MBlazeSelectCC RC:$T, RC:$F, CPURegs:$CMP))]>; + //def Select_CC : PseudoSelCC<CPURegs, "# MBlazeSelect_CC">; + + def Select_CC : MBlazePseudo<(outs CPURegs:$dst), + (ins CPURegs:$T, CPURegs:$F, CPURegs:$CMP, i32imm:$CC), + "; SELECT_CC PSEUDO!", + []>; + + def ShiftL : MBlazePseudo<(outs CPURegs:$dst), + (ins CPURegs:$L, CPURegs:$R), + "; ShiftL PSEUDO!", + []>; + + def ShiftRA : MBlazePseudo<(outs CPURegs:$dst), + (ins CPURegs:$L, CPURegs:$R), + "; ShiftRA PSEUDO!", + []>; + + def ShiftRL : MBlazePseudo<(outs CPURegs:$dst), + (ins CPURegs:$L, CPURegs:$R), + "; ShiftRL PSEUDO!", + []>; +} + + +let rb = 0 in { + def SEXT16 : TA<0x24, 0x061, (outs CPURegs:$dst), (ins CPURegs:$src), + "sext16 $dst, $src", [], IIAlu>; + def SEXT8 : TA<0x24, 0x060, (outs CPURegs:$dst), (ins CPURegs:$src), + "sext8 $dst, $src", [], IIAlu>; + def SRL : TA<0x24, 0x041, (outs CPURegs:$dst), (ins CPURegs:$src), + "srl $dst, $src", [], IIAlu>; + def SRA : TA<0x24, 0x001, (outs CPURegs:$dst), (ins CPURegs:$src), + "sra $dst, $src", [], IIAlu>; + def SRC : TA<0x24, 0x021, (outs CPURegs:$dst), (ins CPURegs:$src), + "src $dst, $src", [], IIAlu>; +} + +def LEA_ADDI : EffectiveAddress<"addi $dst, ${addr:stackloc}">; + +//===----------------------------------------------------------------------===// +// Arbitrary patterns that map to one or more instructions +//===----------------------------------------------------------------------===// + +// Small immediates +def : Pat<(i32 0), (ADD R0, R0)>; +def : Pat<(i32 immSExt16:$imm), (ADDI R0, imm:$imm)>; +def : Pat<(i32 immZExt16:$imm), (ORI R0, imm:$imm)>; + +// Arbitrary immediates +def : Pat<(i32 imm:$imm), (ADDI R0, imm:$imm)>; + +// In register sign extension +def : Pat<(sext_inreg CPURegs:$src, i16), (SEXT16 CPURegs:$src)>; +def : Pat<(sext_inreg CPURegs:$src, i8), (SEXT8 CPURegs:$src)>; + +// Call +def : Pat<(MBlazeJmpLink (i32 tglobaladdr:$dst)), (BRLID tglobaladdr:$dst)>; +def : Pat<(MBlazeJmpLink (i32 texternalsym:$dst)),(BRLID texternalsym:$dst)>; +def : Pat<(MBlazeJmpLink CPURegs:$dst), (BRLD CPURegs:$dst)>; + +// Shift Instructions +def : Pat<(shl CPURegs:$L, CPURegs:$R), (ShiftL CPURegs:$L, CPURegs:$R)>; +def : Pat<(sra CPURegs:$L, CPURegs:$R), (ShiftRA CPURegs:$L, CPURegs:$R)>; +def : Pat<(srl CPURegs:$L, CPURegs:$R), (ShiftRL CPURegs:$L, CPURegs:$R)>; + +// SET_CC operations +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETEQ), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMP CPURegs:$L, CPURegs:$R), 1)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETNE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMP CPURegs:$L, CPURegs:$R), 2)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETGT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMP CPURegs:$L, CPURegs:$R), 3)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETLT), + (Select_CC (ADDI R0, 1), 
(ADDI R0, 0), + (CMP CPURegs:$L, CPURegs:$R), 4)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETGE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMP CPURegs:$L, CPURegs:$R), 5)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETLE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMP CPURegs:$L, CPURegs:$R), 6)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETUGT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMPU CPURegs:$L, CPURegs:$R), 3)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETULT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMPU CPURegs:$L, CPURegs:$R), 4)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETUGE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMPU CPURegs:$L, CPURegs:$R), 5)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETULE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMPU CPURegs:$L, CPURegs:$R), 6)>; + +// SELECT operations +def : Pat<(select CPURegs:$C, CPURegs:$T, CPURegs:$F), + (Select_CC CPURegs:$T, CPURegs:$F, CPURegs:$C, 2)>; + +// SELECT_CC +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETEQ), + (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 1)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETNE), + (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 2)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETGT), + (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 3)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETLT), + (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 4)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETGE), + (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 5)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETLE), + (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 6)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETUGT), + (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 3)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETULT), + (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 4)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETUGE), + (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 5)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETULE), + (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 6)>; + +// BRCOND instructions +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETEQ), bb:$T), + (BEQID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETNE), bb:$T), + (BNEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETGT), bb:$T), + (BGTID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETLT), bb:$T), + (BLTID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETGE), bb:$T), + (BGEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETLE), bb:$T), + (BLEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETUGT), bb:$T), + (BGTID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETULT), bb:$T), + (BLTID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETUGE), bb:$T), + (BGEID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond 
(setcc CPURegs:$L, CPURegs:$R, SETULE), bb:$T), + (BLEID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond CPURegs:$C, bb:$T), + (BNEID CPURegs:$C, bb:$T)>; + +// Jump tables, global addresses, and constant pools +def : Pat<(MBWrapper tglobaladdr:$in), (ORI R0, tglobaladdr:$in)>; +def : Pat<(MBWrapper tjumptable:$in), (ORI R0, tjumptable:$in)>; +def : Pat<(MBWrapper tconstpool:$in), (ORI R0, tconstpool:$in)>; + +// Misc instructions +def : Pat<(and CPURegs:$lh, (not CPURegs:$rh)),(ANDN CPURegs:$lh, CPURegs:$rh)>; + +// Arithmetic with immediates +def : Pat<(add CPURegs:$in, imm:$imm),(ADDI CPURegs:$in, imm:$imm)>; +def : Pat<(or CPURegs:$in, imm:$imm),(ORI CPURegs:$in, imm:$imm)>; +def : Pat<(xor CPURegs:$in, imm:$imm),(XORI CPURegs:$in, imm:$imm)>; + +// extended load and stores +def : Pat<(extloadi1 iaddr:$src), (LBUI iaddr:$src)>; +def : Pat<(extloadi8 iaddr:$src), (LBUI iaddr:$src)>; +def : Pat<(extloadi16 iaddr:$src), (LHUI iaddr:$src)>; +def : Pat<(extloadi1 xaddr:$src), (LBU xaddr:$src)>; +def : Pat<(extloadi8 xaddr:$src), (LBU xaddr:$src)>; +def : Pat<(extloadi16 xaddr:$src), (LHU xaddr:$src)>; + +def : Pat<(sextloadi1 iaddr:$src), (SEXT8 (LBUI iaddr:$src))>; +def : Pat<(sextloadi8 iaddr:$src), (SEXT8 (LBUI iaddr:$src))>; +def : Pat<(sextloadi16 iaddr:$src), (SEXT16 (LHUI iaddr:$src))>; +def : Pat<(sextloadi1 xaddr:$src), (SEXT8 (LBU xaddr:$src))>; +def : Pat<(sextloadi8 xaddr:$src), (SEXT8 (LBU xaddr:$src))>; +def : Pat<(sextloadi16 xaddr:$src), (SEXT16 (LHU xaddr:$src))>; + +// peepholes +def : Pat<(store (i32 0), iaddr:$dst), (SWI R0, iaddr:$dst)>; + +//===----------------------------------------------------------------------===// +// Floating Point Support +//===----------------------------------------------------------------------===// +include "MBlazeInstrFSL.td" +include "MBlazeInstrFPU.td" diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp new file mode 100644 index 0000000..c8faffc --- /dev/null +++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp @@ -0,0 +1,109 @@ +//===- MBlazeIntrinsicInfo.cpp - Intrinsic Information -00-------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MBlaze implementation of TargetIntrinsicInfo. 
+// +//===----------------------------------------------------------------------===// + +#include "MBlazeIntrinsicInfo.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/Module.h" +#include "llvm/Type.h" +#include "llvm/Support/raw_ostream.h" +#include <cstring> + +using namespace llvm; + +namespace mblazeIntrinsic { + + enum ID { + last_non_mblaze_intrinsic = Intrinsic::num_intrinsics-1, +#define GET_INTRINSIC_ENUM_VALUES +#include "MBlazeGenIntrinsics.inc" +#undef GET_INTRINSIC_ENUM_VALUES + , num_mblaze_intrinsics + }; + +#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN +#include "MBlazeGenIntrinsics.inc" +#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN +} + +std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, const Type **Tys, + unsigned numTys) const { + static const char *const names[] = { +#define GET_INTRINSIC_NAME_TABLE +#include "MBlazeGenIntrinsics.inc" +#undef GET_INTRINSIC_NAME_TABLE + }; + + assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded"); + if (IntrID < Intrinsic::num_intrinsics) + return 0; + assert(IntrID < mblazeIntrinsic::num_mblaze_intrinsics && + "Invalid intrinsic ID"); + + std::string Result(names[IntrID - Intrinsic::num_intrinsics]); + return Result; +} + +unsigned MBlazeIntrinsicInfo:: +lookupName(const char *Name, unsigned Len) const { +#define GET_FUNCTION_RECOGNIZER +#include "MBlazeGenIntrinsics.inc" +#undef GET_FUNCTION_RECOGNIZER + return 0; +} + +unsigned MBlazeIntrinsicInfo:: +lookupGCCName(const char *Name) const { + return mblazeIntrinsic::getIntrinsicForGCCBuiltin("mblaze",Name); +} + +bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const { + // Overload Table + const bool OTable[] = { +#define GET_INTRINSIC_OVERLOAD_TABLE +#include "MBlazeGenIntrinsics.inc" +#undef GET_INTRINSIC_OVERLOAD_TABLE + }; + if (IntrID == 0) + return false; + else + return OTable[IntrID - Intrinsic::num_intrinsics]; +} + +/// This defines the "getAttributes(ID id)" method. +#define GET_INTRINSIC_ATTRIBUTES +#include "MBlazeGenIntrinsics.inc" +#undef GET_INTRINSIC_ATTRIBUTES + +static const FunctionType *getType(LLVMContext &Context, unsigned id) { + const Type *ResultTy = NULL; + std::vector<const Type*> ArgTys; + bool IsVarArg = false; + +#define GET_INTRINSIC_GENERATOR +#include "MBlazeGenIntrinsics.inc" +#undef GET_INTRINSIC_GENERATOR + + return FunctionType::get(ResultTy, ArgTys, IsVarArg); +} + +Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID, + const Type **Tys, + unsigned numTy) const { + assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded"); + AttrListPtr AList = getAttributes((mblazeIntrinsic::ID) IntrID); + return cast<Function>(M->getOrInsertFunction(getName(IntrID), + getType(M->getContext(), IntrID), + AList)); +} diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.h b/lib/Target/MBlaze/MBlazeIntrinsicInfo.h new file mode 100644 index 0000000..9804c77 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.h @@ -0,0 +1,33 @@ +//===- MBlazeIntrinsicInfo.h - MBlaze Intrinsic Information -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MBlaze implementation of TargetIntrinsicInfo. 
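A hedged sketch of how the MBlazeIntrinsicInfo.cpp hooks above might be driven by a client: the helper itself is hypothetical, and the intrinsic name assumes the usual int_mblaze_fsl_get to "llvm.mblaze.fsl.get" TableGen naming convention.

#include "MBlazeIntrinsicInfo.h"
#include "llvm/Module.h"
#include <cstring>

using namespace llvm;

// Hypothetical helper: look up an MBlaze intrinsic by name and make sure
// its declaration exists in the module.
static Function *getFSLGetDeclaration(Module &M,
                                      const MBlazeIntrinsicInfo &II) {
  const char *Name = "llvm.mblaze.fsl.get"; // assumed intrinsic name
  unsigned ID = II.lookupName(Name, std::strlen(Name));
  if (ID == 0)
    return 0;                        // not an MBlaze intrinsic
  return II.getDeclaration(&M, ID);  // inserts the declaration if needed
}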
+// +//===----------------------------------------------------------------------===// +#ifndef MBLAZEINTRINSICS_H +#define MBLAZEINTRINSICS_H + +#include "llvm/Target/TargetIntrinsicInfo.h" + +namespace llvm { + + class MBlazeIntrinsicInfo : public TargetIntrinsicInfo { + public: + std::string getName(unsigned IntrID, const Type **Tys = 0, + unsigned numTys = 0) const; + unsigned lookupName(const char *Name, unsigned Len) const; + unsigned lookupGCCName(const char *Name) const; + bool isOverloaded(unsigned IID) const; + Function *getDeclaration(Module *M, unsigned ID, const Type **Tys = 0, + unsigned numTys = 0) const; + }; + +} + +#endif diff --git a/lib/Target/MBlaze/MBlazeIntrinsics.td b/lib/Target/MBlaze/MBlazeIntrinsics.td new file mode 100644 index 0000000..76eb563 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeIntrinsics.td @@ -0,0 +1,137 @@ +//===- IntrinsicsMBlaze.td - Defines MBlaze intrinsics -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the MicroBlaze-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Definitions for all MBlaze intrinsics. +// + +// MBlaze intrinsic classes. +let TargetPrefix = "mblaze", isTarget = 1 in { + class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty], + [IntrWriteMem]>; + + class MBFSL_Put_Intrinsic : Intrinsic<[llvm_void_ty], + [llvm_i32_ty, llvm_i32_ty], + [IntrWriteMem]>; + + class MBFSL_PutT_Intrinsic : Intrinsic<[llvm_void_ty], + [llvm_i32_ty], + [IntrWriteMem]>; +} + +//===----------------------------------------------------------------------===// +// MicroBlaze FSL Get Intrinsic Definitions. 
+// + +def int_mblaze_fsl_get : GCCBuiltin<"__builtin_mblaze_fsl_get">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_aget : GCCBuiltin<"__builtin_mblaze_fsl_aget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_cget : GCCBuiltin<"__builtin_mblaze_fsl_cget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_caget : GCCBuiltin<"__builtin_mblaze_fsl_caget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_eget : GCCBuiltin<"__builtin_mblaze_fsl_eget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_eaget : GCCBuiltin<"__builtin_mblaze_fsl_eaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_ecget : GCCBuiltin<"__builtin_mblaze_fsl_ecget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_ecaget : GCCBuiltin<"__builtin_mblaze_fsl_ecaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_nget : GCCBuiltin<"__builtin_mblaze_fsl_nget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_naget : GCCBuiltin<"__builtin_mblaze_fsl_naget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_ncget : GCCBuiltin<"__builtin_mblaze_fsl_ncget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_ncaget : GCCBuiltin<"__builtin_mblaze_fsl_ncaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_neget : GCCBuiltin<"__builtin_mblaze_fsl_neget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_neaget : GCCBuiltin<"__builtin_mblaze_fsl_neaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_necget : GCCBuiltin<"__builtin_mblaze_fsl_necget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_necaget : GCCBuiltin<"__builtin_mblaze_fsl_necaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tget : GCCBuiltin<"__builtin_mblaze_fsl_tget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_taget : GCCBuiltin<"__builtin_mblaze_fsl_taget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tcget : GCCBuiltin<"__builtin_mblaze_fsl_tcget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tcaget : GCCBuiltin<"__builtin_mblaze_fsl_tcaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_teget : GCCBuiltin<"__builtin_mblaze_fsl_teget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_teaget : GCCBuiltin<"__builtin_mblaze_fsl_teaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tecget : GCCBuiltin<"__builtin_mblaze_fsl_tecget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tecaget : GCCBuiltin<"__builtin_mblaze_fsl_tecaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tnget : GCCBuiltin<"__builtin_mblaze_fsl_tnget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tnaget : GCCBuiltin<"__builtin_mblaze_fsl_tnaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tncget : GCCBuiltin<"__builtin_mblaze_fsl_tncget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tncaget : GCCBuiltin<"__builtin_mblaze_fsl_tncaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tneget : GCCBuiltin<"__builtin_mblaze_fsl_tneget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tneaget : GCCBuiltin<"__builtin_mblaze_fsl_tneaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tnecget : GCCBuiltin<"__builtin_mblaze_fsl_tnecget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tnecaget : GCCBuiltin<"__builtin_mblaze_fsl_tnecaget">, + MBFSL_Get_Intrinsic; + +//===----------------------------------------------------------------------===// +// MicroBlaze FSL Put Intrinsic Definitions. 
+// + +def int_mblaze_fsl_put : GCCBuiltin<"__builtin_mblaze_fsl_put">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_aput : GCCBuiltin<"__builtin_mblaze_fsl_aput">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_cput : GCCBuiltin<"__builtin_mblaze_fsl_cput">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_caput : GCCBuiltin<"__builtin_mblaze_fsl_caput">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_nput : GCCBuiltin<"__builtin_mblaze_fsl_nput">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_naput : GCCBuiltin<"__builtin_mblaze_fsl_naput">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_ncput : GCCBuiltin<"__builtin_mblaze_fsl_ncput">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_ncaput : GCCBuiltin<"__builtin_mblaze_fsl_ncaput">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_tput : GCCBuiltin<"__builtin_mblaze_fsl_tput">, + MBFSL_PutT_Intrinsic; +def int_mblaze_fsl_taput : GCCBuiltin<"__builtin_mblaze_fsl_taput">, + MBFSL_PutT_Intrinsic; +def int_mblaze_fsl_tcput : GCCBuiltin<"__builtin_mblaze_fsl_tcput">, + MBFSL_PutT_Intrinsic; +def int_mblaze_fsl_tcaput : GCCBuiltin<"__builtin_mblaze_fsl_tcaput">, + MBFSL_PutT_Intrinsic; +def int_mblaze_fsl_tnput : GCCBuiltin<"__builtin_mblaze_fsl_tnput">, + MBFSL_PutT_Intrinsic; +def int_mblaze_fsl_tnaput : GCCBuiltin<"__builtin_mblaze_fsl_tnaput">, + MBFSL_PutT_Intrinsic; +def int_mblaze_fsl_tncput : GCCBuiltin<"__builtin_mblaze_fsl_tncput">, + MBFSL_PutT_Intrinsic; +def int_mblaze_fsl_tncaput : GCCBuiltin<"__builtin_mblaze_fsl_tncaput">, + MBFSL_PutT_Intrinsic; diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp new file mode 100644 index 0000000..7ae465d --- /dev/null +++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp @@ -0,0 +1,27 @@ +//===-- MBlazeMCAsmInfo.cpp - MBlaze asm properties -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the MBlazeMCAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "MBlazeMCAsmInfo.h" +using namespace llvm; + +MBlazeMCAsmInfo::MBlazeMCAsmInfo(const Target &T, const StringRef &TT) { + AlignmentIsInBytes = false; + Data16bitsDirective = "\t.half\t"; + Data32bitsDirective = "\t.word\t"; + Data64bitsDirective = 0; + PrivateGlobalPrefix = "$"; + CommentString = "#"; + ZeroDirective = "\t.space\t"; + GPRel32Directive = "\t.gpword\t"; + HasSetDirective = false; +} diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.h b/lib/Target/MBlaze/MBlazeMCAsmInfo.h new file mode 100644 index 0000000..bccb418 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.h @@ -0,0 +1,30 @@ +//=====-- MBlazeMCAsmInfo.h - MBlaze asm properties -----------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MBlazeMCAsmInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZETARGETASMINFO_H +#define MBLAZETARGETASMINFO_H + +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { + class Target; + class StringRef; + + class MBlazeMCAsmInfo : public MCAsmInfo { + public: + explicit MBlazeMCAsmInfo(const Target &T, const StringRef &TT); + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/MBlaze/MBlazeMachineFunction.h b/lib/Target/MBlaze/MBlazeMachineFunction.h new file mode 100644 index 0000000..08d4dca --- /dev/null +++ b/lib/Target/MBlaze/MBlazeMachineFunction.h @@ -0,0 +1,136 @@ +//===-- MBlazeMachineFunctionInfo.h - Private data ----------------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the MBlaze specific subclass of MachineFunctionInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZE_MACHINE_FUNCTION_INFO_H +#define MBLAZE_MACHINE_FUNCTION_INFO_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/VectorExtras.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" + +namespace llvm { + +/// MBlazeFunctionInfo - This class is derived from MachineFunction private +/// MBlaze target-specific information for each MachineFunction. +class MBlazeFunctionInfo : public MachineFunctionInfo { + +private: + /// Holds for each function where on the stack the Frame Pointer must be + /// saved. This is used on Prologue and Epilogue to emit FP save/restore + int FPStackOffset; + + /// Holds for each function where on the stack the Return Address must be + /// saved. This is used on Prologue and Epilogue to emit RA save/restore + int RAStackOffset; + + /// At each function entry a special bitmask directive must be emitted + /// to help in debugging CPU callee saved registers. It needs a negative + /// offset from the final stack size and its higher register location on + /// the stack. + int CPUTopSavedRegOff; + + /// MBlazeFIHolder - Holds a FrameIndex and it's Stack Pointer Offset + struct MBlazeFIHolder { + + int FI; + int SPOffset; + + MBlazeFIHolder(int FrameIndex, int StackPointerOffset) + : FI(FrameIndex), SPOffset(StackPointerOffset) {} + }; + + /// When PIC is used the GP must be saved on the stack on the function + /// prologue and must be reloaded from this stack location after every + /// call. A reference to its stack location and frame index must be kept + /// to be used on emitPrologue and processFunctionBeforeFrameFinalized. + MBlazeFIHolder GPHolder; + + /// On LowerFormalArguments the stack size is unknown, so the Stack + /// Pointer Offset calculation of "not in register arguments" must be + /// postponed to emitPrologue. + SmallVector<MBlazeFIHolder, 16> FnLoadArgs; + bool HasLoadArgs; + + // When VarArgs, we must write registers back to caller stack, preserving + // on register arguments. Since the stack size is unknown on + // LowerFormalArguments, the Stack Pointer Offset calculation must be + // postponed to emitPrologue. + SmallVector<MBlazeFIHolder, 4> FnStoreVarArgs; + bool HasStoreVarArgs; + + /// SRetReturnReg - Some subtargets require that sret lowering includes + /// returning the value of the returned struct in a register. 
This field + /// holds the virtual register into which the sret argument is passed. + unsigned SRetReturnReg; + + /// GlobalBaseReg - keeps track of the virtual register initialized for + /// use as the global base register. This is used for PIC in some PIC + /// relocation models. + unsigned GlobalBaseReg; + +public: + MBlazeFunctionInfo(MachineFunction& MF) + : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0), + GPHolder(-1,-1), HasLoadArgs(false), HasStoreVarArgs(false), + SRetReturnReg(0), GlobalBaseReg(0) + {} + + int getFPStackOffset() const { return FPStackOffset; } + void setFPStackOffset(int Off) { FPStackOffset = Off; } + + int getRAStackOffset() const { return RAStackOffset; } + void setRAStackOffset(int Off) { RAStackOffset = Off; } + + int getCPUTopSavedRegOff() const { return CPUTopSavedRegOff; } + void setCPUTopSavedRegOff(int Off) { CPUTopSavedRegOff = Off; } + + int getGPStackOffset() const { return GPHolder.SPOffset; } + int getGPFI() const { return GPHolder.FI; } + void setGPStackOffset(int Off) { GPHolder.SPOffset = Off; } + void setGPFI(int FI) { GPHolder.FI = FI; } + bool needGPSaveRestore() const { return GPHolder.SPOffset != -1; } + + bool hasLoadArgs() const { return HasLoadArgs; } + bool hasStoreVarArgs() const { return HasStoreVarArgs; } + + void recordLoadArgsFI(int FI, int SPOffset) { + if (!HasLoadArgs) HasLoadArgs=true; + FnLoadArgs.push_back(MBlazeFIHolder(FI, SPOffset)); + } + void recordStoreVarArgsFI(int FI, int SPOffset) { + if (!HasStoreVarArgs) HasStoreVarArgs=true; + FnStoreVarArgs.push_back(MBlazeFIHolder(FI, SPOffset)); + } + + void adjustLoadArgsFI(MachineFrameInfo *MFI) const { + if (!hasLoadArgs()) return; + for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i) + MFI->setObjectOffset( FnLoadArgs[i].FI, FnLoadArgs[i].SPOffset ); + } + void adjustStoreVarArgsFI(MachineFrameInfo *MFI) const { + if (!hasStoreVarArgs()) return; + for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i) + MFI->setObjectOffset( FnStoreVarArgs[i].FI, FnStoreVarArgs[i].SPOffset ); + } + + unsigned getSRetReturnReg() const { return SRetReturnReg; } + void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } + + unsigned getGlobalBaseReg() const { return GlobalBaseReg; } + void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } +}; + +} // end of namespace llvm + +#endif // MBLAZE_MACHINE_FUNCTION_INFO_H diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp new file mode 100644 index 0000000..8dfca81 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -0,0 +1,421 @@ +//===- MBlazeRegisterInfo.cpp - MBlaze Register Information -== -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MBlaze implementation of the TargetRegisterInfo +// class. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mblaze-reg-info" + +#include "MBlaze.h" +#include "MBlazeSubtarget.h" +#include "MBlazeRegisterInfo.h" +#include "MBlazeMachineFunction.h" +#include "llvm/Constants.h" +#include "llvm/Type.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" + +using namespace llvm; + +MBlazeRegisterInfo:: +MBlazeRegisterInfo(const MBlazeSubtarget &ST, const TargetInstrInfo &tii) + : MBlazeGenRegisterInfo(MBlaze::ADJCALLSTACKDOWN, MBlaze::ADJCALLSTACKUP), + Subtarget(ST), TII(tii) {} + +/// getRegisterNumbering - Given the enum value for some register, e.g. +/// MBlaze::R0, return the number that it corresponds to (e.g. 0). +unsigned MBlazeRegisterInfo::getRegisterNumbering(unsigned RegEnum) { + switch (RegEnum) { + case MBlaze::R0 : case MBlaze::F0 : return 0; + case MBlaze::R1 : case MBlaze::F1 : return 1; + case MBlaze::R2 : case MBlaze::F2 : return 2; + case MBlaze::R3 : case MBlaze::F3 : return 3; + case MBlaze::R4 : case MBlaze::F4 : return 4; + case MBlaze::R5 : case MBlaze::F5 : return 5; + case MBlaze::R6 : case MBlaze::F6 : return 6; + case MBlaze::R7 : case MBlaze::F7 : return 7; + case MBlaze::R8 : case MBlaze::F8 : return 8; + case MBlaze::R9 : case MBlaze::F9 : return 9; + case MBlaze::R10 : case MBlaze::F10 : return 10; + case MBlaze::R11 : case MBlaze::F11 : return 11; + case MBlaze::R12 : case MBlaze::F12 : return 12; + case MBlaze::R13 : case MBlaze::F13 : return 13; + case MBlaze::R14 : case MBlaze::F14 : return 14; + case MBlaze::R15 : case MBlaze::F15 : return 15; + case MBlaze::R16 : case MBlaze::F16 : return 16; + case MBlaze::R17 : case MBlaze::F17 : return 17; + case MBlaze::R18 : case MBlaze::F18 : return 18; + case MBlaze::R19 : case MBlaze::F19 : return 19; + case MBlaze::R20 : case MBlaze::F20 : return 20; + case MBlaze::R21 : case MBlaze::F21 : return 21; + case MBlaze::R22 : case MBlaze::F22 : return 22; + case MBlaze::R23 : case MBlaze::F23 : return 23; + case MBlaze::R24 : case MBlaze::F24 : return 24; + case MBlaze::R25 : case MBlaze::F25 : return 25; + case MBlaze::R26 : case MBlaze::F26 : return 26; + case MBlaze::R27 : case MBlaze::F27 : return 27; + case MBlaze::R28 : case MBlaze::F28 : return 28; + case MBlaze::R29 : case MBlaze::F29 : return 29; + case MBlaze::R30 : case MBlaze::F30 : return 30; + case MBlaze::R31 : case MBlaze::F31 : return 31; + default: llvm_unreachable("Unknown register number!"); + } + return 0; // Not reached +} + +/// getRegisterFromNumbering - Given the enum value for some register, e.g. +/// MBlaze::R0, return the number that it corresponds to (e.g. 0). 
+unsigned MBlazeRegisterInfo::getRegisterFromNumbering(unsigned Reg) { + switch (Reg) { + case 0 : return MBlaze::R0; + case 1 : return MBlaze::R1; + case 2 : return MBlaze::R2; + case 3 : return MBlaze::R3; + case 4 : return MBlaze::R4; + case 5 : return MBlaze::R5; + case 6 : return MBlaze::R6; + case 7 : return MBlaze::R7; + case 8 : return MBlaze::R8; + case 9 : return MBlaze::R9; + case 10 : return MBlaze::R10; + case 11 : return MBlaze::R11; + case 12 : return MBlaze::R12; + case 13 : return MBlaze::R13; + case 14 : return MBlaze::R14; + case 15 : return MBlaze::R15; + case 16 : return MBlaze::R16; + case 17 : return MBlaze::R17; + case 18 : return MBlaze::R18; + case 19 : return MBlaze::R19; + case 20 : return MBlaze::R20; + case 21 : return MBlaze::R21; + case 22 : return MBlaze::R22; + case 23 : return MBlaze::R23; + case 24 : return MBlaze::R24; + case 25 : return MBlaze::R25; + case 26 : return MBlaze::R26; + case 27 : return MBlaze::R27; + case 28 : return MBlaze::R28; + case 29 : return MBlaze::R29; + case 30 : return MBlaze::R30; + case 31 : return MBlaze::R31; + default: llvm_unreachable("Unknown register number!"); + } + return 0; // Not reached +} + +unsigned MBlazeRegisterInfo::getPICCallReg() { + return MBlaze::R20; +} + +//===----------------------------------------------------------------------===// +// Callee Saved Registers methods +//===----------------------------------------------------------------------===// + +/// MBlaze Callee Saved Registers +const unsigned* MBlazeRegisterInfo:: +getCalleeSavedRegs(const MachineFunction *MF) const { + // MBlaze callee-save register range is R20 - R31 + static const unsigned CalleeSavedRegs[] = { + MBlaze::R20, MBlaze::R21, MBlaze::R22, MBlaze::R23, + MBlaze::R24, MBlaze::R25, MBlaze::R26, MBlaze::R27, + MBlaze::R28, MBlaze::R29, MBlaze::R30, MBlaze::R31, + 0 + }; + + return CalleeSavedRegs; +} + +/// MBlaze Callee Saved Register Classes +const TargetRegisterClass* const* MBlazeRegisterInfo:: +getCalleeSavedRegClasses(const MachineFunction *MF) const { + static const TargetRegisterClass * const CalleeSavedRC[] = { + &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, + &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, + &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, + &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, + &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, + &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, + 0 + }; + + return CalleeSavedRC; +} + +BitVector MBlazeRegisterInfo:: +getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + Reserved.set(MBlaze::R0); + Reserved.set(MBlaze::R1); + Reserved.set(MBlaze::R2); + Reserved.set(MBlaze::R13); + Reserved.set(MBlaze::R14); + Reserved.set(MBlaze::R15); + Reserved.set(MBlaze::R16); + Reserved.set(MBlaze::R17); + Reserved.set(MBlaze::R18); + Reserved.set(MBlaze::R19); + return Reserved; +} + +//===----------------------------------------------------------------------===// +// +// Stack Frame Processing methods +// +----------------------------+ +// +// The stack is allocated decrementing the stack pointer on +// the first instruction of a function prologue. Once decremented, +// all stack references are are done through a positive offset +// from the stack/frame pointer, so the stack is considered +// to grow up. 
+// +//===----------------------------------------------------------------------===// + +void MBlazeRegisterInfo::adjustMBlazeStackFrame(MachineFunction &MF) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); + + // See the description at MicroBlazeMachineFunction.h + int TopCPUSavedRegOff = -1; + + // Adjust CPU Callee Saved Registers Area. Registers RA and FP must + // be saved in this CPU Area there is the need. This whole Area must + // be aligned to the default Stack Alignment requirements. + unsigned StackOffset = MFI->getStackSize(); + unsigned RegSize = 4; + + // Replace the dummy '0' SPOffset by the negative offsets, as explained on + // LowerFORMAL_ARGUMENTS. Leaving '0' for while is necessary to avoid + // the approach done by calculateFrameObjectOffsets to the stack frame. + MBlazeFI->adjustLoadArgsFI(MFI); + MBlazeFI->adjustStoreVarArgsFI(MFI); + + if (hasFP(MF)) { + MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), + StackOffset); + MBlazeFI->setFPStackOffset(StackOffset); + TopCPUSavedRegOff = StackOffset; + StackOffset += RegSize; + } + + if (MFI->hasCalls()) { + MBlazeFI->setRAStackOffset(0); + MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), + StackOffset); + TopCPUSavedRegOff = StackOffset; + StackOffset += RegSize; + } + + // Update frame info + MFI->setStackSize(StackOffset); + + // Recalculate the final tops offset. The final values must be '0' + // if there isn't a callee saved register for CPU or FPU, otherwise + // a negative offset is needed. + if (TopCPUSavedRegOff >= 0) + MBlazeFI->setCPUTopSavedRegOff(TopCPUSavedRegOff-StackOffset); +} + +// hasFP - Return true if the specified function should have a dedicated frame +// pointer register. This is true if the function has variable sized allocas or +// if frame pointer elimination is disabled. +bool MBlazeRegisterInfo::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return NoFramePointerElim || MFI->hasVarSizedObjects(); +} + +// This function eliminate ADJCALLSTACKDOWN, +// ADJCALLSTACKUP pseudo instructions +void MBlazeRegisterInfo:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. + MBB.erase(I); +} + +// FrameIndex represent objects inside a abstract stack. +// We must replace FrameIndex with an stack/frame pointer +// direct reference. +unsigned MBlazeRegisterInfo:: +eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, + int *Value, RegScavenger *RS) const { + MachineInstr &MI = *II; + MachineFunction &MF = *MI.getParent()->getParent(); + + unsigned i = 0; + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && + "Instr doesn't have FrameIndex operand!"); + } + + unsigned oi = i == 2 ? 1 : 2; + + DEBUG(errs() << "\nFunction : " << MF.getFunction()->getName() << "\n"; + errs() << "<--------->\n" << MI); + + int FrameIndex = MI.getOperand(i).getIndex(); + int stackSize = MF.getFrameInfo()->getStackSize(); + int spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex); + + DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n" + << "spOffset : " << spOffset << "\n" + << "stackSize : " << stackSize << "\n"); + + // as explained on LowerFormalArguments, detect negative offsets + // and adjust SPOffsets considering the final stack size. + int Offset = (spOffset < 0) ? 
(stackSize - spOffset) : (spOffset + 4); + Offset += MI.getOperand(oi).getImm(); + + DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); + + MI.getOperand(oi).ChangeToImmediate(Offset); + MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false); + return 0; +} + +void MBlazeRegisterInfo:: +emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + DebugLoc dl = (MBBI != MBB.end() ? + MBBI->getDebugLoc() : DebugLoc::getUnknownLoc()); + + // Get the right frame order for MBlaze. + adjustMBlazeStackFrame(MF); + + // Get the number of bytes to allocate from the FrameInfo. + unsigned StackSize = MFI->getStackSize(); + + // No need to allocate space on the stack. + if (StackSize == 0 && !MFI->hasCalls()) return; + if (StackSize < 28 && MFI->hasCalls()) StackSize = 28; + + int FPOffset = MBlazeFI->getFPStackOffset(); + int RAOffset = MBlazeFI->getRAStackOffset(); + + // Adjust stack : addi R1, R1, -imm + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADDI), MBlaze::R1) + .addReg(MBlaze::R1).addImm(-StackSize); + + // Save the return address only if the function isnt a leaf one. + // swi R15, R1, stack_loc + if (MFI->hasCalls()) { + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::SWI)) + .addReg(MBlaze::R15).addImm(RAOffset).addReg(MBlaze::R1); + } + + // if framepointer enabled, save it and set it + // to point to the stack pointer + if (hasFP(MF)) { + // swi R19, R1, stack_loc + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::SWI)) + .addReg(MBlaze::R19).addImm(FPOffset).addReg(MBlaze::R1); + + // add R19, R1, R0 + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADD), MBlaze::R19) + .addReg(MBlaze::R1).addReg(MBlaze::R0); + } +} + +void MBlazeRegisterInfo:: +emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = prior(MBB.end()); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); + DebugLoc dl = MBBI->getDebugLoc(); + + // Get the FI's where RA and FP are saved. + int FPOffset = MBlazeFI->getFPStackOffset(); + int RAOffset = MBlazeFI->getRAStackOffset(); + + // if framepointer enabled, restore it and restore the + // stack pointer + if (hasFP(MF)) { + // add R1, R19, R0 + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADD), MBlaze::R1) + .addReg(MBlaze::R19).addReg(MBlaze::R0); + + // lwi R19, R1, stack_loc + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R19) + .addImm(FPOffset).addReg(MBlaze::R1); + } + + // Restore the return address only if the function isnt a leaf one. + // lwi R15, R1, stack_loc + if (MFI->hasCalls()) { + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R15) + .addImm(RAOffset).addReg(MBlaze::R1); + } + + // Get the number of bytes from FrameInfo + int StackSize = (int) MFI->getStackSize(); + if (StackSize < 28 && MFI->hasCalls()) StackSize = 28; + + // adjust stack. + // addi R1, R1, imm + if (StackSize) { + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADDI), MBlaze::R1) + .addReg(MBlaze::R1).addImm(StackSize); + } +} + + +void MBlazeRegisterInfo:: +processFunctionBeforeFrameFinalized(MachineFunction &MF) const { + // Set the stack offset where GP must be saved/loaded from. 
+ MachineFrameInfo *MFI = MF.getFrameInfo(); + MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); + if (MBlazeFI->needGPSaveRestore()) + MFI->setObjectOffset(MBlazeFI->getGPFI(), MBlazeFI->getGPStackOffset()); +} + +unsigned MBlazeRegisterInfo::getRARegister() const { + return MBlaze::R15; +} + +unsigned MBlazeRegisterInfo::getFrameRegister(const MachineFunction &MF) const { + return hasFP(MF) ? MBlaze::R19 : MBlaze::R1; +} + +unsigned MBlazeRegisterInfo::getEHExceptionRegister() const { + llvm_unreachable("What is the exception register"); + return 0; +} + +unsigned MBlazeRegisterInfo::getEHHandlerRegister() const { + llvm_unreachable("What is the exception handler register"); + return 0; +} + +int MBlazeRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { + llvm_unreachable("What is the dwarf register number"); + return -1; +} + +#include "MBlazeGenRegisterInfo.inc" + diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h new file mode 100644 index 0000000..cde7d39 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -0,0 +1,96 @@ +//===- MBlazeRegisterInfo.h - MBlaze Register Information Impl --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MBlaze implementation of the TargetRegisterInfo +// class. +// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZEREGISTERINFO_H +#define MBLAZEREGISTERINFO_H + +#include "MBlaze.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "MBlazeGenRegisterInfo.h.inc" + +namespace llvm { +class MBlazeSubtarget; +class TargetInstrInfo; +class Type; + +namespace MBlaze { + /// SubregIndex - The index of various sized subregister classes. Note that + /// these indices must be kept in sync with the class indices in the + /// MBlazeRegisterInfo.td file. + enum SubregIndex { + SUBREG_FPEVEN = 1, SUBREG_FPODD = 2 + }; +} + +struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { + const MBlazeSubtarget &Subtarget; + const TargetInstrInfo &TII; + + MBlazeRegisterInfo(const MBlazeSubtarget &Subtarget, + const TargetInstrInfo &tii); + + /// getRegisterNumbering - Given the enum value for some register, e.g. + /// MBlaze::RA, return the number that it corresponds to (e.g. 31). + static unsigned getRegisterNumbering(unsigned RegEnum); + static unsigned getRegisterFromNumbering(unsigned RegEnum); + + /// Get PIC indirect call register + static unsigned getPICCallReg(); + + /// Adjust the MBlaze stack frame. + void adjustMBlazeStackFrame(MachineFunction &MF) const; + + /// Code Generation virtual methods... 
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; + + const TargetRegisterClass* const* + getCalleeSavedRegClasses(const MachineFunction* MF = 0) const; + + BitVector getReservedRegs(const MachineFunction &MF) const; + + bool hasFP(const MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + /// Stack Frame Processing Methods + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; + + void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + /// Debug information queries. + unsigned getRARegister() const; + unsigned getFrameRegister(const MachineFunction &MF) const; + + /// Exception handling queries. + unsigned getEHExceptionRegister() const; + unsigned getEHHandlerRegister() const; + + /// targetHandlesStackFrameRounding - Returns true if the target is + /// responsible for rounding up the stack frame (probably at emitPrologue + /// time). + bool targetHandlesStackFrameRounding() const { return true; } + + int getDwarfRegNum(unsigned RegNum, bool isEH) const; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td new file mode 100644 index 0000000..96a5c98 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td @@ -0,0 +1,186 @@ +//===- MBlazeRegisterInfo.td - MBlaze Register defs -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the MicroBlaze register file +//===----------------------------------------------------------------------===// + +// We have banks of 32 registers each. 
+class MBlazeReg<string n> : Register<n> { + field bits<5> Num; + let Namespace = "MBlaze"; +} + +class MBlazeRegWithSubRegs<string n, list<Register> subregs> + : RegisterWithSubRegs<n, subregs> { + field bits<5> Num; + let Namespace = "MBlaze"; +} + +// MBlaze CPU Registers +class MBlazeGPRReg<bits<5> num, string n> : MBlazeReg<n> { + let Num = num; +} + +// MBlaze 32-bit (aliased) FPU Registers +class FPR<bits<5> num, string n, list<Register> subregs> + : MBlazeRegWithSubRegs<n, subregs> { + let Num = num; +} + +//===----------------------------------------------------------------------===// +// Registers +//===----------------------------------------------------------------------===// + +let Namespace = "MBlaze" in { + + // General Purpose Registers + def R0 : MBlazeGPRReg< 0, "r0">, DwarfRegNum<[0]>; + def R1 : MBlazeGPRReg< 1, "r1">, DwarfRegNum<[1]>; + def R2 : MBlazeGPRReg< 2, "r2">, DwarfRegNum<[2]>; + def R3 : MBlazeGPRReg< 3, "r3">, DwarfRegNum<[3]>; + def R4 : MBlazeGPRReg< 4, "r4">, DwarfRegNum<[5]>; + def R5 : MBlazeGPRReg< 5, "r5">, DwarfRegNum<[5]>; + def R6 : MBlazeGPRReg< 6, "r6">, DwarfRegNum<[6]>; + def R7 : MBlazeGPRReg< 7, "r7">, DwarfRegNum<[7]>; + def R8 : MBlazeGPRReg< 8, "r8">, DwarfRegNum<[8]>; + def R9 : MBlazeGPRReg< 9, "r9">, DwarfRegNum<[9]>; + def R10 : MBlazeGPRReg< 10, "r10">, DwarfRegNum<[10]>; + def R11 : MBlazeGPRReg< 11, "r11">, DwarfRegNum<[11]>; + def R12 : MBlazeGPRReg< 12, "r12">, DwarfRegNum<[12]>; + def R13 : MBlazeGPRReg< 13, "r13">, DwarfRegNum<[13]>; + def R14 : MBlazeGPRReg< 14, "r14">, DwarfRegNum<[14]>; + def R15 : MBlazeGPRReg< 15, "r15">, DwarfRegNum<[15]>; + def R16 : MBlazeGPRReg< 16, "r16">, DwarfRegNum<[16]>; + def R17 : MBlazeGPRReg< 17, "r17">, DwarfRegNum<[17]>; + def R18 : MBlazeGPRReg< 18, "r18">, DwarfRegNum<[18]>; + def R19 : MBlazeGPRReg< 19, "r19">, DwarfRegNum<[19]>; + def R20 : MBlazeGPRReg< 20, "r20">, DwarfRegNum<[20]>; + def R21 : MBlazeGPRReg< 21, "r21">, DwarfRegNum<[21]>; + def R22 : MBlazeGPRReg< 22, "r22">, DwarfRegNum<[22]>; + def R23 : MBlazeGPRReg< 23, "r23">, DwarfRegNum<[23]>; + def R24 : MBlazeGPRReg< 24, "r24">, DwarfRegNum<[24]>; + def R25 : MBlazeGPRReg< 25, "r25">, DwarfRegNum<[25]>; + def R26 : MBlazeGPRReg< 26, "r26">, DwarfRegNum<[26]>; + def R27 : MBlazeGPRReg< 27, "r27">, DwarfRegNum<[27]>; + def R28 : MBlazeGPRReg< 28, "r28">, DwarfRegNum<[28]>; + def R29 : MBlazeGPRReg< 29, "r29">, DwarfRegNum<[29]>; + def R30 : MBlazeGPRReg< 30, "r30">, DwarfRegNum<[30]>; + def R31 : MBlazeGPRReg< 31, "r31">, DwarfRegNum<[31]>; + + /// MBlaze Single point precision FPU Registers + def F0 : FPR< 0, "r0", [R0]>, DwarfRegNum<[32]>; + def F1 : FPR< 1, "r1", [R1]>, DwarfRegNum<[33]>; + def F2 : FPR< 2, "r2", [R2]>, DwarfRegNum<[34]>; + def F3 : FPR< 3, "r3", [R3]>, DwarfRegNum<[35]>; + def F4 : FPR< 4, "r4", [R4]>, DwarfRegNum<[36]>; + def F5 : FPR< 5, "r5", [R5]>, DwarfRegNum<[37]>; + def F6 : FPR< 6, "r6", [R6]>, DwarfRegNum<[38]>; + def F7 : FPR< 7, "r7", [R7]>, DwarfRegNum<[39]>; + def F8 : FPR< 8, "r8", [R8]>, DwarfRegNum<[40]>; + def F9 : FPR< 9, "r9", [R9]>, DwarfRegNum<[41]>; + def F10 : FPR<10, "r10", [R10]>, DwarfRegNum<[42]>; + def F11 : FPR<11, "r11", [R11]>, DwarfRegNum<[43]>; + def F12 : FPR<12, "r12", [R12]>, DwarfRegNum<[44]>; + def F13 : FPR<13, "r13", [R13]>, DwarfRegNum<[45]>; + def F14 : FPR<14, "r14", [R14]>, DwarfRegNum<[46]>; + def F15 : FPR<15, "r15", [R15]>, DwarfRegNum<[47]>; + def F16 : FPR<16, "r16", [R16]>, DwarfRegNum<[48]>; + def F17 : FPR<17, "r17", [R17]>, DwarfRegNum<[49]>; + def F18 : 
FPR<18, "r18", [R18]>, DwarfRegNum<[50]>; + def F19 : FPR<19, "r19", [R19]>, DwarfRegNum<[51]>; + def F20 : FPR<20, "r20", [R20]>, DwarfRegNum<[52]>; + def F21 : FPR<21, "r21", [R21]>, DwarfRegNum<[53]>; + def F22 : FPR<22, "r22", [R22]>, DwarfRegNum<[54]>; + def F23 : FPR<23, "r23", [R23]>, DwarfRegNum<[55]>; + def F24 : FPR<24, "r24", [R24]>, DwarfRegNum<[56]>; + def F25 : FPR<25, "r25", [R25]>, DwarfRegNum<[57]>; + def F26 : FPR<26, "r26", [R26]>, DwarfRegNum<[58]>; + def F27 : FPR<27, "r27", [R27]>, DwarfRegNum<[59]>; + def F28 : FPR<28, "r28", [R28]>, DwarfRegNum<[60]>; + def F29 : FPR<29, "r29", [R29]>, DwarfRegNum<[61]>; + def F30 : FPR<30, "r30", [R30]>, DwarfRegNum<[62]>; + def F31 : FPR<31, "r31", [R31]>, DwarfRegNum<[63]>; +} + +//===----------------------------------------------------------------------===// +// Register Classes +//===----------------------------------------------------------------------===// + +def CPURegs : RegisterClass<"MBlaze", [i32], 32, + [ + // Return Values and Arguments + R3, R4, R5, R6, R7, R8, R9, R10, + + // Not preserved across procedure calls + R11, R12, + + // Callee save + R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, + + // Reserved + R0, // Always zero + R1, // The stack pointer + R2, // Read-only small data area anchor + R13, // Read-write small data area anchor + R14, // Return address for interrupts + R15, // Return address for sub-routines + R16, // Return address for trap + R17, // Return address for exceptions + R18, // Reserved for assembler + R19 // The frame-pointer + ]> +{ + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + CPURegsClass::iterator + CPURegsClass::allocation_order_end(const MachineFunction &MF) const { + // The last 10 registers on the list above are reserved + return end()-10; + } + }]; +} + +def FGR32 : RegisterClass<"MBlaze", [f32], 32, + [ + // Return Values and Arguments + F3, F4, F5, F6, F7, F8, F9, F10, + + // Not preserved across procedure calls + F11, F12, + + // Callee save + F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, F31, + + // Reserved + F0, // Always zero + F1, // The stack pointer + F2, // Read-only small data area anchor + F13, // Read-write small data area anchor + F14, // Return address for interrupts + F15, // Return address for sub-routines + F16, // Return address for trap + F17, // Return address for exceptions + F18, // Reserved for assembler + F19 // The frame pointer + ]> +{ + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + FGR32Class::iterator + FGR32Class::allocation_order_end(const MachineFunction &MF) const { + // The last 10 registers on the list above are reserved + return end()-10; + } + }]; +} diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td new file mode 100644 index 0000000..6a94491 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeSchedule.td @@ -0,0 +1,63 @@ +//===- MBlazeSchedule.td - MBlaze Scheduling Definitions --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Functional units across MBlaze chips sets. Based on GCC/MBlaze backend files. 
+//===----------------------------------------------------------------------===// +def ALU : FuncUnit; +def IMULDIV : FuncUnit; + +//===----------------------------------------------------------------------===// +// Instruction Itinerary classes used for MBlaze +//===----------------------------------------------------------------------===// +def IIAlu : InstrItinClass; +def IILoad : InstrItinClass; +def IIStore : InstrItinClass; +def IIXfer : InstrItinClass; +def IIBranch : InstrItinClass; +def IIHiLo : InstrItinClass; +def IIImul : InstrItinClass; +def IIIdiv : InstrItinClass; +def IIFcvt : InstrItinClass; +def IIFmove : InstrItinClass; +def IIFcmp : InstrItinClass; +def IIFadd : InstrItinClass; +def IIFmulSingle : InstrItinClass; +def IIFmulDouble : InstrItinClass; +def IIFdivSingle : InstrItinClass; +def IIFdivDouble : InstrItinClass; +def IIFsqrtSingle : InstrItinClass; +def IIFsqrtDouble : InstrItinClass; +def IIFrecipFsqrtStep : InstrItinClass; +def IIPseudo : InstrItinClass; + +//===----------------------------------------------------------------------===// +// MBlaze Generic instruction itineraries. +//===----------------------------------------------------------------------===// +def MBlazeGenericItineraries : ProcessorItineraries<[ + InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>, + InstrItinData<IILoad , [InstrStage<3, [ALU]>]>, + InstrItinData<IIStore , [InstrStage<1, [ALU]>]>, + InstrItinData<IIXfer , [InstrStage<2, [ALU]>]>, + InstrItinData<IIBranch , [InstrStage<1, [ALU]>]>, + InstrItinData<IIHiLo , [InstrStage<1, [IMULDIV]>]>, + InstrItinData<IIImul , [InstrStage<17, [IMULDIV]>]>, + InstrItinData<IIIdiv , [InstrStage<38, [IMULDIV]>]>, + InstrItinData<IIFcvt , [InstrStage<1, [ALU]>]>, + InstrItinData<IIFmove , [InstrStage<2, [ALU]>]>, + InstrItinData<IIFcmp , [InstrStage<3, [ALU]>]>, + InstrItinData<IIFadd , [InstrStage<4, [ALU]>]>, + InstrItinData<IIFmulSingle , [InstrStage<7, [ALU]>]>, + InstrItinData<IIFmulDouble , [InstrStage<8, [ALU]>]>, + InstrItinData<IIFdivSingle , [InstrStage<23, [ALU]>]>, + InstrItinData<IIFdivDouble , [InstrStage<36, [ALU]>]>, + InstrItinData<IIFsqrtSingle , [InstrStage<54, [ALU]>]>, + InstrItinData<IIFsqrtDouble , [InstrStage<12, [ALU]>]>, + InstrItinData<IIFrecipFsqrtStep , [InstrStage<5, [ALU]>]> +]>; diff --git a/lib/Target/MBlaze/MBlazeSubtarget.cpp b/lib/Target/MBlaze/MBlazeSubtarget.cpp new file mode 100644 index 0000000..3440521 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeSubtarget.cpp @@ -0,0 +1,31 @@ +//===- MBlazeSubtarget.cpp - MBlaze Subtarget Information -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MBlaze specific subclass of TargetSubtarget. 
+// +//===----------------------------------------------------------------------===// + +#include "MBlazeSubtarget.h" +#include "MBlaze.h" +#include "MBlazeGenSubtarget.inc" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +MBlazeSubtarget::MBlazeSubtarget(const std::string &TT, const std::string &FS): + HasPipe3(false), HasBarrel(false), HasDiv(false), HasMul(false), + HasFSL(false), HasEFSL(false), HasMSRSet(false), HasException(false), + HasPatCmp(false), HasFPU(false), HasESR(false), HasPVR(false), + HasMul64(false), HasSqrt(false), HasMMU(false) +{ + std::string CPU = "v400"; + MBlazeArchVersion = V400; + + // Parse features string. + ParseSubtargetFeatures(FS, CPU); +} diff --git a/lib/Target/MBlaze/MBlazeSubtarget.h b/lib/Target/MBlaze/MBlazeSubtarget.h new file mode 100644 index 0000000..bebb3f7 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeSubtarget.h @@ -0,0 +1,79 @@ +//=====-- MBlazeSubtarget.h - Define Subtarget for the MBlaze -*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the MBlaze specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZESUBTARGET_H +#define MBLAZESUBTARGET_H + +#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetMachine.h" + +#include <string> + +namespace llvm { + +class MBlazeSubtarget : public TargetSubtarget { + +protected: + + enum MBlazeArchEnum { + V400, V500, V600, V700, V710 + }; + + // MBlaze architecture version + MBlazeArchEnum MBlazeArchVersion; + + bool HasPipe3; + bool HasBarrel; + bool HasDiv; + bool HasMul; + bool HasFSL; + bool HasEFSL; + bool HasMSRSet; + bool HasException; + bool HasPatCmp; + bool HasFPU; + bool HasESR; + bool HasPVR; + bool HasMul64; + bool HasSqrt; + bool HasMMU; + + InstrItineraryData InstrItins; + +public: + + /// This constructor initializes the data members to match that + /// of the specified triple. + MBlazeSubtarget(const std::string &TT, const std::string &FS); + + /// ParseSubtargetFeatures - Parses features string setting specified + /// subtarget options. Definition of function is auto generated by tblgen. + std::string ParseSubtargetFeatures(const std::string &FS, + const std::string &CPU); + + bool hasFPU() const { return HasFPU; } + bool hasSqrt() const { return HasSqrt; } + bool hasMul() const { return HasMul; } + bool hasMul64() const { return HasMul64; } + bool hasDiv() const { return HasDiv; } + bool hasBarrel() const { return HasBarrel; } + + bool isV400() const { return MBlazeArchVersion == V400; } + bool isV500() const { return MBlazeArchVersion == V500; } + bool isV600() const { return MBlazeArchVersion == V600; } + bool isV700() const { return MBlazeArchVersion == V700; } + bool isV710() const { return MBlazeArchVersion == V710; } +}; +} // End llvm namespace + +#endif diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp new file mode 100644 index 0000000..9eba2b3 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -0,0 +1,66 @@ +//===-- MBlazeTargetMachine.cpp - Define TargetMachine for MBlaze ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Implements the info about MBlaze target spec. +// +//===----------------------------------------------------------------------===// + +#include "MBlaze.h" +#include "MBlazeMCAsmInfo.h" +#include "MBlazeTargetMachine.h" +#include "llvm/PassManager.h" +#include "llvm/Target/TargetRegistry.h" +using namespace llvm; + +extern "C" void LLVMInitializeMBlazeTarget() { + // Register the target. + RegisterTargetMachine<MBlazeTargetMachine> X(TheMBlazeTarget); + RegisterAsmInfo<MBlazeMCAsmInfo> A(TheMBlazeTarget); +} + +// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment +// The stack is always 8 byte aligned +// On function prologue, the stack is created by decrementing +// its pointer. Once decremented, all references are done with positive +// offset from the stack/frame pointer, using StackGrowsUp enables +// an easier handling. +MBlazeTargetMachine:: +MBlazeTargetMachine(const Target &T, const std::string &TT, + const std::string &FS): + LLVMTargetMachine(T, TT), + Subtarget(TT, FS), + DataLayout("E-p:32:32-i8:8:8-i16:16:16-i64:32:32-" + "f64:32:32-v64:32:32-v128:32:32-n32"), + InstrInfo(*this), + FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0), + TLInfo(*this) { + if (getRelocationModel() == Reloc::Default) { + setRelocationModel(Reloc::Static); + } + + if (getCodeModel() == CodeModel::Default) + setCodeModel(CodeModel::Small); +} + +// Install an instruction selector pass using +// the ISelDag to gen MBlaze code. +bool MBlazeTargetMachine:: +addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { + PM.add(createMBlazeISelDag(*this)); + return false; +} + +// Implemented by targets that want to run passes immediately before +// machine code is emitted. return true if -print-machineinstrs should +// print out the code after the passes. +bool MBlazeTargetMachine:: +addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { + PM.add(createMBlazeDelaySlotFillerPass(*this)); + return true; +} diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h new file mode 100644 index 0000000..85c975c --- /dev/null +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -0,0 +1,69 @@ +//===-- MBlazeTargetMachine.h - Define TargetMachine for MBlaze --- C++ ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the MBlaze specific subclass of TargetMachine. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZE_TARGETMACHINE_H +#define MBLAZE_TARGETMACHINE_H + +#include "MBlazeSubtarget.h" +#include "MBlazeInstrInfo.h" +#include "MBlazeISelLowering.h" +#include "MBlazeIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" + +namespace llvm { + class formatted_raw_ostream; + + class MBlazeTargetMachine : public LLVMTargetMachine { + MBlazeSubtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment + MBlazeInstrInfo InstrInfo; + TargetFrameInfo FrameInfo; + MBlazeTargetLowering TLInfo; + MBlazeIntrinsicInfo IntrinsicInfo; + public: + MBlazeTargetMachine(const Target &T, const std::string &TT, + const std::string &FS); + + virtual const MBlazeInstrInfo *getInstrInfo() const + { return &InstrInfo; } + + virtual const TargetFrameInfo *getFrameInfo() const + { return &FrameInfo; } + + virtual const MBlazeSubtarget *getSubtargetImpl() const + { return &Subtarget; } + + virtual const TargetData *getTargetData() const + { return &DataLayout;} + + virtual const MBlazeRegisterInfo *getRegisterInfo() const + { return &InstrInfo.getRegisterInfo(); } + + virtual MBlazeTargetLowering *getTargetLowering() const + { return const_cast<MBlazeTargetLowering*>(&TLInfo); } + + const TargetIntrinsicInfo *getIntrinsicInfo() const + { return &IntrinsicInfo; } + + // Pass Pipeline Configuration + virtual bool addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel); + + virtual bool addPreEmitPass(PassManagerBase &PM, + CodeGenOpt::Level OptLevel); + }; +} // End llvm namespace + +#endif diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp new file mode 100644 index 0000000..79c9494 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp @@ -0,0 +1,88 @@ +//===-- MBlazeTargetObjectFile.cpp - MBlaze object files ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MBlazeTargetObjectFile.h" +#include "MBlazeSubtarget.h" +#include "llvm/DerivedTypes.h" +#include "llvm/GlobalVariable.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +void MBlazeTargetObjectFile:: +Initialize(MCContext &Ctx, const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + + SmallDataSection = + getELFSection(".sdata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); + + SmallBSSSection = + getELFSection(".sbss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, + SectionKind::getBSS()); + +} + +// A address must be loaded from a small section if its size is less than the +// small section size threshold. Data in this section must be addressed using +// gp_rel operator. 
+static bool IsInSmallSection(uint64_t Size) { + return Size > 0 && Size <= 8; +} + +bool MBlazeTargetObjectFile:: +IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM) const { + if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) + return false; + + return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM)); +} + +/// IsGlobalInSmallSection - Return true if this global address should be +/// placed into small data/bss section. +bool MBlazeTargetObjectFile:: +IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, + SectionKind Kind) const { + // Only global variables, not functions. + const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV); + if (!GVA) + return false; + + // We can only do this for datarel or BSS objects for now. + if (!Kind.isBSS() && !Kind.isDataRel()) + return false; + + // If this is a internal constant string, there is a special + // section for it, but not in small data/bss. + if (Kind.isMergeable1ByteCString()) + return false; + + const Type *Ty = GV->getType()->getElementType(); + return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty)); +} + +const MCSection *MBlazeTargetObjectFile:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + // TODO: Could also support "weak" symbols as well with ".gnu.linkonce.s.*" + // sections? + + // Handle Small Section classification here. + if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind)) + return SmallBSSSection; + if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind)) + return SmallDataSection; + + // Otherwise, we work the same as ELF. + return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM); +} diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.h b/lib/Target/MBlaze/MBlazeTargetObjectFile.h new file mode 100644 index 0000000..20e7702 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.h @@ -0,0 +1,41 @@ +//===-- llvm/Target/MBlazeTargetObjectFile.h - MBlaze Obj. Info -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_MBLAZE_TARGETOBJECTFILE_H +#define LLVM_TARGET_MBLAZE_TARGETOBJECTFILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" + +namespace llvm { + + class MBlazeTargetObjectFile : public TargetLoweringObjectFileELF { + const MCSection *SmallDataSection; + const MCSection *SmallBSSSection; + public: + + void Initialize(MCContext &Ctx, const TargetMachine &TM); + + + /// IsGlobalInSmallSection - Return true if this global address should be + /// placed into small data/bss section. 
+ bool IsGlobalInSmallSection(const GlobalValue *GV, + const TargetMachine &TM, + SectionKind Kind) const; + + bool IsGlobalInSmallSection(const GlobalValue *GV, + const TargetMachine &TM) const; + + const MCSection *SelectSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, + Mangler *Mang, + const TargetMachine &TM) const; + }; +} // end namespace llvm + +#endif diff --git a/lib/Target/MBlaze/Makefile b/lib/Target/MBlaze/Makefile new file mode 100644 index 0000000..19e508c --- /dev/null +++ b/lib/Target/MBlaze/Makefile @@ -0,0 +1,23 @@ +##===- lib/Target/MBlaze/Makefile --------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +LIBRARYNAME = LLVMMBlazeCodeGen +TARGET = MBlaze + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = MBlazeGenRegisterInfo.h.inc MBlazeGenRegisterNames.inc \ + MBlazeGenRegisterInfo.inc MBlazeGenInstrNames.inc \ + MBlazeGenInstrInfo.inc MBlazeGenAsmWriter.inc \ + MBlazeGenDAGISel.inc MBlazeGenCallingConv.inc \ + MBlazeGenSubtarget.inc MBlazeGenIntrinsics.inc + +DIRS = AsmPrinter TargetInfo + +include $(LEVEL)/Makefile.common + diff --git a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000..5afb14d --- /dev/null +++ b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMMBlazeInfo + MBlazeTargetInfo.cpp + ) + +add_dependencies(LLVMMBlazeInfo MBlazeCodeGenTable_gen) diff --git a/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp b/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp new file mode 100644 index 0000000..16e01db --- /dev/null +++ b/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp @@ -0,0 +1,19 @@ +//===-- MBlazeTargetInfo.cpp - MBlaze Target Implementation ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MBlaze.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetRegistry.h" +using namespace llvm; + +Target llvm::TheMBlazeTarget; + +extern "C" void LLVMInitializeMBlazeTargetInfo() { + RegisterTarget<Triple::mblaze> X(TheMBlazeTarget, "mblaze", "MBlaze"); +} diff --git a/lib/Target/MBlaze/TargetInfo/Makefile b/lib/Target/MBlaze/TargetInfo/Makefile new file mode 100644 index 0000000..fb7ea11 --- /dev/null +++ b/lib/Target/MBlaze/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/MBlaze/TargetInfo/Makefile ---------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMMBlazeInfo + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp index a96ee49..ac41cc8 100644 --- a/lib/Target/MSIL/MSILWriter.cpp +++ b/lib/Target/MSIL/MSILWriter.cpp @@ -38,7 +38,8 @@ namespace llvm { virtual bool addPassesToEmitWholeFile(PassManager &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel); + CodeGenOpt::Level OptLevel, + bool DisableVerify); virtual const TargetData *getTargetData() const { return 0; } }; @@ -57,7 +58,7 @@ bool MSILModule::runOnModule(Module &M) { TypeSymbolTable& Table = M.getTypeSymbolTable(); std::set<const Type *> Types = getAnalysis<FindUsedTypes>().getTypes(); for (TypeSymbolTable::iterator I = Table.begin(), E = Table.end(); I!=E; ) { - if (!isa<StructType>(I->second) && !isa<OpaqueType>(I->second)) + if (!I->second->isStructTy() && !I->second->isOpaqueTy()) Table.remove(I++); else { std::set<const Type *>::iterator T = Types.find(I->second); @@ -187,7 +188,7 @@ void MSILWriter::printModuleStartup() { break; case 1: Arg1 = F->arg_begin(); - if (Arg1->getType()->isInteger()) { + if (Arg1->getType()->isIntegerTy()) { Out << "\tldloc\targc\n"; Args = getTypeName(Arg1->getType()); BadSig = false; @@ -195,7 +196,7 @@ void MSILWriter::printModuleStartup() { break; case 2: Arg1 = Arg2 = F->arg_begin(); ++Arg2; - if (Arg1->getType()->isInteger() && + if (Arg1->getType()->isIntegerTy() && Arg2->getType()->getTypeID() == Type::PointerTyID) { Out << "\tldloc\targc\n\tldloc\targv\n"; Args = getTypeName(Arg1->getType())+","+getTypeName(Arg2->getType()); @@ -207,7 +208,7 @@ void MSILWriter::printModuleStartup() { } bool RetVoid = (F->getReturnType()->getTypeID() == Type::VoidTyID); - if (BadSig || (!F->getReturnType()->isInteger() && !RetVoid)) { + if (BadSig || (!F->getReturnType()->isIntegerTy() && !RetVoid)) { Out << "\tldc.i4.0\n"; } else { Out << "\tcall\t" << getTypeName(F->getReturnType()) << @@ -334,7 +335,7 @@ std::string MSILWriter::getPrimitiveTypeName(const Type* Ty, bool isSigned) { std::string MSILWriter::getTypeName(const Type* Ty, bool isSigned, bool isNested) { - if (Ty->isPrimitiveType() || Ty->isInteger()) + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) return getPrimitiveTypeName(Ty,isSigned); // FIXME: "OpaqueType" support switch (Ty->getTypeID()) { @@ -1459,7 +1460,7 @@ void MSILWriter::printDeclarations(const TypeSymbolTable& ST) { for (std::set<const Type*>::const_iterator UI = UsedTypes->begin(), UE = UsedTypes->end(); UI!=UE; ++UI) { const Type* Ty = *UI; - if (isa<ArrayType>(Ty) || isa<VectorType>(Ty) || isa<StructType>(Ty)) + if (Ty->isArrayTy() || Ty->isVectorTy() || Ty->isStructTy()) Name = getTypeName(Ty, false, true); // Type with no need to declare. 
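Many hunks in this patch are the same mechanical migration: type checks spelled as isa<StructType>(Ty) or Ty->isInteger() become the Type predicate methods (isStructTy(), isIntegerTy(), and friends), as in the MSILWriter changes above. A tiny illustration using only the new spellings adopted by the patch:

    // Illustrative only: the post-patch predicate spellings.
    // Old: isa<StructType>(Ty), Ty->isInteger()
    // New: Ty->isStructTy(),    Ty->isIntegerTy()
    #include "llvm/Type.h"
    using namespace llvm;

    static bool isIntOrStruct(const Type *Ty) {
      return Ty->isIntegerTy() || Ty->isStructTy();
    }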
else continue; @@ -1688,7 +1689,8 @@ void MSILWriter::printExternals() { bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM, formatted_raw_ostream &o, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel) + CodeGenOpt::Level OptLevel, + bool DisableVerify) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; MSILWriter* Writer = new MSILWriter(o); diff --git a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp index def5fc6..7a35eb0 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp @@ -98,12 +98,19 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum, bool isMemOp = Modifier && !strcmp(Modifier, "mem"); uint64_t Offset = MO.getOffset(); - O << (isMemOp ? '&' : '#'); + // If the global address expression is a part of displacement field with a + // register base, we should not emit any prefix symbol here, e.g. + // mov.w &foo, r1 + // vs + // mov.w glb(r1), r2 + // Otherwise (!) msp430-as will silently miscompile the output :( + if (!Modifier || strcmp(Modifier, "nohash")) + O << (isMemOp ? '&' : '#'); if (Offset) O << '(' << Offset << '+'; O << *GetGlobalValueSymbol(MO.getGlobal()); - + if (Offset) O << ')'; @@ -124,15 +131,11 @@ void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum, const MachineOperand &Disp = MI->getOperand(OpNum+1); // Print displacement first - if (!Disp.isImm()) { - printOperand(MI, OpNum+1, "mem"); - } else { - if (!Base.getReg()) - O << '&'; - - printOperand(MI, OpNum+1, "nohash"); - } + // Imm here is in fact global address - print extra modifier. + if (Disp.isImm() && !Base.getReg()) + O << '&'; + printOperand(MI, OpNum+1, "nohash"); // Print register base field if (Base.getReg()) { diff --git a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp index f6565bd..d7636e6 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp +++ b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp @@ -62,21 +62,26 @@ void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo, const MCOperand &Disp = MI->getOperand(OpNo+1); // Print displacement first - if (Disp.isExpr()) { - O << '&' << *Disp.getExpr(); - } else { - assert(Disp.isImm() && "Expected immediate in displacement field"); - if (!Base.getReg()) - O << '&'; + // If the global address expression is a part of displacement field with a + // register base, we should not emit any prefix symbol here, e.g. + // mov.w &foo, r1 + // vs + // mov.w glb(r1), r2 + // Otherwise (!) 
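The two MSP430 printer hunks above implement one prefix rule for global/immediate operands. A small hypothetical helper condensing that rule (the "mem"/"nohash" modifier names and the '&'/'#' prefixes come from the hunks; the function itself is not part of the patch):

    // Hypothetical condensation of the MSP430 operand-prefix rule above.
    #include <cstring>

    // Returns the prefix to print before a global/immediate operand,
    // or '\0' when none should be emitted.
    char operandPrefix(const char *Modifier, bool IsMemOp) {
      // Displacement of a reg+offset address ("nohash"): no prefix,
      // the operand is printed inside "glb(rN)".
      if (Modifier && std::strcmp(Modifier, "nohash") == 0)
        return '\0';
      // Absolute memory operand: '&foo'.  Plain immediate: '#foo'.
      return IsMemOp ? '&' : '#';
    }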
msp430-as will silently miscompile the output :( + if (!Base.getReg()) + O << '&'; + + if (Disp.isExpr()) + O << *Disp.getExpr(); + else { + assert(Disp.isImm() && "Expected immediate in displacement field"); O << Disp.getImm(); } - // Print register base field - if (Base.getReg()) { + if (Base.getReg()) O << '(' << getRegisterName(Base.getReg()) << ')'; - } } void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo) { diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index 4eec757..911cfcb 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -26,26 +26,12 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" - using namespace llvm; -#ifndef NDEBUG -static cl::opt<bool> -ViewRMWDAGs("view-msp430-rmw-dags", cl::Hidden, - cl::desc("Pop up a window to show isel dags after RMW preprocess")); -#else -static const bool ViewRMWDAGs = false; -#endif - -STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); - - namespace { struct MSP430ISelAddressMode { enum { @@ -123,8 +109,6 @@ namespace { Lowering(*TM.getTargetLowering()), Subtarget(*TM.getSubtargetImpl()) { } - virtual void InstructionSelect(); - virtual const char *getPassName() const { return "MSP430 DAG->DAG Pattern Instruction Selection"; } @@ -133,9 +117,6 @@ namespace { bool MatchWrapper(SDValue N, MSP430ISelAddressMode &AM); bool MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM); - bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, - SDNode *Root) const; - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps); @@ -144,18 +125,12 @@ namespace { #include "MSP430GenDAGISel.inc" private: - DenseMap<SDNode*, SDNode*> RMWStores; - void PreprocessForRMW(); SDNode *Select(SDNode *N); SDNode *SelectIndexedLoad(SDNode *Op); SDNode *SelectIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2, unsigned Opc8, unsigned Opc16); bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp); - - #ifndef NDEBUG - unsigned Indent; - #endif }; } // end anonymous namespace @@ -217,10 +192,7 @@ bool MSP430DAGToDAGISel::MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM) } bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) { - DEBUG({ - errs() << "MatchAddress: "; - AM.dump(); - }); + DEBUG(errs() << "MatchAddress: "; AM.dump()); switch (N.getOpcode()) { default: break; @@ -336,270 +308,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, return false; } -bool MSP430DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, - SDNode *Root) const { - if (OptLevel == CodeGenOpt::None) return false; - - /// RMW preprocessing creates the following code: - /// [Load1] - /// ^ ^ - /// / | - /// / | - /// [Load2] | - /// ^ ^ | - /// | | | - /// | \-| - /// | | - /// | [Op] - /// | ^ - /// | | - /// \ / - /// \ / - /// [Store] - /// - /// The path Store => Load2 => Load1 is via chain. Note that in general it is - /// not allowed to fold Load1 into Op (and Store) since it will creates a - /// cycle. However, this is perfectly legal for the loads moved below the - /// TokenFactor by PreprocessForRMW. 
Query the map Store => Load1 (created - /// during preprocessing) to determine whether it's legal to introduce such - /// "cycle" for a moment. - DenseMap<SDNode*, SDNode*>::const_iterator I = RMWStores.find(Root); - if (I != RMWStores.end() && I->second == N) - return true; - - // Proceed to 'generic' cycle finder code - return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root); -} - - -/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand -/// and move load below the TokenFactor. Replace store's chain operand with -/// load's chain result. -static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load, - SDValue Store, SDValue TF) { - SmallVector<SDValue, 4> Ops; - for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) - if (Load.getNode() == TF.getOperand(i).getNode()) - Ops.push_back(Load.getOperand(0)); - else - Ops.push_back(TF.getOperand(i)); - SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); - SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF, - Load.getOperand(1), - Load.getOperand(2)); - CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1), - Store.getOperand(2), Store.getOperand(3)); -} - -/// MoveBelowTokenFactor2 - Replace TokenFactor operand with load's chain operand -/// and move load below the TokenFactor. Replace store's chain operand with -/// load's chain result. This a version which sinks two loads below token factor. -/// Look into PreprocessForRMW comments for explanation of transform. -static void MoveBelowTokenFactor2(SelectionDAG *CurDAG, - SDValue Load1, SDValue Load2, - SDValue Store, SDValue TF) { - SmallVector<SDValue, 4> Ops; - for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) { - SDNode* N = TF.getOperand(i).getNode(); - if (Load2.getNode() == N) - Ops.push_back(Load2.getOperand(0)); - else if (Load1.getNode() != N) - Ops.push_back(TF.getOperand(i)); - } - - SDValue NewTF = SDValue(CurDAG->MorphNodeTo(TF.getNode(), - TF.getOpcode(), - TF.getNode()->getVTList(), - &Ops[0], Ops.size()), TF.getResNo()); - SDValue NewLoad2 = CurDAG->UpdateNodeOperands(Load2, NewTF, - Load2.getOperand(1), - Load2.getOperand(2)); - - SDValue NewLoad1 = CurDAG->UpdateNodeOperands(Load1, NewLoad2.getValue(1), - Load1.getOperand(1), - Load1.getOperand(2)); - - CurDAG->UpdateNodeOperands(Store, - NewLoad1.getValue(1), - Store.getOperand(1), - Store.getOperand(2), Store.getOperand(3)); -} - -/// isAllowedToSink - return true if N a load which can be moved below token -/// factor. Basically, the load should be non-volatile and has single use. -static bool isLoadAllowedToSink(SDValue N, SDValue Chain) { - if (N.getOpcode() == ISD::BIT_CONVERT) - N = N.getOperand(0); - - LoadSDNode *LD = dyn_cast<LoadSDNode>(N); - if (!LD || LD->isVolatile()) - return false; - if (LD->getAddressingMode() != ISD::UNINDEXED) - return false; - - ISD::LoadExtType ExtType = LD->getExtensionType(); - if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD) - return false; - - return (N.hasOneUse() && - LD->hasNUsesOfValue(1, 1) && - LD->isOperandOf(Chain.getNode())); -} - - -/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. -/// The chain produced by the load must only be used by the store's chain -/// operand, otherwise this may produce a cycle in the DAG. 
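For context on the code being deleted from MSP430ISelDAGToDAG: the RMW preprocessing rearranged chains so that (store (op (load X)), X) patterns could be selected as a single read-modify-write or memory-to-memory instruction, as the removed comments below describe. In source terms the targeted pattern is roughly (C-level illustration only):

    // The kind of source pattern the removed RMW preprocessing targeted.
    void bump(int *counter, const int *delta) {
      // load *counter, load *delta, add, store *counter: with the loads sunk
      // below the TokenFactor, the selector could fold this into one
      // memory-to-memory add instead of going through registers.
      *counter += *delta;
    }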
-static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, - SDValue &Load) { - if (isLoadAllowedToSink(N, Chain) && - N.getOperand(1) == Address) { - Load = N; - return true; - } - return false; -} - -/// PreprocessForRMW - Preprocess the DAG to make instruction selection better. -/// This is only run if not in -O0 mode. -/// This allows the instruction selector to pick more read-modify-write -/// instructions. This is a common case: -/// -/// [Load chain] -/// ^ -/// | -/// [Load] -/// ^ ^ -/// | | -/// / \- -/// / | -/// [TokenFactor] [Op] -/// ^ ^ -/// | | -/// \ / -/// \ / -/// [Store] -/// -/// The fact the store's chain operand != load's chain will prevent the -/// (store (op (load))) instruction from being selected. We can transform it to: -/// -/// [Load chain] -/// ^ -/// | -/// [TokenFactor] -/// ^ -/// | -/// [Load] -/// ^ ^ -/// | | -/// | \- -/// | | -/// | [Op] -/// | ^ -/// | | -/// \ / -/// \ / -/// [Store] -/// -/// We also recognize the case where second operand of Op is load as well and -/// move it below token factor as well creating DAG as follows: -/// -/// [Load chain] -/// ^ -/// | -/// [TokenFactor] -/// ^ -/// | -/// [Load1] -/// ^ ^ -/// / | -/// / | -/// [Load2] | -/// ^ ^ | -/// | | | -/// | \-| -/// | | -/// | [Op] -/// | ^ -/// | | -/// \ / -/// \ / -/// [Store] -/// -/// This allows selection of mem-mem instructions. Yay! - -void MSP430DAGToDAGISel::PreprocessForRMW() { - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ++I) { - if (!ISD::isNON_TRUNCStore(I)) - continue; - SDValue Chain = I->getOperand(0); - - if (Chain.getNode()->getOpcode() != ISD::TokenFactor) - continue; - - SDValue N1 = I->getOperand(1); - SDValue N2 = I->getOperand(2); - if ((N1.getValueType().isFloatingPoint() && - !N1.getValueType().isVector()) || - !N1.hasOneUse()) - continue; - - unsigned RModW = 0; - SDValue Load1, Load2; - unsigned Opcode = N1.getNode()->getOpcode(); - switch (Opcode) { - case ISD::ADD: - case ISD::AND: - case ISD::OR: - case ISD::XOR: - case ISD::ADDC: - case ISD::ADDE: { - SDValue N10 = N1.getOperand(0); - SDValue N11 = N1.getOperand(1); - if (isRMWLoad(N10, Chain, N2, Load1)) { - if (isLoadAllowedToSink(N11, Chain)) { - Load2 = N11; - RModW = 2; - } else - RModW = 1; - } else if (isRMWLoad(N11, Chain, N2, Load1)) { - if (isLoadAllowedToSink(N10, Chain)) { - Load2 = N10; - RModW = 2; - } else - RModW = 1; - } - break; - } - case ISD::SUB: - case ISD::SUBC: - case ISD::SUBE: { - SDValue N10 = N1.getOperand(0); - SDValue N11 = N1.getOperand(1); - if (isRMWLoad(N10, Chain, N2, Load1)) { - if (isLoadAllowedToSink(N11, Chain)) { - Load2 = N11; - RModW = 2; - } else - RModW = 1; - } - break; - } - } - - NumLoadMoved += RModW; - if (RModW == 1) - MoveBelowTokenFactor(CurDAG, Load1, SDValue(I, 0), Chain); - else if (RModW == 2) { - MoveBelowTokenFactor2(CurDAG, Load1, Load2, SDValue(I, 0), Chain); - SDNode* Store = I; - RMWStores[Store] = Load2.getNode(); - } - } -} - - static bool isValidIndexedLoad(const LoadSDNode *LD) { ISD::MemIndexedMode AM = LD->getAddressingMode(); if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD) @@ -656,7 +364,7 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op, unsigned Opc8, unsigned Opc16) { if (N1.getOpcode() == ISD::LOAD && N1.hasOneUse() && - IsLegalAndProfitableToFold(N1.getNode(), Op, Op)) { + IsLegalToFold(N1, Op, Op)) { LoadSDNode *LD = cast<LoadSDNode>(N1); if (!isValidIndexedLoad(LD)) return NULL; @@ -682,46 +390,19 @@ SDNode 
*MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op, } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void MSP430DAGToDAGISel::InstructionSelect() { - std::string BlockName; - if (ViewRMWDAGs) - BlockName = MF->getFunction()->getNameStr() + ":" + - BB->getBasicBlock()->getNameStr(); - - PreprocessForRMW(); - - if (ViewRMWDAGs) CurDAG->viewGraph("RMW preprocessed:" + BlockName); - - DEBUG(errs() << "Selection DAG after RMW preprocessing:\n"); - DEBUG(CurDAG->dump()); - - // Codegen the basic block. - DEBUG(errs() << "===== Instruction selection begins:\n"); - DEBUG(Indent = 0); - SelectRoot(*CurDAG); - DEBUG(errs() << "===== Instruction selection ends:\n"); - - CurDAG->RemoveDeadNodes(); - RMWStores.clear(); -} - SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) { DebugLoc dl = Node->getDebugLoc(); // Dump information about the Node being selected - DEBUG(errs().indent(Indent) << "Selecting: "); + DEBUG(errs() << "Selecting: "); DEBUG(Node->dump(CurDAG)); DEBUG(errs() << "\n"); - DEBUG(Indent += 2); // If we have a custom node, we already have selected! if (Node->isMachineOpcode()) { - DEBUG(errs().indent(Indent-2) << "== "; + DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); - DEBUG(Indent -= 2); return NULL; } @@ -809,13 +490,12 @@ SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) { // Select the default instruction SDNode *ResNode = SelectCode(Node); - DEBUG(errs() << std::string(Indent-2, ' ') << "=> "); + DEBUG(errs() << "=> "); if (ResNode == NULL || ResNode == Node) DEBUG(Node->dump(CurDAG)); else DEBUG(ResNode->dump(CurDAG)); DEBUG(errs() << "\n"); - DEBUG(Indent -= 2); return ResNode; } diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index ef81f51..e6c7e1e 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -31,8 +31,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -371,7 +371,8 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain, //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i16); InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0)); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0)); } } @@ -500,7 +501,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, PseudoSourceValue::getStack(), - VA.getLocMemOffset())); + VA.getLocMemOffset(), false, false, 0)); } } @@ -794,18 +795,15 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { if (andCC) { // C = ~Z, thus Res = SRW & 1, no processing is required } else { - // Res = (SRW >> 1) & 1 + // Res = ~((SRW >> 1) & 1) Shift = true; + Invert = true; } break; case MSP430CC::COND_E: - if (andCC) { - // C = ~Z, thus Res = ~(SRW & 1) - } else { - // Res = ~((SRW >> 1) & 1) - Shift = true; - } - Invert = true; + Shift = true; + // C = ~Z for AND instruction, thus we can put Res = ~(SRW & 1), however, + // Res = (SRW >> 1) & 1 is 1 word shorter. 
break; } EVT VT = Op.getValueType(); @@ -893,13 +891,13 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), - NULL, 0); + NULL, 0, false, false, 0); } // Just load the return address. SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, NULL, 0); + RetAddrFI, NULL, 0, false, false, 0); } SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { @@ -911,7 +909,8 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::FPW, VT); while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, + false, false, 0); return FrameAddr; } @@ -971,7 +970,7 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const { bool MSP430TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { - if (!Ty1->isInteger() || !Ty2->isInteger()) + if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; return (Ty1->getPrimitiveSizeInBits() > Ty2->getPrimitiveSizeInBits()); @@ -986,7 +985,7 @@ bool MSP430TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { bool MSP430TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { // MSP430 implicitly zero-extends 8-bit results in 16-bit registers. - return 0 && Ty1->isInteger(8) && Ty2->isInteger(16); + return 0 && Ty1->isIntegerTy(8) && Ty2->isIntegerTy(16); } bool MSP430TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index bb06f7b..144ba26 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -250,7 +250,7 @@ def MOV16ri : I16ri<0x0, [(set GR16:$dst, imm:$src)]>; } -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1 in { def MOV8rm : I8rm<0x0, (outs GR8:$dst), (ins memsrc:$src), "mov.b\t{$src, $dst}", diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index f1d4a67..c4746db 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -59,8 +59,6 @@ public: SelectionDAGISel(tm), TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {} - virtual void InstructionSelect(); - // Pass Name virtual const char *getPassName() const { return "MIPS DAG->DAG Pattern Instruction Selection"; @@ -98,29 +96,10 @@ private: inline SDValue getI32Imm(unsigned Imm) { return CurDAG->getTargetConstant(Imm, MVT::i32); } - - - #ifndef NDEBUG - unsigned Indent; - #endif }; } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void MipsDAGToDAGISel::InstructionSelect() { - // Codegen the basic block. - DEBUG(errs() << "===== Instruction selection begins:\n"); - DEBUG(Indent = 0); - - // Select target instructions for the DAG. - SelectRoot(*CurDAG); - - DEBUG(errs() << "===== Instruction selection ends:\n"); - - CurDAG->RemoveDeadNodes(); -} /// getGlobalBaseReg - Output the instructions required to put the /// GOT address into a register. 
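The repeated ", false, false, 0" arguments added to DAG.getLoad / DAG.getStore in the MSP430 and Mips lowering hunks track a SelectionDAG API change: the memory flags are now passed explicitly. A hedged sketch of the new call shape; the trailing parameter names (isVolatile, isNonTemporal, Alignment) are my reading of the API of this period, not spelled out in the patch:

    // Sketch of the updated getLoad call shape used throughout this patch.
    #include "llvm/CodeGen/SelectionDAG.h"
    #include "llvm/CodeGen/PseudoSourceValue.h"
    using namespace llvm;

    SDValue emitFixedStackLoad(SelectionDAG &DAG, DebugLoc dl, SDValue Chain,
                               SDValue FIN, int FI, EVT VT) {
      return DAG.getLoad(VT, dl, Chain, FIN,
                         PseudoSourceValue::getFixedStack(FI), 0,
                         /*isVolatile=*/false, /*isNonTemporal=*/false,
                         /*Alignment=*/0);
    }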
@@ -329,17 +308,11 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { DebugLoc dl = Node->getDebugLoc(); // Dump information about the Node being selected - DEBUG(errs().indent(Indent) << "Selecting: "; - Node->dump(CurDAG); - errs() << "\n"); - DEBUG(Indent += 2); + DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); // If we have a custom node, we already have selected! if (Node->isMachineOpcode()) { - DEBUG(errs().indent(Indent-2) << "== "; - Node->dump(CurDAG); - errs() << "\n"); - DEBUG(Indent -= 2); + DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); return NULL; } @@ -547,14 +520,12 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { // Select the default instruction SDNode *ResNode = SelectCode(Node); - DEBUG(errs().indent(Indent-2) << "=> "); + DEBUG(errs() << "=> "); if (ResNode == NULL || ResNode == Node) DEBUG(Node->dump(CurDAG)); else DEBUG(ResNode->dump(CurDAG)); DEBUG(errs() << "\n"); - DEBUG(Indent -= 2); - return ResNode; } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index d94944f..584b887 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -510,7 +510,8 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0, MipsII::MO_GOT); SDValue ResNode = DAG.getLoad(MVT::i32, dl, - DAG.getEntryNode(), GA, NULL, 0); + DAG.getEntryNode(), GA, NULL, 0, + false, false, 0); // On functions and global targets not internal linked only // a load from got/GP is necessary for PIC to work. if (!GV->hasLocalLinkage() || isa<Function>(GV)) @@ -549,7 +550,8 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) SDValue Ops[] = { JTI }; HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1); } else // Emit Load from Global Pointer - HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, NULL, 0); + HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, NULL, 0, + false, false, 0); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTI); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); @@ -586,7 +588,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), N->getOffset(), MipsII::MO_GOT); SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), - CP, NULL, 0); + CP, NULL, 0, false, false, 0); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo); } @@ -601,7 +603,8 @@ SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0, + false, false, 0); } //===----------------------------------------------------------------------===// @@ -859,7 +862,8 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // emit ISD::STORE whichs stores the // parameter value to a stack Location - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); } // Transform all store nodes into one single node because all store @@ -933,7 +937,8 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Reload GP value. 
FI = MipsFI->getGPFI(); SDValue FIN = DAG.getFrameIndex(FI,getPointerTy()); - SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN, NULL, 0); + SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN, NULL, 0, + false, false, 0); Chain = GPLoad.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, DAG.getRegister(Mips::GP, MVT::i32), GPLoad, SDValue(0,0)); @@ -1097,7 +1102,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // Create load nodes to retrieve arguments from the stack SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); } } @@ -1132,7 +1138,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, int FI = MFI->CreateFixedObject(4, 0, true, false); MipsFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4))); SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); - OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0)); + OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0, + false, false, 0)); // Record the frame index of the first variable argument // which is a value necessary to VASTART. diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index e67bcbf..cef3697 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -120,7 +120,7 @@ def immZExt5 : PatLeaf<(imm), [{ // Mips Address Mode! SDNode frameindex could possibily be a match // since load and store instructions from stack used it. -def addr : ComplexPattern<i32, 2, "SelectAddr", [frameindex], []>; +def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], []>; //===----------------------------------------------------------------------===// // Instructions specific format @@ -300,9 +300,8 @@ class JumpFR<bits<6> op, bits<6> func, string instr_asm>: // Jump and Link (Call) let isCall=1, hasDelaySlot=1, // All calls clobber the non-callee saved registers... 
- Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, - K0, K1, F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, - F14, F15, F16, F17, F18, F19], Uses = [GP] in { + Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, + K0, K1, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9], Uses = [GP] in { class JumpLink<bits<6> op, string instr_asm>: FJ< op, (outs), @@ -593,8 +592,8 @@ def : Pat<(MipsJmpLink (i32 tglobaladdr:$dst)), (JAL tglobaladdr:$dst)>; def : Pat<(MipsJmpLink (i32 texternalsym:$dst)), (JAL texternalsym:$dst)>; -def : Pat<(MipsJmpLink CPURegs:$dst), - (JALR CPURegs:$dst)>; +//def : Pat<(MipsJmpLink CPURegs:$dst), +// (JALR CPURegs:$dst)>; // hi/lo relocs def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>; diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h index 32e0436..237b160 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.h +++ b/lib/Target/Mips/MipsTargetObjectFile.h @@ -10,7 +10,7 @@ #ifndef LLVM_TARGET_MIPS_TARGETOBJECTFILE_H #define LLVM_TARGET_MIPS_TARGETOBJECTFILE_H -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" namespace llvm { diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp index 72f7c16..44a6cc0 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp @@ -106,8 +106,9 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { DbgInfo.BeginFunction(MF); // Now emit the instructions of function in its code section. - const MCSection *fCodeSection - = getObjFileLowering().SectionForCode(CurrentFnSym->getName()); + const MCSection *fCodeSection = + getObjFileLowering().SectionForCode(CurrentFnSym->getName(), + PAN::isISR(F->getSection())); // Start the Code Section. O << "\n"; @@ -157,6 +158,7 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { // printOperand - print operand of insn. void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) { const MachineOperand &MO = MI->getOperand(opNum); + const Function *F = MI->getParent()->getParent()->getFunction(); switch (MO.getType()) { case MachineOperand::MO_Register: @@ -189,19 +191,18 @@ void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) { } case MachineOperand::MO_ExternalSymbol: { const char *Sname = MO.getSymbolName(); + std::string Printname = Sname; - // If its a libcall name, record it to decls section. - if (PAN::getSymbolTag(Sname) == PAN::LIBCALL) - LibcallDecls.push_back(Sname); - - // Record a call to intrinsic to print the extern declaration for it. - std::string Sym = Sname; - if (PAN::isMemIntrinsic(Sym)) { - Sym = PAN::addPrefix(Sym); - LibcallDecls.push_back(createESName(Sym)); + // Intrinsic stuff needs to be renamed if we are printing IL fn. + if (PAN::isIntrinsicStuff(Printname)) { + if (PAN::isISR(F->getSection())) { + Printname = PAN::Rename(Sname); + } + // Record these decls, we need to print them in asm as extern. 
+ LibcallDecls.push_back(createESName(Printname)); } - O << Sym; + O << Printname; break; } case MachineOperand::MO_MachineBasicBlock: @@ -247,8 +248,6 @@ void PIC16AsmPrinter::printLibcallDecls() { for (std::list<const char*>::const_iterator I = LibcallDecls.begin(); I != LibcallDecls.end(); I++) { O << MAI->getExternDirective() << *I << "\n"; - O << MAI->getExternDirective() << PAN::getArgsLabel(*I) << "\n"; - O << MAI->getExternDirective() << PAN::getRetvalLabel(*I) << "\n"; } O << MAI->getCommentString() << "External decls for libcalls - END." <<"\n"; } diff --git a/lib/Target/PIC16/PIC16ABINames.h b/lib/Target/PIC16/PIC16ABINames.h index e18ddf1..4c1a8da 100644 --- a/lib/Target/PIC16/PIC16ABINames.h +++ b/lib/Target/PIC16/PIC16ABINames.h @@ -178,18 +178,21 @@ namespace llvm { return Func1 + tag; } + // Get the retval label for the given function. static std::string getRetvalLabel(const std::string &Func) { std::string Func1 = addPrefix(Func); std::string tag = getTagName(RET_LABEL); return Func1 + tag; } + // Get the argument label for the given function. static std::string getArgsLabel(const std::string &Func) { std::string Func1 = addPrefix(Func); std::string tag = getTagName(ARGS_LABEL); return Func1 + tag; } + // Get the tempdata label for the given function. static std::string getTempdataLabel(const std::string &Func) { std::string Func1 = addPrefix(Func); std::string tag = getTagName(TEMPS_LABEL); @@ -263,6 +266,7 @@ namespace llvm { return false; } + inline static bool isMemIntrinsic (const std::string &Name) { if (Name.compare("@memcpy") == 0 || Name.compare("@memset") == 0 || Name.compare("@memmove") == 0) { @@ -272,6 +276,41 @@ namespace llvm { return false; } + // Currently names of libcalls are assigned during TargetLowering + // object construction. There is no provision to change the when the + // code for a function IL function being generated. + // So we have to change these names while printing assembly. + // We need to do that mainly for names related to intrinsics. This + // function returns true if a name needs to be cloned. + inline static bool isIntrinsicStuff(const std::string &Name) { + // Return true if the name contains LIBCALL marker, or a MemIntrinisc. + // these are mainly ARGS_LABEL, RET_LABEL, and the LIBCALL name itself. + if ((Name.find(getTagName(LIBCALL)) != std::string::npos) + || isMemIntrinsic(Name)) + return true; + + return false; + } + + // Rename the name for IL. + inline static std::string Rename(const std::string &Name) { + std::string Newname; + // If its a label (LIBCALL+Func+LABEL), change it to + // (LIBCALL+Func+IL+LABEL). + TAGS id = getSymbolTag(Name); + if (id == ARGS_LABEL || id == RET_LABEL) { + std::size_t pos = Name.find(getTagName(id)); + Newname = Name.substr(0, pos) + ".IL" + getTagName(id); + return Newname; + } + + // Else, just append IL to name. + return Name + ".IL"; + } + + + + inline static bool isLocalToFunc (std::string &Func, std::string &Var) { if (! isLocalName(Var)) return false; @@ -325,6 +364,35 @@ namespace llvm { return o.str(); } + + // Return true if the current function is an ISR + inline static bool isISR(const std::string SectName) { + if (SectName.find("interrupt") != std::string::npos) + return true; + + return false; + } + + // Return the address for ISR starts in rom. + inline static std::string getISRAddr(void) { + return "0x4"; + } + + // Returns the name of clone of a function. 
+ static std::string getCloneFnName(const std::string &Func) { + return (Func + ".IL"); + } + + // Returns the name of clone of a variable. + static std::string getCloneVarName(const std::string &Fn, + const std::string &Var) { + std::string cloneVarName = Var; + // These vars are named like fun.auto.var. + // Just replace the function name, with clone function name. + std::string cloneFnName = getCloneFnName(Fn); + cloneVarName.replace(cloneVarName.find(Fn), Fn.length(), cloneFnName); + return cloneVarName; + } }; // class PAN. } // end namespace llvm; diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp index c517b1b..877e4ff 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.cpp +++ b/lib/Target/PIC16/PIC16DebugInfo.cpp @@ -419,7 +419,7 @@ void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int Num, if (TagName != "") O << ", " << TagName; for (int i = 0; i<Num; i++) - O << "," << Aux[i]; + O << "," << (Aux[i] && 0xff); } /// EmitSymbol - Emit .def for a symbol. Value is offset for the member. diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp index 82197ae..6cbd002 100644 --- a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp +++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp @@ -14,10 +14,7 @@ #define DEBUG_TYPE "pic16-isel" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "PIC16ISelDAGToDAG.h" -#include "llvm/Support/Debug.h" - using namespace llvm; /// createPIC16ISelDag - This pass converts a legalized DAG into a @@ -27,13 +24,6 @@ FunctionPass *llvm::createPIC16ISelDag(PIC16TargetMachine &TM) { } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void PIC16DAGToDAGISel::InstructionSelect() { - SelectRoot(*CurDAG); - CurDAG->RemoveDeadNodes(); -} - /// Select - Select instructions not customized! Used for /// expanded, promoted and normal instructions. SDNode* PIC16DAGToDAGISel::Select(SDNode *N) { diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h index 813a540..8ed5bf7 100644 --- a/lib/Target/PIC16/PIC16ISelDAGToDAG.h +++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.h @@ -19,6 +19,8 @@ #include "PIC16TargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" #include "llvm/Intrinsics.h" using namespace llvm; @@ -46,8 +48,6 @@ public: return "PIC16 DAG->DAG Pattern Instruction Selection"; } - virtual void InstructionSelect(); - private: // Include the pieces autogenerated from the target description. 
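Given getCloneFnName and getCloneVarName above, the naming of interrupt-line clones is mechanical. A small illustrative check, using the "fun.auto.var" shape mentioned in the comment above (the include path is as seen from lib/Target/PIC16; nothing else is assumed):

    // Illustrative check of the clone-name helpers above.
    #include <cassert>
    #include <string>
    #include "PIC16ABINames.h"

    void checkCloneNames() {
      assert(llvm::PAN::getCloneFnName("fun") == "fun.IL");
      assert(llvm::PAN::getCloneVarName("fun", "fun.auto.var") == "fun.IL.auto.var");
    }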
#include "PIC16GenDAGISel.inc" diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index 7754a4f..d17abb9 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -419,8 +419,7 @@ PIC16TargetLowering::MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl, - DAG.GetOrdering(DAG.getEntryNode().getNode())); + Callee, Args, DAG, dl); return CallInfo.first; } @@ -622,12 +621,12 @@ SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) { ChainHi = Chain.getOperand(1); } SDValue Store1 = DAG.getStore(ChainLo, dl, SrcLo, Ptr, NULL, - 0 + StoreOffset); + 0 + StoreOffset, false, false, 0); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); SDValue Store2 = DAG.getStore(ChainHi, dl, SrcHi, Ptr, NULL, - 1 + StoreOffset); + 1 + StoreOffset, false, false, 0); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); @@ -1513,8 +1512,7 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, // Direct load operands are folded in binary operations. But before folding // verify if this folding is legal. Fold only if it is legal otherwise // convert this direct load to a separate memory operation. - if(ISel->IsLegalAndProfitableToFold(Op.getOperand(0).getNode(), - Op.getNode(), Op.getNode())) + if(ISel->IsLegalToFold(Op.getOperand(0), Op.getNode(), Op.getNode())) return false; else MemOp = 0; @@ -1528,10 +1526,24 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, return true; if (isDirectLoad(Op.getOperand(1))) { - if (Op.getOperand(1).hasOneUse()) - return false; - else - MemOp = 1; + if (Op.getOperand(1).hasOneUse()) { + // Legal and profitable folding check uses the NodeId of DAG nodes. + // This NodeId is assigned by topological order. Therefore first + // assign topological order then perform legal and profitable check. + // Note:- Though this ordering is done before begining with legalization, + // newly added node during legalization process have NodeId=-1 (NewNode) + // therefore before performing any check proper ordering of the node is + // required. + DAG.AssignTopologicalOrder(); + + // Direct load operands are folded in binary operations. But before folding + // verify if this folding is legal. Fold only if it is legal otherwise + // convert this direct load to a separate memory operation. + if(ISel->IsLegalToFold(Op.getOperand(1), Op.getNode(), Op.getNode())) + return false; + else + MemOp = 1; + } } return true; } diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp index cc71b04..ab81ed1 100644 --- a/lib/Target/PIC16/PIC16MemSelOpt.cpp +++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp @@ -59,6 +59,7 @@ namespace { const TargetInstrInfo *TII; // Machine instruction info. MachineBasicBlock *MBB; // Current basic block std::string CurBank; + int PageChanged; }; char MemSelOpt::ID = 0; @@ -93,10 +94,56 @@ bool MemSelOpt::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { // Let us assume that when entering a basic block now bank is selected. // Ideally we should look at the predecessors for this information. 
CurBank=""; + PageChanged=0; - for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { + MachineBasicBlock::iterator I; + for (I = BB.begin(); I != BB.end(); ++I) { Changed |= processInstruction(I); + + // if the page has changed insert a page sel before + // any instruction that needs one + if (PageChanged == 1) + { + // Restore the page if it was changed, before leaving the basic block, + // because it may be required by the goto terminator or the fall thru + // basic blcok. + // If the terminator is return, we don't need to restore since there + // is no goto or fall thru basic block. + if ((I->getOpcode() == PIC16::sublw_3) || //macro has goto + (I->getOpcode() == PIC16::sublw_6) || //macro has goto + (I->getOpcode() == PIC16::addlwc) || //macro has goto + (TII->get(I->getOpcode()).isBranch())) + { + DebugLoc dl = I->getDebugLoc(); + BuildMI(*MBB, I, dl, TII->get(PIC16::pagesel)).addExternalSymbol("$"); + Changed = true; + PageChanged = 0; + } + } } + + // The basic block is over, but if we did not find any goto yet, + // we haven't restored the page. + // Restore the page if it was changed, before leaving the basic block, + // because it may be required by fall thru basic blcok. + // If the terminator is return, we don't need to restore since there + // is fall thru basic block. + if (PageChanged == 1) { + // save the end pointer before we move back to last insn. + MachineBasicBlock::iterator J = I; + I--; + const TargetInstrDesc &TID = TII->get(I->getOpcode()); + if (! TID.isReturn()) + { + DebugLoc dl = I->getDebugLoc(); + BuildMI(*MBB, J, dl, + TII->get(PIC16::pagesel)).addExternalSymbol("$"); + Changed = true; + PageChanged = 0; + } + } + + return Changed; } @@ -112,42 +159,74 @@ bool MemSelOpt::processInstruction(MachineInstr *MI) { if (!(TID.isBranch() || TID.isCall() || TID.mayLoad() || TID.mayStore())) return false; + // The first thing we should do is that record if banksel/pagesel are + // changed in an unknown way. This can happend via any type of call. + // We do it here first before scanning of MemOp / BBOp as the indirect + // call insns do not have any operands, but they still may change bank/page. + if (TID.isCall()) { + // Record that we have changed the page, so that we can restore it + // before basic block ends. + // We require to signal that a page anc bank change happened even for + // indirect calls. + PageChanged = 1; + + // When a call is made, there may be banksel for variables in callee. + // Hence the banksel in caller needs to be reset. + CurBank = ""; + } + // Scan for the memory address operand. // FIXME: Should we use standard interfaces like memoperands_iterator, // hasMemOperand() etc ? int MemOpPos = -1; + int BBOpPos = -1; for (unsigned i = 0; i < NumOperands; i++) { MachineOperand Op = MI->getOperand(i); if (Op.getType() == MachineOperand::MO_GlobalAddress || - Op.getType() == MachineOperand::MO_ExternalSymbol || - Op.getType() == MachineOperand::MO_MachineBasicBlock) { + Op.getType() == MachineOperand::MO_ExternalSymbol) { // We found one mem operand. Next one may be BS. MemOpPos = i; - break; + } + if (Op.getType() == MachineOperand::MO_MachineBasicBlock) { + // We found one BB operand. Next one may be pagesel. + BBOpPos = i; } } // If we did not find an insn accessing memory. Continue. - if (MemOpPos == -1) return Changed; + if ((MemOpPos == -1) && + (BBOpPos == -1)) + return false; + assert ((BBOpPos != MemOpPos) && "operand can only be of one type"); - // Get the MemOp. 
- MachineOperand &Op = MI->getOperand(MemOpPos); // If this is a pagesel material, handle it first. - if (MI->getOpcode() == PIC16::CALL || - MI->getOpcode() == PIC16::br_uncond) { + // CALL and br_ucond insns use MemOp (GA or ES) and not BBOp. + // Pagesel is required only for a direct call. + if ((MI->getOpcode() == PIC16::CALL)) { + // Get the BBOp. + MachineOperand &MemOp = MI->getOperand(MemOpPos); DebugLoc dl = MI->getDebugLoc(); - BuildMI(*MBB, MI, dl, TII->get(PIC16::pagesel)). - addOperand(Op); - return true; + BuildMI(*MBB, MI, dl, TII->get(PIC16::pagesel)).addOperand(MemOp); + + // CALL and br_ucond needs only pagesel. so we are done. + return true; } + // Pagesel is handled. Now, add a Banksel if needed. + if (MemOpPos == -1) return Changed; + // Get the MemOp. + MachineOperand &Op = MI->getOperand(MemOpPos); + // Get the section name(NewBank) for MemOp. // This assumes that the section names for globals are already set by // AsmPrinter->doInitialization. std::string NewBank = CurBank; + bool hasExternalLinkage = false; if (Op.getType() == MachineOperand::MO_GlobalAddress && Op.getGlobal()->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE) { + if (Op.getGlobal()->hasExternalLinkage()) + hasExternalLinkage= true; NewBank = Op.getGlobal()->getSection(); } else if (Op.getType() == MachineOperand::MO_ExternalSymbol) { // External Symbol is generated for temp data and arguments. They are @@ -162,7 +241,7 @@ bool MemSelOpt::processInstruction(MachineInstr *MI) { // If the previous and new section names are same, we don't need to // emit banksel. - if (NewBank.compare(CurBank) != 0 ) { + if (NewBank.compare(CurBank) != 0 || hasExternalLinkage) { DebugLoc dl = MI->getDebugLoc(); BuildMI(*MBB, MI, dl, TII->get(PIC16::banksel)). addOperand(Op); diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp new file mode 100644 index 0000000..865da35 --- /dev/null +++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp @@ -0,0 +1,299 @@ +//===-- PIC16Cloner.cpp - PIC16 LLVM Cloner for shared functions -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to clone all functions that are shared between +// the main line code (ML) and interrupt line code (IL). It clones all such +// shared functions and their automatic global vars by adding the .IL suffix. +// +// This pass is supposed to be run on the linked .bc module. +// It traveses the module call graph twice. Once starting from the main function +// and marking each reached function as "ML". Again, starting from the ISR +// and cloning any reachable function that was marked as "ML". After cloning +// the function, it remaps all the call sites in IL functions to call the +// cloned functions. 
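In source terms, the effect of the cloner described in the header comment above is roughly the following; the names are hypothetical, and how a function ends up marked as an ISR (a section containing "interrupt") is outside this sketch:

    int shared_state;                 // stands in for an "auto" of shared()

    int shared(int x) { return shared_state += x; }

    int main() { return shared(1); }  // main line (ML): keeps calling shared()

    void isr() { shared(2); }         // interrupt line (IL): after the pass this
                                      // call is remapped to a clone shared.IL,
                                      // which uses cloned copies of shared's
                                      // auto variables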
+//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Pass.h" +#include "llvm/Module.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "PIC16Cloner.h" +#include "../PIC16ABINames.h" +#include <vector> + +using namespace llvm; +using std::vector; +using std::string; +using std::map; + +namespace llvm { + char PIC16Cloner::ID = 0; + + ModulePass *createPIC16ClonerPass() { return new PIC16Cloner(); } +} + +// We currently intend to run these passes in opt, which does not have any +// diagnostic support. So use these functions for now. In future +// we will probably write our own driver tool. +// +void PIC16Cloner::reportError(string ErrorString) { + errs() << "ERROR : " << ErrorString << "\n"; + exit(1); +} + +void PIC16Cloner:: +reportError (string ErrorString, vector<string> &Values) { + unsigned ValCount = Values.size(); + string TargetString; + for (unsigned i=0; i<ValCount; ++i) { + TargetString = "%"; + TargetString += ((char)i + '0'); + ErrorString.replace(ErrorString.find(TargetString), TargetString.length(), + Values[i]); + } + errs() << "ERROR : " << ErrorString << "\n"; + exit(1); +} + + +// Entry point +// +bool PIC16Cloner::runOnModule(Module &M) { + CallGraph &CG = getAnalysis<CallGraph>(); + + // Search for the "main" and "ISR" functions. + CallGraphNode *mainCGN = NULL, *isrCGN = NULL; + for (CallGraph::iterator it = CG.begin() ; it != CG.end(); it++) + { + // External calling node doesn't have any function associated with it. + if (! it->first) + continue; + + if (it->first->getName().str() == "main") { + mainCGN = it->second; + } + + if (PAN::isISR(it->first->getSection())) { + isrCGN = it->second; + } + + // Don't search further if we've found both. + if (mainCGN && isrCGN) + break; + } + + // We have nothing to do if any of the main or ISR is missing. + if (! mainCGN || ! isrCGN) return false; + + // Time for some diagnostics. + // See if the main itself is interrupt function then report an error. + if (PAN::isISR(mainCGN->getFunction()->getSection())) { + reportError("Function 'main' can't be interrupt function"); + } + + + // Mark all reachable functions from main as ML. + markCallGraph(mainCGN, "ML"); + + // And then all the functions reachable from ISR will be cloned. + cloneSharedFunctions(isrCGN); + + return true; +} + +// Mark all reachable functions from the given node, with the given mark. +// +void PIC16Cloner::markCallGraph(CallGraphNode *CGN, string StringMark) { + // Mark the top node first. + Function *thisF = CGN->getFunction(); + + thisF->setSection(StringMark); + + // Mark all the called functions + for(CallGraphNode::iterator cgn_it = CGN->begin(); + cgn_it != CGN->end(); ++cgn_it) { + Function *CalledF = cgn_it->second->getFunction(); + + // If calling an external function then CallGraphNode + // will not be associated with any function. + if (! CalledF) + continue; + + // Issue diagnostic if interrupt function is being called. + if (PAN::isISR(CalledF->getSection())) { + vector<string> Values; + Values.push_back(CalledF->getName().str()); + reportError("Interrupt function (%0) can't be called", Values); + } + + // Has already been mark + if (CalledF->getSection().find(StringMark) != string::npos) { + // Should we do anything here? + } else { + // Mark now + CalledF->setSection(StringMark); + } + + // Before going any further mark all the called function by current + // function. 
+ markCallGraph(cgn_it->second ,StringMark); + } // end of loop of all called functions. +} + + +// For PIC16, automatic variables of a function are emitted as globals. +// Clone the auto variables of a function and put them in ValueMap, +// this ValueMap will be used while +// Cloning the code of function itself. +// +void PIC16Cloner::CloneAutos(Function *F) { + // We'll need to update module's globals list as well. So keep a reference + // handy. + Module *M = F->getParent(); + Module::GlobalListType &Globals = M->getGlobalList(); + + // Clear the leftovers in ValueMap by any previous cloning. + ValueMap.clear(); + + // Find the auto globls for this function and clone them, and put them + // in ValueMap. + std::string FnName = F->getName().str(); + std::string VarName, ClonedVarName; + for (Module::global_iterator I = M->global_begin(), E = M->global_end(); + I != E; ++I) { + VarName = I->getName().str(); + if (PAN::isLocalToFunc(FnName, VarName)) { + // Auto variable for current function found. Clone it. + GlobalVariable *GV = I; + + const Type *InitTy = GV->getInitializer()->getType(); + GlobalVariable *ClonedGV = + new GlobalVariable(InitTy, false, GV->getLinkage(), + GV->getInitializer()); + ClonedGV->setName(PAN::getCloneVarName(FnName, VarName)); + // Add these new globals to module's globals list. + Globals.push_back(ClonedGV); + + // Update ValueMap. + ValueMap[GV] = ClonedGV; + } + } +} + + +// Clone all functions that are reachable from ISR and are already +// marked as ML. +// +void PIC16Cloner::cloneSharedFunctions(CallGraphNode *CGN) { + + // Check all the called functions from ISR. + for(CallGraphNode::iterator cgn_it = CGN->begin(); + cgn_it != CGN->end(); ++cgn_it) { + Function *CalledF = cgn_it->second->getFunction(); + + // If calling an external function then CallGraphNode + // will not be associated with any function. + if (!CalledF) + continue; + + // Issue diagnostic if interrupt function is being called. + if (PAN::isISR(CalledF->getSection())) { + vector<string> Values; + Values.push_back(CalledF->getName().str()); + reportError("Interrupt function (%0) can't be called", Values); + } + + if (CalledF->getSection().find("ML") != string::npos) { + // Function is alternatively marked. It should be a shared one. + // Create IL copy. Passing called function as first argument + // and the caller as the second argument. + + // Before making IL copy, first ensure that this function has a + // body. If the function does have a body. It can't be cloned. + // Such a case may occur when the function has been declarated + // in the C source code but its body exists in assembly file. + if (!CalledF->isDeclaration()) { + Function *cf = cloneFunction(CalledF); + remapAllSites(CGN->getFunction(), CalledF, cf); + } else { + // It is called only from ISR. Still mark it as we need this info + // in code gen while calling intrinsics.Function is not marked. + CalledF->setSection("IL"); + } + } + // Before going any further clone all the shared function reachaable + // by current function. + cloneSharedFunctions(cgn_it->second); + } // end of loop of all called functions. +} + +// Clone the given function and return it. +// Note: it uses the ValueMap member of the class, which is already populated +// by cloneAutos by the time we reach here. +// FIXME: Should we just pass ValueMap's ref as a parameter here? rather +// than keeping the ValueMap as a member. +Function * +PIC16Cloner::cloneFunction(Function *OrgF) { + Function *ClonedF; + + // See if we already cloned it. Return that. 
+ cloned_map_iterator cm_it = ClonedFunctionMap.find(OrgF); + if(cm_it != ClonedFunctionMap.end()) { + ClonedF = cm_it->second; + return ClonedF; + } + + // Clone does not exist. + // First clone the autos, and populate ValueMap. + CloneAutos(OrgF); + + // Now create the clone. + ClonedF = CloneFunction(OrgF, ValueMap); + + // The new function should be for interrupt line. Therefore should have + // the name suffixed with IL and section attribute marked with IL. + ClonedF->setName(PAN::getCloneFnName(OrgF->getName())); + ClonedF->setSection("IL"); + + // Add the newly created function to the module. + OrgF->getParent()->getFunctionList().push_back(ClonedF); + + // Update the ClonedFunctionMap to record this cloning activity. + ClonedFunctionMap[OrgF] = ClonedF; + + return ClonedF; +} + + +// Remap the call sites of shared functions, that are in IL. +// Change the IL call site of a shared function to its clone. +// +void PIC16Cloner:: +remapAllSites(Function *Caller, Function *OrgF, Function *Clone) { + // First find the caller to update. If the caller itself is cloned + // then use the cloned caller. Otherwise use it. + cloned_map_iterator cm_it = ClonedFunctionMap.find(Caller); + if (cm_it != ClonedFunctionMap.end()) + Caller = cm_it->second; + + // For the lack of a better call site finding mechanism, iterate over + // all insns to find the uses of original fn. + for (Function::iterator BI = Caller->begin(); BI != Caller->end(); ++BI) { + BasicBlock &BB = *BI; + for (BasicBlock::iterator II = BB.begin(); II != BB.end(); ++II) { + if (II->getNumOperands() > 0 && II->getOperand(0) == OrgF) + II->setOperand(0, Clone); + } + } +} + + + diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h new file mode 100644 index 0000000..24c1152 --- /dev/null +++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h @@ -0,0 +1,83 @@ +//===-- PIC16Cloner.h - PIC16 LLVM Cloner for shared functions --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains declaration of a cloner class clone all functions that +// are shared between the main line code (ML) and interrupt line code (IL). +// +//===----------------------------------------------------------------------===// + +#ifndef PIC16CLONER_H +#define PIC16CLONER_H + +#include "llvm/ADT/DenseMap.h" + +using namespace llvm; +using std::vector; +using std::string; +using std::map; + +namespace llvm { + // forward classes. + class Value; + class Function; + class Module; + class ModulePass; + class CallGraph; + class CallGraphNode; + class AnalysisUsage; + + class PIC16Cloner : public ModulePass { + public: + static char ID; // Class identification + PIC16Cloner() : ModulePass(&ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<CallGraph>(); + } + virtual bool runOnModule(Module &M); + + private: // Functions + // Mark reachable functions for the MainLine or InterruptLine. + void markCallGraph(CallGraphNode *CGN, string StringMark); + + // Clone auto variables of function specified. + void CloneAutos(Function *F); + + // Clone the body of a function. + Function *cloneFunction(Function *F); + + // Clone all shared functions. + void cloneSharedFunctions(CallGraphNode *isrCGN); + + // Remap all call sites to the shared function. 
+ void remapAllSites(Function *Caller, Function *OrgF, Function *Clone); + + // Error reporting for PIC16Pass + void reportError(string ErrorString, vector<string> &Values); + void reportError(string ErrorString); + + private: // data + // Records if the interrupt function has already been found. + // If more than one interrupt function is found, then an error + // should be reported. + bool foundISR; + + // This ValueMap maps the auto variables of the original function to + // the corresponding cloned auto variables of the cloned function. + // This value map is passed during the function cloning so that all the + // uses of auto variables are updated properly. + DenseMap<const Value*, Value*> ValueMap; + + // Map of already cloned functions. + map<Function *, Function *> ClonedFunctionMap; + typedef map<Function *, Function *>::iterator cloned_map_iterator; + }; +} // End of namespace llvm + +#endif diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp index 197c398..5ecb6aa 100644 --- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp +++ b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp @@ -24,27 +24,27 @@ using namespace llvm; namespace llvm { - char PIC16FrameOverlay::ID = 0; - ModulePass *createPIC16OverlayPass() { return new PIC16FrameOverlay(); } + char PIC16Overlay::ID = 0; + ModulePass *createPIC16OverlayPass() { return new PIC16Overlay(); } } -void PIC16FrameOverlay::getAnalysisUsage(AnalysisUsage &AU) const { +void PIC16Overlay::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<CallGraph>(); } -void PIC16FrameOverlay::DFSTraverse(CallGraphNode *CGN, unsigned Depth) { +void PIC16Overlay::DFSTraverse(CallGraphNode *CGN, unsigned Depth) { // Do not set any color for external calling node. if (Depth != 0 && CGN->getFunction()) { unsigned Color = getColor(CGN->getFunction()); // Handle indirectly called functions - if (Color >= PIC16Overlay::StartIndirectCallColor || - Depth >= PIC16Overlay::StartIndirectCallColor) { + if (Color >= PIC16OVERLAY::StartIndirectCallColor || + Depth >= PIC16OVERLAY::StartIndirectCallColor) { // All functions called from an indirectly called function are given // an unique color.
- if (Color < PIC16Overlay::StartIndirectCallColor && - Depth >= PIC16Overlay::StartIndirectCallColor) + if (Color < PIC16OVERLAY::StartIndirectCallColor && + Depth >= PIC16OVERLAY::StartIndirectCallColor) setColor(CGN->getFunction(), Depth); for (unsigned int i = 0; i < CGN->size(); i++) @@ -65,7 +65,7 @@ void PIC16FrameOverlay::DFSTraverse(CallGraphNode *CGN, unsigned Depth) { DFSTraverse((*CGN)[i], Depth+1); } -unsigned PIC16FrameOverlay::ModifyDepthForInterrupt(CallGraphNode *CGN, +unsigned PIC16Overlay::ModifyDepthForInterrupt(CallGraphNode *CGN, unsigned Depth) { Function *Fn = CGN->getFunction(); @@ -81,7 +81,7 @@ unsigned PIC16FrameOverlay::ModifyDepthForInterrupt(CallGraphNode *CGN, return Depth; } -void PIC16FrameOverlay::setColor(Function *Fn, unsigned Color) { +void PIC16Overlay::setColor(Function *Fn, unsigned Color) { std::string Section = ""; if (Fn->hasSection()) Section = Fn->getSection(); @@ -119,7 +119,7 @@ void PIC16FrameOverlay::setColor(Function *Fn, unsigned Color) { Fn->setSection(Section); } -unsigned PIC16FrameOverlay::getColor(Function *Fn) { +unsigned PIC16Overlay::getColor(Function *Fn) { int Color = 0; if (!Fn->hasSection()) return 0; @@ -150,7 +150,7 @@ unsigned PIC16FrameOverlay::getColor(Function *Fn) { return Color; } -bool PIC16FrameOverlay::runOnModule(Module &M) { +bool PIC16Overlay::runOnModule(Module &M) { CallGraph &CG = getAnalysis<CallGraph>(); CallGraphNode *ECN = CG.getExternalCallingNode(); @@ -164,7 +164,7 @@ bool PIC16FrameOverlay::runOnModule(Module &M) { return false; } -void PIC16FrameOverlay::MarkIndirectlyCalledFunctions(Module &M) { +void PIC16Overlay::MarkIndirectlyCalledFunctions(Module &M) { // If the use of a function is not a call instruction then this // function might be called indirectly. In that case give it // an unique color. diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h index d70c4e7..5a2551f 100644 --- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h +++ b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h @@ -1,4 +1,4 @@ -//===-- PIC16FrameOverlay.h - Interface for PIC16 Frame Overlay -*- C++ -*-===// +//===-- PIC16Overlay.h - Interface for PIC16 Frame Overlay -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,30 +14,35 @@ #ifndef PIC16FRAMEOVERLAY_H #define PIC16FRAMEOVERLAY_H -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Pass.h" -#include "llvm/CallGraphSCCPass.h" using std::string; using namespace llvm; namespace llvm { - namespace PIC16Overlay { + // Forward declarations. 
+ class Function; + class Module; + class ModulePass; + class AnalysisUsage; + class CallGraphNode; + class CallGraph; + + namespace PIC16OVERLAY { enum OverlayConsts { StartInterruptColor = 200, StartIndirectCallColor = 300 }; } - class PIC16FrameOverlay : public ModulePass { + class PIC16Overlay : public ModulePass { std::string OverlayStr; unsigned InterruptDepth; unsigned IndirectCallColor; public: static char ID; // Class identification - PIC16FrameOverlay() : ModulePass(&ID) { + PIC16Overlay() : ModulePass(&ID) { OverlayStr = "Overlay="; - InterruptDepth = PIC16Overlay::StartInterruptColor; - IndirectCallColor = PIC16Overlay::StartIndirectCallColor; + InterruptDepth = PIC16OVERLAY::StartInterruptColor; + IndirectCallColor = PIC16OVERLAY::StartIndirectCallColor; } virtual void getAnalysisUsage(AnalysisUsage &AU) const; diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.cpp b/lib/Target/PIC16/PIC16TargetObjectFile.cpp index d7cfe02..b891c18 100644 --- a/lib/Target/PIC16/PIC16TargetObjectFile.cpp +++ b/lib/Target/PIC16/PIC16TargetObjectFile.cpp @@ -315,8 +315,12 @@ PIC16TargetObjectFile::allocateSHARED(const GlobalVariable *GV, // Interface used by AsmPrinter to get a code section for a function. const PIC16Section * -PIC16TargetObjectFile::SectionForCode(const std::string &FnName) const { +PIC16TargetObjectFile::SectionForCode(const std::string &FnName, + bool isISR) const { const std::string &sec_name = PAN::getCodeSectionName(FnName); + // If it is ISR, its code section starts at a specific address. + if (isISR) + return getPIC16Section(sec_name, CODE, PAN::getISRAddr()); return getPIC16Section(sec_name, CODE); } diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.h b/lib/Target/PIC16/PIC16TargetObjectFile.h index 0b0ad43..cf8bf84 100644 --- a/lib/Target/PIC16/PIC16TargetObjectFile.h +++ b/lib/Target/PIC16/PIC16TargetObjectFile.h @@ -137,7 +137,8 @@ namespace llvm { /// Return a code section for a function. - const PIC16Section *SectionForCode (const std::string &FnName) const; + const PIC16Section *SectionForCode (const std::string &FnName, + bool isISR) const; /// Return a frame section for a function. 
const PIC16Section *SectionForFrame (const std::string &FnName) const; diff --git a/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp b/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp index 46cc819..f1bdb12 100644 --- a/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp +++ b/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp @@ -15,7 +15,8 @@ using namespace llvm; Target llvm::ThePIC16Target, llvm::TheCooperTarget; extern "C" void LLVMInitializePIC16TargetInfo() { - RegisterTarget<> X(ThePIC16Target, "pic16", "PIC16 14-bit [experimental]"); + RegisterTarget<Triple::pic16> X(ThePIC16Target, "pic16", + "PIC16 14-bit [experimental]"); RegisterTarget<> Y(TheCooperTarget, "cooper", "PIC16 Cooper [experimental]"); } diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index afc90b1..ac901d0 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -31,13 +31,13 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index c7ce171..155fba2 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -66,28 +66,13 @@ def CC_PPC : CallingConv<[ // PowerPC System V Release 4 ABI //===----------------------------------------------------------------------===// -// _Complex arguments are never split, thus their two scalars are either -// passed both in argument registers or both on the stack. Also _Complex -// arguments are always passed in general purpose registers, never in -// Floating-point registers or vector registers. Arguments which should go -// on the stack are marked with the inreg parameter attribute. -// Giving inreg this target-dependent (and counter-intuitive) meaning -// simplifies things, because functions calls are not always coming from the -// frontend but are also created implicitly e.g. for libcalls. If inreg would -// actually mean that the argument is passed in a register, then all places -// which create function calls/function definitions implicitly would need to -// be aware of this fact and would need to mark arguments accordingly. With -// inreg meaning that the argument is passed on the stack, this is not an -// issue, except for calls which involve _Complex types. - def CC_PPC_SVR4_Common : CallingConv<[ // The ABI requires i64 to be passed in two adjacent registers with the first // register having an odd register number. CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">>>, // The first 8 integer arguments are passed in integer registers. - CCIfType<[i32], CCIf<"!ArgFlags.isInReg()", - CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>, + CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, // Make sure the i64 words from a long double are either both passed in // registers or both passed on the stack. 
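Note on the calling-convention TableGen in PPCCallingConv.td above: the entries of a CallingConv list are tried in order, and the first matching action claims the argument. The following is a rough, illustrative sketch of the pattern CC_PPC_SVR4_Common follows for i32 arguments after this change; it is not part of the commit, and the sketch's name and the 4-byte stack size/alignment are assumptions for illustration only.
def CC_Sketch : CallingConv<[
  // Offer a 32-bit integer argument to the first eight GPRs.
  CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
  // If no register is left, give the argument a 4-byte stack slot with 4-byte alignment.
  CCIfType<[i32], CCAssignToStack<4, 4>>
]>;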
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 3a15f7e..66dfd4b 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -257,7 +257,7 @@ void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { case PPC::STWX: case PPC::STWX8: case PPC::STWUX: case PPC::STW: case PPC::STW8: - case PPC::STWU: case PPC::STWU8: + case PPC::STWU: case PPC::STVEWX: case PPC::STFIWX: case PPC::STWBRX: diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 004997f..9d79c0d 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -156,10 +156,6 @@ namespace { SDValue BuildSDIVSequence(SDNode *N); SDValue BuildUDIVSequence(SDNode *N); - /// InstructionSelect - This callback is invoked by - /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. - virtual void InstructionSelect(); - void InsertVRSaveCode(MachineFunction &MF); virtual const char *getPassName() const { @@ -184,14 +180,6 @@ private: }; } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void PPCDAGToDAGISel::InstructionSelect() { - // Select target instructions for the DAG. - SelectRoot(*CurDAG); - CurDAG->RemoveDeadNodes(); -} - /// InsertVRSaveCode - Once the entire function has been instruction selected, /// all virtual registers are created and all machine instructions are built, /// check to see if we need to save/restore VRSAVE. If so, do it. diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index a11d624..3d81afa 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -25,13 +25,13 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Intrinsics.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -1243,7 +1243,8 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, // If the global is weak or external, we have to go through the lazy // resolution stub. 
- return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0, + false, false, 0); } SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { @@ -1333,7 +1334,7 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) { false, false, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__trampoline_setup", PtrVT), - Args, DAG, dl, DAG.GetOrdering(Chain.getNode())); + Args, DAG, dl); SDValue Ops[] = { CallResult.first, CallResult.second }; @@ -1355,7 +1356,8 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, + false, false, 0); } // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. @@ -1405,25 +1407,29 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, // Store first byte : number of int regs SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, - Op.getOperand(1), SV, 0, MVT::i8); + Op.getOperand(1), SV, 0, MVT::i8, + false, false, 0); uint64_t nextOffset = FPROffset; SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), ConstFPROffset); // Store second byte : number of float regs SDValue secondStore = - DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset, MVT::i8); + DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset, MVT::i8, + false, false, 0); nextOffset += StackOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); // Store second word : arguments given on stack SDValue thirdStore = - DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset); + DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset, + false, false, 0); nextOffset += FrameOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); // Store third word : arguments given in registers - return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset); + return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset, + false, false, 0); } @@ -1628,7 +1634,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // Create load nodes to retrieve arguments from the stack. 
SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); } } @@ -1700,7 +1707,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned GPRIndex = 0; for (; GPRIndex != VarArgsNumGPR; ++GPRIndex) { SDValue Val = DAG.getRegister(GPArgRegs[GPRIndex], PtrVT); - SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); @@ -1714,7 +1722,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); @@ -1729,7 +1738,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned FPRIndex = 0; for (FPRIndex = 0; FPRIndex != VarArgsNumFPR; ++FPRIndex) { SDValue Val = DAG.getRegister(FPArgRegs[FPRIndex], MVT::f64); - SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by eight for the next argument to store SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8, @@ -1741,7 +1751,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by eight for the next argument to store SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8, @@ -1903,7 +1914,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin( unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, - NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 ); + NULL, 0, + ObjSize==1 ? 
MVT::i8 : MVT::i16, + false, false, 0); MemOps.push_back(Store); ++GPR_idx; } @@ -1921,7 +1934,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); ++GPR_idx; ArgOffset += PtrByteSize; @@ -2045,7 +2059,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( CurArgOffset + (ArgSize - ObjSize), isImmutable, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, + false, false, 0); } InVals.push_back(ArgVal); @@ -2091,7 +2106,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); @@ -2271,7 +2287,7 @@ StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, // Store relative to framepointer. MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN, PseudoSourceValue::getFixedStack(FI), - 0)); + 0, false, false, 0)); } } @@ -2297,7 +2313,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, - PseudoSourceValue::getFixedStack(NewRetAddr), 0); + PseudoSourceValue::getFixedStack(NewRetAddr), 0, + false, false, 0); // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack // slot as the FP is never overwritten. @@ -2308,7 +2325,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, true, false); SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, - PseudoSourceValue::getFixedStack(NewFPIdx), 0); + PseudoSourceValue::getFixedStack(NewFPIdx), 0, + false, false, 0); } } return Chain; @@ -2346,14 +2364,16 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, // Load the LR and FP stack slot for later adjusting. EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32; LROpOut = getReturnAddrFrameIndex(DAG); - LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0); + LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0, + false, false, 0); Chain = SDValue(LROpOut.getNode(), 1); // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack // slot as the FP is never overwritten. 
if (isDarwinABI) { FPOpOut = getFramePointerFrameIndex(DAG); - FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0); + FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0, + false, false, 0); Chain = SDValue(FPOpOut.getNode(), 1); } } @@ -2395,7 +2415,8 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, DAG.getConstant(ArgOffset, PtrVT)); } - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); // Calculate and remember argument location. } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, TailCallArguments); @@ -2862,7 +2883,8 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), LocMemOffset)); + PseudoSourceValue::getStack(), LocMemOffset, + false, false, 0)); } else { // Calculate and remember argument location. CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, @@ -3024,7 +3046,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, EVT VT = (Size==1) ? MVT::i8 : MVT::i16; if (GPR_idx != NumGPRs) { SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, - NULL, 0, VT); + NULL, 0, VT, false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); @@ -3061,7 +3083,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); if (GPR_idx != NumGPRs) { - SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0); + SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); ArgOffset += PtrByteSize; @@ -3092,19 +3115,22 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); if (isVarArg) { - SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Store); // Float varargs are always shadowed in available integer registers if (GPR_idx != NumGPRs) { - SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0); + SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); - SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0); + SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } @@ -3147,10 +3173,12 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // entirely in R registers. Maybe later. 
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, DAG.getConstant(ArgOffset, PtrVT)); - SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Store); if (VR_idx != NumVRs) { - SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, NULL, 0); + SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); } @@ -3160,7 +3188,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, break; SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, DAG.getConstant(i, PtrVT)); - SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, NULL, 0); + SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } @@ -3225,7 +3254,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // TOC save area offset. SDValue PtrOff = DAG.getIntPtrConstant(40); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); - Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, NULL, 0); + Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, NULL, 0, + false, false, 0); } // Build a sequence of copy-to-reg nodes chained together with token chain @@ -3300,13 +3330,15 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, SDValue SaveSP = Op.getOperand(1); // Load the old link SP. - SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0); + SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0, + false, false, 0); // Restore the stack pointer. Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); // Store the old link SP. - return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0); + return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0, + false, false, 0); } @@ -3483,14 +3515,16 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64); // Emit a store to the stack slot. - SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0); + SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0, + false, false, 0); // Result is a load from the stack slot. If loading 4 bytes, make sure to // add in a bias. if (Op.getValueType() == MVT::i32) FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, DAG.getConstant(4, FIPtr.getValueType())); - return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0); + return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0, + false, false, 0); } SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { @@ -3533,7 +3567,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other), Ops, 4, MVT::i64, MMO); // Load the value as a double. - SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0); + SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0, false, false, 0); // FCFID it and return it. 
SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld); @@ -3578,12 +3612,13 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, - StackSlot, NULL, 0); + StackSlot, NULL, 0, false, false, 0); // Load FP Control Word from low 32 bits of stack slot. SDValue Four = DAG.getConstant(4, PtrVT); SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); - SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0); + SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0, + false, false, 0); // Transform as necessary SDValue CWD1 = @@ -4249,9 +4284,11 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, // Store the input value into Value#0 of the stack slot. SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, - Op.getOperand(0), FIdx, NULL, 0); + Op.getOperand(0), FIdx, NULL, 0, + false, false, 0); // Load it out. - return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0); + return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0, + false, false, 0); } SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { @@ -5460,7 +5497,8 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { // to the stack. FuncInfo->setLRStoreRequired(); return DAG.getLoad(getPointerTy(), dl, - DAG.getEntryNode(), RetAddrFI, NULL, 0); + DAG.getEntryNode(), RetAddrFI, NULL, 0, + false, false, 0); } SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 219efb9..a0781b9 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -366,7 +366,7 @@ def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>; def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA), "addme $rT, $rA", IntGeneral, - [(set G8RC:$rT, (adde G8RC:$rA, immAllOnes))]>; + [(set G8RC:$rT, (adde G8RC:$rA, -1))]>; def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA), "addze $rT, $rA", IntGeneral, [(set G8RC:$rT, (adde G8RC:$rA, 0))]>; @@ -375,7 +375,7 @@ def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>; def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA), "subfme $rT, $rA", IntGeneral, - [(set G8RC:$rT, (sube immAllOnes, G8RC:$rA))]>; + [(set G8RC:$rT, (sube -1, G8RC:$rA))]>; def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA), "subfze $rT, $rA", IntGeneral, [(set G8RC:$rT, (sube 0, G8RC:$rA))]>; @@ -635,13 +635,6 @@ def STHU8 : DForm_1<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STWU8 : DForm_1<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStGeneral, - [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; - def STDU : DSForm_1<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, s16immX4:$ptroff, ptr_rc:$ptrreg), diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index af7d812..9895bea 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ 
b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -19,6 +19,7 @@ #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -73,8 +74,7 @@ bool PPCInstrInfo::isMoveInstr(const MachineInstr& MI, destReg = MI.getOperand(0).getReg(); return true; } - } else if (oc == PPC::FMRS || oc == PPC::FMRD || - oc == PPC::FMRSD) { // fmr r1, r2 + } else if (oc == PPC::FMR || oc == PPC::FMRSD) { // fmr r1, r2 assert(MI.getNumOperands() >= 2 && MI.getOperand(0).isReg() && MI.getOperand(1).isReg() && @@ -344,10 +344,9 @@ bool PPCInstrInfo::copyRegToReg(MachineBasicBlock &MBB, BuildMI(MBB, MI, DL, get(PPC::OR), DestReg).addReg(SrcReg).addReg(SrcReg); } else if (DestRC == PPC::G8RCRegisterClass) { BuildMI(MBB, MI, DL, get(PPC::OR8), DestReg).addReg(SrcReg).addReg(SrcReg); - } else if (DestRC == PPC::F4RCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::FMRS), DestReg).addReg(SrcReg); - } else if (DestRC == PPC::F8RCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::FMRD), DestReg).addReg(SrcReg); + } else if (DestRC == PPC::F4RCRegisterClass || + DestRC == PPC::F8RCRegisterClass) { + BuildMI(MBB, MI, DL, get(PPC::FMR), DestReg).addReg(SrcReg); } else if (DestRC == PPC::CRRCRegisterClass) { BuildMI(MBB, MI, DL, get(PPC::MCRF), DestReg).addReg(SrcReg); } else if (DestRC == PPC::VRRCRegisterClass) { @@ -421,22 +420,30 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, FrameIdx)); return true; } else { - // FIXME: We use R0 here, because it isn't available for RA. We need to - // store the CR in the low 4-bits of the saved value. First, issue a MFCR - // to save all of the CRBits. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), PPC::R0)); + // FIXME: We need a scatch reg here. The trouble with using R0 is that + // it's possible for the stack frame to be so big the save location is + // out of range of immediate offsets, necessitating another register. + // We hack this on Darwin by reserving R2. It's probably broken on Linux + // at the moment. + + // We need to store the CR in the low 4-bits of the saved value. First, + // issue a MFCR to save all of the CRBits. + unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? + PPC::R2 : PPC::R0; + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), ScratchReg)); // If the saved register wasn't CR0, shift the bits left so that they are // in CR0's slot. if (SrcReg != PPC::CR0) { unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(SrcReg)*4; - // rlwinm r0, r0, ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0) - .addReg(PPC::R0).addImm(ShiftBits).addImm(0).addImm(31)); + // rlwinm scratch, scratch, ShiftBits, 0, 31. + NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) + .addReg(ScratchReg).addImm(ShiftBits) + .addImm(0).addImm(31)); } NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) - .addReg(PPC::R0, + .addReg(ScratchReg, getKillRegState(isKill)), FrameIdx)); } @@ -540,20 +547,28 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg), FrameIdx)); } else if (RC == PPC::CRRCRegisterClass) { - // FIXME: We use R0 here, because it isn't available for RA. - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), PPC::R0), - FrameIdx)); + // FIXME: We need a scatch reg here. 
The trouble with using R0 is that + // it's possible for the stack frame to be so big the save location is + // out of range of immediate offsets, necessitating another register. + // We hack this on Darwin by reserving R2. It's probably broken on Linux + // at the moment. + unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? + PPC::R2 : PPC::R0; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), + ScratchReg), FrameIdx)); // If the reloaded register isn't CR0, shift the bits right so that they are // in the right CR's slot. if (DestReg != PPC::CR0) { unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(DestReg)*4; // rlwinm r11, r11, 32-ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0) - .addReg(PPC::R0).addImm(32-ShiftBits).addImm(0).addImm(31)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) + .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0) + .addImm(31)); } - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg).addReg(PPC::R0)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg) + .addReg(ScratchReg)); } else if (RC == PPC::CRBITRCRegisterClass) { unsigned Reg = 0; @@ -672,33 +687,21 @@ MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, getUndefRegState(isUndef)), FrameIndex); } - } else if (Opc == PPC::FMRD) { - if (OpNum == 0) { // move -> store - unsigned InReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFD)) - .addReg(InReg, - getKillRegState(isKill) | - getUndefRegState(isUndef)), - FrameIndex); - } else { // move -> load - unsigned OutReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFD)) - .addReg(OutReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef)), - FrameIndex); - } - } else if (Opc == PPC::FMRS) { + } else if (Opc == PPC::FMR || Opc == PPC::FMRSD) { + // The register may be F4RC or F8RC, and that determines the memory op. + unsigned OrigReg = MI->getOperand(OpNum).getReg(); + // We cannot tell the register class from a physreg alone. + if (TargetRegisterInfo::isPhysicalRegister(OrigReg)) + return NULL; + const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(OrigReg); + const bool is64 = RC == PPC::F8RCRegisterClass; + if (OpNum == 0) { // move -> store unsigned InReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); bool isUndef = MI->getOperand(1).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFS)) + NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), + get(is64 ? PPC::STFD : PPC::STFS)) .addReg(InReg, getKillRegState(isKill) | getUndefRegState(isUndef)), @@ -707,7 +710,8 @@ MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, unsigned OutReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); bool isUndef = MI->getOperand(0).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFS)) + NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), + get(is64 ? 
PPC::LFD : PPC::LFS)) .addReg(OutReg, RegState::Define | getDeadRegState(isDead) | @@ -733,7 +737,7 @@ bool PPCInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, else if ((Opc == PPC::OR8 && MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) return true; - else if (Opc == PPC::FMRD || Opc == PPC::FMRS) + else if (Opc == PPC::FMR || Opc == PPC::FMRSD) return true; return false; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 842f8ee..845cd8f 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1019,20 +1019,16 @@ let Uses = [RM] in { } } -/// FMR is split into 3 versions, one for 4/8 byte FP, and one for extending. +/// FMR is split into 2 versions, one for 4/8 byte FP, and one for extending. /// /// Note that these are defined as pseudo-ops on the PPC970 because they are /// often coalesced away and we don't want the dispatch group builder to think /// that they will fill slots (which could cause the load of a LSU reject to /// sneak into a d-group with a store). -def FMRS : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB), - "fmr $frD, $frB", FPGeneral, - []>, // (set F4RC:$frD, F4RC:$frB) - PPC970_Unit_Pseudo; -def FMRD : XForm_26<63, 72, (outs F8RC:$frD), (ins F8RC:$frB), - "fmr $frD, $frB", FPGeneral, - []>, // (set F8RC:$frD, F8RC:$frB) - PPC970_Unit_Pseudo; +def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB), + "fmr $frD, $frB", FPGeneral, + []>, // (set F4RC:$frD, F4RC:$frB) + PPC970_Unit_Pseudo; def FMRSD : XForm_26<63, 72, (outs F8RC:$frD), (ins F4RC:$frB), "fmr $frD, $frB", FPGeneral, [(set F8RC:$frD, (fextend F4RC:$frB))]>, @@ -1215,7 +1211,7 @@ def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>; def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA), "addme $rT, $rA", IntGeneral, - [(set GPRC:$rT, (adde GPRC:$rA, immAllOnes))]>; + [(set GPRC:$rT, (adde GPRC:$rA, -1))]>; def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA), "addze $rT, $rA", IntGeneral, [(set GPRC:$rT, (adde GPRC:$rA, 0))]>; @@ -1224,7 +1220,7 @@ def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>; def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA), "subfme $rT, $rA", IntGeneral, - [(set GPRC:$rT, (sube immAllOnes, GPRC:$rA))]>; + [(set GPRC:$rT, (sube -1, GPRC:$rA))]>; def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA), "subfze $rT, $rA", IntGeneral, [(set GPRC:$rT, (sube 0, GPRC:$rA))]>; @@ -1480,11 +1476,11 @@ def : Pat<(extloadf32 xaddr:$src), (FMRSD (LFSX xaddr:$src))>; // Memory barriers -def : Pat<(membarrier (i32 imm:$ll), - (i32 imm:$ls), - (i32 imm:$sl), - (i32 imm:$ss), - (i32 imm:$device)), +def : Pat<(membarrier (i32 imm /*ll*/), + (i32 imm /*ls*/), + (i32 imm /*sl*/), + (i32 imm /*ss*/), + (i32 imm /*device*/)), (SYNC)>; include "PPCInstrAltivec.td" diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 20e77e7..0b509ac 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -427,6 +427,12 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::R2); // System-reserved register Reserved.set(PPC::R13); // Small Data Area pointer register } + // Reserve R2 on Darwin to hack around the problem of save/restore of CR + // when the stack frame is too big to address directly; we need 
two regs. + // This is a hack. + if (Subtarget.isDarwinABI()) { + Reserved.set(PPC::R2); + } // On PPC64, r13 is the thread pointer. Never allocate this register. // Note that this is over conservative, as it also prevents allocation of R31 @@ -447,6 +453,12 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (Subtarget.isSVR4ABI()) { Reserved.set(PPC::X2); } + // Reserve R2 on Darwin to hack around the problem of save/restore of CR + // when the stack frame is too big to address directly; we need two regs. + // This is a hack. + if (Subtarget.isDarwinABI()) { + Reserved.set(PPC::X2); + } } if (needsFP(MF)) diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 049e893..1cb7340 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -287,10 +287,8 @@ def GPRC : RegisterClass<"PPC", [i32], 32, GPRCClass::allocation_order_begin(const MachineFunction &MF) const { // 32-bit SVR4 ABI: r2 is reserved for the OS. // 64-bit SVR4 ABI: r2 is reserved for the TOC pointer. - if (!MF.getTarget().getSubtarget<PPCSubtarget>().isDarwin()) - return begin()+1; - - return begin(); + // Darwin: R2 is reserved for CR save/restore sequence. + return begin()+1; } GPRCClass::iterator GPRCClass::allocation_order_end(const MachineFunction &MF) const { @@ -325,10 +323,8 @@ def G8RC : RegisterClass<"PPC", [i64], 64, G8RCClass::iterator G8RCClass::allocation_order_begin(const MachineFunction &MF) const { // 64-bit SVR4 ABI: r2 is reserved for the TOC pointer. - if (!MF.getTarget().getSubtarget<PPCSubtarget>().isDarwin()) - return begin()+1; - - return begin(); + // Darwin: r2 is reserved for CR save/restore sequence. + return begin()+1; } G8RCClass::iterator G8RCClass::allocation_order_end(const MachineFunction &MF) const { diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 22eecd4..cac6962 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -115,32 +115,3 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, return false; } - -/// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are 4-byte, -/// 8-byte, and target default. The CIE is hard-coded to indicate that the LSDA -/// pointer in the FDE section is an "sdata4", and should be encoded as a 4-byte -/// pointer by default. However, some systems may require a different size due -/// to bugs or other conditions. We will default to a 4-byte encoding unless the -/// system tells us otherwise. -/// -/// The issue is when the CIE says their is an LSDA. That mandates that every -/// FDE have an LSDA slot. But if the function does not need an LSDA. There -/// needs to be some way to signify there is none. The LSDA is encoded as -/// pc-rel. But you don't look for some magic value after adding the pc. You -/// have to look for a zero before adding the pc. The problem is that the size -/// of the zero to look for depends on the encoding. The unwinder bug in SL is -/// that it always checks for a pointer-size zero. So on x86_64 it looks for 8 -/// bytes of zero. If you have an LSDA, it works fine since the 8-bytes are -/// non-zero so it goes ahead and then reads the value based on the encoding. -/// But if you use sdata4 and there is no LSDA, then the test for zero gives a -/// false negative and the unwinder thinks there is an LSDA. -/// -/// FIXME: This call-back isn't good! 
We should be using the correct encoding -/// regardless of the system. However, there are some systems which have bugs -/// that prevent this from occuring. -DwarfLSDAEncoding::Encoding PPCTargetMachine::getLSDAEncoding() const { - if (Subtarget.isDarwin() && Subtarget.getDarwinVers() != 10) - return DwarfLSDAEncoding::Default; - - return DwarfLSDAEncoding::EightByte; -} diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index a654435..ac9ae2b 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -57,18 +57,6 @@ public: return InstrItins; } - /// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are - /// 4-byte, 8-byte, and target default. The CIE is hard-coded to indicate that - /// the LSDA pointer in the FDE section is an "sdata4", and should be encoded - /// as a 4-byte pointer by default. However, some systems may require a - /// different size due to bugs or other conditions. We will default to a - /// 4-byte encoding unless the system tells us otherwise. - /// - /// FIXME: This call-back isn't good! We should be using the correct encoding - /// regardless of the system. However, there are some systems which have bugs - /// that prevent this from occuring. - virtual DwarfLSDAEncoding::Encoding getLSDAEncoding() const; - // Pass Pipeline Configuration virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index 8f265cf..3465779 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -889,9 +889,26 @@ entry: ; recognize a more elaborate tree than a simple SETxx. define double @test_FNEG_sel(double %A, double %B, double %C) { - %D = sub double -0.000000e+00, %A ; <double> [#uses=1] + %D = fsub double -0.000000e+00, %A ; <double> [#uses=1] %Cond = fcmp ugt double %D, -0.000000e+00 ; <i1> [#uses=1] %E = select i1 %Cond, double %B, double %C ; <double> [#uses=1] ret double %E } +//===----------------------------------------------------------------------===// +The save/restore sequence for CR in prolog/epilog is terrible: +- Each CR subreg is saved individually, rather than doing one save as a unit. +- On Darwin, the save is done after the decrement of SP, which means the offset +from SP of the save slot can be too big for a store instruction, which means we +need an additional register (currently hacked in 96015+96020; the solution there +is correct, but poor). +- On SVR4 the same thing can happen, and I don't think saving before the SP +decrement is safe on that target, as there is no red zone. This is currently +broken AFAIK, although it's not a target I can exercise. 
+The following demonstrates the problem: +extern void bar(char *p); +void foo() { + char x[100000]; + bar(x); + __asm__("" ::: "cr2"); +} diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp index 9a2ce6b..f6753a6 100644 --- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp @@ -56,6 +56,9 @@ namespace { unsigned AsmVariant, const char *ExtraCode); bool printGetPCX(const MachineInstr *MI, unsigned OpNo); + + virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) + const; }; } // end of anonymous namespace @@ -140,18 +143,19 @@ bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum) { break; } + unsigned mfNum = MI->getParent()->getParent()->getFunctionNumber(); unsigned bbNum = MI->getParent()->getNumber(); - O << '\n' << ".LLGETPCH" << bbNum << ":\n"; - O << "\tcall\t.LLGETPC" << bbNum << '\n' ; + O << '\n' << ".LLGETPCH" << mfNum << '_' << bbNum << ":\n"; + O << "\tcall\t.LLGETPC" << mfNum << '_' << bbNum << '\n' ; O << "\t sethi\t" - << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << bbNum << ")), " + << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum << ")), " << operand << '\n' ; - O << ".LLGETPC" << bbNum << ":\n" ; + O << ".LLGETPC" << mfNum << '_' << bbNum << ":\n" ; O << "\tor\t" << operand - << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << bbNum << ")), " + << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum << ")), " << operand << '\n'; O << "\tadd\t" << operand << ", %o7, " << operand << '\n'; @@ -197,6 +201,39 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } +/// isBlockOnlyReachableByFallthough - Return true if the basic block has +/// exactly one predecessor and the control transfer mechanism between +/// the predecessor and this block is a fall-through. +/// +/// This overrides AsmPrinter's implementation to handle delay slots. +bool SparcAsmPrinter:: +isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { + // If this is a landing pad, it isn't a fall through. If it has no preds, + // then nothing falls through to it. + if (MBB->isLandingPad() || MBB->pred_empty()) + return false; + + // If there isn't exactly one predecessor, it can't be a fall through. + MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI; + ++PI2; + if (PI2 != MBB->pred_end()) + return false; + + // The predecessor has to be immediately before this block. + const MachineBasicBlock *Pred = *PI; + + if (!Pred->isLayoutSuccessor(MBB)) + return false; + + // Check if the last terminator is an unconditional branch. + MachineBasicBlock::const_iterator I = Pred->end(); + while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) + ; // Noop + return I == Pred->end() || !I->getDesc().isBarrier(); +} + + + // Force static initialization. extern "C" void LLVMInitializeSparcAsmPrinter() { RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget); diff --git a/lib/Target/Sparc/README.txt b/lib/Target/Sparc/README.txt index cc24abf..b4991fe 100644 --- a/lib/Target/Sparc/README.txt +++ b/lib/Target/Sparc/README.txt @@ -56,3 +56,4 @@ int %t1(int %a, int %b) { leaf fns. 
* Fill delay slots +* Implement JIT support diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index e1b3299..a7d1805 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -35,7 +35,6 @@ class SparcDAGToDAGISel : public SelectionDAGISel { /// make the right decision when generating code for different targets. const SparcSubtarget &Subtarget; SparcTargetMachine& TM; - MachineBasicBlock *CurBB; public: explicit SparcDAGToDAGISel(SparcTargetMachine &tm) : SelectionDAGISel(tm), @@ -56,10 +55,6 @@ public: char ConstraintCode, std::vector<SDValue> &OutOps); - /// InstructionSelect - This callback is invoked by - /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. - virtual void InstructionSelect(); - virtual const char *getPassName() const { return "SPARC DAG->DAG Pattern Instruction Selection"; } @@ -72,17 +67,8 @@ private: }; } // end anonymous namespace -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void SparcDAGToDAGISel::InstructionSelect() { - CurBB = BB; - // Select target instructions for the DAG. - SelectRoot(*CurDAG); - CurDAG->RemoveDeadNodes(); -} - SDNode* SparcDAGToDAGISel::getGlobalBaseReg() { - MachineFunction *MF = CurBB->getParent(); + MachineFunction *MF = BB->getParent(); unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF); return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index e67002a..4e93ef0 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -21,7 +21,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -134,7 +134,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); SDValue Load; if (ObjectVT == MVT::i32) { - Load = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0); + Load = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0, + false, false, 0); } else { ISD::LoadExtType LoadOp = ISD::SEXTLOAD; @@ -143,7 +144,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr, DAG.getConstant(Offset, MVT::i32)); Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr, - NULL, 0, ObjectVT); + NULL, 0, ObjectVT, false, false, 0); Load = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Load); } InVals.push_back(Load); @@ -167,7 +168,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0); + SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0, + false, false, 0); InVals.push_back(Load); } ArgOffset += 4; @@ -189,7 +191,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0); + HiVal = DAG.getLoad(MVT::i32, dl, Chain, 
FIPtr, NULL, 0, + false, false, 0); } SDValue LoVal; @@ -201,7 +204,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4, true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0); + LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0, + false, false, 0); } // Compose the two halves together into an i64 unit. @@ -235,7 +239,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0)); + OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0, + false, false, 0)); ArgOffset += 4; } @@ -339,7 +344,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // FIXME: VERIFY THAT 68 IS RIGHT. SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+68); PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0, + false, false, 0)); } #else @@ -385,14 +391,17 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // out the parts as integers. Top part goes in a reg. SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, - Val, StackPtr, NULL, 0); + Val, StackPtr, NULL, 0, + false, false, 0); // Sparc is big-endian, so the high part comes first. - SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, 0); + SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, + false, false, 0); // Increment the pointer to the other half. StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, DAG.getIntPtrConstant(4)); // Load the low part. 
- SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, 0); + SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, + false, false, 0); RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi)); @@ -435,7 +444,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue PtrOff = DAG.getConstant(ArgOffset, MVT::i32); PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, dl, ValToStore, - PtrOff, NULL, 0)); + PtrOff, NULL, 0, + false, false, 0)); } ArgOffset += ObjSize; } @@ -759,7 +769,7 @@ SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, GlobalBase, RelAddr); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - AbsAddr, NULL, 0); + AbsAddr, NULL, 0, false, false, 0); } SDValue SparcTargetLowering::LowerConstantPool(SDValue Op, @@ -780,7 +790,7 @@ SDValue SparcTargetLowering::LowerConstantPool(SDValue Op, SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, GlobalBase, RelAddr); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - AbsAddr, NULL, 0); + AbsAddr, NULL, 0, false, false, 0); } static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { @@ -872,7 +882,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, DAG.getConstant(TLI.getVarArgsFrameOffset(), MVT::i32)); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0, + false, false, 0); } static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { @@ -882,21 +893,23 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { SDValue VAListPtr = Node->getOperand(1); const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); DebugLoc dl = Node->getDebugLoc(); - SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0); + SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0, + false, false, 0); // Increment the pointer, VAList, to the next vaarg SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList, DAG.getConstant(VT.getSizeInBits()/8, MVT::i32)); // Store the incremented VAList to the legalized pointer InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr, - VAListPtr, SV, 0); + VAListPtr, SV, 0, false, false, 0); // Load the actual argument out of the pointer VAList, unless this is an // f64 load. if (VT != MVT::f64) - return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0); + return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0, false, false, 0); // Otherwise, load it as i64, then do a bitconvert. - SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0); + SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0, + false, false, 0); // Bit-Convert the value to f64. 
SDValue Ops[2] = { diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h index e457235..56d8708 100644 --- a/lib/Target/Sparc/SparcMachineFunctionInfo.h +++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h @@ -22,7 +22,7 @@ namespace llvm { unsigned GlobalBaseReg; public: SparcMachineFunctionInfo() : GlobalBaseReg(0) {} - SparcMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0) {} + explicit SparcMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0) {} unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index f6f632d..8152e1d 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -100,8 +100,6 @@ namespace { Lowering(*TM.getTargetLowering()), Subtarget(*TM.getSubtargetImpl()) { } - virtual void InstructionSelect(); - virtual const char *getPassName() const { return "SystemZ DAG->DAG Pattern Instruction Selection"; } @@ -152,10 +150,6 @@ namespace { bool MatchAddressBase(SDValue N, SystemZRRIAddressMode &AM); bool MatchAddressRI(SDValue N, SystemZRRIAddressMode &AM, bool is12Bit); - - #ifndef NDEBUG - unsigned Indent; - #endif }; } // end anonymous namespace @@ -594,41 +588,22 @@ bool SystemZDAGToDAGISel::SelectLAAddr(SDNode *Op, SDValue Addr, bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Disp, SDValue &Index) { if (ISD::isNON_EXTLoad(N.getNode()) && - N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), P, P)) + IsLegalToFold(N, P, P)) return SelectAddrRRI20(P, N.getOperand(1), Base, Disp, Index); return false; } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void SystemZDAGToDAGISel::InstructionSelect() { - // Codegen the basic block. - DEBUG(errs() << "===== Instruction selection begins:\n"); - DEBUG(Indent = 0); - SelectRoot(*CurDAG); - DEBUG(errs() << "===== Instruction selection ends:\n"); - - CurDAG->RemoveDeadNodes(); -} - SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { EVT NVT = Node->getValueType(0); DebugLoc dl = Node->getDebugLoc(); unsigned Opcode = Node->getOpcode(); // Dump information about the Node being selected - DEBUG(errs().indent(Indent) << "Selecting: "; - Node->dump(CurDAG); - errs() << "\n"); - DEBUG(Indent += 2); + DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); // If we have a custom node, we already have selected! if (Node->isMachineOpcode()) { - DEBUG(errs().indent(Indent-2) << "== "; - Node->dump(CurDAG); - errs() << "\n"); - DEBUG(Indent -= 2); + DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); return NULL; // Already selected. } @@ -694,9 +669,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { MVT::i32)); ReplaceUses(SDValue(Node, 0), SDValue(Div, 0)); - DEBUG(errs().indent(Indent-2) << "=> "; - Result->dump(CurDAG); - errs() << "\n"); + DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n"); } // Copy the remainder (even subreg) result, if it is needed. 
@@ -709,15 +682,9 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { MVT::i32)); ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0)); - DEBUG(errs().indent(Indent-2) << "=> "; - Result->dump(CurDAG); - errs() << "\n"); + DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n"); } -#ifndef NDEBUG - Indent -= 2; -#endif - return NULL; } case ISD::UDIVREM: { @@ -783,9 +750,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { CurDAG->getTargetConstant(SubRegIdx, MVT::i32)); ReplaceUses(SDValue(Node, 0), SDValue(Div, 0)); - DEBUG(errs().indent(Indent-2) << "=> "; - Result->dump(CurDAG); - errs() << "\n"); + DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n"); } // Copy the remainder (even subreg) result, if it is needed. @@ -797,15 +762,9 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { CurDAG->getTargetConstant(SubRegIdx, MVT::i32)); ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0)); - DEBUG(errs().indent(Indent-2) << "=> "; - Result->dump(CurDAG); - errs() << "\n"); + DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n"); } -#ifndef NDEBUG - Indent -= 2; -#endif - return NULL; } } @@ -813,14 +772,12 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Select the default instruction SDNode *ResNode = SelectCode(Node); - DEBUG(errs().indent(Indent-2) << "=> "; + DEBUG(errs() << "=> "; if (ResNode == NULL || ResNode == Node) Node->dump(CurDAG); else ResNode->dump(CurDAG); errs() << "\n"; ); - DEBUG(Indent -= 2); - return ResNode; } diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index f7405a5..6f4b30f 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -30,9 +30,9 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -337,7 +337,8 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain, // from this parameter SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue = DAG.getLoad(LocVT, dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); } // If this is an 8/16/32-bit value, it is really passed promoted to 64 @@ -435,7 +436,8 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, DAG.getIntPtrConstant(Offset)); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), Offset)); + PseudoSourceValue::getStack(), Offset, + false, false, 0)); } } @@ -738,7 +740,7 @@ SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op, if (ExtraLoadRequired) Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, false, false, 0); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. 
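Before moving on to the .td and MachineFunctionInfo changes below, it is worth spelling out the one mechanical change that dominates the Sparc and SystemZ lowering hunks above: DAG.getLoad and DAG.getStore now take three extra trailing arguments, passed as false, false, 0 at every call site in this commit. The sketch below is not part of the patch; it only illustrates the new call shape. The meaning of the trailing parameters (a volatile flag, a non-temporal flag, and an alignment hint where 0 means "use the default") is an assumption inferred from the values being passed, and the helper names are hypothetical.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Hypothetical helpers showing the widened SelectionDAG calls used throughout
// this patch. The last three arguments are assumed to be isVolatile,
// isNonTemporal and Alignment (0 = default alignment).
static SDValue loadArgSlot(SelectionDAG &DAG, DebugLoc dl,
                           SDValue Chain, SDValue FIPtr) {
  // Before this patch: DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0);
  return DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0,
                     false, false, 0);
}

static SDValue storeArgSlot(SelectionDAG &DAG, DebugLoc dl,
                            SDValue Chain, SDValue Arg, SDValue FIPtr) {
  // Stores gain the same three trailing arguments.
  return DAG.getStore(Chain, dl, Arg, FIPtr, NULL, 0,
                      false, false, 0);
}

The same three-argument suffix appears in the Sparc, SystemZ and X86 hunks of this commit, which is why so many otherwise unrelated call sites change in lockstep.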
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 336e20e..f46840c 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -58,7 +58,7 @@ def FMOV64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src), []>; } -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1 in { def FMOV32rm : Pseudo<(outs FP32:$dst), (ins rriaddr12:$src), "le\t{$dst, $src}", [(set FP32:$dst, (load rriaddr12:$src))]>; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 1891bba..a44f6d9 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -257,7 +257,7 @@ def MOV64rihi32 : RILI<0xEC0, (outs GR64:$dst), (ins i64imm:$src), [(set GR64:$dst, i64hi32:$src)]>; } -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1 in { def MOV32rm : RXI<0x58, (outs GR32:$dst), (ins rriaddr12:$src), "l\t{$dst, $src}", diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h index e47d419..fd6e330 100644 --- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h +++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -33,7 +33,8 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo { public: SystemZMachineFunctionInfo() : CalleeSavedFrameSize(0) {} - SystemZMachineFunctionInfo(MachineFunction &MF) : CalleeSavedFrameSize(0) {} + explicit SystemZMachineFunctionInfo(MachineFunction &MF) + : CalleeSavedFrameSize(0) {} unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; } void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; } diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index 295b30f..9a16808 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -580,7 +580,7 @@ const IntegerType *TargetData::getIntPtrType(LLVMContext &C) const { uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices, unsigned NumIndices) const { const Type *Ty = ptrTy; - assert(isa<PointerType>(Ty) && "Illegal argument for getIndexedOffset()"); + assert(Ty->isPointerTy() && "Illegal argument for getIndexedOffset()"); uint64_t Result = 0; generic_gep_type_iterator<Value* const*> diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp index 094a57e..18b0fa4 100644 --- a/lib/Target/TargetInstrInfo.cpp +++ b/lib/Target/TargetInstrInfo.cpp @@ -15,6 +15,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" +#include <ctype.h> using namespace llvm; //===----------------------------------------------------------------------===// diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index a231ebc..82619c7 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -19,17 +19,15 @@ #include "llvm/GlobalVariable.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include 
"llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -289,832 +287,54 @@ TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind) const { } /// getSymbolForDwarfGlobalReference - Return an MCExpr to use for a -/// pc-relative reference to the specified global variable from exception -/// handling information. In addition to the symbol, this returns -/// by-reference: -/// -/// IsIndirect - True if the returned symbol is actually a stub that contains -/// the address of the symbol, false if the symbol is the global itself. -/// -/// IsPCRel - True if the symbol reference is already pc-relative, false if -/// the caller needs to subtract off the address of the reference from the -/// symbol. -/// +/// reference to the specified global variable from exception +/// handling information. const MCExpr *TargetLoweringObjectFile:: getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { - // The generic implementation of this just returns a direct reference to the - // symbol. - IsIndirect = false; - IsPCRel = false; - + MachineModuleInfo *MMI, unsigned Encoding) const { // FIXME: Use GetGlobalValueSymbol. SmallString<128> Name; Mang->getNameWithPrefix(Name, GV, false); - return MCSymbolRefExpr::Create(Name.str(), getContext()); -} - - -//===----------------------------------------------------------------------===// -// ELF -//===----------------------------------------------------------------------===// -typedef StringMap<const MCSectionELF*> ELFUniqueMapTy; - -TargetLoweringObjectFileELF::~TargetLoweringObjectFileELF() { - // If we have the section uniquing map, free it. - delete (ELFUniqueMapTy*)UniquingMap; -} - -const MCSection *TargetLoweringObjectFileELF:: -getELFSection(StringRef Section, unsigned Type, unsigned Flags, - SectionKind Kind, bool IsExplicit) const { - if (UniquingMap == 0) - UniquingMap = new ELFUniqueMapTy(); - ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)UniquingMap; - - // Do the lookup, if we have a hit, return it. 
- const MCSectionELF *&Entry = Map[Section]; - if (Entry) return Entry; - - return Entry = MCSectionELF::Create(Section, Type, Flags, Kind, IsExplicit, - getContext()); -} - -void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - if (UniquingMap != 0) - ((ELFUniqueMapTy*)UniquingMap)->clear(); - TargetLoweringObjectFile::Initialize(Ctx, TM); - - BSSSection = - getELFSection(".bss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getBSS()); - - TextSection = - getELFSection(".text", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_EXECINSTR | MCSectionELF::SHF_ALLOC, - SectionKind::getText()); - - DataSection = - getELFSection(".data", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getDataRel()); - - ReadOnlySection = - getELFSection(".rodata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC, - SectionKind::getReadOnly()); - - TLSDataSection = - getELFSection(".tdata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | - MCSectionELF::SHF_WRITE, SectionKind::getThreadData()); - - TLSBSSSection = - getELFSection(".tbss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | - MCSectionELF::SHF_WRITE, SectionKind::getThreadBSS()); - - DataRelSection = - getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); - - DataRelLocalSection = - getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRelLocal()); - - DataRelROSection = - getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getReadOnlyWithRel()); - - DataRelROLocalSection = - getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getReadOnlyWithRelLocal()); - - MergeableConst4Section = - getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst4()); - - MergeableConst8Section = - getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst8()); - - MergeableConst16Section = - getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst16()); - - StaticCtorSection = - getELFSection(".ctors", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); - - StaticDtorSection = - getELFSection(".dtors", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); - - // Exception Handling Sections. - - // FIXME: We're emitting LSDA info into a readonly section on ELF, even though - // it contains relocatable pointers. In PIC mode, this is probably a big - // runtime hit for C++ apps. Either the contents of the LSDA need to be - // adjusted or this should be a data section. - LSDASection = - getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC, SectionKind::getReadOnly()); - EHFrameSection = - getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); - - // Debug Info Sections. 
- DwarfAbbrevSection = - getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfInfoSection = - getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfLineSection = - getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfFrameSection = - getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfPubNamesSection = - getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfPubTypesSection = - getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfStrSection = - getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfLocSection = - getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfARangesSection = - getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfRangesSection = - getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfMacroInfoSection = - getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); -} - - -static SectionKind -getELFKindForNamedSection(StringRef Name, SectionKind K) { - if (Name.empty() || Name[0] != '.') return K; - - // Some lame default implementation based on some magic section names. - if (Name == ".bss" || - Name.startswith(".bss.") || - Name.startswith(".gnu.linkonce.b.") || - Name.startswith(".llvm.linkonce.b.") || - Name == ".sbss" || - Name.startswith(".sbss.") || - Name.startswith(".gnu.linkonce.sb.") || - Name.startswith(".llvm.linkonce.sb.")) - return SectionKind::getBSS(); - - if (Name == ".tdata" || - Name.startswith(".tdata.") || - Name.startswith(".gnu.linkonce.td.") || - Name.startswith(".llvm.linkonce.td.")) - return SectionKind::getThreadData(); - - if (Name == ".tbss" || - Name.startswith(".tbss.") || - Name.startswith(".gnu.linkonce.tb.") || - Name.startswith(".llvm.linkonce.tb.")) - return SectionKind::getThreadBSS(); - - return K; -} - - -static unsigned getELFSectionType(StringRef Name, SectionKind K) { - - if (Name == ".init_array") - return MCSectionELF::SHT_INIT_ARRAY; - - if (Name == ".fini_array") - return MCSectionELF::SHT_FINI_ARRAY; + const MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); - if (Name == ".preinit_array") - return MCSectionELF::SHT_PREINIT_ARRAY; - - if (K.isBSS() || K.isThreadBSS()) - return MCSectionELF::SHT_NOBITS; - - return MCSectionELF::SHT_PROGBITS; -} - - -static unsigned -getELFSectionFlags(SectionKind K) { - unsigned Flags = 0; - - if (!K.isMetadata()) - Flags |= MCSectionELF::SHF_ALLOC; - - if (K.isText()) - Flags |= MCSectionELF::SHF_EXECINSTR; - - if (K.isWriteable()) - Flags |= MCSectionELF::SHF_WRITE; - - if (K.isThreadLocal()) - Flags |= MCSectionELF::SHF_TLS; - - // K.isMergeableConst() is left out to honour PR4650 - if (K.isMergeableCString() || K.isMergeableConst4() || - K.isMergeableConst8() || K.isMergeableConst16()) - Flags |= MCSectionELF::SHF_MERGE; - - if (K.isMergeableCString()) - Flags |= MCSectionELF::SHF_STRINGS; - - return Flags; -} - - -const MCSection *TargetLoweringObjectFileELF:: -getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - StringRef SectionName = GV->getSection(); - - // Infer section flags from the section name if we can. 
- Kind = getELFKindForNamedSection(SectionName, Kind); - - return getELFSection(SectionName, - getELFSectionType(SectionName, Kind), - getELFSectionFlags(Kind), Kind, true); -} - -static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) { - if (Kind.isText()) return ".gnu.linkonce.t."; - if (Kind.isReadOnly()) return ".gnu.linkonce.r."; - - if (Kind.isThreadData()) return ".gnu.linkonce.td."; - if (Kind.isThreadBSS()) return ".gnu.linkonce.tb."; - - if (Kind.isDataNoRel()) return ".gnu.linkonce.d."; - if (Kind.isDataRelLocal()) return ".gnu.linkonce.d.rel.local."; - if (Kind.isDataRel()) return ".gnu.linkonce.d.rel."; - if (Kind.isReadOnlyWithRelLocal()) return ".gnu.linkonce.d.rel.ro.local."; - - assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return ".gnu.linkonce.d.rel.ro."; + return getSymbolForDwarfReference(Sym, MMI, Encoding); } -const MCSection *TargetLoweringObjectFileELF:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - - // If this global is linkonce/weak and the target handles this by emitting it - // into a 'uniqued' section name, create and return the section now. - if (GV->isWeakForLinker() && !Kind.isCommon() && !Kind.isBSS()) { - const char *Prefix = getSectionPrefixForUniqueGlobal(Kind); - SmallString<128> Name; - Name.append(Prefix, Prefix+strlen(Prefix)); - Mang->getNameWithPrefix(Name, GV, false); - return getELFSection(Name.str(), getELFSectionType(Name.str(), Kind), - getELFSectionFlags(Kind), Kind); - } - - if (Kind.isText()) return TextSection; - - if (Kind.isMergeable1ByteCString() || - Kind.isMergeable2ByteCString() || - Kind.isMergeable4ByteCString()) { - - // We also need alignment here. - // FIXME: this is getting the alignment of the character, not the - // alignment of the global! - unsigned Align = - TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)); - - const char *SizeSpec = ".rodata.str1."; - if (Kind.isMergeable2ByteCString()) - SizeSpec = ".rodata.str2."; - else if (Kind.isMergeable4ByteCString()) - SizeSpec = ".rodata.str4."; - else - assert(Kind.isMergeable1ByteCString() && "unknown string width"); - - - std::string Name = SizeSpec + utostr(Align); - return getELFSection(Name, MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | - MCSectionELF::SHF_MERGE | - MCSectionELF::SHF_STRINGS, - Kind); - } - - if (Kind.isMergeableConst()) { - if (Kind.isMergeableConst4() && MergeableConst4Section) - return MergeableConst4Section; - if (Kind.isMergeableConst8() && MergeableConst8Section) - return MergeableConst8Section; - if (Kind.isMergeableConst16() && MergeableConst16Section) - return MergeableConst16Section; - return ReadOnlySection; // .const - } - - if (Kind.isReadOnly()) return ReadOnlySection; - - if (Kind.isThreadData()) return TLSDataSection; - if (Kind.isThreadBSS()) return TLSBSSSection; - - // Note: we claim that common symbols are put in BSSSection, but they are - // really emitted with the magic .comm directive, which creates a symbol table - // entry but not a section. 
- if (Kind.isBSS() || Kind.isCommon()) return BSSSection; - - if (Kind.isDataNoRel()) return DataSection; - if (Kind.isDataRelLocal()) return DataRelLocalSection; - if (Kind.isDataRel()) return DataRelSection; - if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; - - assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return DataRelROSection; -} - -/// getSectionForConstant - Given a mergeable constant with the -/// specified size and relocation information, return a section that it -/// should be placed in. -const MCSection *TargetLoweringObjectFileELF:: -getSectionForConstant(SectionKind Kind) const { - if (Kind.isMergeableConst4() && MergeableConst4Section) - return MergeableConst4Section; - if (Kind.isMergeableConst8() && MergeableConst8Section) - return MergeableConst8Section; - if (Kind.isMergeableConst16() && MergeableConst16Section) - return MergeableConst16Section; - if (Kind.isReadOnly()) - return ReadOnlySection; - - if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; - assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return DataRelROSection; -} - -//===----------------------------------------------------------------------===// -// MachO -//===----------------------------------------------------------------------===// - -typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy; - -TargetLoweringObjectFileMachO::~TargetLoweringObjectFileMachO() { - // If we have the MachO uniquing map, free it. - delete (MachOUniqueMapTy*)UniquingMap; -} - - -const MCSectionMachO *TargetLoweringObjectFileMachO:: -getMachOSection(StringRef Segment, StringRef Section, - unsigned TypeAndAttributes, - unsigned Reserved2, SectionKind Kind) const { - // We unique sections by their segment/section pair. The returned section - // may not have the same flags as the requested section, if so this should be - // diagnosed by the client as an error. - - // Create the map if it doesn't already exist. - if (UniquingMap == 0) - UniquingMap = new MachOUniqueMapTy(); - MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)UniquingMap; - - // Form the name to look up. - SmallString<64> Name; - Name += Segment; - Name.push_back(','); - Name += Section; - - // Do the lookup, if we have a hit, return it. - const MCSectionMachO *&Entry = Map[Name.str()]; - if (Entry) return Entry; - - // Otherwise, return a new section. 
- return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes, - Reserved2, Kind, getContext()); -} - - -void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - if (UniquingMap != 0) - ((MachOUniqueMapTy*)UniquingMap)->clear(); - TargetLoweringObjectFile::Initialize(Ctx, TM); - - TextSection // .text - = getMachOSection("__TEXT", "__text", - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - SectionKind::getText()); - DataSection // .data - = getMachOSection("__DATA", "__data", 0, SectionKind::getDataRel()); - - CStringSection // .cstring - = getMachOSection("__TEXT", "__cstring", MCSectionMachO::S_CSTRING_LITERALS, - SectionKind::getMergeable1ByteCString()); - UStringSection - = getMachOSection("__TEXT","__ustring", 0, - SectionKind::getMergeable2ByteCString()); - FourByteConstantSection // .literal4 - = getMachOSection("__TEXT", "__literal4", MCSectionMachO::S_4BYTE_LITERALS, - SectionKind::getMergeableConst4()); - EightByteConstantSection // .literal8 - = getMachOSection("__TEXT", "__literal8", MCSectionMachO::S_8BYTE_LITERALS, - SectionKind::getMergeableConst8()); - - // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back - // to using it in -static mode. - SixteenByteConstantSection = 0; - if (TM.getRelocationModel() != Reloc::Static && - TM.getTargetData()->getPointerSize() == 32) - SixteenByteConstantSection = // .literal16 - getMachOSection("__TEXT", "__literal16",MCSectionMachO::S_16BYTE_LITERALS, - SectionKind::getMergeableConst16()); - - ReadOnlySection // .const - = getMachOSection("__TEXT", "__const", 0, SectionKind::getReadOnly()); - - TextCoalSection - = getMachOSection("__TEXT", "__textcoal_nt", - MCSectionMachO::S_COALESCED | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - SectionKind::getText()); - ConstTextCoalSection - = getMachOSection("__TEXT", "__const_coal", MCSectionMachO::S_COALESCED, - SectionKind::getText()); - ConstDataCoalSection - = getMachOSection("__DATA","__const_coal", MCSectionMachO::S_COALESCED, - SectionKind::getText()); - ConstDataSection // .const_data - = getMachOSection("__DATA", "__const", 0, - SectionKind::getReadOnlyWithRel()); - DataCoalSection - = getMachOSection("__DATA","__datacoal_nt", MCSectionMachO::S_COALESCED, - SectionKind::getDataRel()); - DataCommonSection - = getMachOSection("__DATA","__common", MCSectionMachO::S_ZEROFILL, - SectionKind::getBSS()); - DataBSSSection - = getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL, - SectionKind::getBSS()); - - - LazySymbolPointerSection - = getMachOSection("__DATA", "__la_symbol_ptr", - MCSectionMachO::S_LAZY_SYMBOL_POINTERS, - SectionKind::getMetadata()); - NonLazySymbolPointerSection - = getMachOSection("__DATA", "__nl_symbol_ptr", - MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, - SectionKind::getMetadata()); - - if (TM.getRelocationModel() == Reloc::Static) { - StaticCtorSection - = getMachOSection("__TEXT", "__constructor", 0,SectionKind::getDataRel()); - StaticDtorSection - = getMachOSection("__TEXT", "__destructor", 0, SectionKind::getDataRel()); - } else { - StaticCtorSection - = getMachOSection("__DATA", "__mod_init_func", - MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, - SectionKind::getDataRel()); - StaticDtorSection - = getMachOSection("__DATA", "__mod_term_func", - MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, - SectionKind::getDataRel()); - } - - // Exception Handling. 
- LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0, - SectionKind::getDataRel()); - EHFrameSection = - getMachOSection("__TEXT", "__eh_frame", - MCSectionMachO::S_COALESCED | - MCSectionMachO::S_ATTR_NO_TOC | - MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | - MCSectionMachO::S_ATTR_LIVE_SUPPORT, - SectionKind::getReadOnly()); - - // Debug Information. - DwarfAbbrevSection = - getMachOSection("__DWARF", "__debug_abbrev", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfInfoSection = - getMachOSection("__DWARF", "__debug_info", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfLineSection = - getMachOSection("__DWARF", "__debug_line", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfFrameSection = - getMachOSection("__DWARF", "__debug_frame", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfPubNamesSection = - getMachOSection("__DWARF", "__debug_pubnames", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfPubTypesSection = - getMachOSection("__DWARF", "__debug_pubtypes", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfStrSection = - getMachOSection("__DWARF", "__debug_str", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfLocSection = - getMachOSection("__DWARF", "__debug_loc", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfARangesSection = - getMachOSection("__DWARF", "__debug_aranges", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfRangesSection = - getMachOSection("__DWARF", "__debug_ranges", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfMacroInfoSection = - getMachOSection("__DWARF", "__debug_macinfo", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfDebugInlineSection = - getMachOSection("__DWARF", "__debug_inlined", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); -} - -const MCSection *TargetLoweringObjectFileMachO:: -getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - // Parse the section specifier and create it if valid. - StringRef Segment, Section; - unsigned TAA, StubSize; - std::string ErrorCode = - MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, - TAA, StubSize); - if (!ErrorCode.empty()) { - // If invalid, report the error with llvm_report_error. - llvm_report_error("Global variable '" + GV->getNameStr() + - "' has an invalid section specifier '" + GV->getSection()+ - "': " + ErrorCode + "."); - // Fall back to dropping it into the data section. - return DataSection; - } - - // Get the section. - const MCSectionMachO *S = - getMachOSection(Segment, Section, TAA, StubSize, Kind); - - // Okay, now that we got the section, verify that the TAA & StubSize agree. - // If the user declared multiple globals with different section flags, we need - // to reject it here. - if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { - // If invalid, report the error with llvm_report_error. - llvm_report_error("Global variable '" + GV->getNameStr() + - "' section type or attributes does not match previous" - " section specifier"); - } - - return S; -} - -const MCSection *TargetLoweringObjectFileMachO:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - assert(!Kind.isThreadLocal() && "Darwin doesn't support TLS"); - - if (Kind.isText()) - return GV->isWeakForLinker() ? 
TextCoalSection : TextSection; - - // If this is weak/linkonce, put this in a coalescable section, either in text - // or data depending on if it is writable. - if (GV->isWeakForLinker()) { - if (Kind.isReadOnly()) - return ConstTextCoalSection; - return DataCoalSection; - } - - // FIXME: Alignment check should be handled by section classifier. - if (Kind.isMergeable1ByteCString() || - Kind.isMergeable2ByteCString()) { - if (TM.getTargetData()->getPreferredAlignment( - cast<GlobalVariable>(GV)) < 32) { - if (Kind.isMergeable1ByteCString()) - return CStringSection; - assert(Kind.isMergeable2ByteCString()); - return UStringSection; - } - } - - if (Kind.isMergeableConst()) { - if (Kind.isMergeableConst4()) - return FourByteConstantSection; - if (Kind.isMergeableConst8()) - return EightByteConstantSection; - if (Kind.isMergeableConst16() && SixteenByteConstantSection) - return SixteenByteConstantSection; - } - - // Otherwise, if it is readonly, but not something we can specially optimize, - // just drop it in .const. - if (Kind.isReadOnly()) - return ReadOnlySection; - - // If this is marked const, put it into a const section. But if the dynamic - // linker needs to write to it, put it in the data segment. - if (Kind.isReadOnlyWithRel()) - return ConstDataSection; - - // Put zero initialized globals with strong external linkage in the - // DATA, __common section with the .zerofill directive. - if (Kind.isBSSExtern()) - return DataCommonSection; - - // Put zero initialized globals with local linkage in __DATA,__bss directive - // with the .zerofill directive (aka .lcomm). - if (Kind.isBSSLocal()) - return DataBSSSection; - - // Otherwise, just drop the variable in the normal data section. - return DataSection; -} - -const MCSection * -TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const { - // If this constant requires a relocation, we have to put it in the data - // segment, not in the text segment. - if (Kind.isDataRel() || Kind.isReadOnlyWithRel()) - return ConstDataSection; - - if (Kind.isMergeableConst4()) - return FourByteConstantSection; - if (Kind.isMergeableConst8()) - return EightByteConstantSection; - if (Kind.isMergeableConst16() && SixteenByteConstantSection) - return SixteenByteConstantSection; - return ReadOnlySection; // .const -} - -/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide -/// not to emit the UsedDirective for some symbols in llvm.used. -// FIXME: REMOVE this (rdar://7071300) -bool TargetLoweringObjectFileMachO:: -shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const { - /// On Darwin, internally linked data beginning with "L" or "l" does not have - /// the directive emitted (this occurs in ObjC metadata). - if (!GV) return false; - - // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix. - if (GV->hasLocalLinkage() && !isa<Function>(GV)) { - // FIXME: ObjC metadata is currently emitted as internal symbols that have - // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and - // this horrible hack can go away. 
- SmallString<64> Name; - Mang->getNameWithPrefix(Name, GV, false); - if (Name[0] == 'L' || Name[0] == 'l') - return false; +const MCExpr *TargetLoweringObjectFile:: +getSymbolForDwarfReference(const MCSymbol *Sym, MachineModuleInfo *MMI, + unsigned Encoding) const { + const MCExpr *Res = MCSymbolRefExpr::Create(Sym, getContext()); + + switch (Encoding & 0xF0) { + default: + llvm_report_error("We do not support this DWARF encoding yet!"); + break; + case dwarf::DW_EH_PE_absptr: + // Do nothing special + break; + case dwarf::DW_EH_PE_pcrel: + // FIXME: PCSymbol + const MCExpr *PC = MCSymbolRefExpr::Create(".", getContext()); + Res = MCBinaryExpr::CreateSub(Res, PC, getContext()); + break; } - return true; + return Res; } -const MCExpr *TargetLoweringObjectFileMachO:: -getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { - // The mach-o version of this method defaults to returning a stub reference. - IsIndirect = true; - IsPCRel = false; - - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, true); - Name += "$non_lazy_ptr"; - return MCSymbolRefExpr::Create(Name.str(), getContext()); +unsigned TargetLoweringObjectFile::getPersonalityEncoding() const { + return dwarf::DW_EH_PE_absptr; } - -//===----------------------------------------------------------------------===// -// COFF -//===----------------------------------------------------------------------===// - -typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy; - -TargetLoweringObjectFileCOFF::~TargetLoweringObjectFileCOFF() { - delete (COFFUniqueMapTy*)UniquingMap; +unsigned TargetLoweringObjectFile::getLSDAEncoding() const { + return dwarf::DW_EH_PE_absptr; } - -const MCSection *TargetLoweringObjectFileCOFF:: -getCOFFSection(StringRef Name, bool isDirective, SectionKind Kind) const { - // Create the map if it doesn't already exist. - if (UniquingMap == 0) - UniquingMap = new MachOUniqueMapTy(); - COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)UniquingMap; - - // Do the lookup, if we have a hit, return it. - const MCSectionCOFF *&Entry = Map[Name]; - if (Entry) return Entry; - - return Entry = MCSectionCOFF::Create(Name, isDirective, Kind, getContext()); +unsigned TargetLoweringObjectFile::getFDEEncoding() const { + return dwarf::DW_EH_PE_absptr; } -void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - if (UniquingMap != 0) - ((COFFUniqueMapTy*)UniquingMap)->clear(); - TargetLoweringObjectFile::Initialize(Ctx, TM); - TextSection = getCOFFSection("\t.text", true, SectionKind::getText()); - DataSection = getCOFFSection("\t.data", true, SectionKind::getDataRel()); - StaticCtorSection = - getCOFFSection(".ctors", false, SectionKind::getDataRel()); - StaticDtorSection = - getCOFFSection(".dtors", false, SectionKind::getDataRel()); - - // FIXME: We're emitting LSDA info into a readonly section on COFF, even - // though it contains relocatable pointers. In PIC mode, this is probably a - // big runtime hit for C++ apps. Either the contents of the LSDA need to be - // adjusted or this should be a data section. - LSDASection = - getCOFFSection(".gcc_except_table", false, SectionKind::getReadOnly()); - EHFrameSection = - getCOFFSection(".eh_frame", false, SectionKind::getDataRel()); - - // Debug info. - // FIXME: Don't use 'directive' mode here. 
- DwarfAbbrevSection = - getCOFFSection("\t.section\t.debug_abbrev,\"dr\"", - true, SectionKind::getMetadata()); - DwarfInfoSection = - getCOFFSection("\t.section\t.debug_info,\"dr\"", - true, SectionKind::getMetadata()); - DwarfLineSection = - getCOFFSection("\t.section\t.debug_line,\"dr\"", - true, SectionKind::getMetadata()); - DwarfFrameSection = - getCOFFSection("\t.section\t.debug_frame,\"dr\"", - true, SectionKind::getMetadata()); - DwarfPubNamesSection = - getCOFFSection("\t.section\t.debug_pubnames,\"dr\"", - true, SectionKind::getMetadata()); - DwarfPubTypesSection = - getCOFFSection("\t.section\t.debug_pubtypes,\"dr\"", - true, SectionKind::getMetadata()); - DwarfStrSection = - getCOFFSection("\t.section\t.debug_str,\"dr\"", - true, SectionKind::getMetadata()); - DwarfLocSection = - getCOFFSection("\t.section\t.debug_loc,\"dr\"", - true, SectionKind::getMetadata()); - DwarfARangesSection = - getCOFFSection("\t.section\t.debug_aranges,\"dr\"", - true, SectionKind::getMetadata()); - DwarfRangesSection = - getCOFFSection("\t.section\t.debug_ranges,\"dr\"", - true, SectionKind::getMetadata()); - DwarfMacroInfoSection = - getCOFFSection("\t.section\t.debug_macinfo,\"dr\"", - true, SectionKind::getMetadata()); -} - -const MCSection *TargetLoweringObjectFileCOFF:: -getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - return getCOFFSection(GV->getSection(), false, Kind); -} - -static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { - if (Kind.isText()) - return ".text$linkonce"; - if (Kind.isWriteable()) - return ".data$linkonce"; - return ".rdata$linkonce"; -} - - -const MCSection *TargetLoweringObjectFileCOFF:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - assert(!Kind.isThreadLocal() && "Doesn't support TLS"); - - // If this global is linkonce/weak and the target handles this by emitting it - // into a 'uniqued' section name, create and return the section now. 
- if (GV->isWeakForLinker()) { - const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind); - SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); - Mang->getNameWithPrefix(Name, GV, false); - return getCOFFSection(Name.str(), false, Kind); - } - - if (Kind.isText()) - return getTextSection(); - - return getDataSection(); +unsigned TargetLoweringObjectFile::getTTypeEncoding() const { + return dwarf::DW_EH_PE_absptr; } diff --git a/lib/Target/X86/Android.mk b/lib/Target/X86/Android.mk new file mode 100644 index 0000000..f5b8180 --- /dev/null +++ b/lib/Target/X86/Android.mk @@ -0,0 +1,47 @@ +LOCAL_PATH := $(call my-dir) + +# For the host only +# ===================================================== +include $(CLEAR_VARS) +include $(CLEAR_TBLGEN_VARS) + +TBLGEN_TABLES := \ + X86GenRegisterInfo.h.inc \ + X86GenRegisterNames.inc \ + X86GenRegisterInfo.inc \ + X86GenInstrNames.inc \ + X86GenInstrInfo.inc \ + X86GenAsmMatcher.inc \ + X86GenDAGISel.inc \ + X86GenDisassemblerTables.inc \ + X86GenFastISel.inc \ + X86GenCallingConv.inc \ + X86GenSubtarget.inc \ + X86GenEDInfo.inc + +LOCAL_SRC_FILES := \ + X86AsmBackend.cpp \ + X86COFFMachineModuleInfo.cpp \ + X86CodeEmitter.cpp \ + X86ELFWriterInfo.cpp \ + X86FastISel.cpp \ + X86FloatingPoint.cpp \ + X86FloatingPointRegKill.cpp \ + X86ISelDAGToDAG.cpp \ + X86ISelLowering.cpp \ + X86InstrInfo.cpp \ + X86JITInfo.cpp \ + X86MCAsmInfo.cpp \ + X86MCCodeEmitter.cpp \ + X86MCTargetExpr.cpp \ + X86RegisterInfo.cpp \ + X86Subtarget.cpp \ + X86TargetMachine.cpp \ + X86TargetObjectFile.cpp + +LOCAL_MODULE:= libLLVMX86CodeGen + +include $(LLVM_HOST_BUILD_MK) +include $(LLVM_TBLGEN_RULES_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_HOST_STATIC_LIBRARY) diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index acf497a..84d7bb7 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -10,6 +10,7 @@ #include "llvm/Target/TargetAsmParser.h" #include "X86.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" @@ -183,6 +184,14 @@ struct X86Operand : public MCParsedAsmOperand { bool isReg() const { return Kind == Register; } + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediates when possible. + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(Expr)); + } + void addRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getReg())); @@ -190,13 +199,13 @@ struct X86Operand : public MCParsedAsmOperand { void addImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateExpr(getImm())); + addExpr(Inst, getImm()); } void addImmSExt8Operands(MCInst &Inst, unsigned N) const { // FIXME: Support user customization of the render method. 
assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateExpr(getImm())); + addExpr(Inst, getImm()); } void addMemOperands(MCInst &Inst, unsigned N) const { @@ -204,7 +213,7 @@ struct X86Operand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); Inst.addOperand(MCOperand::CreateImm(getMemScale())); Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); - Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); + addExpr(Inst, getMemDisp()); Inst.addOperand(MCOperand::CreateReg(getMemSegReg())); } @@ -218,7 +227,7 @@ struct X86Operand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); Inst.addOperand(MCOperand::CreateImm(getMemScale())); Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); - Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); + addExpr(Inst, getMemDisp()); } static X86Operand *CreateToken(StringRef Str, SMLoc Loc) { @@ -492,24 +501,20 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { bool X86ATTAsmParser:: ParseInstruction(const StringRef &Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // FIXME: Hack to recognize "sal..." for now. We need a way to represent - // alternative syntaxes in the .td file, without requiring instruction - // duplication. - if (Name.startswith("sal")) { - std::string Tmp = "shl" + Name.substr(3).str(); - Operands.push_back(X86Operand::CreateToken(Tmp, NameLoc)); - } else { - // FIXME: This is a hack. We eventually want to add a general pattern - // mechanism to be used in the table gen file for these assembly names that - // use the same opcodes. Also we should only allow the "alternate names" - // for rep and repne with the instructions they can only appear with. - StringRef PatchedName = Name; - if (Name == "repe" || Name == "repz") - PatchedName = "rep"; - else if (Name == "repnz") - PatchedName = "repne"; - Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); - } + // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to + // represent alternative syntaxes in the .td file, without requiring + // instruction duplication. + StringRef PatchedName = StringSwitch<StringRef>(Name) + .Case("sal", "shl") + .Case("salb", "shlb") + .Case("sall", "shll") + .Case("salq", "shlq") + .Case("salw", "shlw") + .Case("repe", "rep") + .Case("repz", "rep") + .Case("repnz", "repne") + .Default(Name); + Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); if (getLexer().isNot(AsmToken::EndOfStatement)) { diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp index 38ccbf9..734a545 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp @@ -25,10 +25,15 @@ using namespace llvm; // Include the auto-generated portion of the assembly writer. 
#define MachineInstr MCInst +#define GET_INSTRUCTION_NAME #include "X86GenAsmWriter.inc" #undef MachineInstr void X86ATTInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); } +StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const { + return getInstructionName(Opcode); +} + void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op) { switch (MI->getOperand(Op).getImm()) { @@ -68,7 +73,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo) { O << '$' << Op.getImm(); if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256)) - *CommentStream << format("imm = 0x%X\n", Op.getImm()); + *CommentStream << format("imm = 0x%llX\n", (long long)Op.getImm()); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h index 3180618..d109a07 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h @@ -26,11 +26,12 @@ public: virtual void printInst(const MCInst *MI); - + virtual StringRef getOpcodeName(unsigned Opcode) const; + // Autogenerated by tblgen. void printInstruction(const MCInst *MI); static const char *getRegisterName(unsigned RegNo); - + static const char *getInstructionName(unsigned Opcode); void printOperand(const MCInst *MI, unsigned OpNo); void printMemReference(const MCInst *MI, unsigned Op); diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index 304306d..8cab24c 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -33,10 +33,11 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/Mangler.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" @@ -52,40 +53,42 @@ void X86AsmPrinter::PrintPICBaseSymbol() const { OutContext); } +MCSymbol *X86AsmPrinter::GetGlobalValueSymbol(const GlobalValue *GV) const { + SmallString<60> NameStr; + Mang->getNameWithPrefix(NameStr, GV, false); + MCSymbol *Symb = OutContext.GetOrCreateSymbol(NameStr.str()); + + if (Subtarget->isTargetCygMing()) { + X86COFFMachineModuleInfo &COFFMMI = + MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); + COFFMMI.DecorateCygMingName(Symb, OutContext, GV, *TM.getTargetData()); + + // Save function name for later type emission. + if (const Function *F = dyn_cast<Function>(GV)) + if (F->isDeclaration()) + COFFMMI.addExternalFunction(Symb->getName()); + + } + + return Symb; +} + /// runOnMachineFunction - Emit the function body. /// bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { SetupMachineFunction(MF); - - // COFF and Cygwin specific mangling stuff. This should be moved out to the - // mangler or handled some other way? - if (Subtarget->isTargetCOFF()) { - X86COFFMachineModuleInfo &COFFMMI = - MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - // Populate function information map. Don't want to populate - // non-stdcall or non-fastcall functions' information right now. 
- const Function *F = MF.getFunction(); - CallingConv::ID CC = F->getCallingConv(); - if (CC == CallingConv::X86_StdCall || CC == CallingConv::X86_FastCall) - COFFMMI.AddFunctionInfo(F, *MF.getInfo<X86MachineFunctionInfo>()); - } - if (Subtarget->isTargetCygMing()) { + if (Subtarget->isTargetCOFF()) { const Function *F = MF.getFunction(); - X86COFFMachineModuleInfo &COFFMMI = - MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - COFFMMI.DecorateCygMingName(CurrentFnSym, OutContext,F,*TM.getTargetData()); - - O << "\t.def\t " << *CurrentFnSym; - O << ";\t.scl\t" << + O << "\t.def\t " << *CurrentFnSym << ";\t.scl\t" << (F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT) << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT) << ";\t.endef\n"; } - + // Have common code print out the function header with linkage info etc. EmitFunctionHeader(); - + // Emit the rest of the function body. EmitFunctionBody(); @@ -119,12 +122,6 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { else GVSym = GetGlobalValueSymbol(GV); - if (Subtarget->isTargetCygMing()) { - X86COFFMachineModuleInfo &COFFMMI = - MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - COFFMMI.DecorateCygMingName(GVSym, OutContext, GV, *TM.getTargetData()); - } - // Handle dllimport linkage. if (MO.getTargetFlags() == X86II::MO_DLLIMPORT) GVSym = OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName()); @@ -585,7 +582,6 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) if (I->hasDLLExportLinkage()) { MCSymbol *Sym = GetGlobalValueSymbol(I); - COFFMMI.DecorateCygMingName(Sym, OutContext, I, *TM.getTargetData()); DLLExportedFns.push_back(Sym); } @@ -607,6 +603,28 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } } } + + if (Subtarget->isTargetELF()) { + TargetLoweringObjectFileELF &TLOFELF = + static_cast<TargetLoweringObjectFileELF &>(getObjFileLowering()); + + MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + // Output stubs for external and common global variables. + MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); + const TargetData *TD = TM.getTargetData(); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) + O << *Stubs[i].first << ":\n" + << (TD->getPointerSize() == 8 ? + MAI->getData64bitsDirective() : MAI->getData32bitsDirective()) + << *Stubs[i].second << '\n'; + + Stubs.clear(); + } + } } diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h index 1d32a5f..039214a 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h @@ -61,8 +61,7 @@ class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter { virtual void EmitInstruction(const MachineInstr *MI); void printSymbolOperand(const MachineOperand &MO); - - + virtual MCSymbol *GetGlobalValueSymbol(const GlobalValue *GV) const; // These methods are used by the tablegen'erated instruction printer. void printOperand(const MachineInstr *MI, unsigned OpNo, diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp index 4274d0a..610beb5 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp @@ -24,10 +24,14 @@ using namespace llvm; // Include the auto-generated portion of the assembly writer. 
#define MachineInstr MCInst +#define GET_INSTRUCTION_NAME #include "X86GenAsmWriter1.inc" #undef MachineInstr void X86IntelInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); } +StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const { + return getInstructionName(Opcode); +} void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op) { switch (MI->getOperand(Op).getImm()) { diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h index 1976177..545bf84 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h @@ -26,10 +26,12 @@ public: : MCInstPrinter(O, MAI) {} virtual void printInst(const MCInst *MI); + virtual StringRef getOpcodeName(unsigned Opcode) const; // Autogenerated by tblgen. void printInstruction(const MCInst *MI); static const char *getRegisterName(unsigned RegNo); + static const char *getInstructionName(unsigned Opcode); void printOperand(const MCInst *MI, unsigned OpNo, diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 61f26a7..eed3b45 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -15,6 +15,7 @@ tablegen(X86GenCallingConv.inc -gen-callingconv) tablegen(X86GenSubtarget.inc -gen-subtarget) set(sources + X86AsmBackend.cpp X86CodeEmitter.cpp X86COFFMachineModuleInfo.cpp X86ELFWriterInfo.cpp diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 19eb05e..e5f84e8 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -67,8 +67,8 @@ no_exit.i7: ; preds = %no_exit.i7, %build_tree.exit [ %tmp.34.i18, %no_exit.i7 ] %tmp.0.0.0.i10 = phi double [ 0.000000e+00, %build_tree.exit ], [ %tmp.28.i16, %no_exit.i7 ] - %tmp.28.i16 = add double %tmp.0.0.0.i10, 0.000000e+00 - %tmp.34.i18 = add double %tmp.0.1.0.i9, 0.000000e+00 + %tmp.28.i16 = fadd double %tmp.0.0.0.i10, 0.000000e+00 + %tmp.34.i18 = fadd double %tmp.0.1.0.i9, 0.000000e+00 br i1 false, label %Compute_Tree.exit23, label %no_exit.i7 Compute_Tree.exit23: ; preds = %no_exit.i7 @@ -97,7 +97,7 @@ pcmp/pand/pandn/por to do a selection instead of a conditional branch: double %X(double %Y, double %Z, double %A, double %B) { %C = setlt double %A, %B - %z = add double %Z, 0.0 ;; select operand is not a load + %z = fadd double %Z, 0.0 ;; select operand is not a load %D = select bool %C, double %Y, double %z ret double %D } @@ -545,7 +545,7 @@ eliminates a constant pool load. For example, consider: define i64 @ccosf(float %z.0, float %z.1) nounwind readonly { entry: - %tmp6 = sub float -0.000000e+00, %z.1 ; <float> [#uses=1] + %tmp6 = fsub float -0.000000e+00, %z.1 ; <float> [#uses=1] %tmp20 = tail call i64 @ccoshf( float %tmp6, float %z.0 ) nounwind readonly ret i64 %tmp20 } diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index aa7bb3d..d4545a6 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -227,11 +227,6 @@ lambda, siod, optimizer-eval, ackermann, hash2, nestedloop, strcat, and Treesor. //===---------------------------------------------------------------------===// -Teach the coalescer to coalesce vregs of different register classes. e.g. FR32 / -FR64 to VR128. - -//===---------------------------------------------------------------------===// - Adding to the list of cmp / test poor codegen issues: int test(__m128 *A, __m128 *B) { @@ -1868,3 +1863,69 @@ carried over to machine instructions. 
Asm printer (or JIT) can use this information to add the "lock" prefix. //===---------------------------------------------------------------------===// + +_Bool bar(int *x) { return *x & 1; } + +define zeroext i1 @bar(i32* nocapture %x) nounwind readonly { +entry: + %tmp1 = load i32* %x ; <i32> [#uses=1] + %and = and i32 %tmp1, 1 ; <i32> [#uses=1] + %tobool = icmp ne i32 %and, 0 ; <i1> [#uses=1] + ret i1 %tobool +} + +bar: # @bar +# BB#0: # %entry + movl 4(%esp), %eax + movb (%eax), %al + andb $1, %al + movzbl %al, %eax + ret + +Missed optimization: should be movl+andl. + +//===---------------------------------------------------------------------===// + +Consider the following two functions compiled with clang: +_Bool foo(int *x) { return !(*x & 4); } +unsigned bar(int *x) { return !(*x & 4); } + +foo: + movl 4(%esp), %eax + testb $4, (%eax) + sete %al + movzbl %al, %eax + ret + +bar: + movl 4(%esp), %eax + movl (%eax), %eax + shrl $2, %eax + andl $1, %eax + xorl $1, %eax + ret + +The second function generates more code even though the two functions are +are functionally identical. + +//===---------------------------------------------------------------------===// + +Take the following C code: +int x(int y) { return (y & 63) << 14; } + +Code produced by gcc: + andl $63, %edi + sall $14, %edi + movl %edi, %eax + ret + +Code produced by clang: + shll $14, %edi + movl %edi, %eax + andl $1032192, %eax + ret + +The code produced by gcc is 3 bytes shorter. This sort of construct often +shows up with bitfields. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/TargetInfo/Android.mk b/lib/Target/X86/TargetInfo/Android.mk new file mode 100644 index 0000000..ee53f0d --- /dev/null +++ b/lib/Target/X86/TargetInfo/Android.mk @@ -0,0 +1,24 @@ +LOCAL_PATH := $(call my-dir) + +# For the device only +# ===================================================== +include $(CLEAR_VARS) +include $(CLEAR_TBLGEN_VARS) + +TBLGEN_TABLES := \ + X86GenRegisterNames.inc \ + X86GenInstrNames.inc + +TBLGEN_TD_DIR := $(LOCAL_PATH)/.. + +LOCAL_SRC_FILES := \ + X86TargetInfo.cpp + +LOCAL_C_INCLUDES += \ + $(LOCAL_PATH)/.. 
+ +LOCAL_MODULE:= libLLVMX86Info + +include $(LLVM_HOST_BUILD_MK) +include $(LLVM_TBLGEN_RULES_MK) +include $(BUILD_HOST_STATIC_LIBRARY) diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 1d17a05..ba0ee6c 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -19,12 +19,15 @@ namespace llvm { -class X86TargetMachine; class FunctionPass; -class MachineCodeEmitter; -class MCCodeEmitter; class JITCodeEmitter; +class MCAssembler; +class MCCodeEmitter; +class MCContext; +class MachineCodeEmitter; class Target; +class TargetAsmBackend; +class X86TargetMachine; class formatted_raw_ostream; /// createX86ISelDag - This pass converts a legalized DAG into a @@ -49,9 +52,13 @@ FunctionPass *createX87FPRegKillInserterPass(); FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, JITCodeEmitter &JCE); -MCCodeEmitter *createHeinousX86MCCodeEmitter(const Target &, TargetMachine &TM); -MCCodeEmitter *createX86_32MCCodeEmitter(const Target &, TargetMachine &TM); -MCCodeEmitter *createX86_64MCCodeEmitter(const Target &, TargetMachine &TM); +MCCodeEmitter *createX86_32MCCodeEmitter(const Target &, TargetMachine &TM, + MCContext &Ctx); +MCCodeEmitter *createX86_64MCCodeEmitter(const Target &, TargetMachine &TM, + MCContext &Ctx); + +TargetAsmBackend *createX86_32AsmBackend(const Target &, MCAssembler &); +TargetAsmBackend *createX86_64AsmBackend(const Target &, MCAssembler &); /// createX86EmitCodeToMemory - Returns a pass that converts a register /// allocated function into raw machine code in a dynamically diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp new file mode 100644 index 0000000..e6654ef --- /dev/null +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -0,0 +1,34 @@ +//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetAsmBackend.h" +#include "X86.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetAsmBackend.h" +using namespace llvm; + +namespace { + +class X86AsmBackend : public TargetAsmBackend { +public: + X86AsmBackend(const Target &T, MCAssembler &A) + : TargetAsmBackend(T) {} +}; + +} + +TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, + MCAssembler &A) { + return new X86AsmBackend(T, A); +} + +TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T, + MCAssembler &A) { + return new X86AsmBackend(T, A); +} diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp index ea52795..ab67acb 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.cpp +++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp @@ -27,90 +27,55 @@ X86COFFMachineModuleInfo::X86COFFMachineModuleInfo(const MachineModuleInfo &) { X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() { } -void X86COFFMachineModuleInfo::AddFunctionInfo(const Function *F, - const X86MachineFunctionInfo &Val) { - FunctionInfoMap[F] = Val; +void X86COFFMachineModuleInfo::addExternalFunction(const StringRef& Name) { + CygMingStubs.insert(Name); } - - -static X86MachineFunctionInfo calculateFunctionInfo(const Function *F, - const TargetData &TD) { - X86MachineFunctionInfo Info; - uint64_t Size = 0; - - switch (F->getCallingConv()) { - case CallingConv::X86_StdCall: - Info.setDecorationStyle(StdCall); - break; - case CallingConv::X86_FastCall: - Info.setDecorationStyle(FastCall); - break; - default: - return Info; - } - - unsigned argNum = 1; - for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - AI != AE; ++AI, ++argNum) { - const Type* Ty = AI->getType(); - - // 'Dereference' type in case of byval parameter attribute - if (F->paramHasAttr(argNum, Attribute::ByVal)) - Ty = cast<PointerType>(Ty)->getElementType(); - - // Size should be aligned to DWORD boundary - Size += ((TD.getTypeAllocSize(Ty) + 3)/4)*4; - } - - // We're not supporting tooooo huge arguments :) - Info.setBytesToPopOnReturn((unsigned int)Size); - return Info; -} - - -/// DecorateCygMingName - Query FunctionInfoMap and use this information for -/// various name decorations for Cygwin and MingW. +/// DecorateCygMingName - Apply various name decorations if the function uses +/// stdcall or fastcall calling convention. void X86COFFMachineModuleInfo::DecorateCygMingName(SmallVectorImpl<char> &Name, const GlobalValue *GV, const TargetData &TD) { const Function *F = dyn_cast<Function>(GV); if (!F) return; - - // Save function name for later type emission. 
- if (F->isDeclaration()) - CygMingStubs.insert(StringRef(Name.data(), Name.size())); - + // We don't want to decorate non-stdcall or non-fastcall functions right now CallingConv::ID CC = F->getCallingConv(); if (CC != CallingConv::X86_StdCall && CC != CallingConv::X86_FastCall) return; - - const X86MachineFunctionInfo *Info; - - FMFInfoMap::const_iterator info_item = FunctionInfoMap.find(F); - if (info_item == FunctionInfoMap.end()) { - // Calculate apropriate function info and populate map - FunctionInfoMap[F] = calculateFunctionInfo(F, TD); - Info = &FunctionInfoMap[F]; - } else { - Info = &info_item->second; - } - - if (Info->getDecorationStyle() == None) return; + + unsigned ArgWords = 0; + DenseMap<const Function*, unsigned>::const_iterator item = FnArgWords.find(F); + if (item == FnArgWords.end()) { + // Calculate arguments sizes + for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + AI != AE; ++AI) { + const Type* Ty = AI->getType(); + + // 'Dereference' type in case of byval parameter attribute + if (AI->hasByValAttr()) + Ty = cast<PointerType>(Ty)->getElementType(); + + // Size should be aligned to DWORD boundary + ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4; + } + + FnArgWords[F] = ArgWords; + } else + ArgWords = item->second; + const FunctionType *FT = F->getFunctionType(); - // "Pure" variadic functions do not receive @0 suffix. if (!FT->isVarArg() || FT->getNumParams() == 0 || (FT->getNumParams() == 1 && F->hasStructRetAttr())) - raw_svector_ostream(Name) << '@' << Info->getBytesToPopOnReturn(); - - if (Info->getDecorationStyle() == FastCall) { + raw_svector_ostream(Name) << '@' << ArgWords; + + if (CC == CallingConv::X86_FastCall) { if (Name[0] == '_') Name[0] = '@'; else Name.insert(Name.begin(), '@'); - } + } } /// DecorateCygMingName - Query FunctionInfoMap and use this information for @@ -121,6 +86,6 @@ void X86COFFMachineModuleInfo::DecorateCygMingName(MCSymbol *&Name, const TargetData &TD) { SmallString<128> NameStr(Name->getName().begin(), Name->getName().end()); DecorateCygMingName(NameStr, GV, TD); - + Name = Ctx.GetOrCreateSymbol(NameStr.str()); } diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h index 0e2009e..9de3dcd 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.h +++ b/lib/Target/X86/X86COFFMachineModuleInfo.h @@ -21,44 +21,25 @@ namespace llvm { class X86MachineFunctionInfo; class TargetData; - + /// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation /// for X86 COFF targets. class X86COFFMachineModuleInfo : public MachineModuleInfoImpl { StringSet<> CygMingStubs; - - // We have to propagate some information about MachineFunction to - // AsmPrinter. It's ok, when we're printing the function, since we have - // access to MachineFunction and can get the appropriate MachineFunctionInfo. - // Unfortunately, this is not possible when we're printing reference to - // Function (e.g. calling it and so on). Even more, there is no way to get the - // corresponding MachineFunctions: it can even be not created at all. That's - // why we should use additional structure, when we're collecting all necessary - // information. - // - // This structure is using e.g. for name decoration for stdcall & fastcall'ed - // function, since we have to use arguments' size for decoration. 
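The rewritten DecorateCygMingName above replaces the cached per-function info with an on-the-fly computation: each parameter's allocation size is rounded up to the next DWORD, the sum is appended after '@', and for fastcall a leading '_' is rewritten to '@'. A small standalone sketch of that arithmetic (the helper name and the example signature are illustrative, not taken from the patch):

// Mirrors the per-argument rounding in the hunk above: every parameter
// contributes its size rounded up to a 4-byte boundary.
static unsigned roundUpToDword(unsigned Bytes) {
  return ((Bytes + 3) / 4) * 4;
}

// e.g. on a 32-bit target, int (4 bytes) + double (8 bytes) gives 12, so a
// stdcall function "f" would be emitted roughly as "_f@12" and a fastcall
// one as "@f@12".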
- typedef std::map<const Function*, X86MachineFunctionInfo> FMFInfoMap; - FMFInfoMap FunctionInfoMap; - + DenseMap<const Function*, unsigned> FnArgWords; public: X86COFFMachineModuleInfo(const MachineModuleInfo &); ~X86COFFMachineModuleInfo(); - - + void DecorateCygMingName(MCSymbol* &Name, MCContext &Ctx, const GlobalValue *GV, const TargetData &TD); void DecorateCygMingName(SmallVectorImpl<char> &Name, const GlobalValue *GV, const TargetData &TD); - - void AddFunctionInfo(const Function *F, const X86MachineFunctionInfo &Val); - + void addExternalFunction(const StringRef& Name); typedef StringSet<>::const_iterator stub_iterator; stub_iterator stub_begin() const { return CygMingStubs.begin(); } stub_iterator stub_end() const { return CygMingStubs.end(); } - - }; diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index f0bceb1..8deadf6 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -387,10 +387,16 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, // If no BaseReg, issue a RIP relative instruction only if the MCE can // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table // 2-7) and absolute references. + unsigned BaseRegNo = -1U; + if (BaseReg != 0 && BaseReg != X86::RIP) + BaseRegNo = getX86RegNum(BaseReg); + if (// The SIB byte must be used if there is an index register. IndexReg.getReg() == 0 && - // The SIB byte must be used if the base is ESP/RSP. - BaseReg != X86::ESP && BaseReg != X86::RSP && + // The SIB byte must be used if the base is ESP/RSP/R12, all of which + // encode to an R/M value of 4, which indicates that a SIB byte is + // present. + BaseRegNo != N86::ESP && // If there is no base register and we're in 64-bit mode, we need a SIB // byte to emit an addr that is just 'disp32' (the non-RIP relative form). (!Is64BitMode || BaseReg != 0)) { @@ -401,7 +407,6 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, return; } - unsigned BaseRegNo = getX86RegNum(BaseReg); // If the base is not EBP/ESP and there is no displacement, use simple // indirect register encoding, this handles addresses like [EAX]. The // encoding for [EBP] with no displacement means [disp32] so we handle it @@ -757,27 +762,8 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, case X86II::MRM4r: case X86II::MRM5r: case X86II::MRM6r: case X86II::MRM7r: { MCE.emitByte(BaseOpcode); - - // Special handling of lfence, mfence, monitor, and mwait. 
- if (Desc->getOpcode() == X86::LFENCE || - Desc->getOpcode() == X86::MFENCE || - Desc->getOpcode() == X86::MONITOR || - Desc->getOpcode() == X86::MWAIT) { - emitRegModRMByte((Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); - - switch (Desc->getOpcode()) { - default: break; - case X86::MONITOR: - MCE.emitByte(0xC8); - break; - case X86::MWAIT: - MCE.emitByte(0xC9); - break; - } - } else { - emitRegModRMByte(MI.getOperand(CurOp++).getReg(), - (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); - } + emitRegModRMByte(MI.getOperand(CurOp++).getReg(), + (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); if (CurOp == NumOps) break; @@ -853,6 +839,27 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, getX86RegNum(MI.getOperand(CurOp).getReg())); ++CurOp; break; + + case X86II::MRM_C1: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xC1); + break; + case X86II::MRM_C8: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xC8); + break; + case X86II::MRM_C9: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xC9); + break; + case X86II::MRM_E8: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xE8); + break; + case X86II::MRM_F0: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xF0); + break; } if (!Desc->isVariadic() && CurOp != NumOps) { @@ -864,335 +871,3 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, MCE.processDebugLoc(MI.getDebugLoc(), false); } - -// Adapt the Emitter / CodeEmitter interfaces to MCCodeEmitter. -// -// FIXME: This is a total hack designed to allow work on llvm-mc to proceed -// without being blocked on various cleanups needed to support a clean interface -// to instruction encoding. -// -// Look away! - -#include "llvm/DerivedTypes.h" - -namespace { -class MCSingleInstructionCodeEmitter : public MachineCodeEmitter { - uint8_t Data[256]; - const MCInst *CurrentInst; - SmallVectorImpl<MCFixup> *FixupList; - -public: - MCSingleInstructionCodeEmitter() { reset(0, 0); } - - void reset(const MCInst *Inst, SmallVectorImpl<MCFixup> *Fixups) { - CurrentInst = Inst; - FixupList = Fixups; - BufferBegin = Data; - BufferEnd = array_endof(Data); - CurBufferPtr = Data; - } - - StringRef str() { - return StringRef(reinterpret_cast<char*>(BufferBegin), - CurBufferPtr - BufferBegin); - } - - virtual void startFunction(MachineFunction &F) {} - virtual bool finishFunction(MachineFunction &F) { return false; } - virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {} - virtual bool earlyResolveAddresses() const { return false; } - virtual void addRelocation(const MachineRelocation &MR) { - unsigned Offset = 0, OpIndex = 0, Kind = MR.getRelocationType(); - - // This form is only used in one case, for branches. - if (MR.isBasicBlock()) { - Offset = unsigned(MR.getMachineCodeOffset()); - OpIndex = 0; - } else { - assert(MR.isJumpTableIndex() && "Unexpected relocation!"); - - Offset = unsigned(MR.getMachineCodeOffset()); - - // The operand index is encoded as the first byte of the fake operand. - OpIndex = MR.getJumpTableIndex(); - } - - MCOperand Op = CurrentInst->getOperand(OpIndex); - assert(Op.isExpr() && "FIXME: Not yet implemented!"); - FixupList->push_back(MCFixup::Create(Offset, Op.getExpr(), - MCFixupKind(FirstTargetFixupKind + Kind))); - } - virtual void setModuleInfo(MachineModuleInfo* Info) {} - - // Interface functions which should never get called in our usage. 
- - virtual void emitLabel(uint64_t LabelID) { - assert(0 && "Unexpected code emitter call!"); - } - virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const { - assert(0 && "Unexpected code emitter call!"); - return 0; - } - virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const { - assert(0 && "Unexpected code emitter call!"); - return 0; - } - virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { - assert(0 && "Unexpected code emitter call!"); - return 0; - } - virtual uintptr_t getLabelAddress(uint64_t LabelID) const { - assert(0 && "Unexpected code emitter call!"); - return 0; - } -}; - -class X86MCCodeEmitter : public MCCodeEmitter { - X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT - -private: - X86TargetMachine &TM; - llvm::Function *DummyF; - TargetData *DummyTD; - mutable llvm::MachineFunction *DummyMF; - llvm::MachineBasicBlock *DummyMBB; - - MCSingleInstructionCodeEmitter *InstrEmitter; - Emitter<MachineCodeEmitter> *Emit; - -public: - X86MCCodeEmitter(X86TargetMachine &_TM) : TM(_TM) { - // Verily, thou shouldst avert thine eyes. - const llvm::FunctionType *FTy = - FunctionType::get(llvm::Type::getVoidTy(getGlobalContext()), false); - DummyF = Function::Create(FTy, GlobalValue::InternalLinkage); - DummyTD = new TargetData(""); - DummyMF = new MachineFunction(DummyF, TM, 0); - DummyMBB = DummyMF->CreateMachineBasicBlock(); - - InstrEmitter = new MCSingleInstructionCodeEmitter(); - Emit = new Emitter<MachineCodeEmitter>(TM, *InstrEmitter, - *TM.getInstrInfo(), - *DummyTD, false); - } - ~X86MCCodeEmitter() { - delete Emit; - delete InstrEmitter; - delete DummyMF; - delete DummyF; - } - - unsigned getNumFixupKinds() const { - return 5; - } - - MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { - static MCFixupKindInfo Infos[] = { - { "reloc_pcrel_word", 0, 4 * 8 }, - { "reloc_picrel_word", 0, 4 * 8 }, - { "reloc_absolute_word", 0, 4 * 8 }, - { "reloc_absolute_word_sext", 0, 4 * 8 }, - { "reloc_absolute_dword", 0, 8 * 8 } - }; - - assert(Kind >= FirstTargetFixupKind && Kind < MaxTargetFixupKind && - "Invalid kind!"); - return Infos[Kind - FirstTargetFixupKind]; - } - - bool AddRegToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - if (Start + 1 > MI.getNumOperands()) - return false; - - const MCOperand &Op = MI.getOperand(Start); - if (!Op.isReg()) return false; - - Instr->addOperand(MachineOperand::CreateReg(Op.getReg(), false)); - return true; - } - - bool AddImmToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - if (Start + 1 > MI.getNumOperands()) - return false; - - const MCOperand &Op = MI.getOperand(Start); - if (Op.isImm()) { - Instr->addOperand(MachineOperand::CreateImm(Op.getImm())); - return true; - } - if (!Op.isExpr()) - return false; - - const MCExpr *Expr = Op.getExpr(); - if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) { - Instr->addOperand(MachineOperand::CreateImm(CE->getValue())); - return true; - } - - // Fake this as an external symbol to the code emitter to add a relcoation - // entry we will recognize. 
- Instr->addOperand(MachineOperand::CreateJTI(Start, 0)); - return true; - } - - bool AddLMemToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - return (AddRegToInstr(MI, Instr, Start + 0) && - AddImmToInstr(MI, Instr, Start + 1) && - AddRegToInstr(MI, Instr, Start + 2) && - AddImmToInstr(MI, Instr, Start + 3)); - } - - bool AddMemToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - return (AddRegToInstr(MI, Instr, Start + 0) && - AddImmToInstr(MI, Instr, Start + 1) && - AddRegToInstr(MI, Instr, Start + 2) && - AddImmToInstr(MI, Instr, Start + 3) && - AddRegToInstr(MI, Instr, Start + 4)); - } - - void EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const { - // Don't look yet! - - // Convert the MCInst to a MachineInstr so we can (ab)use the regular - // emitter. - const X86InstrInfo &II = *TM.getInstrInfo(); - const TargetInstrDesc &Desc = II.get(MI.getOpcode()); - MachineInstr *Instr = DummyMF->CreateMachineInstr(Desc, DebugLoc()); - DummyMBB->push_back(Instr); - - unsigned Opcode = MI.getOpcode(); - unsigned NumOps = MI.getNumOperands(); - unsigned CurOp = 0; - bool AddTied = false; - if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1) - AddTied = true; - else if (NumOps > 2 && - Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) - // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 - --NumOps; - - bool OK = true; - switch (Desc.TSFlags & X86II::FormMask) { - case X86II::MRMDestReg: - case X86II::MRMSrcReg: - // Matching doesn't fill this in completely, we have to choose operand 0 - // for a tied register. - OK &= AddRegToInstr(MI, Instr, CurOp++); - if (AddTied) - OK &= AddRegToInstr(MI, Instr, CurOp++ - 1); - OK &= AddRegToInstr(MI, Instr, CurOp++); - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::RawFrm: - if (CurOp < NumOps) { - // Hack to make branches work. - if (!(Desc.TSFlags & X86II::ImmMask) && - MI.getOperand(0).isExpr() && - isa<MCSymbolRefExpr>(MI.getOperand(0).getExpr())) - Instr->addOperand(MachineOperand::CreateMBB(DummyMBB)); - else - OK &= AddImmToInstr(MI, Instr, CurOp); - } - break; - - case X86II::AddRegFrm: - // Matching doesn't fill this in completely, we have to choose operand 0 - // for a tied register. - OK &= AddRegToInstr(MI, Instr, CurOp++); - if (AddTied) - OK &= AddRegToInstr(MI, Instr, CurOp++ - 1); - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::MRM0r: case X86II::MRM1r: - case X86II::MRM2r: case X86II::MRM3r: - case X86II::MRM4r: case X86II::MRM5r: - case X86II::MRM6r: case X86II::MRM7r: - // Matching doesn't fill this in completely, we have to choose operand 0 - // for a tied register. - OK &= AddRegToInstr(MI, Instr, CurOp++); - if (AddTied) - OK &= AddRegToInstr(MI, Instr, CurOp++ - 1); - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::MRM0m: case X86II::MRM1m: - case X86II::MRM2m: case X86II::MRM3m: - case X86II::MRM4m: case X86II::MRM5m: - case X86II::MRM6m: case X86II::MRM7m: - OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5; - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::MRMSrcMem: - // Matching doesn't fill this in completely, we have to choose operand 0 - // for a tied register. 
- OK &= AddRegToInstr(MI, Instr, CurOp++); - if (AddTied) - OK &= AddRegToInstr(MI, Instr, CurOp++ - 1); - if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || - Opcode == X86::LEA16r || Opcode == X86::LEA32r) - OK &= AddLMemToInstr(MI, Instr, CurOp); - else - OK &= AddMemToInstr(MI, Instr, CurOp); - break; - - case X86II::MRMDestMem: - OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5; - OK &= AddRegToInstr(MI, Instr, CurOp); - break; - - default: - case X86II::MRMInitReg: - case X86II::Pseudo: - OK = false; - break; - } - - if (!OK) { - dbgs() << "couldn't convert inst '"; - MI.dump(); - dbgs() << "' to machine instr:\n"; - Instr->dump(); - } - - InstrEmitter->reset(&MI, &Fixups); - if (OK) - Emit->emitInstruction(*Instr, &Desc); - OS << InstrEmitter->str(); - - Instr->eraseFromParent(); - } -}; -} - -#include "llvm/Support/CommandLine.h" - -static cl::opt<bool> EnableNewEncoder("enable-new-x86-encoder", - cl::ReallyHidden); - - -// Ok, now you can look. -MCCodeEmitter *llvm::createHeinousX86MCCodeEmitter(const Target &T, - TargetMachine &TM) { - - // FIXME: Remove the heinous one when the new one works. - if (EnableNewEncoder) { - if (TM.getTargetData()->getPointerSize() == 4) - return createX86_32MCCodeEmitter(T, TM); - return createX86_64MCCodeEmitter(T, TM); - } - - return new X86MCCodeEmitter(static_cast<X86TargetMachine&>(TM)); -} diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index ea398e9..98e3f4e 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -388,6 +388,8 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { } case Instruction::GetElementPtr: { + X86AddressMode SavedAM = AM; + // Pattern-match simple GEPs. uint64_t Disp = (int32_t)AM.Disp; unsigned IndexReg = AM.IndexReg; @@ -428,7 +430,13 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { AM.IndexReg = IndexReg; AM.Scale = Scale; AM.Disp = (uint32_t)Disp; - return X86SelectAddress(U->getOperand(0), AM); + if (X86SelectAddress(U->getOperand(0), AM)) + return true; + + // If we couldn't merge the sub value into this addr mode, revert back to + // our address and just match the value instead of completely failing. + AM = SavedAM; + break; unsupported_gep: // Ok, the GEP indices weren't all covered. break; @@ -786,8 +794,8 @@ bool X86FastISel::X86SelectCmp(Instruction *I) { bool X86FastISel::X86SelectZExt(Instruction *I) { // Handle zero-extension from i1 to i8, which is common. - if (I->getType()->isInteger(8) && - I->getOperand(0)->getType()->isInteger(1)) { + if (I->getType()->isIntegerTy(8) && + I->getOperand(0)->getType()->isIntegerTy(1)) { unsigned ResultReg = getRegForValue(I->getOperand(0)); if (ResultReg == 0) return false; // Set the high bits to zero. 
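In the X86SelectBranch and HasNoSignedComparisonUses hunks that follow, the conditional-branch opcodes all gain a _4 suffix (JNE_4, JA_4, and so on). Read together with the reloc_pcrel_4byte / reloc_pcrel_1byte fixups added in X86FixupKinds.h later in this patch, the suffix appears to denote the 32-bit (rel32) pc-relative form as opposed to the 8-bit short form. A hedged sketch of what the rel32 form means for an emitter (the helper is invented for illustration; the byte values are the standard jne near encoding):

#include <cstdint>
#include <vector>

// Emit "jne rel32": two opcode bytes followed by a little-endian 32-bit
// pc-relative displacement.
void emitJNE4(std::vector<uint8_t> &Out, int32_t RelOffset) {
  Out.push_back(0x0F);
  Out.push_back(0x85);
  uint32_t Imm = static_cast<uint32_t>(RelOffset);
  for (int i = 0; i < 4; ++i)
    Out.push_back(static_cast<uint8_t>((Imm >> (8 * i)) & 0xFF));
}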
@@ -828,30 +836,30 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { std::swap(TrueMBB, FalseMBB); Predicate = CmpInst::FCMP_UNE; // FALL THROUGH - case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE; break; - case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA; break; - case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE; break; - case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA; break; - case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE; break; - case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE; break; - case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP; break; - case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP; break; - case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE; break; - case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB; break; - case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE; break; - case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB; break; - case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break; + case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break; + case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4; break; + case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break; + case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA_4; break; + case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE_4; break; + case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break; + case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break; + case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4; break; + case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4; break; + case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB_4; break; + case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE_4; break; + case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break; + case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break; - case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE; break; - case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE; break; - case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA; break; - case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE; break; - case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB; break; - case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break; - case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG; break; - case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE; break; - case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL; break; - case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE; break; + case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE_4; break; + case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE_4; break; + case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4; break; + case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break; + case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break; + case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break; + case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4; break; + case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break; + case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4; break; + case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break; default: return false; } @@ 
-869,7 +877,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { if (Predicate == CmpInst::FCMP_UNE) { // X86 requires a second branch to handle UNE (and OEQ, // which is mapped to UNE above). - BuildMI(MBB, DL, TII.get(X86::JP)).addMBB(TrueMBB); + BuildMI(MBB, DL, TII.get(X86::JP_4)).addMBB(TrueMBB); } FastEmitBranch(FalseMBB); @@ -923,7 +931,8 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { unsigned OpCode = SetMI->getOpcode(); if (OpCode == X86::SETOr || OpCode == X86::SETBr) { - BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? X86::JO : X86::JB)) + BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? + X86::JO_4 : X86::JB_4)) .addMBB(TrueMBB); FastEmitBranch(FalseMBB); MBB->addSuccessor(TrueMBB); @@ -939,7 +948,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { if (OpReg == 0) return false; BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg); - BuildMI(MBB, DL, TII.get(X86::JNE)).addMBB(TrueMBB); + BuildMI(MBB, DL, TII.get(X86::JNE_4)).addMBB(TrueMBB); FastEmitBranch(FalseMBB); MBB->addSuccessor(TrueMBB); return true; @@ -948,7 +957,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { bool X86FastISel::X86SelectShift(Instruction *I) { unsigned CReg = 0, OpReg = 0, OpImm = 0; const TargetRegisterClass *RC = NULL; - if (I->getType()->isInteger(8)) { + if (I->getType()->isIntegerTy(8)) { CReg = X86::CL; RC = &X86::GR8RegClass; switch (I->getOpcode()) { @@ -957,7 +966,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break; default: return false; } - } else if (I->getType()->isInteger(16)) { + } else if (I->getType()->isIntegerTy(16)) { CReg = X86::CX; RC = &X86::GR16RegClass; switch (I->getOpcode()) { @@ -966,7 +975,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break; default: return false; } - } else if (I->getType()->isInteger(32)) { + } else if (I->getType()->isIntegerTy(32)) { CReg = X86::ECX; RC = &X86::GR32RegClass; switch (I->getOpcode()) { @@ -975,7 +984,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break; default: return false; } - } else if (I->getType()->isInteger(64)) { + } else if (I->getType()->isIntegerTy(64)) { CReg = X86::RCX; RC = &X86::GR64RegClass; switch (I->getOpcode()) { @@ -1160,6 +1169,8 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { if (!X86SelectAddress(DI->getAddress(), AM)) return false; const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); + // FIXME may need to add RegState::Debug to any registers produced, + // although ESP/EBP should be the only ones at the moment. addFullAddress(BuildMI(MBB, DL, II), AM).addImm(0). addMetadata(DI->getVariable()); return true; diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/X86FixupKinds.h new file mode 100644 index 0000000..c8dac3c --- /dev/null +++ b/lib/Target/X86/X86FixupKinds.h @@ -0,0 +1,25 @@ +//===-- X86/X86FixupKinds.h - X86 Specific Fixup Entries --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_X86_X86FIXUPKINDS_H +#define LLVM_X86_X86FIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace X86 { +enum Fixups { + reloc_pcrel_4byte = FirstTargetFixupKind, // 32-bit pcrel, e.g. a branch. + reloc_pcrel_1byte, // 8-bit pcrel, e.g. branch_1 + reloc_riprel_4byte // 32-bit rip-relative +}; +} +} + +#endif diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp index 34a0045..6a117dd 100644 --- a/lib/Target/X86/X86FloatingPointRegKill.cpp +++ b/lib/Target/X86/X86FloatingPointRegKill.cpp @@ -118,7 +118,7 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { for (BasicBlock::const_iterator II = SI->begin(); (PN = dyn_cast<PHINode>(II)); ++II) { if (PN->getType()==Type::getX86_FP80Ty(LLVMBB->getContext()) || - (!Subtarget.hasSSE1() && PN->getType()->isFloatingPoint()) || + (!Subtarget.hasSSE1() && PN->getType()->isFloatingPointTy()) || (!Subtarget.hasSSE2() && PN->getType()==Type::getDoubleTy(LLVMBB->getContext()))) { ContainsFPCode = true; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index e44ce421..3fad8ad 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -12,15 +12,6 @@ // //===----------------------------------------------------------------------===// -// Force NDEBUG on in any optimized build on Darwin. -// -// FIXME: This is a huge hack, to work around ridiculously awful compile times -// on this file with gcc-4.2 on Darwin, in Release mode. -#if (!defined(__llvm__) && defined(__APPLE__) && \ - defined(__OPTIMIZE__) && !defined(NDEBUG)) -#define NDEBUG -#endif - #define DEBUG_TYPE "x86-isel" #include "X86.h" #include "X86InstrBuilder.h" @@ -177,14 +168,11 @@ namespace { return "X86 DAG->DAG Instruction Selection"; } - /// InstructionSelect - This callback is invoked by - /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. - virtual void InstructionSelect(); - virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF); - virtual - bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const; + virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const; + + virtual void PreprocessISelDAG(); // Include the pieces autogenerated from the target description. #include "X86GenDAGISel.inc" @@ -208,18 +196,17 @@ namespace { SDValue &Scale, SDValue &Index, SDValue &Disp); bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp); - bool SelectScalarSSELoad(SDNode *Op, SDValue Pred, - SDValue N, SDValue &Base, SDValue &Scale, + bool SelectScalarSSELoad(SDNode *Root, SDValue N, + SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment, - SDValue &InChain, SDValue &OutChain); + SDValue &NodeWithChain); + bool TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); - void PreprocessForRMW(); - void PreprocessForFPConvert(); - + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. 
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, @@ -295,19 +282,22 @@ namespace { const X86InstrInfo *getInstrInfo() { return getTargetMachine().getInstrInfo(); } - -#ifndef NDEBUG - unsigned Indent; -#endif }; } -bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, - SDNode *Root) const { +bool +X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { if (OptLevel == CodeGenOpt::None) return false; - if (U == Root) + if (!N.hasOneUse()) + return false; + + if (N.getOpcode() != ISD::LOAD) + return true; + + // If N is a load, do additional profitability checks. + if (U == Root) { switch (U->getOpcode()) { default: break; case X86ISD::ADD: @@ -354,60 +344,9 @@ bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, } } } - - // Proceed to 'generic' cycle finder code - return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root); -} - -/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand -/// and move load below the TokenFactor. Replace store's chain operand with -/// load's chain result. -static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load, - SDValue Store, SDValue TF) { - SmallVector<SDValue, 4> Ops; - for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) - if (Load.getNode() == TF.getOperand(i).getNode()) - Ops.push_back(Load.getOperand(0)); - else - Ops.push_back(TF.getOperand(i)); - SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); - SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF, - Load.getOperand(1), - Load.getOperand(2)); - CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1), - Store.getOperand(2), Store.getOperand(3)); -} - -/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. The -/// chain produced by the load must only be used by the store's chain operand, -/// otherwise this may produce a cycle in the DAG. -/// -static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, - SDValue &Load) { - if (N.getOpcode() == ISD::BIT_CONVERT) { - if (!N.hasOneUse()) - return false; - N = N.getOperand(0); } - LoadSDNode *LD = dyn_cast<LoadSDNode>(N); - if (!LD || LD->isVolatile()) - return false; - if (LD->getAddressingMode() != ISD::UNINDEXED) - return false; - - ISD::LoadExtType ExtType = LD->getExtensionType(); - if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD) - return false; - - if (N.hasOneUse() && - LD->hasNUsesOfValue(1, 1) && - N.getOperand(1) == Address && - LD->isOperandOf(Chain.getNode())) { - Load = N; - return true; - } - return false; + return true; } /// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain @@ -473,51 +412,15 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain) { return false; } - -/// PreprocessForRMW - Preprocess the DAG to make instruction selection better. -/// This is only run if not in -O0 mode. -/// This allows the instruction selector to pick more read-modify-write -/// instructions. This is a common case: -/// -/// [Load chain] -/// ^ -/// | -/// [Load] -/// ^ ^ -/// | | -/// / \- -/// / | -/// [TokenFactor] [Op] -/// ^ ^ -/// | | -/// \ / -/// \ / -/// [Store] -/// -/// The fact the store's chain operand != load's chain will prevent the -/// (store (op (load))) instruction from being selected. 
We can transform it to: -/// -/// [Load chain] -/// ^ -/// | -/// [TokenFactor] -/// ^ -/// | -/// [Load] -/// ^ ^ -/// | | -/// | \- -/// | | -/// | [Op] -/// | ^ -/// | | -/// \ / -/// \ / -/// [Store] -void X86DAGToDAGISel::PreprocessForRMW() { +void X86DAGToDAGISel::PreprocessISelDAG() { + // OptForSize is used in pattern predicates that isel is matching. + OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize); + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ++I) { - if (I->getOpcode() == X86ISD::CALL) { + E = CurDAG->allnodes_end(); I != E; ) { + SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. + + if (OptLevel != CodeGenOpt::None && N->getOpcode() == X86ISD::CALL) { /// Also try moving call address load from outside callseq_start to just /// before the call to allow it to be folded. /// @@ -537,85 +440,23 @@ void X86DAGToDAGISel::PreprocessForRMW() { /// \ / /// \ / /// [CALL] - SDValue Chain = I->getOperand(0); - SDValue Load = I->getOperand(1); + SDValue Chain = N->getOperand(0); + SDValue Load = N->getOperand(1); if (!isCalleeLoad(Load, Chain)) continue; - MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain); + MoveBelowCallSeqStart(CurDAG, Load, SDValue(N, 0), Chain); ++NumLoadMoved; continue; } - - if (!ISD::isNON_TRUNCStore(I)) - continue; - SDValue Chain = I->getOperand(0); - - if (Chain.getNode()->getOpcode() != ISD::TokenFactor) - continue; - - SDValue N1 = I->getOperand(1); - SDValue N2 = I->getOperand(2); - if ((N1.getValueType().isFloatingPoint() && - !N1.getValueType().isVector()) || - !N1.hasOneUse()) - continue; - - bool RModW = false; - SDValue Load; - unsigned Opcode = N1.getNode()->getOpcode(); - switch (Opcode) { - case ISD::ADD: - case ISD::MUL: - case ISD::AND: - case ISD::OR: - case ISD::XOR: - case ISD::ADDC: - case ISD::ADDE: - case ISD::VECTOR_SHUFFLE: { - SDValue N10 = N1.getOperand(0); - SDValue N11 = N1.getOperand(1); - RModW = isRMWLoad(N10, Chain, N2, Load); - if (!RModW) - RModW = isRMWLoad(N11, Chain, N2, Load); - break; - } - case ISD::SUB: - case ISD::SHL: - case ISD::SRA: - case ISD::SRL: - case ISD::ROTL: - case ISD::ROTR: - case ISD::SUBC: - case ISD::SUBE: - case X86ISD::SHLD: - case X86ISD::SHRD: { - SDValue N10 = N1.getOperand(0); - RModW = isRMWLoad(N10, Chain, N2, Load); - break; - } - } - - if (RModW) { - MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain); - ++NumLoadMoved; - checkForCycles(I); - } - } -} - - -/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend -/// nodes that target the FP stack to be store and load to the stack. This is a -/// gross hack. We would like to simply mark these as being illegal, but when -/// we do that, legalize produces these when it expands calls, then expands -/// these in the same legalize pass. We would like dag combine to be able to -/// hack on these between the call expansion and the node legalization. As such -/// this pass basically does "really late" legalization of these inline with the -/// X86 isel pass. -void X86DAGToDAGISel::PreprocessForFPConvert() { - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ) { - SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. + + // Lower fpround and fpextend nodes that target the FP stack to be store and + // load to the stack. This is a gross hack. 
We would like to simply mark + // these as being illegal, but when we do that, legalize produces these when + // it expands calls, then expands these in the same legalize pass. We would + // like dag combine to be able to hack on these between the call expansion + // and the node legalization. As such this pass basically does "really + // late" legalization of these inline with the X86 isel pass. + // FIXME: This should only happen when not compiled with -O0. if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND) continue; @@ -652,9 +493,10 @@ void X86DAGToDAGISel::PreprocessForFPConvert() { // FIXME: optimize the case where the src/dest is a load or store? SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0), - MemTmp, NULL, 0, MemVT); + MemTmp, NULL, 0, MemVT, + false, false, 0); SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp, - NULL, 0, MemVT); + NULL, 0, MemVT, false, false, 0); // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the // extload we created. This will cause general havok on the dag because @@ -670,30 +512,6 @@ void X86DAGToDAGISel::PreprocessForFPConvert() { } } -/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel -/// when it has created a SelectionDAG for us to codegen. -void X86DAGToDAGISel::InstructionSelect() { - const Function *F = MF->getFunction(); - OptForSize = F->hasFnAttr(Attribute::OptimizeForSize); - - if (OptLevel != CodeGenOpt::None) - PreprocessForRMW(); - - // FIXME: This should only happen when not compiled with -O0. - PreprocessForFPConvert(); - - // Codegen the basic block. -#ifndef NDEBUG - DEBUG(dbgs() << "===== Instruction selection begins:\n"); - Indent = 0; -#endif - SelectRoot(*CurDAG); -#ifndef NDEBUG - DEBUG(dbgs() << "===== Instruction selection ends:\n"); -#endif - - CurDAG->RemoveDeadNodes(); -} /// EmitSpecialCodeForMain - Emit any code that needs to be executed only in /// the main function. @@ -1300,22 +1118,24 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base, /// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to /// match a load whose top elements are either undef or zeros. The load flavor /// is derived from the type of N, which is either v4f32 or v2f64. -bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred, +/// +/// We also return: +/// PatternChainNode: this is the matched node that has a chain input and +/// output. 
+bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment, - SDValue &InChain, - SDValue &OutChain) { + SDValue &PatternNodeWithChain) { if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) { - InChain = N.getOperand(0).getValue(1); - if (ISD::isNON_EXTLoad(InChain.getNode()) && - InChain.getValue(0).hasOneUse() && - N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op)) { - LoadSDNode *LD = cast<LoadSDNode>(InChain); - if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) + PatternNodeWithChain = N.getOperand(0); + if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) && + PatternNodeWithChain.hasOneUse() && + IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && + IsLegalToFold(N.getOperand(0), N.getNode(), Root)) { + LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain); + if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp,Segment)) return false; - OutChain = LD->getChain(); return true; } } @@ -1327,13 +1147,14 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred, N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && N.getOperand(0).getNode()->hasOneUse() && ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) && - N.getOperand(0).getOperand(0).hasOneUse()) { + N.getOperand(0).getOperand(0).hasOneUse() && + IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && + IsLegalToFold(N.getOperand(0), N.getNode(), Root)) { // Okay, this is a zero extending load. Fold it. LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0)); - if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) + if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) return false; - OutChain = LD->getChain(); - InChain = SDValue(LD, 1); + PatternNodeWithChain = SDValue(LD, 0); return true; } return false; @@ -1407,7 +1228,6 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp) { - assert(Op->getOpcode() == X86ISD::TLSADDR); assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); @@ -1434,11 +1254,12 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { - if (ISD::isNON_EXTLoad(N.getNode()) && - N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), P, P)) - return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); - return false; + if (!ISD::isNON_EXTLoad(N.getNode()) || + !IsProfitableToFold(N, P, P) || + !IsLegalToFold(N, P, P)) + return false; + + return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); } /// getGlobalBaseReg - Return an SDNode that returns the value of @@ -1541,7 +1362,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_DEC16m; else if (isSub) { if (isCN) { - if (Predicate_i16immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_SUB16mi8; else Opc = X86::LOCK_SUB16mi; @@ -1549,7 +1370,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_SUB16mr; } else { if (isCN) { - if (Predicate_i16immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_ADD16mi8; else Opc = X86::LOCK_ADD16mi; @@ -1564,7 +1385,7 @@ SDNode 
*X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_DEC32m; else if (isSub) { if (isCN) { - if (Predicate_i32immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_SUB32mi8; else Opc = X86::LOCK_SUB32mi; @@ -1572,7 +1393,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_SUB32mr; } else { if (isCN) { - if (Predicate_i32immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_ADD32mi8; else Opc = X86::LOCK_ADD32mi; @@ -1588,7 +1409,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { else if (isSub) { Opc = X86::LOCK_SUB64mr; if (isCN) { - if (Predicate_i64immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_SUB64mi8; else if (Predicate_i64immSExt32(Val.getNode())) Opc = X86::LOCK_SUB64mi32; @@ -1596,7 +1417,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { } else { Opc = X86::LOCK_ADD64mr; if (isCN) { - if (Predicate_i64immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_ADD64mi8; else if (Predicate_i64immSExt32(Val.getNode())) Opc = X86::LOCK_ADD64mi32; @@ -1652,8 +1473,8 @@ static bool HasNoSignedComparisonUses(SDNode *N) { case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr: case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm: case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm: - case X86::JA: case X86::JAE: case X86::JB: case X86::JBE: - case X86::JE: case X86::JNE: case X86::JP: case X86::JNP: + case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4: + case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4: case X86::CMOVA16rr: case X86::CMOVA16rm: case X86::CMOVA32rr: case X86::CMOVA32rm: case X86::CMOVA64rr: case X86::CMOVA64rm: @@ -1693,24 +1514,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { unsigned Opcode = Node->getOpcode(); DebugLoc dl = Node->getDebugLoc(); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent, ' ') << "Selecting: "; - Node->dump(CurDAG); - dbgs() << '\n'; - }); - Indent += 2; -#endif + DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n'); if (Node->isMachineOpcode()) { -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "== "; - Node->dump(CurDAG); - dbgs() << '\n'; - }); - Indent -= 2; -#endif + DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'); return NULL; // Already selected. } @@ -1806,13 +1613,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { LoReg, NVT, InFlag); InFlag = Result.getValue(2); ReplaceUses(SDValue(Node, 0), Result); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - dbgs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the high half of the result, if it is needed. 
if (!SDValue(Node, 1).use_empty()) { @@ -1835,19 +1636,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { InFlag = Result.getValue(2); } ReplaceUses(SDValue(Node, 1), Result); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - dbgs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } -#ifndef NDEBUG - Indent -= 2; -#endif - return NULL; } @@ -1962,13 +1753,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { LoReg, NVT, InFlag); InFlag = Result.getValue(2); ReplaceUses(SDValue(Node, 0), Result); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - dbgs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the remainder (high) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { @@ -1992,19 +1777,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { InFlag = Result.getValue(2); } ReplaceUses(SDValue(Node, 1), Result); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - dbgs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } - -#ifndef NDEBUG - Indent -= 2; -#endif - return NULL; } @@ -2117,17 +1891,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDNode *ResNode = SelectCode(Node); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - if (ResNode == NULL || ResNode == Node) - Node->dump(CurDAG); - else - ResNode->dump(CurDAG); - dbgs() << '\n'; - }); - Indent -= 2; -#endif + DEBUG(dbgs() << "=> "; + if (ResNode == NULL || ResNode == Node) + Node->dump(CurDAG); + else + ResNode->dump(CurDAG); + dbgs() << '\n'); return ResNode; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 515bc84..802bedc 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -73,15 +73,16 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { case X86Subtarget::isDarwin: if (TM.getSubtarget<X86Subtarget>().is64Bit()) return new X8664_MachoTargetObjectFile(); - return new X8632_MachoTargetObjectFile(); + return new TargetLoweringObjectFileMachO(); case X86Subtarget::isELF: - return new TargetLoweringObjectFileELF(); + if (TM.getSubtarget<X86Subtarget>().is64Bit()) + return new X8664_ELFTargetObjectFile(TM); + return new X8632_ELFTargetObjectFile(TM); case X86Subtarget::isMingw: case X86Subtarget::isCygwin: case X86Subtarget::isWindows: return new TargetLoweringObjectFileCOFF(); } - } X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) @@ -1001,19 +1002,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) computeRegisterProperties(); - // Divide and reminder operations have no vector equivalent and can - // trap. Do a custom widening for these operations in which we never - // generate more divides/remainder than the original vector width. - for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { - if (!isTypeLegal((MVT::SimpleValueType)VT)) { - setOperationAction(ISD::SDIV, (MVT::SimpleValueType) VT, Custom); - setOperationAction(ISD::UDIV, (MVT::SimpleValueType) VT, Custom); - setOperationAction(ISD::SREM, (MVT::SimpleValueType) VT, Custom); - setOperationAction(ISD::UREM, (MVT::SimpleValueType) VT, Custom); - } - } - // FIXME: These should be based on subtarget info. 
Plus, the values should // be smaller when we are in optimizing for size mode. maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores @@ -1411,18 +1399,6 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { return CC_X86_32_C; } -/// NameDecorationForCallConv - Selects the appropriate decoration to -/// apply to a MachineFunction containing a given calling convention. -NameDecorationStyle -X86TargetLowering::NameDecorationForCallConv(CallingConv::ID CallConv) { - if (CallConv == CallingConv::X86_FastCall) - return FastCall; - else if (CallConv == CallingConv::X86_StdCall) - return StdCall; - return None; -} - - /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified /// by "Src" to address "Dst" with size and alignment information specified by /// the specific parameter attribute. The copy will be passed as a byval @@ -1476,7 +1452,8 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, VA.getLocMemOffset(), isImmutable, false); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); return DAG.getLoad(ValVT, dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); } } @@ -1498,9 +1475,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, Fn->getName() == "main") FuncInfo->setForceFramePointer(true); - // Decorate the function name. - FuncInfo->setDecorationStyle(NameDecorationForCallConv(CallConv)); - MachineFrameInfo *MFI = MF.getFrameInfo(); bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isTargetWin64(); @@ -1573,7 +1547,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // If value is passed via pointer - do a load. if (VA.getLocInfo() == CCValAssign::Indirect) - ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0); + ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0, + false, false, 0); InVals.push_back(ArgValue); } @@ -1668,7 +1643,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, PseudoSourceValue::getFixedStack(RegSaveFrameIndex), - Offset); + Offset, false, false, 0); MemOps.push_back(Store); Offset += 8; } @@ -1737,7 +1712,8 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain, return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); } return DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), LocMemOffset); + PseudoSourceValue::getStack(), LocMemOffset, + false, false, 0); } /// EmitTailCallLoadRetAddr - Emit a load of return address if tail call @@ -1752,7 +1728,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, OutRetAddr = getReturnAddressFrameIndex(DAG); // Load the "old" Return address. - OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0); + OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0, false, false, 0); return SDValue(OutRetAddr.getNode(), 1); } @@ -1767,11 +1743,12 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, // Calculate the new stack slot for the return address. int SlotSize = Is64Bit ? 8 : 4; int NewReturnAddrFI = - MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, true,false); + MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false, false); EVT VT = Is64Bit ? 
MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, - PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0); + PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0, + false, false, 0); return Chain; } @@ -1882,7 +1859,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); Chain = DAG.getStore(Chain, dl, Arg, SpillSlot, - PseudoSourceValue::getFixedStack(FI), 0); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); Arg = SpillSlot; break; } @@ -2013,7 +1991,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Store relative to framepointer. MemOpChains2.push_back( DAG.getStore(ArgChain, dl, Arg, FIN, - PseudoSourceValue::getFixedStack(FI), 0)); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0)); } } } @@ -2256,7 +2235,8 @@ static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, const X86InstrInfo *TII) { - int FI; + unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; + int FI = INT_MAX; if (Arg.getOpcode() == ISD::CopyFromReg) { unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); if (!VR || TargetRegisterInfo::isPhysicalRegister(VR)) @@ -2272,25 +2252,30 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) && Def->getOperand(1).isFI()) { FI = Def->getOperand(1).getIndex(); - if (MFI->getObjectSize(FI) != Flags.getByValSize()) - return false; + Bytes = Flags.getByValSize(); } else return false; } - } else { - LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg); - if (!Ld) + } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { + if (Flags.isByVal()) + // ByVal argument is passed in as a pointer but it's now being + // dereferenced. e.g. + // define @foo(%struct.X* %A) { + // tail call @bar(%struct.X* byval %A) + // } return false; SDValue Ptr = Ld->getBasePtr(); FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); if (!FINode) return false; FI = FINode->getIndex(); - } + } else + return false; + assert(FI != INT_MAX); if (!MFI->isFixedObjectIndex(FI)) return false; - return Offset == MFI->getObjectOffset(FI); + return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); } /// IsEligibleForTailCallOptimization - Check whether the call is eligible @@ -2397,7 +2382,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { // Set up a frame object for the return address. uint64_t SlotSize = TD->getPointerSize(); ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, - true, false); + false, false); FuncInfo->setRAIndex(ReturnAddrIndex); } @@ -3592,7 +3577,8 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, int EltNo = (Offset - StartOffset) >> 2; int Mask[4] = { EltNo, EltNo, EltNo, EltNo }; EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32; - SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0); + SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0, + false, false, 0); // Canonicalize it to a v4i32 shuffle. 
V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, @@ -4836,8 +4822,16 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){ if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) && isa<ConstantSDNode>(N2)) { - unsigned Opc = (EltVT.getSizeInBits() == 8) ? X86ISD::PINSRB - : X86ISD::PINSRW; + unsigned Opc; + if (VT == MVT::v8i16) + Opc = X86ISD::PINSRW; + else if (VT == MVT::v4i16) + Opc = X86ISD::MMX_PINSRW; + else if (VT == MVT::v16i8) + Opc = X86ISD::PINSRB; + else + Opc = X86ISD::PINSRB; + // Transform it so it match pinsr{b,w} which expects a GR32 as its second // argument. if (N1.getValueType() != MVT::i32) @@ -4888,7 +4882,8 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1); if (N2.getValueType() != MVT::i32) N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue()); - return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2); + return DAG.getNode(VT == MVT::v8i16 ? X86ISD::PINSRW : X86ISD::MMX_PINSRW, + dl, VT, N0, N1, N2); } return SDValue(); } @@ -5091,7 +5086,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, // load. if (isGlobalStubReference(OpFlags)) Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, false, false, 0); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. @@ -5171,7 +5166,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, MVT::i32)); SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base, - NULL, 0); + NULL, 0, false, false, 0); unsigned char OperandFlags = 0; // Most TLS accesses are not RIP relative, even on x86-64. One exception is @@ -5196,7 +5191,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, if (model == TLSModel::InitialExec) Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, false, false, 0); // The address of the thread local variable is the add of the thread // pointer with the offset of the variable. 
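(As an aside to the TLS hunks just above: the exec models compute a thread-local variable's address as the thread pointer plus a per-variable offset, and under the initial-exec model that offset is itself loaded through the GOT. A minimal sketch of what such a lowering produces, assuming an x86-64 Linux target; the variable name, the main function, and the assembly shown are illustrative assumptions, not part of this patch.)

// Illustrative only: initial-exec TLS access on x86-64 (assumed target).
// Compiled as position-independent code, the access is emitted roughly as:
//   movq  tls_counter@GOTTPOFF(%rip), %rax   // load the TP-relative offset from the GOT
//   movl  %fs:(%rax), %eax                   // thread pointer + offset
__thread int tls_counter __attribute__((tls_model("initial-exec")));

int main() { return tls_counter; }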
@@ -5264,7 +5259,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt, DAG.getConstant(VTBits, MVT::i8)); - SDValue Cond = DAG.getNode(X86ISD::CMP, dl, VT, + SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32, AndNode, DAG.getConstant(0, MVT::i8)); SDValue Hi, Lo; @@ -5313,7 +5308,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0); + PseudoSourceValue::getFixedStack(SSFI), 0, + false, false, 0); return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG); } @@ -5348,7 +5344,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, }; Chain = DAG.getNode(X86ISD::FST, dl, Tys, Ops, array_lengthof(Ops)); Result = DAG.getLoad(Op.getValueType(), dl, Chain, StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0); + PseudoSourceValue::getFixedStack(SSFI), 0, + false, false, 0); } return Result; @@ -5421,12 +5418,12 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2); SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, PseudoSourceValue::getConstantPool(), 0, - false, 16); + false, false, 16); SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0); SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2); SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, PseudoSourceValue::getConstantPool(), 0, - false, 16); + false, false, 16); SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); // Add the halves; easiest way is to swap them into another reg first. @@ -5513,9 +5510,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) { SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackSlot, WordOff); SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), - StackSlot, NULL, 0); + StackSlot, NULL, 0, false, false, 0); SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32), - OffsetSlot, NULL, 0); + OffsetSlot, NULL, 0, false, false, 0); return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG); } @@ -5563,7 +5560,8 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) { assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!"); Chain = DAG.getStore(Chain, dl, Value, StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0); + PseudoSourceValue::getFixedStack(SSFI), 0, + false, false, 0); SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) @@ -5597,7 +5595,7 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { // Load the result. return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, NULL, 0); + FIST, StackSlot, NULL, 0, false, false, 0); } SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { @@ -5607,7 +5605,7 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { // Load the result. 
return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, NULL, 0); + FIST, StackSlot, NULL, 0, false, false, 0); } SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { @@ -5632,8 +5630,8 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask); } @@ -5659,8 +5657,8 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); if (VT.isVector()) { return DAG.getNode(ISD::BIT_CONVERT, dl, VT, DAG.getNode(ISD::XOR, dl, MVT::v2i64, @@ -5708,8 +5706,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1); // Shift sign bit right or left if the two operands have different types. @@ -5737,8 +5735,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { C = ConstantVector::get(CV); CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2); // Or the value with the sign bit. @@ -5890,26 +5888,31 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, /// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node /// if it's possible. 
-static SDValue LowerToBT(SDValue Op0, ISD::CondCode CC, +static SDValue LowerToBT(SDValue And, ISD::CondCode CC, DebugLoc dl, SelectionDAG &DAG) { + SDValue Op0 = And.getOperand(0); + SDValue Op1 = And.getOperand(1); + if (Op0.getOpcode() == ISD::TRUNCATE) + Op0 = Op0.getOperand(0); + if (Op1.getOpcode() == ISD::TRUNCATE) + Op1 = Op1.getOperand(0); + SDValue LHS, RHS; - if (Op0.getOperand(1).getOpcode() == ISD::SHL) { - if (ConstantSDNode *Op010C = - dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0))) - if (Op010C->getZExtValue() == 1) { - LHS = Op0.getOperand(0); - RHS = Op0.getOperand(1).getOperand(1); + if (Op1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *And10C = dyn_cast<ConstantSDNode>(Op1.getOperand(0))) + if (And10C->getZExtValue() == 1) { + LHS = Op0; + RHS = Op1.getOperand(1); } - } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) { - if (ConstantSDNode *Op000C = - dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0))) - if (Op000C->getZExtValue() == 1) { - LHS = Op0.getOperand(1); - RHS = Op0.getOperand(0).getOperand(1); + } else if (Op0.getOpcode() == ISD::SHL) { + if (ConstantSDNode *And00C = dyn_cast<ConstantSDNode>(Op0.getOperand(0))) + if (And00C->getZExtValue() == 1) { + LHS = Op1; + RHS = Op0.getOperand(1); } - } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) { - ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1)); - SDValue AndLHS = Op0.getOperand(0); + } else if (Op1.getOpcode() == ISD::Constant) { + ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1); + SDValue AndLHS = Op0; if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) { LHS = AndLHS.getOperand(0); RHS = AndLHS.getOperand(1); @@ -5959,6 +5962,21 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { return NewSetCC; } + // Look for "(setcc) == / != 1" to avoid unncessary setcc. + if (Op0.getOpcode() == X86ISD::SETCC && + Op1.getOpcode() == ISD::Constant && + (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 || + cast<ConstantSDNode>(Op1)->isNullValue()) && + (CC == ISD::SETEQ || CC == ISD::SETNE)) { + X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); + bool Invert = (CC == ISD::SETNE) ^ + cast<ConstantSDNode>(Op1)->isNullValue(); + if (Invert) + CCode = X86::GetOppositeBranchCondition(CCode); + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1)); + } + bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); if (X86CC == X86::COND_INVALID) @@ -6400,24 +6418,13 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, EVT IntPtr = getPointerTy(); EVT SPTy = Subtarget->is64Bit() ? 
MVT::i64 : MVT::i32; - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); - Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag); Flag = Chain.getValue(1); - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Ops[] = { Chain, - DAG.getTargetExternalSymbol("_alloca", IntPtr), - DAG.getRegister(X86::EAX, IntPtr), - DAG.getRegister(X86StackPtr, SPTy), - Flag }; - Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops, 5); - Flag = Chain.getValue(1); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(0, true), - DAG.getIntPtrConstant(0, true), - Flag); + Chain = DAG.getNode(X86ISD::MINGW_ALLOCA, dl, NodeTys, Chain, Flag); + Flag = Chain.getValue(1); Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1); @@ -6461,8 +6468,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/false, - DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl, - DAG.GetOrdering(Chain.getNode())); + DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl); return CallResult.second; } @@ -6646,7 +6652,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); - return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, + false, false, 0); } // __va_list_tag: @@ -6658,8 +6665,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { SDValue FIN = Op.getOperand(1); // Store gp_offset SDValue Store = DAG.getStore(Op.getOperand(0), dl, - DAG.getConstant(VarArgsGPOffset, MVT::i32), - FIN, SV, 0); + DAG.getConstant(VarArgsGPOffset, MVT::i32), + FIN, SV, 0, false, false, 0); MemOps.push_back(Store); // Store fp_offset @@ -6667,21 +6674,23 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { FIN, DAG.getIntPtrConstant(4)); Store = DAG.getStore(Op.getOperand(0), dl, DAG.getConstant(VarArgsFPOffset, MVT::i32), - FIN, SV, 0); + FIN, SV, 0, false, false, 0); MemOps.push_back(Store); // Store ptr to overflow_arg_area FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); SDValue OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0); + Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0, + false, false, 0); MemOps.push_back(Store); // Store ptr to reg_save_area. FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(8)); SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0); + Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0, + false, false, 0); MemOps.push_back(Store); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0], MemOps.size()); @@ -6967,13 +6976,13 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), - NULL, 0); + NULL, 0, false, false, 0); } // Just load the return address. 
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, NULL, 0); + RetAddrFI, NULL, 0, false, false, 0); } SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { @@ -6985,7 +6994,8 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, + false, false, 0); return FrameAddr; } @@ -7009,7 +7019,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) SDValue StoreAddr = DAG.getNode(ISD::SUB, dl, getPointerTy(), Frame, DAG.getIntPtrConstant(-TD->getPointerSize())); StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset); - Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0); + Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0, false, false, 0); Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); MF.getRegInfo().addLiveOut(StoreAddrReg); @@ -7044,11 +7054,12 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11 SDValue Addr = Trmp; OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 0); + Addr, TrmpAddr, 0, false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(2, MVT::i64)); - OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2, false, 2); + OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2, + false, false, 2); // Load the 'nest' parameter value into R10. // R10 is specified in X86CallingConv.td @@ -7056,24 +7067,25 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(10, MVT::i64)); OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 10); + Addr, TrmpAddr, 10, false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(12, MVT::i64)); - OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12, false, 2); + OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12, + false, false, 2); // Jump to the nested function. OpCode = (JMP64r << 8) | REX_WB; // jmpq *... 
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(20, MVT::i64)); OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 20); + Addr, TrmpAddr, 20, false, false, 0); unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11 Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(22, MVT::i64)); OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr, - TrmpAddr, 22); + TrmpAddr, 22, false, false, 0); SDValue Ops[] = { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) }; @@ -7133,21 +7145,23 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg); OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(MOV32ri|N86Reg, MVT::i8), - Trmp, TrmpAddr, 0); + Trmp, TrmpAddr, 0, false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32)); - OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1, false, 1); + OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1, + false, false, 1); const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode. Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32)); OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr, - TrmpAddr, 5, false, 1); + TrmpAddr, 5, false, false, 1); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32)); - OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6, false, 1); + OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6, + false, false, 1); SDValue Ops[] = { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4) }; @@ -7190,7 +7204,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { DAG.getEntryNode(), StackSlot); // Load FP Control Word from stack slot - SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0); + SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0, + false, false, 0); // Transform as necessary SDValue CWD1 = @@ -7554,7 +7569,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, if (FIST.getNode() != 0) { EVT VT = N->getValueType(0); // Return a load from the stack slot. 
- Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0)); + Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0, + false, false, 0)); } return; } @@ -7572,14 +7588,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(edx.getValue(1)); return; } - case ISD::SDIV: - case ISD::UDIV: - case ISD::SREM: - case ISD::UREM: { - EVT WidenVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - Results.push_back(DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements())); - return; - } case ISD::ATOMIC_CMP_SWAP: { EVT T = N->getValueType(0); assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap"); @@ -7677,6 +7685,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::INSERTPS: return "X86ISD::INSERTPS"; case X86ISD::PINSRB: return "X86ISD::PINSRB"; case X86ISD::PINSRW: return "X86ISD::PINSRW"; + case X86ISD::MMX_PINSRW: return "X86ISD::MMX_PINSRW"; case X86ISD::PSHUFB: return "X86ISD::PSHUFB"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMIN: return "X86ISD::FMIN"; @@ -7721,6 +7730,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::PTEST: return "X86ISD::PTEST"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; + case X86ISD::MINGW_ALLOCA: return "X86ISD::MINGW_ALLOCA"; } } @@ -7778,13 +7788,13 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { - if (!Ty1->isInteger() || !Ty2->isInteger()) + if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); if (NumBits1 <= NumBits2) return false; - return Subtarget->is64Bit() || NumBits1 < 64; + return true; } bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { @@ -7794,12 +7804,12 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { unsigned NumBits2 = VT2.getSizeInBits(); if (NumBits1 <= NumBits2) return false; - return Subtarget->is64Bit() || NumBits1 < 64; + return true; } bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. - return Ty1->isInteger(32) && Ty2->isInteger(64) && Subtarget->is64Bit(); + return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget->is64Bit(); } bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { @@ -7955,7 +7965,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, MIB.addReg(EAXreg); // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. return nextMBB; @@ -8112,7 +8122,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, MIB.addReg(X86::EDX); // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. 
return nextMBB; @@ -8215,7 +8225,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, MIB.addReg(X86::EAX); // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); F->DeleteMachineInstr(mInstr); // The pseudo instruction is gone now. return nextMBB; @@ -8297,7 +8307,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( if (!Subtarget->isTargetWin64()) { // If %al is 0, branch around the XMM save block. BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg); - BuildMI(MBB, DL, TII->get(X86::JE)).addMBB(EndMBB); + BuildMI(MBB, DL, TII->get(X86::JE_4)).addMBB(EndMBB); MBB->addSuccessor(EndMBB); } @@ -8390,6 +8400,29 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, return BB; } +MachineBasicBlock * +X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI, + MachineBasicBlock *BB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + MachineFunction *F = BB->getParent(); + + // The lowering is pretty easy: we're just emitting the call to _alloca. The + // non-trivial part is impdef of ESP. + // FIXME: The code should be tweaked as soon as we'll try to do codegen for + // mingw-w64. + + BuildMI(BB, DL, TII->get(X86::CALLpcrel32)) + .addExternalSymbol("_alloca") + .addReg(X86::EAX, RegState::Implicit) + .addReg(X86::ESP, RegState::Implicit) + .addReg(X86::EAX, RegState::Define | RegState::Implicit) + .addReg(X86::ESP, RegState::Define | RegState::Implicit); + + F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + return BB; +} MachineBasicBlock * X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, @@ -8397,6 +8430,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { switch (MI->getOpcode()) { default: assert(false && "Unexpected instr type to insert"); + case X86::MINGW_ALLOCA: + return EmitLoweredMingwAlloca(MI, BB, EM); case X86::CMOV_GR8: case X86::CMOV_V1I64: case X86::CMOV_FR32: @@ -8783,10 +8818,11 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, if (DAG.InferPtrAlignment(LD->getBasePtr()) >= 16) return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile()); + LD->isVolatile(), LD->isNonTemporal(), 0); return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); } else if (NumElems == 4 && LastLoadedElt == 1) { SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); SDValue Ops[] = { LD->getChain(), LD->getBasePtr() }; @@ -8806,10 +8842,9 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, SDValue RHS = N->getOperand(2); // If we have SSE[12] support, try to form min/max nodes. SSE min/max - // instructions have the peculiarity that if either operand is a NaN, - // they chose what we call the RHS operand (and as such are not symmetric). - // It happens that this matches the semantics of the common C idiom - // x<y?x:y and related forms, so we can recognize these cases. + // instructions match the semantics of the common C idiom x<y?x:y but not + // x<=y?x:y, because of how they handle negative zero (which can be + // ignored in unsafe-math mode). 
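(A small standalone illustration of the semantics point made in the comment above, not part of the patch: the SSE scalar min agrees with the idiom x < y ? x : y on signed zeros, but not with x <= y ? x : y. The helper name minss and the use of SSE intrinsics on an SSE-capable host are assumptions of this sketch.)

// Why x < y ? x : y can become MINSS while x <= y ? x : y cannot when
// signed zeros matter.
#include <immintrin.h>
#include <cstdio>

static float minss(float a, float b) {            // hardware MINSS(a, b)
  return _mm_cvtss_f32(_mm_min_ss(_mm_set_ss(a), _mm_set_ss(b)));
}

int main() {
  float pz = +0.0f, nz = -0.0f;
  // x < y ? x : y picks the same value as MINSS here: both yield -0.
  std::printf("%g %g\n", (pz < nz ? pz : nz), minss(pz, nz));
  // x <= y ? x : y keeps +0, but MINSS still returns its second operand, -0.
  std::printf("%g %g\n", (pz <= nz ? pz : nz), minss(pz, nz));
  return 0;
}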
if (Subtarget->hasSSE2() && (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) && Cond.getOpcode() == ISD::SETCC) { @@ -8817,36 +8852,34 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, unsigned Opcode = 0; // Check for x CC y ? x : y. - if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) { + if (DAG.isEqualTo(LHS, Cond.getOperand(0)) && + DAG.isEqualTo(RHS, Cond.getOperand(1))) { switch (CC) { default: break; case ISD::SETULT: - // This can be a min if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(RHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(LHS)) + // Converting this to a min would handle NaNs incorrectly, and swapping + // the operands would cause it to handle comparisons between positive + // and negative zero incorrectly. + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) { + if (!UnsafeFPMath && + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; + std::swap(LHS, RHS); } Opcode = X86ISD::FMIN; break; case ISD::SETOLE: - // This can be a min if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(LHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(RHS)) - break; - } + // Converting this to a min would handle comparisons between positive + // and negative zero incorrectly. + if (!UnsafeFPMath && + !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) + break; Opcode = X86ISD::FMIN; break; case ISD::SETULE: - // This can be a min, but if either operand is a NaN we need it to - // preserve the original LHS. + // Converting this to a min would handle both negative zeros and NaNs + // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOLT: case ISD::SETLT: @@ -8855,32 +8888,29 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, break; case ISD::SETOGE: - // This can be a max if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(LHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(RHS)) - break; - } + // Converting this to a max would handle comparisons between positive + // and negative zero incorrectly. + if (!UnsafeFPMath && + !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(LHS)) + break; Opcode = X86ISD::FMAX; break; case ISD::SETUGT: - // This can be a max if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(RHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(LHS)) + // Converting this to a max would handle NaNs incorrectly, and swapping + // the operands would cause it to handle comparisons between positive + // and negative zero incorrectly. + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) { + if (!UnsafeFPMath && + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; + std::swap(LHS, RHS); } Opcode = X86ISD::FMAX; break; case ISD::SETUGE: - // This can be a max, but if either operand is a NaN we need it to - // preserve the original LHS. 
+ // Converting this to a max would handle both negative zeros and NaNs + // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOGT: case ISD::SETGT: @@ -8889,36 +8919,33 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, break; } // Check for x CC y ? y : x -- a min/max with reversed arms. - } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) { + } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) && + DAG.isEqualTo(RHS, Cond.getOperand(0))) { switch (CC) { default: break; case ISD::SETOGE: - // This can be a min if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(RHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(LHS)) + // Converting this to a min would handle comparisons between positive + // and negative zero incorrectly, and swapping the operands would + // cause it to handle NaNs incorrectly. + if (!UnsafeFPMath && + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) { + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) break; + std::swap(LHS, RHS); } Opcode = X86ISD::FMIN; break; case ISD::SETUGT: - // This can be a min if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(LHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(RHS)) - break; - } + // Converting this to a min would handle NaNs incorrectly. + if (!UnsafeFPMath && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) + break; Opcode = X86ISD::FMIN; break; case ISD::SETUGE: - // This can be a min, but if either operand is a NaN we need it to - // preserve the original LHS. + // Converting this to a min would handle both negative zeros and NaNs + // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOGT: case ISD::SETGT: @@ -8927,32 +8954,28 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, break; case ISD::SETULT: - // This can be a max if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(LHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(RHS)) - break; - } + // Converting this to a max would handle NaNs incorrectly. + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) + break; Opcode = X86ISD::FMAX; break; case ISD::SETOLE: - // This can be a max if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(RHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(LHS)) + // Converting this to a max would handle comparisons between positive + // and negative zero incorrectly, and swapping the operands would + // cause it to handle NaNs incorrectly. + if (!UnsafeFPMath && + !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) { + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) break; + std::swap(LHS, RHS); } Opcode = X86ISD::FMAX; break; case ISD::SETULE: - // This can be a max, but if either operand is a NaN we need it to - // preserve the original LHS. 
+ // Converting this to a max would handle both negative zeros and NaNs + // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOLT: case ISD::SETLT: @@ -9177,10 +9200,6 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, /// LEA + SHL, LEA + LEA. static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { - if (DAG.getMachineFunction(). - getFunction()->hasFnAttr(Attribute::OptimizeForSize)) - return SDValue(); - if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) return SDValue(); @@ -9319,7 +9338,7 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, } } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) { if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) { - unsigned SplatIdx = cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex(); + unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex(); if (C->getZExtValue() == SplatIdx) BaseShAmt = InVec.getOperand(1); } @@ -9505,7 +9524,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getSrcValue(), Ld->getSrcValueOffset(), Ld->isVolatile(), - Ld->getAlignment()); + Ld->isNonTemporal(), Ld->getAlignment()); SDValue NewChain = NewLd.getValue(1); if (TokenFactorIndex != -1) { Ops.push_back(NewChain); @@ -9514,7 +9533,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, } return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(), St->getSrcValue(), St->getSrcValueOffset(), - St->isVolatile(), St->getAlignment()); + St->isVolatile(), St->isNonTemporal(), + St->getAlignment()); } // Otherwise, lower to two pairs of 32-bit loads / stores. @@ -9524,10 +9544,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr, Ld->getSrcValue(), Ld->getSrcValueOffset(), - Ld->isVolatile(), Ld->getAlignment()); + Ld->isVolatile(), Ld->isNonTemporal(), + Ld->getAlignment()); SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr, Ld->getSrcValue(), Ld->getSrcValueOffset()+4, - Ld->isVolatile(), + Ld->isVolatile(), Ld->isNonTemporal(), MinAlign(Ld->getAlignment(), 4)); SDValue NewChain = LoLd.getValue(1); @@ -9544,11 +9565,13 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr, St->getSrcValue(), St->getSrcValueOffset(), - St->isVolatile(), St->getAlignment()); + St->isVolatile(), St->isNonTemporal(), + St->getAlignment()); SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr, St->getSrcValue(), St->getSrcValueOffset() + 4, St->isVolatile(), + St->isNonTemporal(), MinAlign(St->getAlignment(), 4)); return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt); } @@ -9731,7 +9754,7 @@ static bool LowerToBSwap(CallInst *CI) { // Verify this is a simple bswap. 
if (CI->getNumOperands() != 2 || CI->getType() != CI->getOperand(1)->getType() || - !CI->getType()->isInteger()) + !CI->getType()->isIntegerTy()) return false; const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); @@ -9780,17 +9803,26 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { return LowerToBSwap(CI); } // rorw $$8, ${0:w} --> llvm.bswap.i16 - if (CI->getType()->isInteger(16) && + if (CI->getType()->isIntegerTy(16) && AsmPieces.size() == 3 && - AsmPieces[0] == "rorw" && + (AsmPieces[0] == "rorw" || AsmPieces[0] == "rolw") && AsmPieces[1] == "$$8," && AsmPieces[2] == "${0:w}" && - IA->getConstraintString() == "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}") { - return LowerToBSwap(CI); + IA->getConstraintString().compare(0, 5, "=r,0,") == 0) { + AsmPieces.clear(); + SplitString(IA->getConstraintString().substr(5), AsmPieces, ","); + std::sort(AsmPieces.begin(), AsmPieces.end()); + if (AsmPieces.size() == 4 && + AsmPieces[0] == "~{cc}" && + AsmPieces[1] == "~{dirflag}" && + AsmPieces[2] == "~{flags}" && + AsmPieces[3] == "~{fpsr}") { + return LowerToBSwap(CI); + } } break; case 3: - if (CI->getType()->isInteger(64) && + if (CI->getType()->isIntegerTy(64) && Constraints.size() >= 2 && Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 193ef05..4c12fcc 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -180,7 +180,7 @@ namespace llvm { /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector, /// corresponds to X86::PINSRW. - PINSRW, + PINSRW, MMX_PINSRW, /// PSHUFB - Shuffle 16 8-bit values within a vector. PSHUFB, @@ -249,6 +249,9 @@ namespace llvm { // with control flow. VASTART_SAVE_XMM_REGS, + // MINGW_ALLOCA - MingW's __alloca call to do stack probing. + MINGW_ALLOCA, + // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - // Atomic 64-bit binary operations. @@ -259,6 +262,10 @@ namespace llvm { ATOMAND64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG + + // WARNING: Do not add anything in the end unless you want the node to + // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be + // thought as target memory ops! }; } @@ -639,7 +646,6 @@ namespace llvm { int FPDiff, DebugLoc dl); CCAssignFn *CCAssignFnForNode(CallingConv::ID CallConv) const; - NameDecorationStyle NameDecorationForCallConv(CallingConv::ID CallConv); unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG); std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, @@ -790,7 +796,11 @@ namespace llvm { MachineBasicBlock *EmitLoweredSelect(MachineInstr *I, MachineBasicBlock *BB, DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; - + + MachineBasicBlock *EmitLoweredMingwAlloca(MachineInstr *MI, + MachineBasicBlock *BB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent, for use with the given x86 condition code. SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG); diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 468dd67..8462255 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -59,10 +59,11 @@ def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr", // Pattern fragments. 
// -def i64immSExt8 : PatLeaf<(i64 imm), [{ - // i64immSExt8 predicate - True if the 64-bit immediate fits in a 8-bit - // sign extended field. - return (int64_t)N->getZExtValue() == (int8_t)N->getZExtValue(); +def i64immSExt8 : PatLeaf<(i64 immSext8)>; + +def GetLo32XForm : SDNodeXForm<imm, [{ + // Transformation function: get the low 32 bits. + return getI32Imm((unsigned)N->getZExtValue()); }]>; def i64immSExt32 : PatLeaf<(i64 imm), [{ @@ -71,6 +72,7 @@ def i64immSExt32 : PatLeaf<(i64 imm), [{ return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue(); }]>; + def i64immZExt32 : PatLeaf<(i64 imm), [{ // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit // unsignedsign extended field. @@ -325,7 +327,7 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src), def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (load addr:$src))]>; @@ -556,7 +558,7 @@ def ADC64mi8 : RIi8<0x83, MRM2m, (outs), (ins i64mem:$dst, i64i8imm :$src2), addr:$dst)]>; def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2), "adc{q}\t{$src2, $dst|$dst, $src2}", - [(store (adde (load addr:$dst), i64immSExt8:$src2), + [(store (adde (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>; } // Uses = [EFLAGS] @@ -893,35 +895,38 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), let isTwoAddress = 1 in { def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src), "rcl{q}\t{1, $dst|$dst, 1}", []>; -def RCL64m1 : RI<0xD1, MRM2m, (outs i64mem:$dst), (ins i64mem:$src), - "rcl{q}\t{1, $dst|$dst, 1}", []>; -let Uses = [CL] in { -def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src), - "rcl{q}\t{%cl, $dst|$dst, CL}", []>; -def RCL64mCL : RI<0xD3, MRM2m, (outs i64mem:$dst), (ins i64mem:$src), - "rcl{q}\t{%cl, $dst|$dst, CL}", []>; -} def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL64mi : RIi8<0xC1, MRM2m, (outs i64mem:$dst), - (ins i64mem:$src, i8imm:$cnt), - "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src), "rcr{q}\t{1, $dst|$dst, 1}", []>; -def RCR64m1 : RI<0xD1, MRM3m, (outs i64mem:$dst), (ins i64mem:$src), - "rcr{q}\t{1, $dst|$dst, 1}", []>; +def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), + "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; + let Uses = [CL] in { +def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src), + "rcl{q}\t{%cl, $dst|$dst, CL}", []>; def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src), "rcr{q}\t{%cl, $dst|$dst, CL}", []>; -def RCR64mCL : RI<0xD3, MRM3m, (outs i64mem:$dst), (ins i64mem:$src), - "rcr{q}\t{%cl, $dst|$dst, CL}", []>; } -def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), - "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR64mi : RIi8<0xC1, MRM3m, (outs i64mem:$dst), - (ins i64mem:$src, i8imm:$cnt), +} + +let isTwoAddress = 0 in { +def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst), + "rcl{q}\t{1, $dst|$dst, 1}", []>; +def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt), + "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst), + 
"rcr{q}\t{1, $dst|$dst, 1}", []>; +def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt), "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; + +let Uses = [CL] in { +def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst), + "rcl{q}\t{%cl, $dst|$dst, CL}", []>; +def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst), + "rcr{q}\t{%cl, $dst|$dst, CL}", []>; +} } let isTwoAddress = 1 in { @@ -1771,7 +1776,7 @@ def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; -def SWPGS : I<0x01, RawFrm, (outs), (ins), "swpgs", []>, TB; +def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB; def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins), "push{q}\t%fs", []>, TB; @@ -1978,7 +1983,7 @@ def : Pat<(and GR64:$src, i64immZExt32:$imm), (i64 0), (AND32ri (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit), - imm:$imm), + (i32 (GetLo32XForm imm:$imm))), x86_subreg_32bit)>; // r & (2^32-1) ==> movz @@ -2102,34 +2107,34 @@ def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst), def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>; // (shl x (and y, 63)) ==> (shl x, y) -def : Pat<(shl GR64:$src1, (and CL:$amt, 63)), +def : Pat<(shl GR64:$src1, (and CL, 63)), (SHL64rCL GR64:$src1)>; -def : Pat<(store (shl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst), +def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SHL64mCL addr:$dst)>; -def : Pat<(srl GR64:$src1, (and CL:$amt, 63)), +def : Pat<(srl GR64:$src1, (and CL, 63)), (SHR64rCL GR64:$src1)>; -def : Pat<(store (srl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst), +def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SHR64mCL addr:$dst)>; -def : Pat<(sra GR64:$src1, (and CL:$amt, 63)), +def : Pat<(sra GR64:$src1, (and CL, 63)), (SAR64rCL GR64:$src1)>; -def : Pat<(store (sra (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst), +def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SAR64mCL addr:$dst)>; // Double shift patterns -def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), +def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)), (SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1), - GR64:$src2, (i8 imm:$amt2)), addr:$dst), + GR64:$src2, (i8 imm)), addr:$dst), (SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; -def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), +def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)), (SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1), - GR64:$src2, (i8 imm:$amt2)), addr:$dst), + GR64:$src2, (i8 imm)), addr:$dst), (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; // (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. 
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index e22a903..ae24bfb 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -397,7 +397,7 @@ def CMOVNP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins), let canFoldAsLoad = 1 in { def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP, [(set RFP32:$dst, (loadf32 addr:$src))]>; -let isReMaterializable = 1, mayHaveSideEffects = 1 in +let isReMaterializable = 1 in def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP, [(set RFP64:$dst, (loadf64 addr:$src))]>; def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP, diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index a799f16..bb81cbf 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -29,7 +29,16 @@ def MRM0m : Format<24>; def MRM1m : Format<25>; def MRM2m : Format<26>; def MRM3m : Format<27>; def MRM4m : Format<28>; def MRM5m : Format<29>; def MRM6m : Format<30>; def MRM7m : Format<31>; def MRMInitReg : Format<32>; - +def MRM_C1 : Format<33>; +def MRM_C2 : Format<34>; +def MRM_C3 : Format<35>; +def MRM_C4 : Format<36>; +def MRM_C8 : Format<37>; +def MRM_C9 : Format<38>; +def MRM_E8 : Format<39>; +def MRM_F0 : Format<40>; +def MRM_F8 : Format<41>; +def MRM_F9 : Format<42>; // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our @@ -37,11 +46,13 @@ def MRMInitReg : Format<32>; class ImmType<bits<3> val> { bits<3> Value = val; } -def NoImm : ImmType<0>; -def Imm8 : ImmType<1>; -def Imm16 : ImmType<2>; -def Imm32 : ImmType<3>; -def Imm64 : ImmType<4>; +def NoImm : ImmType<0>; +def Imm8 : ImmType<1>; +def Imm8PCRel : ImmType<2>; +def Imm16 : ImmType<3>; +def Imm32 : ImmType<4>; +def Imm32PCRel : ImmType<5>; +def Imm64 : ImmType<6>; // FPFormat - This specifies what form this FP instruction has. This is used by // the Floating-Point stackifier pass. @@ -121,6 +132,12 @@ class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm, let Pattern = pattern; let CodeSize = 3; } +class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, + list<dag> pattern> + : X86Inst<o, f, Imm8PCRel, outs, ins, asm> { + let Pattern = pattern; + let CodeSize = 3; +} class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern> : X86Inst<o, f, Imm16, outs, ins, asm> { @@ -134,6 +151,13 @@ class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm, let CodeSize = 3; } +class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, + list<dag> pattern> + : X86Inst<o, f, Imm32PCRel, outs, ins, asm> { + let Pattern = pattern; + let CodeSize = 3; +} + // FPStack Instruction Templates: // FPI - Floating Point Instruction template. 
class FPI<bits<8> o, Format F, dag outs, dag ins, string asm> diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 8d13c0f..39bda04 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -276,11 +276,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 }, { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 }, { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 }, - { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0, 0 }, - { X86::MOVSDrr, X86::MOVSDmr, 0, 0 }, { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 }, { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 }, - { X86::MOVSSrr, X86::MOVSSmr, 0, 0 }, { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 }, { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 }, { X86::MUL16r, X86::MUL16m, 1, 0 }, @@ -389,12 +386,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, { X86::MOVDQArr, X86::MOVDQArm, 16 }, - { X86::MOVSD2PDrr, X86::MOVSD2PDrm, 0 }, - { X86::MOVSDrr, X86::MOVSDrm, 0 }, { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 }, { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 }, - { X86::MOVSS2PSrr, X86::MOVSS2PSrm, 0 }, - { X86::MOVSSrr, X86::MOVSSrm, 0 }, { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 }, { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 }, @@ -682,23 +675,20 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI, case X86::MOV16rr: case X86::MOV32rr: case X86::MOV64rr: - case X86::MOVSSrr: - case X86::MOVSDrr: // FP Stack register class copies case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080: case X86::MOV_Fp3264: case X86::MOV_Fp3280: case X86::MOV_Fp6432: case X86::MOV_Fp8032: - + + // Note that MOVSSrr and MOVSDrr are not considered copies. FR32 and FR64 + // copies are done with FsMOVAPSrr and FsMOVAPDrr. 
+ case X86::FsMOVAPSrr: case X86::FsMOVAPDrr: case X86::MOVAPSrr: case X86::MOVAPDrr: case X86::MOVDQArr: - case X86::MOVSS2PSrr: - case X86::MOVSD2PDrr: - case X86::MOVPS2SSrr: - case X86::MOVPD2SDrr: case X86::MMX_MOVQ64rr: assert(MI.getNumOperands() >= 2 && MI.getOperand(0).isReg() && @@ -1083,7 +1073,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, case X86::MOV8r0: Opc = X86::MOV8ri; break; case X86::MOV16r0: Opc = X86::MOV16ri; break; case X86::MOV32r0: Opc = X86::MOV32ri; break; - case X86::MOV64r0: Opc = X86::MOV64ri; break; + case X86::MOV64r0: Opc = X86::MOV64ri64i32; break; } Clone = false; } @@ -1587,44 +1577,44 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) { switch (BrOpc) { default: return X86::COND_INVALID; - case X86::JE: return X86::COND_E; - case X86::JNE: return X86::COND_NE; - case X86::JL: return X86::COND_L; - case X86::JLE: return X86::COND_LE; - case X86::JG: return X86::COND_G; - case X86::JGE: return X86::COND_GE; - case X86::JB: return X86::COND_B; - case X86::JBE: return X86::COND_BE; - case X86::JA: return X86::COND_A; - case X86::JAE: return X86::COND_AE; - case X86::JS: return X86::COND_S; - case X86::JNS: return X86::COND_NS; - case X86::JP: return X86::COND_P; - case X86::JNP: return X86::COND_NP; - case X86::JO: return X86::COND_O; - case X86::JNO: return X86::COND_NO; + case X86::JE_4: return X86::COND_E; + case X86::JNE_4: return X86::COND_NE; + case X86::JL_4: return X86::COND_L; + case X86::JLE_4: return X86::COND_LE; + case X86::JG_4: return X86::COND_G; + case X86::JGE_4: return X86::COND_GE; + case X86::JB_4: return X86::COND_B; + case X86::JBE_4: return X86::COND_BE; + case X86::JA_4: return X86::COND_A; + case X86::JAE_4: return X86::COND_AE; + case X86::JS_4: return X86::COND_S; + case X86::JNS_4: return X86::COND_NS; + case X86::JP_4: return X86::COND_P; + case X86::JNP_4: return X86::COND_NP; + case X86::JO_4: return X86::COND_O; + case X86::JNO_4: return X86::COND_NO; } } unsigned X86::GetCondBranchFromCond(X86::CondCode CC) { switch (CC) { default: llvm_unreachable("Illegal condition code!"); - case X86::COND_E: return X86::JE; - case X86::COND_NE: return X86::JNE; - case X86::COND_L: return X86::JL; - case X86::COND_LE: return X86::JLE; - case X86::COND_G: return X86::JG; - case X86::COND_GE: return X86::JGE; - case X86::COND_B: return X86::JB; - case X86::COND_BE: return X86::JBE; - case X86::COND_A: return X86::JA; - case X86::COND_AE: return X86::JAE; - case X86::COND_S: return X86::JS; - case X86::COND_NS: return X86::JNS; - case X86::COND_P: return X86::JP; - case X86::COND_NP: return X86::JNP; - case X86::COND_O: return X86::JO; - case X86::COND_NO: return X86::JNO; + case X86::COND_E: return X86::JE_4; + case X86::COND_NE: return X86::JNE_4; + case X86::COND_L: return X86::JL_4; + case X86::COND_LE: return X86::JLE_4; + case X86::COND_G: return X86::JG_4; + case X86::COND_GE: return X86::JGE_4; + case X86::COND_B: return X86::JB_4; + case X86::COND_BE: return X86::JBE_4; + case X86::COND_A: return X86::JA_4; + case X86::COND_AE: return X86::JAE_4; + case X86::COND_S: return X86::JS_4; + case X86::COND_NS: return X86::JNS_4; + case X86::COND_P: return X86::JP_4; + case X86::COND_NP: return X86::JNP_4; + case X86::COND_O: return X86::JO_4; + case X86::COND_NO: return X86::JNO_4; } } @@ -1694,7 +1684,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return true; // Handle unconditional branches. 
- if (I->getOpcode() == X86::JMP) { + if (I->getOpcode() == X86::JMP_4) { if (!AllowModify) { TBB = I->getOperand(0).getMBB(); continue; @@ -1778,7 +1768,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { while (I != MBB.begin()) { --I; - if (I->getOpcode() != X86::JMP && + if (I->getOpcode() != X86::JMP_4 && GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID) break; // Remove the branch. @@ -1804,7 +1794,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, if (Cond.empty()) { // Unconditional branch? assert(!FBB && "Unconditional branch with multiple successors!"); - BuildMI(&MBB, dl, get(X86::JMP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(TBB); return 1; } @@ -1814,16 +1804,16 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, switch (CC) { case X86::COND_NP_OR_E: // Synthesize NP_OR_E with two branches. - BuildMI(&MBB, dl, get(X86::JNP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB); ++Count; - BuildMI(&MBB, dl, get(X86::JE)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB); ++Count; break; case X86::COND_NE_OR_P: // Synthesize NE_OR_P with two branches. - BuildMI(&MBB, dl, get(X86::JNE)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB); ++Count; - BuildMI(&MBB, dl, get(X86::JP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB); ++Count; break; default: { @@ -1834,7 +1824,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, } if (FBB) { // Two-way Conditional branch. Insert the second branch. - BuildMI(&MBB, dl, get(X86::JMP)).addMBB(FBB); + BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(FBB); ++Count; } return Count; @@ -1860,7 +1850,7 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, CommonRC = SrcRC; else if (!DestRC->hasSubClass(SrcRC)) { // Neither of GR64_NOREX or GR64_NOSP is a superclass of the other, - // but we want to copy then as GR64. Similarly, for GR32_NOREX and + // but we want to copy them as GR64. Similarly, for GR32_NOREX and // GR32_NOSP, copy as GR32. if (SrcRC->hasSuperClass(&X86::GR64RegClass) && DestRC->hasSuperClass(&X86::GR64RegClass)) @@ -3556,6 +3546,14 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, } } break; + + case X86II::MRM_C1: + case X86II::MRM_C8: + case X86II::MRM_C9: + case X86II::MRM_E8: + case X86II::MRM_F0: + FinalSize += 2; + break; } case X86II::MRMInitReg: diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index a6b3863..5111719 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -268,6 +268,18 @@ namespace X86II { // MRMInitReg - This form is used for instructions whose source and // destinations are the same register. MRMInitReg = 32, + + //// MRM_C1 - A mod/rm byte of exactly 0xC1. + MRM_C1 = 33, + MRM_C2 = 34, + MRM_C3 = 35, + MRM_C4 = 36, + MRM_C8 = 37, + MRM_C9 = 38, + MRM_E8 = 39, + MRM_F0 = 40, + MRM_F8 = 41, + MRM_F9 = 42, FormMask = 63, @@ -331,11 +343,13 @@ namespace X86II { // This three-bit field describes the size of an immediate operand. Zero is // unused so that we can tell if we forgot to set a value. 
ImmShift = 13, - ImmMask = 7 << ImmShift, - Imm8 = 1 << ImmShift, - Imm16 = 2 << ImmShift, - Imm32 = 3 << ImmShift, - Imm64 = 4 << ImmShift, + ImmMask = 7 << ImmShift, + Imm8 = 1 << ImmShift, + Imm8PCRel = 2 << ImmShift, + Imm16 = 3 << ImmShift, + Imm32 = 4 << ImmShift, + Imm32PCRel = 5 << ImmShift, + Imm64 = 6 << ImmShift, //===------------------------------------------------------------------===// // FP Instruction Classification... Zero is non-fp instruction. @@ -396,15 +410,37 @@ namespace X86II { return TSFlags >> X86II::OpcodeShift; } + static inline bool hasImm(unsigned TSFlags) { + return (TSFlags & X86II::ImmMask) != 0; + } + /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field /// of the specified instruction. static inline unsigned getSizeOfImm(unsigned TSFlags) { switch (TSFlags & X86II::ImmMask) { default: assert(0 && "Unknown immediate size"); - case X86II::Imm8: return 1; - case X86II::Imm16: return 2; - case X86II::Imm32: return 4; - case X86II::Imm64: return 8; + case X86II::Imm8: + case X86II::Imm8PCRel: return 1; + case X86II::Imm16: return 2; + case X86II::Imm32: + case X86II::Imm32PCRel: return 4; + case X86II::Imm64: return 8; + } + } + + /// isImmPCRel - Return true if the immediate of the specified instruction's + /// TSFlags indicates that it is pc relative. + static inline unsigned isImmPCRel(unsigned TSFlags) { + switch (TSFlags & X86II::ImmMask) { + default: assert(0 && "Unknown immediate size"); + case X86II::Imm8PCRel: + case X86II::Imm32PCRel: + return true; + case X86II::Imm8: + case X86II::Imm16: + case X86II::Imm32: + case X86II::Imm64: + return false; } } } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index f0b4239..8a6ff54 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -65,7 +65,7 @@ def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>, def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>; -def SDTX86RdTsc : SDTypeProfile<0, 0, []>; +def SDTX86Void : SDTypeProfile<0, 0, []>; def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; @@ -143,7 +143,7 @@ def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr, [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore, SDNPMayLoad]>; -def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG",SDTX86RdTsc, +def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void, [SDNPHasChain, SDNPOutFlag, SDNPSideEffect]>; def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>; @@ -178,6 +178,9 @@ def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags, def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; +def X86MingwAlloca : SDNode<"X86ISD::MINGW_ALLOCA", SDTX86Void, + [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; + //===----------------------------------------------------------------------===// // X86 Operand Definitions. // @@ -343,18 +346,37 @@ def X86_COND_O : PatLeaf<(i8 13)>; def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE def X86_COND_S : PatLeaf<(i8 15)>; -def i16immSExt8 : PatLeaf<(i16 imm), [{ - // i16immSExt8 predicate - True if the 16-bit immediate fits in a 8-bit - // sign extended field. - return (int16_t)N->getZExtValue() == (int8_t)N->getZExtValue(); +def immSext8 : PatLeaf<(imm), [{ + return N->getSExtValue() == (int8_t)N->getSExtValue(); }]>; -def i32immSExt8 : PatLeaf<(i32 imm), [{ - // i32immSExt8 predicate - True if the 32-bit immediate fits in a 8-bit - // sign extended field. 
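The immSext8 predicate introduced above folds the old per-width i16immSExt8/i32immSExt8 checks into one rule: an immediate may use the sign-extended imm8 encoding exactly when it survives a round trip through int8_t. A standalone sketch of that test (the function name is illustrative):

#include <cassert>
#include <cstdint>

// True if Imm can use the sign-extended 8-bit immediate form, i.e. the
// same check as immSext8: getSExtValue() == (int8_t)getSExtValue().
static bool fitsInSExt8(int64_t Imm) {
  return Imm == static_cast<int8_t>(Imm);
}

int main() {
  assert(fitsInSExt8(127) && fitsInSExt8(-128) && fitsInSExt8(-1));
  assert(!fitsInSExt8(128) && !fitsInSExt8(-129));
  return 0;
}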
- return (int32_t)N->getZExtValue() == (int8_t)N->getZExtValue(); +def i16immSExt8 : PatLeaf<(i16 immSext8)>; +def i32immSExt8 : PatLeaf<(i32 immSext8)>; + +/// Load patterns: these constraint the match to the right address space. +def dsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + if (PT->getAddressSpace() > 255) + return false; + return true; }]>; +def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + return PT->getAddressSpace() == 256; + return false; +}]>; + +def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + return PT->getAddressSpace() == 257; + return false; +}]>; + + // Helper fragments for loads. // It's always safe to treat a anyext i16 load as a i32 load if the i16 is // known to be 32-bit aligned or better. Ditto for i8 to i16. @@ -372,8 +394,7 @@ def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{ return false; }]>; -def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), -[{ +def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),[{ LoadSDNode *LD = cast<LoadSDNode>(N); if (const Value *Src = LD->getSrcValue()) if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) @@ -399,72 +420,11 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{ return false; }]>; -def nvloadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{ - LoadSDNode *LD = cast<LoadSDNode>(N); - if (const Value *Src = LD->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - if (PT->getAddressSpace() > 255) - return false; - if (LD->isVolatile()) - return false; - ISD::LoadExtType ExtType = LD->getExtensionType(); - if (ExtType == ISD::NON_EXTLOAD) - return true; - if (ExtType == ISD::EXTLOAD) - return LD->getAlignment() >= 4; - return false; -}]>; - -def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - return PT->getAddressSpace() == 256; - return false; -}]>; - -def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - return PT->getAddressSpace() == 257; - return false; -}]>; - -def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr)), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - if (PT->getAddressSpace() > 255) - return false; - return true; -}]>; -def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr)), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - if (PT->getAddressSpace() > 255) - return false; - return true; -}]>; - -def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr)), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - if (PT->getAddressSpace() > 255) - return false; - return true; -}]>; -def loadf64 : 
PatFrag<(ops node:$ptr), (f64 (load node:$ptr)), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - if (PT->getAddressSpace() > 255) - return false; - return true; -}]>; -def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr)), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - if (PT->getAddressSpace() > 255) - return false; - return true; -}]>; +def loadi8 : PatFrag<(ops node:$ptr), (i8 (dsload node:$ptr))>; +def loadi64 : PatFrag<(ops node:$ptr), (i64 (dsload node:$ptr))>; +def loadf32 : PatFrag<(ops node:$ptr), (f32 (dsload node:$ptr))>; +def loadf64 : PatFrag<(ops node:$ptr), (f64 (dsload node:$ptr))>; +def loadf80 : PatFrag<(ops node:$ptr), (f80 (dsload node:$ptr))>; def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>; def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>; @@ -562,7 +522,7 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), } // x86-64 va_start lowering magic. -let usesCustomInserter = 1 in +let usesCustomInserter = 1 in { def VASTART_SAVE_XMM_REGS : I<0, Pseudo, (outs), (ins GR8:$al, @@ -573,6 +533,19 @@ def VASTART_SAVE_XMM_REGS : I<0, Pseudo, imm:$regsavefi, imm:$offset)]>; +// Dynamic stack allocation yields _alloca call for Cygwin/Mingw targets. Calls +// to _alloca is needed to probe the stack when allocating more than 4k bytes in +// one go. Touching the stack at 4K increments is necessary to ensure that the +// guard pages used by the OS virtual memory manager are allocated in correct +// sequence. +// The main point of having separate instruction are extra unmodelled effects +// (compared to ordinary calls) like stack pointer change. + +def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins), + "# dynamic stack allocation", + [(X86MingwAlloca)]>; +} + // Nop let neverHasSideEffects = 1 in { def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>; @@ -596,7 +569,7 @@ let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in "", []>; //===----------------------------------------------------------------------===// -// Control Flow Instructions... +// Control Flow Instructions. // // Return instructions. @@ -614,16 +587,46 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, "lret\t$amt", []>; } -// All branches are RawFrm, Void, Branch, and Terminators -let isBranch = 1, isTerminator = 1 in - class IBr<bits<8> opcode, dag ins, string asm, list<dag> pattern> : - I<opcode, RawFrm, (outs), ins, asm, pattern>; +// Unconditional branches. +let isBarrier = 1, isBranch = 1, isTerminator = 1 in { + def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst), + "jmp\t$dst", [(br bb:$dst)]>; + def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst), + "jmp\t$dst", []>; +} -let isBranch = 1, isBarrier = 1 in { - def JMP : IBr<0xE9, (ins brtarget:$dst), "jmp\t$dst", [(br bb:$dst)]>; - def JMP8 : IBr<0xEB, (ins brtarget8:$dst), "jmp\t$dst", []>; +// Conditional Branches. 
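The MINGW_ALLOCA comment above describes why the probe exists: Windows keeps only a guard page below the committed stack, so a frame that grows by more than 4 KiB at once has to touch every intervening page, in stack-growth order, before using the memory. A purely conceptual sketch of what the emitted _alloca probe does (this is not code the patch adds; the local buffer stands in for the newly allocated stack area):

#include <cstddef>

int main() {
  const std::size_t PageSize = 4096;
  // Stand-in for a stack frame that grows by more than one page at once.
  volatile char NewArea[3 * 4096];
  // Touch one byte per 4 KiB page, highest address first (the stack grows
  // down), so each OS guard page is hit in sequence and the page is
  // committed before the function stores real data into the area.
  for (std::size_t Touched = 0; Touched < sizeof(NewArea); Touched += PageSize)
    NewArea[sizeof(NewArea) - 1 - Touched] = 0;
  return 0;
}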
+let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in { + multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> { + def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>; + def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm, + [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB; + } } +defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>; +defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>; +defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>; +defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>; +defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>; +defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>; +defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>; +defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>; +defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>; +defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>; +defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>; +defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>; +defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>; +defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>; +defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>; +defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>; + +// FIXME: What about the CX/RCX versions of this instruction? +let Uses = [ECX], isBranch = 1, isTerminator = 1 in + def JCXZ8 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), + "jcxz\t$dst", []>; + + // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst", @@ -644,63 +647,6 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { "ljmp{l}\t{*}$dst", []>; } -// Conditional branches -let Uses = [EFLAGS] in { -// Short conditional jumps -def JO8 : IBr<0x70, (ins brtarget8:$dst), "jo\t$dst", []>; -def JNO8 : IBr<0x71, (ins brtarget8:$dst), "jno\t$dst", []>; -def JB8 : IBr<0x72, (ins brtarget8:$dst), "jb\t$dst", []>; -def JAE8 : IBr<0x73, (ins brtarget8:$dst), "jae\t$dst", []>; -def JE8 : IBr<0x74, (ins brtarget8:$dst), "je\t$dst", []>; -def JNE8 : IBr<0x75, (ins brtarget8:$dst), "jne\t$dst", []>; -def JBE8 : IBr<0x76, (ins brtarget8:$dst), "jbe\t$dst", []>; -def JA8 : IBr<0x77, (ins brtarget8:$dst), "ja\t$dst", []>; -def JS8 : IBr<0x78, (ins brtarget8:$dst), "js\t$dst", []>; -def JNS8 : IBr<0x79, (ins brtarget8:$dst), "jns\t$dst", []>; -def JP8 : IBr<0x7A, (ins brtarget8:$dst), "jp\t$dst", []>; -def JNP8 : IBr<0x7B, (ins brtarget8:$dst), "jnp\t$dst", []>; -def JL8 : IBr<0x7C, (ins brtarget8:$dst), "jl\t$dst", []>; -def JGE8 : IBr<0x7D, (ins brtarget8:$dst), "jge\t$dst", []>; -def JLE8 : IBr<0x7E, (ins brtarget8:$dst), "jle\t$dst", []>; -def JG8 : IBr<0x7F, (ins brtarget8:$dst), "jg\t$dst", []>; - -def JCXZ8 : IBr<0xE3, (ins brtarget8:$dst), "jcxz\t$dst", []>; - -def JE : IBr<0x84, (ins brtarget:$dst), "je\t$dst", - [(X86brcond bb:$dst, X86_COND_E, EFLAGS)]>, TB; -def JNE : IBr<0x85, (ins brtarget:$dst), "jne\t$dst", - [(X86brcond bb:$dst, X86_COND_NE, EFLAGS)]>, TB; -def JL : IBr<0x8C, (ins brtarget:$dst), "jl\t$dst", - [(X86brcond bb:$dst, X86_COND_L, EFLAGS)]>, TB; -def JLE : IBr<0x8E, (ins brtarget:$dst), "jle\t$dst", - [(X86brcond bb:$dst, X86_COND_LE, EFLAGS)]>, TB; -def JG : IBr<0x8F, (ins brtarget:$dst), "jg\t$dst", - [(X86brcond bb:$dst, X86_COND_G, EFLAGS)]>, TB; -def JGE : IBr<0x8D, (ins brtarget:$dst), "jge\t$dst", - [(X86brcond bb:$dst, X86_COND_GE, EFLAGS)]>, TB; - -def JB : IBr<0x82, (ins brtarget:$dst), "jb\t$dst", - [(X86brcond bb:$dst, X86_COND_B, 
EFLAGS)]>, TB; -def JBE : IBr<0x86, (ins brtarget:$dst), "jbe\t$dst", - [(X86brcond bb:$dst, X86_COND_BE, EFLAGS)]>, TB; -def JA : IBr<0x87, (ins brtarget:$dst), "ja\t$dst", - [(X86brcond bb:$dst, X86_COND_A, EFLAGS)]>, TB; -def JAE : IBr<0x83, (ins brtarget:$dst), "jae\t$dst", - [(X86brcond bb:$dst, X86_COND_AE, EFLAGS)]>, TB; - -def JS : IBr<0x88, (ins brtarget:$dst), "js\t$dst", - [(X86brcond bb:$dst, X86_COND_S, EFLAGS)]>, TB; -def JNS : IBr<0x89, (ins brtarget:$dst), "jns\t$dst", - [(X86brcond bb:$dst, X86_COND_NS, EFLAGS)]>, TB; -def JP : IBr<0x8A, (ins brtarget:$dst), "jp\t$dst", - [(X86brcond bb:$dst, X86_COND_P, EFLAGS)]>, TB; -def JNP : IBr<0x8B, (ins brtarget:$dst), "jnp\t$dst", - [(X86brcond bb:$dst, X86_COND_NP, EFLAGS)]>, TB; -def JO : IBr<0x80, (ins brtarget:$dst), "jo\t$dst", - [(X86brcond bb:$dst, X86_COND_O, EFLAGS)]>, TB; -def JNO : IBr<0x81, (ins brtarget:$dst), "jno\t$dst", - [(X86brcond bb:$dst, X86_COND_NO, EFLAGS)]>, TB; -} // Uses = [EFLAGS] // Loop instructions @@ -721,7 +667,7 @@ let isCall = 1 in XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], Uses = [ESP] in { - def CALLpcrel32 : Ii32<0xE8, RawFrm, + def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins i32imm_pcrel:$dst,variable_ops), "call\t$dst", []>; def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops), @@ -761,8 +707,10 @@ def TCRETURNri : I<0, Pseudo, (outs), "#TC_RETURN $dst $offset", []>; -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in - def TAILJMPd : IBr<0xE9, (ins i32imm_pcrel:$dst, variable_ops), +// FIXME: The should be pseudo instructions that are lowered when going to +// mcinst. +let isCall = 1, isBranch = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in + def TAILJMPd : Ii32<0xE9, RawFrm, (outs),(ins i32imm_pcrel:$dst,variable_ops), "jmp\t$dst # TAILCALL", []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in @@ -929,6 +877,9 @@ let Defs = [RAX, RDX] in def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB; +let Defs = [RAX, RCX, RDX] in +def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB; + let isBarrier = 1, hasCtrlDep = 1 in { def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB; } @@ -1059,7 +1010,7 @@ def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1 in { def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(set GR8:$dst, (loadi8 addr:$src))]>; @@ -1093,7 +1044,7 @@ def MOV8mr_NOREX : I<0x88, MRMDestMem, (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>; let mayLoad = 1, - canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in + canFoldAsLoad = 1, isReMaterializable = 1 in def MOV8rm_NOREX : I<0x8A, MRMSrcMem, (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>; @@ -1115,7 +1066,10 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_32:$dst), (ins GR32:$src), // // Extra precision multiplication -let Defs = [AL,AH,EFLAGS], Uses = [AL] in + +// AL is really implied by AX, by the registers in Defs must match the +// SDNode results (i8, i32). 
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", // FIXME: Used for 8-bit mul, ignore result upper 8 bits. // This probably ought to be moved to a def : Pat<> if the @@ -1133,7 +1087,7 @@ def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), "mul{l}\t$src", []>; // EAX,EDX = EAX*GR32 -let Defs = [AL,AH,EFLAGS], Uses = [AL] in +let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), "mul{b}\t$src", // FIXME: Used for 8-bit mul, ignore result upper 8 bits. @@ -1155,7 +1109,7 @@ def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), } let neverHasSideEffects = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AL] in +let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>; // AL,AH = AL*GR8 let Defs = [AX,DX,EFLAGS], Uses = [AX] in @@ -1165,7 +1119,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>; // EAX,EDX = EAX*GR32 let mayLoad = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AL] in +let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src), "imul{b}\t$src", []>; // AL,AH = AL*[mem8] let Defs = [AX,DX,EFLAGS], Uses = [AX] in @@ -1178,7 +1132,7 @@ def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), } // neverHasSideEffects // unsigned division/remainder -let Defs = [AL,AH,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "div{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in @@ -1188,7 +1142,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX "div{l}\t$src", []>; let mayLoad = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH "div{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in @@ -1201,7 +1155,7 @@ def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), } // Signed division/remainder. 
-let Defs = [AL,AH,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "idiv{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in @@ -1211,7 +1165,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX "idiv{l}\t$src", []>; let mayLoad = 1, mayLoad = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH "idiv{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in @@ -2328,98 +2282,100 @@ let isTwoAddress = 0 in { def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src), "rcl{b}\t{1, $dst|$dst, 1}", []>; -def RCL8m1 : I<0xD0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src), - "rcl{b}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src), "rcl{b}\t{%cl, $dst|$dst, CL}", []>; -def RCL8mCL : I<0xD2, MRM2m, (outs i8mem:$dst), (ins i8mem:$src), - "rcl{b}\t{%cl, $dst|$dst, CL}", []>; } def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt), "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL8mi : Ii8<0xC0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt), - "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src), "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; -def RCL16m1 : I<0xD1, MRM2m, (outs i16mem:$dst), (ins i16mem:$src), - "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; let Uses = [CL] in { def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src), "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; -def RCL16mCL : I<0xD3, MRM2m, (outs i16mem:$dst), (ins i16mem:$src), - "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; } def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; -def RCL16mi : Ii8<0xC1, MRM2m, (outs i16mem:$dst), - (ins i16mem:$src, i8imm:$cnt), - "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src), "rcl{l}\t{1, $dst|$dst, 1}", []>; -def RCL32m1 : I<0xD1, MRM2m, (outs i32mem:$dst), (ins i32mem:$src), - "rcl{l}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src), "rcl{l}\t{%cl, $dst|$dst, CL}", []>; -def RCL32mCL : I<0xD3, MRM2m, (outs i32mem:$dst), (ins i32mem:$src), - "rcl{l}\t{%cl, $dst|$dst, CL}", []>; } def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL32mi : Ii8<0xC1, MRM2m, (outs i32mem:$dst), - (ins i32mem:$src, i8imm:$cnt), - "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src), "rcr{b}\t{1, $dst|$dst, 1}", []>; -def RCR8m1 : I<0xD0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src), - "rcr{b}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src), "rcr{b}\t{%cl, $dst|$dst, CL}", []>; -def RCR8mCL : I<0xD2, MRM3m, (outs i8mem:$dst), (ins i8mem:$src), - "rcr{b}\t{%cl, $dst|$dst, CL}", []>; } def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt), "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR8mi : Ii8<0xC0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt), - "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src), "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; -def 
RCR16m1 : I<0xD1, MRM3m, (outs i16mem:$dst), (ins i16mem:$src), - "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; let Uses = [CL] in { def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src), "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; -def RCR16mCL : I<0xD3, MRM3m, (outs i16mem:$dst), (ins i16mem:$src), - "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; } def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; -def RCR16mi : Ii8<0xC1, MRM3m, (outs i16mem:$dst), - (ins i16mem:$src, i8imm:$cnt), - "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src), "rcr{l}\t{1, $dst|$dst, 1}", []>; -def RCR32m1 : I<0xD1, MRM3m, (outs i32mem:$dst), (ins i32mem:$src), - "rcr{l}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src), "rcr{l}\t{%cl, $dst|$dst, CL}", []>; -def RCR32mCL : I<0xD3, MRM3m, (outs i32mem:$dst), (ins i32mem:$src), - "rcr{l}\t{%cl, $dst|$dst, CL}", []>; } def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR32mi : Ii8<0xC1, MRM3m, (outs i32mem:$dst), - (ins i32mem:$src, i8imm:$cnt), + +let isTwoAddress = 0 in { +def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst), + "rcl{b}\t{1, $dst|$dst, 1}", []>; +def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt), + "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst), + "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; +def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt), + "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst), + "rcl{l}\t{1, $dst|$dst, 1}", []>; +def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt), + "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst), + "rcr{b}\t{1, $dst|$dst, 1}", []>; +def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt), + "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst), + "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; +def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt), + "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst), + "rcr{l}\t{1, $dst|$dst, 1}", []>; +def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt), "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; +let Uses = [CL] in { +def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst), + "rcl{b}\t{%cl, $dst|$dst, CL}", []>; +def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst), + "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst), + "rcl{l}\t{%cl, $dst|$dst, CL}", []>; +def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst), + "rcr{b}\t{%cl, $dst|$dst, CL}", []>; +def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst), + "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst), + "rcr{l}\t{%cl, $dst|$dst, CL}", []>; +} +} + // FIXME: provide shorter instructions when imm8 == 1 let Uses = [CL] in { def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), @@ -4100,7 +4056,7 @@ def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB; -def INVLPG : I<0x01, RawFrm, (outs), 
(ins), "invlpg", []>, TB; +def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB; def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins), "str{w}\t{$dst}", []>, TB; @@ -4262,17 +4218,17 @@ def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB; // VMX instructions // 66 0F 38 80 -def INVEPT : I<0x38, RawFrm, (outs), (ins), "invept", []>, OpSize, TB; +def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8; // 66 0F 38 81 -def INVVPID : I<0x38, RawFrm, (outs), (ins), "invvpid", []>, OpSize, TB; +def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8; // 0F 01 C1 -def VMCALL : I<0x01, RawFrm, (outs), (ins), "vmcall", []>, TB; +def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), "vmclear\t$vmcs", []>, OpSize, TB; // 0F 01 C2 -def VMLAUNCH : I<0x01, RawFrm, (outs), (ins), "vmlaunch", []>, TB; +def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB; // 0F 01 C3 -def VMRESUME : I<0x01, RawFrm, (outs), (ins), "vmresume", []>, TB; +def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB; def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), "vmptrld\t$vmcs", []>, TB; def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins), @@ -4294,7 +4250,7 @@ def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB; // 0F 01 C4 -def VMXOFF : I<0x01, RawFrm, (outs), (ins), "vmxoff", []>, OpSize; +def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB; def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon), "vmxon\t{$vmxon}", []>, XD; @@ -4462,12 +4418,6 @@ def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>; def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>; def : Pat<(i32 (anyext GR16:$src)), (MOVZX32rr16 GR16:$src)>; -// (and (i32 load), 255) -> (zextload i8) -def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 255))), - (MOVZX32rm8 addr:$src)>; -def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 65535))), - (MOVZX32rm16 addr:$src)>; - //===----------------------------------------------------------------------===// // Some peepholes //===----------------------------------------------------------------------===// @@ -4563,43 +4513,43 @@ def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>; def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>; // (shl x (and y, 31)) ==> (shl x, y) -def : Pat<(shl GR8:$src1, (and CL:$amt, 31)), +def : Pat<(shl GR8:$src1, (and CL, 31)), (SHL8rCL GR8:$src1)>; -def : Pat<(shl GR16:$src1, (and CL:$amt, 31)), +def : Pat<(shl GR16:$src1, (and CL, 31)), (SHL16rCL GR16:$src1)>; -def : Pat<(shl GR32:$src1, (and CL:$amt, 31)), +def : Pat<(shl GR32:$src1, (and CL, 31)), (SHL32rCL GR32:$src1)>; -def : Pat<(store (shl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst), (SHL8mCL addr:$dst)>; -def : Pat<(store (shl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst), (SHL16mCL addr:$dst)>; -def : Pat<(store (shl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst), (SHL32mCL addr:$dst)>; -def : Pat<(srl GR8:$src1, (and CL:$amt, 31)), +def : Pat<(srl GR8:$src1, (and CL, 31)), (SHR8rCL GR8:$src1)>; -def : Pat<(srl GR16:$src1, (and CL:$amt, 31)), +def : 
Pat<(srl GR16:$src1, (and CL, 31)), (SHR16rCL GR16:$src1)>; -def : Pat<(srl GR32:$src1, (and CL:$amt, 31)), +def : Pat<(srl GR32:$src1, (and CL, 31)), (SHR32rCL GR32:$src1)>; -def : Pat<(store (srl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst), (SHR8mCL addr:$dst)>; -def : Pat<(store (srl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst), (SHR16mCL addr:$dst)>; -def : Pat<(store (srl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst), (SHR32mCL addr:$dst)>; -def : Pat<(sra GR8:$src1, (and CL:$amt, 31)), +def : Pat<(sra GR8:$src1, (and CL, 31)), (SAR8rCL GR8:$src1)>; -def : Pat<(sra GR16:$src1, (and CL:$amt, 31)), +def : Pat<(sra GR16:$src1, (and CL, 31)), (SAR16rCL GR16:$src1)>; -def : Pat<(sra GR32:$src1, (and CL:$amt, 31)), +def : Pat<(sra GR32:$src1, (and CL, 31)), (SAR32rCL GR32:$src1)>; -def : Pat<(store (sra (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst), (SAR8mCL addr:$dst)>; -def : Pat<(store (sra (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst), (SAR16mCL addr:$dst)>; -def : Pat<(store (sra (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst), (SAR32mCL addr:$dst)>; // (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c) @@ -4620,11 +4570,11 @@ def : Pat<(store (or (srl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))), addr:$dst), (SHRD32mrCL addr:$dst, GR32:$src2)>; -def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)), +def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)), (SHRD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>; def : Pat<(store (shrd (loadi32 addr:$dst), (i8 imm:$amt1), - GR32:$src2, (i8 imm:$amt2)), addr:$dst), + GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst), (SHRD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>; // (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c) @@ -4645,11 +4595,11 @@ def : Pat<(store (or (shl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))), addr:$dst), (SHLD32mrCL addr:$dst, GR32:$src2)>; -def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)), +def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)), (SHLD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>; def : Pat<(store (shld (loadi32 addr:$dst), (i8 imm:$amt1), - GR32:$src2, (i8 imm:$amt2)), addr:$dst), + GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst), (SHLD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>; // (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c) @@ -4670,11 +4620,11 @@ def : Pat<(store (or (srl (loadi16 addr:$dst), (i8 (trunc CX:$amt))), addr:$dst), (SHRD16mrCL addr:$dst, GR16:$src2)>; -def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)), +def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)), (SHRD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>; def : Pat<(store (shrd (loadi16 addr:$dst), (i8 imm:$amt1), - GR16:$src2, (i8 imm:$amt2)), addr:$dst), + GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst), (SHRD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; // (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c) @@ -4695,11 +4645,11 @@ def : Pat<(store (or (shl (loadi16 addr:$dst), (i8 (trunc CX:$amt))), addr:$dst), (SHLD16mrCL addr:$dst, GR16:$src2)>; 
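The (and CL, 31) peepholes above can drop the explicit mask because the hardware already reduces a 32-bit shift count to its low five bits, so shifting by the unmasked amount in %cl gives the same result. A standalone check of that equivalence (the helper emulates the hardware masking; names are illustrative):

#include <cassert>
#include <cstdint>

// Emulates the x86 semantics: a 32-bit shift uses only the low five bits
// of the count register, so the explicit "& 31" in the IR is redundant.
static uint32_t shl32ByCL(uint32_t X, uint8_t CL) { return X << (CL & 31); }

int main() {
  const uint32_t X = 0xDEADBEEFu;
  // "shl X, (and Amt, 31)" and a plain shl-by-%cl with the unmasked amount
  // agree for every possible byte value of the count.
  for (unsigned Amt = 0; Amt < 256; ++Amt)
    assert(shl32ByCL(X, static_cast<uint8_t>(Amt)) == (X << (Amt & 31)));
  return 0;
}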
-def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)), +def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)), (SHLD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>; def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1), - GR16:$src2, (i8 imm:$amt2)), addr:$dst), + GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst), (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; // (anyext (setcc_carry)) -> (setcc_carry) diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 89f020c..c8e0723 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -141,7 +141,7 @@ def MMX_MOVD64rrv164 : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), let neverHasSideEffects = 1 in def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), "movq\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (load_mmx addr:$src))]>; @@ -426,13 +426,15 @@ def MMX_CVTTPS2PIrm : MMXI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), // Extract / Insert -def MMX_X86pextrw : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, []>, []>; -def MMX_X86pinsrw : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, []>, []>; +def MMX_X86pinsrw : SDNode<"X86ISD::MMX_PINSRW", + SDTypeProfile<1, 3, [SDTCisVT<0, v4i16>, SDTCisSameAs<0,1>, + SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; + def MMX_PEXTRWri : MMXIi8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src1, i16i8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, (MMX_X86pextrw (v4i16 VR64:$src1), + [(set GR32:$dst, (X86pextrw (v4i16 VR64:$src1), (iPTR imm:$src2)))]>; let Constraints = "$src1 = $dst" in { def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg, @@ -597,13 +599,6 @@ let AddedComplexity = 10 in { (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>; } -// Patterns to perform vector shuffling with a zeroed out vector. -let AddedComplexity = 20 in { - def : Pat<(bc_v2i32 (mmx_unpckl immAllZerosV, - (v2i32 (scalar_to_vector (load_mmx addr:$src))))), - (MMX_PUNPCKLDQrm VR64:$src, VR64:$src)>; -} - // Some special case PANDN patterns. // FIXME: Get rid of these. 
def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index e26c979..2743dba 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -160,6 +160,32 @@ def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>; def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>; def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>; +// MOVNT Support +// Like 'store', but requires the non-temporal bit to be set +def nontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) + return ST->isNonTemporal(); + return false; +}]>; + +def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) + return ST->isNonTemporal() && !ST->isTruncatingStore() && + ST->getAddressingMode() == ISD::UNINDEXED && + ST->getAlignment() >= 16; + return false; +}]>; + +def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) + return ST->isNonTemporal() && + ST->getAlignment() < 16; + return false; +}]>; + def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>; def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>; def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>; @@ -344,18 +370,56 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in { // SSE1 Instructions //===----------------------------------------------------------------------===// -// Move Instructions -let neverHasSideEffects = 1 in -def MOVSSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), - "movss\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +// Move Instructions. Register-to-register movss is not used for FR32 +// register copies because it's a partial register update; FsMOVAPSrr is +// used instead. Register-to-register movss is not modeled as an INSERT_SUBREG +// because INSERT_SUBREG requires that the insert be implementable in terms of +// a copy, and just mentioned, we don't use movss for copies. +let Constraints = "$src1 = $dst" in +def MOVSSrr : SSI<0x10, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, FR32:$src2), + "movss\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (movl VR128:$src1, (scalar_to_vector FR32:$src2)))]>; + +// Extract the low 32-bit value from one vector and insert it into another. +let AddedComplexity = 15 in +def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)), + (MOVSSrr VR128:$src1, + (EXTRACT_SUBREG (v4f32 VR128:$src2), x86_subreg_ss))>; + +// Implicitly promote a 32-bit scalar to a vector. +def : Pat<(v4f32 (scalar_to_vector FR32:$src)), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, x86_subreg_ss)>; + +// Loading from memory automatically zeroing upper bits. +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), "movss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (loadf32 addr:$src))]>; + +// MOVSSrm zeros the high parts of the register; represent this +// with SUBREG_TO_REG. 
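SUBREG_TO_REG records that the upper lanes of the 128-bit register are already zero instead of emitting anything to zero them, which is exactly the value a movss load produces. A rough standalone model of that value (illustrative only, not LLVM code):

#include <array>
#include <cassert>

// What MOVSSrm yields when the destination is viewed as a v4f32: the loaded
// scalar in lane 0 and zeros in lanes 1-3. SUBREG_TO_REG states this fact so
// no extra zeroing instruction is needed.
static std::array<float, 4> zextLoadF32(const float *Ptr) {
  std::array<float, 4> V = { *Ptr, 0.0f, 0.0f, 0.0f };
  return V;
}

int main() {
  float Scalar = 3.5f;
  std::array<float, 4> V = zextLoadF32(&Scalar);
  assert(V[0] == 3.5f && V[1] == 0.0f && V[2] == 0.0f && V[3] == 0.0f);
  return 0;
}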
+let AddedComplexity = 20 in { +def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>; +def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>; +def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>; +} + +// Store scalar value to memory. def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), "movss\t{$src, $dst|$dst, $src}", [(store FR32:$src, addr:$dst)]>; +// Extract and store. +def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), + addr:$dst), + (MOVSSmr addr:$dst, + (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>; + // Conversion instructions def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src), "cvttss2si\t{$src, $dst|$dst, $src}", @@ -518,7 +582,7 @@ def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), // Alias instruction to load FR32 from f128mem using movaps. Upper bits are // disregarded. -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), "movaps\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>; @@ -715,7 +779,7 @@ defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin, let neverHasSideEffects = 1 in def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVAPSrm : PSI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movaps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (alignedloadv4f32 addr:$src))]>; @@ -727,7 +791,7 @@ def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), let neverHasSideEffects = 1 in def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movups\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movups\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (loadv4f32 addr:$src))]>; @@ -736,7 +800,7 @@ def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(store (v4f32 VR128:$src), addr:$dst)]>; // Intrinsic forms of MOVUPS load and store -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movups\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>; @@ -762,6 +826,9 @@ let Constraints = "$src1 = $dst" in { } // Constraints = "$src1 = $dst" +def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), + (MOVHPSrm VR128:$src1, addr:$src2)>; + def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), @@ -793,9 +860,9 @@ def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), let AddedComplexity = 20 in { def : Pat<(v4f32 (movddup VR128:$src, (undef))), - (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; + (MOVLHPSrr VR128:$src, VR128:$src)>; def : Pat<(v2i64 (movddup 
VR128:$src, (undef))), - (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; + (MOVLHPSrr VR128:$src, VR128:$src)>; } @@ -1010,10 +1077,33 @@ def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src), "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>; // Non-temporal stores -def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), +def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movntps\t{$src, $dst|$dst, $src}", [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>; +let AddedComplexity = 400 in { // Prefer non-temporal versions +def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; + +def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>; + +def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), + (MOVNTDQ_64mr VR128:$src, addr:$dst)>; + +def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), + "movnti\t{$src, $dst|$dst, $src}", + [(nontemporalstore (i32 GR32:$src), addr:$dst)]>, + TB, Requires<[HasSSE2]>; + +def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "movnti\t{$src, $dst|$dst, $src}", + [(nontemporalstore (i64 GR64:$src), addr:$dst)]>, + TB, Requires<[HasSSE2]>; +} + // Load, store, and memory fence def SFENCE : PSI<0xAE, MRM7r, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>; @@ -1032,84 +1122,73 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllZerosV))]>; -let Predicates = [HasSSE1] in { - def : Pat<(v2i64 immAllZerosV), (V_SET0)>; - def : Pat<(v8i16 immAllZerosV), (V_SET0)>; - def : Pat<(v16i8 immAllZerosV), (V_SET0)>; - def : Pat<(v2f64 immAllZerosV), (V_SET0)>; - def : Pat<(v4f32 immAllZerosV), (V_SET0)>; -} - -// FR32 to 128-bit vector conversion. -let isAsCheapAsAMove = 1 in -def MOVSS2PSrr : SSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR32:$src), - "movss\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v4f32 (scalar_to_vector FR32:$src)))]>; -def MOVSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src), - "movss\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>; - -// FIXME: may not be able to eliminate this movss with coalescing the src and -// dest register classes are different. We really want to write this pattern -// like this: -// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), -// (f32 FR32:$src)>; -let isAsCheapAsAMove = 1 in -def MOVPS2SSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins VR128:$src), - "movss\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (vector_extract (v4f32 VR128:$src), - (iPTR 0)))]>; -def MOVPS2SSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src), - "movss\t{$src, $dst|$dst, $src}", - [(store (f32 (vector_extract (v4f32 VR128:$src), - (iPTR 0))), addr:$dst)]>; - - -// Move to lower bits of a VR128, leaving upper bits alone. -// Three operand (but two address) aliases. 
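The MOVNT* patterns above only take the streaming-store path for genuinely non-temporal accesses: the alignednontemporalstore fragment defined earlier requires a plain, unindexed, non-truncating store with at least 16-byte alignment, and anything less aligned falls back to the unaligned fragment. A standalone sketch of that classification (struct and function names are illustrative):

#include <cassert>

// Mirrors the store fragments defined earlier: only a plain, unindexed,
// non-truncating store with >= 16-byte alignment takes the aligned
// MOVNTPS/MOVNTDQ path; other non-temporal stores take the unaligned path.
struct StoreInfo {
  bool NonTemporal;
  bool Truncating;
  bool Indexed;
  unsigned Alignment;
};

static bool isAlignedNonTemporal(const StoreInfo &ST) {
  return ST.NonTemporal && !ST.Truncating && !ST.Indexed && ST.Alignment >= 16;
}

static bool isUnalignedNonTemporal(const StoreInfo &ST) {
  return ST.NonTemporal && ST.Alignment < 16;
}

int main() {
  StoreInfo Aligned   = {true, false, false, 16};
  StoreInfo Unaligned = {true, false, false, 8};
  StoreInfo Ordinary  = {false, false, false, 16};
  assert(isAlignedNonTemporal(Aligned) && !isUnalignedNonTemporal(Aligned));
  assert(!isAlignedNonTemporal(Unaligned) && isUnalignedNonTemporal(Unaligned));
  assert(!isAlignedNonTemporal(Ordinary) && !isUnalignedNonTemporal(Ordinary));
  return 0;
}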
-let Constraints = "$src1 = $dst" in { -let neverHasSideEffects = 1 in - def MOVLSS2PSrr : SSI<0x10, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, FR32:$src2), - "movss\t{$src2, $dst|$dst, $src2}", []>; - - let AddedComplexity = 15 in - def MOVLPSrr : SSI<0x10, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "movss\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (movl VR128:$src1, VR128:$src2)))]>; -} +def : Pat<(v2i64 immAllZerosV), (V_SET0)>; +def : Pat<(v8i16 immAllZerosV), (V_SET0)>; +def : Pat<(v16i8 immAllZerosV), (V_SET0)>; +def : Pat<(v2f64 immAllZerosV), (V_SET0)>; +def : Pat<(v4f32 immAllZerosV), (V_SET0)>; -// Move to lower bits of a VR128 and zeroing upper bits. -// Loading from memory automatically zeroing upper bits. -let AddedComplexity = 20 in -def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src), - "movss\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v4f32 (X86vzmovl (v4f32 (scalar_to_vector - (loadf32 addr:$src))))))]>; - -def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (MOVZSS2PSrm addr:$src)>; +def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>; //===---------------------------------------------------------------------===// // SSE2 Instructions //===---------------------------------------------------------------------===// -// Move Instructions -let neverHasSideEffects = 1 in -def MOVSDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), - "movsd\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +// Move Instructions. Register-to-register movsd is not used for FR64 +// register copies because it's a partial register update; FsMOVAPDrr is +// used instead. Register-to-register movsd is not modeled as an INSERT_SUBREG +// because INSERT_SUBREG requires that the insert be implementable in terms of +// a copy, and just mentioned, we don't use movsd for copies. +let Constraints = "$src1 = $dst" in +def MOVSDrr : SDI<0x10, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, FR64:$src2), + "movsd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (movl VR128:$src1, (scalar_to_vector FR64:$src2)))]>; + +// Extract the low 64-bit value from one vector and insert it into another. +let AddedComplexity = 15 in +def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, + (EXTRACT_SUBREG (v2f64 VR128:$src2), x86_subreg_sd))>; + +// Implicitly promote a 64-bit scalar to a vector. +def : Pat<(v2f64 (scalar_to_vector FR64:$src)), + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, x86_subreg_sd)>; + +// Loading from memory automatically zeroing upper bits. +let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), "movsd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (loadf64 addr:$src))]>; + +// MOVSDrm zeros the high parts of the register; represent this +// with SUBREG_TO_REG. 
+let AddedComplexity = 20 in { +def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; +def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; +def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; +def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; +def : Pat<(v2f64 (X86vzload addr:$src)), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; +} + +// Store scalar value to memory. def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), "movsd\t{$src, $dst|$dst, $src}", [(store FR64:$src, addr:$dst)]>; +// Extract and store. +def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + addr:$dst), + (MOVSDmr addr:$dst, + (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>; + // Conversion instructions def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src), "cvttsd2si\t{$src, $dst|$dst, $src}", @@ -1163,7 +1242,8 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), Requires<[HasSSE2, OptForSize]>; def : Pat<(extloadf32 addr:$src), - (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>; + (CVTSS2SDrr (MOVSSrm addr:$src))>, + Requires<[HasSSE2, OptForSpeed]>; // Match intrinsics which expect XMM operand(s). def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), @@ -1282,7 +1362,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), // Alias instruction to load FR64 from f128mem using movapd. Upper bits are // disregarded. -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), "movapd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>; @@ -1480,7 +1560,7 @@ defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin, let neverHasSideEffects = 1 in def MOVAPDrr : PDI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movapd\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVAPDrm : PDI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movapd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (alignedloadv2f64 addr:$src))]>; @@ -2295,34 +2375,47 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>; // Non-temporal stores -def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>; -def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>; -def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), +def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>; +def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>; +def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), 
(ins i32mem:$dst, GR32:$src), "movnti\t{$src, $dst|$dst, $src}", [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>, TB, Requires<[HasSSE2]>; +let AddedComplexity = 400 in { // Prefer non-temporal versions +def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>; + +def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; + +def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst), + (MOVNTDQmr VR128:$src, addr:$dst)>; +} + // Flush cache def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>, TB, Requires<[HasSSE2]>; // Load, store, and memory fence -def LFENCE : I<0xAE, MRM5r, (outs), (ins), +def LFENCE : I<0xAE, MRM_E8, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>; -def MFENCE : I<0xAE, MRM6r, (outs), (ins), +def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; //TODO: custom lower this so as to never even generate the noop -def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss), +def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 0)), (NOOP)>; def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>; def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>; -def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss), +def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)), (MFENCE)>; // Alias instructions that map zero vector to pxor / xorp* for sse. @@ -2334,17 +2427,6 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllOnesV))]>; -// FR64 to 128-bit vector conversion. -let isAsCheapAsAMove = 1 in -def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v2f64 (scalar_to_vector FR64:$src)))]>; -def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>; - def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -2373,20 +2455,9 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), [(store (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))), addr:$dst)]>; -// FIXME: may not be able to eliminate this movss with coalescing the src and -// dest register classes are different. 
We really want to write this pattern -// like this: -// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), -// (f32 FR32:$src)>; -let isAsCheapAsAMove = 1 in -def MOVPD2SDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins VR128:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (vector_extract (v2f64 VR128:$src), - (iPTR 0)))]>; -def MOVPD2SDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(store (f64 (vector_extract (v2f64 VR128:$src), - (iPTR 0))), addr:$dst)]>; +def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>; + def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), @@ -2403,44 +2474,11 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>; - -// Move to lower bits of a VR128, leaving upper bits alone. -// Three operand (but two address) aliases. -let Constraints = "$src1 = $dst" in { - let neverHasSideEffects = 1 in - def MOVLSD2PDrr : SDI<0x10, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, FR64:$src2), - "movsd\t{$src2, $dst|$dst, $src2}", []>; - - let AddedComplexity = 15 in - def MOVLPDrr : SDI<0x10, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "movsd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2f64 (movl VR128:$src1, VR128:$src2)))]>; -} - // Store / copy lower 64-bits of a XMM register. def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>; -// Move to lower bits of a VR128 and zeroing upper bits. -// Loading from memory automatically zeroing upper bits. -let AddedComplexity = 20 in { -def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v2f64 (X86vzmovl (v2f64 (scalar_to_vector - (loadf64 addr:$src))))))]>; - -def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (MOVZSD2PDrm addr:$src)>; -def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), - (MOVZSD2PDrm addr:$src)>; -def : Pat<(v2f64 (X86vzload addr:$src)), (MOVZSD2PDrm addr:$src)>; -} - // movd / movq to XMM register zero-extends let AddedComplexity = 15 in { def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), @@ -2613,9 +2651,9 @@ let Constraints = "$src1 = $dst" in { } // Thread synchronization -def MONITOR : I<0x01, MRM1r, (outs), (ins), "monitor", +def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor", [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>; -def MWAIT : I<0x01, MRM1r, (outs), (ins), "mwait", +def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait", [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>; // vector_shuffle v1, <undef> <1, 1, 3, 3> @@ -2986,13 +3024,15 @@ let Predicates = [HasSSE2] in { let AddedComplexity = 15 in { // Zeroing a VR128 then do a MOVS{S|D} to the lower bits. 
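monitor and mwait above, like lfence, mfence, vmcall and rdtscp elsewhere in the patch, switch to the new MRM_XX forms, where XX names the fixed mod/rm byte the instruction always carries. An illustrative table of that correspondence (the enumerator values are the ones this patch adds to X86II; the fixed bytes are these instructions' standard encodings):

#include <cassert>
#include <cstdint>

// MRM_XX means "the mod/rm byte is the literal value 0xXX"; e.g. monitor
// encodes as 0F 01 C8 and therefore uses MRM_C8.
enum MRMFixedForm { MRM_C1 = 33, MRM_C8 = 37, MRM_C9 = 38, MRM_E8 = 39,
                    MRM_F0 = 40, MRM_F9 = 42 };

static uint8_t fixedModRMByte(MRMFixedForm Form) {
  switch (Form) {
  case MRM_C1: return 0xC1;  // vmcall   (0F 01 C1)
  case MRM_C8: return 0xC8;  // monitor  (0F 01 C8)
  case MRM_C9: return 0xC9;  // mwait    (0F 01 C9)
  case MRM_E8: return 0xE8;  // lfence   (0F AE E8)
  case MRM_F0: return 0xF0;  // mfence   (0F AE F0)
  case MRM_F9: return 0xF9;  // rdtscp   (0F 01 F9)
  }
  return 0;
}

int main() {
  assert(fixedModRMByte(MRM_C8) == 0xC8 && fixedModRMByte(MRM_F0) == 0xF0);
  return 0;
}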
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), - (MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>; + (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>; def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), - (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE1]>; + (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>; def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), - (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>; + (MOVSSrr (v4f32 (V_SET0)), + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>; def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), - (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>; + (MOVSSrr (v4i32 (V_SET0)), + (EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>; } // Splat v2f64 / v2i64 @@ -3010,8 +3050,7 @@ def : Pat<(unpckh (v2i64 VR128:$src), (undef)), // Special unary SHUFPSrri case. def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))), (SHUFPSrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>, - Requires<[HasSSE1]>; + (SHUFFLE_get_shuf_imm VR128:$src3))>; let AddedComplexity = 5 in def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))), (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>, @@ -3057,13 +3096,13 @@ def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))), } let AddedComplexity = 10 in { def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))), - (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; + (UNPCKLPSrr VR128:$src, VR128:$src)>; def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))), - (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKLBWrr VR128:$src, VR128:$src)>; def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))), - (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKLWDrr VR128:$src, VR128:$src)>; def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))), - (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKLDQrr VR128:$src, VR128:$src)>; } // vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> @@ -3077,13 +3116,13 @@ def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))), } let AddedComplexity = 10 in { def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))), - (UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; + (UNPCKHPSrr VR128:$src, VR128:$src)>; def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))), - (PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKHBWrr VR128:$src, VR128:$src)>; def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))), - (PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKHWDrr VR128:$src, VR128:$src)>; def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))), - (PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKHDQrr VR128:$src, VR128:$src)>; } let AddedComplexity = 20 in { @@ -3105,45 +3144,49 @@ def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))), let AddedComplexity = 20 in { // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; + (MOVLPSrm VR128:$src1, addr:$src2)>; def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; + (MOVLPDrm VR128:$src1, addr:$src2)>; def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; + (MOVLPSrm VR128:$src1, addr:$src2)>; def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; + (MOVLPDrm VR128:$src1, 
addr:$src2)>; } // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; + (MOVLPSmr addr:$src1, VR128:$src2)>; def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVLPDmr addr:$src1, VR128:$src2)>; def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1), - (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; + (MOVLPSmr addr:$src1, VR128:$src2)>; def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVLPDmr addr:$src1, VR128:$src2)>; let AddedComplexity = 15 in { // Setting the lowest element in the vector. def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)), - (MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVSSrr (v4i32 VR128:$src1), + (EXTRACT_SUBREG (v4i32 VR128:$src2), x86_subreg_ss))>; def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)), - (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVSDrr (v2i64 VR128:$src1), + (EXTRACT_SUBREG (v2i64 VR128:$src2), x86_subreg_sd))>; -// vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd) +// vector_shuffle v1, v2 <4, 5, 2, 3> using movsd def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)), - (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>, + Requires<[HasSSE2]>; def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), - (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>, + Requires<[HasSSE2]>; } // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but // fall back to this for SSE1) def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))), (SHUFPSrri VR128:$src2, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE1]>; + (SHUFFLE_get_shuf_imm VR128:$src3))>; // Set lowest element and zero upper elements. let AddedComplexity = 15 in @@ -3185,30 +3228,30 @@ def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))), // Use movaps / movups for SSE integer load / store (one byte shorter). 
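// (Rough encoding sketch of the size win: MOVAPS/MOVUPS carry no 0x66
// operand-size prefix, so e.g. "movaps (%eax), %xmm0" encodes as 0F 28 00
// while the integer-domain "movdqa (%eax), %xmm0" is 66 0F 6F 00, one byte
// longer.)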
def : Pat<(alignedloadv4i32 addr:$src), - (MOVAPSrm addr:$src)>, Requires<[HasSSE1]>; + (MOVAPSrm addr:$src)>; def : Pat<(loadv4i32 addr:$src), - (MOVUPSrm addr:$src)>, Requires<[HasSSE1]>; + (MOVUPSrm addr:$src)>; def : Pat<(alignedloadv2i64 addr:$src), - (MOVAPSrm addr:$src)>, Requires<[HasSSE2]>; + (MOVAPSrm addr:$src)>; def : Pat<(loadv2i64 addr:$src), - (MOVUPSrm addr:$src)>, Requires<[HasSSE2]>; + (MOVUPSrm addr:$src)>; def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVAPSmr addr:$dst, VR128:$src)>; def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVAPSmr addr:$dst, VR128:$src)>; def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVAPSmr addr:$dst, VR128:$src)>; def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVAPSmr addr:$dst, VR128:$src)>; def : Pat<(store (v2i64 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVUPSmr addr:$dst, VR128:$src)>; def : Pat<(store (v4i32 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVUPSmr addr:$dst, VR128:$src)>; def : Pat<(store (v8i16 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVUPSmr addr:$dst, VR128:$src)>; def : Pat<(store (v16i8 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVUPSmr addr:$dst, VR128:$src)>; //===----------------------------------------------------------------------===// // SSE4.1 Instructions @@ -3397,7 +3440,7 @@ let Constraints = "$src1 = $dst" in { (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, - (OpNode VR128:$src1, (memop addr:$src2)))]>, OpSize; + (OpVT (OpNode VR128:$src1, (memop addr:$src2))))]>, OpSize; def rm_int : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp index d3b0052..250634f 100644 --- a/lib/Target/X86/X86MCAsmInfo.cpp +++ b/lib/Target/X86/X86MCAsmInfo.cpp @@ -55,6 +55,11 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) { if (!is64Bit) Data64bitsDirective = 0; // we can't emit a 64-bit unit + // Use ## as a comment string so that .s files generated by llvm can go + // through the GCC preprocessor without causing an error. This is needed + // because "clang foo.s" runs the C preprocessor, which is usually reserved + // for .S files on other systems. Perhaps this is because the file system + // wasn't always case preserving or something. 
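  // For example, instruction annotations are then printed as "## <comment>"
  // rather than "# <comment>"; a lone '#' at the start of a line could be
  // taken by cpp for a preprocessor directive or line marker.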
CommentString = "##"; PCSymbol = "."; @@ -70,6 +75,8 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &Triple) { AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; + TextAlignFillValue = 0x90; + PrivateGlobalPrefix = ".L"; WeakRefDirective = "\t.weak\t"; PCSymbol = "."; @@ -94,27 +101,6 @@ MCSection *X86ELFMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const { X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) { AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; -} - - -X86WinMCAsmInfo::X86WinMCAsmInfo(const Triple &Triple) { - AsmTransCBE = x86_asm_table; - AssemblerDialect = AsmWriterFlavor; - GlobalPrefix = "_"; - CommentString = ";"; - - PrivateGlobalPrefix = "$"; - AlignDirective = "\tALIGN\t"; - ZeroDirective = "\tdb\t"; - AsciiDirective = "\tdb\t"; - AscizDirective = 0; - Data8bitsDirective = "\tdb\t"; - Data16bitsDirective = "\tdw\t"; - Data32bitsDirective = "\tdd\t"; - Data64bitsDirective = "\tdq\t"; - HasDotTypeDotSizeDirective = false; - HasSingleParameterDotFile = false; - - AlignmentIsInBytes = true; + TextAlignFillValue = 0x90; } diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/X86MCAsmInfo.h index ca227b7..69716bf 100644 --- a/lib/Target/X86/X86MCAsmInfo.h +++ b/lib/Target/X86/X86MCAsmInfo.h @@ -33,11 +33,6 @@ namespace llvm { struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF { explicit X86MCAsmInfoCOFF(const Triple &Triple); }; - - struct X86WinMCAsmInfo : public MCAsmInfo { - explicit X86WinMCAsmInfo(const Triple &Triple); - }; - } // namespace llvm #endif diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index 764c87a..3f18696 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -14,53 +14,44 @@ #define DEBUG_TYPE "x86-emitter" #include "X86.h" #include "X86InstrInfo.h" +#include "X86FixupKinds.h" #include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -// FIXME: This should move to a header. 
-namespace llvm { -namespace X86 { -enum Fixups { - reloc_pcrel_word = FirstTargetFixupKind, - reloc_picrel_word, - reloc_absolute_word, - reloc_absolute_word_sext, - reloc_absolute_dword -}; -} -} - namespace { class X86MCCodeEmitter : public MCCodeEmitter { X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT const TargetMachine &TM; const TargetInstrInfo &TII; + MCContext &Ctx; bool Is64BitMode; public: - X86MCCodeEmitter(TargetMachine &tm, bool is64Bit) - : TM(tm), TII(*TM.getInstrInfo()) { + X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit) + : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) { Is64BitMode = is64Bit; } ~X86MCCodeEmitter() {} unsigned getNumFixupKinds() const { - return 5; + return 3; } - MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { - static MCFixupKindInfo Infos[] = { - { "reloc_pcrel_word", 0, 4 * 8 }, - { "reloc_picrel_word", 0, 4 * 8 }, - { "reloc_absolute_word", 0, 4 * 8 }, - { "reloc_absolute_word_sext", 0, 4 * 8 }, - { "reloc_absolute_dword", 0, 8 * 8 } + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[] = { + { "reloc_pcrel_4byte", 0, 4 * 8 }, + { "reloc_pcrel_1byte", 0, 1 * 8 }, + { "reloc_riprel_4byte", 0, 4 * 8 } }; + + if (Kind < FirstTargetFixupKind) + return MCCodeEmitter::getFixupKindInfo(Kind); - assert(Kind >= FirstTargetFixupKind && Kind < MaxTargetFixupKind && + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); return Infos[Kind - FirstTargetFixupKind]; } @@ -83,9 +74,11 @@ public: } } - void EmitDisplacementField(const MCOperand &Disp, int64_t Adj, bool IsPCRel, - unsigned &CurByte, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const; + void EmitImmediate(const MCOperand &Disp, + unsigned ImmSize, MCFixupKind FixupKind, + unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + int ImmOffset = 0) const; inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode, unsigned RM) { @@ -106,8 +99,8 @@ public: void EmitMemModRMByte(const MCInst &MI, unsigned Op, - unsigned RegOpcodeField, intptr_t PCAdj, - unsigned &CurByte, raw_ostream &OS, + unsigned RegOpcodeField, + unsigned TSFlags, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const; void EncodeInstruction(const MCInst &MI, raw_ostream &OS, @@ -119,13 +112,15 @@ public: MCCodeEmitter *llvm::createX86_32MCCodeEmitter(const Target &, - TargetMachine &TM) { - return new X86MCCodeEmitter(TM, false); + TargetMachine &TM, + MCContext &Ctx) { + return new X86MCCodeEmitter(TM, Ctx, false); } MCCodeEmitter *llvm::createX86_64MCCodeEmitter(const Target &, - TargetMachine &TM) { - return new X86MCCodeEmitter(TM, true); + TargetMachine &TM, + MCContext &Ctx) { + return new X86MCCodeEmitter(TM, Ctx, true); } @@ -135,36 +130,59 @@ static bool isDisp8(int Value) { return Value == (signed char)Value; } +/// getImmFixupKind - Return the appropriate fixup kind to use for an immediate +/// in an instruction with the specified TSFlags. +static MCFixupKind getImmFixupKind(unsigned TSFlags) { + unsigned Size = X86II::getSizeOfImm(TSFlags); + bool isPCRel = X86II::isImmPCRel(TSFlags); + + switch (Size) { + default: assert(0 && "Unknown immediate size"); + case 1: return isPCRel ? MCFixupKind(X86::reloc_pcrel_1byte) : FK_Data_1; + case 4: return isPCRel ? 
MCFixupKind(X86::reloc_pcrel_4byte) : FK_Data_4; + case 2: assert(!isPCRel); return FK_Data_2; + case 8: assert(!isPCRel); return FK_Data_8; + } +} + + void X86MCCodeEmitter:: -EmitDisplacementField(const MCOperand &DispOp, int64_t Adj, bool IsPCRel, - unsigned &CurByte, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const { +EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, + unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const { // If this is a simple integer displacement that doesn't require a relocation, // emit it now. if (DispOp.isImm()) { - EmitConstant(DispOp.getImm(), 4, CurByte, OS); + // FIXME: is this right for pc-rel encoding?? Probably need to emit this as + // a fixup if so. + EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS); return; } -#if 0 - // Otherwise, this is something that requires a relocation. Emit it as such - // now. - unsigned RelocType = Is64BitMode ? - (IsPCRel ? X86::reloc_pcrel_word : X86::reloc_absolute_word_sext) - : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); -#endif + // If we have an immoffset, add it to the expression. + const MCExpr *Expr = DispOp.getExpr(); + + // If the fixup is pc-relative, we need to bias the value to be relative to + // the start of the field, not the end of the field. + if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) || + FixupKind == MCFixupKind(X86::reloc_riprel_4byte)) + ImmOffset -= 4; + if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte)) + ImmOffset -= 1; + + if (ImmOffset) + Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(ImmOffset, Ctx), + Ctx); // Emit a symbolic constant as a fixup and 4 zeros. - Fixups.push_back(MCFixup::Create(CurByte, DispOp.getExpr(), - MCFixupKind(X86::reloc_absolute_word))); - EmitConstant(0, 4, CurByte, OS); + Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind)); + EmitConstant(0, Size, CurByte, OS); } void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField, - intptr_t PCAdj, - unsigned &CurByte, + unsigned TSFlags, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const{ const MCOperand &Disp = MI.getOperand(Op+3); @@ -172,31 +190,48 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, const MCOperand &Scale = MI.getOperand(Op+1); const MCOperand &IndexReg = MI.getOperand(Op+2); unsigned BaseReg = Base.getReg(); - - // FIXME: Eliminate! - bool IsPCRel = false; + + // Handle %rip relative addressing. + if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode + assert(IndexReg.getReg() == 0 && Is64BitMode && + "Invalid rip-relative address"); + EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); + + // rip-relative addressing is actually relative to the *next* instruction. + // Since an immediate can follow the mod/rm byte for an instruction, this + // means that we need to bias the immediate field of the instruction with + // the size of the immediate field. If we have this case, add it into the + // expression to emit. + int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0; + EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_riprel_4byte), + CurByte, OS, Fixups, -ImmSize); + return; + } + + unsigned BaseRegNo = BaseReg ? GetX86RegNum(Base) : -1U; + // Determine whether a SIB byte is needed. // If no BaseReg, issue a RIP relative instruction only if the MCE can // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table // 2-7) and absolute references. 
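  // A few concrete encodings the logic here produces (ModRM as mod/reg/rm):
  //   [RIP+disp32] (64-bit)  -> mod=00 rm=101 plus a reloc_riprel_4byte fixup
  //   [ESP+8]                -> rm=100, so a SIB byte is required
  //   [disp32] (32-bit mode) -> mod=00 rm=101 plus a plain 4-byte displacement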
+ if (// The SIB byte must be used if there is an index register. IndexReg.getReg() == 0 && - // The SIB byte must be used if the base is ESP/RSP. - BaseReg != X86::ESP && BaseReg != X86::RSP && + // The SIB byte must be used if the base is ESP/RSP/R12, all of which + // encode to an R/M value of 4, which indicates that a SIB byte is + // present. + BaseRegNo != N86::ESP && // If there is no base register and we're in 64-bit mode, we need a SIB // byte to emit an addr that is just 'disp32' (the non-RIP relative form). (!Is64BitMode || BaseReg != 0)) { - if (BaseReg == 0 || // [disp32] in X86-32 mode - BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode + if (BaseReg == 0) { // [disp32] in X86-32 mode EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); - EmitDisplacementField(Disp, PCAdj, true, CurByte, OS, Fixups); + EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); return; } - unsigned BaseRegNo = GetX86RegNum(Base); - // If the base is not EBP/ESP and there is no displacement, use simple // indirect register encoding, this handles addresses like [EAX]. The // encoding for [EBP] with no displacement means [disp32] so we handle it @@ -209,13 +244,13 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Otherwise, if the displacement fits in a byte, encode as [REG+disp8]. if (Disp.isImm() && isDisp8(Disp.getImm())) { EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS); - EmitConstant(Disp.getImm(), 1, CurByte, OS); + EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups); return; } // Otherwise, emit the most general non-SIB encoding: [REG+disp32] EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS); - EmitDisplacementField(Disp, PCAdj, IsPCRel, CurByte, OS, Fixups); + EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); return; } @@ -270,9 +305,9 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Do we need to output a displacement? if (ForceDisp8) - EmitConstant(Disp.getImm(), 1, CurByte, OS); + EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups); else if (ForceDisp32 || Disp.getImm() != 0) - EmitDisplacementField(Disp, PCAdj, IsPCRel, CurByte, OS, Fixups); + EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); } /// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64 @@ -280,11 +315,11 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, /// size, and 3) use of X86-64 extended registers. static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags, const TargetInstrDesc &Desc) { - unsigned REX = 0; - - // Pseudo instructions do not need REX prefix byte. + // Pseudo instructions never have a rex byte. if ((TSFlags & X86II::FormMask) == X86II::Pseudo) return 0; + + unsigned REX = 0; if (TSFlags & X86II::REX_W) REX |= 1 << 3; @@ -482,52 +517,29 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form when the JIT moves to MCCodeEmitter!"); default: errs() << "FORM: " << (TSFlags & X86II::FormMask) << "\n"; - assert(0 && "Unknown FormMask value in X86MCCodeEmitter!"); - case X86II::RawFrm: { + assert(0 && "Unknown FormMask value in X86MCCodeEmitter!"); + case X86II::Pseudo: return; // Pseudo instructions encode to nothing. 
+ case X86II::RawFrm: EmitByte(BaseOpcode, CurByte, OS); - - if (CurOp == NumOps) - break; - - assert(0 && "Unimpl RawFrm expr"); break; - } - case X86II::AddRegFrm: { + case X86II::AddRegFrm: EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS); - if (CurOp == NumOps) - break; - - const MCOperand &MO1 = MI.getOperand(CurOp++); - if (MO1.isImm()) { - unsigned Size = X86II::getSizeOfImm(TSFlags); - EmitConstant(MO1.getImm(), Size, CurByte, OS); - break; - } - - assert(0 && "Unimpl AddRegFrm expr"); break; - } case X86II::MRMDestReg: EmitByte(BaseOpcode, CurByte, OS); EmitRegModRMByte(MI.getOperand(CurOp), GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS); CurOp += 2; - if (CurOp != NumOps) - EmitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(TSFlags), CurByte, OS); break; case X86II::MRMDestMem: EmitByte(BaseOpcode, CurByte, OS); EmitMemModRMByte(MI, CurOp, GetX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands)), - 0, CurByte, OS, Fixups); + TSFlags, CurByte, OS, Fixups); CurOp += X86AddrNumOperands + 1; - if (CurOp != NumOps) - EmitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(TSFlags), CurByte, OS); break; case X86II::MRMSrcReg: @@ -535,9 +547,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitRegModRMByte(MI.getOperand(CurOp+1), GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS); CurOp += 2; - if (CurOp != NumOps) - EmitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(TSFlags), CurByte, OS); break; case X86II::MRMSrcMem: { @@ -551,117 +560,78 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, else AddrOperands = X86AddrNumOperands; - // FIXME: What is this actually doing? - intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ? - X86II::getSizeOfImm(TSFlags) : 0; - EmitMemModRMByte(MI, CurOp+1, GetX86RegNum(MI.getOperand(CurOp)), - PCAdj, CurByte, OS, Fixups); + TSFlags, CurByte, OS, Fixups); CurOp += AddrOperands + 1; - if (CurOp != NumOps) - EmitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(TSFlags), CurByte, OS); break; } case X86II::MRM0r: case X86II::MRM1r: case X86II::MRM2r: case X86II::MRM3r: case X86II::MRM4r: case X86II::MRM5r: - case X86II::MRM6r: case X86II::MRM7r: { + case X86II::MRM6r: case X86II::MRM7r: EmitByte(BaseOpcode, CurByte, OS); - - // Special handling of lfence, mfence, monitor, and mwait. - // FIXME: This is terrible, they should get proper encoding bits in TSFlags. - if (Opcode == X86::LFENCE || Opcode == X86::MFENCE || - Opcode == X86::MONITOR || Opcode == X86::MWAIT) { - EmitByte(ModRMByte(3, (TSFlags & X86II::FormMask)-X86II::MRM0r, 0), - CurByte, OS); - - switch (Opcode) { - default: break; - case X86::MONITOR: EmitByte(0xC8, CurByte, OS); break; - case X86::MWAIT: EmitByte(0xC9, CurByte, OS); break; - } - } else { - EmitRegModRMByte(MI.getOperand(CurOp++), - (TSFlags & X86II::FormMask)-X86II::MRM0r, - CurByte, OS); - } - - if (CurOp == NumOps) - break; - - const MCOperand &MO1 = MI.getOperand(CurOp++); - if (MO1.isImm()) { - EmitConstant(MO1.getImm(), X86II::getSizeOfImm(TSFlags), CurByte, OS); - break; - } - - assert(0 && "relo unimpl"); -#if 0 - unsigned rt = Is64BitMode ? X86::reloc_pcrel_word - : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); - if (Opcode == X86::MOV64ri32) - rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? 
- if (MO1.isGlobal()) { - bool Indirect = gvNeedsNonLazyPtr(MO1, TM); - emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, - Indirect); - } else if (MO1.isSymbol()) - emitExternalSymbolAddress(MO1.getSymbolName(), rt); - else if (MO1.isCPI()) - emitConstPoolAddress(MO1.getIndex(), rt); - else if (MO1.isJTI()) - emitJumpTableAddress(MO1.getIndex(), rt); + EmitRegModRMByte(MI.getOperand(CurOp++), + (TSFlags & X86II::FormMask)-X86II::MRM0r, + CurByte, OS); break; -#endif - } case X86II::MRM0m: case X86II::MRM1m: case X86II::MRM2m: case X86II::MRM3m: case X86II::MRM4m: case X86II::MRM5m: - case X86II::MRM6m: case X86II::MRM7m: { - intptr_t PCAdj = 0; - if (CurOp + X86AddrNumOperands != NumOps) { - if (MI.getOperand(CurOp+X86AddrNumOperands).isImm()) - PCAdj = X86II::getSizeOfImm(TSFlags); - else - PCAdj = 4; - } - + case X86II::MRM6m: case X86II::MRM7m: EmitByte(BaseOpcode, CurByte, OS); EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m, - PCAdj, CurByte, OS, Fixups); + TSFlags, CurByte, OS, Fixups); CurOp += X86AddrNumOperands; - - if (CurOp == NumOps) - break; - - const MCOperand &MO = MI.getOperand(CurOp++); - if (MO.isImm()) { - EmitConstant(MO.getImm(), X86II::getSizeOfImm(TSFlags), CurByte, OS); - break; - } - - assert(0 && "relo not handled"); -#if 0 - unsigned rt = Is64BitMode ? X86::reloc_pcrel_word - : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); - if (Opcode == X86::MOV64mi32) - rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? - if (MO.isGlobal()) { - bool Indirect = gvNeedsNonLazyPtr(MO, TM); - emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0, - Indirect); - } else if (MO.isSymbol()) - emitExternalSymbolAddress(MO.getSymbolName(), rt); - else if (MO.isCPI()) - emitConstPoolAddress(MO.getIndex(), rt); - else if (MO.isJTI()) - emitJumpTableAddress(MO.getIndex(), rt); -#endif + break; + case X86II::MRM_C1: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC1, CurByte, OS); + break; + case X86II::MRM_C2: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC2, CurByte, OS); + break; + case X86II::MRM_C3: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC3, CurByte, OS); + break; + case X86II::MRM_C4: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC4, CurByte, OS); + break; + case X86II::MRM_C8: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC8, CurByte, OS); + break; + case X86II::MRM_C9: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC9, CurByte, OS); + break; + case X86II::MRM_E8: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xE8, CurByte, OS); + break; + case X86II::MRM_F0: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xF0, CurByte, OS); + break; + case X86II::MRM_F8: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xF8, CurByte, OS); + break; + case X86II::MRM_F9: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xF9, CurByte, OS); break; } - } + + // If there is a remaining operand, it must be a trailing immediate. Emit it + // according to the right size for the instruction. + if (CurOp != NumOps) + EmitImmediate(MI.getOperand(CurOp++), + X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), + CurByte, OS, Fixups); #ifndef NDEBUG // FIXME: Verify. 
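  // (The MRM_xx forms above just emit a fixed second opcode byte, e.g.
  //  MONITOR (MRM_C8) encodes as 0F 01 C8, MWAIT (MRM_C9) as 0F 01 C9,
  //  LFENCE (MRM_E8) as 0F AE E8 and MFENCE (MRM_F0) as 0F AE F0.)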
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index bb53bf1..4b2529b 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -18,12 +18,6 @@ namespace llvm { -enum NameDecorationStyle { - None, - StdCall, - FastCall -}; - /// X86MachineFunctionInfo - This class is derived from MachineFunction and /// contains private X86 target-specific information for each MachineFunction. class X86MachineFunctionInfo : public MachineFunctionInfo { @@ -41,10 +35,6 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// Used on windows platform for stdcall & fastcall name decoration unsigned BytesToPopOnReturn; - /// DecorationStyle - If the function requires additional name decoration, - /// DecorationStyle holds the right way to do so. - NameDecorationStyle DecorationStyle; - /// ReturnAddrIndex - FrameIndex for return slot. int ReturnAddrIndex; @@ -66,7 +56,6 @@ public: X86MachineFunctionInfo() : ForceFramePointer(false), CalleeSavedFrameSize(0), BytesToPopOnReturn(0), - DecorationStyle(None), ReturnAddrIndex(0), TailCallReturnAddrDelta(0), SRetReturnReg(0), @@ -76,7 +65,6 @@ public: : ForceFramePointer(false), CalleeSavedFrameSize(0), BytesToPopOnReturn(0), - DecorationStyle(None), ReturnAddrIndex(0), TailCallReturnAddrDelta(0), SRetReturnReg(0), @@ -91,9 +79,6 @@ public: unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; } void setBytesToPopOnReturn (unsigned bytes) { BytesToPopOnReturn = bytes;} - NameDecorationStyle getDecorationStyle() const { return DecorationStyle; } - void setDecorationStyle(NameDecorationStyle style) { DecorationStyle = style;} - int getRAIndex() const { return ReturnAddrIndex; } void setRAIndex(int Index) { ReturnAddrIndex = Index; } diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 081c6d9..0f4ce37 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -191,6 +191,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return &X86::GR16_NOREXRegClass; else if (A == &X86::GR16_ABCDRegClass) return &X86::GR16_ABCDRegClass; + } else if (B == &X86::FR32RegClass) { + return A; } break; case 2: @@ -207,6 +209,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass || A == &X86::GR16_NOREXRegClass) return &X86::GR16_ABCDRegClass; + } else if (B == &X86::FR64RegClass) { + return A; } break; case 3: @@ -234,6 +238,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return &X86::GR32_NOREXRegClass; else if (A == &X86::GR32_ABCDRegClass) return &X86::GR64_ABCDRegClass; + } else if (B == &X86::VR128RegClass) { + return A; } break; case 4: @@ -446,8 +452,10 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); + const Function *F = MF.getFunction(); bool requiresRealignment = - RealignStack && (MFI->getMaxAlignment() > StackAlign); + RealignStack && ((MFI->getMaxAlignment() > StackAlign) || + F->hasFnAttr(Attribute::StackAlignment)); // FIXME: Currently we don't support stack realignment for functions with // variable-sized allocas. 
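  // (So, for instance, a function carrying the stack-alignment attribute,
  //  roughly "define void @f() alignstack(32)" in IR, now forces realignment
  //  even when none of its frame objects is itself over-aligned.)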
@@ -485,7 +493,7 @@ X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const { Offset += SlotSize; } else { unsigned Align = MFI->getObjectAlignment(FI); - assert( (-(Offset + StackSize)) % Align == 0); + assert((-(Offset + StackSize)) % Align == 0); Align = 0; return Offset + StackSize; } @@ -627,10 +635,6 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { MachineFrameInfo *MFI = MF.getFrameInfo(); - // Calculate and set max stack object alignment early, so we can decide - // whether we will need stack realignment (and thus FP). - MFI->calculateMaxStackAlignment(); - X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); @@ -1053,7 +1057,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) .addImm(NumBytes); BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) - .addExternalSymbol("_alloca"); + .addExternalSymbol("_alloca") + .addReg(StackPtr, RegState::Define | RegState::Implicit); } else { // Save EAX BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) @@ -1064,7 +1069,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) .addImm(NumBytes - 4); BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) - .addExternalSymbol("_alloca"); + .addExternalSymbol("_alloca") + .addReg(StackPtr, RegState::Define | RegState::Implicit); // Restore EAX MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 8fb5e92..e4bdb4e 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -35,7 +35,8 @@ namespace X86 { /// these indices must be kept in sync with the class indices in the /// X86RegisterInfo.td file. 
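/// (With the new XMM indices below, EXTRACT_SUBREG of a VR128 with index 1
/// (x86_subreg_ss) yields the FR32 view of the register and index 2
/// (x86_subreg_sd) the FR64 view, matching VR128's SubRegClassList.)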
enum SubregIndex { - SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4 + SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4, + SUBREG_SS = 1, SUBREG_SD = 2, SUBREG_XMM = 3 }; } diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 1559bf7..ed2ce6c 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -158,22 +158,22 @@ let Namespace = "X86" in { def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>; // YMM Registers, used by AVX instructions - def YMM0: Register<"ymm0">, DwarfRegNum<[17, 21, 21]>; - def YMM1: Register<"ymm1">, DwarfRegNum<[18, 22, 22]>; - def YMM2: Register<"ymm2">, DwarfRegNum<[19, 23, 23]>; - def YMM3: Register<"ymm3">, DwarfRegNum<[20, 24, 24]>; - def YMM4: Register<"ymm4">, DwarfRegNum<[21, 25, 25]>; - def YMM5: Register<"ymm5">, DwarfRegNum<[22, 26, 26]>; - def YMM6: Register<"ymm6">, DwarfRegNum<[23, 27, 27]>; - def YMM7: Register<"ymm7">, DwarfRegNum<[24, 28, 28]>; - def YMM8: Register<"ymm8">, DwarfRegNum<[25, -2, -2]>; - def YMM9: Register<"ymm9">, DwarfRegNum<[26, -2, -2]>; - def YMM10: Register<"ymm10">, DwarfRegNum<[27, -2, -2]>; - def YMM11: Register<"ymm11">, DwarfRegNum<[28, -2, -2]>; - def YMM12: Register<"ymm12">, DwarfRegNum<[29, -2, -2]>; - def YMM13: Register<"ymm13">, DwarfRegNum<[30, -2, -2]>; - def YMM14: Register<"ymm14">, DwarfRegNum<[31, -2, -2]>; - def YMM15: Register<"ymm15">, DwarfRegNum<[32, -2, -2]>; + def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegNum<[17, 21, 21]>; + def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegNum<[18, 22, 22]>; + def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegNum<[19, 23, 23]>; + def YMM3: RegisterWithSubRegs<"ymm3", [XMM3]>, DwarfRegNum<[20, 24, 24]>; + def YMM4: RegisterWithSubRegs<"ymm4", [XMM4]>, DwarfRegNum<[21, 25, 25]>; + def YMM5: RegisterWithSubRegs<"ymm5", [XMM5]>, DwarfRegNum<[22, 26, 26]>; + def YMM6: RegisterWithSubRegs<"ymm6", [XMM6]>, DwarfRegNum<[23, 27, 27]>; + def YMM7: RegisterWithSubRegs<"ymm7", [XMM7]>, DwarfRegNum<[24, 28, 28]>; + def YMM8: RegisterWithSubRegs<"ymm8", [XMM8]>, DwarfRegNum<[25, -2, -2]>; + def YMM9: RegisterWithSubRegs<"ymm9", [XMM9]>, DwarfRegNum<[26, -2, -2]>; + def YMM10: RegisterWithSubRegs<"ymm10", [XMM10]>, DwarfRegNum<[27, -2, -2]>; + def YMM11: RegisterWithSubRegs<"ymm11", [XMM11]>, DwarfRegNum<[28, -2, -2]>; + def YMM12: RegisterWithSubRegs<"ymm12", [XMM12]>, DwarfRegNum<[29, -2, -2]>; + def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegNum<[30, -2, -2]>; + def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegNum<[31, -2, -2]>; + def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegNum<[32, -2, -2]>; // Floating point stack registers def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>; @@ -238,6 +238,10 @@ def x86_subreg_8bit_hi : PatLeaf<(i32 2)>; def x86_subreg_16bit : PatLeaf<(i32 3)>; def x86_subreg_32bit : PatLeaf<(i32 4)>; +def x86_subreg_ss : PatLeaf<(i32 1)>; +def x86_subreg_sd : PatLeaf<(i32 2)>; +def x86_subreg_xmm : PatLeaf<(i32 3)>; + def : SubRegSet<1, [AX, CX, DX, BX, SP, BP, SI, DI, R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W], [AL, CL, DL, BL, SPL, BPL, SIL, DIL, @@ -277,11 +281,31 @@ def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>; -def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, +def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, + YMM8, YMM9, YMM10, YMM11, 
YMM12, YMM13, YMM14, YMM15], + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; + +def : SubRegSet<2, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, + YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15], + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; + +def : SubRegSet<3, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15], [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; +def : SubRegSet<1, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15], + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; + +def : SubRegSet<2, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15], + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; + //===----------------------------------------------------------------------===// // Register Class Definitions... now that we have all of the pieces, define the // top-level register classes. The order specified in the register list is @@ -793,6 +817,7 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]> { + let SubRegClassList = [FR32, FR64]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -811,7 +836,9 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, YMM8, YMM9, YMM10, YMM11, - YMM12, YMM13, YMM14, YMM15]>; + YMM12, YMM13, YMM14, YMM15]> { + let SubRegClassList = [FR32, FR64, VR128]; +} // Status flags registers. def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> { diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 618dd10..594a470 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -20,9 +20,9 @@ namespace llvm { class GlobalValue; class TargetMachine; - + /// PICStyles - The X86 backend supports a number of different styles of PIC. -/// +/// namespace PICStyles { enum Style { StubPIC, // Used on i386-darwin in -fPIC mode. @@ -46,7 +46,7 @@ protected: /// PICStyle - Which PIC style to use /// PICStyles::Style PICStyle; - + /// X86SSELevel - MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or /// none supported. X86SSEEnum X86SSELevel; @@ -58,7 +58,7 @@ protected: /// HasCMov - True if this processor has conditional move instructions /// (generally pentium pro+). bool HasCMov; - + /// HasX86_64 - True if the processor supports X86-64 instructions. /// bool HasX86_64; @@ -78,8 +78,9 @@ protected: /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; - /// HasVectorUAMem - True if SIMD operations can have unaligned memory operands. - /// This may require setting a feature bit in the processor. + /// HasVectorUAMem - True if SIMD operations can have unaligned memory + /// operands. This may require setting a feature bit in the + /// processor. 
bool HasVectorUAMem; /// DarwinVers - Nonzero if this is a darwin platform: the numeric @@ -150,20 +151,20 @@ public: bool isTargetDarwin() const { return TargetType == isDarwin; } bool isTargetELF() const { return TargetType == isELF; } - + bool isTargetWindows() const { return TargetType == isWindows; } bool isTargetMingw() const { return TargetType == isMingw; } bool isTargetCygwin() const { return TargetType == isCygwin; } bool isTargetCygMing() const { return TargetType == isMingw || TargetType == isCygwin; } - + /// isTargetCOFF - Return true if this is any COFF/Windows target variant. bool isTargetCOFF() const { return TargetType == isMingw || TargetType == isCygwin || TargetType == isWindows; } - + bool isTargetWin64() const { return Is64Bit && (TargetType == isMingw || TargetType == isWindows); } @@ -175,7 +176,7 @@ public: else if (isTargetDarwin()) p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32"; else if (isTargetMingw() || isTargetWindows()) - p = "e-p:32:32-f64:64:64-i64:64:64-f80:128:128-n8:16:32"; + p = "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32"; else p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"; @@ -196,11 +197,11 @@ public: bool isPICStyleStubAny() const { return PICStyle == PICStyles::StubDynamicNoPIC || PICStyle == PICStyles::StubPIC; } - + /// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard, /// 10 = Snow Leopard, etc. unsigned getDarwinVers() const { return DarwinVers; } - + /// ClassifyGlobalReference - Classify a global variable reference for the /// current subtarget according to how we should reference it in a non-pcrel /// context. diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index f835e29..56ddaf8 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -30,9 +30,8 @@ static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { case Triple::MinGW32: case Triple::MinGW64: case Triple::Cygwin: - return new X86MCAsmInfoCOFF(TheTriple); case Triple::Win32: - return new X86WinMCAsmInfo(TheTriple); + return new X86MCAsmInfoCOFF(TheTriple); default: return new X86ELFMCAsmInfo(TheTriple); } @@ -48,11 +47,16 @@ extern "C" void LLVMInitializeX86Target() { RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo); // Register the code emitter. - // FIXME: Remove the heinous one when the new one works. TargetRegistry::RegisterCodeEmitter(TheX86_32Target, - createHeinousX86MCCodeEmitter); + createX86_32MCCodeEmitter); TargetRegistry::RegisterCodeEmitter(TheX86_64Target, - createHeinousX86MCCodeEmitter); + createX86_64MCCodeEmitter); + + // Register the asm backend. + TargetRegistry::RegisterAsmBackend(TheX86_32Target, + createX86_32AsmBackend); + TargetRegistry::RegisterAsmBackend(TheX86_64Target, + createX86_64AsmBackend); } @@ -201,32 +205,3 @@ void X86TargetMachine::setCodeModelForJIT() { else setCodeModel(CodeModel::Small); } - -/// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are 4-byte, -/// 8-byte, and target default. The CIE is hard-coded to indicate that the LSDA -/// pointer in the FDE section is an "sdata4", and should be encoded as a 4-byte -/// pointer by default. However, some systems may require a different size due -/// to bugs or other conditions. We will default to a 4-byte encoding unless the -/// system tells us otherwise. -/// -/// The issue is when the CIE says their is an LSDA. That mandates that every -/// FDE have an LSDA slot. But if the function does not need an LSDA. 
There -/// needs to be some way to signify there is none. The LSDA is encoded as -/// pc-rel. But you don't look for some magic value after adding the pc. You -/// have to look for a zero before adding the pc. The problem is that the size -/// of the zero to look for depends on the encoding. The unwinder bug in SL is -/// that it always checks for a pointer-size zero. So on x86_64 it looks for 8 -/// bytes of zero. If you have an LSDA, it works fine since the 8-bytes are -/// non-zero so it goes ahead and then reads the value based on the encoding. -/// But if you use sdata4 and there is no LSDA, then the test for zero gives a -/// false negative and the unwinder thinks there is an LSDA. -/// -/// FIXME: This call-back isn't good! We should be using the correct encoding -/// regardless of the system. However, there are some systems which have bugs -/// that prevent this from occuring. -DwarfLSDAEncoding::Encoding X86TargetMachine::getLSDAEncoding() const { - if (Subtarget.isTargetDarwin() && Subtarget.getDarwinVers() != 10) - return DwarfLSDAEncoding::Default; - - return DwarfLSDAEncoding::EightByte; -} diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index eee29be..2bb5454 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -62,18 +62,6 @@ public: return Subtarget.isTargetELF() ? &ELFWriterInfo : 0; } - /// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are - /// 4-byte, 8-byte, and target default. The CIE is hard-coded to indicate that - /// the LSDA pointer in the FDE section is an "sdata4", and should be encoded - /// as a 4-byte pointer by default. However, some systems may require a - /// different size due to bugs or other conditions. We will default to a - /// 4-byte encoding unless the system tells us otherwise. - /// - /// FIXME: This call-back isn't good! We should be using the correct encoding - /// regardless of the system. However, there are some systems which have bugs - /// that prevent this from occuring. - virtual DwarfLSDAEncoding::Encoding getLSDAEncoding() const; - // Set up the pass pipeline. virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index b8cef7d..29a0be5 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -7,61 +7,112 @@ // //===----------------------------------------------------------------------===// -#include "X86TargetObjectFile.h" #include "X86MCTargetExpr.h" +#include "X86TargetObjectFile.h" +#include "X86TargetMachine.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCContext.h" #include "llvm/Target/Mangler.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/Dwarf.h" using namespace llvm; +using namespace dwarf; -const MCExpr *X8632_MachoTargetObjectFile:: +const MCExpr *X8664_MachoTargetObjectFile:: getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { - // The mach-o version of this method defaults to returning a stub reference. - IsIndirect = true; - IsPCRel = false; - - - MachineModuleInfoMachO &MachOMMI = - MMI->getObjFileInfo<MachineModuleInfoMachO>(); - - // FIXME: Use GetSymbolWithGlobalValueBase. 
- SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, true); - Name += "$non_lazy_ptr"; - - // Add information about the stub reference to MachOMMI so that the stub gets - // emitted by the asmprinter. - MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); - MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym); - if (StubSym == 0) { - Name.clear(); + MachineModuleInfo *MMI, unsigned Encoding) const { + + // On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which + // is an indirect pc-relative reference. + if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) { + SmallString<128> Name; Mang->getNameWithPrefix(Name, GV, false); - StubSym = getContext().GetOrCreateSymbol(Name.str()); + const MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + const MCExpr *Res = + X86MCTargetExpr::Create(Sym, X86MCTargetExpr::GOTPCREL, getContext()); + const MCExpr *Four = MCConstantExpr::Create(4, getContext()); + return MCBinaryExpr::CreateAdd(Res, Four, getContext()); } - - return MCSymbolRefExpr::Create(Sym, getContext()); + + return TargetLoweringObjectFileMachO:: + getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding); } -const MCExpr *X8664_MachoTargetObjectFile:: -getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { - - // On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which - // is an indirect pc-relative reference. - IsIndirect = true; - IsPCRel = true; - - // FIXME: Use GetSymbolWithGlobalValueBase. - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, false); - const MCSymbol *Sym = getContext().CreateSymbol(Name); - const MCExpr *Res = - X86MCTargetExpr::Create(Sym, X86MCTargetExpr::GOTPCREL, getContext()); - const MCExpr *Four = MCConstantExpr::Create(4, getContext()); - return MCBinaryExpr::CreateAdd(Res, Four, getContext()); +unsigned X8632_ELFTargetObjectFile::getPersonalityEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8632_ELFTargetObjectFile::getLSDAEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8632_ELFTargetObjectFile::getFDEEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8632_ELFTargetObjectFile::getTTypeEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getPersonalityEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small || + Model == CodeModel::Medium ? + DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small || Model == CodeModel::Medium) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getLSDAEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | (Model == CodeModel::Small ? 
+ DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getFDEEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | (Model == CodeModel::Small || + Model == CodeModel::Medium ? + DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small || Model == CodeModel::Medium) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; } +unsigned X8664_ELFTargetObjectFile::getTTypeEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small || + Model == CodeModel::Medium ? + DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; +} diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index 377a93b..0444417 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -10,21 +10,13 @@ #ifndef LLVM_TARGET_X86_TARGETOBJECTFILE_H #define LLVM_TARGET_X86_TARGETOBJECTFILE_H +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetLoweringObjectFile.h" namespace llvm { - - /// X8632_MachoTargetObjectFile - This TLOF implementation is used for - /// Darwin/x86-32. - class X8632_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { - public: - - virtual const MCExpr * - getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const; - }; - + class X86TargetMachine; + /// X8664_MachoTargetObjectFile - This TLOF implementation is used for /// Darwin/x86-64. 
class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { @@ -32,9 +24,31 @@ namespace llvm { virtual const MCExpr * getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const; + MachineModuleInfo *MMI, unsigned Encoding) const; + }; + + class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF { + const X86TargetMachine &TM; + public: + X8632_ELFTargetObjectFile(const X86TargetMachine &tm) + :TM(tm) { } + virtual unsigned getPersonalityEncoding() const; + virtual unsigned getLSDAEncoding() const; + virtual unsigned getFDEEncoding() const; + virtual unsigned getTTypeEncoding() const; + }; + + class X8664_ELFTargetObjectFile : public TargetLoweringObjectFileELF { + const X86TargetMachine &TM; + public: + X8664_ELFTargetObjectFile(const X86TargetMachine &tm) + :TM(tm) { } + virtual unsigned getPersonalityEncoding() const; + virtual unsigned getLSDAEncoding() const; + virtual unsigned getFDEEncoding() const; + virtual unsigned getTTypeEncoding() const; }; + } // end namespace llvm #endif diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp index d18f55d..82e23a1 100644 --- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" @@ -62,6 +63,11 @@ namespace { } void printMemOperand(const MachineInstr *MI, int opNum); + void printInlineJT(const MachineInstr *MI, int opNum, + const std::string &directive = ".jmptable"); + void printInlineJT32(const MachineInstr *MI, int opNum) { + printInlineJT(MI, opNum, ".jmptable32"); + } void printOperand(const MachineInstr *MI, int opNum); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode); @@ -257,6 +263,23 @@ void XCoreAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum) printOperand(MI, opNum+1); } +void XCoreAsmPrinter:: +printInlineJT(const MachineInstr *MI, int opNum, const std::string &directive) +{ + unsigned JTI = MI->getOperand(opNum).getIndex(); + const MachineFunction *MF = MI->getParent()->getParent(); + const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; + O << "\t" << directive << " "; + for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { + MachineBasicBlock *MBB = JTBBs[i]; + if (i > 0) + O << ","; + O << *MBB->getSymbol(OutContext); + } +} + void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { const MachineOperand &MO = MI->getOperand(opNum); switch (MO.getType()) { diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 383fd91..b1ab132 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -65,8 +65,6 @@ namespace { bool SelectADDRcpii(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset); - virtual void InstructionSelect(); - virtual const char *getPassName() const { return "XCore DAG->DAG Pattern Instruction Selection"; } @@ -147,15 +145,6 @@ bool XCoreDAGToDAGISel::SelectADDRcpii(SDNode *Op, SDValue Addr, return false; } -/// 
InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void XCoreDAGToDAGISel::InstructionSelect() { - // Select target instructions for the DAG. - SelectRoot(*CurDAG); - - CurDAG->RemoveDeadNodes(); -} - SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { DebugLoc dl = N->getDebugLoc(); EVT NVT = N->getValueType(0); @@ -164,7 +153,11 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { default: break; case ISD::Constant: { if (Predicate_immMskBitp(N)) { - SDValue MskSize = Transform_msksize_xform(N); + // Transformation function: get the size of a mask + int64_t MaskVal = cast<ConstantSDNode>(N)->getZExtValue(); + assert(isMask_32(MaskVal)); + // Look for the first non-zero bit + SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(MaskVal)); return CurDAG->getMachineNode(XCore::MKMSK_rus, dl, MVT::i32, MskSize); } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index bf8c38f..e6515d8 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" @@ -53,6 +54,8 @@ getTargetNodeName(unsigned Opcode) const case XCoreISD::RETSP : return "XCoreISD::RETSP"; case XCoreISD::LADD : return "XCoreISD::LADD"; case XCoreISD::LSUB : return "XCoreISD::LSUB"; + case XCoreISD::BR_JT : return "XCoreISD::BR_JT"; + case XCoreISD::BR_JT32 : return "XCoreISD::BR_JT32"; default : return NULL; } } @@ -106,9 +109,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::TRAP, MVT::Other, Legal); - // Expand jump tables for now - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::JumpTable, MVT::i32, Custom); + // Jump tables. 
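  // (Custom-lowered in LowerBR_JT below: tables of at most 32 entries branch
  //  through the XCoreISD::BR_JT node, larger ones through BR_JT32 with the
  //  index pre-scaled by 2.)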
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom); setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32 , Custom); @@ -157,7 +159,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); - case ISD::JumpTable: return LowerJumpTable(Op, DAG); + case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); @@ -315,14 +317,27 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -LowerJumpTable(SDValue Op, SelectionDAG &DAG) +LowerBR_JT(SDValue Op, SelectionDAG &DAG) { - // FIXME there isn't really debug info here + SDValue Chain = Op.getOperand(0); + SDValue Table = Op.getOperand(1); + SDValue Index = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); - EVT PtrVT = Op.getValueType(); - JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); - SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); - return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, JTI); + JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); + unsigned JTI = JT->getIndex(); + MachineFunction &MF = DAG.getMachineFunction(); + const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32); + + unsigned NumEntries = MJTI->getJumpTables()[JTI].MBBs.size(); + if (NumEntries <= 32) { + return DAG.getNode(XCoreISD::BR_JT, dl, MVT::Other, Chain, TargetJT, Index); + } + assert((NumEntries >> 31) == 0); + SDValue ScaledIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index, + DAG.getConstant(1, MVT::i32)); + return DAG.getNode(XCoreISD::BR_JT32, dl, MVT::Other, Chain, TargetJT, + ScaledIndex); } static bool @@ -390,7 +405,12 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) if (Offset % 4 == 0) { // We've managed to infer better alignment information than the load // already has. Use an aligned load. - return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4); + // + // FIXME: No new alignment information is actually passed here. + // Should the offset really be 4? 
+ // + return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4, + false, false, 0); } // Lower to // ldw low, base[offset >> 2] @@ -407,9 +427,9 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, HighOffset); SDValue Low = DAG.getLoad(getPointerTy(), dl, Chain, - LowAddr, NULL, 4); + LowAddr, NULL, 4, false, false, 0); SDValue High = DAG.getLoad(getPointerTy(), dl, Chain, - HighAddr, NULL, 4); + HighAddr, NULL, 4, false, false, 0); SDValue LowShifted = DAG.getNode(ISD::SRL, dl, MVT::i32, Low, LowShift); SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, HighShift); SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, LowShifted, HighShifted); @@ -423,12 +443,13 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) int SVOffset = LD->getSrcValueOffset(); SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, BasePtr, LD->getSrcValue(), SVOffset, MVT::i16, - LD->isVolatile(), 2); + LD->isVolatile(), LD->isNonTemporal(), 2); SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, DAG.getConstant(2, MVT::i32)); SDValue High = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::i32, Chain, HighAddr, LD->getSrcValue(), SVOffset + 2, - MVT::i16, LD->isVolatile(), 2); + MVT::i16, LD->isVolatile(), + LD->isNonTemporal(), 2); SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, DAG.getConstant(16, MVT::i32)); SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, Low, HighShifted); @@ -452,7 +473,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__misaligned_load", getPointerTy()), - Args, DAG, dl, DAG.GetOrdering(Chain.getNode())); + Args, DAG, dl); SDValue Ops[] = { CallResult.first, CallResult.second }; @@ -487,12 +508,14 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) DAG.getConstant(16, MVT::i32)); SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr, ST->getSrcValue(), SVOffset, MVT::i16, - ST->isVolatile(), 2); + ST->isVolatile(), ST->isNonTemporal(), + 2); SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, DAG.getConstant(2, MVT::i32)); SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr, ST->getSrcValue(), SVOffset + 2, - MVT::i16, ST->isVolatile(), 2); + MVT::i16, ST->isVolatile(), + ST->isNonTemporal(), 2); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh); } @@ -513,7 +536,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__misaligned_store", getPointerTy()), - Args, DAG, dl, DAG.GetOrdering(Chain.getNode())); + Args, DAG, dl); return CallResult.second; } @@ -561,15 +584,16 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG) const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); EVT VT = Node->getValueType(0); SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0), - Node->getOperand(1), V, 0); + Node->getOperand(1), V, 0, false, false, 0); // Increment the pointer, VAList, to the next vararg SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList, DAG.getConstant(VT.getSizeInBits(), getPointerTy())); // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), V, 0); + Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), V, 0, + false, false, 0); // Load the actual argument out of the pointer VAList - return DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0); + return 
DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, false, false, 0); } SDValue XCoreTargetLowering:: @@ -582,7 +606,8 @@ LowerVASTART(SDValue Op, SelectionDAG &DAG) XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), SV, 0, + false, false, 0); } SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { @@ -877,7 +902,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); } } @@ -908,7 +934,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, RegInfo.addLiveIn(ArgRegs[i], VReg); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); // Move argument from virt reg -> stack - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); } if (!MemOps.empty()) @@ -1134,10 +1161,8 @@ static inline bool isImmUs4(int64_t val) bool XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const { - // Be conservative with void - // FIXME: Can we be more aggressive? if (Ty->getTypeID() == Type::VoidTyID) - return false; + return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs); const TargetData *TD = TM.getTargetData(); unsigned Size = TD->getTypeAllocSize(Ty); diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index f7b620e..0c638af 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -28,7 +28,7 @@ namespace llvm { namespace XCoreISD { enum NodeType { // Start the numbering where the builtin ops and target ops leave off. - FIRST_NUMBER = ISD::BUILTIN_OP_END+XCore::INSTRUCTION_LIST_END, + FIRST_NUMBER = ISD::BUILTIN_OP_END, // Branch and link (call) BL, @@ -52,7 +52,13 @@ namespace llvm { LADD, // Corresponds to LSUB instruction - LSUB + LSUB, + + // Jumptable branch. + BR_JT, + + // Jumptable branch using long branches for each entry. + BR_JT32 }; } @@ -122,7 +128,7 @@ namespace llvm { SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG); SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); + SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG); SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG); SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index 5a54844..722e747 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -145,6 +145,11 @@ static inline bool IsCondBranch(unsigned BrOpc) { return IsBRF(BrOpc) || IsBRT(BrOpc); } +static inline bool IsBR_JT(unsigned BrOpc) { + return BrOpc == XCore::BR_JT + || BrOpc == XCore::BR_JT32; +} + /// GetCondFromBranchOpc - Return the XCore CC that matches /// the correspondent Branch instruction opcode. 
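Stepping back to the XCore LowerLOAD hunk above: a misaligned 32-bit load is rebuilt from two word-aligned loads combined with shifts and an OR. A plain-C++ illustration of that recombination for a little-endian layout; the helper name is ours, not the patch's, and it assumes both touched words are readable:

    #include <cstdint>

    // Read a 32-bit value at an arbitrary byte offset using only aligned word loads.
    static uint32_t loadMisaligned(const uint32_t *Base, unsigned ByteOffset) {
      unsigned Shift = (ByteOffset & 0x3) * 8;
      uint32_t Low = Base[ByteOffset >> 2];
      if (Shift == 0)
        return Low;                                // already word aligned
      uint32_t High = Base[(ByteOffset + 3) >> 2];
      return (Low >> Shift) | (High << (32 - Shift));
    }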
static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc) @@ -271,6 +276,14 @@ XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, return false; } + // Likewise if it ends with a branch table followed by an unconditional branch. + if (IsBR_JT(SecondLastInst->getOpcode()) && IsBRU(LastInst->getOpcode())) { + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return true; + } + // Otherwise, can't handle this. return true; } diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 10dc18c..46805d5 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -34,6 +34,15 @@ def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink, def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTNone, [SDNPHasChain, SDNPOptInFlag]>; +def SDT_XCoreBR_JT : SDTypeProfile<0, 2, + [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +def XCoreBR_JT : SDNode<"XCoreISD::BR_JT", SDT_XCoreBR_JT, + [SDNPHasChain]>; + +def XCoreBR_JT32 : SDNode<"XCoreISD::BR_JT32", SDT_XCoreBR_JT, + [SDNPHasChain]>; + def SDT_XCoreAddress : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; @@ -185,6 +194,15 @@ def MEMii : Operand<i32> { let MIOperandInfo = (ops i32imm, i32imm); } +// Jump tables. +def InlineJT : Operand<i32> { + let PrintMethod = "printInlineJT"; +} + +def InlineJT32 : Operand<i32> { + let PrintMethod = "printInlineJT32"; +} + //===----------------------------------------------------------------------===// // Instruction Class Templates //===----------------------------------------------------------------------===// @@ -624,7 +642,7 @@ defm RETSP : FU6_LU6<"retsp", XCoreRetsp>; // TODO extdp, kentsp, krestsp, blat, setsr // clrsr, getsr, kalli -let isBranch = 1, isTerminator = 1 in { +let isBranch = 1, isTerminator = 1, isBarrier = 1 in { def BRBU_u6 : _FU6< (outs), (ins brtarget:$target), @@ -756,24 +774,34 @@ def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), // One operand short // TODO edu, eeu, waitet, waitef, freer, tstart, msync, mjoin, syncr, clrtp -// bru, setdp, setcp, setv, setev, kcall +// setdp, setcp, setv, setev, kcall // dgetreg -let isBranch=1, isIndirectBranch=1, isTerminator=1 in +let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in def BAU_1r : _F1R<(outs), (ins GRRegs:$addr), "bau $addr", [(brind GRRegs:$addr)]>; +let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in +def BR_JT : PseudoInstXCore<(outs), (ins InlineJT:$t, GRRegs:$i), + "bru $i\n$t", + [(XCoreBR_JT tjumptable:$t, GRRegs:$i)]>; + +let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in +def BR_JT32 : PseudoInstXCore<(outs), (ins InlineJT32:$t, GRRegs:$i), + "bru $i\n$t", + [(XCoreBR_JT32 tjumptable:$t, GRRegs:$i)]>; + let Defs=[SP], neverHasSideEffects=1 in def SETSP_1r : _F1R<(outs), (ins GRRegs:$src), "set sp, $src", []>; -let isBarrier = 1, hasCtrlDep = 1 in +let hasCtrlDep = 1 in def ECALLT_1r : _F1R<(outs), (ins GRRegs:$src), "ecallt $src", []>; -let isBarrier = 1, hasCtrlDep = 1 in +let hasCtrlDep = 1 in def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src), "ecallf $src", []>; diff --git a/lib/Target/XCore/XCoreTargetObjectFile.h b/lib/Target/XCore/XCoreTargetObjectFile.h index 7efb990..7424c78 100644 --- a/lib/Target/XCore/XCoreTargetObjectFile.h +++ b/lib/Target/XCore/XCoreTargetObjectFile.h @@ -10,13 +10,12 @@ #ifndef LLVM_TARGET_XCORE_TARGETOBJECTFILE_H #define LLVM_TARGET_XCORE_TARGETOBJECTFILE_H -#include "llvm/Target/TargetLoweringObjectFile.h" +#include 
"llvm/CodeGen/TargetLoweringObjectFileImpl.h" namespace llvm { class XCoreTargetObjectFile : public TargetLoweringObjectFileELF { public: - void Initialize(MCContext &Ctx, const TargetMachine &TM); // TODO: Classify globals as xcore wishes. diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp index eac4e17..37d7a00 100644 --- a/lib/Transforms/Hello/Hello.cpp +++ b/lib/Transforms/Hello/Hello.cpp @@ -15,7 +15,6 @@ #define DEBUG_TYPE "hello" #include "llvm/Pass.h" #include "llvm/Function.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" using namespace llvm; diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 325d353..7cb1367 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -124,7 +124,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { unsigned ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++ArgNo) - if (isa<PointerType>(I->getType())) + if (I->getType()->isPointerTy()) PointerArgs.push_back(std::pair<Argument*, unsigned>(I, ArgNo)); if (PointerArgs.empty()) return 0; @@ -317,7 +317,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { GEPIndicesSet ToPromote; // If the pointer is always valid, any load with first index 0 is valid. - if(isByVal || AllCalleesPassInValidPointerForArgument(Arg)) + if (isByVal || AllCalleesPassInValidPointerForArgument(Arg)) SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); // First, iterate the entry block and mark loads of (geps of) arguments as @@ -673,7 +673,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, IE = SI->end(); II != IE; ++II) { // Use i32 to index structs, and i64 for others (pointers/arrays). // This satisfies GEP constraints. - const Type *IdxTy = (isa<StructType>(ElTy) ? + const Type *IdxTy = (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext()) : Type::getInt64Ty(F->getContext())); Ops.push_back(ConstantInt::get(IdxTy, *II)); diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index 4972687..3c05f88 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -19,10 +19,11 @@ #define DEBUG_TYPE "constmerge" #include "llvm/Transforms/IPO.h" +#include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" -#include <map> using namespace llvm; STATISTIC(NumMerged, "Number of global constants merged"); @@ -48,10 +49,10 @@ ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); } bool ConstantMerge::runOnModule(Module &M) { // Map unique constant/section pairs to globals. We don't want to merge // globals in different sections. - std::map<std::pair<Constant*, std::string>, GlobalVariable*> CMap; + DenseMap<Constant*, GlobalVariable*> CMap; // Replacements - This vector contains a list of replacements to perform. - std::vector<std::pair<GlobalVariable*, GlobalVariable*> > Replacements; + SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements; bool MadeChange = false; @@ -76,19 +77,21 @@ bool ConstantMerge::runOnModule(Module &M) { continue; } - // Only process constants with initializers. - if (GV->isConstant() && GV->hasDefinitiveInitializer()) { - Constant *Init = GV->getInitializer(); - - // Check to see if the initializer is already known. 
- GlobalVariable *&Slot = CMap[std::make_pair(Init, GV->getSection())]; - - if (Slot == 0) { // Nope, add it to the map. - Slot = GV; - } else if (GV->hasLocalLinkage()) { // Yup, this is a duplicate! - // Make all uses of the duplicate constant use the canonical version. - Replacements.push_back(std::make_pair(GV, Slot)); - } + // Only process constants with initializers in the default addres space. + if (!GV->isConstant() ||!GV->hasDefinitiveInitializer() || + GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty()) + continue; + + Constant *Init = GV->getInitializer(); + + // Check to see if the initializer is already known. + GlobalVariable *&Slot = CMap[Init]; + + if (Slot == 0) { // Nope, add it to the map. + Slot = GV; + } else if (GV->hasLocalLinkage()) { // Yup, this is a duplicate! + // Make all uses of the duplicate constant use the canonical version. + Replacements.push_back(std::make_pair(GV, Slot)); } } @@ -100,11 +103,11 @@ bool ConstantMerge::runOnModule(Module &M) { // now. This avoid invalidating the pointers in CMap, which are unneeded // now. for (unsigned i = 0, e = Replacements.size(); i != e; ++i) { - // Eliminate any uses of the dead global... + // Eliminate any uses of the dead global. Replacements[i].first->replaceAllUsesWith(Replacements[i].second); - // Delete the global value from the module... - M.getGlobalList().erase(Replacements[i].first); + // Delete the global value from the module. + Replacements[i].first->eraseFromParent(); } NumMerged += Replacements.size(); diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 1749b1e..f386ed7 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -796,7 +796,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Replace by null for now. Call->replaceAllUsesWith(Constant::getNullValue(Call->getType())); } else { - assert(isa<StructType>(RetTy) && + assert(RetTy->isStructTy() && "Return type changed, but not into a void. The old return type" " must have been a struct!"); Instruction *InsertPt = Call; @@ -870,7 +870,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { if (NFTy->getReturnType() == Type::getVoidTy(F->getContext())) { RetVal = 0; } else { - assert (isa<StructType>(RetTy)); + assert (RetTy->isStructTy()); // The original return value was a struct, insert // extractvalue/insertvalue chains to extract only the values we need // to return and insert them into our new result. diff --git a/lib/Transforms/IPO/DeadTypeElimination.cpp b/lib/Transforms/IPO/DeadTypeElimination.cpp index 025d77e..662fbb5 100644 --- a/lib/Transforms/IPO/DeadTypeElimination.cpp +++ b/lib/Transforms/IPO/DeadTypeElimination.cpp @@ -57,13 +57,13 @@ ModulePass *llvm::createDeadTypeEliminationPass() { // static inline bool ShouldNukeSymtabEntry(const Type *Ty){ // Nuke all names for primitive types! - if (Ty->isPrimitiveType() || Ty->isInteger()) + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) return true; // Nuke all pointers to primitive types as well... 
if (const PointerType *PT = dyn_cast<PointerType>(Ty)) if (PT->getElementType()->isPrimitiveType() || - PT->getElementType()->isInteger()) + PT->getElementType()->isIntegerTy()) return true; return false; diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 64a6d78..298d5cf 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -175,7 +175,7 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) { for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); CI != CE; ++CI) { Value *Arg = *CI; - if (isa<PointerType>(Arg->getType()) && !PointsToLocalMemory(Arg)) + if (Arg->getType()->isPointerTy() && !PointsToLocalMemory(Arg)) // Writes memory. Just give up. return false; } @@ -257,7 +257,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const std::vector<CallGraphNode *> &SCC) { continue; for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A!=E; ++A) - if (isa<PointerType>(A->getType()) && !A->hasNoCaptureAttr() && + if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr() && !PointerMayBeCaptured(A, true, /*StoreCaptures=*/false)) { A->addAttr(Attribute::NoCapture); ++NumNoCapture; @@ -362,7 +362,7 @@ bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) { // We annotate noalias return values, which are only applicable to // pointer types. - if (!isa<PointerType>(F->getReturnType())) + if (!F->getReturnType()->isPointerTy()) continue; if (!IsFunctionMallocLike(F, SCCNodes)) @@ -372,7 +372,7 @@ bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) { bool MadeChange = false; for (unsigned i = 0, e = SCC.size(); i != e; ++i) { Function *F = SCC[i]->getFunction(); - if (F->doesNotAlias(0) || !isa<PointerType>(F->getReturnType())) + if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy()) continue; F->setDoesNotAlias(0); diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index ac91631..7b1e9c0 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -303,7 +303,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); Changed |= CleanupConstantGlobalUsers(CE, SubInit); } else if (CE->getOpcode() == Instruction::BitCast && - isa<PointerType>(CE->getType())) { + CE->getType()->isPointerTy()) { // Pointer cast, delete any stores and memsets to the global. 
Changed |= CleanupConstantGlobalUsers(CE, 0); } @@ -431,7 +431,7 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { else if (const VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI)) NumElements = SubVectorTy->getNumElements(); else { - assert(isa<StructType>(*GEPI) && + assert((*GEPI)->isStructTy() && "Indexed GEP type is not array, vector, or struct!"); continue; } @@ -543,7 +543,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { if (NewGlobals.empty()) return 0; - + DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV); Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext())); @@ -642,7 +642,7 @@ static bool AllUsesOfValueWillTrapIfNull(Value *V, return false; } else if (isa<ICmpInst>(*UI) && isa<ConstantPointerNull>(UI->getOperand(1))) { - // Ignore setcc X, null + // Ignore icmp X, null } else { //cerr << "NONTRAPPING USE: " << **UI; return false; @@ -813,57 +813,47 @@ static void ConstantPropUsersOf(Value *V) { static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, const Type *AllocTy, - Value* NElems, + ConstantInt *NElements, TargetData* TD) { - DEBUG(dbgs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); - - const Type *IntPtrTy = TD->getIntPtrType(GV->getContext()); + DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); - // CI has either 0 or 1 bitcast uses (getMallocType() would otherwise have - // returned NULL and we would not be here). - BitCastInst *BCI = NULL; - for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); UI != E; ) - if ((BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++)))) - break; - - ConstantInt *NElements = cast<ConstantInt>(NElems); - if (NElements->getZExtValue() != 1) { - // If we have an array allocation, transform it to a single element - // allocation to make the code below simpler. - Type *NewTy = ArrayType::get(AllocTy, NElements->getZExtValue()); - unsigned TypeSize = TD->getTypeAllocSize(NewTy); - if (const StructType *ST = dyn_cast<StructType>(NewTy)) - TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); - Instruction *NewCI = CallInst::CreateMalloc(CI, IntPtrTy, NewTy, - ConstantInt::get(IntPtrTy, TypeSize)); - Value* Indices[2]; - Indices[0] = Indices[1] = Constant::getNullValue(IntPtrTy); - Value *NewGEP = GetElementPtrInst::Create(NewCI, Indices, Indices + 2, - NewCI->getName()+".el0", CI); - Value *Cast = new BitCastInst(NewGEP, CI->getType(), "el0", CI); - if (BCI) BCI->replaceAllUsesWith(NewGEP); - CI->replaceAllUsesWith(Cast); - if (BCI) BCI->eraseFromParent(); - CI->eraseFromParent(); - BCI = dyn_cast<BitCastInst>(NewCI); - CI = BCI ? extractMallocCallFromBitCast(BCI) : cast<CallInst>(NewCI); - } + const Type *GlobalType; + if (NElements->getZExtValue() == 1) + GlobalType = AllocTy; + else + // If we have an array allocation, the global variable is of an array. + GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue()); // Create the new global variable. The contents of the malloc'd memory is // undefined, so initialize with an undef value. - const Type *MAT = getMallocAllocatedType(CI); - Constant *Init = UndefValue::get(MAT); GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), - MAT, false, - GlobalValue::InternalLinkage, Init, + GlobalType, false, + GlobalValue::InternalLinkage, + UndefValue::get(GlobalType), GV->getName()+".body", GV, GV->isThreadLocal()); - // Anything that used the malloc or its bitcast now uses the global directly. 
- if (BCI) BCI->replaceAllUsesWith(NewGV); - CI->replaceAllUsesWith(new BitCastInst(NewGV, CI->getType(), "newgv", CI)); - + // If there are bitcast users of the malloc (which is typical, usually we have + // a malloc + bitcast) then replace them with uses of the new global. Update + // other users to use the global as well. + BitCastInst *TheBC = 0; + while (!CI->use_empty()) { + Instruction *User = cast<Instruction>(CI->use_back()); + if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { + if (BCI->getType() == NewGV->getType()) { + BCI->replaceAllUsesWith(NewGV); + BCI->eraseFromParent(); + } else { + BCI->setOperand(0, NewGV); + } + } else { + if (TheBC == 0) + TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI); + User->replaceUsesOfWith(CI, TheBC); + } + } + Constant *RepValue = NewGV; if (NewGV->getType() != GV->getType()->getElementType()) RepValue = ConstantExpr::getBitCast(RepValue, @@ -879,60 +869,60 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, bool InitBoolUsed = false; // Loop over all uses of GV, processing them in turn. - std::vector<StoreInst*> Stores; - while (!GV->use_empty()) - if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) { - while (!LI->use_empty()) { - Use &LoadUse = LI->use_begin().getUse(); - if (!isa<ICmpInst>(LoadUse.getUser())) - LoadUse = RepValue; - else { - ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser()); - // Replace the cmp X, 0 with a use of the bool value. - Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI); - InitBoolUsed = true; - switch (ICI->getPredicate()) { - default: llvm_unreachable("Unknown ICmp Predicate!"); - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: // X < null -> always false - LV = ConstantInt::getFalse(GV->getContext()); - break; - case ICmpInst::ICMP_ULE: - case ICmpInst::ICMP_SLE: - case ICmpInst::ICMP_EQ: - LV = BinaryOperator::CreateNot(LV, "notinit", ICI); - break; - case ICmpInst::ICMP_NE: - case ICmpInst::ICMP_UGE: - case ICmpInst::ICMP_SGE: - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: - break; // no change. - } - ICI->replaceAllUsesWith(LV); - ICI->eraseFromParent(); - } - } - LI->eraseFromParent(); - } else { - StoreInst *SI = cast<StoreInst>(GV->use_back()); + while (!GV->use_empty()) { + if (StoreInst *SI = dyn_cast<StoreInst>(GV->use_back())) { // The global is initialized when the store to it occurs. new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, SI); SI->eraseFromParent(); + continue; } + + LoadInst *LI = cast<LoadInst>(GV->use_back()); + while (!LI->use_empty()) { + Use &LoadUse = LI->use_begin().getUse(); + if (!isa<ICmpInst>(LoadUse.getUser())) { + LoadUse = RepValue; + continue; + } + + ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser()); + // Replace the cmp X, 0 with a use of the bool value. + Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI); + InitBoolUsed = true; + switch (ICI->getPredicate()) { + default: llvm_unreachable("Unknown ICmp Predicate!"); + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: // X < null -> always false + LV = ConstantInt::getFalse(GV->getContext()); + break; + case ICmpInst::ICMP_ULE: + case ICmpInst::ICMP_SLE: + case ICmpInst::ICMP_EQ: + LV = BinaryOperator::CreateNot(LV, "notinit", ICI); + break; + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_UGE: + case ICmpInst::ICMP_SGE: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: + break; // no change. 
+ } + ICI->replaceAllUsesWith(LV); + ICI->eraseFromParent(); + } + LI->eraseFromParent(); + } // If the initialization boolean was used, insert it, otherwise delete it. if (!InitBoolUsed) { while (!InitBool->use_empty()) // Delete initializations - cast<Instruction>(InitBool->use_back())->eraseFromParent(); + cast<StoreInst>(InitBool->use_back())->eraseFromParent(); delete InitBool; } else GV->getParent()->getGlobalList().insert(GV, InitBool); - - // Now the GV is dead, nuke it and the malloc (both CI and BCI). + // Now the GV is dead, nuke it and the malloc.. GV->eraseFromParent(); - if (BCI) BCI->eraseFromParent(); CI->eraseFromParent(); // To further other optimizations, loop over all users of NewGV and try to @@ -1303,9 +1293,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, ConstantInt::get(IntPtrTy, TypeSize), NElems, CI->getName() + ".f" + Twine(FieldNo)); - CallInst *NCI = dyn_cast<BitCastInst>(NMI) ? - extractMallocCallFromBitCast(NMI) : cast<CallInst>(NMI); - FieldMallocs.push_back(NCI); + FieldMallocs.push_back(NMI); new StoreInst(NMI, NGV, CI); } @@ -1497,7 +1485,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // something. if (TD && NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElems, TD); + GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD); return true; } @@ -1556,7 +1544,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, // only has one (non-null) value stored into it, then we can optimize any // users of the loaded value (often calls and loads) that would trap if the // value was null. - if (isa<PointerType>(GV->getInitializer()->getType()) && + if (GV->getInitializer()->getType()->isPointerTy() && GV->getInitializer()->isNullValue()) { if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) { if (GV->getInitializer()->getType() != SOVC->getType()) @@ -1590,8 +1578,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { // simplification. In these cases, we typically end up with "cond ? v1 : v2" // where v1 and v2 both require constant pool loads, a big loss. if (GVElType == Type::getInt1Ty(GV->getContext()) || - GVElType->isFloatingPoint() || - isa<PointerType>(GVElType) || isa<VectorType>(GVElType)) + GVElType->isFloatingPointTy() || + GVElType->isPointerTy() || GVElType->isVectorTy()) return false; // Walk the use list of the global seeing if all the uses are load or store. 
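One detail of the OptimizeGlobalAddressOfMalloc rewrite above worth spelling out: once the malloc'd global gains a separate "init" boolean, every comparison of the loaded pointer against null is rewritten into a test of that boolean. A standalone model of the predicate mapping, with an illustrative enum rather than LLVM's own types:

    // "Init" is true once the global has been stored to (i.e. the malloc ran).
    enum class Pred { ULT, SLT, ULE, SLE, EQ, NE, UGE, SGE, UGT, SGT };

    static bool mapNullCompare(Pred P, bool Init) {
      switch (P) {
      case Pred::ULT: case Pred::SLT:                // ptr <  null can never hold
        return false;
      case Pred::ULE: case Pred::SLE: case Pred::EQ: // ptr == null  <=>  not yet initialized
        return !Init;
      default:                                       // ne/uge/sge/ugt/sgt  <=>  initialized
        return Init;
      }
    }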
@@ -1925,7 +1913,7 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { if (!ATy) return 0; const StructType *STy = dyn_cast<StructType>(ATy->getElementType()); if (!STy || STy->getNumElements() != 2 || - !STy->getElementType(0)->isInteger(32)) return 0; + !STy->getElementType(0)->isIntegerTy(32)) return 0; const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1)); if (!PFTy) return 0; const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType()); @@ -2148,7 +2136,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, Elts[CI->getZExtValue()] = EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); - if (isa<ArrayType>(Init->getType())) + if (Init->getType()->isArrayTy()) return ConstantArray::get(cast<ArrayType>(InitTy), Elts); else return ConstantVector::get(&Elts[0], Elts.size()); diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 97e2f06..752a97c 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -41,12 +41,9 @@ static cl::opt<int> InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore, cl::desc("Control the amount of inlining to perform (default = 225)")); -static cl::opt<bool> -RespectHint("respect-inlinehint", cl::Hidden, - cl::desc("Respect the inlinehint attribute")); - -// Threshold to use when inlinehint is given. -const int HintThreshold = 300; +static cl::opt<int> +HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325), + cl::desc("Threshold for inlining functions with inline hint")); // Threshold to use when optsize is specified (and there is no -inline-limit). const int OptSizeThreshold = 75; @@ -183,20 +180,22 @@ static bool InlineCallIfPossible(CallSite CS, CallGraph &CG, } unsigned Inliner::getInlineThreshold(CallSite CS) const { - // Listen to inlinehint when -respect-inlinehint is given. - Function *Callee = CS.getCalledFunction(); - if (RespectHint && Callee && !Callee->isDeclaration() && - Callee->hasFnAttr(Attribute::InlineHint)) - return HintThreshold; + int thres = InlineThreshold; // Listen to optsize when -inline-limit is not given. Function *Caller = CS.getCaller(); if (Caller && !Caller->isDeclaration() && Caller->hasFnAttr(Attribute::OptimizeForSize) && InlineLimit.getNumOccurrences() == 0) - return OptSizeThreshold; + thres = OptSizeThreshold; + + // Listen to inlinehint when it would increase the threshold. 
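To summarize the getInlineThreshold logic in this Inliner hunk: the base threshold comes from -inline-threshold, optsize callers shrink it (unless -inline-threshold was given explicitly), and an inlinehint callee can only raise it. A standalone sketch using the constants from this patch (225 / 75 / 325); the function name and flag parameters are illustrative:

    int chooseInlineThreshold(bool CallerOptSize, bool CalleeInlineHint,
                              bool InlineLimitFlagGiven) {
      int Thres = 225;                          // -inline-threshold default
      if (CallerOptSize && !InlineLimitFlagGiven)
        Thres = 75;                             // optsize shrinks the budget
      if (CalleeInlineHint && 325 > Thres)
        Thres = 325;                            // inlinehint only ever raises it
      return Thres;
    }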
+ Function *Callee = CS.getCalledFunction(); + if (HintThreshold > thres && Callee && !Callee->isDeclaration() && + Callee->hasFnAttr(Attribute::InlineHint)) + thres = HintThreshold; - return InlineThreshold; + return thres; } /// shouldInline - Return true if the inliner should attempt to inline diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 0e0d83a..310e4a2 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -214,6 +214,15 @@ static bool StripDebugInfo(Module &M) { Changed = true; } + if (Function *DbgVal = M.getFunction("llvm.dbg.value")) { + while (!DbgVal->use_empty()) { + CallInst *CI = cast<CallInst>(DbgVal->use_back()); + CI->eraseFromParent(); + } + DbgVal->eraseFromParent(); + Changed = true; + } + NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv"); if (NMD) { Changed = true; diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 5367900..bd06499 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -117,11 +117,11 @@ public: Instruction *visitUDiv(BinaryOperator &I); Instruction *visitSDiv(BinaryOperator &I); Instruction *visitFDiv(BinaryOperator &I); - Instruction *FoldAndOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS); - Instruction *FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS); + Value *FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS); + Value *FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS); Instruction *visitAnd(BinaryOperator &I); - Instruction *FoldOrOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS); - Instruction *FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS); + Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS); + Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS); Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A, Value *B, Value *C); Instruction *visitOr (BinaryOperator &I); @@ -199,13 +199,15 @@ private: SmallVectorImpl<Value*> &NewIndices); Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); - /// ValueRequiresCast - Return true if the cast from "V to Ty" actually - /// results in any code being generated. It does not require codegen if V is - /// simple enough or if the cast can be folded into other casts. - bool ValueRequiresCast(Instruction::CastOps opcode,const Value *V, - const Type *Ty); + /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually + /// results in any code being generated and is interesting to optimize out. If + /// the cast can be eliminated by some other simple transformation, we prefer + /// to do the simplification first. 
+ bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V, + const Type *Ty); Instruction *visitCallSite(CallSite CS); + Instruction *tryOptimizeCall(CallInst *CI, const TargetData *TD); bool transformConstExprCastCall(CallSite CS); Instruction *transformCallThroughTrampoline(CallSite CS); Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI, @@ -326,8 +328,8 @@ private: Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask, bool isSub, Instruction &I); - Instruction *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, - bool isSigned, bool Inside, Instruction &IB); + Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, + bool isSigned, bool Inside); Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI); Instruction *MatchBSwap(BinaryOperator &I); bool SimplifyStoreAtEndOfBlock(StoreInst &SI); diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index c2924ab..4d2c89e 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -35,7 +35,7 @@ static Constant *SubOne(ConstantInt *C) { // Otherwise, return null. // static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { - if (!V->hasOneUse() || !V->getType()->isInteger()) + if (!V->hasOneUse() || !V->getType()->isIntegerTy()) return 0; Instruction *I = dyn_cast<Instruction>(V); @@ -145,10 +145,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } } - if (I.getType()->isInteger(1)) + if (I.getType()->isIntegerTy(1)) return BinaryOperator::CreateXor(LHS, RHS); - if (I.getType()->isInteger()) { + if (I.getType()->isIntegerTy()) { // X + X --> X << 1 if (LHS == RHS) return BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1)); @@ -168,7 +168,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // -A + B --> B - A // -A + -B --> -(A + B) if (Value *LHSV = dyn_castNegVal(LHS)) { - if (LHS->getType()->isIntOrIntVector()) { + if (LHS->getType()->isIntOrIntVectorTy()) { if (Value *RHSV = dyn_castNegVal(RHS)) { Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); return BinaryOperator::CreateNeg(NewAdd); @@ -222,7 +222,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } // W*X + Y*Z --> W * (X+Z) iff W == Y - if (I.getType()->isIntOrIntVector()) { + if (I.getType()->isIntOrIntVectorTy()) { Value *W, *X, *Y, *Z; if (match(LHS, m_Mul(m_Value(W), m_Value(X))) && match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) { @@ -373,10 +373,10 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS)) return ReplaceInstUsesWith(I, LHS); - // Check for (add double (sitofp x), y), see if we can merge this into an + // Check for (fadd double (sitofp x), y), see if we can merge this into an // integer add followed by a promotion. if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) { - // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst)) + // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst)) // ... if the constant fits in the integer value. 
This is useful for things // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer // requires a constant pool load, and generally allows the add to be better @@ -394,7 +394,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { } } - // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) + // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) { // Only do this if x/y have the same type, if at last one of them has a // single use (so we don't increase the number of int->fp conversions), @@ -560,7 +560,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return ReplaceInstUsesWith(I, Op0); // undef - X -> undef if (isa<UndefValue>(Op1)) return ReplaceInstUsesWith(I, Op1); // X - undef -> undef - if (I.getType()->isInteger(1)) + if (I.getType()->isIntegerTy(1)) return BinaryOperator::CreateXor(Op0, Op1); if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) { diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 28fd70e..3fb3de7 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -137,80 +137,44 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) { /// opcode and two operands into either a constant true or false, or a brand /// new ICmp instruction. The sign is passed in to determine which kind /// of predicate to use in the new icmp instruction. -static Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS) { +static Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS, + InstCombiner::BuilderTy *Builder) { + CmpInst::Predicate Pred; switch (Code) { default: assert(0 && "Illegal ICmp code!"); - case 0: - return ConstantInt::getFalse(LHS->getContext()); - case 1: - if (Sign) - return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS); - return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS); - case 2: - return new ICmpInst(ICmpInst::ICMP_EQ, LHS, RHS); - case 3: - if (Sign) - return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS); - return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS); - case 4: - if (Sign) - return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS); - return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS); - case 5: - return new ICmpInst(ICmpInst::ICMP_NE, LHS, RHS); - case 6: - if (Sign) - return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS); - return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS); - case 7: - return ConstantInt::getTrue(LHS->getContext()); + case 0: // False. + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); + case 1: Pred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; + case 2: Pred = ICmpInst::ICMP_EQ; break; + case 3: Pred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; + case 4: Pred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; + case 5: Pred = ICmpInst::ICMP_NE; break; + case 6: Pred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; + case 7: // True. + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1); } + return Builder->CreateICmp(Pred, LHS, RHS); } /// getFCmpValue - This is the complement of getFCmpCode, which turns an /// opcode and two operands into either a FCmp instruction. isordered is passed /// in to determine which kind of predicate to use in the new fcmp instruction. 
static Value *getFCmpValue(bool isordered, unsigned code, - Value *LHS, Value *RHS) { + Value *LHS, Value *RHS, + InstCombiner::BuilderTy *Builder) { + CmpInst::Predicate Pred; switch (code) { - default: llvm_unreachable("Illegal FCmp code!"); - case 0: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS); - case 1: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS); - case 2: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS); - case 3: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS); - case 4: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS); - case 5: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS); - case 6: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS); - case 7: return ConstantInt::getTrue(LHS->getContext()); + default: assert(0 && "Illegal FCmp code!"); + case 0: Pred = isordered ? FCmpInst::FCMP_ORD : FCmpInst::FCMP_UNO; break; + case 1: Pred = isordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT; break; + case 2: Pred = isordered ? FCmpInst::FCMP_OEQ : FCmpInst::FCMP_UEQ; break; + case 3: Pred = isordered ? FCmpInst::FCMP_OGE : FCmpInst::FCMP_UGE; break; + case 4: Pred = isordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; break; + case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break; + case 6: Pred = isordered ? FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break; + case 7: return ConstantInt::getTrue(LHS->getContext()); } + return Builder->CreateFCmp(Pred, LHS, RHS); } /// PredicatesFoldable - Return true if both predicates match sign or if at @@ -355,40 +319,39 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, /// (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates /// whether to treat the V, Lo and HI as signed or not. IB is the location to /// insert new instructions. -Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, - bool isSigned, bool Inside, - Instruction &IB) { +Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, + bool isSigned, bool Inside) { assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ? ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() && "Lo is not <= Hi in range emission code!"); if (Inside) { if (Lo == Hi) // Trivially false. - return new ICmpInst(ICmpInst::ICMP_NE, V, V); + return ConstantInt::getFalse(V->getContext()); // V >= Min && V < Hi --> V < Hi if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) { ICmpInst::Predicate pred = (isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT); - return new ICmpInst(pred, V, Hi); + return Builder->CreateICmp(pred, V, Hi); } // Emit V-Lo <u Hi-Lo Constant *NegLo = ConstantExpr::getNeg(Lo); Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi); - return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound); + return Builder->CreateICmpULT(Add, UpperBound); } if (Lo == Hi) // Trivially true. 
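The InsertRangeTest rewrite above keeps the usual trick of turning a two-sided range check into a single unsigned comparison: for Lo <= Hi, "Lo <= V < Hi" is equivalent to "(V - Lo) <u (Hi - Lo)" because values below Lo wrap around past Hi - Lo. A worked standalone instance, not taken from the patch:

    #include <cstdint>

    // Single-compare range test; assumes Lo <= Hi in unsigned terms.
    static bool inRange(uint32_t V, uint32_t Lo, uint32_t Hi) {
      return (V - Lo) < (Hi - Lo);
    }
    // e.g. (X u> 13 && X u< 15) folds to (X - 14) <u 1, i.e. inRange(X, 14, 15).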
- return new ICmpInst(ICmpInst::ICMP_EQ, V, V); + return ConstantInt::getTrue(V->getContext()); // V < Min || V >= Hi -> V > Hi-1 Hi = SubOne(cast<ConstantInt>(Hi)); if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) { ICmpInst::Predicate pred = (isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT); - return new ICmpInst(pred, V, Hi); + return Builder->CreateICmp(pred, V, Hi); } // Emit V-Lo >u Hi-1-Lo @@ -396,7 +359,7 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo)); Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi); - return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound); + return Builder->CreateICmpUGT(Add, LowerBound); } // isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with @@ -472,8 +435,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, } /// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. -Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, - ICmpInst *LHS, ICmpInst *RHS) { +Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) @@ -486,11 +448,7 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); unsigned Code = getICmpCode(LHS) & getICmpCode(RHS); bool isSigned = LHS->isSigned() || RHS->isSigned(); - Value *RV = getICmpValue(isSigned, Code, Op0, Op1); - if (Instruction *I = dyn_cast<Instruction>(RV)) - return I; - // Otherwise, it's a constant boolean value. - return ReplaceInstUsesWith(I, RV); + return getICmpValue(isSigned, Code, Op0, Op1, Builder); } } @@ -506,13 +464,13 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, if (LHSCC == ICmpInst::ICMP_ULT && LHSCst->getValue().isPowerOf2()) { Value *NewOr = Builder->CreateOr(Val, Val2); - return new ICmpInst(LHSCC, NewOr, LHSCst); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) { Value *NewOr = Builder->CreateOr(Val, Val2); - return new ICmpInst(LHSCC, NewOr, LHSCst); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } } @@ -562,33 +520,32 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 - return ReplaceInstUsesWith(I, LHS); + return LHS; } case ICmpInst::ICMP_NE: switch (RHSCC) { default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_ULT: if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13 - return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst); + return Builder->CreateICmpULT(Val, LHSCst); break; // (X != 13 & X u< 15) -> no change case ICmpInst::ICMP_SLT: if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13 - return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst); + return Builder->CreateICmpSLT(Val, LHSCst); break; // (X != 13 & X s< 15) -> no 
change case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15 case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15 case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15 - return ReplaceInstUsesWith(I, RHS); + return RHS; case ICmpInst::ICMP_NE: if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1 Constant *AddCST = ConstantExpr::getNeg(LHSCst); Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); - return new ICmpInst(ICmpInst::ICMP_UGT, Add, - ConstantInt::get(Add->getType(), 1)); + return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1)); } break; // (X != 13 & X != 15) -> no change } @@ -598,12 +555,12 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change break; case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13 case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13 - return ReplaceInstUsesWith(I, LHS); + return LHS; case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change break; } @@ -613,12 +570,12 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change break; case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13 - return ReplaceInstUsesWith(I, LHS); + return LHS; case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change break; } @@ -628,16 +585,15 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15 case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15 - return ReplaceInstUsesWith(I, RHS); + return RHS; case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change break; case ICmpInst::ICMP_NE: if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14 - return new ICmpInst(LHSCC, Val, RHSCst); + return Builder->CreateICmp(LHSCC, Val, RHSCst); break; // (X u> 13 & X != 15) -> no change case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1 - return InsertRangeTest(Val, AddOne(LHSCst), - RHSCst, false, true, I); + return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, false, true); case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change break; } @@ -647,16 +603,15 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15 case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15 - return ReplaceInstUsesWith(I, RHS); + return RHS; case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change break; case ICmpInst::ICMP_NE: if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14 - return new ICmpInst(LHSCC, Val, RHSCst); + return Builder->CreateICmp(LHSCC, Val, RHSCst); break; // (X s> 13 & X != 15) -> no change case 
ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1 - return InsertRangeTest(Val, AddOne(LHSCst), - RHSCst, true, true, I); + return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, true, true); case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change break; } @@ -666,9 +621,10 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, return 0; } -Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, - FCmpInst *RHS) { - +/// FoldAndOfFCmps - Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of +/// instcombine, this returns a Value which should already be inserted into the +/// function. +Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { if (LHS->getPredicate() == FCmpInst::FCMP_ORD && RHS->getPredicate() == FCmpInst::FCMP_ORD) { // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) @@ -677,17 +633,15 @@ Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, // If either of the constants are nans, then the whole thing returns // false. if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - return new FCmpInst(FCmpInst::FCMP_ORD, - LHS->getOperand(0), RHS->getOperand(0)); + return ConstantInt::getFalse(LHS->getContext()); + return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); } // Handle vector zeros. This occurs because the canonical form of // "fcmp ord x,x" is "fcmp ord x, 0". if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && isa<ConstantAggregateZero>(RHS->getOperand(1))) - return new FCmpInst(FCmpInst::FCMP_ORD, - LHS->getOperand(0), RHS->getOperand(0)); + return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); return 0; } @@ -705,14 +659,13 @@ Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). 
if (Op0CC == Op1CC) - return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); - + return Builder->CreateFCmp((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); if (Op0CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, RHS); + return RHS; if (Op1CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, LHS); + return LHS; bool Op0Ordered; bool Op1Ordered; @@ -727,14 +680,14 @@ Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, // uno && ueq -> uno && (uno || eq) -> ueq // ord && olt -> ord && (ord && lt) -> olt if (Op0Ordered == Op1Ordered) - return ReplaceInstUsesWith(I, RHS); + return RHS; // uno && oeq -> uno && (ord && eq) -> false // uno && ord -> false if (!Op0Ordered) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); // ord && ueq -> ord && (uno || eq) -> oeq - return cast<Instruction>(getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS)); + return getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS, Builder); } } @@ -930,26 +883,47 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0)) - if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) - return Res; - + if (Value *Res = FoldAndOfICmps(LHS, RHS)) + return ReplaceInstUsesWith(I, Res); + + // If and'ing two fcmp, try combine them into one. + if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) + if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) + if (Value *Res = FoldAndOfFCmps(LHS, RHS)) + return ReplaceInstUsesWith(I, Res); + + // fold (and (cast A), (cast B)) -> (cast (and A, B)) if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) - if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) - if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ? - const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && - SrcTy->isIntOrIntVector() && - // Only do this if the casts both really cause code to be generated. - ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType()) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType())) { - Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0), - Op1C->getOperand(0), I.getName()); + if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) { + const Type *SrcTy = Op0C->getOperand(0)->getType(); + if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ? + SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVectorTy()) { + Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0); + + // Only do this if the casts both really cause code to be generated. + if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && + ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { + Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp, I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } + + // If this is and(cast(icmp), cast(icmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. 
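On the vector-sext case mentioned just above: folding and(sext(cmp), sext(cmp)) into sext(cmp & cmp) relies on sign-extension of an i1 lane producing an all-ones or all-zero mask. A tiny standalone check of that identity, purely illustrative and not LLVM code:

    #include <cstdint>

    static uint32_t sext1(bool B) { return B ? 0xFFFFFFFFu : 0u; }

    int main() {
      for (int a = 0; a <= 1; ++a)
        for (int b = 0; b <= 1; ++b)
          if ((sext1(a) & sext1(b)) != sext1(a && b))
            return 1;   // never taken: the identity holds for all i1 inputs
      return 0;
    }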
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp)) + if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp)) + if (Value *Res = FoldAndOfICmps(LHS, RHS)) + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + + // If this is and(cast(fcmp), cast(fcmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp)) + if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp)) + if (Value *Res = FoldAndOfFCmps(LHS, RHS)) + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); } + } // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts. if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) { @@ -965,13 +939,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } } - // If and'ing two fcmp, try combine them into one. - if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) { - if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) - if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) - return Res; - } - return Changed ? &I : 0; } @@ -1143,7 +1110,7 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B, // If A is not a select of -1/0, this cannot match. Value *Cond = 0; if (!match(A, m_SExt(m_Value(Cond))) || - !Cond->getType()->isInteger(1)) + !Cond->getType()->isIntegerTy(1)) return 0; // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B. @@ -1161,8 +1128,7 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B, } /// FoldOrOfICmps - Fold (icmp)|(icmp) if possible. -Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, - ICmpInst *LHS, ICmpInst *RHS) { +Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) @@ -1175,11 +1141,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); unsigned Code = getICmpCode(LHS) | getICmpCode(RHS); bool isSigned = LHS->isSigned() || RHS->isSigned(); - Value *RV = getICmpValue(isSigned, Code, Op0, Op1); - if (Instruction *I = dyn_cast<Instruction>(RV)) - return I; - // Otherwise, it's a constant boolean value. 
- return ReplaceInstUsesWith(I, RV); + return getICmpValue(isSigned, Code, Op0, Op1, Builder); } } @@ -1193,7 +1155,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, if (LHSCst == RHSCst && LHSCC == RHSCC && LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { Value *NewOr = Builder->CreateOr(Val, Val2); - return new ICmpInst(LHSCC, NewOr, LHSCst); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } // From here on, we only handle: @@ -1245,7 +1207,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, Constant *AddCST = ConstantExpr::getNeg(LHSCst); Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); - return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST); + return Builder->CreateICmpULT(Add, AddCST); } break; // (X == 13 | X == 15) -> no change case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change @@ -1254,7 +1216,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15 case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15 case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15 - return ReplaceInstUsesWith(I, RHS); + return RHS; } break; case ICmpInst::ICMP_NE: @@ -1263,11 +1225,11 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13 case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13 case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13 - return ReplaceInstUsesWith(I, LHS); + return LHS; case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ConstantInt::getTrue(LHS->getContext()); } break; case ICmpInst::ICMP_ULT: @@ -1279,14 +1241,13 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, // If RHSCst is [us]MAXINT, it is always false. Not handling // this can cause overflow. if (RHSCst->isMaxValue(false)) - return ReplaceInstUsesWith(I, LHS); - return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), - false, false, I); + return LHS; + return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), false, false); case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change break; case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15 case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15 - return ReplaceInstUsesWith(I, RHS); + return RHS; case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change break; } @@ -1300,14 +1261,13 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, // If RHSCst is [us]MAXINT, it is always false. Not handling // this can cause overflow. 
if (RHSCst->isMaxValue(true)) - return ReplaceInstUsesWith(I, LHS); - return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), - true, false, I); + return LHS; + return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), true, false); case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change break; case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15 case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15 - return ReplaceInstUsesWith(I, RHS); + return RHS; case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change break; } @@ -1317,12 +1277,12 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13 case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13 - return ReplaceInstUsesWith(I, LHS); + return LHS; case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change break; case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ConstantInt::getTrue(LHS->getContext()); case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change break; } @@ -1332,12 +1292,12 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13 case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13 - return ReplaceInstUsesWith(I, LHS); + return LHS; case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change break; case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ConstantInt::getTrue(LHS->getContext()); case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change break; } @@ -1346,8 +1306,10 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, return 0; } -Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, - FCmpInst *RHS) { +/// FoldOrOfFCmps - Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of +/// instcombine, this returns a Value which should already be inserted into the +/// function. +Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { if (LHS->getPredicate() == FCmpInst::FCMP_UNO && RHS->getPredicate() == FCmpInst::FCMP_UNO && LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) { @@ -1356,20 +1318,18 @@ Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, // If either of the constants are nans, then the whole thing returns // true. if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ConstantInt::getTrue(LHS->getContext()); // Otherwise, no need to compare the two constants, compare the // rest. - return new FCmpInst(FCmpInst::FCMP_UNO, - LHS->getOperand(0), RHS->getOperand(0)); + return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); } // Handle vector zeros. This occurs because the canonical form of // "fcmp uno x,x" is "fcmp uno x, 0". 
if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && isa<ConstantAggregateZero>(RHS->getOperand(1))) - return new FCmpInst(FCmpInst::FCMP_UNO, - LHS->getOperand(0), RHS->getOperand(0)); + return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); return 0; } @@ -1386,14 +1346,13 @@ Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y). if (Op0CC == Op1CC) - return new FCmpInst((FCmpInst::Predicate)Op0CC, - Op0LHS, Op0RHS); + return Builder->CreateFCmp((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1); if (Op0CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, RHS); + return RHS; if (Op1CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, LHS); + return LHS; bool Op0Ordered; bool Op1Ordered; unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); @@ -1401,11 +1360,7 @@ Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, if (Op0Ordered == Op1Ordered) { // If both are ordered or unordered, return a new fcmp with // or'ed predicates. - Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS); - if (Instruction *I = dyn_cast<Instruction>(RV)) - return I; - // Otherwise, it's a constant boolean value... - return ReplaceInstUsesWith(I, RV); + return getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS, Builder); } } return 0; @@ -1446,8 +1401,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Value *V = SimplifyOrInst(Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); - - + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) @@ -1456,7 +1410,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { ConstantInt *C1 = 0; Value *X = 0; // (X & C1) | C2 --> (X | C2) & (C1|C2) + // iff (C1 & C2) == 0. if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && + (RHS->getValue() & C1->getValue()) != 0 && Op0->hasOneUse()) { Value *Or = Builder->CreateOr(X, RHS); Or->takeName(Op0); @@ -1479,6 +1435,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) if (Instruction *R = FoldOpIntoSelect(I, SI)) return R; + if (isa<PHINode>(Op0)) if (Instruction *NV = FoldOpIntoPhi(I)) return NV; @@ -1600,7 +1557,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants. // Don't do this for vector select idioms, the code generator doesn't handle // them well yet. 
- if (!isa<VectorType>(I.getType())) { + if (!I.getType()->isVectorTy()) { if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D)) return Match; if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C)) @@ -1666,40 +1623,50 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) - if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) - return Res; + if (Value *Res = FoldOrOfICmps(LHS, RHS)) + return ReplaceInstUsesWith(I, Res); + // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) + if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) + if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) + if (Value *Res = FoldOrOfFCmps(LHS, RHS)) + return ReplaceInstUsesWith(I, Res); + // fold (or (cast A), (cast B)) -> (cast (or A, B)) if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? - if (!isa<ICmpInst>(Op0C->getOperand(0)) || - !isa<ICmpInst>(Op1C->getOperand(0))) { - const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && - SrcTy->isIntOrIntVector() && + const Type *SrcTy = Op0C->getOperand(0)->getType(); + if (SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVectorTy()) { + Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0); + + if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) && // Only do this if the casts both really cause code to be // generated. - ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType()) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType())) { - Value *NewOp = Builder->CreateOr(Op0C->getOperand(0), - Op1C->getOperand(0), I.getName()); + ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && + ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { + Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } + + // If this is or(cast(icmp), cast(icmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp)) + if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp)) + if (Value *Res = FoldOrOfICmps(LHS, RHS)) + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + + // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp)) + if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp)) + if (Value *Res = FoldOrOfFCmps(LHS, RHS)) + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); } } } - - // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) - if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) { - if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) - if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) - return Res; - } - return Changed ? &I : 0; } @@ -1723,7 +1690,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // purpose is to compute bits we don't care about. 
if (SimplifyDemandedInstructionBits(I)) return &I; - if (isa<VectorType>(I.getType())) + if (I.getType()->isVectorTy()) if (isa<ConstantAggregateZero>(Op1)) return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X @@ -1971,11 +1938,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS); bool isSigned = LHS->isSigned() || RHS->isSigned(); - Value *RV = getICmpValue(isSigned, Code, Op0, Op1); - if (Instruction *I = dyn_cast<Instruction>(RV)) - return I; - // Otherwise, it's a constant boolean value. - return ReplaceInstUsesWith(I, RV); + return ReplaceInstUsesWith(I, + getICmpValue(isSigned, Code, Op0, Op1, Builder)); } } @@ -1984,12 +1948,12 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind? const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() && + if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntegerTy() && // Only do this if the casts both really cause code to be generated. - ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType()) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType())) { + ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0), + I.getType()) && + ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0), + I.getType())) { Value *NewOp = Builder->CreateXor(Op0C->getOperand(0), Op1C->getOperand(0), I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 4929f40..e2b7d3d 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -16,6 +16,7 @@ #include "llvm/Support/CallSite.h" #include "llvm/Target/TargetData.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" using namespace llvm; /// getPromotedType - Return the specified type promoted as it would be to pass @@ -199,7 +200,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { // Extract the length and alignment and fill if they are constant. ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength()); ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue()); - if (!LenC || !FillC || !FillC->getType()->isInteger(8)) + if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8)) return 0; uint64_t Len = LenC->getZExtValue(); Alignment = MI->getAlignment(); @@ -304,23 +305,77 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::objectsize: { + // We need target data for just about everything so depend on it. + if (!TD) break; + const Type *ReturnTy = CI.getType(); - Value *Op1 = II->getOperand(1); bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1); + + // Get to the real allocated thing and offset as fast as possible. + Value *Op1 = II->getOperand(1)->stripPointerCasts(); - if (!TD) break; - Op1 = Op1->stripPointerCasts(); - + // If we've stripped down to a single global variable that we + // can know the size of then just return that. 
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) { if (GV->hasDefinitiveInitializer()) { Constant *C = GV->getInitializer(); - size_t globalSize = TD->getTypeAllocSize(C->getType()); - return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, globalSize)); + uint64_t GlobalSize = TD->getTypeAllocSize(C->getType()); + return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, GlobalSize)); } else { + // Can't determine size of the GV. Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); return ReplaceInstUsesWith(CI, RetVal); } - } + } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) { + // Get alloca size. + if (AI->getAllocatedType()->isSized()) { + uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType()); + if (AI->isArrayAllocation()) { + const ConstantInt *C = dyn_cast<ConstantInt>(AI->getArraySize()); + if (!C) break; + AllocaSize *= C->getZExtValue(); + } + return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, AllocaSize)); + } + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op1)) { + // Only handle constant GEPs here. + if (CE->getOpcode() != Instruction::GetElementPtr) break; + GEPOperator *GEP = cast<GEPOperator>(CE); + + // Make sure we're not a constant offset from an external + // global. + Value *Operand = GEP->getPointerOperand(); + Operand = Operand->stripPointerCasts(); + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand)) + if (!GV->hasDefinitiveInitializer()) break; + + // Get what we're pointing to and its size. + const PointerType *BaseType = + cast<PointerType>(Operand->getType()); + uint64_t Size = TD->getTypeAllocSize(BaseType->getElementType()); + + // Get the current byte offset into the thing. Use the original + // operand in case we're looking through a bitcast. + SmallVector<Value*, 8> Ops(CE->op_begin()+1, CE->op_end()); + const PointerType *OffsetType = + cast<PointerType>(GEP->getPointerOperand()->getType()); + uint64_t Offset = TD->getIndexedOffset(OffsetType, &Ops[0], Ops.size()); + + if (Size < Offset) { + // Out of bound reference? Negative index normalized to large + // index? Just return "I don't know". + Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); + return ReplaceInstUsesWith(CI, RetVal); + } + + Constant *RetVal = ConstantInt::get(ReturnTy, Size-Offset); + return ReplaceInstUsesWith(CI, RetVal); + + } + + // Do not return "I don't know" here. Later optimization passes could + // make it possible to evaluate objectsize to a constant. + break; } case Intrinsic::bswap: // bswap(bswap(x)) -> x @@ -686,6 +741,122 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS, return true; } +// Try to fold some different type of calls here. +// Currently we're only working with the checking functions, memcpy_chk, +// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk, +// strcat_chk and strncat_chk. +Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) { + if (CI->getCalledFunction() == 0) return 0; + + StringRef Name = CI->getCalledFunction()->getName(); + BasicBlock *BB = CI->getParent(); + IRBuilder<> B(CI->getParent()->getContext()); + + // Set the builder to the instruction after the call. 
+ B.SetInsertPoint(BB, CI); + + if (Name == "__memcpy_chk") { + ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4)); + if (!SizeCI) + return 0; + ConstantInt *SizeArg = dyn_cast<ConstantInt>(CI->getOperand(3)); + if (!SizeArg) + return 0; + if (SizeCI->isAllOnesValue() || + SizeCI->getZExtValue() <= SizeArg->getZExtValue()) { + EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), + 1, B, TD); + return ReplaceInstUsesWith(*CI, CI->getOperand(1)); + } + return 0; + } + + // Should be similar to memcpy. + if (Name == "__mempcpy_chk") { + return 0; + } + + if (Name == "__memmove_chk") { + ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4)); + if (!SizeCI) + return 0; + ConstantInt *SizeArg = dyn_cast<ConstantInt>(CI->getOperand(3)); + if (!SizeArg) + return 0; + if (SizeCI->isAllOnesValue() || + SizeCI->getZExtValue() <= SizeArg->getZExtValue()) { + EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), + 1, B, TD); + return ReplaceInstUsesWith(*CI, CI->getOperand(1)); + } + return 0; + } + + if (Name == "__memset_chk") { + ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4)); + if (!SizeCI) + return 0; + ConstantInt *SizeArg = dyn_cast<ConstantInt>(CI->getOperand(3)); + if (!SizeArg) + return 0; + if (SizeCI->isAllOnesValue() || + SizeCI->getZExtValue() <= SizeArg->getZExtValue()) { + Value *Val = B.CreateIntCast(CI->getOperand(2), B.getInt8Ty(), + false); + EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B, TD); + return ReplaceInstUsesWith(*CI, CI->getOperand(1)); + } + return 0; + } + + if (Name == "__strcpy_chk") { + ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(3)); + if (!SizeCI) + return 0; + // If a) we don't have any length information, or b) we know this will + // fit then just lower to a plain strcpy. Otherwise we'll keep our + // strcpy_chk call which may fail at runtime if the size is too long. + // TODO: It might be nice to get a maximum length out of the possible + // string lengths for varying. + if (SizeCI->isAllOnesValue() || + SizeCI->getZExtValue() >= GetStringLength(CI->getOperand(2))) { + Value *Ret = EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B, TD); + return ReplaceInstUsesWith(*CI, Ret); + } + return 0; + } + + // Should be similar to strcpy. + if (Name == "__stpcpy_chk") { + return 0; + } + + if (Name == "__strncpy_chk") { + ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4)); + if (!SizeCI) + return 0; + ConstantInt *SizeArg = dyn_cast<ConstantInt>(CI->getOperand(3)); + if (!SizeArg) + return 0; + if (SizeCI->isAllOnesValue() || + SizeCI->getZExtValue() <= SizeArg->getZExtValue()) { + Value *Ret = EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B, TD); + return ReplaceInstUsesWith(*CI, Ret); + } + return 0; + } + + if (Name == "__strcat_chk") { + return 0; + } + + if (Name == "__strncat_chk") { + return 0; + } + + return 0; +} + // visitCallSite - Improvements for call and invoke instructions. // Instruction *InstCombiner::visitCallSite(CallSite CS) { @@ -772,6 +943,16 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { Changed = true; } + // Try to optimize the call if possible, we require TargetData for most of + // this. None of these calls are seen as possibly dead so go ahead and + // delete the instruction now. + if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) { + Instruction *I = tryOptimizeCall(CI, TD); + // If we changed something return the result, etc. Otherwise let + // the fallthrough check. 
+ if (I) return EraseInstFromFunction(*I); + } + return Changed ? CS.getInstruction() : 0; } @@ -796,7 +977,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { const Type *OldRetTy = Caller->getType(); const Type *NewRetTy = FT->getReturnType(); - if (isa<StructType>(NewRetTy)) + if (NewRetTy->isStructTy()) return false; // TODO: Handle multiple return values. // Check to see if we are changing the return type... @@ -804,9 +985,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (Callee->isDeclaration() && // Conversion is ok if changing from one pointer type to another or from // a pointer to an integer of the same size. - !((isa<PointerType>(OldRetTy) || !TD || + !((OldRetTy->isPointerTy() || !TD || OldRetTy == TD->getIntPtrType(Caller->getContext())) && - (isa<PointerType>(NewRetTy) || !TD || + (NewRetTy->isPointerTy() || !TD || NewRetTy == TD->getIntPtrType(Caller->getContext())))) return false; // Cannot transform this return value. @@ -853,9 +1034,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Converting from one pointer type to another or between a pointer and an // integer of the same size is safe even if we do not have a body. bool isConvertible = ActTy == ParamTy || - (TD && ((isa<PointerType>(ParamTy) || + (TD && ((ParamTy->isPointerTy() || ParamTy == TD->getIntPtrType(Caller->getContext())) && - (isa<PointerType>(ActTy) || + (ActTy->isPointerTy() || ActTy == TD->getIntPtrType(Caller->getContext())))); if (Callee->isDeclaration() && !isConvertible) return false; } diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 09cd21f..a68fc6d 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -23,7 +23,7 @@ using namespace PatternMatch; /// static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, int &Offset) { - assert(Val->getType()->isInteger(32) && "Unexpected allocation size type!"); + assert(Val->getType()->isIntegerTy(32) && "Unexpected allocation size type!"); if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { Offset = CI->getZExtValue(); Scale = 0; @@ -255,17 +255,26 @@ isEliminableCastPair( return Instruction::CastOps(Res); } -/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results -/// in any code being generated. It does not require codegen if V is simple -/// enough or if the cast can be folded into other casts. -bool InstCombiner::ValueRequiresCast(Instruction::CastOps opcode,const Value *V, - const Type *Ty) { +/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually +/// results in any code being generated and is interesting to optimize out. If +/// the cast can be eliminated by some other simple transformation, we prefer +/// to do the simplification first. +bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V, + const Type *Ty) { + // Noop casts and casts of constants should be eliminated trivially. if (V->getType() == Ty || isa<Constant>(V)) return false; - // If this is another cast that can be eliminated, it isn't codegen either. + // If this is another cast that can be eliminated, we prefer to have it + // eliminated. 
if (const CastInst *CI = dyn_cast<CastInst>(V)) - if (isEliminableCastPair(CI, opcode, Ty, TD)) + if (isEliminableCastPair(CI, opc, Ty, TD)) return false; + + // If this is a vector sext from a compare, then we don't want to break the + // idiom where each element of the extended vector is either zero or all ones. + if (opc == Instruction::SExt && isa<CmpInst>(V) && Ty->isVectorTy()) + return false; + return true; } @@ -294,8 +303,8 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { if (isa<PHINode>(Src)) { // We don't do this if this would create a PHI node with an illegal type if // it is currently legal. - if (!isa<IntegerType>(Src->getType()) || - !isa<IntegerType>(CI.getType()) || + if (!Src->getType()->isIntegerTy() || + !CI.getType()->isIntegerTy() || ShouldChangeType(CI.getType(), Src->getType())) if (Instruction *NV = FoldOpIntoPhi(CI)) return NV; @@ -427,7 +436,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // type. Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also // strange. - if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) && + if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && CanEvaluateTruncated(Src, DestTy)) { // If this cast is a truncate, evaluting in a different type always @@ -719,7 +728,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // expression tree to something weird like i93 unless the source is also // strange. unsigned BitsToClear; - if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) && + if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && CanEvaluateZExtd(Src, DestTy, BitsToClear)) { assert(BitsToClear < SrcTy->getScalarSizeInBits() && "Unreasonable BitsToClear"); @@ -828,7 +837,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // zext (xor i1 X, true) to i32 --> xor (zext i1 X to i32), 1 Value *X; - if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isInteger(1) && + if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isIntegerTy(1) && match(SrcI, m_Not(m_Value(X))) && (!X->hasOneUse() || !isa<CmpInst>(X))) { Value *New = Builder->CreateZExt(X, CI.getType()); @@ -927,7 +936,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // type. Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also // strange. - if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) && + if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && CanEvaluateSExtd(Src, DestTy)) { // Okay, we can transform this! Insert the new expression now. 
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" @@ -1280,7 +1289,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { Constant::getNullValue(Type::getInt32Ty(CI.getContext())); unsigned NumZeros = 0; while (SrcElTy != DstElTy && - isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) && + isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() && SrcElTy->getNumContainedTypes() /* not "{}" */) { SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt); ++NumZeros; @@ -1295,7 +1304,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { - if (DestVTy->getNumElements() == 1 && !isa<VectorType>(SrcTy)) { + if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) { Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType()); return InsertElementInst::Create(UndefValue::get(DestTy), Elem, Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); @@ -1304,7 +1313,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) { - if (SrcVTy->getNumElements() == 1 && !isa<VectorType>(DestTy)) { + if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) { Value *Elem = Builder->CreateExtractElement(Src, Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); @@ -1315,7 +1324,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) { // Okay, we have (bitcast (shuffle ..)). Check to see if this is // a bitconvert to a vector with the same # elts. - if (SVI->hasOneUse() && isa<VectorType>(DestTy) && + if (SVI->hasOneUse() && DestTy->isVectorTy() && cast<VectorType>(DestTy)->getNumElements() == SVI->getType()->getNumElements() && SVI->getType()->getNumElements() == @@ -1337,7 +1346,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } } - if (isa<PointerType>(SrcTy)) + if (SrcTy->isPointerTy()) return commonPointerCastTransforms(CI); return commonCastTransforms(CI); } diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 7c00c2c..72fd558 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -877,25 +877,26 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, case ICmpInst::ICMP_EQ: if (LoOverflow && HiOverflow) return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext())); - else if (HiOverflow) + if (HiOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, X, LoBound); - else if (LoOverflow) + if (LoOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, X, HiBound); - else - return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI); + return ReplaceInstUsesWith(ICI, + InsertRangeTest(X, LoBound, HiBound, DivIsSigned, + true)); case ICmpInst::ICMP_NE: if (LoOverflow && HiOverflow) return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); - else if (HiOverflow) + if (HiOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, X, LoBound); - else if (LoOverflow) + if (LoOverflow) return new ICmpInst(DivIsSigned ? 
ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, X, HiBound); - else - return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI); + return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound, + DivIsSigned, false)); case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_SLT: if (LoOverflow == +1) // Low bound is greater than input range. @@ -1606,7 +1607,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { const Type *Ty = Op0->getType(); // icmp's with boolean values can always be turned into bitwise operations - if (Ty->isInteger(1)) { + if (Ty->isIntegerTy(1)) { switch (I.getPredicate()) { default: llvm_unreachable("Invalid icmp instruction!"); case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B) @@ -1650,7 +1651,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { unsigned BitWidth = 0; if (TD) BitWidth = TD->getTypeSizeInBits(Ty->getScalarType()); - else if (Ty->isIntOrIntVector()) + else if (Ty->isIntOrIntVectorTy()) BitWidth = Ty->getScalarSizeInBits(); bool isSignBit = false; @@ -1988,7 +1989,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // values. If the ptr->ptr cast can be stripped off both arguments, we do so // now. if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) { - if (isa<PointerType>(Op0->getType()) && + if (Op0->getType()->isPointerTy() && (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) { // We keep moving the cast from the left operand over to the right // operand, where it can often be eliminated completely. @@ -2458,17 +2459,17 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); break; } - case Instruction::Load: - if (GetElementPtrInst *GEP = - dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) { - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) - if (GV->isConstant() && GV->hasDefinitiveInitializer() && - !cast<LoadInst>(LHSI)->isVolatile()) - if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I)) - return Res; + case Instruction::Load: + if (GetElementPtrInst *GEP = + dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) + if (GV->isConstant() && GV->hasDefinitiveInitializer() && + !cast<LoadInst>(LHSI)->isVolatile()) + if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I)) + return Res; + } + break; } - break; - } } return Changed ? &I : 0; diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 2d13298..0f2a24f 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -87,8 +87,8 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, const Type *SrcPTy = SrcTy->getElementType(); - if (DestPTy->isInteger() || isa<PointerType>(DestPTy) || - isa<VectorType>(DestPTy)) { + if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || + DestPTy->isVectorTy()) { // If the source is an array, the code below will not succeed. Check to // see if a trivial 'gep P, 0, 0' will help matters. Only do this for // constants. 
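Most of the remaining hunks in this file and the files below are the same mechanical migration: type tests spelled as isa<PointerType>(T), isa<VectorType>(T), T->isInteger(), or T->isInteger(N) move to the predicate helpers on llvm::Type (isPointerTy(), isVectorTy(), isIntegerTy(), isIntegerTy(N), isIntOrIntVectorTy(), and so on). A minimal sketch of the before/after shape, assuming the header layout of this tree; the helper name isPromotableLoadType is illustrative and not part of the patch:

#include "llvm/Type.h"
using namespace llvm;

// Illustrative only -- not from the patch. The commented line shows the old
// isa<>-style test; the return statement shows the equivalent predicate form
// that this commit converts the code to.
static bool isPromotableLoadType(const Type *Ty) {
  // Old: return Ty->isInteger() || isa<PointerType>(Ty) || isa<VectorType>(Ty);
  return Ty->isIntegerTy() || Ty->isPointerTy() || Ty->isVectorTy();
}

The two forms are equivalent; the predicate spelling just avoids pulling in DerivedTypes.h at call sites that only need the check, which appears to be the point of the sweep.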
@@ -104,11 +104,11 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, } if (IC.getTargetData() && - (SrcPTy->isInteger() || isa<PointerType>(SrcPTy) || - isa<VectorType>(SrcPTy)) && + (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() || + SrcPTy->isVectorTy()) && // Do not allow turning this into a load of an integer, which is then // casted to a pointer, this pessimizes pointer analysis a lot. - (isa<PointerType>(SrcPTy) == isa<PointerType>(LI.getType())) && + (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) && IC.getTargetData()->getTypeSizeInBits(SrcPTy) == IC.getTargetData()->getTypeSizeInBits(DestPTy)) { @@ -243,7 +243,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { const Type *SrcPTy = SrcTy->getElementType(); - if (!DestPTy->isInteger() && !isa<PointerType>(DestPTy)) + if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy()) return 0; /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" @@ -255,7 +255,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { // If the source is an array, the code below will not succeed. Check to // see if a trivial 'gep P, 0, 0' will help matters. Only do this for // constants. - if (isa<ArrayType>(SrcPTy) || isa<StructType>(SrcPTy)) { + if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) { // Index through pointer. Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext())); NewGEPIndices.push_back(Zero); @@ -277,7 +277,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); } - if (!SrcPTy->isInteger() && !isa<PointerType>(SrcPTy)) + if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy()) return 0; // If the pointers point into different address spaces or if they point to @@ -297,11 +297,11 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { Instruction::CastOps opcode = Instruction::BitCast; const Type* CastSrcTy = SIOp0->getType(); const Type* CastDstTy = SrcPTy; - if (isa<PointerType>(CastDstTy)) { - if (CastSrcTy->isInteger()) + if (CastDstTy->isPointerTy()) { + if (CastSrcTy->isIntegerTy()) opcode = Instruction::IntToPtr; - } else if (isa<IntegerType>(CastDstTy)) { - if (isa<PointerType>(SIOp0->getType())) + } else if (CastDstTy->isIntegerTy()) { + if (SIOp0->getType()->isPointerTy()) opcode = Instruction::PtrToInt; } @@ -413,7 +413,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // Don't count debug info directives, lest they affect codegen, // and we skip pointer-to-pointer bitcasts, which are NOPs. if (isa<DbgInfoIntrinsic>(BBI) || - (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) { + (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { ScanInsts++; continue; } @@ -483,7 +483,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { do { ++BBI; } while (isa<DbgInfoIntrinsic>(BBI) || - (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))); + (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())); if (BranchInst *BI = dyn_cast<BranchInst>(BBI)) if (BI->isUnconditional()) if (SimplifyStoreAtEndOfBlock(SI)) @@ -544,7 +544,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { --BBI; // Skip over debugging info. 
while (isa<DbgInfoIntrinsic>(BBI) || - (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) { + (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { if (BBI==OtherBB->begin()) return false; --BBI; diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 2e26a75..b3974e8 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -76,7 +76,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { return BinaryOperator::CreateShl(Op0, ConstantInt::get(Op0->getType(), Val.logBase2())); } - } else if (isa<VectorType>(Op1C->getType())) { + } else if (Op1C->getType()->isVectorTy()) { if (Op1C->isNullValue()) return ReplaceInstUsesWith(I, Op1C); @@ -157,7 +157,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { } /// i1 mul -> i1 and. - if (I.getType()->isInteger(1)) + if (I.getType()->isIntegerTy(1)) return BinaryOperator::CreateAnd(Op0, Op1); // X*(1 << Y) --> X << Y @@ -173,7 +173,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { // If one of the operands of the multiply is a cast from a boolean value, then // we know the bool is either zero or one, so this is a 'masking' multiply. // X * Y (where Y is 0 or 1) -> X & (0-Y) - if (!isa<VectorType>(I.getType())) { + if (!I.getType()->isVectorTy()) { // -2 is "-1 << 1" so it is all bits set except the low one. APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true); @@ -203,8 +203,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { // "In IEEE floating point, x*1 is not equivalent to x for nans. However, // ANSI says we can drop signals, so we can do this anyway." (from GCC) if (Op1F->isExactlyValue(1.0)) - return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0' - } else if (isa<VectorType>(Op1C->getType())) { + return ReplaceInstUsesWith(I, Op0); // Eliminate 'fmul double %X, 1.0' + } else if (Op1C->getType()->isVectorTy()) { if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) { // As above, vector X*splat(1.0) -> X in all defined cases. if (Constant *Splat = Op1V->getSplatValue()) { @@ -314,7 +314,7 @@ Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) { // undef / X -> 0 for integer. // undef / X -> undef for FP (the undef could be a snan). if (isa<UndefValue>(Op0)) { - if (Op0->getType()->isFPOrFPVector()) + if (Op0->getType()->isFPOrFPVectorTy()) return ReplaceInstUsesWith(I, Op0); return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } @@ -386,7 +386,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); // It can't be division by zero, hence it must be division by one. - if (I.getType()->isInteger(1)) + if (I.getType()->isIntegerTy(1)) return ReplaceInstUsesWith(I, Op0); if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) { @@ -493,7 +493,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a udiv. 
- if (I.getType()->isInteger()) { + if (I.getType()->isIntegerTy()) { APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); if (MaskedValueIsZero(Op0, Mask)) { if (MaskedValueIsZero(Op1, Mask)) { @@ -527,7 +527,7 @@ Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (isa<UndefValue>(Op0)) { // undef % X -> 0 - if (I.getType()->isFPOrFPVector()) + if (I.getType()->isFPOrFPVectorTy()) return ReplaceInstUsesWith(I, Op0); // X % undef -> undef (could be SNaN) return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } @@ -648,7 +648,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a urem. - if (I.getType()->isInteger()) { + if (I.getType()->isIntegerTy()) { APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { // X srem Y -> X urem Y, iff X and Y don't have sign bit set diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index bb7632f..65f0393 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -266,6 +266,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // and if TD isn't around, we can't handle the mixed case. bool isVolatile = FirstLI->isVolatile(); unsigned LoadAlignment = FirstLI->getAlignment(); + unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace(); // We can't sink the load if the loaded value could be modified between the // load and the PHI. @@ -290,6 +291,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // the load and the PHI. if (LI->isVolatile() != isVolatile || LI->getParent() != PN.getIncomingBlock(i) || + LI->getPointerAddressSpace() != LoadAddrSpace || !isSafeAndProfitableToSinkLoad(LI)) return 0; @@ -371,7 +373,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { // Be careful about transforming integer PHIs. We don't want to pessimize // the code by turning an i32 into an i1293. - if (isa<IntegerType>(PN.getType()) && isa<IntegerType>(CastSrcTy)) { + if (PN.getType()->isIntegerTy() && CastSrcTy->isIntegerTy()) { if (!ShouldChangeType(PN.getType(), CastSrcTy)) return 0; } @@ -832,7 +834,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { // it is only used by trunc or trunc(lshr) operations. If so, we split the // PHI into the various pieces being extracted. This sort of thing is // introduced when SROA promotes an aggregate to a single large integer type. 
- if (isa<IntegerType>(PN.getType()) && TD && + if (PN.getType()->isIntegerTy() && TD && !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits())) if (Instruction *Res = SliceUpIllegalIntegerPHI(PN)) return Res; diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 9a02b33..2fc9325 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -441,7 +441,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return ReplaceInstUsesWith(SI, FalseVal); } - if (SI.getType()->isInteger(1)) { + if (SI.getType()->isIntegerTy(1)) { if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) { if (C->getZExtValue()) { // Change: A = select B, true, C --> A = or B, C @@ -539,9 +539,18 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { !CFPf->getValueAPF().isZero())) return ReplaceInstUsesWith(SI, FalseVal); } - // Transform (X != Y) ? X : Y -> X - if (FCI->getPredicate() == FCmpInst::FCMP_ONE) + // Transform (X une Y) ? X : Y -> X + if (FCI->getPredicate() == FCmpInst::FCMP_UNE) { + // This is not safe in general for floating point: + // consider X== -0, Y== +0. + // It becomes safe if either operand is a nonzero constant. + ConstantFP *CFPt, *CFPf; + if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) && + !CFPt->getValueAPF().isZero()) || + ((CFPf = dyn_cast<ConstantFP>(FalseVal)) && + !CFPf->getValueAPF().isZero())) return ReplaceInstUsesWith(SI, TrueVal); + } // NOTE: if we wanted to, this is where to detect MIN/MAX } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){ @@ -557,9 +566,18 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { !CFPf->getValueAPF().isZero())) return ReplaceInstUsesWith(SI, FalseVal); } - // Transform (X != Y) ? Y : X -> Y - if (FCI->getPredicate() == FCmpInst::FCMP_ONE) - return ReplaceInstUsesWith(SI, TrueVal); + // Transform (X une Y) ? Y : X -> Y + if (FCI->getPredicate() == FCmpInst::FCMP_UNE) { + // This is not safe in general for floating point: + // consider X== -0, Y== +0. + // It becomes safe if either operand is a nonzero constant. + ConstantFP *CFPt, *CFPf; + if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) && + !CFPt->getValueAPF().isZero()) || + ((CFPf = dyn_cast<ConstantFP>(FalseVal)) && + !CFPf->getValueAPF().isZero())) + return ReplaceInstUsesWith(SI, TrueVal); + } // NOTE: if we wanted to, this is where to detect MIN/MAX } // NOTE: if we wanted to, this is where to detect ABS @@ -629,7 +647,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } // See if we can fold the select into one of our operands. 
- if (SI.getType()->isInteger()) { + if (SI.getType()->isIntegerTy()) { if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal)) return FoldI; diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 53a5684..cd41844 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -104,10 +104,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, assert(Depth <= 6 && "Limit Search Depth"); uint32_t BitWidth = DemandedMask.getBitWidth(); const Type *VTy = V->getType(); - assert((TD || !isa<PointerType>(VTy)) && + assert((TD || !VTy->isPointerTy()) && "SimplifyDemandedBits needs to know bit widths!"); assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) && - (!VTy->isIntOrIntVector() || + (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) && KnownZero.getBitWidth() == BitWidth && KnownOne.getBitWidth() == BitWidth && @@ -401,7 +401,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; } case Instruction::BitCast: - if (!I->getOperand(0)->getType()->isIntOrIntVector()) + if (!I->getOperand(0)->getType()->isIntOrIntVectorTy()) return 0; // vector->int or fp->int? if (const VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) { @@ -413,7 +413,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } else // Don't touch a scalar-to-vector bitcast. return 0; - } else if (isa<VectorType>(I->getOperand(0)->getType())) + } else if (I->getOperand(0)->getType()->isVectorTy()) // Don't touch a vector-to-scalar bitcast. return 0; diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 20fda1a..a58124d 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -78,7 +78,7 @@ static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) { /// value is already around as a register, for example if it were inserted then /// extracted from the vector. static Value *FindScalarElement(Value *V, unsigned EltNo) { - assert(isa<VectorType>(V->getType()) && "Not looking at a vector?"); + assert(V->getType()->isVectorTy() && "Not looking at a vector?"); const VectorType *PTy = cast<VectorType>(V->getType()); unsigned Width = PTy->getNumElements(); if (EltNo >= Width) // Out of range access. @@ -322,7 +322,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, /// that computes V and the LHS value of the shuffle. static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask, Value *&RHS) { - assert(isa<VectorType>(V->getType()) && + assert(V->getType()->isVectorTy() && (RHS == 0 || V->getType() == RHS->getType()) && "Invalid shuffle!"); unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 93b1961..af9ec5c 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -73,7 +73,7 @@ void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { /// from 'From' to 'To'. We don't want to convert from a legal to an illegal /// type for example, or from a smaller to a larger illegal type. 
bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const { - assert(isa<IntegerType>(From) && isa<IntegerType>(To)); + assert(From->isIntegerTy() && To->isIntegerTy()); // If we don't have TD, we don't know if the source/dest are legal. if (!TD) return false; @@ -158,7 +158,7 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const { return ConstantExpr::getNeg(C); if (ConstantVector *C = dyn_cast<ConstantVector>(V)) - if (C->getType()->getElementType()->isInteger()) + if (C->getType()->getElementType()->isIntegerTy()) return ConstantExpr::getNeg(C); return 0; @@ -177,7 +177,7 @@ Value *InstCombiner::dyn_castFNegVal(Value *V) const { return ConstantExpr::getFNeg(C); if (ConstantVector *C = dyn_cast<ConstantVector>(V)) - if (C->getType()->getElementType()->isFloatingPoint()) + if (C->getType()->getElementType()->isFloatingPointTy()) return ConstantExpr::getFNeg(C); return 0; @@ -226,7 +226,7 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { if (isa<Constant>(TV) || isa<Constant>(FV)) { // Bool selects with constant operands can be folded to logical ops. - if (SI->getType()->isInteger(1)) return 0; + if (SI->getType()->isIntegerTy(1)) return 0; Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this); Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this); @@ -478,7 +478,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { bool EndsWithSequential = false; for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src); I != E; ++I) - EndsWithSequential = !isa<StructType>(*I); + EndsWithSequential = !(*I)->isStructTy(); // Can we combine the two pointer arithmetics offsets? if (EndsWithSequential) { @@ -578,7 +578,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast const Type *SrcElTy = StrippedPtrTy->getElementType(); const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType(); - if (TD && isa<ArrayType>(SrcElTy) && + if (TD && SrcElTy->isArrayTy() && TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) == TD->getTypeAllocSize(ResElTy)) { Value *Idx[2]; @@ -596,7 +596,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // (where tmp = 8*tmp2) into: // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast - if (TD && isa<ArrayType>(SrcElTy) && ResElTy->isInteger(8)) { + if (TD && SrcElTy->isArrayTy() && ResElTy->isIntegerTy(8)) { uint64_t ArrayEltSize = TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()); diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp index 3214c8c..8662a82 100644 --- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp +++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp @@ -84,7 +84,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, AI = MainFn->arg_begin(); // If the program looked at argc, have it look at the return value of the // init call instead. 
- if (!AI->getType()->isInteger(32)) { + if (!AI->getType()->isIntegerTy(32)) { Instruction::CastOps opcode; if (!AI->use_empty()) { opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true); diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp index cf5e8c0..ea8e5c3 100644 --- a/lib/Transforms/Scalar/ABCD.cpp +++ b/lib/Transforms/Scalar/ABCD.cpp @@ -505,7 +505,7 @@ void ABCD::executeABCD(Function &F) { continue; ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0)); - if (!ICI || !isa<IntegerType>(ICI->getOperand(0)->getType())) + if (!ICI || !ICI->getOperand(0)->getType()->isIntegerTy()) continue; createConstraintCmpInst(ICI, TI); @@ -713,7 +713,7 @@ void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) { Value *V_op1 = ICI->getOperand(0); Value *V_op2 = ICI->getOperand(1); - if (!isa<IntegerType>(V_op1->getType())) + if (!V_op1->getType()->isIntegerTy()) return; Instruction *I_op1 = dyn_cast<Instruction>(V_op1); diff --git a/lib/Transforms/Scalar/Android.mk b/lib/Transforms/Scalar/Android.mk new file mode 100644 index 0000000..dea9b8f --- /dev/null +++ b/lib/Transforms/Scalar/Android.mk @@ -0,0 +1,55 @@ +LOCAL_PATH:= $(call my-dir) + +transforms_scalar_SRC_FILES := \ + ABCD.cpp \ + ADCE.cpp \ + BasicBlockPlacement.cpp \ + CodeGenPrepare.cpp \ + ConstantProp.cpp \ + DCE.cpp \ + DeadStoreElimination.cpp \ + GEPSplitter.cpp \ + GVN.cpp \ + IndVarSimplify.cpp \ + JumpThreading.cpp \ + LICM.cpp \ + LoopDeletion.cpp \ + LoopIndexSplit.cpp \ + LoopRotation.cpp \ + LoopStrengthReduce.cpp \ + LoopUnrollPass.cpp \ + LoopUnswitch.cpp \ + MemCpyOptimizer.cpp \ + Reassociate.cpp \ + Reg2Mem.cpp \ + SCCP.cpp \ + SCCVN.cpp \ + Scalar.cpp \ + ScalarReplAggregates.cpp \ + SimplifyCFGPass.cpp \ + SimplifyHalfPowrLibCalls.cpp \ + SimplifyLibCalls.cpp \ + TailDuplication.cpp \ + TailRecursionElimination.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(transforms_scalar_SRC_FILES) +LOCAL_MODULE:= libLLVMScalarOpts + +include $(LLVM_HOST_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(transforms_scalar_SRC_FILES) +LOCAL_MODULE:= libLLVMScalarOpts + +include $(LLVM_DEVICE_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index fa60d3f..7ceda1f 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -32,7 +32,6 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CallSite.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" @@ -40,9 +39,6 @@ using namespace llvm; using namespace llvm::PatternMatch; -static cl::opt<bool> FactorCommonPreds("split-critical-paths-tweak", - cl::init(false), cl::Hidden); - namespace { class CodeGenPrepare : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining @@ -301,6 +297,70 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); } +/// FindReusablePredBB - Check all of the predecessors of the block DestPHI +/// lives in to see if there is a block that we can reuse as a critical 
edge +/// from TIBB. +static BasicBlock *FindReusablePredBB(PHINode *DestPHI, BasicBlock *TIBB) { + BasicBlock *Dest = DestPHI->getParent(); + + /// TIPHIValues - This array is lazily computed to determine the values of + /// PHIs in Dest that TI would provide. + SmallVector<Value*, 32> TIPHIValues; + + /// TIBBEntryNo - This is a cache to speed up pred queries for TIBB. + unsigned TIBBEntryNo = 0; + + // Check to see if Dest has any blocks that can be used as a split edge for + // this terminator. + for (unsigned pi = 0, e = DestPHI->getNumIncomingValues(); pi != e; ++pi) { + BasicBlock *Pred = DestPHI->getIncomingBlock(pi); + // To be usable, the pred has to end with an uncond branch to the dest. + BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator()); + if (!PredBr || !PredBr->isUnconditional()) + continue; + // Must be empty other than the branch and debug info. + BasicBlock::iterator I = Pred->begin(); + while (isa<DbgInfoIntrinsic>(I)) + I++; + if (&*I != PredBr) + continue; + // Cannot be the entry block; its label does not get emitted. + if (Pred == &Dest->getParent()->getEntryBlock()) + continue; + + // Finally, since we know that Dest has phi nodes in it, we have to make + // sure that jumping to Pred will have the same effect as going to Dest in + // terms of PHI values. + PHINode *PN; + unsigned PHINo = 0; + unsigned PredEntryNo = pi; + + bool FoundMatch = true; + for (BasicBlock::iterator I = Dest->begin(); + (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) { + if (PHINo == TIPHIValues.size()) { + if (PN->getIncomingBlock(TIBBEntryNo) != TIBB) + TIBBEntryNo = PN->getBasicBlockIndex(TIBB); + TIPHIValues.push_back(PN->getIncomingValue(TIBBEntryNo)); + } + + // If the PHI entry doesn't work, we can't use this pred. + if (PN->getIncomingBlock(PredEntryNo) != Pred) + PredEntryNo = PN->getBasicBlockIndex(Pred); + + if (TIPHIValues[PHINo] != PN->getIncomingValue(PredEntryNo)) { + FoundMatch = false; + break; + } + } + + // If we found a workable predecessor, change TI to branch to Succ. + if (FoundMatch) + return Pred; + } + return 0; +} + /// SplitEdgeNicely - Split the critical edge from TI to its specified /// successor if it will improve codegen. We only do this if the successor has @@ -315,13 +375,12 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, BasicBlock *Dest = TI->getSuccessor(SuccNum); assert(isa<PHINode>(Dest->begin()) && "This should only be called if Dest has a PHI!"); + PHINode *DestPHI = cast<PHINode>(Dest->begin()); // Do not split edges to EH landing pads. - if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI)) { + if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI)) if (Invoke->getSuccessor(1) == Dest) return; - } - // As a hack, never split backedges of loops. Even though the copy for any // PHIs inserted on the backedge would be dead for exits from the loop, we @@ -329,92 +388,16 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, if (BackEdges.count(std::make_pair(TIBB, Dest))) return; - if (!FactorCommonPreds) { - /// TIPHIValues - This array is lazily computed to determine the values of - /// PHIs in Dest that TI would provide. - SmallVector<Value*, 32> TIPHIValues; - - // Check to see if Dest has any blocks that can be used as a split edge for - // this terminator. - for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) { - BasicBlock *Pred = *PI; - // To be usable, the pred has to end with an uncond branch to the dest. 
- BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator()); - if (!PredBr || !PredBr->isUnconditional()) - continue; - // Must be empty other than the branch and debug info. - BasicBlock::iterator I = Pred->begin(); - while (isa<DbgInfoIntrinsic>(I)) - I++; - if (dyn_cast<Instruction>(I) != PredBr) - continue; - // Cannot be the entry block; its label does not get emitted. - if (Pred == &(Dest->getParent()->getEntryBlock())) - continue; - - // Finally, since we know that Dest has phi nodes in it, we have to make - // sure that jumping to Pred will have the same effect as going to Dest in - // terms of PHI values. - PHINode *PN; - unsigned PHINo = 0; - bool FoundMatch = true; - for (BasicBlock::iterator I = Dest->begin(); - (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) { - if (PHINo == TIPHIValues.size()) - TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB)); - - // If the PHI entry doesn't work, we can't use this pred. - if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) { - FoundMatch = false; - break; - } - } - - // If we found a workable predecessor, change TI to branch to Succ. - if (FoundMatch) { - ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>(); - if (PFI) - PFI->splitEdge(TIBB, Dest, Pred); - Dest->removePredecessor(TIBB); - TI->setSuccessor(SuccNum, Pred); - return; - } - } - - SplitCriticalEdge(TI, SuccNum, P, true); + if (BasicBlock *ReuseBB = FindReusablePredBB(DestPHI, TIBB)) { + ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>(); + if (PFI) + PFI->splitEdge(TIBB, Dest, ReuseBB); + Dest->removePredecessor(TIBB); + TI->setSuccessor(SuccNum, ReuseBB); return; } - PHINode *PN; - SmallVector<Value*, 8> TIPHIValues; - for (BasicBlock::iterator I = Dest->begin(); - (PN = dyn_cast<PHINode>(I)); ++I) - TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB)); - - SmallVector<BasicBlock*, 8> IdenticalPreds; - for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) { - BasicBlock *Pred = *PI; - if (BackEdges.count(std::make_pair(Pred, Dest))) - continue; - if (PI == TIBB) - IdenticalPreds.push_back(Pred); - else { - bool Identical = true; - unsigned PHINo = 0; - for (BasicBlock::iterator I = Dest->begin(); - (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) - if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) { - Identical = false; - break; - } - if (Identical) - IdenticalPreds.push_back(Pred); - } - } - - assert(!IdenticalPreds.empty()); - SplitBlockPredecessors(Dest, &IdenticalPreds[0], IdenticalPreds.size(), - ".critedge", P); + SplitCriticalEdge(TI, SuccNum, P, true); } @@ -629,7 +612,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // we'd end up sinking both muls. if (AddrMode.BaseReg) { Value *V = AddrMode.BaseReg; - if (isa<PointerType>(V->getType())) + if (V->getType()->isPointerTy()) V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt); if (V->getType() != IntPtrTy) V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true, @@ -642,7 +625,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Value *V = AddrMode.ScaledReg; if (V->getType() == IntPtrTy) { // done. 
- } else if (isa<PointerType>(V->getType())) { + } else if (V->getType()->isPointerTy()) { V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt); } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < cast<IntegerType>(V->getType())->getBitWidth()) { diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 320afa1..09c01d3 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -44,8 +44,14 @@ namespace { virtual bool runOnFunction(Function &F) { bool Changed = false; + + DominatorTree &DT = getAnalysis<DominatorTree>(); + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) - Changed |= runOnBasicBlock(*I); + // Only check non-dead blocks. Dead blocks may have strange pointer + // cycles that will confuse alias analysis. + if (DT.isReachableFromEntry(I)) + Changed |= runOnBasicBlock(*I); return Changed; } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 80e0027..fcb802a 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -662,11 +662,10 @@ namespace { bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid - explicit GVN(bool nopre = false, bool noloads = false) - : FunctionPass(&ID), NoPRE(nopre), NoLoads(noloads), MD(0) { } + explicit GVN(bool noloads = false) + : FunctionPass(&ID), NoLoads(noloads), MD(0) { } private: - bool NoPRE; bool NoLoads; MemoryDependenceAnalysis *MD; DominatorTree *DT; @@ -674,6 +673,9 @@ namespace { ValueTable VN; DenseMap<BasicBlock*, ValueNumberScope*> localAvail; + // List of critical edges to be split between iterations. + SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit; + // This transformation requires dominator postdominator info virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTree>(); @@ -701,14 +703,15 @@ namespace { Value *lookupNumber(BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; + bool splitCriticalEdges(); }; char GVN::ID = 0; } // createGVNPass - The public interface to this file... -FunctionPass *llvm::createGVNPass(bool NoPRE, bool NoLoads) { - return new GVN(NoPRE, NoLoads); +FunctionPass *llvm::createGVNPass(bool NoLoads) { + return new GVN(NoLoads); } static RegisterPass<GVN> X("gvn", @@ -836,9 +839,9 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, const TargetData &TD) { // If the loaded or stored value is an first class array or struct, don't try // to transform them. We need to be able to bitcast to integer. - if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy) || - isa<StructType>(StoredVal->getType()) || - isa<ArrayType>(StoredVal->getType())) + if (LoadTy->isStructTy() || LoadTy->isArrayTy() || + StoredVal->getType()->isStructTy() || + StoredVal->getType()->isArrayTy()) return false; // The store has to be at least as big as the load. @@ -870,26 +873,26 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, // If the store and reload are the same size, we can always reuse it. if (StoreSize == LoadSize) { - if (isa<PointerType>(StoredValTy) && isa<PointerType>(LoadedTy)) { + if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) { // Pointer to Pointer -> use bitcast. return new BitCastInst(StoredVal, LoadedTy, "", InsertPt); } // Convert source pointers to integers, which can be bitcast. 
- if (isa<PointerType>(StoredValTy)) { + if (StoredValTy->isPointerTy()) { StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); } const Type *TypeToCastTo = LoadedTy; - if (isa<PointerType>(TypeToCastTo)) + if (TypeToCastTo->isPointerTy()) TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext()); if (StoredValTy != TypeToCastTo) StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt); // Cast to pointer if the load needs a pointer type. - if (isa<PointerType>(LoadedTy)) + if (LoadedTy->isPointerTy()) StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt); return StoredVal; @@ -901,13 +904,13 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail"); // Convert source pointers to integers, which can be manipulated. - if (isa<PointerType>(StoredValTy)) { + if (StoredValTy->isPointerTy()) { StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); } // Convert vectors and fp to integer, which can be manipulated. - if (!isa<IntegerType>(StoredValTy)) { + if (!StoredValTy->isIntegerTy()) { StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize); StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt); } @@ -927,7 +930,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, return StoredVal; // If the result is a pointer, inttoptr. - if (isa<PointerType>(LoadedTy)) + if (LoadedTy->isPointerTy()) return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt); // Otherwise, bitcast. @@ -989,7 +992,7 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr, const TargetData &TD) { // If the loaded or stored value is an first class array or struct, don't try // to transform them. We need to be able to bitcast to integer. - if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy)) + if (LoadTy->isStructTy() || LoadTy->isArrayTy()) return -1; int64_t StoreOffset = 0, LoadOffset = 0; @@ -1064,8 +1067,8 @@ static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr, StoreInst *DepSI, const TargetData &TD) { // Cannot handle reading from store of first-class aggregate yet. - if (isa<StructType>(DepSI->getOperand(0)->getType()) || - isa<ArrayType>(DepSI->getOperand(0)->getType())) + if (DepSI->getOperand(0)->getType()->isStructTy() || + DepSI->getOperand(0)->getType()->isArrayTy()) return -1; Value *StorePtr = DepSI->getPointerOperand(); @@ -1136,9 +1139,9 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, // Compute which bits of the stored value are being used by the load. Convert // to an integer type to start with. - if (isa<PointerType>(SrcVal->getType())) + if (SrcVal->getType()->isPointerTy()) SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp"); - if (!isa<IntegerType>(SrcVal->getType())) + if (!SrcVal->getType()->isIntegerTy()) SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8), "tmp"); @@ -1323,7 +1326,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); // If new PHI nodes were created, notify alias analysis. 
- if (isa<PointerType>(V->getType())) + if (V->getType()->isPointerTy()) for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) AA->copyValue(LI, NewPHIs[i]); @@ -1491,8 +1494,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI, if (isa<PHINode>(V)) V->takeName(LI); - if (isa<PointerType>(V->getType())) + if (V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); + VN.erase(LI); toErase.push_back(LI); NumGVNLoad++; return true; @@ -1538,11 +1542,13 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // at least one of the values is LI. Since this means that we won't be able // to eliminate LI even if we insert uses in the other predecessors, we will // end up increasing code size. Reject this by scanning for LI. - if (!EnableFullLoadPRE) { - for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - if (ValuesPerBlock[i].isSimpleValue() && - ValuesPerBlock[i].getSimpleValue() == LI) + for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { + if (ValuesPerBlock[i].isSimpleValue() && + ValuesPerBlock[i].getSimpleValue() == LI) { + // Skip cases where LI is the only definition, even for EnableFullLoadPRE. + if (!EnableFullLoadPRE || e == 1) return false; + } } // FIXME: It is extremely unclear what this loop is doing, other than @@ -1576,6 +1582,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) FullyAvailableBlocks[UnavailableBlocks[i]] = false; + bool NeedToSplitEdges = false; for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E; ++PI) { BasicBlock *Pred = *PI; @@ -1583,13 +1590,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI, continue; } PredLoads[Pred] = 0; - // We don't currently handle critical edges :( + if (Pred->getTerminator()->getNumSuccessors() != 1) { - DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" - << Pred->getName() << "': " << *LI << '\n'); - return false; + if (isa<IndirectBrInst>(Pred->getTerminator())) { + DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '" + << Pred->getName() << "': " << *LI << '\n'); + return false; + } + unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB); + toSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum)); + NeedToSplitEdges = true; } } + if (NeedToSplitEdges) + return false; // Decide whether PRE is profitable for this load. unsigned NumUnavailablePreds = PredLoads.size(); @@ -1623,13 +1637,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI, LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, *DT, NewInsts); } else { - Address.PHITranslateValue(LoadBB, UnavailablePred); + Address.PHITranslateValue(LoadBB, UnavailablePred, DT); LoadPtr = Address.getAddr(); - - // Make sure the value is live in the predecessor. - if (Instruction *Inst = dyn_cast_or_null<Instruction>(LoadPtr)) - if (!DT->dominates(Inst->getParent(), UnavailablePred)) - LoadPtr = 0; } // If we couldn't find or insert a computation of this phi translated value, @@ -1697,6 +1706,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // Add the newly created load. ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred, NewLoad)); + MD->invalidateCachedPointerInfo(LoadPtr); + DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n'); } // Perform PHI construction. 
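[Editor's sketch, not part of the patch] The GVN hunks above defer critical-edge handling instead of bailing out: edges found during load PRE are pushed onto the new toSplit list and split between iterations. A minimal outline of that pattern, using the same 2010-era utility calls the patch itself uses (GetSuccessorNumber, isCriticalEdge, SplitCriticalEdge); the helper names and the Work parameter are hypothetical, chosen only for illustration.

#include "llvm/BasicBlock.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <utility>
using namespace llvm;

// During the scan: remember the edge rather than splitting it immediately,
// since splitting now would disturb the walk and the pass's cached state.
static void rememberCriticalEdge(BasicBlock *Pred, BasicBlock *Succ,
        SmallVectorImpl<std::pair<TerminatorInst*, unsigned> > &Work) {
  unsigned SuccNum = GetSuccessorNumber(Pred, Succ);
  if (isCriticalEdge(Pred->getTerminator(), SuccNum))
    Work.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
}

// Between iterations: split everything that was recorded and report whether
// the CFG changed, so the caller knows to run another iteration.
static bool flushEdgeWorklist(
        SmallVectorImpl<std::pair<TerminatorInst*, unsigned> > &Work, Pass *P) {
  if (Work.empty()) return false;
  do {
    std::pair<TerminatorInst*, unsigned> Edge = Work.pop_back_val();
    SplitCriticalEdge(Edge.first, Edge.second, P);
  } while (!Work.empty());
  return true;
}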
@@ -1705,8 +1716,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI, LI->replaceAllUsesWith(V); if (isa<PHINode>(V)) V->takeName(LI); - if (isa<PointerType>(V->getType())) + if (V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); + VN.erase(LI); toErase.push_back(LI); NumPRELoad++; return true; @@ -1765,8 +1777,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // Replace the load! L->replaceAllUsesWith(AvailVal); - if (isa<PointerType>(AvailVal->getType())) + if (AvailVal->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(AvailVal); + VN.erase(L); toErase.push_back(L); NumGVNLoad++; return true; @@ -1810,8 +1823,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // Remove it! L->replaceAllUsesWith(StoredVal); - if (isa<PointerType>(StoredVal->getType())) + if (StoredVal->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(StoredVal); + VN.erase(L); toErase.push_back(L); NumGVNLoad++; return true; @@ -1839,8 +1853,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // Remove it! L->replaceAllUsesWith(AvailableVal); - if (isa<PointerType>(DepLI->getType())) + if (DepLI->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(DepLI); + VN.erase(L); toErase.push_back(L); NumGVNLoad++; return true; @@ -1851,6 +1866,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // intervening stores, for example. if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) { L->replaceAllUsesWith(UndefValue::get(L->getType())); + VN.erase(L); toErase.push_back(L); NumGVNLoad++; return true; @@ -1861,6 +1877,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) { if (II->getIntrinsicID() == Intrinsic::lifetime_start) { L->replaceAllUsesWith(UndefValue::get(L->getType())); + VN.erase(L); toErase.push_back(L); NumGVNLoad++; return true; @@ -1891,6 +1908,10 @@ Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) { /// by inserting it into the appropriate sets bool GVN::processInstruction(Instruction *I, SmallVectorImpl<Instruction*> &toErase) { + // Ignore dbg info intrinsics. + if (isa<DbgInfoIntrinsic>(I)) + return false; + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { bool Changed = processLoad(LI, toErase); @@ -1939,7 +1960,7 @@ bool GVN::processInstruction(Instruction *I, if (constVal) { p->replaceAllUsesWith(constVal); - if (MD && isa<PointerType>(constVal->getType())) + if (MD && constVal->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(constVal); VN.erase(p); @@ -1960,7 +1981,7 @@ bool GVN::processInstruction(Instruction *I, // Remove it! VN.erase(I); I->replaceAllUsesWith(repl); - if (MD && isa<PointerType>(repl->getType())) + if (MD && repl->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(repl); toErase.push_back(I); return true; @@ -2000,6 +2021,8 @@ bool GVN::runOnFunction(Function& F) { while (ShouldContinue) { DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n"); ShouldContinue = iterateOnFunction(F); + if (splitCriticalEdges()) + ShouldContinue = true; Changed |= ShouldContinue; ++Iteration; } @@ -2066,7 +2089,6 @@ bool GVN::processBlock(BasicBlock *BB) { /// control flow patterns and attempts to perform simple PRE at the join point. 
bool GVN::performPRE(Function &F) { bool Changed = false; - SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit; DenseMap<BasicBlock*, Value*> predMap; for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()), DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) { @@ -2137,14 +2159,7 @@ bool GVN::performPRE(Function &F) { // We can't do PRE safely on a critical edge, so instead we schedule // the edge to be split and perform the PRE the next time we iterate // on the function. - unsigned SuccNum = 0; - for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors(); - i != e; ++i) - if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) { - SuccNum = i; - break; - } - + unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock); if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) { toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum)); continue; @@ -2200,7 +2215,7 @@ bool GVN::performPRE(Function &F) { localAvail[CurrentBlock]->table[ValNo] = Phi; CurInst->replaceAllUsesWith(Phi); - if (MD && isa<PointerType>(Phi->getType())) + if (MD && Phi->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(Phi); VN.erase(CurInst); @@ -2212,11 +2227,23 @@ bool GVN::performPRE(Function &F) { } } - for (SmallVector<std::pair<TerminatorInst*, unsigned>, 4>::iterator - I = toSplit.begin(), E = toSplit.end(); I != E; ++I) - SplitCriticalEdge(I->first, I->second, this); + if (splitCriticalEdges()) + Changed = true; + + return Changed; +} - return Changed || toSplit.size(); +/// splitCriticalEdges - Split critical edges found during the previous +/// iteration that may enable further optimization. +bool GVN::splitCriticalEdges() { + if (toSplit.empty()) + return false; + do { + std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val(); + SplitCriticalEdge(Edge.first, Edge.second, this); + } while (!toSplit.empty()); + if (MD) MD->invalidateCachedPredecessors(); + return true; } /// iterateOnFunction - Executes one iteration of GVN diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index c54f596..cb563c3 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -103,11 +103,9 @@ namespace { BasicBlock *ExitingBlock, BranchInst *BI, SCEVExpander &Rewriter); - void RewriteLoopExitValues(Loop *L, const SCEV *BackedgeTakenCount, - SCEVExpander &Rewriter); + void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); - void RewriteIVExpressions(Loop *L, const Type *LargestType, - SCEVExpander &Rewriter); + void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter); void SinkUnusedInvariants(Loop *L); @@ -190,7 +188,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond"); - Instruction *OrigCond = cast<Instruction>(BI->getCondition()); + Value *OrigCond = BI->getCondition(); // It's tempting to use replaceAllUsesWith here to fully replace the old // comparison, but that's not immediately safe, since users of the old // comparison may not be dominated by the new comparison. Instead, just @@ -215,7 +213,6 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, /// able to brute-force evaluate arbitrary instructions as long as they have /// constant operands at the beginning of the loop. void IndVarSimplify::RewriteLoopExitValues(Loop *L, - const SCEV *BackedgeTakenCount, SCEVExpander &Rewriter) { // Verify the input to the pass in already in LCSSA form. 
assert(L->isLCSSAForm()); @@ -241,15 +238,24 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, while ((PN = dyn_cast<PHINode>(BBI++))) { if (PN->use_empty()) continue; // dead use, don't replace it + + // SCEV only supports integer expressions for now. + if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy()) + continue; + + // It's necessary to tell ScalarEvolution about this explicitly so that + // it can walk the def-use list and forget all SCEVs, as it may not be + // watching the PHI itself. Once the new exit value is in place, there + // may not be a def-use connection between the loop and every instruction + // which got a SCEVAddRecExpr for that loop. + SE->forgetValue(PN); + // Iterate over all of the values in all the PHI nodes. for (unsigned i = 0; i != NumPreds; ++i) { // If the value being merged in is not integer or is not defined // in the loop, skip it. Value *InVal = PN->getIncomingValue(i); - if (!isa<Instruction>(InVal) || - // SCEV only supports integer expressions for now. - (!isa<IntegerType>(InVal->getType()) && - !isa<PointerType>(InVal->getType()))) + if (!isa<Instruction>(InVal)) continue; // If this pred is for a subloop, not L itself, skip it. @@ -349,7 +355,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // the current expressions. // if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) - RewriteLoopExitValues(L, BackedgeTakenCount, Rewriter); + RewriteLoopExitValues(L, Rewriter); // Compute the type of the largest recurrence expression, and decide whether // a canonical induction variable should be inserted. @@ -364,37 +370,32 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { if (ExitingBlock) NeedCannIV = true; } - for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - const SCEV *Stride = IU->StrideOrder[i]; - const Type *Ty = SE->getEffectiveSCEVType(Stride->getType()); + for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) { + const Type *Ty = + SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType()); if (!LargestType || SE->getTypeSizeInBits(Ty) > SE->getTypeSizeInBits(LargestType)) LargestType = Ty; - - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[i]); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - - if (!SI->second->Users.empty()) - NeedCannIV = true; + NeedCannIV = true; } // Now that we know the largest of the induction variable expressions // in this loop, insert a canonical induction variable of the largest size. Value *IndVar = 0; if (NeedCannIV) { - // Check to see if the loop already has a canonical-looking induction - // variable. If one is present and it's wider than the planned canonical - // induction variable, temporarily remove it, so that the Rewriter - // doesn't attempt to reuse it. - PHINode *OldCannIV = L->getCanonicalInductionVariable(); - if (OldCannIV) { + // Check to see if the loop already has any canonical-looking induction + // variables. If any are present and wider than the planned canonical + // induction variable, temporarily remove them, so that the Rewriter + // doesn't attempt to reuse them. 
+ SmallVector<PHINode *, 2> OldCannIVs; + while (PHINode *OldCannIV = L->getCanonicalInductionVariable()) { if (SE->getTypeSizeInBits(OldCannIV->getType()) > SE->getTypeSizeInBits(LargestType)) OldCannIV->removeFromParent(); else - OldCannIV = 0; + break; + OldCannIVs.push_back(OldCannIV); } IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType); @@ -404,17 +405,21 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { DEBUG(dbgs() << "INDVARS: New CanIV: " << *IndVar << '\n'); // Now that the official induction variable is established, reinsert - // the old canonical-looking variable after it so that the IR remains - // consistent. It will be deleted as part of the dead-PHI deletion at + // any old canonical-looking variables after it so that the IR remains + // consistent. They will be deleted as part of the dead-PHI deletion at // the end of the pass. - if (OldCannIV) - OldCannIV->insertAfter(cast<Instruction>(IndVar)); + while (!OldCannIVs.empty()) { + PHINode *OldCannIV = OldCannIVs.pop_back_val(); + OldCannIV->insertBefore(L->getHeader()->getFirstNonPHI()); + } } // If we have a trip count expression, rewrite the loop's exit condition // using it. We can currently only handle loops with a single exit. ICmpInst *NewICmp = 0; - if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && ExitingBlock) { + if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && + !BackedgeTakenCount->isZero() && + ExitingBlock) { assert(NeedCannIV && "LinearFunctionTestReplace requires a canonical induction variable"); // Can't rewrite non-branch yet. @@ -424,7 +429,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { } // Rewrite IV-derived expressions. Clears the rewriter cache. - RewriteIVExpressions(L, LargestType, Rewriter); + RewriteIVExpressions(L, Rewriter); // The Rewriter may not be used from this point on. @@ -444,8 +449,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { return Changed; } -void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, - SCEVExpander &Rewriter) { +void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { SmallVector<WeakVH, 16> DeadInsts; // Rewrite all induction variable expressions in terms of the canonical @@ -455,72 +459,64 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, // add the offsets to the primary induction variable and cast, avoiding // the need for the code evaluation methods to insert induction variables // of different sizes. - for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - const SCEV *Stride = IU->StrideOrder[i]; - - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[i]); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - ilist<IVStrideUse> &List = SI->second->Users; - for (ilist<IVStrideUse>::iterator UI = List.begin(), - E = List.end(); UI != E; ++UI) { - Value *Op = UI->getOperandValToReplace(); - const Type *UseTy = Op->getType(); - Instruction *User = UI->getUser(); - - // Compute the final addrec to expand into code. - const SCEV *AR = IU->getReplacementExpr(*UI); - - // Evaluate the expression out of the loop, if possible. 
- if (!L->contains(UI->getUser())) { - const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop()); - if (ExitVal->isLoopInvariant(L)) - AR = ExitVal; - } + for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) { + const SCEV *Stride = UI->getStride(); + Value *Op = UI->getOperandValToReplace(); + const Type *UseTy = Op->getType(); + Instruction *User = UI->getUser(); + + // Compute the final addrec to expand into code. + const SCEV *AR = IU->getReplacementExpr(*UI); + + // Evaluate the expression out of the loop, if possible. + if (!L->contains(UI->getUser())) { + const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop()); + if (ExitVal->isLoopInvariant(L)) + AR = ExitVal; + } - // FIXME: It is an extremely bad idea to indvar substitute anything more - // complex than affine induction variables. Doing so will put expensive - // polynomial evaluations inside of the loop, and the str reduction pass - // currently can only reduce affine polynomials. For now just disable - // indvar subst on anything more complex than an affine addrec, unless - // it can be expanded to a trivial value. - if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L)) - continue; + // FIXME: It is an extremely bad idea to indvar substitute anything more + // complex than affine induction variables. Doing so will put expensive + // polynomial evaluations inside of the loop, and the str reduction pass + // currently can only reduce affine polynomials. For now just disable + // indvar subst on anything more complex than an affine addrec, unless + // it can be expanded to a trivial value. + if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L)) + continue; - // Determine the insertion point for this user. By default, insert - // immediately before the user. The SCEVExpander class will automatically - // hoist loop invariants out of the loop. For PHI nodes, there may be - // multiple uses, so compute the nearest common dominator for the - // incoming blocks. - Instruction *InsertPt = User; - if (PHINode *PHI = dyn_cast<PHINode>(InsertPt)) - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) - if (PHI->getIncomingValue(i) == Op) { - if (InsertPt == User) - InsertPt = PHI->getIncomingBlock(i)->getTerminator(); - else - InsertPt = - DT->findNearestCommonDominator(InsertPt->getParent(), - PHI->getIncomingBlock(i)) - ->getTerminator(); - } - - // Now expand it into actual Instructions and patch it into place. - Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); - - // Patch the new value into place. - if (Op->hasName()) - NewVal->takeName(Op); - User->replaceUsesOfWith(Op, NewVal); - UI->setOperandValToReplace(NewVal); - DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' - << " into = " << *NewVal << "\n"); - ++NumRemoved; - Changed = true; - - // The old value may be dead now. - DeadInsts.push_back(Op); - } + // Determine the insertion point for this user. By default, insert + // immediately before the user. The SCEVExpander class will automatically + // hoist loop invariants out of the loop. For PHI nodes, there may be + // multiple uses, so compute the nearest common dominator for the + // incoming blocks. 
+ Instruction *InsertPt = User; + if (PHINode *PHI = dyn_cast<PHINode>(InsertPt)) + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) + if (PHI->getIncomingValue(i) == Op) { + if (InsertPt == User) + InsertPt = PHI->getIncomingBlock(i)->getTerminator(); + else + InsertPt = + DT->findNearestCommonDominator(InsertPt->getParent(), + PHI->getIncomingBlock(i)) + ->getTerminator(); + } + + // Now expand it into actual Instructions and patch it into place. + Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); + + // Patch the new value into place. + if (Op->hasName()) + NewVal->takeName(Op); + User->replaceUsesOfWith(Op, NewVal); + UI->setOperandValToReplace(NewVal); + DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' + << " into = " << *NewVal << "\n"); + ++NumRemoved; + Changed = true; + + // The old value may be dead now. + DeadInsts.push_back(Op); } // Clear the rewriter cache, because values that are in the rewriter's cache @@ -598,8 +594,8 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) { } } -/// Return true if it is OK to use SIToFPInst for an inducation variable -/// with given inital and exit values. +/// Return true if it is OK to use SIToFPInst for an induction variable +/// with given initial and exit values. static bool useSIToFPInst(ConstantFP &InitV, ConstantFP &ExitV, uint64_t intIV, uint64_t intEV) { @@ -652,7 +648,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) { if (!convertToInt(InitValue->getValueAPF(), &newInitValue)) return; - // Check IV increment. Reject this PH if increement operation is not + // Check IV increment. Reject this PH if increment operation is not // an add or increment value can not be represented by an integer. BinaryOperator *Incr = dyn_cast<BinaryOperator>(PH->getIncomingValue(BackEdge)); @@ -688,7 +684,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) { if (BI->getCondition() != EC) return; } - // Find exit value. If exit value can not be represented as an interger then + // Find exit value. If exit value can not be represented as an integer then // do not handle this floating point PH. ConstantFP *EV = NULL; unsigned EVIndex = 1; @@ -750,11 +746,11 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) { ICmpInst *NewEC = new ICmpInst(EC->getParent()->getTerminator(), NewPred, LHS, RHS, EC->getName()); - // In the following deltions, PH may become dead and may be deleted. + // In the following deletions, PH may become dead and may be deleted. // Use a WeakVH to observe whether this happens. WeakVH WeakPH = PH; - // Delete old, floating point, exit comparision instruction. + // Delete old, floating point, exit comparison instruction. NewEC->takeName(EC); EC->replaceAllUsesWith(NewEC); RecursivelyDeleteTriviallyDeadInstructions(EC); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 3eff3d8..a6489ec 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -201,7 +201,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { if (isa<DbgInfoIntrinsic>(I)) continue; // If this is a pointer->pointer bitcast, it is free. - if (isa<BitCastInst>(I) && isa<PointerType>(I->getType())) + if (isa<BitCastInst>(I) && I->getType()->isPointerTy()) continue; // All other instructions count for at least one unit. 
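[Editor's sketch, not part of the patch] Most of the mechanical '-'/'+' pairs in this change swap isa&lt;PointerType&gt;(T)-style checks for the Type predicate methods (isPointerTy, isIntegerTy, isStructTy, and so on). The two spellings are equivalent queries on the same Type object; a small illustration against the post-patch 2.7-era headers, with hypothetical helper names:

#include "llvm/Type.h"
#include "llvm/DerivedTypes.h"
using namespace llvm;

// Old spelling, as seen on the removed lines of this patch.
static bool isPtrViaIsa(const Type *T)  { return isa<PointerType>(T); }

// New spelling, as seen on the added lines; same answer for every Type.
static bool isPtrViaPred(const Type *T) { return T->isPointerTy(); }

// The integer predicate also takes an optional width, replacing the
// older isInteger(32)-style call sites touched elsewhere in the patch.
static bool isInt32(const Type *T)      { return T->isIntegerTy(32); }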
@@ -214,7 +214,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { if (const CallInst *CI = dyn_cast<CallInst>(I)) { if (!isa<IntrinsicInst>(CI)) Size += 3; - else if (!isa<VectorType>(CI->getType())) + else if (!CI->getType()->isVectorTy()) Size += 1; } } @@ -336,13 +336,18 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ else InterestingVal = ConstantInt::getFalse(I->getContext()); - // Scan for the sentinel. + // Scan for the sentinel. If we find an undef, force it to the + // interesting value: x|undef -> true and x&undef -> false. for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) - if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) + if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) { Result.push_back(LHSVals[i]); + Result.back().first = InterestingVal; + } for (unsigned i = 0, e = RHSVals.size(); i != e; ++i) - if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) + if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) { Result.push_back(RHSVals[i]); + Result.back().first = InterestingVal; + } return !Result.empty(); } @@ -400,7 +405,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ // If comparing a live-in value against a constant, see if we know the // live-in value on any predecessors. if (LVI && isa<Constant>(Cmp->getOperand(1)) && - Cmp->getType()->isInteger() && // Not vector compare. + Cmp->getType()->isIntegerTy() && // Not vector compare. (!isa<Instruction>(Cmp->getOperand(0)) || cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) { Constant *RHSCst = cast<Constant>(Cmp->getOperand(1)); diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 81f9ae6..d7ace34 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -678,7 +678,7 @@ void LICM::PromoteValuesInLoop() { // If we are promoting a pointer value, update alias information for the // inserted load. Value *LoadValue = 0; - if (isa<PointerType>(cast<PointerType>(Ptr->getType())->getElementType())) { + if (cast<PointerType>(Ptr->getType())->getElementType()->isPointerTy()) { // Locate a load or store through the pointer, and assign the same value // to LI as we are loading or storing. Since we know that the value is // stored in this loop, this will always succeed. @@ -751,7 +751,7 @@ void LICM::PromoteValuesInLoop() { LoadInst *LI = new LoadInst(PromotedValues[i].first, "", InsertPos); // If this is a pointer type, update alias info appropriately. - if (isa<PointerType>(LI->getType())) + if (LI->getType()->isPointerTy()) CurAST->copyValue(PointerValueNumbers[PVN++], LI); // Store into the memory we promoted. diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index a5611ff..f920dca 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -17,6 +17,40 @@ // available on the target, and it performs a variety of other optimizations // related to loop induction variables. // +// Terminology note: this code has a lot of handling for "post-increment" or +// "post-inc" users. This is not talking about post-increment addressing modes; +// it is instead talking about code like this: +// +// %i = phi [ 0, %entry ], [ %i.next, %latch ] +// ... +// %i.next = add %i, 1 +// %c = icmp eq %i.next, %n +// +// The SCEV for %i is {0,+,1}<%L>. 
The SCEV for %i.next is {1,+,1}<%L>, however +// it's useful to think about these as the same register, with some uses using +// the value of the register before the add and some using // it after. In this +// example, the icmp is a post-increment user, since it uses %i.next, which is +// the value of the induction variable after the increment. The other common +// case of post-increment users is users outside the loop. +// +// TODO: More sophistication in the way Formulae are generated and filtered. +// +// TODO: Handle multiple loops at a time. +// +// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr +// instead of a GlobalValue? +// +// TODO: When truncation is free, truncate ICmp users' operands to make it a +// smaller encoding (on x86 at least). +// +// TODO: When a negated register is used by an add (such as in a list of +// multiple base registers, or as the increment expression in an addrec), +// we may not actually need both reg and (-1 * reg) in registers; the +// negation can be implemented by using a sub instead of an add. The +// lack of support for taking this into consideration when making +// register pressure decisions is partly worked around by the "Special" +// use kind. +// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "loop-reduce" @@ -26,208 +60,434 @@ #include "llvm/IntrinsicInst.h" #include "llvm/DerivedTypes.h" #include "llvm/Analysis/IVUsers.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include <algorithm> using namespace llvm; -STATISTIC(NumReduced , "Number of IV uses strength reduced"); -STATISTIC(NumInserted, "Number of PHIs inserted"); -STATISTIC(NumVariable, "Number of PHIs with variable strides"); -STATISTIC(NumEliminated, "Number of strides eliminated"); -STATISTIC(NumShadow, "Number of Shadow IVs optimized"); -STATISTIC(NumImmSunk, "Number of common expr immediates sunk into uses"); -STATISTIC(NumLoopCond, "Number of loop terminating conds optimized"); -STATISTIC(NumCountZero, "Number of count iv optimized to count toward zero"); +namespace { + +/// RegSortData - This class holds data which is used to order reuse candidates. +class RegSortData { +public: + /// UsedByIndices - This represents the set of LSRUse indices which reference + /// a particular register. + SmallBitVector UsedByIndices; + + RegSortData() {} + + void print(raw_ostream &OS) const; + void dump() const; +}; -static cl::opt<bool> EnableFullLSRMode("enable-full-lsr", - cl::init(false), - cl::Hidden); +} + +void RegSortData::print(raw_ostream &OS) const { + OS << "[NumUses=" << UsedByIndices.count() << ']'; +} + +void RegSortData::dump() const { + print(errs()); errs() << '\n'; +} namespace { - struct BasedUser; +/// RegUseTracker - Map register candidates to information about how they are +/// used. 
+class RegUseTracker { + typedef DenseMap<const SCEV *, RegSortData> RegUsesTy; - /// IVInfo - This structure keeps track of one IV expression inserted during - /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as - /// well as the PHI node and increment value created for rewrite. - struct IVExpr { - const SCEV *Stride; - const SCEV *Base; - PHINode *PHI; + RegUsesTy RegUses; + SmallVector<const SCEV *, 16> RegSequence; - IVExpr(const SCEV *const stride, const SCEV *const base, PHINode *phi) - : Stride(stride), Base(base), PHI(phi) {} - }; +public: + void CountRegister(const SCEV *Reg, size_t LUIdx); + + bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const; + + const SmallBitVector &getUsedByIndices(const SCEV *Reg) const; + + void clear(); + + typedef SmallVectorImpl<const SCEV *>::iterator iterator; + typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator; + iterator begin() { return RegSequence.begin(); } + iterator end() { return RegSequence.end(); } + const_iterator begin() const { return RegSequence.begin(); } + const_iterator end() const { return RegSequence.end(); } +}; + +} - /// IVsOfOneStride - This structure keeps track of all IV expression inserted - /// during StrengthReduceStridedIVUsers for a particular stride of the IV. - struct IVsOfOneStride { - std::vector<IVExpr> IVs; +void +RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) { + std::pair<RegUsesTy::iterator, bool> Pair = + RegUses.insert(std::make_pair(Reg, RegSortData())); + RegSortData &RSD = Pair.first->second; + if (Pair.second) + RegSequence.push_back(Reg); + RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1)); + RSD.UsedByIndices.set(LUIdx); +} + +bool +RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const { + if (!RegUses.count(Reg)) return false; + const SmallBitVector &UsedByIndices = + RegUses.find(Reg)->second.UsedByIndices; + int i = UsedByIndices.find_first(); + if (i == -1) return false; + if ((size_t)i != LUIdx) return true; + return UsedByIndices.find_next(i) != -1; +} + +const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const { + RegUsesTy::const_iterator I = RegUses.find(Reg); + assert(I != RegUses.end() && "Unknown register!"); + return I->second.UsedByIndices; +} + +void RegUseTracker::clear() { + RegUses.clear(); + RegSequence.clear(); +} + +namespace { + +/// Formula - This class holds information that describes a formula for +/// computing satisfying a use. It may include broken-out immediates and scaled +/// registers. +struct Formula { + /// AM - This is used to represent complex addressing, as well as other kinds + /// of interesting uses. + TargetLowering::AddrMode AM; + + /// BaseRegs - The list of "base" registers for this use. When this is + /// non-empty, AM.HasBaseReg should be set to true. + SmallVector<const SCEV *, 2> BaseRegs; - void addIV(const SCEV *const Stride, const SCEV *const Base, PHINode *PHI) { - IVs.push_back(IVExpr(Stride, Base, PHI)); + /// ScaledReg - The 'scaled' register for this use. This should be non-null + /// when AM.Scale is not zero. 
+ const SCEV *ScaledReg; + + Formula() : ScaledReg(0) {} + + void InitialMatch(const SCEV *S, Loop *L, + ScalarEvolution &SE, DominatorTree &DT); + + unsigned getNumRegs() const; + const Type *getType() const; + + bool referencesReg(const SCEV *S) const; + bool hasRegsUsedByUsesOtherThan(size_t LUIdx, + const RegUseTracker &RegUses) const; + + void print(raw_ostream &OS) const; + void dump() const; +}; + +} + +/// DoInitialMatch - Recursion helper for InitialMatch. +static void DoInitialMatch(const SCEV *S, Loop *L, + SmallVectorImpl<const SCEV *> &Good, + SmallVectorImpl<const SCEV *> &Bad, + ScalarEvolution &SE, DominatorTree &DT) { + // Collect expressions which properly dominate the loop header. + if (S->properlyDominates(L->getHeader(), &DT)) { + Good.push_back(S); + return; + } + + // Look at add operands. + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) + DoInitialMatch(*I, L, Good, Bad, SE, DT); + return; + } + + // Look at addrec operands. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) + if (!AR->getStart()->isZero()) { + DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT); + DoInitialMatch(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()), + AR->getStepRecurrence(SE), + AR->getLoop()), + L, Good, Bad, SE, DT); + return; } - }; - class LoopStrengthReduce : public LoopPass { - IVUsers *IU; - ScalarEvolution *SE; - bool Changed; - - /// IVsByStride - Keep track of all IVs that have been inserted for a - /// particular stride. - std::map<const SCEV *, IVsOfOneStride> IVsByStride; - - /// DeadInsts - Keep track of instructions we may have made dead, so that - /// we can remove them after we are done working. - SmallVector<WeakVH, 16> DeadInsts; - - /// TLI - Keep a pointer of a TargetLowering to consult for determining - /// transformation profitability. - const TargetLowering *TLI; - - public: - static char ID; // Pass ID, replacement for typeid - explicit LoopStrengthReduce(const TargetLowering *tli = NULL) : - LoopPass(&ID), TLI(tli) {} - - bool runOnLoop(Loop *L, LPPassManager &LPM); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - // We split critical edges, so we change the CFG. However, we do update - // many analyses if they are around. - AU.addPreservedID(LoopSimplifyID); - AU.addPreserved("loops"); - AU.addPreserved("domfrontier"); - AU.addPreserved("domtree"); - - AU.addRequiredID(LoopSimplifyID); - AU.addRequired<ScalarEvolution>(); - AU.addPreserved<ScalarEvolution>(); - AU.addRequired<IVUsers>(); - AU.addPreserved<IVUsers>(); + // Handle a multiplication by -1 (negation) if it didn't fold. 
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) + if (Mul->getOperand(0)->isAllOnesValue()) { + SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end()); + const SCEV *NewMul = SE.getMulExpr(Ops); + + SmallVector<const SCEV *, 4> MyGood; + SmallVector<const SCEV *, 4> MyBad; + DoInitialMatch(NewMul, L, MyGood, MyBad, SE, DT); + const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue( + SE.getEffectiveSCEVType(NewMul->getType()))); + for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(), + E = MyGood.end(); I != E; ++I) + Good.push_back(SE.getMulExpr(NegOne, *I)); + for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(), + E = MyBad.end(); I != E; ++I) + Bad.push_back(SE.getMulExpr(NegOne, *I)); + return; } - private: - void OptimizeIndvars(Loop *L); - - /// OptimizeLoopTermCond - Change loop terminating condition to use the - /// postinc iv when possible. - void OptimizeLoopTermCond(Loop *L); - - /// OptimizeShadowIV - If IV is used in a int-to-float cast - /// inside the loop then try to eliminate the cast opeation. - void OptimizeShadowIV(Loop *L); - - /// OptimizeMax - Rewrite the loop's terminating condition - /// if it uses a max computation. - ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse); - - /// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for - /// deciding when to exit the loop is used only for that purpose, try to - /// rearrange things so it counts down to a test against zero. - bool OptimizeLoopCountIV(Loop *L); - bool OptimizeLoopCountIVOfStride(const SCEV* &Stride, - IVStrideUse* &CondUse, Loop *L); - - /// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a - /// single stride of IV. All of the users may have different starting - /// values, and this may not be the only stride. 
- void StrengthReduceIVUsersOfStride(const SCEV *Stride, - IVUsersOfOneStride &Uses, - Loop *L); - void StrengthReduceIVUsers(Loop *L); - - ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse, - const SCEV* &CondStride, - bool PostPass = false); - - bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, - const SCEV* &CondStride); - bool RequiresTypeConversion(const Type *Ty, const Type *NewTy); - const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *, - IVExpr&, const Type*, - const std::vector<BasedUser>& UsersToProcess); - bool ValidScale(bool, int64_t, - const std::vector<BasedUser>& UsersToProcess); - bool ValidOffset(bool, int64_t, int64_t, - const std::vector<BasedUser>& UsersToProcess); - const SCEV *CollectIVUsers(const SCEV *Stride, - IVUsersOfOneStride &Uses, - Loop *L, - bool &AllUsesAreAddresses, - bool &AllUsesAreOutsideLoop, - std::vector<BasedUser> &UsersToProcess); - bool StrideMightBeShared(const SCEV *Stride, Loop *L, bool CheckPreInc); - bool ShouldUseFullStrengthReductionMode( - const std::vector<BasedUser> &UsersToProcess, - const Loop *L, - bool AllUsesAreAddresses, - const SCEV *Stride); - void PrepareToStrengthReduceFully( - std::vector<BasedUser> &UsersToProcess, - const SCEV *Stride, - const SCEV *CommonExprs, - const Loop *L, - SCEVExpander &PreheaderRewriter); - void PrepareToStrengthReduceFromSmallerStride( - std::vector<BasedUser> &UsersToProcess, - Value *CommonBaseV, - const IVExpr &ReuseIV, - Instruction *PreInsertPt); - void PrepareToStrengthReduceWithNewPhi( - std::vector<BasedUser> &UsersToProcess, - const SCEV *Stride, - const SCEV *CommonExprs, - Value *CommonBaseV, - Instruction *IVIncInsertPt, - const Loop *L, - SCEVExpander &PreheaderRewriter); - - void DeleteTriviallyDeadInstructions(); - }; + // Ok, we can't do anything interesting. Just stuff the whole thing into a + // register and hope for the best. + Bad.push_back(S); } -char LoopStrengthReduce::ID = 0; -static RegisterPass<LoopStrengthReduce> -X("loop-reduce", "Loop Strength Reduction"); +/// InitialMatch - Incorporate loop-variant parts of S into this Formula, +/// attempting to keep all loop-invariant and loop-computable values in a +/// single base register. +void Formula::InitialMatch(const SCEV *S, Loop *L, + ScalarEvolution &SE, DominatorTree &DT) { + SmallVector<const SCEV *, 4> Good; + SmallVector<const SCEV *, 4> Bad; + DoInitialMatch(S, L, Good, Bad, SE, DT); + if (!Good.empty()) { + BaseRegs.push_back(SE.getAddExpr(Good)); + AM.HasBaseReg = true; + } + if (!Bad.empty()) { + BaseRegs.push_back(SE.getAddExpr(Bad)); + AM.HasBaseReg = true; + } +} -Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { - return new LoopStrengthReduce(TLI); +/// getNumRegs - Return the total number of register operands used by this +/// formula. This does not include register uses implied by non-constant +/// addrec strides. +unsigned Formula::getNumRegs() const { + return !!ScaledReg + BaseRegs.size(); } -/// DeleteTriviallyDeadInstructions - If any of the instructions is the -/// specified set are trivially dead, delete them and see if this makes any of -/// their operands subsequently dead. -void LoopStrengthReduce::DeleteTriviallyDeadInstructions() { - while (!DeadInsts.empty()) { - Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); +/// getType - Return the type of this formula, if it has one, or null +/// otherwise. This type is meaningless except for the bit size. 
+const Type *Formula::getType() const { + return !BaseRegs.empty() ? BaseRegs.front()->getType() : + ScaledReg ? ScaledReg->getType() : + AM.BaseGV ? AM.BaseGV->getType() : + 0; +} - if (I == 0 || !isInstructionTriviallyDead(I)) - continue; +/// referencesReg - Test if this formula references the given register. +bool Formula::referencesReg(const SCEV *S) const { + return S == ScaledReg || + std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end(); +} - for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) - if (Instruction *U = dyn_cast<Instruction>(*OI)) { - *OI = 0; - if (U->use_empty()) - DeadInsts.push_back(U); +/// hasRegsUsedByUsesOtherThan - Test whether this formula uses registers +/// which are used by uses other than the use with the given index. +bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx, + const RegUseTracker &RegUses) const { + if (ScaledReg) + if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx)) + return true; + for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(), + E = BaseRegs.end(); I != E; ++I) + if (RegUses.isRegUsedByUsesOtherThan(*I, LUIdx)) + return true; + return false; +} + +void Formula::print(raw_ostream &OS) const { + bool First = true; + if (AM.BaseGV) { + if (!First) OS << " + "; else First = false; + WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false); + } + if (AM.BaseOffs != 0) { + if (!First) OS << " + "; else First = false; + OS << AM.BaseOffs; + } + for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(), + E = BaseRegs.end(); I != E; ++I) { + if (!First) OS << " + "; else First = false; + OS << "reg(" << **I << ')'; + } + if (AM.Scale != 0) { + if (!First) OS << " + "; else First = false; + OS << AM.Scale << "*reg("; + if (ScaledReg) + OS << *ScaledReg; + else + OS << "<unknown>"; + OS << ')'; + } +} + +void Formula::dump() const { + print(errs()); errs() << '\n'; +} + +/// isAddRecSExtable - Return true if the given addrec can be sign-extended +/// without changing its value. +static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { + const Type *WideTy = + IntegerType::get(SE.getContext(), + SE.getTypeSizeInBits(AR->getType()) + 1); + return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy)); +} + +/// isAddSExtable - Return true if the given add can be sign-extended +/// without changing its value. +static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) { + const Type *WideTy = + IntegerType::get(SE.getContext(), + SE.getTypeSizeInBits(A->getType()) + 1); + return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy)); +} + +/// isMulSExtable - Return true if the given add can be sign-extended +/// without changing its value. +static bool isMulSExtable(const SCEVMulExpr *A, ScalarEvolution &SE) { + const Type *WideTy = + IntegerType::get(SE.getContext(), + SE.getTypeSizeInBits(A->getType()) + 1); + return isa<SCEVMulExpr>(SE.getSignExtendExpr(A, WideTy)); +} + +/// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined +/// and if the remainder is known to be zero, or null otherwise. If +/// IgnoreSignificantBits is true, expressions like (X * Y) /s Y are simplified +/// to Y, ignoring that the multiplication may overflow, which is useful when +/// the result will be used in a context where the most significant bits are +/// ignored. 
+static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, + ScalarEvolution &SE, + bool IgnoreSignificantBits = false) { + // Handle the trivial case, which works for any SCEV type. + if (LHS == RHS) + return SE.getIntegerSCEV(1, LHS->getType()); + + // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do some + // folding. + if (RHS->isAllOnesValue()) + return SE.getMulExpr(LHS, RHS); + + // Check for a division of a constant by a constant. + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) { + const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS); + if (!RC) + return 0; + if (C->getValue()->getValue().srem(RC->getValue()->getValue()) != 0) + return 0; + return SE.getConstant(C->getValue()->getValue() + .sdiv(RC->getValue()->getValue())); + } + + // Distribute the sdiv over addrec operands, if the addrec doesn't overflow. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) { + if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) { + const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE, + IgnoreSignificantBits); + if (!Start) return 0; + const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE, + IgnoreSignificantBits); + if (!Step) return 0; + return SE.getAddRecExpr(Start, Step, AR->getLoop()); + } + } + + // Distribute the sdiv over add operands, if the add doesn't overflow. + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) { + if (IgnoreSignificantBits || isAddSExtable(Add, SE)) { + SmallVector<const SCEV *, 8> Ops; + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) { + const SCEV *Op = getExactSDiv(*I, RHS, SE, + IgnoreSignificantBits); + if (!Op) return 0; + Ops.push_back(Op); } + return SE.getAddExpr(Ops); + } + } - I->eraseFromParent(); - Changed = true; + // Check for a multiply operand that we can pull RHS out of. + if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) + if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) { + SmallVector<const SCEV *, 4> Ops; + bool Found = false; + for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end(); + I != E; ++I) { + if (!Found) + if (const SCEV *Q = getExactSDiv(*I, RHS, SE, + IgnoreSignificantBits)) { + Ops.push_back(Q); + Found = true; + continue; + } + Ops.push_back(*I); + } + return Found ? SE.getMulExpr(Ops) : 0; + } + + // Otherwise we don't know. + return 0; +} + +/// ExtractImmediate - If S involves the addition of a constant integer value, +/// return that integer value, and mutate S to point to a new SCEV with that +/// value excluded. +static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { + if (C->getValue()->getValue().getMinSignedBits() <= 64) { + S = SE.getIntegerSCEV(0, C->getType()); + return C->getValue()->getSExtValue(); + } + } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end()); + int64_t Result = ExtractImmediate(NewOps.front(), SE); + S = SE.getAddExpr(NewOps); + return Result; + } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); + int64_t Result = ExtractImmediate(NewOps.front(), SE); + S = SE.getAddRecExpr(NewOps, AR->getLoop()); + return Result; + } + return 0; +} + +/// ExtractSymbol - If S involves the addition of a GlobalValue address, +/// return that symbol, and mutate S to point to a new SCEV with that +/// value excluded. 
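// [Illustrative aside, not part of this commit: a minimal sketch of the
// "peel off and mutate" pattern that ExtractImmediate above and ExtractSymbol
// below both follow. ToyExpr and extractImmediateSketch are editorial
// stand-ins; the real code rebuilds a SCEV add expression with the constant
// (or GlobalValue) operand removed and returns that operand to the caller.]
#include <cstdint>

struct ToyExpr {
  const char *Symbol; // Stand-in for a symbolic operand (e.g. a GlobalValue).
  int64_t Offset;     // Stand-in for an additive SCEVConstant operand.
};

static int64_t extractImmediateSketch(ToyExpr &E) {
  int64_t Imm = E.Offset; // Report the constant part to the caller...
  E.Offset = 0;           // ...and mutate E so it no longer contains it.
  return Imm;
}
// For example, extracting from {Symbol="gv", Offset=12} returns 12 and
// leaves {Symbol="gv", Offset=0}, mirroring how S is split apart here.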
+static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) { + S = SE.getIntegerSCEV(0, GV->getType()); + return GV; + } + } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end()); + GlobalValue *Result = ExtractSymbol(NewOps.back(), SE); + S = SE.getAddExpr(NewOps); + return Result; + } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); + GlobalValue *Result = ExtractSymbol(NewOps.front(), SE); + S = SE.getAddRecExpr(NewOps, AR->getLoop()); + return Result; } + return 0; } /// isAddressUse - Returns true if the specified instruction is using the @@ -276,1776 +536,833 @@ static const Type *getAccessType(const Instruction *Inst) { break; } } - return AccessTy; -} -namespace { - /// BasedUser - For a particular base value, keep information about how we've - /// partitioned the expression so far. - struct BasedUser { - /// Base - The Base value for the PHI node that needs to be inserted for - /// this use. As the use is processed, information gets moved from this - /// field to the Imm field (below). BasedUser values are sorted by this - /// field. - const SCEV *Base; - - /// Inst - The instruction using the induction variable. - Instruction *Inst; - - /// OperandValToReplace - The operand value of Inst to replace with the - /// EmittedBase. - Value *OperandValToReplace; - - /// Imm - The immediate value that should be added to the base immediately - /// before Inst, because it will be folded into the imm field of the - /// instruction. This is also sometimes used for loop-variant values that - /// must be added inside the loop. - const SCEV *Imm; - - /// Phi - The induction variable that performs the striding that - /// should be used for this user. - PHINode *Phi; - - // isUseOfPostIncrementedValue - True if this should use the - // post-incremented version of this IV, not the preincremented version. - // This can only be set in special cases, such as the terminating setcc - // instruction for a loop and uses outside the loop that are dominated by - // the loop. - bool isUseOfPostIncrementedValue; - - BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) - : Base(IVSU.getOffset()), Inst(IVSU.getUser()), - OperandValToReplace(IVSU.getOperandValToReplace()), - Imm(se->getIntegerSCEV(0, Base->getType())), - isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {} - - // Once we rewrite the code to insert the new IVs we want, update the - // operands of Inst to use the new expression 'NewBase', with 'Imm' added - // to it. - void RewriteInstructionToUseNewBase(const SCEV *NewBase, - Instruction *InsertPt, - SCEVExpander &Rewriter, Loop *L, Pass *P, - SmallVectorImpl<WeakVH> &DeadInsts, - ScalarEvolution *SE); - - Value *InsertCodeForBaseAtPosition(const SCEV *NewBase, - const Type *Ty, - SCEVExpander &Rewriter, - Instruction *IP, - ScalarEvolution *SE); - void dump() const; - }; -} + // All pointers have the same requirements, so canonicalize them to an + // arbitrary pointer type to minimize variation. 
+ if (const PointerType *PTy = dyn_cast<PointerType>(AccessTy)) + AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1), + PTy->getAddressSpace()); -void BasedUser::dump() const { - dbgs() << " Base=" << *Base; - dbgs() << " Imm=" << *Imm; - dbgs() << " Inst: " << *Inst; + return AccessTy; } -Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *NewBase, - const Type *Ty, - SCEVExpander &Rewriter, - Instruction *IP, - ScalarEvolution *SE) { - Value *Base = Rewriter.expandCodeFor(NewBase, 0, IP); - - // Wrap the base in a SCEVUnknown so that ScalarEvolution doesn't try to - // re-analyze it. - const SCEV *NewValSCEV = SE->getUnknown(Base); - - // Always emit the immediate into the same block as the user. - NewValSCEV = SE->getAddExpr(NewValSCEV, Imm); - - return Rewriter.expandCodeFor(NewValSCEV, Ty, IP); -} +/// DeleteTriviallyDeadInstructions - If any of the instructions is the +/// specified set are trivially dead, delete them and see if this makes any of +/// their operands subsequently dead. +static bool +DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { + bool Changed = false; + while (!DeadInsts.empty()) { + Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); -// Once we rewrite the code to insert the new IVs we want, update the -// operands of Inst to use the new expression 'NewBase', with 'Imm' added -// to it. NewBasePt is the last instruction which contributes to the -// value of NewBase in the case that it's a diffferent instruction from -// the PHI that NewBase is computed from, or null otherwise. -// -void BasedUser::RewriteInstructionToUseNewBase(const SCEV *NewBase, - Instruction *NewBasePt, - SCEVExpander &Rewriter, Loop *L, Pass *P, - SmallVectorImpl<WeakVH> &DeadInsts, - ScalarEvolution *SE) { - if (!isa<PHINode>(Inst)) { - // By default, insert code at the user instruction. - BasicBlock::iterator InsertPt = Inst; - - // However, if the Operand is itself an instruction, the (potentially - // complex) inserted code may be shared by many users. Because of this, we - // want to emit code for the computation of the operand right before its old - // computation. This is usually safe, because we obviously used to use the - // computation when it was computed in its current block. However, in some - // cases (e.g. use of a post-incremented induction variable) the NewBase - // value will be pinned to live somewhere after the original computation. - // In this case, we have to back off. - // - // If this is a use outside the loop (which means after, since it is based - // on a loop indvar) we use the post-incremented value, so that we don't - // artificially make the preinc value live out the bottom of the loop. - if (!isUseOfPostIncrementedValue && L->contains(Inst)) { - if (NewBasePt && isa<PHINode>(OperandValToReplace)) { - InsertPt = NewBasePt; - ++InsertPt; - } else if (Instruction *OpInst - = dyn_cast<Instruction>(OperandValToReplace)) { - InsertPt = OpInst; - while (isa<PHINode>(InsertPt)) ++InsertPt; - } - } - Value *NewVal = InsertCodeForBaseAtPosition(NewBase, - OperandValToReplace->getType(), - Rewriter, InsertPt, SE); - // Replace the use of the operand Value with the new Phi we just created. 
- Inst->replaceUsesOfWith(OperandValToReplace, NewVal); - - DEBUG(dbgs() << " Replacing with "); - DEBUG(WriteAsOperand(dbgs(), NewVal, /*PrintType=*/false)); - DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM " - << *Imm << "\n"); - return; - } + if (I == 0 || !isInstructionTriviallyDead(I)) + continue; - // PHI nodes are more complex. We have to insert one copy of the NewBase+Imm - // expression into each operand block that uses it. Note that PHI nodes can - // have multiple entries for the same predecessor. We use a map to make sure - // that a PHI node only has a single Value* for each predecessor (which also - // prevents us from inserting duplicate code in some blocks). - DenseMap<BasicBlock*, Value*> InsertedCode; - PHINode *PN = cast<PHINode>(Inst); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - if (PN->getIncomingValue(i) == OperandValToReplace) { - // If the original expression is outside the loop, put the replacement - // code in the same place as the original expression, - // which need not be an immediate predecessor of this PHI. This way we - // need only one copy of it even if it is referenced multiple times in - // the PHI. We don't do this when the original expression is inside the - // loop because multiple copies sometimes do useful sinking of code in - // that case(?). - Instruction *OldLoc = dyn_cast<Instruction>(OperandValToReplace); - BasicBlock *PHIPred = PN->getIncomingBlock(i); - if (L->contains(OldLoc)) { - // If this is a critical edge, split the edge so that we do not insert - // the code on all predecessor/successor paths. We do this unless this - // is the canonical backedge for this loop, as this can make some - // inserted code be in an illegal position. - if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 && - !isa<IndirectBrInst>(PHIPred->getTerminator()) && - (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) { - - // First step, split the critical edge. - BasicBlock *NewBB = SplitCriticalEdge(PHIPred, PN->getParent(), - P, false); - - // Next step: move the basic block. In particular, if the PHI node - // is outside of the loop, and PredTI is in the loop, we want to - // move the block to be immediately before the PHI block, not - // immediately after PredTI. - if (L->contains(PHIPred) && !L->contains(PN)) - NewBB->moveBefore(PN->getParent()); - - // Splitting the edge can reduce the number of PHI entries we have. - e = PN->getNumIncomingValues(); - PHIPred = NewBB; - i = PN->getBasicBlockIndex(PHIPred); - } - } - Value *&Code = InsertedCode[PHIPred]; - if (!Code) { - // Insert the code into the end of the predecessor block. - Instruction *InsertPt = (L->contains(OldLoc)) ? - PHIPred->getTerminator() : - OldLoc->getParent()->getTerminator(); - Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(), - Rewriter, InsertPt, SE); - - DEBUG(dbgs() << " Changing PHI use to "); - DEBUG(WriteAsOperand(dbgs(), Code, /*PrintType=*/false)); - DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM " - << *Imm << "\n"); + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) + if (Instruction *U = dyn_cast<Instruction>(*OI)) { + *OI = 0; + if (U->use_empty()) + DeadInsts.push_back(U); } - // Replace the use of the operand Value with the new Phi we just created. - PN->setIncomingValue(i, Code); - Rewriter.clear(); - } + I->eraseFromParent(); + Changed = true; } - // PHI node might have become a constant value after SplitCriticalEdge. 
- DeadInsts.push_back(Inst); + return Changed; } +namespace { -/// fitsInAddressMode - Return true if V can be subsumed within an addressing -/// mode, and does not need to be put in a register first. -static bool fitsInAddressMode(const SCEV *V, const Type *AccessTy, - const TargetLowering *TLI, bool HasBaseReg) { - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) { - int64_t VC = SC->getValue()->getSExtValue(); - if (TLI) { - TargetLowering::AddrMode AM; - AM.BaseOffs = VC; - AM.HasBaseReg = HasBaseReg; - return TLI->isLegalAddressingMode(AM, AccessTy); - } else { - // Defaults to PPC. PPC allows a sign-extended 16-bit immediate field. - return (VC > -(1 << 16) && VC < (1 << 16)-1); - } - } - - if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) - if (GlobalValue *GV = dyn_cast<GlobalValue>(SU->getValue())) { - if (TLI) { - TargetLowering::AddrMode AM; - AM.BaseGV = GV; - AM.HasBaseReg = HasBaseReg; - return TLI->isLegalAddressingMode(AM, AccessTy); - } else { - // Default: assume global addresses are not legal. - } - } +/// Cost - This class is used to measure and compare candidate formulae. +class Cost { + /// TODO: Some of these could be merged. Also, a lexical ordering + /// isn't always optimal. + unsigned NumRegs; + unsigned AddRecCost; + unsigned NumIVMuls; + unsigned NumBaseAdds; + unsigned ImmCost; + unsigned SetupCost; + +public: + Cost() + : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0), + SetupCost(0) {} + + unsigned getNumRegs() const { return NumRegs; } + + bool operator<(const Cost &Other) const; + + void Loose(); + + void RateFormula(const Formula &F, + SmallPtrSet<const SCEV *, 16> &Regs, + const DenseSet<const SCEV *> &VisitedRegs, + const Loop *L, + const SmallVectorImpl<int64_t> &Offsets, + ScalarEvolution &SE, DominatorTree &DT); + + void print(raw_ostream &OS) const; + void dump() const; + +private: + void RateRegister(const SCEV *Reg, + SmallPtrSet<const SCEV *, 16> &Regs, + const Loop *L, + ScalarEvolution &SE, DominatorTree &DT); + void RatePrimaryRegister(const SCEV *Reg, + SmallPtrSet<const SCEV *, 16> &Regs, + const Loop *L, + ScalarEvolution &SE, DominatorTree &DT); +}; - return false; } -/// MoveLoopVariantsToImmediateField - Move any subexpressions from Val that are -/// loop varying to the Imm operand. -static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm, - Loop *L, ScalarEvolution *SE) { - if (Val->isLoopInvariant(L)) return; // Nothing to do. - - if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { - SmallVector<const SCEV *, 4> NewOps; - NewOps.reserve(SAE->getNumOperands()); +/// RateRegister - Tally up interesting quantities from the given register. +void Cost::RateRegister(const SCEV *Reg, + SmallPtrSet<const SCEV *, 16> &Regs, + const Loop *L, + ScalarEvolution &SE, DominatorTree &DT) { + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) { + if (AR->getLoop() == L) + AddRecCost += 1; /// TODO: This should be a function of the stride. + + // If this is an addrec for a loop that's already been visited by LSR, + // don't second-guess its addrec phi nodes. LSR isn't currently smart + // enough to reason about more than one loop at a time. Consider these + // registers free and leave them alone. 
+ else if (L->contains(AR->getLoop()) || + (!AR->getLoop()->contains(L) && + DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) { + for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) + if (SE.isSCEVable(PN->getType()) && + (SE.getEffectiveSCEVType(PN->getType()) == + SE.getEffectiveSCEVType(AR->getType())) && + SE.getSCEV(PN) == AR) + return; - for (unsigned i = 0; i != SAE->getNumOperands(); ++i) - if (!SAE->getOperand(i)->isLoopInvariant(L)) { - // If this is a loop-variant expression, it must stay in the immediate - // field of the expression. - Imm = SE->getAddExpr(Imm, SAE->getOperand(i)); - } else { - NewOps.push_back(SAE->getOperand(i)); - } + // If this isn't one of the addrecs that the loop already has, it + // would require a costly new phi and add. TODO: This isn't + // precisely modeled right now. + ++NumBaseAdds; + if (!Regs.count(AR->getStart())) + RateRegister(AR->getStart(), Regs, L, SE, DT); + } - if (NewOps.empty()) - Val = SE->getIntegerSCEV(0, Val->getType()); - else - Val = SE->getAddExpr(NewOps); - } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) { - // Try to pull immediates out of the start value of nested addrec's. - const SCEV *Start = SARE->getStart(); - MoveLoopVariantsToImmediateField(Start, Imm, L, SE); - - SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end()); - Ops[0] = Start; - Val = SE->getAddRecExpr(Ops, SARE->getLoop()); - } else { - // Otherwise, all of Val is variant, move the whole thing over. - Imm = SE->getAddExpr(Imm, Val); - Val = SE->getIntegerSCEV(0, Val->getType()); + // Add the step value register, if it needs one. + // TODO: The non-affine case isn't precisely modeled here. + if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) + if (!Regs.count(AR->getStart())) + RateRegister(AR->getOperand(1), Regs, L, SE, DT); } + ++NumRegs; + + // Rough heuristic; favor registers which don't require extra setup + // instructions in the preheader. + if (!isa<SCEVUnknown>(Reg) && + !isa<SCEVConstant>(Reg) && + !(isa<SCEVAddRecExpr>(Reg) && + (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) || + isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart())))) + ++SetupCost; } +/// RatePrimaryRegister - Record this register in the set. If we haven't seen it +/// before, rate it. +void Cost::RatePrimaryRegister(const SCEV *Reg, + SmallPtrSet<const SCEV *, 16> &Regs, + const Loop *L, + ScalarEvolution &SE, DominatorTree &DT) { + if (Regs.insert(Reg)) + RateRegister(Reg, Regs, L, SE, DT); +} -/// MoveImmediateValues - Look at Val, and pull out any additions of constants -/// that can fit into the immediate field of instructions in the target. -/// Accumulate these immediate values into the Imm value. -static void MoveImmediateValues(const TargetLowering *TLI, - const Type *AccessTy, - const SCEV *&Val, const SCEV *&Imm, - bool isAddress, Loop *L, - ScalarEvolution *SE) { - if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { - SmallVector<const SCEV *, 4> NewOps; - NewOps.reserve(SAE->getNumOperands()); - - for (unsigned i = 0; i != SAE->getNumOperands(); ++i) { - const SCEV *NewOp = SAE->getOperand(i); - MoveImmediateValues(TLI, AccessTy, NewOp, Imm, isAddress, L, SE); - - if (!NewOp->isLoopInvariant(L)) { - // If this is a loop-variant expression, it must stay in the immediate - // field of the expression. 
- Imm = SE->getAddExpr(Imm, NewOp); - } else { - NewOps.push_back(NewOp); - } - } - - if (NewOps.empty()) - Val = SE->getIntegerSCEV(0, Val->getType()); - else - Val = SE->getAddExpr(NewOps); - return; - } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) { - // Try to pull immediates out of the start value of nested addrec's. - const SCEV *Start = SARE->getStart(); - MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE); - - if (Start != SARE->getStart()) { - SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end()); - Ops[0] = Start; - Val = SE->getAddRecExpr(Ops, SARE->getLoop()); +void Cost::RateFormula(const Formula &F, + SmallPtrSet<const SCEV *, 16> &Regs, + const DenseSet<const SCEV *> &VisitedRegs, + const Loop *L, + const SmallVectorImpl<int64_t> &Offsets, + ScalarEvolution &SE, DominatorTree &DT) { + // Tally up the registers. + if (const SCEV *ScaledReg = F.ScaledReg) { + if (VisitedRegs.count(ScaledReg)) { + Loose(); + return; } - return; - } else if (const SCEVMulExpr *SME = dyn_cast<SCEVMulExpr>(Val)) { - // Transform "8 * (4 + v)" -> "32 + 8*V" if "32" fits in the immed field. - if (isAddress && - fitsInAddressMode(SME->getOperand(0), AccessTy, TLI, false) && - SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) { - - const SCEV *SubImm = SE->getIntegerSCEV(0, Val->getType()); - const SCEV *NewOp = SME->getOperand(1); - MoveImmediateValues(TLI, AccessTy, NewOp, SubImm, isAddress, L, SE); - - // If we extracted something out of the subexpressions, see if we can - // simplify this! - if (NewOp != SME->getOperand(1)) { - // Scale SubImm up by "8". If the result is a target constant, we are - // good. - SubImm = SE->getMulExpr(SubImm, SME->getOperand(0)); - if (fitsInAddressMode(SubImm, AccessTy, TLI, false)) { - // Accumulate the immediate. - Imm = SE->getAddExpr(Imm, SubImm); - - // Update what is left of 'Val'. - Val = SE->getMulExpr(SME->getOperand(0), NewOp); - return; - } - } + RatePrimaryRegister(ScaledReg, Regs, L, SE, DT); + } + for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(), + E = F.BaseRegs.end(); I != E; ++I) { + const SCEV *BaseReg = *I; + if (VisitedRegs.count(BaseReg)) { + Loose(); + return; } + RatePrimaryRegister(BaseReg, Regs, L, SE, DT); + + NumIVMuls += isa<SCEVMulExpr>(BaseReg) && + BaseReg->hasComputableLoopEvolution(L); } - // Loop-variant expressions must stay in the immediate field of the - // expression. - if ((isAddress && fitsInAddressMode(Val, AccessTy, TLI, false)) || - !Val->isLoopInvariant(L)) { - Imm = SE->getAddExpr(Imm, Val); - Val = SE->getIntegerSCEV(0, Val->getType()); - return; + if (F.BaseRegs.size() > 1) + NumBaseAdds += F.BaseRegs.size() - 1; + + // Tally up the non-zero immediates. + for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(), + E = Offsets.end(); I != E; ++I) { + int64_t Offset = (uint64_t)*I + F.AM.BaseOffs; + if (F.AM.BaseGV) + ImmCost += 64; // Handle symbolic values conservatively. + // TODO: This should probably be the pointer size. + else if (Offset != 0) + ImmCost += APInt(64, Offset, true).getMinSignedBits(); } +} - // Otherwise, no immediates to move. +/// Loose - Set this cost to a loosing value. 
+void Cost::Loose() { + NumRegs = ~0u; + AddRecCost = ~0u; + NumIVMuls = ~0u; + NumBaseAdds = ~0u; + ImmCost = ~0u; + SetupCost = ~0u; } -static void MoveImmediateValues(const TargetLowering *TLI, - Instruction *User, - const SCEV *&Val, const SCEV *&Imm, - bool isAddress, Loop *L, - ScalarEvolution *SE) { - const Type *AccessTy = getAccessType(User); - MoveImmediateValues(TLI, AccessTy, Val, Imm, isAddress, L, SE); +/// operator< - Choose the lower cost. +bool Cost::operator<(const Cost &Other) const { + if (NumRegs != Other.NumRegs) + return NumRegs < Other.NumRegs; + if (AddRecCost != Other.AddRecCost) + return AddRecCost < Other.AddRecCost; + if (NumIVMuls != Other.NumIVMuls) + return NumIVMuls < Other.NumIVMuls; + if (NumBaseAdds != Other.NumBaseAdds) + return NumBaseAdds < Other.NumBaseAdds; + if (ImmCost != Other.ImmCost) + return ImmCost < Other.ImmCost; + if (SetupCost != Other.SetupCost) + return SetupCost < Other.SetupCost; + return false; } -/// SeparateSubExprs - Decompose Expr into all of the subexpressions that are -/// added together. This is used to reassociate common addition subexprs -/// together for maximal sharing when rewriting bases. -static void SeparateSubExprs(SmallVector<const SCEV *, 16> &SubExprs, - const SCEV *Expr, - ScalarEvolution *SE) { - if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) { - for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j) - SeparateSubExprs(SubExprs, AE->getOperand(j), SE); - } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Expr)) { - const SCEV *Zero = SE->getIntegerSCEV(0, Expr->getType()); - if (SARE->getOperand(0) == Zero) { - SubExprs.push_back(Expr); - } else { - // Compute the addrec with zero as its base. - SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end()); - Ops[0] = Zero; // Start with zero base. - SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop())); +void Cost::print(raw_ostream &OS) const { + OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s"); + if (AddRecCost != 0) + OS << ", with addrec cost " << AddRecCost; + if (NumIVMuls != 0) + OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s"); + if (NumBaseAdds != 0) + OS << ", plus " << NumBaseAdds << " base add" + << (NumBaseAdds == 1 ? "" : "s"); + if (ImmCost != 0) + OS << ", plus " << ImmCost << " imm cost"; + if (SetupCost != 0) + OS << ", plus " << SetupCost << " setup cost"; +} +void Cost::dump() const { + print(errs()); errs() << '\n'; +} - SeparateSubExprs(SubExprs, SARE->getOperand(0), SE); - } - } else if (!Expr->isZero()) { - // Do not add zero. - SubExprs.push_back(Expr); - } -} - -// This is logically local to the following function, but C++ says we have -// to make it file scope. -struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; }; - -/// RemoveCommonExpressionsFromUseBases - Look through all of the Bases of all -/// the Uses, removing any common subexpressions, except that if all such -/// subexpressions can be folded into an addressing mode for all uses inside -/// the loop (this case is referred to as "free" in comments herein) we do -/// not remove anything. This looks for things like (a+b+c) and -/// (a+c+d) and computes the common (a+c) subexpression. The common expression -/// is *removed* from the Bases and returned. -static const SCEV * -RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, - ScalarEvolution *SE, Loop *L, - const TargetLowering *TLI) { - unsigned NumUses = Uses.size(); - - // Only one use? 
This is a very common case, so we handle it specially and - // cheaply. - const SCEV *Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType()); - const SCEV *Result = Zero; - const SCEV *FreeResult = Zero; - if (NumUses == 1) { - // If the use is inside the loop, use its base, regardless of what it is: - // it is clearly shared across all the IV's. If the use is outside the loop - // (which means after it) we don't want to factor anything *into* the loop, - // so just use 0 as the base. - if (L->contains(Uses[0].Inst)) - std::swap(Result, Uses[0].Base); - return Result; - } +namespace { - // To find common subexpressions, count how many of Uses use each expression. - // If any subexpressions are used Uses.size() times, they are common. - // Also track whether all uses of each expression can be moved into an - // an addressing mode "for free"; such expressions are left within the loop. - // struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; }; - std::map<const SCEV *, SubExprUseData> SubExpressionUseData; - - // UniqueSubExprs - Keep track of all of the subexpressions we see in the - // order we see them. - SmallVector<const SCEV *, 16> UniqueSubExprs; - - SmallVector<const SCEV *, 16> SubExprs; - unsigned NumUsesInsideLoop = 0; - for (unsigned i = 0; i != NumUses; ++i) { - // If the user is outside the loop, just ignore it for base computation. - // Since the user is outside the loop, it must be *after* the loop (if it - // were before, it could not be based on the loop IV). We don't want users - // after the loop to affect base computation of values *inside* the loop, - // because we can always add their offsets to the result IV after the loop - // is done, ensuring we get good code inside the loop. - if (!L->contains(Uses[i].Inst)) - continue; - NumUsesInsideLoop++; +/// LSRFixup - An operand value in an instruction which is to be replaced +/// with some equivalent, possibly strength-reduced, replacement. +struct LSRFixup { + /// UserInst - The instruction which will be updated. + Instruction *UserInst; - // If the base is zero (which is common), return zero now, there are no - // CSEs we can find. - if (Uses[i].Base == Zero) return Zero; + /// OperandValToReplace - The operand of the instruction which will + /// be replaced. The operand may be used more than once; every instance + /// will be replaced. + Value *OperandValToReplace; - // If this use is as an address we may be able to put CSEs in the addressing - // mode rather than hoisting them. - bool isAddrUse = isAddressUse(Uses[i].Inst, Uses[i].OperandValToReplace); - // We may need the AccessTy below, but only when isAddrUse, so compute it - // only in that case. - const Type *AccessTy = 0; - if (isAddrUse) - AccessTy = getAccessType(Uses[i].Inst); - - // Split the expression into subexprs. - SeparateSubExprs(SubExprs, Uses[i].Base, SE); - // Add one to SubExpressionUseData.Count for each subexpr present, and - // if the subexpr is not a valid immediate within an addressing mode use, - // set SubExpressionUseData.notAllUsesAreFree. We definitely want to - // hoist these out of the loop (if they are common to all uses). - for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) { - if (++SubExpressionUseData[SubExprs[j]].Count == 1) - UniqueSubExprs.push_back(SubExprs[j]); - if (!isAddrUse || !fitsInAddressMode(SubExprs[j], AccessTy, TLI, false)) - SubExpressionUseData[SubExprs[j]].notAllUsesAreFree = true; - } - SubExprs.clear(); - } - - // Now that we know how many times each is used, build Result. 
Iterate over - // UniqueSubexprs so that we have a stable ordering. - for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) { - std::map<const SCEV *, SubExprUseData>::iterator I = - SubExpressionUseData.find(UniqueSubExprs[i]); - assert(I != SubExpressionUseData.end() && "Entry not found?"); - if (I->second.Count == NumUsesInsideLoop) { // Found CSE! - if (I->second.notAllUsesAreFree) - Result = SE->getAddExpr(Result, I->first); - else - FreeResult = SE->getAddExpr(FreeResult, I->first); - } else - // Remove non-cse's from SubExpressionUseData. - SubExpressionUseData.erase(I); - } - - if (FreeResult != Zero) { - // We have some subexpressions that can be subsumed into addressing - // modes in every use inside the loop. However, it's possible that - // there are so many of them that the combined FreeResult cannot - // be subsumed, or that the target cannot handle both a FreeResult - // and a Result in the same instruction (for example because it would - // require too many registers). Check this. - for (unsigned i=0; i<NumUses; ++i) { - if (!L->contains(Uses[i].Inst)) - continue; - // We know this is an addressing mode use; if there are any uses that - // are not, FreeResult would be Zero. - const Type *AccessTy = getAccessType(Uses[i].Inst); - if (!fitsInAddressMode(FreeResult, AccessTy, TLI, Result!=Zero)) { - // FIXME: could split up FreeResult into pieces here, some hoisted - // and some not. There is no obvious advantage to this. - Result = SE->getAddExpr(Result, FreeResult); - FreeResult = Zero; - break; - } - } - } + /// PostIncLoop - If this user is to use the post-incremented value of an + /// induction variable, this variable is non-null and holds the loop + /// associated with the induction variable. + const Loop *PostIncLoop; - // If we found no CSE's, return now. - if (Result == Zero) return Result; + /// LUIdx - The index of the LSRUse describing the expression which + /// this fixup needs, minus an offset (below). + size_t LUIdx; - // If we still have a FreeResult, remove its subexpressions from - // SubExpressionUseData. This means they will remain in the use Bases. - if (FreeResult != Zero) { - SeparateSubExprs(SubExprs, FreeResult, SE); - for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) { - std::map<const SCEV *, SubExprUseData>::iterator I = - SubExpressionUseData.find(SubExprs[j]); - SubExpressionUseData.erase(I); - } - SubExprs.clear(); - } + /// Offset - A constant offset to be added to the LSRUse expression. + /// This allows multiple fixups to share the same LSRUse with different + /// offsets, for example in an unrolled loop. + int64_t Offset; - // Otherwise, remove all of the CSE's we found from each of the base values. - for (unsigned i = 0; i != NumUses; ++i) { - // Uses outside the loop don't necessarily include the common base, but - // the final IV value coming into those uses does. Instead of trying to - // remove the pieces of the common base, which might not be there, - // subtract off the base to compensate for this. - if (!L->contains(Uses[i].Inst)) { - Uses[i].Base = SE->getMinusSCEV(Uses[i].Base, Result); - continue; - } + LSRFixup(); - // Split the expression into subexprs. - SeparateSubExprs(SubExprs, Uses[i].Base, SE); + void print(raw_ostream &OS) const; + void dump() const; +}; - // Remove any common subexpressions. - for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) - if (SubExpressionUseData.count(SubExprs[j])) { - SubExprs.erase(SubExprs.begin()+j); - --j; --e; - } +} - // Finally, add the non-shared expressions together. 
- if (SubExprs.empty()) - Uses[i].Base = Zero; - else - Uses[i].Base = SE->getAddExpr(SubExprs); - SubExprs.clear(); +LSRFixup::LSRFixup() + : UserInst(0), OperandValToReplace(0), PostIncLoop(0), + LUIdx(~size_t(0)), Offset(0) {} + +void LSRFixup::print(raw_ostream &OS) const { + OS << "UserInst="; + // Store is common and interesting enough to be worth special-casing. + if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) { + OS << "store "; + WriteAsOperand(OS, Store->getOperand(0), /*PrintType=*/false); + } else if (UserInst->getType()->isVoidTy()) + OS << UserInst->getOpcodeName(); + else + WriteAsOperand(OS, UserInst, /*PrintType=*/false); + + OS << ", OperandValToReplace="; + WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false); + + if (PostIncLoop) { + OS << ", PostIncLoop="; + WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false); } - return Result; -} + if (LUIdx != ~size_t(0)) + OS << ", LUIdx=" << LUIdx; -/// ValidScale - Check whether the given Scale is valid for all loads and -/// stores in UsersToProcess. -/// -bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale, - const std::vector<BasedUser>& UsersToProcess) { - if (!TLI) - return true; + if (Offset != 0) + OS << ", Offset=" << Offset; +} - for (unsigned i = 0, e = UsersToProcess.size(); i!=e; ++i) { - // If this is a load or other access, pass the type of the access in. - const Type *AccessTy = - Type::getVoidTy(UsersToProcess[i].Inst->getContext()); - if (isAddressUse(UsersToProcess[i].Inst, - UsersToProcess[i].OperandValToReplace)) - AccessTy = getAccessType(UsersToProcess[i].Inst); - else if (isa<PHINode>(UsersToProcess[i].Inst)) - continue; +void LSRFixup::dump() const { + print(errs()); errs() << '\n'; +} - TargetLowering::AddrMode AM; - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm)) - AM.BaseOffs = SC->getValue()->getSExtValue(); - AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero(); - AM.Scale = Scale; +namespace { - // If load[imm+r*scale] is illegal, bail out. - if (!TLI->isLegalAddressingMode(AM, AccessTy)) - return false; +/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding +/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*. +struct UniquifierDenseMapInfo { + static SmallVector<const SCEV *, 2> getEmptyKey() { + SmallVector<const SCEV *, 2> V; + V.push_back(reinterpret_cast<const SCEV *>(-1)); + return V; } - return true; -} - -/// ValidOffset - Check whether the given Offset is valid for all loads and -/// stores in UsersToProcess. -/// -bool LoopStrengthReduce::ValidOffset(bool HasBaseReg, - int64_t Offset, - int64_t Scale, - const std::vector<BasedUser>& UsersToProcess) { - if (!TLI) - return true; - for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) { - // If this is a load or other access, pass the type of the access in. 
- const Type *AccessTy = - Type::getVoidTy(UsersToProcess[i].Inst->getContext()); - if (isAddressUse(UsersToProcess[i].Inst, - UsersToProcess[i].OperandValToReplace)) - AccessTy = getAccessType(UsersToProcess[i].Inst); - else if (isa<PHINode>(UsersToProcess[i].Inst)) - continue; + static SmallVector<const SCEV *, 2> getTombstoneKey() { + SmallVector<const SCEV *, 2> V; + V.push_back(reinterpret_cast<const SCEV *>(-2)); + return V; + } - TargetLowering::AddrMode AM; - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm)) - AM.BaseOffs = SC->getValue()->getSExtValue(); - AM.BaseOffs = (uint64_t)AM.BaseOffs + (uint64_t)Offset; - AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero(); - AM.Scale = Scale; + static unsigned getHashValue(const SmallVector<const SCEV *, 2> &V) { + unsigned Result = 0; + for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(), + E = V.end(); I != E; ++I) + Result ^= DenseMapInfo<const SCEV *>::getHashValue(*I); + return Result; + } - // If load[imm+r*scale] is illegal, bail out. - if (!TLI->isLegalAddressingMode(AM, AccessTy)) - return false; + static bool isEqual(const SmallVector<const SCEV *, 2> &LHS, + const SmallVector<const SCEV *, 2> &RHS) { + return LHS == RHS; } - return true; -} +}; + +/// LSRUse - This class holds the state that LSR keeps for each use in +/// IVUsers, as well as uses invented by LSR itself. It includes information +/// about what kinds of things can be folded into the user, information about +/// the user itself, and information about how the use may be satisfied. +/// TODO: Represent multiple users of the same expression in common? +class LSRUse { + DenseSet<SmallVector<const SCEV *, 2>, UniquifierDenseMapInfo> Uniquifier; + +public: + /// KindType - An enum for a kind of use, indicating what types of + /// scaled and immediate operands it might support. + enum KindType { + Basic, ///< A normal use, with no folding. + Special, ///< A special case of basic, allowing -1 scales. + Address, ///< An address use; folding according to TargetLowering + ICmpZero ///< An equality icmp with both operands folded into one. + // TODO: Add a generic icmp too? + }; -/// RequiresTypeConversion - Returns true if converting Ty1 to Ty2 is not -/// a nop. -bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1, - const Type *Ty2) { - if (Ty1 == Ty2) - return false; - Ty1 = SE->getEffectiveSCEVType(Ty1); - Ty2 = SE->getEffectiveSCEVType(Ty2); - if (Ty1 == Ty2) - return false; - if (Ty1->canLosslesslyBitCastTo(Ty2)) - return false; - if (TLI && TLI->isTruncateFree(Ty1, Ty2)) - return false; - return true; -} + KindType Kind; + const Type *AccessTy; -/// CheckForIVReuse - Returns the multiple if the stride is the multiple -/// of a previous stride and it is a legal value for the target addressing -/// mode scale component and optional base reg. This allows the users of -/// this stride to be rewritten as prev iv * factor. It returns 0 if no -/// reuse is possible. Factors can be negative on same targets, e.g. ARM. -/// -/// If all uses are outside the loop, we don't require that all multiplies -/// be folded into the addressing mode, nor even that the factor be constant; -/// a multiply (executed once) outside the loop is better than another IV -/// within. Well, usually. 
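// [Illustrative aside, not part of this commit: the core arithmetic behind
// the CheckForIVReuse heuristic being removed here. reuseFactorSketch is an
// editorial name; the legality checks the real code layers on top
// (ValidScale/ValidOffset, type-conversion tests) are not modeled, and
// strides are assumed to be modest constants so overflow is not a concern.]
static long long reuseFactorSketch(long long Stride, long long ExistingStride) {
  if (ExistingStride == 0 || Stride % ExistingStride != 0)
    return 0;                     // Not an exact multiple: no reuse possible.
  return Stride / ExistingStride; // e.g. a stride of 8 reuses a stride-2 IV
                                  // with factor 4, rewriting users as iv*4.
}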
-const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, - bool AllUsesAreAddresses, - bool AllUsesAreOutsideLoop, - const SCEV *Stride, - IVExpr &IV, const Type *Ty, - const std::vector<BasedUser>& UsersToProcess) { - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) { - int64_t SInt = SC->getValue()->getSExtValue(); - for (unsigned NewStride = 0, e = IU->StrideOrder.size(); - NewStride != e; ++NewStride) { - std::map<const SCEV *, IVsOfOneStride>::iterator SI = - IVsByStride.find(IU->StrideOrder[NewStride]); - if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first)) - continue; - // The other stride has no uses, don't reuse it. - std::map<const SCEV *, IVUsersOfOneStride *>::iterator UI = - IU->IVUsesByStride.find(IU->StrideOrder[NewStride]); - if (UI->second->Users.empty()) - continue; - int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue(); - if (SI->first != Stride && - (unsigned(abs64(SInt)) < SSInt || (SInt % SSInt) != 0)) - continue; - int64_t Scale = SInt / SSInt; - // Check that this stride is valid for all the types used for loads and - // stores; if it can be used for some and not others, we might as well use - // the original stride everywhere, since we have to create the IV for it - // anyway. If the scale is 1, then we don't need to worry about folding - // multiplications. - if (Scale == 1 || - (AllUsesAreAddresses && - ValidScale(HasBaseReg, Scale, UsersToProcess))) { - // Prefer to reuse an IV with a base of zero. - for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(), - IE = SI->second.IVs.end(); II != IE; ++II) - // Only reuse previous IV if it would not require a type conversion - // and if the base difference can be folded. - if (II->Base->isZero() && - !RequiresTypeConversion(II->Base->getType(), Ty)) { - IV = *II; - return SE->getIntegerSCEV(Scale, Stride->getType()); - } - // Otherwise, settle for an IV with a foldable base. - if (AllUsesAreAddresses) - for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(), - IE = SI->second.IVs.end(); II != IE; ++II) - // Only reuse previous IV if it would not require a type conversion - // and if the base difference can be folded. - if (SE->getEffectiveSCEVType(II->Base->getType()) == - SE->getEffectiveSCEVType(Ty) && - isa<SCEVConstant>(II->Base)) { - int64_t Base = - cast<SCEVConstant>(II->Base)->getValue()->getSExtValue(); - if (Base > INT32_MIN && Base <= INT32_MAX && - ValidOffset(HasBaseReg, -Base * Scale, - Scale, UsersToProcess)) { - IV = *II; - return SE->getIntegerSCEV(Scale, Stride->getType()); - } - } - } - } - } else if (AllUsesAreOutsideLoop) { - // Accept nonconstant strides here; it is really really right to substitute - // an existing IV if we can. - for (unsigned NewStride = 0, e = IU->StrideOrder.size(); - NewStride != e; ++NewStride) { - std::map<const SCEV *, IVsOfOneStride>::iterator SI = - IVsByStride.find(IU->StrideOrder[NewStride]); - if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first)) - continue; - int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue(); - if (SI->first != Stride && SSInt != 1) - continue; - for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(), - IE = SI->second.IVs.end(); II != IE; ++II) - // Accept nonzero base here. - // Only reuse previous IV if it would not require a type conversion. - if (!RequiresTypeConversion(II->Base->getType(), Ty)) { - IV = *II; - return Stride; - } - } - // Special case, old IV is -1*x and this one is x. Can treat this one as - // -1*old. 
- for (unsigned NewStride = 0, e = IU->StrideOrder.size(); - NewStride != e; ++NewStride) { - std::map<const SCEV *, IVsOfOneStride>::iterator SI = - IVsByStride.find(IU->StrideOrder[NewStride]); - if (SI == IVsByStride.end()) - continue; - if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(SI->first)) - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(ME->getOperand(0))) - if (Stride == ME->getOperand(1) && - SC->getValue()->getSExtValue() == -1LL) - for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(), - IE = SI->second.IVs.end(); II != IE; ++II) - // Accept nonzero base here. - // Only reuse previous IV if it would not require type conversion. - if (!RequiresTypeConversion(II->Base->getType(), Ty)) { - IV = *II; - return SE->getIntegerSCEV(-1LL, Stride->getType()); - } - } - } - return SE->getIntegerSCEV(0, Stride->getType()); -} - -/// PartitionByIsUseOfPostIncrementedValue - Simple boolean predicate that -/// returns true if Val's isUseOfPostIncrementedValue is true. -static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) { - return Val.isUseOfPostIncrementedValue; -} - -/// isNonConstantNegative - Return true if the specified scev is negated, but -/// not a constant. -static bool isNonConstantNegative(const SCEV *Expr) { - const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr); - if (!Mul) return false; - - // If there is a constant factor, it will be first. - const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0)); - if (!SC) return false; - - // Return true if the value is negative, this matches things like (-42 * V). - return SC->getValue()->getValue().isNegative(); -} - -/// CollectIVUsers - Transform our list of users and offsets to a bit more -/// complex table. In this new vector, each 'BasedUser' contains 'Base', the -/// base of the strided accesses, as well as the old information from Uses. We -/// progressively move information from the Base field to the Imm field, until -/// we eventually have the full access expression to rewrite the use. -const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *Stride, - IVUsersOfOneStride &Uses, - Loop *L, - bool &AllUsesAreAddresses, - bool &AllUsesAreOutsideLoop, - std::vector<BasedUser> &UsersToProcess) { - // FIXME: Generalize to non-affine IV's. - if (!Stride->isLoopInvariant(L)) - return SE->getIntegerSCEV(0, Stride->getType()); - - UsersToProcess.reserve(Uses.Users.size()); - for (ilist<IVStrideUse>::iterator I = Uses.Users.begin(), - E = Uses.Users.end(); I != E; ++I) { - UsersToProcess.push_back(BasedUser(*I, SE)); - - // Move any loop variant operands from the offset field to the immediate - // field of the use, so that we don't try to use something before it is - // computed. - MoveLoopVariantsToImmediateField(UsersToProcess.back().Base, - UsersToProcess.back().Imm, L, SE); - assert(UsersToProcess.back().Base->isLoopInvariant(L) && - "Base value is not loop invariant!"); - } - - // We now have a whole bunch of uses of like-strided induction variables, but - // they might all have different bases. We want to emit one PHI node for this - // stride which we fold as many common expressions (between the IVs) into as - // possible. Start by identifying the common expressions in the base values - // for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find - // "A+B"), emit it to the preheader, then remove the expression from the - // UsersToProcess base values. 
- const SCEV *CommonExprs = - RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L, TLI); - - // Next, figure out what we can represent in the immediate fields of - // instructions. If we can represent anything there, move it to the imm - // fields of the BasedUsers. We do this so that it increases the commonality - // of the remaining uses. - unsigned NumPHI = 0; - bool HasAddress = false; - for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) { - // If the user is not in the current loop, this means it is using the exit - // value of the IV. Do not put anything in the base, make sure it's all in - // the immediate field to allow as much factoring as possible. - if (!L->contains(UsersToProcess[i].Inst)) { - UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, - UsersToProcess[i].Base); - UsersToProcess[i].Base = - SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType()); - } else { - // Not all uses are outside the loop. - AllUsesAreOutsideLoop = false; - - // Addressing modes can be folded into loads and stores. Be careful that - // the store is through the expression, not of the expression though. - bool isPHI = false; - bool isAddress = isAddressUse(UsersToProcess[i].Inst, - UsersToProcess[i].OperandValToReplace); - if (isa<PHINode>(UsersToProcess[i].Inst)) { - isPHI = true; - ++NumPHI; - } + SmallVector<int64_t, 8> Offsets; + int64_t MinOffset; + int64_t MaxOffset; - if (isAddress) - HasAddress = true; + /// AllFixupsOutsideLoop - This records whether all of the fixups using this + /// LSRUse are outside of the loop, in which case some special-case heuristics + /// may be used. + bool AllFixupsOutsideLoop; - // If this use isn't an address, then not all uses are addresses. - if (!isAddress && !isPHI) - AllUsesAreAddresses = false; + /// Formulae - A list of ways to build a value that can satisfy this user. + /// After the list is populated, one of these is selected heuristically and + /// used to formulate a replacement for OperandValToReplace in UserInst. + SmallVector<Formula, 12> Formulae; - MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base, - UsersToProcess[i].Imm, isAddress, L, SE); - } - } + /// Regs - The set of register candidates used by all formulae in this LSRUse. + SmallPtrSet<const SCEV *, 4> Regs; - // If one of the use is a PHI node and all other uses are addresses, still - // allow iv reuse. Essentially we are trading one constant multiplication - // for one fewer iv. - if (NumPHI > 1) - AllUsesAreAddresses = false; + LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T), + MinOffset(INT64_MAX), + MaxOffset(INT64_MIN), + AllFixupsOutsideLoop(true) {} - // There are no in-loop address uses. - if (AllUsesAreAddresses && (!HasAddress && !AllUsesAreOutsideLoop)) - AllUsesAreAddresses = false; + bool InsertFormula(const Formula &F); - return CommonExprs; -} + void check() const; -/// ShouldUseFullStrengthReductionMode - Test whether full strength-reduction -/// is valid and profitable for the given set of users of a stride. In -/// full strength-reduction mode, all addresses at the current stride are -/// strength-reduced all the way down to pointer arithmetic. 
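// [Illustrative aside, not part of this commit: what the comment above means
// by strength-reducing an address "all the way down to pointer arithmetic".
// Both hypothetical functions are editorial and compute the same sum; the
// second replaces the per-iteration multiply-and-index with a pointer
// induction variable that is simply bumped by the stride each trip.]
static long sumStrided(const int *A, int N) {
  long Sum = 0;
  for (int i = 0; i < N; ++i)
    Sum += A[i * 4];            // Address recomputed as A + i*4 every trip.
  return Sum;
}

static long sumStridedReduced(const int *A, int N) {
  long Sum = 0;
  const int *P = A;             // Pointer IV replaces the scaled index.
  for (int i = 0; i < N; ++i, P += 4)
    Sum += *P;                  // Address is just the incremented pointer.
  return Sum;
}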
-/// -bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode( - const std::vector<BasedUser> &UsersToProcess, - const Loop *L, - bool AllUsesAreAddresses, - const SCEV *Stride) { - if (!EnableFullLSRMode) - return false; + void print(raw_ostream &OS) const; + void dump() const; +}; - // The heuristics below aim to avoid increasing register pressure, but - // fully strength-reducing all the addresses increases the number of - // add instructions, so don't do this when optimizing for size. - // TODO: If the loop is large, the savings due to simpler addresses - // may oughtweight the costs of the extra increment instructions. - if (L->getHeader()->getParent()->hasFnAttr(Attribute::OptimizeForSize)) - return false; +/// InsertFormula - If the given formula has not yet been inserted, add it to +/// the list, and return true. Return false otherwise. +bool LSRUse::InsertFormula(const Formula &F) { + SmallVector<const SCEV *, 2> Key = F.BaseRegs; + if (F.ScaledReg) Key.push_back(F.ScaledReg); + // Unstable sort by host order ok, because this is only used for uniquifying. + std::sort(Key.begin(), Key.end()); - // TODO: For now, don't do full strength reduction if there could - // potentially be greater-stride multiples of the current stride - // which could reuse the current stride IV. - if (IU->StrideOrder.back() != Stride) + if (!Uniquifier.insert(Key).second) return false; - // Iterate through the uses to find conditions that automatically rule out - // full-lsr mode. - for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) { - const SCEV *Base = UsersToProcess[i].Base; - const SCEV *Imm = UsersToProcess[i].Imm; - // If any users have a loop-variant component, they can't be fully - // strength-reduced. - if (Imm && !Imm->isLoopInvariant(L)) - return false; - // If there are to users with the same base and the difference between - // the two Imm values can't be folded into the address, full - // strength reduction would increase register pressure. - do { - const SCEV *CurImm = UsersToProcess[i].Imm; - if ((CurImm || Imm) && CurImm != Imm) { - if (!CurImm) CurImm = SE->getIntegerSCEV(0, Stride->getType()); - if (!Imm) Imm = SE->getIntegerSCEV(0, Stride->getType()); - const Instruction *Inst = UsersToProcess[i].Inst; - const Type *AccessTy = getAccessType(Inst); - const SCEV *Diff = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm); - if (!Diff->isZero() && - (!AllUsesAreAddresses || - !fitsInAddressMode(Diff, AccessTy, TLI, /*HasBaseReg=*/true))) - return false; - } - } while (++i != e && Base == UsersToProcess[i].Base); - } + // Using a register to hold the value of 0 is not profitable. + assert((!F.ScaledReg || !F.ScaledReg->isZero()) && + "Zero allocated in a scaled register!"); +#ifndef NDEBUG + for (SmallVectorImpl<const SCEV *>::const_iterator I = + F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) + assert(!(*I)->isZero() && "Zero allocated in a base register!"); +#endif - // If there's exactly one user in this stride, fully strength-reducing it - // won't increase register pressure. If it's starting from a non-zero base, - // it'll be simpler this way. - if (UsersToProcess.size() == 1 && !UsersToProcess[0].Base->isZero()) - return true; + // Add the formula to the list. + Formulae.push_back(F); - // Otherwise, if there are any users in this stride that don't require - // a register for their base, full strength-reduction will increase - // register pressure. 
- for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) - if (UsersToProcess[i].Base->isZero()) - return false; + // Record registers now being used by this use. + if (F.ScaledReg) Regs.insert(F.ScaledReg); + Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); - // Otherwise, go for it. return true; } -/// InsertAffinePhi Create and insert a PHI node for an induction variable -/// with the specified start and step values in the specified loop. -/// -/// If NegateStride is true, the stride should be negated by using a -/// subtract instead of an add. -/// -/// Return the created phi node. -/// -static PHINode *InsertAffinePhi(const SCEV *Start, const SCEV *Step, - Instruction *IVIncInsertPt, - const Loop *L, - SCEVExpander &Rewriter) { - assert(Start->isLoopInvariant(L) && "New PHI start is not loop invariant!"); - assert(Step->isLoopInvariant(L) && "New PHI stride is not loop invariant!"); - - BasicBlock *Header = L->getHeader(); - BasicBlock *Preheader = L->getLoopPreheader(); - BasicBlock *LatchBlock = L->getLoopLatch(); - const Type *Ty = Start->getType(); - Ty = Rewriter.SE.getEffectiveSCEVType(Ty); - - PHINode *PN = PHINode::Create(Ty, "lsr.iv", Header->begin()); - PN->addIncoming(Rewriter.expandCodeFor(Start, Ty, Preheader->getTerminator()), - Preheader); - - // If the stride is negative, insert a sub instead of an add for the - // increment. - bool isNegative = isNonConstantNegative(Step); - const SCEV *IncAmount = Step; - if (isNegative) - IncAmount = Rewriter.SE.getNegativeSCEV(Step); - - // Insert an add instruction right before the terminator corresponding - // to the back-edge or just before the only use. The location is determined - // by the caller and passed in as IVIncInsertPt. - Value *StepV = Rewriter.expandCodeFor(IncAmount, Ty, - Preheader->getTerminator()); - Instruction *IncV; - if (isNegative) { - IncV = BinaryOperator::CreateSub(PN, StepV, "lsr.iv.next", - IVIncInsertPt); - } else { - IncV = BinaryOperator::CreateAdd(PN, StepV, "lsr.iv.next", - IVIncInsertPt); - } - if (!isa<ConstantInt>(StepV)) ++NumVariable; - - PN->addIncoming(IncV, LatchBlock); - - ++NumInserted; - return PN; -} - -static void SortUsersToProcess(std::vector<BasedUser> &UsersToProcess) { - // We want to emit code for users inside the loop first. To do this, we - // rearrange BasedUser so that the entries at the end have - // isUseOfPostIncrementedValue = false, because we pop off the end of the - // vector (so we handle them first). - std::partition(UsersToProcess.begin(), UsersToProcess.end(), - PartitionByIsUseOfPostIncrementedValue); - - // Sort this by base, so that things with the same base are handled - // together. By partitioning first and stable-sorting later, we are - // guaranteed that within each base we will pop off users from within the - // loop before users outside of the loop with a particular base. - // - // We would like to use stable_sort here, but we can't. The problem is that - // const SCEV *'s don't have a deterministic ordering w.r.t to each other, so - // we don't have anything to do a '<' comparison on. Because we think the - // number of uses is small, do a horrible bubble sort which just relies on - // ==. - for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) { - // Get a base value. - const SCEV *Base = UsersToProcess[i].Base; - - // Compact everything with this base to be consecutive with this one. 
- for (unsigned j = i+1; j != e; ++j) { - if (UsersToProcess[j].Base == Base) { - std::swap(UsersToProcess[i+1], UsersToProcess[j]); - ++i; - } - } +void LSRUse::print(raw_ostream &OS) const { + OS << "LSR Use: Kind="; + switch (Kind) { + case Basic: OS << "Basic"; break; + case Special: OS << "Special"; break; + case ICmpZero: OS << "ICmpZero"; break; + case Address: + OS << "Address of "; + if (AccessTy->isPointerTy()) + OS << "pointer"; // the full pointer type could be really verbose + else + OS << *AccessTy; } -} -/// PrepareToStrengthReduceFully - Prepare to fully strength-reduce -/// UsersToProcess, meaning lowering addresses all the way down to direct -/// pointer arithmetic. -/// -void -LoopStrengthReduce::PrepareToStrengthReduceFully( - std::vector<BasedUser> &UsersToProcess, - const SCEV *Stride, - const SCEV *CommonExprs, - const Loop *L, - SCEVExpander &PreheaderRewriter) { - DEBUG(dbgs() << " Fully reducing all users\n"); - - // Rewrite the UsersToProcess records, creating a separate PHI for each - // unique Base value. - Instruction *IVIncInsertPt = L->getLoopLatch()->getTerminator(); - for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) { - // TODO: The uses are grouped by base, but not sorted. We arbitrarily - // pick the first Imm value here to start with, and adjust it for the - // other uses. - const SCEV *Imm = UsersToProcess[i].Imm; - const SCEV *Base = UsersToProcess[i].Base; - const SCEV *Start = SE->getAddExpr(CommonExprs, Base, Imm); - PHINode *Phi = InsertAffinePhi(Start, Stride, IVIncInsertPt, L, - PreheaderRewriter); - // Loop over all the users with the same base. - do { - UsersToProcess[i].Base = SE->getIntegerSCEV(0, Stride->getType()); - UsersToProcess[i].Imm = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm); - UsersToProcess[i].Phi = Phi; - assert(UsersToProcess[i].Imm->isLoopInvariant(L) && - "ShouldUseFullStrengthReductionMode should reject this!"); - } while (++i != e && Base == UsersToProcess[i].Base); - } -} - -/// FindIVIncInsertPt - Return the location to insert the increment instruction. -/// If the only use if a use of postinc value, (must be the loop termination -/// condition), then insert it just before the use. -static Instruction *FindIVIncInsertPt(std::vector<BasedUser> &UsersToProcess, - const Loop *L) { - if (UsersToProcess.size() == 1 && - UsersToProcess[0].isUseOfPostIncrementedValue && - L->contains(UsersToProcess[0].Inst)) - return UsersToProcess[0].Inst; - return L->getLoopLatch()->getTerminator(); -} - -/// PrepareToStrengthReduceWithNewPhi - Insert a new induction variable for the -/// given users to share. -/// -void -LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi( - std::vector<BasedUser> &UsersToProcess, - const SCEV *Stride, - const SCEV *CommonExprs, - Value *CommonBaseV, - Instruction *IVIncInsertPt, - const Loop *L, - SCEVExpander &PreheaderRewriter) { - DEBUG(dbgs() << " Inserting new PHI:\n"); - - PHINode *Phi = InsertAffinePhi(SE->getUnknown(CommonBaseV), - Stride, IVIncInsertPt, L, - PreheaderRewriter); - - // Remember this in case a later stride is multiple of this. - IVsByStride[Stride].addIV(Stride, CommonExprs, Phi); - - // All the users will share this new IV. 
- for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) - UsersToProcess[i].Phi = Phi; - - DEBUG(dbgs() << " IV="); - DEBUG(WriteAsOperand(dbgs(), Phi, /*PrintType=*/false)); - DEBUG(dbgs() << "\n"); -} - -/// PrepareToStrengthReduceFromSmallerStride - Prepare for the given users to -/// reuse an induction variable with a stride that is a factor of the current -/// induction variable. -/// -void -LoopStrengthReduce::PrepareToStrengthReduceFromSmallerStride( - std::vector<BasedUser> &UsersToProcess, - Value *CommonBaseV, - const IVExpr &ReuseIV, - Instruction *PreInsertPt) { - DEBUG(dbgs() << " Rewriting in terms of existing IV of STRIDE " - << *ReuseIV.Stride << " and BASE " << *ReuseIV.Base << "\n"); - - // All the users will share the reused IV. - for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) - UsersToProcess[i].Phi = ReuseIV.PHI; - - Constant *C = dyn_cast<Constant>(CommonBaseV); - if (C && - (!C->isNullValue() && - !fitsInAddressMode(SE->getUnknown(CommonBaseV), CommonBaseV->getType(), - TLI, false))) - // We want the common base emitted into the preheader! This is just - // using cast as a copy so BitCast (no-op cast) is appropriate - CommonBaseV = new BitCastInst(CommonBaseV, CommonBaseV->getType(), - "commonbase", PreInsertPt); -} - -static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset, - const Type *AccessTy, - std::vector<BasedUser> &UsersToProcess, - const TargetLowering *TLI) { - SmallVector<Instruction*, 16> AddrModeInsts; - for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) { - if (UsersToProcess[i].isUseOfPostIncrementedValue) - continue; - ExtAddrMode AddrMode = - AddressingModeMatcher::Match(UsersToProcess[i].OperandValToReplace, - AccessTy, UsersToProcess[i].Inst, - AddrModeInsts, *TLI); - if (GV && GV != AddrMode.BaseGV) - return false; - if (Offset && !AddrMode.BaseOffs) - // FIXME: How to accurate check it's immediate offset is folded. - return false; - AddrModeInsts.clear(); + OS << ", Offsets={"; + for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(), + E = Offsets.end(); I != E; ++I) { + OS << *I; + if (next(I) != E) + OS << ','; } - return true; -} + OS << '}'; -/// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a single -/// stride of IV. All of the users may have different starting values, and this -/// may not be the only stride. -void -LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *Stride, - IVUsersOfOneStride &Uses, - Loop *L) { - // If all the users are moved to another stride, then there is nothing to do. - if (Uses.Users.empty()) - return; + if (AllFixupsOutsideLoop) + OS << ", all-fixups-outside-loop"; +} - // Keep track if every use in UsersToProcess is an address. If they all are, - // we may be able to rewrite the entire collection of them in terms of a - // smaller-stride IV. - bool AllUsesAreAddresses = true; - - // Keep track if every use of a single stride is outside the loop. If so, - // we want to be more aggressive about reusing a smaller-stride IV; a - // multiply outside the loop is better than another IV inside. Well, usually. - bool AllUsesAreOutsideLoop = true; - - // Transform our list of users and offsets to a bit more complex table. In - // this new vector, each 'BasedUser' contains 'Base' the base of the strided - // access as well as the old information from Uses. We progressively move - // information from the Base field to the Imm field until we eventually have - // the full access expression to rewrite the use. 
- std::vector<BasedUser> UsersToProcess; - const SCEV *CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses, - AllUsesAreOutsideLoop, - UsersToProcess); - - // Sort the UsersToProcess array so that users with common bases are - // next to each other. - SortUsersToProcess(UsersToProcess); - - // If we managed to find some expressions in common, we'll need to carry - // their value in a register and add it in for each use. This will take up - // a register operand, which potentially restricts what stride values are - // valid. - bool HaveCommonExprs = !CommonExprs->isZero(); - const Type *ReplacedTy = CommonExprs->getType(); - - // If all uses are addresses, consider sinking the immediate part of the - // common expression back into uses if they can fit in the immediate fields. - if (TLI && HaveCommonExprs && AllUsesAreAddresses) { - const SCEV *NewCommon = CommonExprs; - const SCEV *Imm = SE->getIntegerSCEV(0, ReplacedTy); - MoveImmediateValues(TLI, Type::getVoidTy( - L->getLoopPreheader()->getContext()), - NewCommon, Imm, true, L, SE); - if (!Imm->isZero()) { - bool DoSink = true; - - // If the immediate part of the common expression is a GV, check if it's - // possible to fold it into the target addressing mode. - GlobalValue *GV = 0; - if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(Imm)) - GV = dyn_cast<GlobalValue>(SU->getValue()); - int64_t Offset = 0; - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Imm)) - Offset = SC->getValue()->getSExtValue(); - if (GV || Offset) - // Pass VoidTy as the AccessTy to be conservative, because - // there could be multiple access types among all the uses. - DoSink = IsImmFoldedIntoAddrMode(GV, Offset, - Type::getVoidTy(L->getLoopPreheader()->getContext()), - UsersToProcess, TLI); - - if (DoSink) { - DEBUG(dbgs() << " Sinking " << *Imm << " back down into uses\n"); - for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) - UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, Imm); - CommonExprs = NewCommon; - HaveCommonExprs = !CommonExprs->isZero(); - ++NumImmSunk; - } - } - } +void LSRUse::dump() const { + print(errs()); errs() << '\n'; +} - // Now that we know what we need to do, insert the PHI node itself. - // - DEBUG(dbgs() << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE " - << *Stride << ":\n" - << " Common base: " << *CommonExprs << '\n'); +/// isLegalUse - Test whether the use described by AM is "legal", meaning it can +/// be completely folded into the user instruction at isel time. This includes +/// address-mode folding and special icmp tricks. +static bool isLegalUse(const TargetLowering::AddrMode &AM, + LSRUse::KindType Kind, const Type *AccessTy, + const TargetLowering *TLI) { + switch (Kind) { + case LSRUse::Address: + // If we have low-level target information, ask the target if it can + // completely fold this address. + if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy); + + // Otherwise, just guess that reg+reg addressing is legal. + return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1; + + case LSRUse::ICmpZero: + // There's not even a target hook for querying whether it would be legal to + // fold a GV into an ICmp. + if (AM.BaseGV) + return false; - SCEVExpander Rewriter(*SE); - SCEVExpander PreheaderRewriter(*SE); + // ICmp only has two operands; don't allow more than two non-trivial parts. 
+ if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0) + return false; - BasicBlock *Preheader = L->getLoopPreheader(); - Instruction *PreInsertPt = Preheader->getTerminator(); - BasicBlock *LatchBlock = L->getLoopLatch(); - Instruction *IVIncInsertPt = LatchBlock->getTerminator(); - - Value *CommonBaseV = Constant::getNullValue(ReplacedTy); - - const SCEV *RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy); - IVExpr ReuseIV(SE->getIntegerSCEV(0, - Type::getInt32Ty(Preheader->getContext())), - SE->getIntegerSCEV(0, - Type::getInt32Ty(Preheader->getContext())), - 0); - - // Choose a strength-reduction strategy and prepare for it by creating - // the necessary PHIs and adjusting the bookkeeping. - if (ShouldUseFullStrengthReductionMode(UsersToProcess, L, - AllUsesAreAddresses, Stride)) { - PrepareToStrengthReduceFully(UsersToProcess, Stride, CommonExprs, L, - PreheaderRewriter); - } else { - // Emit the initial base value into the loop preheader. - CommonBaseV = PreheaderRewriter.expandCodeFor(CommonExprs, ReplacedTy, - PreInsertPt); - - // If all uses are addresses, check if it is possible to reuse an IV. The - // new IV must have a stride that is a multiple of the old stride; the - // multiple must be a number that can be encoded in the scale field of the - // target addressing mode; and we must have a valid instruction after this - // substitution, including the immediate field, if any. - RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses, - AllUsesAreOutsideLoop, - Stride, ReuseIV, ReplacedTy, - UsersToProcess); - if (!RewriteFactor->isZero()) - PrepareToStrengthReduceFromSmallerStride(UsersToProcess, CommonBaseV, - ReuseIV, PreInsertPt); - else { - IVIncInsertPt = FindIVIncInsertPt(UsersToProcess, L); - PrepareToStrengthReduceWithNewPhi(UsersToProcess, Stride, CommonExprs, - CommonBaseV, IVIncInsertPt, - L, PreheaderRewriter); - } - } + // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by + // putting the scaled register in the other operand of the icmp. + if (AM.Scale != 0 && AM.Scale != -1) + return false; - // Process all the users now, replacing their strided uses with - // strength-reduced forms. This outer loop handles all bases, the inner - // loop handles all users of a particular base. - while (!UsersToProcess.empty()) { - const SCEV *Base = UsersToProcess.back().Base; - Instruction *Inst = UsersToProcess.back().Inst; - - // Emit the code for Base into the preheader. - Value *BaseV = 0; - if (!Base->isZero()) { - BaseV = PreheaderRewriter.expandCodeFor(Base, 0, PreInsertPt); - - DEBUG(dbgs() << " INSERTING code for BASE = " << *Base << ":"); - if (BaseV->hasName()) - DEBUG(dbgs() << " Result value name = %" << BaseV->getName()); - DEBUG(dbgs() << "\n"); - - // If BaseV is a non-zero constant, make sure that it gets inserted into - // the preheader, instead of being forward substituted into the uses. We - // do this by forcing a BitCast (noop cast) to be inserted into the - // preheader in this case. - if (!fitsInAddressMode(Base, getAccessType(Inst), TLI, false) && - isa<Constant>(BaseV)) { - // We want this constant emitted into the preheader! This is just - // using cast as a copy so BitCast (no-op cast) is appropriate - BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert", - PreInsertPt); - } + // If we have low-level target information, ask the target if it can fold an + // integer immediate on an icmp. 
+ if (AM.BaseOffs != 0) { + if (TLI) return TLI->isLegalICmpImmediate(-AM.BaseOffs); + return false; } - // Emit the code to add the immediate offset to the Phi value, just before - // the instructions that we identified as using this stride and base. - do { - // FIXME: Use emitted users to emit other users. - BasedUser &User = UsersToProcess.back(); - - DEBUG(dbgs() << " Examining "); - if (User.isUseOfPostIncrementedValue) - DEBUG(dbgs() << "postinc"); - else - DEBUG(dbgs() << "preinc"); - DEBUG(dbgs() << " use "); - DEBUG(WriteAsOperand(dbgs(), UsersToProcess.back().OperandValToReplace, - /*PrintType=*/false)); - DEBUG(dbgs() << " in Inst: " << *User.Inst << '\n'); - - // If this instruction wants to use the post-incremented value, move it - // after the post-inc and use its value instead of the PHI. - Value *RewriteOp = User.Phi; - if (User.isUseOfPostIncrementedValue) { - RewriteOp = User.Phi->getIncomingValueForBlock(LatchBlock); - // If this user is in the loop, make sure it is the last thing in the - // loop to ensure it is dominated by the increment. In case it's the - // only use of the iv, the increment instruction is already before the - // use. - if (L->contains(User.Inst) && User.Inst != IVIncInsertPt) - User.Inst->moveBefore(IVIncInsertPt); - } - - const SCEV *RewriteExpr = SE->getUnknown(RewriteOp); - - if (SE->getEffectiveSCEVType(RewriteOp->getType()) != - SE->getEffectiveSCEVType(ReplacedTy)) { - assert(SE->getTypeSizeInBits(RewriteOp->getType()) > - SE->getTypeSizeInBits(ReplacedTy) && - "Unexpected widening cast!"); - RewriteExpr = SE->getTruncateExpr(RewriteExpr, ReplacedTy); - } - - // If we had to insert new instructions for RewriteOp, we have to - // consider that they may not have been able to end up immediately - // next to RewriteOp, because non-PHI instructions may never precede - // PHI instructions in a block. In this case, remember where the last - // instruction was inserted so that if we're replacing a different - // PHI node, we can use the later point to expand the final - // RewriteExpr. - Instruction *NewBasePt = dyn_cast<Instruction>(RewriteOp); - if (RewriteOp == User.Phi) NewBasePt = 0; - - // Clear the SCEVExpander's expression map so that we are guaranteed - // to have the code emitted where we expect it. - Rewriter.clear(); - - // If we are reusing the iv, then it must be multiplied by a constant - // factor to take advantage of the addressing mode scale component. - if (!RewriteFactor->isZero()) { - // If we're reusing an IV with a nonzero base (currently this happens - // only when all reuses are outside the loop) subtract that base here. - // The base has been used to initialize the PHI node but we don't want - // it here. - if (!ReuseIV.Base->isZero()) { - const SCEV *typedBase = ReuseIV.Base; - if (SE->getEffectiveSCEVType(RewriteExpr->getType()) != - SE->getEffectiveSCEVType(ReuseIV.Base->getType())) { - // It's possible the original IV is a larger type than the new IV, - // in which case we have to truncate the Base. We checked in - // RequiresTypeConversion that this is valid. - assert(SE->getTypeSizeInBits(RewriteExpr->getType()) < - SE->getTypeSizeInBits(ReuseIV.Base->getType()) && - "Unexpected lengthening conversion!"); - typedBase = SE->getTruncateExpr(ReuseIV.Base, - RewriteExpr->getType()); - } - RewriteExpr = SE->getMinusSCEV(RewriteExpr, typedBase); - } + return true; - // Multiply old variable, with base removed, by new scale factor. 
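A worked example of the two ICmpZero folds the legality test above relies on, written as plain integer arithmetic rather than SCEV (the identifiers are illustrative, not from the patch): a -1 scale folds by moving the scaled register to the other side of the comparison, and a constant offset folds as an icmp immediate of the opposite sign, which is why the code queries isLegalICmpImmediate on the negated BaseOffs.

#include <cassert>

int main() {
  // Scale of -1: (Base + (-1)*Scaled) == 0 is the same predicate as
  // Base == Scaled, so the scaled register simply becomes the other operand.
  for (long Base = -3; Base <= 3; ++Base)
    for (long Scaled = -3; Scaled <= 3; ++Scaled)
      assert(((Base + (-1) * Scaled) == 0) == (Base == Scaled));

  // Constant offset: (X + Offs) == 0 is the same predicate as X == -Offs,
  // so the target is asked about the negated offset as an icmp immediate.
  for (long X = -3; X <= 3; ++X)
    for (long Offs = -3; Offs <= 3; ++Offs)
      assert(((X + Offs) == 0) == (X == -Offs));
  return 0;
}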
- RewriteExpr = SE->getMulExpr(RewriteFactor, - RewriteExpr); - - // The common base is emitted in the loop preheader. But since we - // are reusing an IV, it has not been used to initialize the PHI node. - // Add it to the expression used to rewrite the uses. - // When this use is outside the loop, we earlier subtracted the - // common base, and are adding it back here. Use the same expression - // as before, rather than CommonBaseV, so DAGCombiner will zap it. - if (!CommonExprs->isZero()) { - if (L->contains(User.Inst)) - RewriteExpr = SE->getAddExpr(RewriteExpr, - SE->getUnknown(CommonBaseV)); - else - RewriteExpr = SE->getAddExpr(RewriteExpr, CommonExprs); - } - } + case LSRUse::Basic: + // Only handle single-register values. + return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0; - // Now that we know what we need to do, insert code before User for the - // immediate and any loop-variant expressions. - if (BaseV) - // Add BaseV to the PHI value if needed. - RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV)); - - User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt, - Rewriter, L, this, - DeadInsts, SE); - - // Mark old value we replaced as possibly dead, so that it is eliminated - // if we just replaced the last use of that value. - DeadInsts.push_back(User.OperandValToReplace); - - UsersToProcess.pop_back(); - ++NumReduced; - - // If there are any more users to process with the same base, process them - // now. We sorted by base above, so we just have to check the last elt. - } while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base); - // TODO: Next, find out which base index is the most common, pull it out. - } - - // IMPORTANT TODO: Figure out how to partition the IV's with this stride, but - // different starting values, into different PHIs. -} - -void LoopStrengthReduce::StrengthReduceIVUsers(Loop *L) { - // Note: this processes each stride/type pair individually. All users - // passed into StrengthReduceIVUsersOfStride have the same type AND stride. - // Also, note that we iterate over IVUsesByStride indirectly by using - // StrideOrder. This extra layer of indirection makes the ordering of - // strides deterministic - not dependent on map order. - for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; ++Stride) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[Stride]); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - // FIXME: Generalize to non-affine IV's. - if (!SI->first->isLoopInvariant(L)) - continue; - StrengthReduceIVUsersOfStride(SI->first, *SI->second, L); + case LSRUse::Special: + // Only handle -1 scales, or no scale. + return AM.Scale == 0 || AM.Scale == -1; } + + return false; } -/// FindIVUserForCond - If Cond has an operand that is an expression of an IV, -/// set the IV user and stride information and return true, otherwise return -/// false. 
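When no TargetLowering is available, the legality test above falls back to purely structural rules per use kind. A self-contained sketch of those TLI-free rules, using a plain struct in place of TargetLowering::AddrMode (the field meanings mirror the ones used above; everything else is illustrative):

#include <cassert>

enum KindType { Basic, Special, ICmpZero, Address };

// Stand-in for TargetLowering::AddrMode; BaseGV is reduced to a flag.
struct AddrMode {
  bool HasBaseGV = false;
  long BaseOffs = 0;
  bool HasBaseReg = false;
  long Scale = 0;
};

// TLI-free legality rules, mirroring the switch above.
static bool isLegalUseNoTLI(const AddrMode &AM, KindType Kind) {
  switch (Kind) {
  case Address:
    // Without target info, guess that reg+reg addressing is legal.
    return !AM.HasBaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;
  case ICmpZero:
    if (AM.HasBaseGV) return false;                      // no GV in an icmp
    if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0)
      return false;                                      // at most two non-trivial parts
    if (AM.Scale != 0 && AM.Scale != -1) return false;   // only -1 scales fold
    return AM.BaseOffs == 0;                             // immediates need target info
  case Basic:
    return !AM.HasBaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
  case Special:
    return AM.Scale == 0 || AM.Scale == -1;
  }
  return false;
}

int main() {
  AddrMode RegReg;  RegReg.HasBaseReg = true; RegReg.Scale = 1;
  AddrMode RegImm;  RegImm.HasBaseReg = true; RegImm.BaseOffs = 16;
  assert(isLegalUseNoTLI(RegReg, Address));    // reg+reg: assumed legal
  assert(!isLegalUseNoTLI(RegImm, Address));   // reg+imm: needs target info
  return 0;
}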
-bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, - IVStrideUse *&CondUse, - const SCEV* &CondStride) { - for (unsigned Stride = 0, e = IU->StrideOrder.size(); - Stride != e && !CondUse; ++Stride) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[Stride]); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - - for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(), - E = SI->second->Users.end(); UI != E; ++UI) - if (UI->getUser() == Cond) { - // NOTE: we could handle setcc instructions with multiple uses here, but - // InstCombine does it as well for simple uses, it's not clear that it - // occurs enough in real life to handle. - CondUse = UI; - CondStride = SI->first; - return true; - } +static bool isLegalUse(TargetLowering::AddrMode AM, + int64_t MinOffset, int64_t MaxOffset, + LSRUse::KindType Kind, const Type *AccessTy, + const TargetLowering *TLI) { + // Check for overflow. + if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) != + (MinOffset > 0)) + return false; + AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset; + if (isLegalUse(AM, Kind, AccessTy, TLI)) { + AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset; + // Check for overflow. + if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) != + (MaxOffset > 0)) + return false; + AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset; + return isLegalUse(AM, Kind, AccessTy, TLI); } return false; } -namespace { - // Constant strides come first which in turns are sorted by their absolute - // values. If absolute values are the same, then positive strides comes first. - // e.g. - // 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X - struct StrideCompare { - const ScalarEvolution *SE; - explicit StrideCompare(const ScalarEvolution *se) : SE(se) {} - - bool operator()(const SCEV *LHS, const SCEV *RHS) { - const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS); - const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS); - if (LHSC && RHSC) { - int64_t LV = LHSC->getValue()->getSExtValue(); - int64_t RV = RHSC->getValue()->getSExtValue(); - uint64_t ALV = (LV < 0) ? -LV : LV; - uint64_t ARV = (RV < 0) ? -RV : RV; - if (ALV == ARV) { - if (LV != RV) - return LV > RV; - } else { - return ALV < ARV; - } +static bool isAlwaysFoldable(int64_t BaseOffs, + GlobalValue *BaseGV, + bool HasBaseReg, + LSRUse::KindType Kind, const Type *AccessTy, + const TargetLowering *TLI) { + // Fast-path: zero is always foldable. + if (BaseOffs == 0 && !BaseGV) return true; + + // Conservatively, create an address with an immediate and a + // base and a scale. + TargetLowering::AddrMode AM; + AM.BaseOffs = BaseOffs; + AM.BaseGV = BaseGV; + AM.HasBaseReg = HasBaseReg; + AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1; + + return isLegalUse(AM, Kind, AccessTy, TLI); +} - // If it's the same value but different type, sort by bit width so - // that we emit larger induction variables before smaller - // ones, letting the smaller be re-written in terms of larger ones. - return SE->getTypeSizeInBits(RHS->getType()) < - SE->getTypeSizeInBits(LHS->getType()); - } - return LHSC && !RHSC; - } - }; +static bool isAlwaysFoldable(const SCEV *S, + int64_t MinOffset, int64_t MaxOffset, + bool HasBaseReg, + LSRUse::KindType Kind, const Type *AccessTy, + const TargetLowering *TLI, + ScalarEvolution &SE) { + // Fast-path: zero is always foldable. + if (S->isZero()) return true; + + // Conservatively, create an address with an immediate and a + // base and a scale. 
+ int64_t BaseOffs = ExtractImmediate(S, SE); + GlobalValue *BaseGV = ExtractSymbol(S, SE); + + // If there's anything else involved, it's not foldable. + if (!S->isZero()) return false; + + // Fast-path: zero is always foldable. + if (BaseOffs == 0 && !BaseGV) return true; + + // Conservatively, create an address with an immediate and a + // base and a scale. + TargetLowering::AddrMode AM; + AM.BaseOffs = BaseOffs; + AM.BaseGV = BaseGV; + AM.HasBaseReg = HasBaseReg; + AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1; + + return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI); } -/// ChangeCompareStride - If a loop termination compare instruction is the only -/// use of its stride, and the comparison is against a constant value, try to -/// eliminate the stride by moving the compare instruction to another stride and -/// changing its constant operand accordingly. E.g. -/// -/// loop: -/// ... -/// v1 = v1 + 3 -/// v2 = v2 + 1 -/// if (v2 < 10) goto loop -/// => -/// loop: -/// ... -/// v1 = v1 + 3 -/// if (v1 < 30) goto loop -ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse, - const SCEV* &CondStride, - bool PostPass) { - // If there's only one stride in the loop, there's nothing to do here. - if (IU->StrideOrder.size() < 2) - return Cond; +/// FormulaSorter - This class implements an ordering for formulae which sorts +/// the by their standalone cost. +class FormulaSorter { + /// These two sets are kept empty, so that we compute standalone costs. + DenseSet<const SCEV *> VisitedRegs; + SmallPtrSet<const SCEV *, 16> Regs; + Loop *L; + LSRUse *LU; + ScalarEvolution &SE; + DominatorTree &DT; + +public: + FormulaSorter(Loop *l, LSRUse &lu, ScalarEvolution &se, DominatorTree &dt) + : L(l), LU(&lu), SE(se), DT(dt) {} + + bool operator()(const Formula &A, const Formula &B) { + Cost CostA; + CostA.RateFormula(A, Regs, VisitedRegs, L, LU->Offsets, SE, DT); + Regs.clear(); + Cost CostB; + CostB.RateFormula(B, Regs, VisitedRegs, L, LU->Offsets, SE, DT); + Regs.clear(); + return CostA < CostB; + } +}; + +/// LSRInstance - This class holds state for the main loop strength reduction +/// logic. +class LSRInstance { + IVUsers &IU; + ScalarEvolution &SE; + DominatorTree &DT; + const TargetLowering *const TLI; + Loop *const L; + bool Changed; + + /// IVIncInsertPos - This is the insert position that the current loop's + /// induction variable increment should be placed. In simple loops, this is + /// the latch block's terminator. But in more complicated cases, this is a + /// position which will dominate all the in-loop post-increment users. + Instruction *IVIncInsertPos; + + /// Factors - Interesting factors between use strides. + SmallSetVector<int64_t, 8> Factors; + + /// Types - Interesting use types, to facilitate truncation reuse. + SmallSetVector<const Type *, 4> Types; + + /// Fixups - The list of operands which are to be replaced. + SmallVector<LSRFixup, 16> Fixups; + + /// Uses - The list of interesting uses. + SmallVector<LSRUse, 16> Uses; + + /// RegUses - Track which uses use which register candidates. 
+ RegUseTracker RegUses; + + void OptimizeShadowIV(); + bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse); + ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse); + bool OptimizeLoopTermCond(); + + void CollectInterestingTypesAndFactors(); + void CollectFixupsAndInitialFormulae(); + + LSRFixup &getNewFixup() { + Fixups.push_back(LSRFixup()); + return Fixups.back(); + } - // If there are other users of the condition's stride, don't bother trying to - // change the condition because the stride will still remain. - std::map<const SCEV *, IVUsersOfOneStride *>::iterator I = - IU->IVUsesByStride.find(CondStride); - if (I == IU->IVUsesByStride.end()) - return Cond; + // Support for sharing of LSRUses between LSRFixups. + typedef DenseMap<const SCEV *, size_t> UseMapTy; + UseMapTy UseMap; + + bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, + LSRUse::KindType Kind, const Type *AccessTy); + + std::pair<size_t, int64_t> getUse(const SCEV *&Expr, + LSRUse::KindType Kind, + const Type *AccessTy); + +public: + void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); + void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); + void CountRegisters(const Formula &F, size_t LUIdx); + bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F); + + void CollectLoopInvariantFixupsAndFormulae(); + + void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base, + unsigned Depth = 0); + void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base); + void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base); + void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base); + void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base); + void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base); + void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base); + void GenerateCrossUseConstantOffsets(); + void GenerateAllReuseFormulae(); + + void FilterOutUndesirableDedicatedRegisters(); + void NarrowSearchSpaceUsingHeuristics(); + + void SolveRecurse(SmallVectorImpl<const Formula *> &Solution, + Cost &SolutionCost, + SmallVectorImpl<const Formula *> &Workspace, + const Cost &CurCost, + const SmallPtrSet<const SCEV *, 16> &CurRegs, + DenseSet<const SCEV *> &VisitedRegs) const; + void Solve(SmallVectorImpl<const Formula *> &Solution) const; + + Value *Expand(const LSRFixup &LF, + const Formula &F, + BasicBlock::iterator IP, + SCEVExpander &Rewriter, + SmallVectorImpl<WeakVH> &DeadInsts) const; + void RewriteForPHI(PHINode *PN, const LSRFixup &LF, + const Formula &F, + SCEVExpander &Rewriter, + SmallVectorImpl<WeakVH> &DeadInsts, + Pass *P) const; + void Rewrite(const LSRFixup &LF, + const Formula &F, + SCEVExpander &Rewriter, + SmallVectorImpl<WeakVH> &DeadInsts, + Pass *P) const; + void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, + Pass *P); + + LSRInstance(const TargetLowering *tli, Loop *l, Pass *P); + + bool getChanged() const { return Changed; } + + void print_factors_and_types(raw_ostream &OS) const; + void print_fixups(raw_ostream &OS) const; + void print_uses(raw_ostream &OS) const; + void print(raw_ostream &OS) const; + void dump() const; +}; - if (I->second->Users.size() > 1) { - for (ilist<IVStrideUse>::iterator II = I->second->Users.begin(), - EE = I->second->Users.end(); II != EE; ++II) { - if (II->getUser() == Cond) - continue; - if (!isInstructionTriviallyDead(II->getUser())) - return Cond; - } - } +} - // Only handle constant strides for now. 
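The class declaration above names all the phases of the rewritten pass. A compilable stub sketching only the presumed driving order, inferred from these declarations (the real constructor and method bodies appear later in the patch; nothing below is the pass's actual code):

#include <iostream>

// Presumed phase ordering: legacy IV cleanups first, then data collection,
// formula generation, pruning, solving, and finally rewriting the uses.
struct LSRPipelineSketch {
  void OptimizeShadowIV()                       { std::cout << "shadow IV\n"; }
  void OptimizeLoopTermCond()                   { std::cout << "loop term cond\n"; }
  void CollectInterestingTypesAndFactors()      { std::cout << "types/factors\n"; }
  void CollectFixupsAndInitialFormulae()        { std::cout << "fixups + initial formulae\n"; }
  void CollectLoopInvariantFixupsAndFormulae()  { std::cout << "loop-invariant fixups\n"; }
  void GenerateAllReuseFormulae()               { std::cout << "reuse formulae\n"; }
  void FilterOutUndesirableDedicatedRegisters() { std::cout << "filter formulae\n"; }
  void NarrowSearchSpaceUsingHeuristics()       { std::cout << "narrow search space\n"; }
  void Solve()                                  { std::cout << "solve\n"; }
  void ImplementSolution()                      { std::cout << "rewrite uses\n"; }

  LSRPipelineSketch() {
    OptimizeShadowIV();
    OptimizeLoopTermCond();
    CollectInterestingTypesAndFactors();
    CollectFixupsAndInitialFormulae();
    CollectLoopInvariantFixupsAndFormulae();
    GenerateAllReuseFormulae();
    FilterOutUndesirableDedicatedRegisters();
    NarrowSearchSpaceUsingHeuristics();
    Solve();
    ImplementSolution();
  }
};

int main() { LSRPipelineSketch sketch; return 0; }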
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(CondStride); - if (!SC) return Cond; - - ICmpInst::Predicate Predicate = Cond->getPredicate(); - int64_t CmpSSInt = SC->getValue()->getSExtValue(); - unsigned BitWidth = SE->getTypeSizeInBits(CondStride->getType()); - uint64_t SignBit = 1ULL << (BitWidth-1); - const Type *CmpTy = Cond->getOperand(0)->getType(); - const Type *NewCmpTy = NULL; - unsigned TyBits = SE->getTypeSizeInBits(CmpTy); - unsigned NewTyBits = 0; - const SCEV *NewStride = NULL; - Value *NewCmpLHS = NULL; - Value *NewCmpRHS = NULL; - int64_t Scale = 1; - const SCEV *NewOffset = SE->getIntegerSCEV(0, CmpTy); - - if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) { - int64_t CmpVal = C->getValue().getSExtValue(); - - // Check the relevant induction variable for conformance to the pattern. - const SCEV *IV = SE->getSCEV(Cond->getOperand(0)); - const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); - if (!AR || !AR->isAffine()) - return Cond; - - const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart()); - // Check stride constant and the comparision constant signs to detect - // overflow. - if (StartC) { - if ((StartC->getValue()->getSExtValue() < CmpVal && CmpSSInt < 0) || - (StartC->getValue()->getSExtValue() > CmpVal && CmpSSInt > 0)) - return Cond; - } else { - // More restrictive check for the other cases. - if ((CmpVal & SignBit) != (CmpSSInt & SignBit)) - return Cond; - } +/// OptimizeShadowIV - If IV is used in a int-to-float cast +/// inside the loop then try to eliminate the cast operation. +void LSRInstance::OptimizeShadowIV() { + const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L); + if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) + return; - // Look for a suitable stride / iv as replacement. - for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[i]); - if (!isa<SCEVConstant>(SI->first) || SI->second->Users.empty()) - continue; - int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue(); - if (SSInt == CmpSSInt || - abs64(SSInt) < abs64(CmpSSInt) || - (SSInt % CmpSSInt) != 0) - continue; + for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); + UI != E; /* empty */) { + IVUsers::const_iterator CandidateUI = UI; + ++UI; + Instruction *ShadowUse = CandidateUI->getUser(); + const Type *DestTy = NULL; - Scale = SSInt / CmpSSInt; - int64_t NewCmpVal = CmpVal * Scale; + /* If shadow use is a int->float cast then insert a second IV + to eliminate this cast. - // If old icmp value fits in icmp immediate field, but the new one doesn't - // try something else. - if (TLI && - TLI->isLegalICmpImmediate(CmpVal) && - !TLI->isLegalICmpImmediate(NewCmpVal)) - continue; + for (unsigned i = 0; i < n; ++i) + foo((double)i); - APInt Mul = APInt(BitWidth*2, CmpVal, true); - Mul = Mul * APInt(BitWidth*2, Scale, true); - // Check for overflow. - if (!Mul.isSignedIntN(BitWidth)) - continue; - // Check for overflow in the stride's type too. - if (!Mul.isSignedIntN(SE->getTypeSizeInBits(SI->first->getType()))) - continue; + is transformed into - // Watch out for overflow. 
- if (ICmpInst::isSigned(Predicate) && - (CmpVal & SignBit) != (NewCmpVal & SignBit)) - continue; + double d = 0.0; + for (unsigned i = 0; i < n; ++i, ++d) + foo(d); + */ + if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) + DestTy = UCast->getDestTy(); + else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) + DestTy = SCast->getDestTy(); + if (!DestTy) continue; - // Pick the best iv to use trying to avoid a cast. - NewCmpLHS = NULL; - for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(), - E = SI->second->Users.end(); UI != E; ++UI) { - Value *Op = UI->getOperandValToReplace(); - - // If the IVStrideUse implies a cast, check for an actual cast which - // can be used to find the original IV expression. - if (SE->getEffectiveSCEVType(Op->getType()) != - SE->getEffectiveSCEVType(SI->first->getType())) { - CastInst *CI = dyn_cast<CastInst>(Op); - // If it's not a simple cast, it's complicated. - if (!CI) - continue; - // If it's a cast from a type other than the stride type, - // it's complicated. - if (CI->getOperand(0)->getType() != SI->first->getType()) - continue; - // Ok, we found the IV expression in the stride's type. - Op = CI->getOperand(0); - } + if (TLI) { + // If target does not support DestTy natively then do not apply + // this transformation. + EVT DVT = TLI->getValueType(DestTy); + if (!TLI->isTypeLegal(DVT)) continue; + } - NewCmpLHS = Op; - if (NewCmpLHS->getType() == CmpTy) - break; - } - if (!NewCmpLHS) - continue; + PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0)); + if (!PH) continue; + if (PH->getNumIncomingValues() != 2) continue; - NewCmpTy = NewCmpLHS->getType(); - NewTyBits = SE->getTypeSizeInBits(NewCmpTy); - const Type *NewCmpIntTy = IntegerType::get(Cond->getContext(), NewTyBits); - if (RequiresTypeConversion(NewCmpTy, CmpTy)) { - // Check if it is possible to rewrite it using - // an iv / stride of a smaller integer type. - unsigned Bits = NewTyBits; - if (ICmpInst::isSigned(Predicate)) - --Bits; - uint64_t Mask = (1ULL << Bits) - 1; - if (((uint64_t)NewCmpVal & Mask) != (uint64_t)NewCmpVal) - continue; - } + const Type *SrcTy = PH->getType(); + int Mantissa = DestTy->getFPMantissaWidth(); + if (Mantissa == -1) continue; + if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa) + continue; - // Don't rewrite if use offset is non-constant and the new type is - // of a different type. - // FIXME: too conservative? - if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->getOffset())) - continue; + unsigned Entry, Latch; + if (PH->getIncomingBlock(0) == L->getLoopPreheader()) { + Entry = 0; + Latch = 1; + } else { + Entry = 1; + Latch = 0; + } - if (!PostPass) { - bool AllUsesAreAddresses = true; - bool AllUsesAreOutsideLoop = true; - std::vector<BasedUser> UsersToProcess; - const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L, - AllUsesAreAddresses, - AllUsesAreOutsideLoop, - UsersToProcess); - // Avoid rewriting the compare instruction with an iv of new stride - // if it's likely the new stride uses will be rewritten using the - // stride of the compare instruction. - if (AllUsesAreAddresses && - ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) - continue; - } + ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry)); + if (!Init) continue; + Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); - // Avoid rewriting the compare instruction with an iv which has - // implicit extension or truncation built into it. - // TODO: This is over-conservative. 
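The mantissa-width test just above is what keeps the shadow IV exact: the cast is only replaced when every value of the integer IV is exactly representable in the floating-point destination type. A small numeric illustration, standard C++ only and independent of the patch:

#include <cassert>
#include <limits>

int main() {
  // double carries a 53-bit significand, so getFPMantissaWidth() is 53 for it.
  const int Mantissa = std::numeric_limits<double>::digits;
  assert(Mantissa == 53);

  // A 32-bit counter always round-trips exactly through double...
  assert(32 <= Mantissa);

  // ...but a 64-bit counter does not: 2^53 + 1 is not representable, so a
  // 64-bit shadow IV would silently drift, and the check above bails out.
  unsigned long long Big = (1ULL << 53) + 1;
  assert((unsigned long long)(double)Big != Big);
  assert(64 > Mantissa);   // the comparison that rejects the 64-bit case
  return 0;
}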
- if (SE->getTypeSizeInBits(CondUse->getOffset()->getType()) != TyBits) - continue; + BinaryOperator *Incr = + dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch)); + if (!Incr) continue; + if (Incr->getOpcode() != Instruction::Add + && Incr->getOpcode() != Instruction::Sub) + continue; - // If scale is negative, use swapped predicate unless it's testing - // for equality. - if (Scale < 0 && !Cond->isEquality()) - Predicate = ICmpInst::getSwappedPredicate(Predicate); + /* Initialize new IV, double d = 0.0 in above example. */ + ConstantInt *C = NULL; + if (Incr->getOperand(0) == PH) + C = dyn_cast<ConstantInt>(Incr->getOperand(1)); + else if (Incr->getOperand(1) == PH) + C = dyn_cast<ConstantInt>(Incr->getOperand(0)); + else + continue; - NewStride = IU->StrideOrder[i]; - if (!isa<PointerType>(NewCmpTy)) - NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal); - else { - Constant *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal); - NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy); - } - NewOffset = TyBits == NewTyBits - ? SE->getMulExpr(CondUse->getOffset(), - SE->getConstant(CmpTy, Scale)) - : SE->getConstant(NewCmpIntTy, - cast<SCEVConstant>(CondUse->getOffset())->getValue() - ->getSExtValue()*Scale); - break; - } - } + if (!C) continue; - // Forgo this transformation if it the increment happens to be - // unfortunately positioned after the condition, and the condition - // has multiple uses which prevent it from being moved immediately - // before the branch. See - // test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-*.ll - // for an example of this situation. - if (!Cond->hasOneUse()) { - for (BasicBlock::iterator I = Cond, E = Cond->getParent()->end(); - I != E; ++I) - if (I == NewCmpLHS) - return Cond; - } + // Ignore negative constants, as the code below doesn't handle them + // correctly. TODO: Remove this restriction. + if (!C->getValue().isStrictlyPositive()) continue; - if (NewCmpRHS) { - // Create a new compare instruction using new stride / iv. - ICmpInst *OldCond = Cond; - // Insert new compare instruction. - Cond = new ICmpInst(OldCond, Predicate, NewCmpLHS, NewCmpRHS, - L->getHeader()->getName() + ".termcond"); + /* Add new PHINode. */ + PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH); - DEBUG(dbgs() << " Change compare stride in Inst " << *OldCond); - DEBUG(dbgs() << " to " << *Cond << '\n'); + /* create new increment. '++d' in above example. */ + Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); + BinaryOperator *NewIncr = + BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ? + Instruction::FAdd : Instruction::FSub, + NewPH, CFP, "IV.S.next.", Incr); - // Remove the old compare instruction. The old indvar is probably dead too. - DeadInsts.push_back(CondUse->getOperandValToReplace()); - OldCond->replaceAllUsesWith(Cond); - OldCond->eraseFromParent(); + NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry)); + NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch)); - IU->IVUsesByStride[NewStride]->addUser(NewOffset, Cond, NewCmpLHS); - CondUse = &IU->IVUsesByStride[NewStride]->Users.back(); - CondStride = NewStride; - ++NumEliminated; - Changed = true; + /* Remove cast operation */ + ShadowUse->replaceAllUsesWith(NewPH); + ShadowUse->eraseFromParent(); + break; } +} - return Cond; +/// FindIVUserForCond - If Cond has an operand that is an expression of an IV, +/// set the IV user and stride information and return true, otherwise return +/// false. 
+bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, + IVStrideUse *&CondUse) { + for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) + if (UI->getUser() == Cond) { + // NOTE: we could handle setcc instructions with multiple uses here, but + // InstCombine does it as well for simple uses, it's not clear that it + // occurs enough in real life to handle. + CondUse = UI; + return true; + } + return false; } /// OptimizeMax - Rewrite the loop's terminating condition if it uses @@ -2088,7 +1405,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, /// are designed around them. The most obvious example of this is the /// LoopInfo analysis, which doesn't remember trip count values. It /// expects to be able to rediscover the trip count each time it is -/// needed, and it does this using a simple analyis that only succeeds if +/// needed, and it does this using a simple analysis that only succeeds if /// the loop has a canonical induction variable. /// /// However, when it comes time to generate code, the maximum operation @@ -2098,8 +1415,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, /// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting /// the instructions for the maximum computation. /// -ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse) { +ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { // Check that the loop matches the pattern we're looking for. if (Cond->getPredicate() != CmpInst::ICMP_EQ && Cond->getPredicate() != CmpInst::ICMP_NE) @@ -2108,19 +1424,19 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1)); if (!Sel || !Sel->hasOneUse()) return Cond; - const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); + const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) return Cond; - const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType()); + const SCEV *One = SE.getIntegerSCEV(1, BackedgeTakenCount->getType()); // Add one to the backedge-taken count to get the trip count. - const SCEV *IterationCount = SE->getAddExpr(BackedgeTakenCount, One); + const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One); // Check for a max calculation that matches the pattern. if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount)) return Cond; const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount); - if (Max != SE->getSCEV(Sel)) return Cond; + if (Max != SE.getSCEV(Sel)) return Cond; // To handle a max with more than two operands, this optimization would // require additional checking and setup. @@ -2130,14 +1446,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, const SCEV *MaxLHS = Max->getOperand(0); const SCEV *MaxRHS = Max->getOperand(1); if (!MaxLHS || MaxLHS != One) return Cond; - // Check the relevant induction variable for conformance to // the pattern. 
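The remainder of OptimizeMax continues below; as a concrete instance of the pattern its comment describes (purely illustrative source-level code, not the pass's IR): when the trip count is only known as a max, the exit test produced by indvars compares against that max, and rewriting it back to a plain less-than lets the max computation die. For a loop that is guarded so it only runs when n >= 1, the two forms take the same number of trips:

#include <algorithm>
#include <cassert>

// Exit test against max(n, 1), the shape indvars can leave behind.
static int tripsWithMax(int n) {
  int trips = 0;
  for (int i = 0; i != std::max(n, 1); ++i) ++trips;
  return trips;
}

// Exit test rewritten to a simple less-than; the max is no longer needed.
static int tripsWithLess(int n) {
  int trips = 0;
  for (int i = 0; i < n; ++i) ++trips;
  return trips;
}

int main() {
  for (int n = 1; n <= 100; ++n)          // guarded case: n >= 1
    assert(tripsWithMax(n) == tripsWithLess(n));
  return 0;
}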
- const SCEV *IV = SE->getSCEV(Cond->getOperand(0)); + const SCEV *IV = SE.getSCEV(Cond->getOperand(0)); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); if (!AR || !AR->isAffine() || AR->getStart() != One || - AR->getStepRecurrence(*SE) != One) + AR->getStepRecurrence(SE) != One) return Cond; assert(AR->getLoop() == L && @@ -2146,9 +1461,9 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, // Check the right operand of the select, and remember it, as it will // be used in the new comparison instruction. Value *NewRHS = 0; - if (SE->getSCEV(Sel->getOperand(1)) == MaxRHS) + if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS) NewRHS = Sel->getOperand(1); - else if (SE->getSCEV(Sel->getOperand(2)) == MaxRHS) + else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS) NewRHS = Sel->getOperand(2); if (!NewRHS) return Cond; @@ -2175,552 +1490,1804 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, return NewCond; } -/// OptimizeShadowIV - If IV is used in a int-to-float cast -/// inside the loop then try to eliminate the cast opeation. -void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { +/// OptimizeLoopTermCond - Change loop terminating condition to use the +/// postinc iv when possible. +bool +LSRInstance::OptimizeLoopTermCond() { + SmallPtrSet<Instruction *, 4> PostIncs; - const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); - if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) - return; + BasicBlock *LatchBlock = L->getLoopLatch(); + SmallVector<BasicBlock*, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { + BasicBlock *ExitingBlock = ExitingBlocks[i]; - for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; - ++Stride) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[Stride]); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - if (!isa<SCEVConstant>(SI->first)) + // Get the terminating condition for the loop if possible. If we + // can, we want to change it to use a post-incremented version of its + // induction variable, to allow coalescing the live ranges for the IV into + // one register value. + + BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); + if (!TermBr) + continue; + // FIXME: Overly conservative, termination condition could be an 'or' etc.. + if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition())) continue; - for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(), - E = SI->second->Users.end(); UI != E; /* empty */) { - ilist<IVStrideUse>::iterator CandidateUI = UI; - ++UI; - Instruction *ShadowUse = CandidateUI->getUser(); - const Type *DestTy = NULL; - - /* If shadow use is a int->float cast then insert a second IV - to eliminate this cast. - - for (unsigned i = 0; i < n; ++i) - foo((double)i); - - is transformed into - - double d = 0.0; - for (unsigned i = 0; i < n; ++i, ++d) - foo(d); - */ - if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) - DestTy = UCast->getDestTy(); - else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) - DestTy = SCast->getDestTy(); - if (!DestTy) continue; - - if (TLI) { - // If target does not support DestTy natively then do not apply - // this transformation. - EVT DVT = TLI->getValueType(DestTy); - if (!TLI->isTypeLegal(DVT)) continue; - } + // Search IVUsesByStride to find Cond's IVUse if there is one. 
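The live-range argument in the comments above can be seen in source form: with a pre-increment exit test both the old and the new value of the IV are needed at the backedge branch, while testing the post-incremented value lets a single register carry the IV around the loop. A hedged source-level sketch (names and loop shape are illustrative, not from the patch):

#include <cassert>

// Exit test on the pre-incremented value: the branch needs the OLD i, so
// both i and i+1 are live across the backedge.
static unsigned sumPreInc(const unsigned *a, unsigned n) {   // assumes n >= 1
  unsigned sum = 0;
  for (unsigned i = 0; ; ) {
    sum += a[i];
    unsigned next = i + 1;
    if (i == n - 1) break;      // compares the old value
    i = next;
  }
  return sum;
}

// Exit test on the post-incremented value: the branch only needs i+1, so the
// IV's live ranges can be coalesced into one register.
static unsigned sumPostInc(const unsigned *a, unsigned n) {  // assumes n >= 1
  unsigned sum = 0;
  for (unsigned i = 0; ; ) {
    sum += a[i];
    unsigned next = i + 1;
    if (next == n) break;       // compares the new value
    i = next;
  }
  return sum;
}

int main() {
  unsigned a[4] = {1, 2, 3, 4};
  assert(sumPreInc(a, 4) == 10 && sumPostInc(a, 4) == 10);
  return 0;
}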
+ IVStrideUse *CondUse = 0; + ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition()); + if (!FindIVUserForCond(Cond, CondUse)) + continue; + + // If the trip count is computed in terms of a max (due to ScalarEvolution + // being unable to find a sufficient guard, for example), change the loop + // comparison to use SLT or ULT instead of NE. + // One consequence of doing this now is that it disrupts the count-down + // optimization. That's not always a bad thing though, because in such + // cases it may still be worthwhile to avoid a max. + Cond = OptimizeMax(Cond, CondUse); + + // If this exiting block dominates the latch block, it may also use + // the post-inc value if it won't be shared with other uses. + // Check for dominance. + if (!DT.dominates(ExitingBlock, LatchBlock)) + continue; - PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0)); - if (!PH) continue; - if (PH->getNumIncomingValues() != 2) continue; + // Conservatively avoid trying to use the post-inc value in non-latch + // exits if there may be pre-inc users in intervening blocks. + if (LatchBlock != ExitingBlock) + for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) + // Test if the use is reachable from the exiting block. This dominator + // query is a conservative approximation of reachability. + if (&*UI != CondUse && + !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) { + // Conservatively assume there may be reuse if the quotient of their + // strides could be a legal scale. + const SCEV *A = CondUse->getStride(); + const SCEV *B = UI->getStride(); + if (SE.getTypeSizeInBits(A->getType()) != + SE.getTypeSizeInBits(B->getType())) { + if (SE.getTypeSizeInBits(A->getType()) > + SE.getTypeSizeInBits(B->getType())) + B = SE.getSignExtendExpr(B, A->getType()); + else + A = SE.getSignExtendExpr(A, B->getType()); + } + if (const SCEVConstant *D = + dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) { + // Stride of one or negative one can have reuse with non-addresses. + if (D->getValue()->isOne() || + D->getValue()->isAllOnesValue()) + goto decline_post_inc; + // Avoid weird situations. + if (D->getValue()->getValue().getMinSignedBits() >= 64 || + D->getValue()->getValue().isMinSignedValue()) + goto decline_post_inc; + // Without TLI, assume that any stride might be valid, and so any + // use might be shared. + if (!TLI) + goto decline_post_inc; + // Check for possible scaled-address reuse. + const Type *AccessTy = getAccessType(UI->getUser()); + TargetLowering::AddrMode AM; + AM.Scale = D->getValue()->getSExtValue(); + if (TLI->isLegalAddressingMode(AM, AccessTy)) + goto decline_post_inc; + AM.Scale = -AM.Scale; + if (TLI->isLegalAddressingMode(AM, AccessTy)) + goto decline_post_inc; + } + } - const Type *SrcTy = PH->getType(); - int Mantissa = DestTy->getFPMantissaWidth(); - if (Mantissa == -1) continue; - if ((int)SE->getTypeSizeInBits(SrcTy) > Mantissa) - continue; + DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: " + << *Cond << '\n'); - unsigned Entry, Latch; - if (PH->getIncomingBlock(0) == L->getLoopPreheader()) { - Entry = 0; - Latch = 1; + // It's possible for the setcc instruction to be anywhere in the loop, and + // possible for it to have multiple users. If it is not immediately before + // the exiting block branch, move it. + if (&*++BasicBlock::iterator(Cond) != TermBr) { + if (Cond->hasOneUse()) { + Cond->moveBefore(TermBr); } else { - Entry = 1; - Latch = 0; + // Clone the terminating condition and insert into the loopend. 
+ ICmpInst *OldCond = Cond; + Cond = cast<ICmpInst>(Cond->clone()); + Cond->setName(L->getHeader()->getName() + ".termcond"); + ExitingBlock->getInstList().insert(TermBr, Cond); + + // Clone the IVUse, as the old use still exists! + CondUse = &IU.AddUser(CondUse->getStride(), CondUse->getOffset(), + Cond, CondUse->getOperandValToReplace()); + TermBr->replaceUsesOfWith(OldCond, Cond); } + } - ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry)); - if (!Init) continue; - Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); + // If we get to here, we know that we can transform the setcc instruction to + // use the post-incremented version of the IV, allowing us to coalesce the + // live ranges for the IV correctly. + CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(), + CondUse->getStride())); + CondUse->setIsUseOfPostIncrementedValue(true); + Changed = true; - BinaryOperator *Incr = - dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch)); - if (!Incr) continue; - if (Incr->getOpcode() != Instruction::Add - && Incr->getOpcode() != Instruction::Sub) - continue; + PostIncs.insert(Cond); + decline_post_inc:; + } - /* Initialize new IV, double d = 0.0 in above example. */ - ConstantInt *C = NULL; - if (Incr->getOperand(0) == PH) - C = dyn_cast<ConstantInt>(Incr->getOperand(1)); - else if (Incr->getOperand(1) == PH) - C = dyn_cast<ConstantInt>(Incr->getOperand(0)); - else - continue; + // Determine an insertion point for the loop induction variable increment. It + // must dominate all the post-inc comparisons we just set up, and it must + // dominate the loop latch edge. + IVIncInsertPos = L->getLoopLatch()->getTerminator(); + for (SmallPtrSet<Instruction *, 4>::const_iterator I = PostIncs.begin(), + E = PostIncs.end(); I != E; ++I) { + BasicBlock *BB = + DT.findNearestCommonDominator(IVIncInsertPos->getParent(), + (*I)->getParent()); + if (BB == (*I)->getParent()) + IVIncInsertPos = *I; + else if (BB != IVIncInsertPos->getParent()) + IVIncInsertPos = BB->getTerminator(); + } + + return Changed; +} - if (!C) continue; +bool +LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, + LSRUse::KindType Kind, const Type *AccessTy) { + int64_t NewMinOffset = LU.MinOffset; + int64_t NewMaxOffset = LU.MaxOffset; + const Type *NewAccessTy = AccessTy; + + // Check for a mismatched kind. It's tempting to collapse mismatched kinds to + // something conservative, however this can pessimize in the case that one of + // the uses will have all its uses outside the loop, for example. + if (LU.Kind != Kind) + return false; + // Conservatively assume HasBaseReg is true for now. + if (NewOffset < LU.MinOffset) { + if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, /*HasBaseReg=*/true, + Kind, AccessTy, TLI)) + return false; + NewMinOffset = NewOffset; + } else if (NewOffset > LU.MaxOffset) { + if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, /*HasBaseReg=*/true, + Kind, AccessTy, TLI)) + return false; + NewMaxOffset = NewOffset; + } + // Check for a mismatched access type, and fall back conservatively as needed. + if (Kind == LSRUse::Address && AccessTy != LU.AccessTy) + NewAccessTy = Type::getVoidTy(AccessTy->getContext()); + + // Update the use. + LU.MinOffset = NewMinOffset; + LU.MaxOffset = NewMaxOffset; + LU.AccessTy = NewAccessTy; + if (NewOffset != LU.Offsets.back()) + LU.Offsets.push_back(NewOffset); + return true; +} - // Ignore negative constants, as the code below doesn't handle them - // correctly. TODO: Remove this restriction. 
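reconcileNewOffset above (together with getUse just below) implements use sharing: fixups whose expressions differ only by a constant offset are folded into one LSRUse, and the use's [MinOffset, MaxOffset] window is widened only when the worst-case distance remains foldable into the addressing mode. A small numeric sketch of the idea; the 12-bit displacement rule is an assumed, purely illustrative target property, where the real code asks TargetLowering instead:

#include <cassert>
#include <cstdint>

// Assumed target rule for illustration only: a displacement is foldable if it
// fits in a signed 12-bit immediate.
static bool isFoldableDisp(int64_t D) { return D >= -2048 && D <= 2047; }

struct UseWindow { int64_t MinOffset, MaxOffset; };

// Try to fold a fixup at NewOffset into an existing use; widen the window
// only if the distance to the far end of the window stays foldable.
static bool reconcile(UseWindow &U, int64_t NewOffset) {
  if (NewOffset < U.MinOffset) {
    if (!isFoldableDisp(U.MaxOffset - NewOffset)) return false;
    U.MinOffset = NewOffset;
  } else if (NewOffset > U.MaxOffset) {
    if (!isFoldableDisp(NewOffset - U.MinOffset)) return false;
    U.MaxOffset = NewOffset;
  }
  return true;
}

int main() {
  UseWindow U = {0, 8};            // accesses at p+0 and p+8 already share a use
  assert(reconcile(U, 64));        // p+64 still fits: window becomes [0, 64]
  assert(!reconcile(U, 100000));   // too far to fold: the caller makes a new use
  return 0;
}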
- if (!C->getValue().isStrictlyPositive()) continue; +/// getUse - Return an LSRUse index and an offset value for a fixup which +/// needs the given expression, with the given kind and optional access type. +/// Either reuse an existing use or create a new one, as needed. +std::pair<size_t, int64_t> +LSRInstance::getUse(const SCEV *&Expr, + LSRUse::KindType Kind, const Type *AccessTy) { + const SCEV *Copy = Expr; + int64_t Offset = ExtractImmediate(Expr, SE); + + // Basic uses can't accept any offset, for example. + if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) { + Expr = Copy; + Offset = 0; + } - /* Add new PHINode. */ - PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH); + std::pair<UseMapTy::iterator, bool> P = + UseMap.insert(std::make_pair(Expr, 0)); + if (!P.second) { + // A use already existed with this base. + size_t LUIdx = P.first->second; + LSRUse &LU = Uses[LUIdx]; + if (reconcileNewOffset(LU, Offset, Kind, AccessTy)) + // Reuse this use. + return std::make_pair(LUIdx, Offset); + } - /* create new increment. '++d' in above example. */ - Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); - BinaryOperator *NewIncr = - BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ? - Instruction::FAdd : Instruction::FSub, - NewPH, CFP, "IV.S.next.", Incr); + // Create a new use. + size_t LUIdx = Uses.size(); + P.first->second = LUIdx; + Uses.push_back(LSRUse(Kind, AccessTy)); + LSRUse &LU = Uses[LUIdx]; - NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry)); - NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch)); + // We don't need to track redundant offsets, but we don't need to go out + // of our way here to avoid them. + if (LU.Offsets.empty() || Offset != LU.Offsets.back()) + LU.Offsets.push_back(Offset); - /* Remove cast operation */ - ShadowUse->replaceAllUsesWith(NewPH); - ShadowUse->eraseFromParent(); - NumShadow++; - break; - } - } + LU.MinOffset = Offset; + LU.MaxOffset = Offset; + return std::make_pair(LUIdx, Offset); } -/// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar -/// uses in the loop, look to see if we can eliminate some, in favor of using -/// common indvars for the different uses. -void LoopStrengthReduce::OptimizeIndvars(Loop *L) { - // TODO: implement optzns here. +void LSRInstance::CollectInterestingTypesAndFactors() { + SmallSetVector<const SCEV *, 4> Strides; + + // Collect interesting types and strides. + for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) { + const SCEV *Stride = UI->getStride(); + + // Collect interesting types. + Types.insert(SE.getEffectiveSCEVType(Stride->getType())); - OptimizeShadowIV(L); + // Add the stride for this loop. + Strides.insert(Stride); + + // Add strides for other mentioned loops. + for (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(UI->getOffset()); + AR; AR = dyn_cast<SCEVAddRecExpr>(AR->getStart())) + Strides.insert(AR->getStepRecurrence(SE)); + } + + // Compute interesting factors from the set of interesting strides. 
+ for (SmallSetVector<const SCEV *, 4>::const_iterator + I = Strides.begin(), E = Strides.end(); I != E; ++I) + for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter = + next(I); NewStrideIter != E; ++NewStrideIter) { + const SCEV *OldStride = *I; + const SCEV *NewStride = *NewStrideIter; + + if (SE.getTypeSizeInBits(OldStride->getType()) != + SE.getTypeSizeInBits(NewStride->getType())) { + if (SE.getTypeSizeInBits(OldStride->getType()) > + SE.getTypeSizeInBits(NewStride->getType())) + NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType()); + else + OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType()); + } + if (const SCEVConstant *Factor = + dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride, + SE, true))) { + if (Factor->getValue()->getValue().getMinSignedBits() <= 64) + Factors.insert(Factor->getValue()->getValue().getSExtValue()); + } else if (const SCEVConstant *Factor = + dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride, + NewStride, + SE, true))) { + if (Factor->getValue()->getValue().getMinSignedBits() <= 64) + Factors.insert(Factor->getValue()->getValue().getSExtValue()); + } + } + + // If all uses use the same type, don't bother looking for truncation-based + // reuse. + if (Types.size() == 1) + Types.clear(); + + DEBUG(print_factors_and_types(dbgs())); } -bool LoopStrengthReduce::StrideMightBeShared(const SCEV* Stride, Loop *L, - bool CheckPreInc) { - int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue(); - for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[i]); - const SCEV *Share = SI->first; - if (!isa<SCEVConstant>(SI->first) || Share == Stride) - continue; - int64_t SSInt = cast<SCEVConstant>(Share)->getValue()->getSExtValue(); - if (SSInt == SInt) - return true; // This can definitely be reused. - if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0) - continue; - int64_t Scale = SSInt / SInt; - bool AllUsesAreAddresses = true; - bool AllUsesAreOutsideLoop = true; - std::vector<BasedUser> UsersToProcess; - const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L, - AllUsesAreAddresses, - AllUsesAreOutsideLoop, - UsersToProcess); - if (AllUsesAreAddresses && - ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) { - if (!CheckPreInc) - return true; - // Any pre-inc iv use? - IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[Share]; - for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(), - E = StrideUses.Users.end(); I != E; ++I) { - if (!I->isUseOfPostIncrementedValue()) - return true; +void LSRInstance::CollectFixupsAndInitialFormulae() { + for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) { + // Record the uses. + LSRFixup &LF = getNewFixup(); + LF.UserInst = UI->getUser(); + LF.OperandValToReplace = UI->getOperandValToReplace(); + if (UI->isUseOfPostIncrementedValue()) + LF.PostIncLoop = L; + + LSRUse::KindType Kind = LSRUse::Basic; + const Type *AccessTy = 0; + if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) { + Kind = LSRUse::Address; + AccessTy = getAccessType(LF.UserInst); + } + + const SCEV *S = IU.getCanonicalExpr(*UI); + + // Equality (== and !=) ICmps are special. We can rewrite (i == N) as + // (N - i == 0), and this allows (N - i) to be the expression that we work + // with rather than just N or i, so we can consider the register + // requirements for both N and i at the same time. 
Limiting this code to + // equality icmps is not a problem because all interesting loops use + // equality icmps, thanks to IndVarSimplify. + if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst)) + if (CI->isEquality()) { + // Swap the operands if needed to put the OperandValToReplace on the + // left, for consistency. + Value *NV = CI->getOperand(1); + if (NV == LF.OperandValToReplace) { + CI->setOperand(1, CI->getOperand(0)); + CI->setOperand(0, NV); + } + + // x == y --> x - y == 0 + const SCEV *N = SE.getSCEV(NV); + if (N->isLoopInvariant(L)) { + Kind = LSRUse::ICmpZero; + S = SE.getMinusSCEV(N, S); + } + + // -1 and the negations of all interesting strides (except the negation + // of -1) are now also interesting. + for (size_t i = 0, e = Factors.size(); i != e; ++i) + if (Factors[i] != -1) + Factors.insert(-(uint64_t)Factors[i]); + Factors.insert(-1); } + + // Set up the initial formula for this use. + std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy); + LF.LUIdx = P.first; + LF.Offset = P.second; + LSRUse &LU = Uses[LF.LUIdx]; + LU.AllFixupsOutsideLoop &= !L->contains(LF.UserInst); + + // If this is the first use of this LSRUse, give it a formula. + if (LU.Formulae.empty()) { + InsertInitialFormula(S, LU, LF.LUIdx); + CountRegisters(LU.Formulae.back(), LF.LUIdx); } } - return false; + + DEBUG(print_fixups(dbgs())); } -/// isUsedByExitBranch - Return true if icmp is used by a loop terminating -/// conditional branch or it's and / or with other conditions before being used -/// as the condition. -static bool isUsedByExitBranch(ICmpInst *Cond, Loop *L) { - BasicBlock *CondBB = Cond->getParent(); - if (!L->isLoopExiting(CondBB)) - return false; - BranchInst *TermBr = dyn_cast<BranchInst>(CondBB->getTerminator()); - if (!TermBr || !TermBr->isConditional()) +void +LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { + Formula F; + F.InitialMatch(S, L, SE, DT); + bool Inserted = InsertFormula(LU, LUIdx, F); + assert(Inserted && "Initial formula already exists!"); (void)Inserted; +} + +void +LSRInstance::InsertSupplementalFormula(const SCEV *S, + LSRUse &LU, size_t LUIdx) { + Formula F; + F.BaseRegs.push_back(S); + F.AM.HasBaseReg = true; + bool Inserted = InsertFormula(LU, LUIdx, F); + assert(Inserted && "Supplemental formula already exists!"); (void)Inserted; +} + +/// CountRegisters - Note which registers are used by the given formula, +/// updating RegUses. +void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) { + if (F.ScaledReg) + RegUses.CountRegister(F.ScaledReg, LUIdx); + for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(), + E = F.BaseRegs.end(); I != E; ++I) + RegUses.CountRegister(*I, LUIdx); +} + +/// InsertFormula - If the given formula has not yet been inserted, add it to +/// the list, and return true. Return false otherwise. +bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) { + if (!LU.InsertFormula(F)) return false; - Value *User = *Cond->use_begin(); - Instruction *UserInst = dyn_cast<Instruction>(User); - while (UserInst && - (UserInst->getOpcode() == Instruction::And || - UserInst->getOpcode() == Instruction::Or)) { - if (!UserInst->hasOneUse() || UserInst->getParent() != CondBB) - return false; - User = *User->use_begin(); - UserInst = dyn_cast<Instruction>(User); + CountRegisters(F, LUIdx); + return true; +} + +/// CollectLoopInvariantFixupsAndFormulae - Check for other uses of +/// loop-invariant values which we're tracking. 
These other uses will pin these +/// values in registers, making them less profitable for elimination. +/// TODO: This currently misses non-constant addrec step registers. +/// TODO: Should this give more weight to users inside the loop? +void +LSRInstance::CollectLoopInvariantFixupsAndFormulae() { + SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end()); + SmallPtrSet<const SCEV *, 8> Inserted; + + while (!Worklist.empty()) { + const SCEV *S = Worklist.pop_back_val(); + + if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) + Worklist.insert(Worklist.end(), N->op_begin(), N->op_end()); + else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) + Worklist.push_back(C->getOperand()); + else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { + Worklist.push_back(D->getLHS()); + Worklist.push_back(D->getRHS()); + } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + if (!Inserted.insert(U)) continue; + const Value *V = U->getValue(); + if (const Instruction *Inst = dyn_cast<Instruction>(V)) + if (L->contains(Inst)) continue; + for (Value::use_const_iterator UI = V->use_begin(), UE = V->use_end(); + UI != UE; ++UI) { + const Instruction *UserInst = dyn_cast<Instruction>(*UI); + // Ignore non-instructions. + if (!UserInst) + continue; + // Ignore instructions in other functions (as can happen with + // Constants). + if (UserInst->getParent()->getParent() != L->getHeader()->getParent()) + continue; + // Ignore instructions not dominated by the loop. + const BasicBlock *UseBB = !isa<PHINode>(UserInst) ? + UserInst->getParent() : + cast<PHINode>(UserInst)->getIncomingBlock( + PHINode::getIncomingValueNumForOperand(UI.getOperandNo())); + if (!DT.dominates(L->getHeader(), UseBB)) + continue; + // Ignore uses which are part of other SCEV expressions, to avoid + // analyzing them multiple times. + if (SE.isSCEVable(UserInst->getType()) && + !isa<SCEVUnknown>(SE.getSCEV(const_cast<Instruction *>(UserInst)))) + continue; + // Ignore icmp instructions which are already being analyzed. + if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) { + unsigned OtherIdx = !UI.getOperandNo(); + Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx)); + if (SE.getSCEV(OtherOp)->hasComputableLoopEvolution(L)) + continue; + } + + LSRFixup &LF = getNewFixup(); + LF.UserInst = const_cast<Instruction *>(UserInst); + LF.OperandValToReplace = UI.getUse(); + std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0); + LF.LUIdx = P.first; + LF.Offset = P.second; + LSRUse &LU = Uses[LF.LUIdx]; + LU.AllFixupsOutsideLoop &= L->contains(LF.UserInst); + InsertSupplementalFormula(U, LU, LF.LUIdx); + CountRegisters(LU.Formulae.back(), Uses.size() - 1); + break; + } + } } - return User == TermBr; } -static bool ShouldCountToZero(ICmpInst *Cond, IVStrideUse* &CondUse, - ScalarEvolution *SE, Loop *L, - const TargetLowering *TLI = 0) { - if (!L->contains(Cond)) - return false; +/// CollectSubexprs - Split S into subexpressions which can be pulled out into +/// separate registers. If C is non-null, multiply each subexpression by C. +static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, + SmallVectorImpl<const SCEV *> &Ops, + ScalarEvolution &SE) { + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + // Break out add operands. + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) + CollectSubexprs(*I, C, Ops, SE); + return; + } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // Split a non-zero base out of an addrec. 
+ if (!AR->getStart()->isZero()) { + CollectSubexprs(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()), + AR->getStepRecurrence(SE), + AR->getLoop()), C, Ops, SE); + CollectSubexprs(AR->getStart(), C, Ops, SE); + return; + } + } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { + // Break (C * (a + b + c)) into C*a + C*b + C*c. + if (Mul->getNumOperands() == 2) + if (const SCEVConstant *Op0 = + dyn_cast<SCEVConstant>(Mul->getOperand(0))) { + CollectSubexprs(Mul->getOperand(1), + C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0, + Ops, SE); + return; + } + } - if (!isa<SCEVConstant>(CondUse->getOffset())) - return false; + // Otherwise use the value itself. + Ops.push_back(C ? SE.getMulExpr(C, S) : S); +} - // Handle only tests for equality for the moment. - if (!Cond->isEquality() || !Cond->hasOneUse()) - return false; - if (!isUsedByExitBranch(Cond, L)) - return false; +/// GenerateReassociations - Split out subexpressions from adds and the bases of +/// addrecs. +void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, + Formula Base, + unsigned Depth) { + // Arbitrarily cap recursion to protect compile time. + if (Depth >= 3) return; + + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { + const SCEV *BaseReg = Base.BaseRegs[i]; + + SmallVector<const SCEV *, 8> AddOps; + CollectSubexprs(BaseReg, 0, AddOps, SE); + if (AddOps.size() == 1) continue; + + for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(), + JE = AddOps.end(); J != JE; ++J) { + // Don't pull a constant into a register if the constant could be folded + // into an immediate field. + if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset, + Base.getNumRegs() > 1, + LU.Kind, LU.AccessTy, TLI, SE)) + continue; - Value *CondOp0 = Cond->getOperand(0); - const SCEV *IV = SE->getSCEV(CondOp0); - const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); - if (!AR || !AR->isAffine()) - return false; + // Collect all operands except *J. + SmallVector<const SCEV *, 8> InnerAddOps; + for (SmallVectorImpl<const SCEV *>::const_iterator K = AddOps.begin(), + KE = AddOps.end(); K != KE; ++K) + if (K != J) + InnerAddOps.push_back(*K); + + // Don't leave just a constant behind in a register if the constant could + // be folded into an immediate field. + if (InnerAddOps.size() == 1 && + isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset, + Base.getNumRegs() > 1, + LU.Kind, LU.AccessTy, TLI, SE)) + continue; - const SCEVConstant *SC = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)); - if (!SC || SC->getValue()->getSExtValue() < 0) - // If it's already counting down, don't do anything. - return false; + Formula F = Base; + F.BaseRegs[i] = SE.getAddExpr(InnerAddOps); + F.BaseRegs.push_back(*J); + if (InsertFormula(LU, LUIdx, F)) + // If that formula hadn't been seen before, recurse to find more like + // it. + GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1); + } + } +} - // If the RHS of the comparison is not an loop invariant, the rewrite - // cannot be done. Also bail out if it's already comparing against a zero. - // If we are checking this before cmp stride optimization, check if it's - // comparing against a already legal immediate. 
- Value *RHS = Cond->getOperand(1); - ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS); - if (!L->isLoopInvariant(RHS) || - (RHSC && RHSC->isZero()) || - (RHSC && TLI && TLI->isLegalICmpImmediate(RHSC->getSExtValue()))) - return false; +/// GenerateCombinations - Generate a formula consisting of all of the +/// loop-dominating registers added into a single register. +void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, + Formula Base) { + // This method is only interesting on a plurality of registers. + if (Base.BaseRegs.size() <= 1) return; + + Formula F = Base; + F.BaseRegs.clear(); + SmallVector<const SCEV *, 4> Ops; + for (SmallVectorImpl<const SCEV *>::const_iterator + I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) { + const SCEV *BaseReg = *I; + if (BaseReg->properlyDominates(L->getHeader(), &DT) && + !BaseReg->hasComputableLoopEvolution(L)) + Ops.push_back(BaseReg); + else + F.BaseRegs.push_back(BaseReg); + } + if (Ops.size() > 1) { + const SCEV *Sum = SE.getAddExpr(Ops); + // TODO: If Sum is zero, it probably means ScalarEvolution missed an + // opportunity to fold something. For now, just ignore such cases + // rather than proceed with zero in a register. + if (!Sum->isZero()) { + F.BaseRegs.push_back(Sum); + (void)InsertFormula(LU, LUIdx, F); + } + } +} - // Make sure the IV is only used for counting. Value may be preinc or - // postinc; 2 uses in either case. - if (!CondOp0->hasNUses(2)) - return false; +/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets. +void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, + Formula Base) { + // We can't add a symbolic offset if the address already contains one. + if (Base.AM.BaseGV) return; - return true; + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { + const SCEV *G = Base.BaseRegs[i]; + GlobalValue *GV = ExtractSymbol(G, SE); + if (G->isZero() || !GV) + continue; + Formula F = Base; + F.AM.BaseGV = GV; + if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset, + LU.Kind, LU.AccessTy, TLI)) + continue; + F.BaseRegs[i] = G; + (void)InsertFormula(LU, LUIdx, F); + } } -/// OptimizeLoopTermCond - Change loop terminating condition to use the -/// postinc iv when possible. -void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { - BasicBlock *LatchBlock = L->getLoopLatch(); - bool LatchExit = L->isLoopExiting(LatchBlock); - SmallVector<BasicBlock*, 8> ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); +/// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets. +void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, + Formula Base) { + // TODO: For now, just add the min and max offset, because it usually isn't + // worthwhile looking at everything inbetween. 
+ SmallVector<int64_t, 4> Worklist; + Worklist.push_back(LU.MinOffset); + if (LU.MaxOffset != LU.MinOffset) + Worklist.push_back(LU.MaxOffset); + + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { + const SCEV *G = Base.BaseRegs[i]; + + for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(), + E = Worklist.end(); I != E; ++I) { + Formula F = Base; + F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I; + if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I, + LU.Kind, LU.AccessTy, TLI)) { + F.BaseRegs[i] = SE.getAddExpr(G, SE.getIntegerSCEV(*I, G->getType())); + + (void)InsertFormula(LU, LUIdx, F); + } + } - for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { - BasicBlock *ExitingBlock = ExitingBlocks[i]; + int64_t Imm = ExtractImmediate(G, SE); + if (G->isZero() || Imm == 0) + continue; + Formula F = Base; + F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm; + if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset, + LU.Kind, LU.AccessTy, TLI)) + continue; + F.BaseRegs[i] = G; + (void)InsertFormula(LU, LUIdx, F); + } +} - // Finally, get the terminating condition for the loop if possible. If we - // can, we want to change it to use a post-incremented version of its - // induction variable, to allow coalescing the live ranges for the IV into - // one register value. +/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up +/// the comparison. For example, x == y -> x*c == y*c. +void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, + Formula Base) { + if (LU.Kind != LSRUse::ICmpZero) return; - BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); - if (!TermBr) + // Determine the integer type for the base formula. + const Type *IntTy = Base.getType(); + if (!IntTy) return; + if (SE.getTypeSizeInBits(IntTy) > 64) return; + + // Don't do this if there is more than one offset. + if (LU.MinOffset != LU.MaxOffset) return; + + assert(!Base.AM.BaseGV && "ICmpZero use is not legal!"); + + // Check each interesting stride. + for (SmallSetVector<int64_t, 8>::const_iterator + I = Factors.begin(), E = Factors.end(); I != E; ++I) { + int64_t Factor = *I; + Formula F = Base; + + // Check that the multiplication doesn't overflow. + if (F.AM.BaseOffs == INT64_MIN && Factor == -1) continue; - // FIXME: Overly conservative, termination condition could be an 'or' etc.. - if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition())) + F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs * Factor; + if (F.AM.BaseOffs / Factor != Base.AM.BaseOffs) continue; - // Search IVUsesByStride to find Cond's IVUse if there is one. - IVStrideUse *CondUse = 0; - const SCEV *CondStride = 0; - ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition()); - if (!FindIVUserForCond(Cond, CondUse, CondStride)) + // Check that multiplying with the use offset doesn't overflow. + int64_t Offset = LU.MinOffset; + if (Offset == INT64_MIN && Factor == -1) + continue; + Offset = (uint64_t)Offset * Factor; + if (Offset / Factor != LU.MinOffset) continue; - // If the latch block is exiting and it's not a single block loop, it's - // not safe to use postinc iv in other exiting blocks. FIXME: overly - // conservative? How about icmp stride optimization? - bool UsePostInc = !(e > 1 && LatchExit && ExitingBlock != LatchBlock); - if (UsePostInc && ExitingBlock != LatchBlock) { - if (!Cond->hasOneUse()) - // See below, we don't want the condition to be cloned. 
- UsePostInc = false; - else { - // If exiting block is the latch block, we know it's safe and profitable - // to transform the icmp to use post-inc iv. Otherwise do so only if it - // would not reuse another iv and its iv would be reused by other uses. - // We are optimizing for the case where the icmp is the only use of the - // iv. - IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[CondStride]; - for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(), - E = StrideUses.Users.end(); I != E; ++I) { - if (I->getUser() == Cond) - continue; - if (!I->isUseOfPostIncrementedValue()) { - UsePostInc = false; - break; - } + // Check that this scale is legal. + if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI)) + continue; + + // Compensate for the use having MinOffset built into it. + F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Offset - LU.MinOffset; + + const SCEV *FactorS = SE.getIntegerSCEV(Factor, IntTy); + + // Check that multiplying with each base register doesn't overflow. + for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) { + F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS); + if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i]) + goto next; + } + + // Check that multiplying with the scaled register doesn't overflow. + if (F.ScaledReg) { + F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS); + if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg) + continue; + } + + // If we make it here and it's legal, add it. + (void)InsertFormula(LU, LUIdx, F); + next:; + } +} + +/// GenerateScales - Generate stride factor reuse formulae by making use of +/// scaled-offset address modes, for example. +void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, + Formula Base) { + // Determine the integer type for the base formula. + const Type *IntTy = Base.getType(); + if (!IntTy) return; + + // If this Formula already has a scaled register, we can't add another one. + if (Base.AM.Scale != 0) return; + + // Check each interesting stride. + for (SmallSetVector<int64_t, 8>::const_iterator + I = Factors.begin(), E = Factors.end(); I != E; ++I) { + int64_t Factor = *I; + + Base.AM.Scale = Factor; + Base.AM.HasBaseReg = Base.BaseRegs.size() > 1; + // Check whether this scale is going to be legal. + if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset, + LU.Kind, LU.AccessTy, TLI)) { + // As a special-case, handle special out-of-loop Basic users specially. + // TODO: Reconsider this special case. + if (LU.Kind == LSRUse::Basic && + isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset, + LSRUse::Special, LU.AccessTy, TLI) && + LU.AllFixupsOutsideLoop) + LU.Kind = LSRUse::Special; + else + continue; + } + // For an ICmpZero, negating a solitary base register won't lead to + // new solutions. + if (LU.Kind == LSRUse::ICmpZero && + !Base.AM.HasBaseReg && Base.AM.BaseOffs == 0 && !Base.AM.BaseGV) + continue; + // For each addrec base reg, apply the scale, if possible. + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) + if (const SCEVAddRecExpr *AR = + dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) { + const SCEV *FactorS = SE.getIntegerSCEV(Factor, IntTy); + if (FactorS->isZero()) + continue; + // Divide out the factor, ignoring high bits, since we'll be + // scaling the value back up in the end. + if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) { + // TODO: This could be optimized to avoid all the copying. 
+ Formula F = Base; + F.ScaledReg = Quotient; + std::swap(F.BaseRegs[i], F.BaseRegs.back()); + F.BaseRegs.pop_back(); + (void)InsertFormula(LU, LUIdx, F); } } + } +} - // If iv for the stride might be shared and any of the users use pre-inc - // iv might be used, then it's not safe to use post-inc iv. - if (UsePostInc && - isa<SCEVConstant>(CondStride) && - StrideMightBeShared(CondStride, L, true)) - UsePostInc = false; - } +/// GenerateTruncates - Generate reuse formulae from different IV types. +void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, + Formula Base) { + // This requires TargetLowering to tell us which truncates are free. + if (!TLI) return; + + // Don't bother truncating symbolic values. + if (Base.AM.BaseGV) return; + + // Determine the integer type for the base formula. + const Type *DstTy = Base.getType(); + if (!DstTy) return; + DstTy = SE.getEffectiveSCEVType(DstTy); + + for (SmallSetVector<const Type *, 4>::const_iterator + I = Types.begin(), E = Types.end(); I != E; ++I) { + const Type *SrcTy = *I; + if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) { + Formula F = Base; + + if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I); + for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(), + JE = F.BaseRegs.end(); J != JE; ++J) + *J = SE.getAnyExtendExpr(*J, SrcTy); + + // TODO: This assumes we've done basic processing on all uses and + // have an idea what the register usage is. + if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses)) + continue; - // If the trip count is computed in terms of a max (due to ScalarEvolution - // being unable to find a sufficient guard, for example), change the loop - // comparison to use SLT or ULT instead of NE. - Cond = OptimizeMax(L, Cond, CondUse); - - // If possible, change stride and operands of the compare instruction to - // eliminate one stride. However, avoid rewriting the compare instruction - // with an iv of new stride if it's likely the new stride uses will be - // rewritten using the stride of the compare instruction. - if (ExitingBlock == LatchBlock && isa<SCEVConstant>(CondStride)) { - // If the condition stride is a constant and it's the only use, we might - // want to optimize it first by turning it to count toward zero. - if (!StrideMightBeShared(CondStride, L, false) && - !ShouldCountToZero(Cond, CondUse, SE, L, TLI)) - Cond = ChangeCompareStride(L, Cond, CondUse, CondStride); + (void)InsertFormula(LU, LUIdx, F); } + } +} + +namespace { + +/// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to +/// defer modifications so that the search phase doesn't have to worry about +/// the data structures moving underneath it. +struct WorkItem { + size_t LUIdx; + int64_t Imm; + const SCEV *OrigReg; + + WorkItem(size_t LI, int64_t I, const SCEV *R) + : LUIdx(LI), Imm(I), OrigReg(R) {} + + void print(raw_ostream &OS) const; + void dump() const; +}; + +} + +void WorkItem::print(raw_ostream &OS) const { + OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx + << " , add offset " << Imm; +} + +void WorkItem::dump() const { + print(errs()); errs() << '\n'; +} - if (!UsePostInc) +/// GenerateCrossUseConstantOffsets - Look for registers which are a constant +/// distance apart and try to form reuse opportunities between them. +void LSRInstance::GenerateCrossUseConstantOffsets() { + // Group the registers by their value without any added constant offset. 
+ typedef std::map<int64_t, const SCEV *> ImmMapTy; + typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy; + RegMapTy Map; + DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap; + SmallVector<const SCEV *, 8> Sequence; + for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end(); + I != E; ++I) { + const SCEV *Reg = *I; + int64_t Imm = ExtractImmediate(Reg, SE); + std::pair<RegMapTy::iterator, bool> Pair = + Map.insert(std::make_pair(Reg, ImmMapTy())); + if (Pair.second) + Sequence.push_back(Reg); + Pair.first->second.insert(std::make_pair(Imm, *I)); + UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(*I); + } + + // Now examine each set of registers with the same base value. Build up + // a list of work to do and do the work in a separate step so that we're + // not adding formulae and register counts while we're searching. + SmallVector<WorkItem, 32> WorkItems; + SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems; + for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(), + E = Sequence.end(); I != E; ++I) { + const SCEV *Reg = *I; + const ImmMapTy &Imms = Map.find(Reg)->second; + + // It's not worthwhile looking for reuse if there's only one offset. + if (Imms.size() == 1) continue; - DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: " - << *Cond << '\n'); + DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':'; + for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end(); + J != JE; ++J) + dbgs() << ' ' << J->first; + dbgs() << '\n'); - // It's possible for the setcc instruction to be anywhere in the loop, and - // possible for it to have multiple users. If it is not immediately before - // the exiting block branch, move it. - if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) { - if (Cond->hasOneUse()) { // Condition has a single use, just move it. - Cond->moveBefore(TermBr); + // Examine each offset. + for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end(); + J != JE; ++J) { + const SCEV *OrigReg = J->second; + + int64_t JImm = J->first; + const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg); + + if (!isa<SCEVConstant>(OrigReg) && + UsedByIndicesMap[Reg].count() == 1) { + DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n'); + continue; + } + + // Conservatively examine offsets between this orig reg a few selected + // other orig regs. + ImmMapTy::const_iterator OtherImms[] = { + Imms.begin(), prior(Imms.end()), + Imms.upper_bound((Imms.begin()->first + prior(Imms.end())->first) / 2) + }; + for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) { + ImmMapTy::const_iterator M = OtherImms[i]; + if (M == J || M == JE) continue; + + // Compute the difference between the two. + int64_t Imm = (uint64_t)JImm - M->first; + for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1; + LUIdx = UsedByIndices.find_next(LUIdx)) + // Make a memo of this use, offset, and register tuple. + if (UniqueItems.insert(std::make_pair(LUIdx, Imm))) + WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg)); + } + } + } + + Map.clear(); + Sequence.clear(); + UsedByIndicesMap.clear(); + UniqueItems.clear(); + + // Now iterate through the worklist and add new formulae. 
+ for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(), + E = WorkItems.end(); I != E; ++I) { + const WorkItem &WI = *I; + size_t LUIdx = WI.LUIdx; + LSRUse &LU = Uses[LUIdx]; + int64_t Imm = WI.Imm; + const SCEV *OrigReg = WI.OrigReg; + + const Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType()); + const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm)); + unsigned BitWidth = SE.getTypeSizeInBits(IntTy); + + // TODO: Use a more targeted data structure. + for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) { + Formula F = LU.Formulae[L]; + // Use the immediate in the scaled register. + if (F.ScaledReg == OrigReg) { + int64_t Offs = (uint64_t)F.AM.BaseOffs + + Imm * (uint64_t)F.AM.Scale; + // Don't create 50 + reg(-50). + if (F.referencesReg(SE.getSCEV( + ConstantInt::get(IntTy, -(uint64_t)Offs)))) + continue; + Formula NewF = F; + NewF.AM.BaseOffs = Offs; + if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset, + LU.Kind, LU.AccessTy, TLI)) + continue; + NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg); + + // If the new scale is a constant in a register, and adding the constant + // value to the immediate would produce a value closer to zero than the + // immediate itself, then the formula isn't worthwhile. + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) + if (C->getValue()->getValue().isNegative() != + (NewF.AM.BaseOffs < 0) && + (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale)) + .ule(APInt(BitWidth, NewF.AM.BaseOffs).abs())) + continue; + + // OK, looks good. + (void)InsertFormula(LU, LUIdx, NewF); } else { - // Otherwise, clone the terminating condition and insert into the - // loopend. - Cond = cast<ICmpInst>(Cond->clone()); - Cond->setName(L->getHeader()->getName() + ".termcond"); - ExitingBlock->getInstList().insert(TermBr, Cond); + // Use the immediate in a base register. + for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) { + const SCEV *BaseReg = F.BaseRegs[N]; + if (BaseReg != OrigReg) + continue; + Formula NewF = F; + NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm; + if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset, + LU.Kind, LU.AccessTy, TLI)) + continue; + NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg); + + // If the new formula has a constant in a register, and adding the + // constant value to the immediate would produce a value closer to + // zero than the immediate itself, then the formula isn't worthwhile. + for (SmallVectorImpl<const SCEV *>::const_iterator + J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end(); + J != JE; ++J) + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J)) + if (C->getValue()->getValue().isNegative() != + (NewF.AM.BaseOffs < 0) && + C->getValue()->getValue().abs() + .ule(APInt(BitWidth, NewF.AM.BaseOffs).abs())) + goto skip_formula; + + // Ok, looks good. + (void)InsertFormula(LU, LUIdx, NewF); + break; + skip_formula:; + } + } + } + } +} - // Clone the IVUse, as the old use still exists! - IU->IVUsesByStride[CondStride]->addUser(CondUse->getOffset(), Cond, - CondUse->getOperandValToReplace()); - CondUse = &IU->IVUsesByStride[CondStride]->Users.back(); +/// GenerateAllReuseFormulae - Generate formulae for each use. +void +LSRInstance::GenerateAllReuseFormulae() { + // This is split into multiple loops so that hasRegsUsedByUsesOtherThan + // queries are more precise. 
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) + GenerateReassociations(LU, LUIdx, LU.Formulae[i]); + for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) + GenerateCombinations(LU, LUIdx, LU.Formulae[i]); + } + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) + GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]); + for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) + GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]); + for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) + GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]); + for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) + GenerateScales(LU, LUIdx, LU.Formulae[i]); + } + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) + GenerateTruncates(LU, LUIdx, LU.Formulae[i]); + } + + GenerateCrossUseConstantOffsets(); +} + +/// If their are multiple formulae with the same set of registers used +/// by other uses, pick the best one and delete the others. +void LSRInstance::FilterOutUndesirableDedicatedRegisters() { +#ifndef NDEBUG + bool Changed = false; +#endif + + // Collect the best formula for each unique set of shared registers. This + // is reset for each use. + typedef DenseMap<SmallVector<const SCEV *, 2>, size_t, UniquifierDenseMapInfo> + BestFormulaeTy; + BestFormulaeTy BestFormulae; + + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + FormulaSorter Sorter(L, LU, SE, DT); + + // Clear out the set of used regs; it will be recomputed. + LU.Regs.clear(); + + for (size_t FIdx = 0, NumForms = LU.Formulae.size(); + FIdx != NumForms; ++FIdx) { + Formula &F = LU.Formulae[FIdx]; + + SmallVector<const SCEV *, 2> Key; + for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(), + JE = F.BaseRegs.end(); J != JE; ++J) { + const SCEV *Reg = *J; + if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx)) + Key.push_back(Reg); } + if (F.ScaledReg && + RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx)) + Key.push_back(F.ScaledReg); + // Unstable sort by host order ok, because this is only used for + // uniquifying. + std::sort(Key.begin(), Key.end()); + + std::pair<BestFormulaeTy::const_iterator, bool> P = + BestFormulae.insert(std::make_pair(Key, FIdx)); + if (!P.second) { + Formula &Best = LU.Formulae[P.first->second]; + if (Sorter.operator()(F, Best)) + std::swap(F, Best); + DEBUG(dbgs() << "Filtering out "; F.print(dbgs()); + dbgs() << "\n" + " in favor of "; Best.print(dbgs()); + dbgs() << '\n'); +#ifndef NDEBUG + Changed = true; +#endif + std::swap(F, LU.Formulae.back()); + LU.Formulae.pop_back(); + --FIdx; + --NumForms; + continue; + } + if (F.ScaledReg) LU.Regs.insert(F.ScaledReg); + LU.Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); } + BestFormulae.clear(); + } - // If we get to here, we know that we can transform the setcc instruction to - // use the post-incremented version of the IV, allowing us to coalesce the - // live ranges for the IV correctly. 
- CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), CondStride)); - CondUse->setIsUseOfPostIncrementedValue(true); - Changed = true; + DEBUG(if (Changed) { + dbgs() << "\n" + "After filtering out undesirable candidates:\n"; + print_uses(dbgs()); + }); +} - ++NumLoopCond; +/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of +/// formulae to choose from, use some rough heuristics to prune down the number +/// of formulae. This keeps the main solver from taking an extraordinary amount +/// of time in some worst-case scenarios. +void LSRInstance::NarrowSearchSpaceUsingHeuristics() { + // This is a rough guess that seems to work fairly well. + const size_t Limit = UINT16_MAX; + + SmallPtrSet<const SCEV *, 4> Taken; + for (;;) { + // Estimate the worst-case number of solutions we might consider. We almost + // never consider this many solutions because we prune the search space, + // but the pruning isn't always sufficient. + uint32_t Power = 1; + for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), + E = Uses.end(); I != E; ++I) { + size_t FSize = I->Formulae.size(); + if (FSize >= Limit) { + Power = Limit; + break; + } + Power *= FSize; + if (Power >= Limit) + break; + } + if (Power < Limit) + break; + + // Ok, we have too many of formulae on our hands to conveniently handle. + // Use a rough heuristic to thin out the list. + + // Pick the register which is used by the most LSRUses, which is likely + // to be a good reuse register candidate. + const SCEV *Best = 0; + unsigned BestNum = 0; + for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end(); + I != E; ++I) { + const SCEV *Reg = *I; + if (Taken.count(Reg)) + continue; + if (!Best) + Best = Reg; + else { + unsigned Count = RegUses.getUsedByIndices(Reg).count(); + if (Count > BestNum) { + Best = Reg; + BestNum = Count; + } + } + } + + DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best + << " will yield profitable reuse.\n"); + Taken.insert(Best); + + // In any use with formulae which references this register, delete formulae + // which don't reference it. + for (SmallVectorImpl<LSRUse>::iterator I = Uses.begin(), + E = Uses.end(); I != E; ++I) { + LSRUse &LU = *I; + if (!LU.Regs.count(Best)) continue; + + // Clear out the set of used regs; it will be recomputed. + LU.Regs.clear(); + + for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) { + Formula &F = LU.Formulae[i]; + if (!F.referencesReg(Best)) { + DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n'); + std::swap(LU.Formulae.back(), F); + LU.Formulae.pop_back(); + --e; + --i; + continue; + } + + if (F.ScaledReg) LU.Regs.insert(F.ScaledReg); + LU.Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); + } + } + + DEBUG(dbgs() << "After pre-selection:\n"; + print_uses(dbgs())); } } -bool LoopStrengthReduce::OptimizeLoopCountIVOfStride(const SCEV* &Stride, - IVStrideUse* &CondUse, - Loop *L) { - // If the only use is an icmp of a loop exiting conditional branch, then - // attempt the optimization. - BasedUser User = BasedUser(*CondUse, SE); - assert(isa<ICmpInst>(User.Inst) && "Expecting an ICMPInst!"); - ICmpInst *Cond = cast<ICmpInst>(User.Inst); +/// SolveRecurse - This is the recursive solver. 
+void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution, + Cost &SolutionCost, + SmallVectorImpl<const Formula *> &Workspace, + const Cost &CurCost, + const SmallPtrSet<const SCEV *, 16> &CurRegs, + DenseSet<const SCEV *> &VisitedRegs) const { + // Some ideas: + // - prune more: + // - use more aggressive filtering + // - sort the formula so that the most profitable solutions are found first + // - sort the uses too + // - search faster: + // - don't compute a cost, and then compare. compare while computing a cost + // and bail early. + // - track register sets with SmallBitVector + + const LSRUse &LU = Uses[Workspace.size()]; + + // If this use references any register that's already a part of the + // in-progress solution, consider it a requirement that a formula must + // reference that register in order to be considered. This prunes out + // unprofitable searching. + SmallSetVector<const SCEV *, 4> ReqRegs; + for (SmallPtrSet<const SCEV *, 16>::const_iterator I = CurRegs.begin(), + E = CurRegs.end(); I != E; ++I) + if (LU.Regs.count(*I)) + ReqRegs.insert(*I); + + bool AnySatisfiedReqRegs = false; + SmallPtrSet<const SCEV *, 16> NewRegs; + Cost NewCost; +retry: + for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), + E = LU.Formulae.end(); I != E; ++I) { + const Formula &F = *I; + + // Ignore formulae which do not use any of the required registers. + for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(), + JE = ReqRegs.end(); J != JE; ++J) { + const SCEV *Reg = *J; + if ((!F.ScaledReg || F.ScaledReg != Reg) && + std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) == + F.BaseRegs.end()) + goto skip; + } + AnySatisfiedReqRegs = true; + + // Evaluate the cost of the current formula. If it's already worse than + // the current best, prune the search at that point. + NewCost = CurCost; + NewRegs = CurRegs; + NewCost.RateFormula(F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT); + if (NewCost < SolutionCost) { + Workspace.push_back(&F); + if (Workspace.size() != Uses.size()) { + SolveRecurse(Solution, SolutionCost, Workspace, NewCost, + NewRegs, VisitedRegs); + if (F.getNumRegs() == 1 && Workspace.size() == 1) + VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]); + } else { + DEBUG(dbgs() << "New best at "; NewCost.print(dbgs()); + dbgs() << ". Regs:"; + for (SmallPtrSet<const SCEV *, 16>::const_iterator + I = NewRegs.begin(), E = NewRegs.end(); I != E; ++I) + dbgs() << ' ' << **I; + dbgs() << '\n'); + + SolutionCost = NewCost; + Solution = Workspace; + } + Workspace.pop_back(); + } + skip:; + } - // Less strict check now that compare stride optimization is done. - if (!ShouldCountToZero(Cond, CondUse, SE, L)) - return false; + // If none of the formulae had all of the required registers, relax the + // constraint so that we don't exclude all formulae. + if (!AnySatisfiedReqRegs) { + ReqRegs.clear(); + goto retry; + } +} - Value *CondOp0 = Cond->getOperand(0); - PHINode *PHIExpr = dyn_cast<PHINode>(CondOp0); - Instruction *Incr; - if (!PHIExpr) { - // Value tested is postinc. Find the phi node. - Incr = dyn_cast<BinaryOperator>(CondOp0); - // FIXME: Just use User.OperandValToReplace here? 
- if (!Incr || Incr->getOpcode() != Instruction::Add) - return false; +void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const { + SmallVector<const Formula *, 8> Workspace; + Cost SolutionCost; + SolutionCost.Loose(); + Cost CurCost; + SmallPtrSet<const SCEV *, 16> CurRegs; + DenseSet<const SCEV *> VisitedRegs; + Workspace.reserve(Uses.size()); + + SolveRecurse(Solution, SolutionCost, Workspace, CurCost, + CurRegs, VisitedRegs); + + // Ok, we've now made all our decisions. + DEBUG(dbgs() << "\n" + "The chosen solution requires "; SolutionCost.print(dbgs()); + dbgs() << ":\n"; + for (size_t i = 0, e = Uses.size(); i != e; ++i) { + dbgs() << " "; + Uses[i].print(dbgs()); + dbgs() << "\n" + " "; + Solution[i]->print(dbgs()); + dbgs() << '\n'; + }); +} - PHIExpr = dyn_cast<PHINode>(Incr->getOperand(0)); - if (!PHIExpr) - return false; - // 1 use for preinc value, the increment. - if (!PHIExpr->hasOneUse()) - return false; - } else { - assert(isa<PHINode>(CondOp0) && - "Unexpected loop exiting counting instruction sequence!"); - PHIExpr = cast<PHINode>(CondOp0); - // Value tested is preinc. Find the increment. - // A CmpInst is not a BinaryOperator; we depend on this. - Instruction::use_iterator UI = PHIExpr->use_begin(); - Incr = dyn_cast<BinaryOperator>(UI); - if (!Incr) - Incr = dyn_cast<BinaryOperator>(++UI); - // One use for postinc value, the phi. Unnecessarily conservative? - if (!Incr || !Incr->hasOneUse() || Incr->getOpcode() != Instruction::Add) - return false; +/// getImmediateDominator - A handy utility for the specific DominatorTree +/// query that we need here. +/// +static BasicBlock *getImmediateDominator(BasicBlock *BB, DominatorTree &DT) { + DomTreeNode *Node = DT.getNode(BB); + if (!Node) return 0; + Node = Node->getIDom(); + if (!Node) return 0; + return Node->getBlock(); +} + +Value *LSRInstance::Expand(const LSRFixup &LF, + const Formula &F, + BasicBlock::iterator IP, + SCEVExpander &Rewriter, + SmallVectorImpl<WeakVH> &DeadInsts) const { + const LSRUse &LU = Uses[LF.LUIdx]; + + // Then, collect some instructions which we will remain dominated by when + // expanding the replacement. These must be dominated by any operands that + // will be required in the expansion. + SmallVector<Instruction *, 4> Inputs; + if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace)) + Inputs.push_back(I); + if (LU.Kind == LSRUse::ICmpZero) + if (Instruction *I = + dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1))) + Inputs.push_back(I); + if (LF.PostIncLoop) { + if (!L->contains(LF.UserInst)) + Inputs.push_back(L->getLoopLatch()->getTerminator()); + else + Inputs.push_back(IVIncInsertPos); } - // Replace the increment with a decrement. - DEBUG(dbgs() << "LSR: Examining use "); - DEBUG(WriteAsOperand(dbgs(), CondOp0, /*PrintType=*/false)); - DEBUG(dbgs() << " in Inst: " << *Cond << '\n'); - BinaryOperator *Decr = BinaryOperator::Create(Instruction::Sub, - Incr->getOperand(0), Incr->getOperand(1), "tmp", Incr); - Incr->replaceAllUsesWith(Decr); - Incr->eraseFromParent(); - - // Substitute endval-startval for the original startval, and 0 for the - // original endval. Since we're only testing for equality this is OK even - // if the computation wraps around. - BasicBlock *Preheader = L->getLoopPreheader(); - Instruction *PreInsertPt = Preheader->getTerminator(); - unsigned InBlock = L->contains(PHIExpr->getIncomingBlock(0)) ? 
1 : 0; - Value *StartVal = PHIExpr->getIncomingValue(InBlock); - Value *EndVal = Cond->getOperand(1); - DEBUG(dbgs() << " Optimize loop counting iv to count down [" - << *EndVal << " .. " << *StartVal << "]\n"); - - // FIXME: check for case where both are constant. - Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0); - BinaryOperator *NewStartVal = BinaryOperator::Create(Instruction::Sub, - EndVal, StartVal, "tmp", PreInsertPt); - PHIExpr->setIncomingValue(InBlock, NewStartVal); - Cond->setOperand(1, Zero); - DEBUG(dbgs() << " New icmp: " << *Cond << "\n"); - - int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue(); - const SCEV *NewStride = 0; - bool Found = false; - for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - const SCEV *OldStride = IU->StrideOrder[i]; - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OldStride)) - if (SC->getValue()->getSExtValue() == -SInt) { - Found = true; - NewStride = OldStride; + // Then, climb up the immediate dominator tree as far as we can go while + // still being dominated by the input positions. + for (;;) { + bool AllDominate = true; + Instruction *BetterPos = 0; + BasicBlock *IDom = getImmediateDominator(IP->getParent(), DT); + if (!IDom) break; + Instruction *Tentative = IDom->getTerminator(); + for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(), + E = Inputs.end(); I != E; ++I) { + Instruction *Inst = *I; + if (Inst == Tentative || !DT.dominates(Inst, Tentative)) { + AllDominate = false; break; } + if (IDom == Inst->getParent() && + (!BetterPos || DT.dominates(BetterPos, Inst))) + BetterPos = next(BasicBlock::iterator(Inst)); + } + if (!AllDominate) + break; + if (BetterPos) + IP = BetterPos; + else + IP = Tentative; } + while (isa<PHINode>(IP)) ++IP; + + // Inform the Rewriter if we have a post-increment use, so that it can + // perform an advantageous expansion. + Rewriter.setPostInc(LF.PostIncLoop); + + // This is the type that the user actually needs. + const Type *OpTy = LF.OperandValToReplace->getType(); + // This will be the type that we'll initially expand to. + const Type *Ty = F.getType(); + if (!Ty) + // No type known; just expand directly to the ultimate type. + Ty = OpTy; + else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy)) + // Expand directly to the ultimate type if it's the right size. + Ty = OpTy; + // This is the type to do integer arithmetic in. + const Type *IntTy = SE.getEffectiveSCEVType(Ty); + + // Build up a list of operands to add together to form the full base. + SmallVector<const SCEV *, 8> Ops; + + // Expand the BaseRegs portion. + for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(), + E = F.BaseRegs.end(); I != E; ++I) { + const SCEV *Reg = *I; + assert(!Reg->isZero() && "Zero allocated in a base register!"); + + // If we're expanding for a post-inc user for the add-rec's loop, make the + // post-inc adjustment. + const SCEV *Start = Reg; + while (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Start)) { + if (AR->getLoop() == LF.PostIncLoop) { + Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE)); + // If the user is inside the loop, insert the code after the increment + // so that it is dominated by its operand. If the original insert point + // was already dominated by the increment, keep it, because there may + // be loop-variant operands that need to be respected also. 
+ if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP)) + IP = IVIncInsertPos; + break; + } + Start = AR->getStart(); + } - if (!Found) - NewStride = SE->getIntegerSCEV(-SInt, Stride->getType()); - IU->AddUser(NewStride, CondUse->getOffset(), Cond, Cond->getOperand(0)); - IU->IVUsesByStride[Stride]->removeUser(CondUse); + Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP))); + } - CondUse = &IU->IVUsesByStride[NewStride]->Users.back(); - Stride = NewStride; + // Flush the operand list to suppress SCEVExpander hoisting. + if (!Ops.empty()) { + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); + Ops.clear(); + Ops.push_back(SE.getUnknown(FullV)); + } - ++NumCountZero; + // Expand the ScaledReg portion. + Value *ICmpScaledV = 0; + if (F.AM.Scale != 0) { + const SCEV *ScaledS = F.ScaledReg; + + // If we're expanding for a post-inc user for the add-rec's loop, make the + // post-inc adjustment. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ScaledS)) + if (AR->getLoop() == LF.PostIncLoop) + ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE)); + + if (LU.Kind == LSRUse::ICmpZero) { + // An interesting way of "folding" with an icmp is to use a negated + // scale, which we'll implement by inserting it into the other operand + // of the icmp. + assert(F.AM.Scale == -1 && + "The only scale supported by ICmpZero uses is -1!"); + ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP); + } else { + // Otherwise just expand the scaled register and an explicit scale, + // which is expected to be matched as part of the address. + ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP)); + ScaledS = SE.getMulExpr(ScaledS, + SE.getIntegerSCEV(F.AM.Scale, + ScaledS->getType())); + Ops.push_back(ScaledS); + + // Flush the operand list to suppress SCEVExpander hoisting. + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); + Ops.clear(); + Ops.push_back(SE.getUnknown(FullV)); + } + } - return true; + // Expand the GV portion. + if (F.AM.BaseGV) { + Ops.push_back(SE.getUnknown(F.AM.BaseGV)); + + // Flush the operand list to suppress SCEVExpander hoisting. + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); + Ops.clear(); + Ops.push_back(SE.getUnknown(FullV)); + } + + // Expand the immediate portion. + int64_t Offset = (uint64_t)F.AM.BaseOffs + LF.Offset; + if (Offset != 0) { + if (LU.Kind == LSRUse::ICmpZero) { + // The other interesting way of "folding" with an ICmpZero is to use a + // negated immediate. + if (!ICmpScaledV) + ICmpScaledV = ConstantInt::get(IntTy, -Offset); + else { + Ops.push_back(SE.getUnknown(ICmpScaledV)); + ICmpScaledV = ConstantInt::get(IntTy, Offset); + } + } else { + // Just add the immediate values. These again are expected to be matched + // as part of the address. + Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset))); + } + } + + // Emit instructions summing all the operands. + const SCEV *FullS = Ops.empty() ? + SE.getIntegerSCEV(0, IntTy) : + SE.getAddExpr(Ops); + Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP); + + // We're done expanding now, so reset the rewriter. + Rewriter.setPostInc(0); + + // An ICmpZero Formula represents an ICmp which we're handling as a + // comparison against zero. Now that we've expanded an expression for that + // form, update the ICmp's other operand. 
+ if (LU.Kind == LSRUse::ICmpZero) { + ICmpInst *CI = cast<ICmpInst>(LF.UserInst); + DeadInsts.push_back(CI->getOperand(1)); + assert(!F.AM.BaseGV && "ICmp does not support folding a global value and " + "a scale at the same time!"); + if (F.AM.Scale == -1) { + if (ICmpScaledV->getType() != OpTy) { + Instruction *Cast = + CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false, + OpTy, false), + ICmpScaledV, OpTy, "tmp", CI); + ICmpScaledV = Cast; + } + CI->setOperand(1, ICmpScaledV); + } else { + assert(F.AM.Scale == 0 && + "ICmp does not support folding a global value and " + "a scale at the same time!"); + Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy), + -(uint64_t)Offset); + if (C->getType() != OpTy) + C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, + OpTy, false), + C, OpTy); + + CI->setOperand(1, C); + } + } + + return FullV; } -/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding -/// when to exit the loop is used only for that purpose, try to rearrange things -/// so it counts down to a test against zero. -bool LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { - bool ThisChanged = false; - for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - const SCEV *Stride = IU->StrideOrder[i]; - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(Stride); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - // FIXME: Generalize to non-affine IV's. - if (!SI->first->isLoopInvariant(L)) - continue; - // If stride is a constant and it has an icmpinst use, check if we can - // optimize the loop to count down. - if (isa<SCEVConstant>(Stride) && SI->second->Users.size() == 1) { - Instruction *User = SI->second->Users.begin()->getUser(); - if (!isa<ICmpInst>(User)) - continue; - const SCEV *CondStride = Stride; - IVStrideUse *Use = &*SI->second->Users.begin(); - if (!OptimizeLoopCountIVOfStride(CondStride, Use, L)) - continue; - ThisChanged = true; +/// RewriteForPHI - Helper for Rewrite. PHI nodes are special because the use +/// of their operands effectively happens in their predecessor blocks, so the +/// expression may need to be expanded in multiple places. +void LSRInstance::RewriteForPHI(PHINode *PN, + const LSRFixup &LF, + const Formula &F, + SCEVExpander &Rewriter, + SmallVectorImpl<WeakVH> &DeadInsts, + Pass *P) const { + DenseMap<BasicBlock *, Value *> Inserted; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == LF.OperandValToReplace) { + BasicBlock *BB = PN->getIncomingBlock(i); + + // If this is a critical edge, split the edge so that we do not insert + // the code on all predecessor/successor paths. We do this unless this + // is the canonical backedge for this loop, which complicates post-inc + // users. + if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 && + !isa<IndirectBrInst>(BB->getTerminator()) && + (PN->getParent() != L->getHeader() || !L->contains(BB))) { + // Split the critical edge. + BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P); + + // If PN is outside of the loop and BB is in the loop, we want to + // move the block to be immediately before the PHI block, not + // immediately after BB. + if (L->contains(BB) && !L->contains(PN)) + NewBB->moveBefore(PN->getParent()); + + // Splitting the edge can reduce the number of PHI entries we have. 
+ e = PN->getNumIncomingValues(); + BB = NewBB; + i = PN->getBasicBlockIndex(BB); + } - // Now check if it's possible to reuse this iv for other stride uses. - for (unsigned j = 0, ee = IU->StrideOrder.size(); j != ee; ++j) { - const SCEV *SStride = IU->StrideOrder[j]; - if (SStride == CondStride) - continue; - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SII = - IU->IVUsesByStride.find(SStride); - assert(SII != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - // FIXME: Generalize to non-affine IV's. - if (!SII->first->isLoopInvariant(L)) - continue; - // FIXME: Rewrite other stride using CondStride. + std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair = + Inserted.insert(std::make_pair(BB, static_cast<Value *>(0))); + if (!Pair.second) + PN->setIncomingValue(i, Pair.first->second); + else { + Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts); + + // If this is reuse-by-noop-cast, insert the noop cast. + const Type *OpTy = LF.OperandValToReplace->getType(); + if (FullV->getType() != OpTy) + FullV = + CastInst::Create(CastInst::getCastOpcode(FullV, false, + OpTy, false), + FullV, LF.OperandValToReplace->getType(), + "tmp", BB->getTerminator()); + + PN->setIncomingValue(i, FullV); + Pair.first->second = FullV; } } +} + +/// Rewrite - Emit instructions for the leading candidate expression for this +/// LSRUse (this is called "expanding"), and update the UserInst to reference +/// the newly expanded value. +void LSRInstance::Rewrite(const LSRFixup &LF, + const Formula &F, + SCEVExpander &Rewriter, + SmallVectorImpl<WeakVH> &DeadInsts, + Pass *P) const { + // First, find an insertion point that dominates UserInst. For PHI nodes, + // find the nearest block which dominates all the relevant uses. + if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) { + RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P); + } else { + Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts); + + // If this is reuse-by-noop-cast, insert the noop cast. + const Type *OpTy = LF.OperandValToReplace->getType(); + if (FullV->getType() != OpTy) { + Instruction *Cast = + CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false), + FullV, OpTy, "tmp", LF.UserInst); + FullV = Cast; + } + + // Update the user. ICmpZero is handled specially here (for now) because + // Expand may have updated one of the operands of the icmp already, and + // its new value may happen to be equal to LF.OperandValToReplace, in + // which case doing replaceUsesOfWith leads to replacing both operands + // with the same value. TODO: Reorganize this. + if (Uses[LF.LUIdx].Kind == LSRUse::ICmpZero) + LF.UserInst->setOperand(0, FullV); + else + LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV); } - Changed |= ThisChanged; - return ThisChanged; + DeadInsts.push_back(LF.OperandValToReplace); } -bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { - IU = &getAnalysis<IVUsers>(); - SE = &getAnalysis<ScalarEvolution>(); - Changed = false; +void +LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, + Pass *P) { + // Keep track of instructions we may have made dead, so that + // we can remove them after we are done working. + SmallVector<WeakVH, 16> DeadInsts; + + SCEVExpander Rewriter(SE); + Rewriter.disableCanonicalMode(); + Rewriter.setIVIncInsertPos(L, IVIncInsertPos); - // If LoopSimplify form is not available, stay out of trouble. 
- if (!L->getLoopPreheader() || !L->getLoopLatch()) - return false; + // Expand the new value definitions and update the users. + for (size_t i = 0, e = Fixups.size(); i != e; ++i) { + size_t LUIdx = Fixups[i].LUIdx; + + Rewrite(Fixups[i], *Solution[LUIdx], Rewriter, DeadInsts, P); + + Changed = true; + } - if (!IU->IVUsesByStride.empty()) { - DEBUG(dbgs() << "\nLSR on \"" << L->getHeader()->getParent()->getName() - << "\" "; - L->print(dbgs())); + // Clean up after ourselves. This must be done before deleting any + // instructions. + Rewriter.clear(); - // Sort the StrideOrder so we process larger strides first. - std::stable_sort(IU->StrideOrder.begin(), IU->StrideOrder.end(), - StrideCompare(SE)); + Changed |= DeleteTriviallyDeadInstructions(DeadInsts); +} - // Optimize induction variables. Some indvar uses can be transformed to use - // strides that will be needed for other purposes. A common example of this - // is the exit test for the loop, which can often be rewritten to use the - // computation of some other indvar to decide when to terminate the loop. - OptimizeIndvars(L); +LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) + : IU(P->getAnalysis<IVUsers>()), + SE(P->getAnalysis<ScalarEvolution>()), + DT(P->getAnalysis<DominatorTree>()), + TLI(tli), L(l), Changed(false), IVIncInsertPos(0) { - // Change loop terminating condition to use the postinc iv when possible - // and optimize loop terminating compare. FIXME: Move this after - // StrengthReduceIVUsersOfStride? - OptimizeLoopTermCond(L); + // If LoopSimplify form is not available, stay out of trouble. + if (!L->isLoopSimplifyForm()) return; + + // If there's no interesting work to be done, bail early. + if (IU.empty()) return; + + DEBUG(dbgs() << "\nLSR on loop "; + WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false); + dbgs() << ":\n"); + + /// OptimizeShadowIV - If IV is used in a int-to-float cast + /// inside the loop then try to eliminate the cast operation. + OptimizeShadowIV(); + + // Change loop terminating condition to use the postinc iv when possible. + Changed |= OptimizeLoopTermCond(); + + CollectInterestingTypesAndFactors(); + CollectFixupsAndInitialFormulae(); + CollectLoopInvariantFixupsAndFormulae(); + + DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n"; + print_uses(dbgs())); + + // Now use the reuse data to generate a bunch of interesting ways + // to formulate the values needed for the uses. + GenerateAllReuseFormulae(); + + DEBUG(dbgs() << "\n" + "After generating reuse formulae:\n"; + print_uses(dbgs())); + + FilterOutUndesirableDedicatedRegisters(); + NarrowSearchSpaceUsingHeuristics(); + + SmallVector<const Formula *, 8> Solution; + Solve(Solution); + assert(Solution.size() == Uses.size() && "Malformed solution!"); + + // Release memory that is no longer needed. + Factors.clear(); + Types.clear(); + RegUses.clear(); + +#ifndef NDEBUG + // Formulae should be legal. + for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), + E = Uses.end(); I != E; ++I) { + const LSRUse &LU = *I; + for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(), + JE = LU.Formulae.end(); J != JE; ++J) + assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset, + LU.Kind, LU.AccessTy, TLI) && + "Illegal formula generated!"); + }; +#endif - // FIXME: We can shrink overlarge IV's here. e.g. if the code has - // computation in i64 values and the target doesn't support i64, demote - // the computation to 32-bit if safe. + // Now that we've decided what we want, make it so. 
+ ImplementSolution(Solution, P); +} - // FIXME: Attempt to reuse values across multiple IV's. In particular, we - // could have something like "for(i) { foo(i*8); bar(i*16) }", which should - // be codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC. - // Need to be careful that IV's are all the same type. Only works for - // intptr_t indvars. +void LSRInstance::print_factors_and_types(raw_ostream &OS) const { + if (Factors.empty() && Types.empty()) return; - // IVsByStride keeps IVs for one particular loop. - assert(IVsByStride.empty() && "Stale entries in IVsByStride?"); + OS << "LSR has identified the following interesting factors and types: "; + bool First = true; - StrengthReduceIVUsers(L); + for (SmallSetVector<int64_t, 8>::const_iterator + I = Factors.begin(), E = Factors.end(); I != E; ++I) { + if (!First) OS << ", "; + First = false; + OS << '*' << *I; + } - // After all sharing is done, see if we can adjust the loop to test against - // zero instead of counting up to a maximum. This is usually faster. - OptimizeLoopCountIV(L); + for (SmallSetVector<const Type *, 4>::const_iterator + I = Types.begin(), E = Types.end(); I != E; ++I) { + if (!First) OS << ", "; + First = false; + OS << '(' << **I << ')'; + } + OS << '\n'; +} - // We're done analyzing this loop; release all the state we built up for it. - IVsByStride.clear(); +void LSRInstance::print_fixups(raw_ostream &OS) const { + OS << "LSR is examining the following fixup sites:\n"; + for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(), + E = Fixups.end(); I != E; ++I) { + const LSRFixup &LF = *I; + dbgs() << " "; + LF.print(OS); + OS << '\n'; + } +} - // Clean up after ourselves - DeleteTriviallyDeadInstructions(); +void LSRInstance::print_uses(raw_ostream &OS) const { + OS << "LSR is examining the following uses:\n"; + for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), + E = Uses.end(); I != E; ++I) { + const LSRUse &LU = *I; + dbgs() << " "; + LU.print(OS); + OS << '\n'; + for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(), + JE = LU.Formulae.end(); J != JE; ++J) { + OS << " "; + J->print(OS); + OS << '\n'; + } } +} + +void LSRInstance::print(raw_ostream &OS) const { + print_factors_and_types(OS); + print_fixups(OS); + print_uses(OS); +} + +void LSRInstance::dump() const { + print(errs()); errs() << '\n'; +} + +namespace { + +class LoopStrengthReduce : public LoopPass { + /// TLI - Keep a pointer of a TargetLowering to consult for determining + /// transformation profitability. + const TargetLowering *const TLI; + +public: + static char ID; // Pass ID, replacement for typeid + explicit LoopStrengthReduce(const TargetLowering *tli = 0); + +private: + bool runOnLoop(Loop *L, LPPassManager &LPM); + void getAnalysisUsage(AnalysisUsage &AU) const; +}; + +} + +char LoopStrengthReduce::ID = 0; +static RegisterPass<LoopStrengthReduce> +X("loop-reduce", "Loop Strength Reduction"); + +Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { + return new LoopStrengthReduce(TLI); +} + +LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli) + : LoopPass(&ID), TLI(tli) {} + +void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { + // We split critical edges, so we change the CFG. However, we do update + // many analyses if they are around. 
+ AU.addPreservedID(LoopSimplifyID); + AU.addPreserved<LoopInfo>(); + AU.addPreserved("domfrontier"); + + AU.addRequiredID(LoopSimplifyID); + AU.addRequired<DominatorTree>(); + AU.addPreserved<DominatorTree>(); + AU.addRequired<ScalarEvolution>(); + AU.addPreserved<ScalarEvolution>(); + AU.addRequired<IVUsers>(); + AU.addPreserved<IVUsers>(); +} + +bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { + bool Changed = false; + + // Run the main LSR transformation. + Changed |= LSRInstance(TLI, L, this).getChanged(); // At this point, it is worth checking to see if any recurrence PHIs are also // dead, so that we can remove them as well. diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index e5fba28..071e9b7 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -170,7 +170,7 @@ Pass *llvm::createLoopUnswitchPass(bool Os) { /// Otherwise, return null. static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { // We can never unswitch on vector conditions. - if (isa<VectorType>(Cond->getType())) + if (Cond->getType()->isVectorTy()) return 0; // Constants should be folded, not unswitched on! @@ -871,7 +871,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC // in the loop with the appropriate one directly. if (IsEqual || (isa<ConstantInt>(Val) && - Val->getType()->isInteger(1))) { + Val->getType()->isIntegerTy(1))) { Value *Replacement; if (IsEqual) Replacement = Val; @@ -997,10 +997,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { case Instruction::And: if (isa<ConstantInt>(I->getOperand(0)) && // constant -> RHS - I->getOperand(0)->getType()->isInteger(1)) + I->getOperand(0)->getType()->isIntegerTy(1)) cast<BinaryOperator>(I)->swapOperands(); if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1))) - if (CB->getType()->isInteger(1)) { + if (CB->getType()->isIntegerTy(1)) { if (CB->isOne()) // X & 1 -> X ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM); else // X & 0 -> 0 @@ -1011,10 +1011,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { case Instruction::Or: if (isa<ConstantInt>(I->getOperand(0)) && // constant -> RHS - I->getOperand(0)->getType()->isInteger(1)) + I->getOperand(0)->getType()->isIntegerTy(1)) cast<BinaryOperator>(I)->swapOperands(); if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1))) - if (CB->getType()->isInteger(1)) { + if (CB->getType()->isIntegerTy(1)) { if (CB->isOne()) // X | 1 -> 1 ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM); else // X | 0 -> X diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index e0aa491..62e2977 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -42,7 +42,7 @@ static Value *isBytewiseValue(Value *V) { LLVMContext &Context = V->getContext(); // All byte-wide stores are splatable, even of arbitrary variables. - if (V->getType()->isInteger(8)) return V; + if (V->getType()->isIntegerTy(8)) return V; // Constant float and double values can be handled as integer values if the // corresponding integer value is "byteable". An important case is 0.0. 
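The recurring change across the hunks above is the move from isa<>-based type tests to the predicate methods on llvm::Type (isIntegerTy, isPointerTy, isVectorTy, isStructTy, isFloatingPointTy), all of which appear in the plus lines of this patch. A minimal sketch of that idiom, assuming the LLVM 2.7-era C++ headers; the helper name classifyForSplat is hypothetical and not part of the patch:

#include "llvm/Type.h"
#include "llvm/Value.h"

using namespace llvm;

// Classify a value by its type using the Type predicate methods, in the
// style this patch adopts. The "was:" comments show the older spellings
// that the diff replaces.
static const char *classifyForSplat(const Value *V) {
  const Type *Ty = V->getType();
  if (Ty->isIntegerTy(8))        // was: Ty->isInteger(8)
    return "byte";
  if (Ty->isPointerTy())         // was: isa<PointerType>(Ty)
    return "pointer";
  if (Ty->isVectorTy())          // was: isa<VectorType>(Ty)
    return "vector";
  if (Ty->isStructTy())          // was: isa<StructType>(Ty)
    return "struct";
  if (Ty->isFloatingPointTy())   // was: Ty->isFloatingPoint()
    return "fp";
  return "other";
}

The predicate form reads directly off the Type object and avoids pulling in DerivedTypes.h at call sites that only need a yes/no answer.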
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index bbd4b45..5aca9cd 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -182,7 +182,7 @@ unsigned Reassociate::getRank(Value *V) { // If this is a not or neg instruction, do not count it for rank. This // assures us that X and ~X will have the same rank. - if (!I->getType()->isInteger() || + if (!I->getType()->isIntegerTy() || (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I))) ++Rank; @@ -597,19 +597,35 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { /// FindSingleUseMultiplyFactors - If V is a single-use multiply, recursively /// add its operands as factors, otherwise add V to the list of factors. +/// +/// Ops is the top-level list of add operands we're trying to factor. static void FindSingleUseMultiplyFactors(Value *V, - SmallVectorImpl<Value*> &Factors) { + SmallVectorImpl<Value*> &Factors, + const SmallVectorImpl<ValueEntry> &Ops, + bool IsRoot) { BinaryOperator *BO; - if ((!V->hasOneUse() && !V->use_empty()) || + if (!(V->hasOneUse() || V->use_empty()) || // More than one use. !(BO = dyn_cast<BinaryOperator>(V)) || BO->getOpcode() != Instruction::Mul) { Factors.push_back(V); return; } + // If this value has a single use because it is another input to the add + // tree we're reassociating and we dropped its use, it actually has two + // uses and we can't factor it. + if (!IsRoot) { + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (Ops[i].Op == V) { + Factors.push_back(V); + return; + } + } + + // Otherwise, add the LHS and RHS to the list of factors. - FindSingleUseMultiplyFactors(BO->getOperand(1), Factors); - FindSingleUseMultiplyFactors(BO->getOperand(0), Factors); + FindSingleUseMultiplyFactors(BO->getOperand(1), Factors, Ops, false); + FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops, false); } /// OptimizeAndOrXor - Optimize a series of operands to an 'and', 'or', or 'xor' @@ -753,7 +769,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I, // Compute all of the factors of this added value. SmallVector<Value*, 8> Factors; - FindSingleUseMultiplyFactors(BOp, Factors); + FindSingleUseMultiplyFactors(BOp, Factors, Ops, true); assert(Factors.size() > 1 && "Bad linearize!"); // Add one to FactorOccurrences for each unique factor in this op. @@ -929,8 +945,8 @@ void Reassociate::ReassociateBB(BasicBlock *BB) { } // Reject cases where it is pointless to do this. - if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPoint() || - isa<VectorType>(BI->getType())) + if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() || + BI->getType()->isVectorTy()) continue; // Floating point ops are not associative. // Do not reassociate boolean (i1) expressions. We want to preserve the @@ -939,7 +955,7 @@ void Reassociate::ReassociateBB(BasicBlock *BB) { // is not further optimized, it is likely to be transformed back to a // short-circuited form for code gen, and the source order may have been // optimized for the most likely conditions. 
- if (BI->getType()->isInteger(1)) + if (BI->getType()->isIntegerTy(1)) continue; // If this is a subtract instruction which is not already in negate form, diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 02b45a1..7e37938 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -295,7 +295,7 @@ public: } void markOverdefined(Value *V) { - assert(!isa<StructType>(V->getType()) && "Should use other method"); + assert(!V->getType()->isStructTy() && "Should use other method"); markOverdefined(ValueState[V], V); } @@ -321,12 +321,12 @@ private: } void markConstant(Value *V, Constant *C) { - assert(!isa<StructType>(V->getType()) && "Should use other method"); + assert(!V->getType()->isStructTy() && "Should use other method"); markConstant(ValueState[V], V, C); } void markForcedConstant(Value *V, Constant *C) { - assert(!isa<StructType>(V->getType()) && "Should use other method"); + assert(!V->getType()->isStructTy() && "Should use other method"); ValueState[V].markForcedConstant(C); DEBUG(dbgs() << "markForcedConstant: " << *C << ": " << *V << '\n'); InstWorkList.push_back(V); @@ -360,7 +360,7 @@ private: } void mergeInValue(Value *V, LatticeVal MergeWithV) { - assert(!isa<StructType>(V->getType()) && "Should use other method"); + assert(!V->getType()->isStructTy() && "Should use other method"); mergeInValue(ValueState[V], V, MergeWithV); } @@ -369,7 +369,7 @@ private: /// value. This function handles the case when the value hasn't been seen yet /// by properly seeding constants etc. LatticeVal &getValueState(Value *V) { - assert(!isa<StructType>(V->getType()) && "Should use getStructValueState"); + assert(!V->getType()->isStructTy() && "Should use getStructValueState"); std::pair<DenseMap<Value*, LatticeVal>::iterator, bool> I = ValueState.insert(std::make_pair(V, LatticeVal())); @@ -392,7 +392,7 @@ private: /// value/field pair. This function handles the case when the value hasn't /// been seen yet by properly seeding constants etc. LatticeVal &getStructValueState(Value *V, unsigned i) { - assert(isa<StructType>(V->getType()) && "Should use getValueState"); + assert(V->getType()->isStructTy() && "Should use getValueState"); assert(i < cast<StructType>(V->getType())->getNumElements() && "Invalid element #"); @@ -666,7 +666,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { void SCCPSolver::visitPHINode(PHINode &PN) { // If this PN returns a struct, just mark the result overdefined. // TODO: We could do a lot better than this if code actually uses this. - if (isa<StructType>(PN.getType())) + if (PN.getType()->isStructTy()) return markAnythingOverdefined(&PN); if (getValueState(&PN).isOverdefined()) { @@ -742,7 +742,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) { Value *ResultOp = I.getOperand(0); // If we are tracking the return value of this function, merge it in. - if (!TrackedRetVals.empty() && !isa<StructType>(ResultOp->getType())) { + if (!TrackedRetVals.empty() && !ResultOp->getType()->isStructTy()) { DenseMap<Function*, LatticeVal>::iterator TFRVI = TrackedRetVals.find(F); if (TFRVI != TrackedRetVals.end()) { @@ -787,7 +787,7 @@ void SCCPSolver::visitCastInst(CastInst &I) { void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) { // If this returns a struct, mark all elements over defined, we don't track // structs in structs. 
- if (isa<StructType>(EVI.getType())) + if (EVI.getType()->isStructTy()) return markAnythingOverdefined(&EVI); // If this is extracting from more than one level of struct, we don't know. @@ -795,7 +795,7 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) { return markOverdefined(&EVI); Value *AggVal = EVI.getAggregateOperand(); - if (isa<StructType>(AggVal->getType())) { + if (AggVal->getType()->isStructTy()) { unsigned i = *EVI.idx_begin(); LatticeVal EltVal = getStructValueState(AggVal, i); mergeInValue(getValueState(&EVI), &EVI, EltVal); @@ -828,7 +828,7 @@ void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) { } Value *Val = IVI.getInsertedValueOperand(); - if (isa<StructType>(Val->getType())) + if (Val->getType()->isStructTy()) // We don't track structs in structs. markOverdefined(getStructValueState(&IVI, i), &IVI); else { @@ -841,7 +841,7 @@ void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) { void SCCPSolver::visitSelectInst(SelectInst &I) { // If this select returns a struct, just mark the result overdefined. // TODO: We could do a lot better than this if code actually uses this. - if (isa<StructType>(I.getType())) + if (I.getType()->isStructTy()) return markAnythingOverdefined(&I); LatticeVal CondValue = getValueState(I.getCondition()); @@ -1166,7 +1166,7 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) { void SCCPSolver::visitStoreInst(StoreInst &SI) { // If this store is of a struct, ignore it. - if (isa<StructType>(SI.getOperand(0)->getType())) + if (SI.getOperand(0)->getType()->isStructTy()) return; if (TrackedGlobals.empty() || !isa<GlobalVariable>(SI.getOperand(1))) @@ -1187,7 +1187,7 @@ void SCCPSolver::visitStoreInst(StoreInst &SI) { // global, we can replace the load with the loaded constant value! void SCCPSolver::visitLoadInst(LoadInst &I) { // If this load is of a struct, just mark the result overdefined. - if (isa<StructType>(I.getType())) + if (I.getType()->isStructTy()) return markAnythingOverdefined(&I); LatticeVal PtrVal = getValueState(I.getOperand(0)); @@ -1241,7 +1241,7 @@ CallOverdefined: // Otherwise, if we have a single return value case, and if the function is // a declaration, maybe we can constant fold it. - if (F && F->isDeclaration() && !isa<StructType>(I->getType()) && + if (F && F->isDeclaration() && !I->getType()->isStructTy() && canConstantFoldCallTo(F)) { SmallVector<Constant*, 8> Operands; @@ -1352,7 +1352,7 @@ void SCCPSolver::Solve() { // since all of its users will have already been marked as overdefined. // Update all of the users of this instruction's value. // - if (isa<StructType>(I->getType()) || !getValueState(I).isOverdefined()) + if (I->getType()->isStructTy() || !getValueState(I).isOverdefined()) for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) if (Instruction *I = dyn_cast<Instruction>(*UI)) @@ -1418,7 +1418,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { if (!LV.isUndefined()) continue; // No instructions using structs need disambiguation. - if (isa<StructType>(I->getOperand(0)->getType())) + if (I->getOperand(0)->getType()->isStructTy()) continue; // Get the lattice values of the first two operands for use below. @@ -1426,7 +1426,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { LatticeVal Op1LV; if (I->getNumOperands() == 2) { // No instructions using structs need disambiguation. 
- if (isa<StructType>(I->getOperand(1)->getType())) + if (I->getOperand(1)->getType()->isStructTy()) continue; // If this is a two-operand instruction, and if both operands are @@ -1656,7 +1656,7 @@ bool SCCP::runOnFunction(Function &F) { continue; // TODO: Reconstruct structs from their elements. - if (isa<StructType>(Inst->getType())) + if (Inst->getType()->isStructTy()) continue; LatticeVal IV = Solver.getLatticeValueFor(Inst); @@ -1792,7 +1792,7 @@ bool IPSCCP::runOnModule(Module &M) { if (Solver.isBlockExecutable(F->begin())) { for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI) { - if (AI->use_empty() || isa<StructType>(AI->getType())) continue; + if (AI->use_empty() || AI->getType()->isStructTy()) continue; // TODO: Could use getStructLatticeValueFor to find out if the entire // result is a constant and replace it entirely if so. @@ -1835,7 +1835,7 @@ bool IPSCCP::runOnModule(Module &M) { for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) { Instruction *Inst = BI++; - if (Inst->getType()->isVoidTy() || isa<StructType>(Inst->getType())) + if (Inst->getType()->isVoidTy() || Inst->getType()->isStructTy()) continue; // TODO: Could use getStructLatticeValueFor to find out if the entire @@ -1918,6 +1918,14 @@ bool IPSCCP::runOnModule(Module &M) { // all call uses with the inferred value. This means we don't need to bother // actually returning anything from the function. Replace all return // instructions with return undef. + // + // Do this in two stages: first identify the functions we should process, then + // actually zap their returns. This is important because we can only do this + // if the address of the function isn't taken. In cases where a return is the + // last use of a function, the order of processing functions would affect + // whether other functions are optimizable. + SmallVector<ReturnInst*, 8> ReturnsToZap; + // TODO: Process multiple value ret instructions also. const DenseMap<Function*, LatticeVal> &RV = Solver.getTrackedRetVals(); for (DenseMap<Function*, LatticeVal>::const_iterator I = RV.begin(), @@ -1933,7 +1941,13 @@ bool IPSCCP::runOnModule(Module &M) { for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) if (!isa<UndefValue>(RI->getOperand(0))) - RI->setOperand(0, UndefValue::get(F->getReturnType())); + ReturnsToZap.push_back(RI); + } + + // Zap all returns which we've identified as zap to change. + for (unsigned i = 0, e = ReturnsToZap.size(); i != e; ++i) { + Function *F = ReturnsToZap[i]->getParent()->getParent(); + ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType())); } // If we infered constant or undef values for globals variables, we can delete diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 900d119..bbe6270 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -302,7 +302,7 @@ bool SROA::performScalarRepl(Function &F) { // random stuff that doesn't use vectors (e.g. <9 x double>) because then // we just get a lot of insert/extracts. If at least one vector is // involved, then we probably really do have a union of vector/array. 
- if (VectorTy && isa<VectorType>(VectorTy) && HadAVector) { + if (VectorTy && VectorTy->isVectorTy() && HadAVector) { DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " << *VectorTy << '\n'); @@ -449,7 +449,7 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, // into. for (; GEPIt != E; ++GEPIt) { // Ignore struct elements, no extra checking needed for these. - if (isa<StructType>(*GEPIt)) + if ((*GEPIt)->isStructTy()) continue; ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand()); @@ -480,7 +480,7 @@ void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize, // (which are essentially the same as the MemIntrinsics, especially with // regard to copying padding between elements), or references using the // aggregate type of the alloca. - if (!MemOpType || isa<IntegerType>(MemOpType) || UsesAggregateType) { + if (!MemOpType || MemOpType->isIntegerTy() || UsesAggregateType) { if (!UsesAggregateType) { if (isStore) Info.isMemCpyDst = true; @@ -565,7 +565,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, } LI->replaceAllUsesWith(Insert); DeadInsts.push_back(LI); - } else if (isa<IntegerType>(LIType) && + } else if (LIType->isIntegerTy() && TD->getTypeAllocSize(LIType) == TD->getTypeAllocSize(AI->getAllocatedType())) { // If this is a load of the entire alloca to an integer, rewrite it. @@ -588,7 +588,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, new StoreInst(Extract, NewElts[i], SI); } DeadInsts.push_back(SI); - } else if (isa<IntegerType>(SIType) && + } else if (SIType->isIntegerTy() && TD->getTypeAllocSize(SIType) == TD->getTypeAllocSize(AI->getAllocatedType())) { // If this is a store of the entire alloca from an integer, rewrite it. @@ -833,9 +833,9 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // Convert the integer value to the appropriate type. StoreVal = ConstantInt::get(Context, TotalVal); - if (isa<PointerType>(ValTy)) + if (ValTy->isPointerTy()) StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy); - else if (ValTy->isFloatingPoint()) + else if (ValTy->isFloatingPointTy()) StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy); assert(StoreVal->getType() == ValTy && "Type mismatch!"); @@ -939,7 +939,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, Value *DestField = NewElts[i]; if (EltVal->getType() == FieldTy) { // Storing to an integer field of this size, just do it. - } else if (FieldTy->isFloatingPoint() || isa<VectorType>(FieldTy)) { + } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) { // Bitcast to the right element type (for fp/vector values). EltVal = new BitCastInst(EltVal, FieldTy, "", SI); } else { @@ -983,7 +983,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, Value *DestField = NewElts[i]; if (EltVal->getType() == ArrayEltTy) { // Storing to an integer field of this size, just do it. - } else if (ArrayEltTy->isFloatingPoint() || isa<VectorType>(ArrayEltTy)) { + } else if (ArrayEltTy->isFloatingPointTy() || + ArrayEltTy->isVectorTy()) { // Bitcast to the right element type (for fp/vector values). 
EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI); } else { @@ -1043,8 +1044,8 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), FieldSizeBits); - if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPoint() && - !isa<VectorType>(FieldTy)) + if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() && + !FieldTy->isVectorTy()) SrcField = new BitCastInst(SrcField, PointerType::getUnqual(FieldIntTy), "", LI); @@ -1182,7 +1183,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, return; } } else if (In->isFloatTy() || In->isDoubleTy() || - (isa<IntegerType>(In) && In->getPrimitiveSizeInBits() >= 8 && + (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 && isPowerOf2_32(In->getPrimitiveSizeInBits()))) { // If we're accessing something that could be an element of a vector, see // if the implied vector agrees with what we already have and if Offset is @@ -1226,7 +1227,7 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, return false; MergeInType(LI->getType(), Offset, VecTy, AllocaSize, *TD, V->getContext()); - SawVec |= isa<VectorType>(LI->getType()); + SawVec |= LI->getType()->isVectorTy(); continue; } @@ -1235,7 +1236,7 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, if (SI->getOperand(0) == V || SI->isVolatile()) return 0; MergeInType(SI->getOperand(0)->getType(), Offset, VecTy, AllocaSize, *TD, V->getContext()); - SawVec |= isa<VectorType>(SI->getOperand(0)->getType()); + SawVec |= SI->getOperand(0)->getType()->isVectorTy(); continue; } @@ -1437,7 +1438,7 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, // If the result alloca is a vector type, this is either an element // access or a bitcast to another vector type of the same size. if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) { - if (isa<VectorType>(ToType)) + if (ToType->isVectorTy()) return Builder.CreateBitCast(FromVal, ToType, "tmp"); // Otherwise it must be an element access. @@ -1520,9 +1521,9 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, LIBitWidth), "tmp"); // If the result is an integer, this is a trunc or bitcast. - if (isa<IntegerType>(ToType)) { + if (ToType->isIntegerTy()) { // Should be done. - } else if (ToType->isFloatingPoint() || isa<VectorType>(ToType)) { + } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) { // Just do a bitcast, we know the sizes match up. FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp"); } else { @@ -1600,10 +1601,10 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, unsigned DestWidth = TD->getTypeSizeInBits(AllocaType); unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType()); unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType); - if (SV->getType()->isFloatingPoint() || isa<VectorType>(SV->getType())) + if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy()) SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth), "tmp"); - else if (isa<PointerType>(SV->getType())) + else if (SV->getType()->isPointerTy()) SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(SV->getContext()), "tmp"); // Zero extend or truncate the value if needed. 
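The ScalarReplAggregates hunk above ends inside ConvertScalar_InsertValue, where a stored value is first coerced to an integer of matching width before being merged into the integer that stands in for the alloca. A minimal sketch of that coercion step under the same 2.7-era IRBuilder and TargetData APIs; CoerceToInt is a hypothetical helper, not a function from the patch:

#include "llvm/DerivedTypes.h"
#include "llvm/Value.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Target/TargetData.h"

using namespace llvm;

// Coerce SV to an integer of the same bit width so it can later be
// shifted and or'd into the alloca's replacement integer.
static Value *CoerceToInt(Value *SV, const TargetData &TD, IRBuilder<> &B) {
  unsigned SrcWidth = TD.getTypeSizeInBits(SV->getType());
  const Type *IntTy = IntegerType::get(SV->getContext(), SrcWidth);
  if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
    return B.CreateBitCast(SV, IntTy, "tmp");   // keep the bit pattern
  if (SV->getType()->isPointerTy())
    return B.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()), "tmp");
  return SV;                                    // already an integer
}

Floating-point and vector values keep their bit pattern through a same-width bitcast, while pointers go through ptrtoint to the target's intptr_t type, which is why TargetData is needed at this point.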
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 4216e8f..05027ae 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -17,6 +17,7 @@ #define DEBUG_TYPE "simplify-libcalls" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" @@ -67,496 +68,14 @@ public: Context = &CI->getCalledFunction()->getContext(); return CallOptimizer(CI->getCalledFunction(), CI, B); } - - /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. - Value *CastToCStr(Value *V, IRBuilder<> &B); - - /// EmitStrLen - Emit a call to the strlen function to the builder, for the - /// specified pointer. Ptr is required to be some pointer type, and the - /// return value has 'intptr_t' type. - Value *EmitStrLen(Value *Ptr, IRBuilder<> &B); - - /// EmitStrChr - Emit a call to the strchr function to the builder, for the - /// specified pointer and character. Ptr is required to be some pointer type, - /// and the return value has 'i8*' type. - Value *EmitStrChr(Value *Ptr, char C, IRBuilder<> &B); - - /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the - /// specified pointer arguments. - Value *EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B); - - /// EmitMemCpy - Emit a call to the memcpy function to the builder. This - /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. - Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len, - unsigned Align, IRBuilder<> &B); - - /// EmitMemMove - Emit a call to the memmove function to the builder. This - /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. - Value *EmitMemMove(Value *Dst, Value *Src, Value *Len, - unsigned Align, IRBuilder<> &B); - - /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is - /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. - Value *EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B); - - /// EmitMemCmp - Emit a call to the memcmp function. - Value *EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B); - - /// EmitMemSet - Emit a call to the memset function - Value *EmitMemSet(Value *Dst, Value *Val, Value *Len, IRBuilder<> &B); - - /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' - /// (e.g. 'floor'). This function is known to take a single of type matching - /// 'Op' and returns one value with the same type. If 'Op' is a long double, - /// 'l' is added as the suffix of name, if 'Op' is a float, we add a 'f' - /// suffix. - Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B, - const AttrListPtr &Attrs); - - /// EmitPutChar - Emit a call to the putchar function. This assumes that Char - /// is an integer. - Value *EmitPutChar(Value *Char, IRBuilder<> &B); - - /// EmitPutS - Emit a call to the puts function. This assumes that Str is - /// some pointer. - void EmitPutS(Value *Str, IRBuilder<> &B); - - /// EmitFPutC - Emit a call to the fputc function. This assumes that Char is - /// an i32, and File is a pointer to FILE. - void EmitFPutC(Value *Char, Value *File, IRBuilder<> &B); - - /// EmitFPutS - Emit a call to the puts function. Str is required to be a - /// pointer and File is a pointer to FILE. - void EmitFPutS(Value *Str, Value *File, IRBuilder<> &B); - - /// EmitFWrite - Emit a call to the fwrite function. 
This assumes that Ptr is - /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. - void EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B); - }; } // End anonymous namespace. -/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. -Value *LibCallOptimization::CastToCStr(Value *V, IRBuilder<> &B) { - return B.CreateBitCast(V, Type::getInt8PtrTy(*Context), "cstr"); -} - -/// EmitStrLen - Emit a call to the strlen function to the builder, for the -/// specified pointer. This always returns an integer value of size intptr_t. -Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | - Attribute::NoUnwind); - - Constant *StrLen =M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2), - TD->getIntPtrType(*Context), - Type::getInt8PtrTy(*Context), - NULL); - CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); - if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; -} - -/// EmitStrChr - Emit a call to the strchr function to the builder, for the -/// specified pointer and character. Ptr is required to be some pointer type, -/// and the return value has 'i8*' type. -Value *LibCallOptimization::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI = - AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); - - const Type *I8Ptr = Type::getInt8PtrTy(*Context); - const Type *I32Ty = Type::getInt32Ty(*Context); - Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1), - I8Ptr, I8Ptr, I32Ty, NULL); - CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), - ConstantInt::get(I32Ty, C), "strchr"); - if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - return CI; -} - -/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the -/// specified pointer arguments. -Value *LibCallOptimization::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - const Type *I8Ptr = Type::getInt8PtrTy(*Context); - Value *StrCpy = M->getOrInsertFunction("strcpy", AttrListPtr::get(AWI, 2), - I8Ptr, I8Ptr, I8Ptr, NULL); - CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B), - "strcpy"); - if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - return CI; -} - -/// EmitMemCpy - Emit a call to the memcpy function to the builder. This always -/// expects that the size has type 'intptr_t' and Dst/Src are pointers. -Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len, - unsigned Align, IRBuilder<> &B) { - Module *M = Caller->getParent(); - const Type *Ty = Len->getType(); - Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, &Ty, 1); - Dst = CastToCStr(Dst, B); - Src = CastToCStr(Src, B); - return B.CreateCall4(MemCpy, Dst, Src, Len, - ConstantInt::get(Type::getInt32Ty(*Context), Align)); -} - -/// EmitMemMove - Emit a call to the memmove function to the builder. 
This -/// always expects that the size has type 'intptr_t' and Dst/Src are pointers. -Value *LibCallOptimization::EmitMemMove(Value *Dst, Value *Src, Value *Len, - unsigned Align, IRBuilder<> &B) { - Module *M = Caller->getParent(); - const Type *Ty = TD->getIntPtrType(*Context); - Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, &Ty, 1); - Dst = CastToCStr(Dst, B); - Src = CastToCStr(Src, B); - Value *A = ConstantInt::get(Type::getInt32Ty(*Context), Align); - return B.CreateCall4(MemMove, Dst, Src, Len, A); -} - -/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is -/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. -Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val, - Value *Len, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI; - AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); - - Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1), - Type::getInt8PtrTy(*Context), - Type::getInt8PtrTy(*Context), - Type::getInt32Ty(*Context), - TD->getIntPtrType(*Context), - NULL); - CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); - - if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; -} - -/// EmitMemCmp - Emit a call to the memcmp function. -Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2, - Value *Len, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | - Attribute::NoUnwind); - - Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3), - Type::getInt32Ty(*Context), - Type::getInt8PtrTy(*Context), - Type::getInt8PtrTy(*Context), - TD->getIntPtrType(*Context), NULL); - CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), - Len, "memcmp"); - - if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; -} - -/// EmitMemSet - Emit a call to the memset function -Value *LibCallOptimization::EmitMemSet(Value *Dst, Value *Val, - Value *Len, IRBuilder<> &B) { - Module *M = Caller->getParent(); - Intrinsic::ID IID = Intrinsic::memset; - const Type *Tys[1]; - Tys[0] = Len->getType(); - Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 1); - Value *Align = ConstantInt::get(Type::getInt32Ty(*Context), 1); - return B.CreateCall4(MemSet, CastToCStr(Dst, B), Val, Len, Align); -} - -/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g. -/// 'floor'). This function is known to take a single of type matching 'Op' and -/// returns one value with the same type. If 'Op' is a long double, 'l' is -/// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix. -Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name, - IRBuilder<> &B, - const AttrListPtr &Attrs) { - char NameBuffer[20]; - if (!Op->getType()->isDoubleTy()) { - // If we need to add a suffix, copy into NameBuffer. 
- unsigned NameLen = strlen(Name); - assert(NameLen < sizeof(NameBuffer)-2); - memcpy(NameBuffer, Name, NameLen); - if (Op->getType()->isFloatTy()) - NameBuffer[NameLen] = 'f'; // floorf - else - NameBuffer[NameLen] = 'l'; // floorl - NameBuffer[NameLen+1] = 0; - Name = NameBuffer; - } - - Module *M = Caller->getParent(); - Value *Callee = M->getOrInsertFunction(Name, Op->getType(), - Op->getType(), NULL); - CallInst *CI = B.CreateCall(Callee, Op, Name); - CI->setAttributes(Attrs); - if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; -} - -/// EmitPutChar - Emit a call to the putchar function. This assumes that Char -/// is an integer. -Value *LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) { - Module *M = Caller->getParent(); - Value *PutChar = M->getOrInsertFunction("putchar", Type::getInt32Ty(*Context), - Type::getInt32Ty(*Context), NULL); - CallInst *CI = B.CreateCall(PutChar, - B.CreateIntCast(Char, - Type::getInt32Ty(*Context), - /*isSigned*/true, - "chari"), - "putchar"); - - if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - return CI; -} - -/// EmitPutS - Emit a call to the puts function. This assumes that Str is -/// some pointer. -void LibCallOptimization::EmitPutS(Value *Str, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - - Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2), - Type::getInt32Ty(*Context), - Type::getInt8PtrTy(*Context), - NULL); - CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts"); - if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - -} - -/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is -/// an integer and File is a pointer to FILE. -void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - Constant *F; - if (isa<PointerType>(File->getType())) - F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), - Type::getInt32Ty(*Context), - Type::getInt32Ty(*Context), File->getType(), - NULL); - else - F = M->getOrInsertFunction("fputc", - Type::getInt32Ty(*Context), - Type::getInt32Ty(*Context), - File->getType(), NULL); - Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), /*isSigned*/true, - "chari"); - CallInst *CI = B.CreateCall2(F, Char, File, "fputc"); - - if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) - CI->setCallingConv(Fn->getCallingConv()); -} - -/// EmitFPutS - Emit a call to the puts function. Str is required to be a -/// pointer and File is a pointer to FILE. 
-void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - Constant *F; - if (isa<PointerType>(File->getType())) - F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), - Type::getInt32Ty(*Context), - Type::getInt8PtrTy(*Context), - File->getType(), NULL); - else - F = M->getOrInsertFunction("fputs", Type::getInt32Ty(*Context), - Type::getInt8PtrTy(*Context), - File->getType(), NULL); - CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs"); - - if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) - CI->setCallingConv(Fn->getCallingConv()); -} - -/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is -/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. -void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File, - IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture); - AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - Constant *F; - if (isa<PointerType>(File->getType())) - F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3), - TD->getIntPtrType(*Context), - Type::getInt8PtrTy(*Context), - TD->getIntPtrType(*Context), - TD->getIntPtrType(*Context), - File->getType(), NULL); - else - F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(*Context), - Type::getInt8PtrTy(*Context), - TD->getIntPtrType(*Context), - TD->getIntPtrType(*Context), - File->getType(), NULL); - CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, - ConstantInt::get(TD->getIntPtrType(*Context), 1), File); - - if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) - CI->setCallingConv(Fn->getCallingConv()); -} //===----------------------------------------------------------------------===// // Helper Functions //===----------------------------------------------------------------------===// -/// GetStringLengthH - If we can compute the length of the string pointed to by -/// the specified pointer, return 'len+1'. If we can't, return 0. -static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) { - // Look through noop bitcast instructions. - if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) - return GetStringLengthH(BCI->getOperand(0), PHIs); - - // If this is a PHI node, there are two cases: either we have already seen it - // or we haven't. - if (PHINode *PN = dyn_cast<PHINode>(V)) { - if (!PHIs.insert(PN)) - return ~0ULL; // already in the set. - - // If it was new, see if all the input strings are the same length. - uint64_t LenSoFar = ~0ULL; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs); - if (Len == 0) return 0; // Unknown length -> unknown. - - if (Len == ~0ULL) continue; - - if (Len != LenSoFar && LenSoFar != ~0ULL) - return 0; // Disagree -> unknown. - LenSoFar = Len; - } - - // Success, all agree. 
- return LenSoFar; - } - - // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y) - if (SelectInst *SI = dyn_cast<SelectInst>(V)) { - uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs); - if (Len1 == 0) return 0; - uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs); - if (Len2 == 0) return 0; - if (Len1 == ~0ULL) return Len2; - if (Len2 == ~0ULL) return Len1; - if (Len1 != Len2) return 0; - return Len1; - } - - // If the value is not a GEP instruction nor a constant expression with a - // GEP instruction, then return unknown. - User *GEP = 0; - if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) { - GEP = GEPI; - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { - if (CE->getOpcode() != Instruction::GetElementPtr) - return 0; - GEP = CE; - } else { - return 0; - } - - // Make sure the GEP has exactly three arguments. - if (GEP->getNumOperands() != 3) - return 0; - - // Check to make sure that the first operand of the GEP is an integer and - // has value 0 so that we are sure we're indexing into the initializer. - if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) { - if (!Idx->isZero()) - return 0; - } else - return 0; - - // If the second index isn't a ConstantInt, then this is a variable index - // into the array. If this occurs, we can't say anything meaningful about - // the string. - uint64_t StartIdx = 0; - if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2))) - StartIdx = CI->getZExtValue(); - else - return 0; - - // The GEP instruction, constant or instruction, must reference a global - // variable that is a constant and is initialized. The referenced constant - // initializer is the array that we'll use for optimization. - GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)); - if (!GV || !GV->isConstant() || !GV->hasInitializer() || - GV->mayBeOverridden()) - return 0; - Constant *GlobalInit = GV->getInitializer(); - - // Handle the ConstantAggregateZero case, which is a degenerate case. The - // initializer is constant zero so the length of the string must be zero. - if (isa<ConstantAggregateZero>(GlobalInit)) - return 1; // Len = 0 offset by 1. - - // Must be a Constant Array - ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); - if (!Array || !Array->getType()->getElementType()->isInteger(8)) - return false; - - // Get the number of elements in the array - uint64_t NumElts = Array->getType()->getNumElements(); - - // Traverse the constant array from StartIdx (derived above) which is - // the place the GEP refers to in the array. - for (unsigned i = StartIdx; i != NumElts; ++i) { - Constant *Elt = Array->getOperand(i); - ConstantInt *CI = dyn_cast<ConstantInt>(Elt); - if (!CI) // This array isn't suitable, non-int initializer. - return 0; - if (CI->isZero()) - return i-StartIdx+1; // We found end of string, success! - } - - return 0; // The array isn't null terminated, conservatively return 'unknown'. -} - -/// GetStringLength - If we can compute the length of the string pointed to by -/// the specified pointer, return 'len+1'. If we can't, return 0. -static uint64_t GetStringLength(Value *V) { - if (!isa<PointerType>(V->getType())) return 0; - - SmallPtrSet<PHINode*, 32> PHIs; - uint64_t Len = GetStringLengthH(V, PHIs); - // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return - // an empty string as a length. - return Len == ~0ULL ? 1 : Len; -} - /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the /// value is equal or not-equal to zero. 
static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { @@ -613,7 +132,7 @@ struct StrCatOpt : public LibCallOptimization { void EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) { // We need to find the end of the destination string. That's where the // memory is to be moved to. We just generate a call to strlen. - Value *DstLen = EmitStrLen(Dst, B); + Value *DstLen = EmitStrLen(Dst, B, TD); // Now that we have the destination's length, we must index into the // destination's pointer to get the actual memcpy destination (end of @@ -623,7 +142,7 @@ struct StrCatOpt : public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. EmitMemCpy(CpyDst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len+1), 1, B); + ConstantInt::get(TD->getIntPtrType(*Context), Len+1), 1, B, TD); } }; @@ -638,7 +157,7 @@ struct StrNCatOpt : public StrCatOpt { FT->getReturnType() != Type::getInt8PtrTy(*Context) || FT->getParamType(0) != FT->getReturnType() || FT->getParamType(1) != FT->getReturnType() || - !isa<IntegerType>(FT->getParamType(2))) + !FT->getParamType(2)->isIntegerTy()) return 0; // Extract some information from the instruction @@ -697,11 +216,12 @@ struct StrChrOpt : public LibCallOptimization { if (!TD) return 0; uint64_t Len = GetStringLength(SrcStr); - if (Len == 0 || !FT->getParamType(1)->isInteger(32)) // memchr needs i32. + if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32. return 0; return EmitMemChr(SrcStr, CI->getOperand(2), // include nul. - ConstantInt::get(TD->getIntPtrType(*Context), Len), B); + ConstantInt::get(TD->getIntPtrType(*Context), Len), + B, TD); } // Otherwise, the character is a constant, see if the first argument is @@ -739,7 +259,7 @@ struct StrCmpOpt : public LibCallOptimization { // Verify the "strcmp" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || - !FT->getReturnType()->isInteger(32) || + !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context)) return 0; @@ -772,7 +292,7 @@ struct StrCmpOpt : public LibCallOptimization { return EmitMemCmp(Str1P, Str2P, ConstantInt::get(TD->getIntPtrType(*Context), - std::min(Len1, Len2)), B); + std::min(Len1, Len2)), B, TD); } return 0; @@ -787,10 +307,10 @@ struct StrNCmpOpt : public LibCallOptimization { // Verify the "strncmp" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || - !FT->getReturnType()->isInteger(32) || + !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context) || - !isa<IntegerType>(FT->getParamType(2))) + !FT->getParamType(2)->isIntegerTy()) return 0; Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2); @@ -852,7 +372,7 @@ struct StrCpyOpt : public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. 
EmitMemCpy(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B); + ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD); return Dst; } }; @@ -866,7 +386,7 @@ struct StrNCpyOpt : public LibCallOptimization { if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context) || - !isa<IntegerType>(FT->getParamType(2))) + !FT->getParamType(2)->isIntegerTy()) return 0; Value *Dst = CI->getOperand(1); @@ -881,7 +401,7 @@ struct StrNCpyOpt : public LibCallOptimization { if (SrcLen == 0) { // strncpy(x, "", y) -> memset(x, '\0', y, 1) EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'), LenOp, - B); + B, TD); return Dst; } @@ -901,7 +421,7 @@ struct StrNCpyOpt : public LibCallOptimization { // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] EmitMemCpy(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B); + ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD); return Dst; } @@ -915,7 +435,7 @@ struct StrLenOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 1 || FT->getParamType(0) != Type::getInt8PtrTy(*Context) || - !isa<IntegerType>(FT->getReturnType())) + !FT->getReturnType()->isIntegerTy()) return 0; Value *Src = CI->getOperand(1); @@ -939,8 +459,8 @@ struct StrToOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) || - !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1))) + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy()) return 0; Value *EndPtr = CI->getOperand(2); @@ -960,9 +480,9 @@ struct StrStrOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || - !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1)) || - !isa<PointerType>(FT->getReturnType())) + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isPointerTy()) return 0; // fold strstr(x, x) -> x. @@ -993,7 +513,7 @@ struct StrStrOpt : public LibCallOptimization { // fold strstr(x, "y") -> strchr(x, 'y'). 
if (HasStr2 && ToFindStr.size() == 1) - return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B), + return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B, TD), CI->getType()); return 0; } @@ -1006,9 +526,9 @@ struct StrStrOpt : public LibCallOptimization { struct MemCmpOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1)) || - !FT->getReturnType()->isInteger(32)) + if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy(32)) return 0; Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2); @@ -1055,13 +575,14 @@ struct MemCpyOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || - !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1)) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || FT->getParamType(2) != TD->getIntPtrType(*Context)) return 0; // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) - EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, B); + EmitMemCpy(CI->getOperand(1), CI->getOperand(2), + CI->getOperand(3), 1, B, TD); return CI->getOperand(1); } }; @@ -1076,13 +597,14 @@ struct MemMoveOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || - !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1)) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || FT->getParamType(2) != TD->getIntPtrType(*Context)) return 0; // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) - EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, B); + EmitMemMove(CI->getOperand(1), CI->getOperand(2), + CI->getOperand(3), 1, B, TD); return CI->getOperand(1); } }; @@ -1097,137 +619,20 @@ struct MemSetOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || - !isa<PointerType>(FT->getParamType(0)) || - !isa<IntegerType>(FT->getParamType(1)) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || FT->getParamType(2) != TD->getIntPtrType(*Context)) return 0; // memset(p, v, n) -> llvm.memset(p, v, n, 1) Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context), false); - EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B); + EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B, TD); return CI->getOperand(1); } }; //===----------------------------------------------------------------------===// -// Object Size Checking Optimizations -//===----------------------------------------------------------------------===// - -//===---------------------------------------===// -// 'memcpy_chk' Optimizations - -struct MemCpyChkOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // These optimizations require TargetData. 
- if (!TD) return 0; - - const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || - !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1)) || - !isa<IntegerType>(FT->getParamType(3)) || - FT->getParamType(2) != TD->getIntPtrType(*Context)) - return 0; - - ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4)); - if (!SizeCI) - return 0; - if (SizeCI->isAllOnesValue()) { - EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, B); - return CI->getOperand(1); - } - - return 0; - } -}; - -//===---------------------------------------===// -// 'memset_chk' Optimizations - -struct MemSetChkOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // These optimizations require TargetData. - if (!TD) return 0; - - const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || - !isa<PointerType>(FT->getParamType(0)) || - !isa<IntegerType>(FT->getParamType(1)) || - !isa<IntegerType>(FT->getParamType(3)) || - FT->getParamType(2) != TD->getIntPtrType(*Context)) - return 0; - - ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4)); - if (!SizeCI) - return 0; - if (SizeCI->isAllOnesValue()) { - Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context), - false); - EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B); - return CI->getOperand(1); - } - - return 0; - } -}; - -//===---------------------------------------===// -// 'memmove_chk' Optimizations - -struct MemMoveChkOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // These optimizations require TargetData. - if (!TD) return 0; - - const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || - !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1)) || - !isa<IntegerType>(FT->getParamType(3)) || - FT->getParamType(2) != TD->getIntPtrType(*Context)) - return 0; - - ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4)); - if (!SizeCI) - return 0; - if (SizeCI->isAllOnesValue()) { - EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), - 1, B); - return CI->getOperand(1); - } - - return 0; - } -}; - -struct StrCpyChkOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || - !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1))) - return 0; - - ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(3)); - if (!SizeCI) - return 0; - - // If a) we don't have any length information, or b) we know this will - // fit then just lower to a plain strcpy. Otherwise we'll keep our - // strcpy_chk call which may fail at runtime if the size is too long. - // TODO: It might be nice to get a maximum length out of the possible - // string lengths for varying. 
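The StrCpyChkOpt deleted in this hunk folded __strcpy_chk down to a plain strcpy exactly when the fortify size argument carried no information (all-ones) or provably covered the source string, as the condition just below shows. A minimal model of that decision; the helper name is illustrative, and it assumes the source length is counted including the terminating NUL:

    #include <cstdint>

    // Illustrative decision logic only, mirroring the removed condition.
    static bool canLowerStrCpyChk(uint64_t sizeArg, uint64_t srcLenWithNul) {
      const uint64_t noInfo = ~uint64_t(0);  // the "don't know" size: SizeCI->isAllOnesValue()
      return sizeArg == noInfo || sizeArg >= srcLenWithNul;
    }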
- if (SizeCI->isAllOnesValue() || - SizeCI->getZExtValue() >= GetStringLength(CI->getOperand(2))) - return EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B); - - return 0; - } -}; - - -//===----------------------------------------------------------------------===// // Math Library Optimizations //===----------------------------------------------------------------------===// @@ -1241,7 +646,7 @@ struct PowOpt : public LibCallOptimization { // result type. if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || - !FT->getParamType(0)->isFloatingPoint()) + !FT->getParamType(0)->isFloatingPointTy()) return 0; Value *Op1 = CI->getOperand(1), *Op2 = CI->getOperand(2); @@ -1295,7 +700,7 @@ struct Exp2Opt : public LibCallOptimization { // Just make sure this has 1 argument of FP type, which matches the // result type. if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isFloatingPoint()) + !FT->getParamType(0)->isFloatingPointTy()) return 0; Value *Op = CI->getOperand(1); @@ -1375,8 +780,8 @@ struct FFSOpt : public LibCallOptimization { // Just make sure this has 2 arguments of the same FP type, which match the // result type. if (FT->getNumParams() != 1 || - !FT->getReturnType()->isInteger(32) || - !isa<IntegerType>(FT->getParamType(0))) + !FT->getReturnType()->isIntegerTy(32) || + !FT->getParamType(0)->isIntegerTy()) return 0; Value *Op = CI->getOperand(1); @@ -1410,8 +815,8 @@ struct IsDigitOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); // We require integer(i32) - if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) || - !FT->getParamType(0)->isInteger(32)) + if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || + !FT->getParamType(0)->isIntegerTy(32)) return 0; // isdigit(c) -> (c-'0') <u 10 @@ -1431,8 +836,8 @@ struct IsAsciiOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); // We require integer(i32) - if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) || - !FT->getParamType(0)->isInteger(32)) + if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || + !FT->getParamType(0)->isIntegerTy(32)) return 0; // isascii(c) -> c <u 128 @@ -1450,7 +855,7 @@ struct AbsOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); // We require integer(integer) where the types agree. - if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) || + if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || FT->getParamType(0) != FT->getReturnType()) return 0; @@ -1473,7 +878,7 @@ struct ToAsciiOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); // We require i32(i32) if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isInteger(32)) + !FT->getParamType(0)->isIntegerTy(32)) return 0; // isascii(c) -> c & 0x7f @@ -1493,8 +898,8 @@ struct PrintFOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Require one fixed pointer argument and an integer/void result. 
const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() < 1 || !isa<PointerType>(FT->getParamType(0)) || - !(isa<IntegerType>(FT->getReturnType()) || + if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || + !(FT->getReturnType()->isIntegerTy() || FT->getReturnType()->isVoidTy())) return 0; @@ -1512,7 +917,7 @@ struct PrintFOpt : public LibCallOptimization { // in case there is an error writing to stdout. if (FormatStr.size() == 1) { Value *Res = EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context), - FormatStr[0]), B); + FormatStr[0]), B, TD); if (CI->use_empty()) return CI; return B.CreateIntCast(Res, CI->getType(), true); } @@ -1526,7 +931,7 @@ struct PrintFOpt : public LibCallOptimization { Constant *C = ConstantArray::get(*Context, FormatStr, true); C = new GlobalVariable(*Callee->getParent(), C->getType(), true, GlobalVariable::InternalLinkage, C, "str"); - EmitPutS(C, B); + EmitPutS(C, B, TD); return CI->use_empty() ? (Value*)CI : ConstantInt::get(CI->getType(), FormatStr.size()+1); } @@ -1534,8 +939,8 @@ struct PrintFOpt : public LibCallOptimization { // Optimize specific format strings. // printf("%c", chr) --> putchar(*(i8*)dst) if (FormatStr == "%c" && CI->getNumOperands() > 2 && - isa<IntegerType>(CI->getOperand(2)->getType())) { - Value *Res = EmitPutChar(CI->getOperand(2), B); + CI->getOperand(2)->getType()->isIntegerTy()) { + Value *Res = EmitPutChar(CI->getOperand(2), B, TD); if (CI->use_empty()) return CI; return B.CreateIntCast(Res, CI->getType(), true); @@ -1543,9 +948,9 @@ struct PrintFOpt : public LibCallOptimization { // printf("%s\n", str) --> puts(str) if (FormatStr == "%s\n" && CI->getNumOperands() > 2 && - isa<PointerType>(CI->getOperand(2)->getType()) && + CI->getOperand(2)->getType()->isPointerTy() && CI->use_empty()) { - EmitPutS(CI->getOperand(2), B); + EmitPutS(CI->getOperand(2), B, TD); return CI; } return 0; @@ -1559,9 +964,9 @@ struct SPrintFOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Require two fixed pointer arguments and an integer result. const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1)) || - !isa<IntegerType>(FT->getReturnType())) + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) return 0; // Check for a fixed format string. @@ -1582,8 +987,8 @@ struct SPrintFOpt : public LibCallOptimization { // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte. - ConstantInt::get - (TD->getIntPtrType(*Context), FormatStr.size()+1),1,B); + ConstantInt::get(TD->getIntPtrType(*Context), + FormatStr.size()+1), 1, B, TD); return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1595,7 +1000,7 @@ struct SPrintFOpt : public LibCallOptimization { // Decode the second character of the format string. 
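At the source level, the printf and sprintf format-string folds implemented here (including the '%c' and '%s' cases decoded just below) look roughly like this. Illustrative only, assuming standard C library semantics; the pass matches the equivalent IR call patterns and, for the puts rewrite, additionally requires the printf result to be unused.

    #include <cstdio>

    void formatFoldExamples(char *buf, const char *name, int c) {
      std::printf("%c", c);           // -> putchar(c)
      std::printf("%s\n", name);      // -> puts(name), when the printf result is unused
      std::sprintf(buf, "%c", c);     // -> buf[0] = (char)c; buf[1] = '\0'; result is 1
      std::sprintf(buf, "%s", name);  // -> memcpy(buf, name, strlen(name) + 1); result is strlen(name)
    }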
if (FormatStr[1] == 'c') { // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 - if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0; + if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0; Value *V = B.CreateTrunc(CI->getOperand(3), Type::getInt8Ty(*Context), "char"); Value *Ptr = CastToCStr(CI->getOperand(1), B); @@ -1612,13 +1017,13 @@ struct SPrintFOpt : public LibCallOptimization { if (!TD) return 0; // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) - if (!isa<PointerType>(CI->getOperand(3)->getType())) return 0; + if (!CI->getOperand(3)->getType()->isPointerTy()) return 0; - Value *Len = EmitStrLen(CI->getOperand(3), B); + Value *Len = EmitStrLen(CI->getOperand(3), B, TD); Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); - EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B); + EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B, TD); // The sprintf result is the unincremented number of bytes in the string. return B.CreateIntCast(Len, CI->getType(), false); @@ -1634,11 +1039,11 @@ struct FWriteOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Require a pointer, an integer, an integer, a pointer, returning integer. const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 4 || !isa<PointerType>(FT->getParamType(0)) || - !isa<IntegerType>(FT->getParamType(1)) || - !isa<IntegerType>(FT->getParamType(2)) || - !isa<PointerType>(FT->getParamType(3)) || - !isa<IntegerType>(FT->getReturnType())) + if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + !FT->getParamType(2)->isIntegerTy() || + !FT->getParamType(3)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) return 0; // Get the element size and count. @@ -1654,7 +1059,7 @@ struct FWriteOpt : public LibCallOptimization { // If this is writing one byte, turn it into fputc. if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F) Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char"); - EmitFPutC(Char, CI->getOperand(4), B); + EmitFPutC(Char, CI->getOperand(4), B, TD); return ConstantInt::get(CI->getType(), 1); } @@ -1672,8 +1077,8 @@ struct FPutsOpt : public LibCallOptimization { // Require two pointers. Also, we can't optimize if return value is used. const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1)) || + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || !CI->use_empty()) return 0; @@ -1682,7 +1087,7 @@ struct FPutsOpt : public LibCallOptimization { if (!Len) return 0; EmitFWrite(CI->getOperand(1), ConstantInt::get(TD->getIntPtrType(*Context), Len-1), - CI->getOperand(2), B); + CI->getOperand(2), B, TD); return CI; // Known to have no uses (see above). } }; @@ -1694,9 +1099,9 @@ struct FPrintFOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Require two fixed paramters as pointers and integer result. 
const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1)) || - !isa<IntegerType>(FT->getReturnType())) + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) return 0; // All the optimizations depend on the format string. @@ -1716,7 +1121,7 @@ struct FPrintFOpt : public LibCallOptimization { EmitFWrite(CI->getOperand(2), ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()), - CI->getOperand(1), B); + CI->getOperand(1), B, TD); return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1728,16 +1133,16 @@ struct FPrintFOpt : public LibCallOptimization { // Decode the second character of the format string. if (FormatStr[1] == 'c') { // fprintf(F, "%c", chr) --> *(i8*)dst = chr - if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0; - EmitFPutC(CI->getOperand(3), CI->getOperand(1), B); + if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0; + EmitFPutC(CI->getOperand(3), CI->getOperand(1), B, TD); return ConstantInt::get(CI->getType(), 1); } if (FormatStr[1] == 's') { // fprintf(F, "%s", str) -> fputs(str, F) - if (!isa<PointerType>(CI->getOperand(3)->getType()) || !CI->use_empty()) + if (!CI->getOperand(3)->getType()->isPointerTy() || !CI->use_empty()) return 0; - EmitFPutS(CI->getOperand(3), CI->getOperand(1), B); + EmitFPutS(CI->getOperand(3), CI->getOperand(1), B, TD); return CI; } return 0; @@ -1769,10 +1174,6 @@ namespace { SPrintFOpt SPrintF; PrintFOpt PrintF; FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF; - // Object Size Checking - MemCpyChkOpt MemCpyChk; MemSetChkOpt MemSetChk; MemMoveChkOpt MemMoveChk; - StrCpyChkOpt StrCpyChk; - bool Modified; // This is only used by doInitialization. 
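The members deleted just above belong to the object-size-checking optimizations removed earlier in this patch; the optimizers that remain now emit their replacement libcalls through the BuildLibCalls helpers added later in this change, which take the TargetData explicitly. A minimal sketch of that call-shape change, using the EmitMemCpy declaration this patch adds (the wrapper function here is illustrative):

    #include "llvm/Transforms/Utils/BuildLibCalls.h"
    #include "llvm/Support/IRBuilder.h"
    #include "llvm/Target/TargetData.h"
    using namespace llvm;

    static Value *lowerToMemCpy(Value *Dst, Value *Src, Value *Len,
                                IRBuilder<> &B, const TargetData *TD) {
      // Before this patch: EmitMemCpy(Dst, Src, Len, /*Align=*/1, B);
      // Now the TargetData pointer is threaded through explicitly.
      return EmitMemCpy(Dst, Src, Len, /*Align=*/1, B, TD);
    }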
public: static char ID; // Pass identification @@ -1878,12 +1279,6 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["fwrite"] = &FWrite; Optimizations["fputs"] = &FPuts; Optimizations["fprintf"] = &FPrintF; - - // Object Size Checking - Optimizations["__memcpy_chk"] = &MemCpyChk; - Optimizations["__memset_chk"] = &MemSetChk; - Optimizations["__memmove_chk"] = &MemMoveChk; - Optimizations["__strcpy_chk"] = &StrCpyChk; } @@ -2000,7 +1395,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 's': if (Name == "strlen") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setOnlyReadsMemory(F); setDoesNotThrow(F); @@ -2018,14 +1413,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "strncpy" || Name == "strtoull") { if (FTy->getNumParams() < 2 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); } else if (Name == "strxfrm") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2038,8 +1433,8 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "strcasecmp" || Name == "strncasecmp") { if (FTy->getNumParams() < 2 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setOnlyReadsMemory(F); setDoesNotThrow(F); @@ -2048,7 +1443,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } else if (Name == "strstr" || Name == "strpbrk") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setOnlyReadsMemory(F); setDoesNotThrow(F); @@ -2056,7 +1451,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } else if (Name == "strtok" || Name == "strtok_r") { if (FTy->getNumParams() < 2 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); @@ -2064,15 +1459,15 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "setbuf" || Name == "setvbuf") { if (FTy->getNumParams() < 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); } else if (Name == "strdup" || Name == "strndup") { if (FTy->getNumParams() < 1 || - !isa<PointerType>(FTy->getReturnType()) || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); @@ -2082,31 +1477,31 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "sprintf" || Name == "statvfs") { if (FTy->getNumParams() < 2 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); } else if (Name == "snprintf") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(2))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); 
setDoesNotCapture(F, 3); } else if (Name == "setitimer") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(1)) || - !isa<PointerType>(FTy->getParamType(2))) + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); setDoesNotCapture(F, 3); } else if (Name == "system") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; // May throw; "system" is a valid pthread cancellation point. setDoesNotCapture(F, 1); @@ -2115,14 +1510,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'm': if (Name == "malloc") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getReturnType())) + !FTy->getReturnType()->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); } else if (Name == "memcmp") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setOnlyReadsMemory(F); setDoesNotThrow(F); @@ -2141,18 +1536,18 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "memccpy" || Name == "memmove") { if (FTy->getNumParams() < 2 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); } else if (Name == "memalign") { - if (!isa<PointerType>(FTy->getReturnType())) + if (!FTy->getReturnType()->isPointerTy()) continue; setDoesNotAlias(F, 0); } else if (Name == "mkdir" || Name == "mktime") { if (FTy->getNumParams() == 0 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2161,15 +1556,15 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'r': if (Name == "realloc") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getReturnType())) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getReturnType()->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); setDoesNotCapture(F, 1); } else if (Name == "read") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; // May throw; "read" is a valid pthread cancellation point. setDoesNotCapture(F, 2); @@ -2178,15 +1573,15 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "remove" || Name == "realpath") { if (FTy->getNumParams() < 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); } else if (Name == "rename" || Name == "readlink") { if (FTy->getNumParams() < 2 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2196,7 +1591,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'w': if (Name == "write") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; // May throw; "write" is a valid pthread cancellation point. 
setDoesNotCapture(F, 2); @@ -2205,16 +1600,16 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'b': if (Name == "bcopy") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); } else if (Name == "bcmp") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setOnlyReadsMemory(F); @@ -2222,7 +1617,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotCapture(F, 2); } else if (Name == "bzero") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2231,7 +1626,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'c': if (Name == "calloc") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getReturnType())) + !FTy->getReturnType()->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); @@ -2241,7 +1636,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "clearerr" || Name == "closedir") { if (FTy->getNumParams() == 0 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2253,14 +1648,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "atof" || Name == "atoll") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setOnlyReadsMemory(F); setDoesNotCapture(F, 1); } else if (Name == "access") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2269,9 +1664,9 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'f': if (Name == "fopen") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getReturnType()) || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); @@ -2279,8 +1674,8 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotCapture(F, 2); } else if (Name == "fdopen") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getReturnType()) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); @@ -2300,13 +1695,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "funlockfile" || Name == "ftrylockfile") { if (FTy->getNumParams() == 0 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); } else if (Name == "ferror") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2318,22 +1713,22 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "frexpl" || Name == "fstatvfs") { if (FTy->getNumParams() 
!= 2 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); } else if (Name == "fgets") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(2))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 3); } else if (Name == "fread" || Name == "fwrite") { if (FTy->getNumParams() != 4 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(3))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(3)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2343,8 +1738,8 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "fprintf" || Name == "fgetpos") { if (FTy->getNumParams() < 2 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2356,13 +1751,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "getlogin_r" || Name == "getc_unlocked") { if (FTy->getNumParams() == 0 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); } else if (Name == "getenv") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setOnlyReadsMemory(F); @@ -2372,13 +1767,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); } else if (Name == "getitimer") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); } else if (Name == "getpwnam") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2387,7 +1782,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'u': if (Name == "ungetc") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); @@ -2395,15 +1790,15 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "unlink" || Name == "unsetenv") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); } else if (Name == "utime" || Name == "utimes") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2413,7 +1808,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'p': if (Name == "putc") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); @@ -2421,14 +1816,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "printf" || Name == "perror") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); } 
else if (Name == "pread" || Name == "pwrite") { if (FTy->getNumParams() != 4 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; // May throw; these are valid pthread cancellation points. setDoesNotCapture(F, 2); @@ -2436,9 +1831,9 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); } else if (Name == "popen") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getReturnType()) || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); @@ -2446,7 +1841,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotCapture(F, 2); } else if (Name == "pclose") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2455,43 +1850,43 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'v': if (Name == "vscanf") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); } else if (Name == "vsscanf" || Name == "vfscanf") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(1)) || - !isa<PointerType>(FTy->getParamType(2))) + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); } else if (Name == "valloc") { - if (!isa<PointerType>(FTy->getReturnType())) + if (!FTy->getReturnType()->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); } else if (Name == "vprintf") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); } else if (Name == "vfprintf" || Name == "vsprintf") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); } else if (Name == "vsnprintf") { if (FTy->getNumParams() != 4 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(2))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2501,14 +1896,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'o': if (Name == "open") { if (FTy->getNumParams() < 2 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; // May throw; "open" is a valid pthread cancellation point. 
setDoesNotCapture(F, 1); } else if (Name == "opendir") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getReturnType()) || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); @@ -2517,13 +1912,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) { break; case 't': if (Name == "tmpfile") { - if (!isa<PointerType>(FTy->getReturnType())) + if (!FTy->getReturnType()->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); } else if (Name == "times") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2546,15 +1941,15 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'l': if (Name == "lstat") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); } else if (Name == "lchown") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2563,7 +1958,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'q': if (Name == "qsort") { if (FTy->getNumParams() != 4 || - !isa<PointerType>(FTy->getParamType(3))) + !FTy->getParamType(3)->isPointerTy()) continue; // May throw; places call through function pointer. setDoesNotCapture(F, 4); @@ -2573,27 +1968,27 @@ bool SimplifyLibCalls::doInitialization(Module &M) { if (Name == "__strdup" || Name == "__strndup") { if (FTy->getNumParams() < 1 || - !isa<PointerType>(FTy->getReturnType()) || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); setDoesNotCapture(F, 1); } else if (Name == "__strtok_r") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); } else if (Name == "_IO_getc") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); } else if (Name == "_IO_putc") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); @@ -2602,7 +1997,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 1: if (Name == "\1__isoc99_scanf") { if (FTy->getNumParams() < 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2611,17 +2006,17 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "\1statvfs64" || Name == "\1__isoc99_sscanf") { if (FTy->getNumParams() < 1 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); } else if (Name == "\1fopen64") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getReturnType()) || - !isa<PointerType>(FTy->getParamType(0)) || - 
!isa<PointerType>(FTy->getParamType(1))) + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); @@ -2630,25 +2025,25 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } else if (Name == "\1fseeko64" || Name == "\1ftello64") { if (FTy->getNumParams() == 0 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); } else if (Name == "\1tmpfile64") { - if (!isa<PointerType>(FTy->getReturnType())) + if (!FTy->getReturnType()->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); } else if (Name == "\1fstat64" || Name == "\1fstatvfs64") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); } else if (Name == "\1open64") { if (FTy->getNumParams() < 2 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; // May throw; "open" is a valid pthread cancellation point. setDoesNotCapture(F, 1); diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index 8c4aa59..be6b383 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -125,7 +125,7 @@ static bool MightBeFoldableInst(Instruction *I) { // Don't touch identity bitcasts. if (I->getType() == I->getOperand(0)->getType()) return false; - return isa<PointerType>(I->getType()) || isa<IntegerType>(I->getType()); + return I->getType()->isPointerTy() || I->getType()->isIntegerTy(); case Instruction::PtrToInt: // PtrToInt is always a noop, as we know that the int type is pointer sized. return true; @@ -167,8 +167,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, case Instruction::BitCast: // BitCast is always a noop, and we can handle it as long as it is // int->int or pointer->pointer (we don't want int<->fp or something). - if ((isa<PointerType>(AddrInst->getOperand(0)->getType()) || - isa<IntegerType>(AddrInst->getOperand(0)->getType())) && + if ((AddrInst->getOperand(0)->getType()->isPointerTy() || + AddrInst->getOperand(0)->getType()->isIntegerTy()) && // Don't touch identity bitcasts. These were probably put here by LSR, // and we don't want to mess around with them. Assume it knows what it // is doing. @@ -569,7 +569,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // Get the access type of this use. If the use isn't a pointer, we don't // know what it accesses. 
Value *Address = User->getOperand(OpNo); - if (!isa<PointerType>(Address->getType())) + if (!Address->getType()->isPointerTy()) return false; const Type *AddressAccessTy = cast<PointerType>(Address->getType())->getElementType(); diff --git a/lib/Transforms/Utils/Android.mk b/lib/Transforms/Utils/Android.mk new file mode 100644 index 0000000..d9f31d7 --- /dev/null +++ b/lib/Transforms/Utils/Android.mk @@ -0,0 +1,49 @@ +LOCAL_PATH:= $(call my-dir) + +transforms_utils_SRC_FILES := \ + AddrModeMatcher.cpp \ + BasicBlockUtils.cpp \ + BasicInliner.cpp \ + BreakCriticalEdges.cpp \ + CloneFunction.cpp \ + CloneLoop.cpp \ + CloneModule.cpp \ + CodeExtractor.cpp \ + DemoteRegToStack.cpp \ + InlineFunction.cpp \ + InstructionNamer.cpp \ + LCSSA.cpp \ + Local.cpp \ + LoopSimplify.cpp \ + LoopUnroll.cpp \ + LowerInvoke.cpp \ + LowerSwitch.cpp \ + Mem2Reg.cpp \ + PromoteMemoryToRegister.cpp \ + SSAUpdater.cpp \ + SSI.cpp \ + SimplifyCFG.cpp \ + UnifyFunctionExitNodes.cpp \ + ValueMapper.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(transforms_utils_SRC_FILES) +LOCAL_MODULE:= libLLVMTransformUtils + +include $(LLVM_HOST_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(transforms_utils_SRC_FILES) +LOCAL_MODULE:= libLLVMTransformUtils + +include $(LLVM_DEVICE_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 7bc4fcd..1f62dab 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -274,24 +274,31 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) { ReplaceInstWithInst(TI, NewTI); } -/// SplitEdge - Split the edge connecting specified block. Pass P must -/// not be NULL. -BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { - TerminatorInst *LatchTerm = BB->getTerminator(); - unsigned SuccNum = 0; +/// GetSuccessorNumber - Search for the specified successor of basic block BB +/// and return its position in the terminator instruction's list of +/// successors. It is an error to call this with a block that is not a +/// successor. +unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) { + TerminatorInst *Term = BB->getTerminator(); #ifndef NDEBUG - unsigned e = LatchTerm->getNumSuccessors(); + unsigned e = Term->getNumSuccessors(); #endif for (unsigned i = 0; ; ++i) { assert(i != e && "Didn't find edge?"); - if (LatchTerm->getSuccessor(i) == Succ) { - SuccNum = i; - break; - } + if (Term->getSuccessor(i) == Succ) + return i; } + return 0; +} + +/// SplitEdge - Split the edge connecting specified block. Pass P must +/// not be NULL. +BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { + unsigned SuccNum = GetSuccessorNumber(BB, Succ); // If this is a critical edge, let SplitCriticalEdge do it. 
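GetSuccessorNumber, introduced above, factors the successor-index search out of SplitEdge so other callers can reuse it; combined with SplitCriticalEdge (which returns the new block, or null when the edge is not critical) it supports exactly the pattern SplitEdge follows just below. A small usage sketch, assuming the new helper is declared in BasicBlockUtils.h alongside SplitEdge; the wrapper name is illustrative:

    #include "llvm/Pass.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"
    using namespace llvm;

    // Split the BB->Succ edge only if it is critical; returns the inserted
    // block, or null if the edge did not need splitting.
    static BasicBlock *splitIfCritical(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
      unsigned SuccNum = GetSuccessorNumber(BB, Succ);
      return SplitCriticalEdge(BB->getTerminator(), SuccNum, P);
    }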
- if (SplitCriticalEdge(BB->getTerminator(), SuccNum, P)) + TerminatorInst *LatchTerm = BB->getTerminator(); + if (SplitCriticalEdge(LatchTerm, SuccNum, P)) return LatchTerm->getSuccessor(SuccNum); // If the edge isn't critical, then BB has a single successor or Succ has a diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 19c7206..3657390 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -179,7 +179,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // Create a new basic block, linking it into the CFG. BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), TIBB->getName() + "." + DestBB->getName() + "_crit_edge"); - // Create our unconditional branch... + // Create our unconditional branch. BranchInst::Create(DestBB, NewBB); // Branch to the new block, breaking the edge. @@ -192,16 +192,47 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // If there are any PHI nodes in DestBB, we need to update them so that they // merge incoming values from NewBB instead of from TIBB. - // - for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { - PHINode *PN = cast<PHINode>(I); - // We no longer enter through TIBB, now we come in through NewBB. Revector - // exactly one entry in the PHI node that used to come from TIBB to come - // from NewBB. - int BBIdx = PN->getBasicBlockIndex(TIBB); - PN->setIncomingBlock(BBIdx, NewBB); + if (PHINode *APHI = dyn_cast<PHINode>(DestBB->begin())) { + // This conceptually does: + // foreach (PHINode *PN in DestBB) + // PN->setIncomingBlock(PN->getIncomingBlock(TIBB), NewBB); + // but is optimized for two cases. + + if (APHI->getNumIncomingValues() <= 8) { // Small # preds case. + unsigned BBIdx = 0; + for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { + // We no longer enter through TIBB, now we come in through NewBB. + // Revector exactly one entry in the PHI node that used to come from + // TIBB to come from NewBB. + PHINode *PN = cast<PHINode>(I); + + // Reuse the previous value of BBIdx if it lines up. In cases where we + // have multiple phi nodes with *lots* of predecessors, this is a speed + // win because we don't have to scan the PHI looking for TIBB. This + // happens because the BB list of PHI nodes are usually in the same + // order. + if (PN->getIncomingBlock(BBIdx) != TIBB) + BBIdx = PN->getBasicBlockIndex(TIBB); + PN->setIncomingBlock(BBIdx, NewBB); + } + } else { + // However, the foreach loop is slow for blocks with lots of predecessors + // because PHINode::getIncomingBlock is O(n) in # preds. Instead, walk + // the user list of TIBB to find the PHI nodes. + SmallPtrSet<PHINode*, 16> UpdatedPHIs; + + for (Value::use_iterator UI = TIBB->use_begin(), E = TIBB->use_end(); + UI != E; ) { + Value::use_iterator Use = UI++; + if (PHINode *PN = dyn_cast<PHINode>(Use)) { + // Remove one entry from each PHI. + if (PN->getParent() == DestBB && UpdatedPHIs.insert(PN)) + PN->setOperand(Use.getOperandNo(), NewBB); + } + } + } } - + // If there are any other edges from TIBB to DestBB, update those to go // through the split block, making those edges non-critical as well (and // reducing the number of phi entries in the DestBB if relevant). @@ -221,6 +252,15 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // If we don't have a pass object, we can't update anything... 
if (P == 0) return NewBB; + + DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); + DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>(); + LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); + ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); + + // If we have nothing to update, just return. + if (DT == 0 && DF == 0 && LI == 0 && PI == 0) + return NewBB; // Now update analysis information. Since the only predecessor of NewBB is // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate @@ -229,14 +269,23 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // loop header) then NewBB dominates DestBB. SmallVector<BasicBlock*, 8> OtherPreds; - for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; ++I) - if (*I != NewBB) - OtherPreds.push_back(*I); + // If there is a PHI in the block, loop over predecessors with it, which is + // faster than iterating pred_begin/end. + if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingBlock(i) != NewBB) + OtherPreds.push_back(PN->getIncomingBlock(i)); + } else { + for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); + I != E; ++I) + if (*I != NewBB) + OtherPreds.push_back(*I); + } bool NewBBDominatesDestBB = true; // Should we update DominatorTree information? - if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) { + if (DT) { DomTreeNode *TINode = DT->getNode(TIBB); // The new block is not the immediate dominator for any other nodes, but @@ -267,7 +316,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } // Should we update DominanceFrontier information? - if (DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>()) { + if (DF) { // If NewBBDominatesDestBB hasn't been computed yet, do so with DF. if (!OtherPreds.empty()) { // FIXME: IMPLEMENT THIS! @@ -301,7 +350,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } // Update LoopInfo if it is around. - if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) { + if (LI) { if (Loop *TIL = LI->getLoopFor(TIBB)) { // If one or the other blocks were not in a loop, the new block is not // either, and thus LI doesn't need to be updated. @@ -382,9 +431,8 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } // Update ProfileInfo if it is around. - if (ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>()) { - PI->splitEdge(TIBB,DestBB,NewBB,MergeIdenticalEdges); - } + if (PI) + PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges); return NewBB; } diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp new file mode 100644 index 0000000..2ea4bb6 --- /dev/null +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -0,0 +1,324 @@ +//===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements some functions that will create standard C libcalls. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Type.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Module.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Target/TargetData.h" +#include "llvm/LLVMContext.h" +#include "llvm/Intrinsics.h" + +using namespace llvm; + +/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. +Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) { + return B.CreateBitCast(V, B.getInt8PtrTy(), "cstr"); +} + +/// EmitStrLen - Emit a call to the strlen function to the builder, for the +/// specified pointer. This always returns an integer value of size intptr_t. +Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[2]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | + Attribute::NoUnwind); + + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2), + TD->getIntPtrType(Context), + B.getInt8PtrTy(), + NULL); + CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); + if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +/// EmitStrChr - Emit a call to the strchr function to the builder, for the +/// specified pointer and character. Ptr is required to be some pointer type, +/// and the return value has 'i8*' type. +Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, + const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI = + AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); + + const Type *I8Ptr = B.getInt8PtrTy(); + const Type *I32Ty = B.getInt32Ty(); + Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1), + I8Ptr, I8Ptr, I32Ty, NULL); + CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), + ConstantInt::get(I32Ty, C), "strchr"); + if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the +/// specified pointer arguments. +Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, + const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[2]; + AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + const Type *I8Ptr = B.getInt8PtrTy(); + Value *StrCpy = M->getOrInsertFunction("strcpy", AttrListPtr::get(AWI, 2), + I8Ptr, I8Ptr, I8Ptr, NULL); + CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B), + "strcpy"); + if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +/// EmitMemCpy - Emit a call to the memcpy function to the builder. This always +/// expects that the size has type 'intptr_t' and Dst/Src are pointers. 
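A sketch of how a client uses these helpers: EmitStrLen returns the length as an intptr_t-sized integer, and EmitMemCpy (defined next) consumes a length of that same type, so the strcpy-style copy SimplifyLibCalls builds for sprintf(dst, "%s", str) can be written directly against them. The wrapper below is illustrative only and assumes Dst and Src are i8* values and TD is non-null:

    #include "llvm/Constants.h"
    #include "llvm/Support/IRBuilder.h"
    #include "llvm/Target/TargetData.h"
    #include "llvm/Transforms/Utils/BuildLibCalls.h"
    using namespace llvm;

    // Copy Src into Dst including the terminating NUL; returns the length
    // (not counting the NUL), matching what the sprintf fold reports.
    static Value *copyCString(Value *Dst, Value *Src, IRBuilder<> &B,
                              const TargetData *TD) {
      Value *Len = EmitStrLen(Src, B, TD);
      Value *LenWithNul =
          B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc");
      EmitMemCpy(Dst, Src, LenWithNul, /*Align=*/1, B, TD);
      return Len;
    }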
+Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len, + unsigned Align, IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + const Type *Ty = Len->getType(); + Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, &Ty, 1); + Dst = CastToCStr(Dst, B); + Src = CastToCStr(Src, B); + return B.CreateCall4(MemCpy, Dst, Src, Len, + ConstantInt::get(B.getInt32Ty(), Align)); +} + +/// EmitMemMove - Emit a call to the memmove function to the builder. This +/// always expects that the size has type 'intptr_t' and Dst/Src are pointers. +Value *llvm::EmitMemMove(Value *Dst, Value *Src, Value *Len, + unsigned Align, IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + const Type *Ty = TD->getIntPtrType(Context); + Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, &Ty, 1); + Dst = CastToCStr(Dst, B); + Src = CastToCStr(Src, B); + Value *A = ConstantInt::get(B.getInt32Ty(), Align); + return B.CreateCall4(MemMove, Dst, Src, Len, A); +} + +/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is +/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. +Value *llvm::EmitMemChr(Value *Ptr, Value *Val, + Value *Len, IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI; + AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1), + B.getInt8PtrTy(), + B.getInt8PtrTy(), + B.getInt32Ty(), + TD->getIntPtrType(Context), + NULL); + CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); + + if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +/// EmitMemCmp - Emit a call to the memcmp function. +Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, + Value *Len, IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[3]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | + Attribute::NoUnwind); + + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3), + B.getInt32Ty(), + B.getInt8PtrTy(), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), NULL); + CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), + Len, "memcmp"); + + if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +/// EmitMemSet - Emit a call to the memset function +Value *llvm::EmitMemSet(Value *Dst, Value *Val, + Value *Len, IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Intrinsic::ID IID = Intrinsic::memset; + const Type *Tys[1]; + Tys[0] = Len->getType(); + Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 1); + Value *Align = ConstantInt::get(B.getInt32Ty(), 1); + return B.CreateCall4(MemSet, CastToCStr(Dst, B), Val, Len, Align); +} + +/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g. +/// 'floor'). 
This function is known to take a single of type matching 'Op' and +/// returns one value with the same type. If 'Op' is a long double, 'l' is +/// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix. +Value *llvm::EmitUnaryFloatFnCall(Value *Op, const char *Name, + IRBuilder<> &B, const AttrListPtr &Attrs) { + char NameBuffer[20]; + if (!Op->getType()->isDoubleTy()) { + // If we need to add a suffix, copy into NameBuffer. + unsigned NameLen = strlen(Name); + assert(NameLen < sizeof(NameBuffer)-2); + memcpy(NameBuffer, Name, NameLen); + if (Op->getType()->isFloatTy()) + NameBuffer[NameLen] = 'f'; // floorf + else + NameBuffer[NameLen] = 'l'; // floorl + NameBuffer[NameLen+1] = 0; + Name = NameBuffer; + } + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Value *Callee = M->getOrInsertFunction(Name, Op->getType(), + Op->getType(), NULL); + CallInst *CI = B.CreateCall(Callee, Op, Name); + CI->setAttributes(Attrs); + if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +/// EmitPutChar - Emit a call to the putchar function. This assumes that Char +/// is an integer. +Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(), + B.getInt32Ty(), NULL); + CallInst *CI = B.CreateCall(PutChar, + B.CreateIntCast(Char, + B.getInt32Ty(), + /*isSigned*/true, + "chari"), + "putchar"); + + if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +/// EmitPutS - Emit a call to the puts function. This assumes that Str is +/// some pointer. +void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[2]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + + Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2), + B.getInt32Ty(), + B.getInt8PtrTy(), + NULL); + CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts"); + if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + +} + +/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is +/// an integer and File is a pointer to FILE. +void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, + const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[2]; + AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + Constant *F; + if (File->getType()->isPointerTy()) + F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), + B.getInt32Ty(), + B.getInt32Ty(), File->getType(), + NULL); + else + F = M->getOrInsertFunction("fputc", + B.getInt32Ty(), + B.getInt32Ty(), + File->getType(), NULL); + Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true, + "chari"); + CallInst *CI = B.CreateCall2(F, Char, File, "fputc"); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); +} + +/// EmitFPutS - Emit a call to the puts function. Str is required to be a +/// pointer and File is a pointer to FILE. 
+void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, + const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[3]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + Constant *F; + if (File->getType()->isPointerTy()) + F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), + B.getInt32Ty(), + B.getInt8PtrTy(), + File->getType(), NULL); + else + F = M->getOrInsertFunction("fputs", B.getInt32Ty(), + B.getInt8PtrTy(), + File->getType(), NULL); + CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs"); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); +} + +/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is +/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. +void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, + IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[3]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture); + AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Constant *F; + if (File->getType()->isPointerTy()) + F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3), + TD->getIntPtrType(Context), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), + TD->getIntPtrType(Context), + File->getType(), NULL); + else + F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(Context), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), + TD->getIntPtrType(Context), + File->getType(), NULL); + CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, + ConstantInt::get(TD->getIntPtrType(Context), 1), File); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); +} diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 93577b4..dec227a 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMTransformUtils BasicBlockUtils.cpp BasicInliner.cpp BreakCriticalEdges.cpp + BuildLibCalls.cpp CloneFunction.cpp CloneLoop.cpp CloneModule.cpp diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 7e7973a..d03f7a6 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -46,7 +46,7 @@ using namespace llvm; static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD, uint64_t &ByteOffset, unsigned MaxLookup = 6) { - if (!isa<PointerType>(V->getType())) + if (!V->getType()->isPointerTy()) return V; for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { @@ -65,7 +65,7 @@ static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD, } else { return V; } - assert(isa<PointerType>(V->getType()) && "Unexpected operand type!"); + assert(V->getType()->isPointerTy() && "Unexpected operand type!"); } return V; } @@ -490,6 +490,17 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { // Splice all the instructions from PredBB to DestBB. 
PredBB->getTerminator()->eraseFromParent(); DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList()); + + // Zap anything that took the address of DestBB. Not doing this will give the + // address an invalid value. + if (DestBB->hasAddressTaken()) { + BlockAddress *BA = BlockAddress::get(DestBB); + Constant *Replacement = + ConstantInt::get(llvm::Type::getInt32Ty(BA->getContext()), 1); + BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement, + BA->getType())); + BA->destroyConstant(); + } // Anything that branched to PredBB now branches to DestBB. PredBB->replaceAllUsesWith(DestBB); diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 57bab60..924b744 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -51,6 +51,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" @@ -147,6 +148,11 @@ ReprocessLoop: // Delete each unique out-of-loop (and thus dead) predecessor. for (SmallPtrSet<BasicBlock *, 4>::iterator I = BadPreds.begin(), E = BadPreds.end(); I != E; ++I) { + + DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "; + WriteAsOperand(dbgs(), *I, false); + dbgs() << "\n"); + // Inform each successor of each dead pred. for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI) (*SI)->removePredecessor(*I); @@ -159,6 +165,27 @@ ReprocessLoop: } } + // If there are exiting blocks with branches on undef, resolve the undef in + // the direction which will exit the loop. This will help simplify loop + // trip count computations. + SmallVector<BasicBlock*, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), + E = ExitingBlocks.end(); I != E; ++I) + if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator())) + if (BI->isConditional()) { + if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) { + + DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "; + WriteAsOperand(dbgs(), *I, false); + dbgs() << "\n"); + + BI->setCondition(ConstantInt::get(Cond->getType(), + !L->contains(BI->getSuccessor(0)))); + Changed = true; + } + } + // Does the loop already have a preheader? If so, don't insert one. BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { @@ -250,8 +277,6 @@ ReprocessLoop: break; } if (UniqueExit) { - SmallVector<BasicBlock*, 8> ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BasicBlock *ExitingBlock = ExitingBlocks[i]; if (!ExitingBlock->getSinglePredecessor()) continue; @@ -282,6 +307,11 @@ ReprocessLoop: // Success. The block is now dead, so remove it from the loop, // update the dominator tree and dominance frontier, and delete it. 
+ + DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "; + WriteAsOperand(dbgs(), ExitingBlock, false); + dbgs() << "\n"); + assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock)); Changed = true; LI->removeBlock(ExitingBlock); @@ -335,6 +365,10 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) { SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(), ".preheader", this); + DEBUG(dbgs() << "LoopSimplify: Creating pre-header "; + WriteAsOperand(dbgs(), NewBB, false); + dbgs() << "\n"); + // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L); @@ -360,6 +394,10 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) { LoopBlocks.size(), ".loopexit", this); + DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "; + WriteAsOperand(dbgs(), NewBB, false); + dbgs() << "\n"); + return NewBB; } @@ -480,6 +518,8 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { OuterLoopPreds.push_back(PN->getIncomingBlock(i)); } + DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); + BasicBlock *Header = L->getHeader(); BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0], OuterLoopPreds.size(), @@ -574,6 +614,10 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { Header->getName()+".backedge", F); BranchInst *BETerminator = BranchInst::Create(Header, BEBlock); + DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "; + WriteAsOperand(dbgs(), BEBlock, false); + dbgs() << "\n"); + // Move the new backedge block to right after the last backedge block. Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos; F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock); diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 544e20b..4f5a70b 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -518,7 +518,7 @@ void PromoteMem2Reg::run() { // If this PHI node merges one value and/or undefs, get the value. if (Value *V = PN->hasConstantValue(&DT)) { - if (AST && isa<PointerType>(PN->getType())) + if (AST && PN->getType()->isPointerTy()) AST->deleteValue(PN); PN->replaceAllUsesWith(V); PN->eraseFromParent(); @@ -780,7 +780,7 @@ void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI, if (ReplVal == LI) ReplVal = UndefValue::get(LI->getType()); LI->replaceAllUsesWith(ReplVal); - if (AST && isa<PointerType>(LI->getType())) + if (AST && LI->getType()->isPointerTy()) AST->deleteValue(LI); LI->eraseFromParent(); LBI.deleteValue(LI); @@ -838,7 +838,7 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) { LI->replaceAllUsesWith(UndefValue::get(LI->getType())); - if (AST && isa<PointerType>(LI->getType())) + if (AST && LI->getType()->isPointerTy()) AST->deleteValue(LI); LBI.deleteValue(LI); LI->eraseFromParent(); @@ -874,7 +874,7 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, // Otherwise, there was a store before this load, the load takes its value. 
--I; LI->replaceAllUsesWith(I->second->getOperand(0)); - if (AST && isa<PointerType>(LI->getType())) + if (AST && LI->getType()->isPointerTy()) AST->deleteValue(LI); LI->eraseFromParent(); LBI.deleteValue(LI); @@ -922,7 +922,7 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, InsertedPHINodes.insert(PN); - if (AST && isa<PointerType>(PN->getType())) + if (AST && PN->getType()->isPointerTy()) AST->copyValue(PointerAllocaValues[AllocaNo], PN); return true; @@ -996,7 +996,7 @@ NextIteration: // Anything using the load now uses the current value. LI->replaceAllUsesWith(V); - if (AST && isa<PointerType>(LI->getType())) + if (AST && LI->getType()->isPointerTy()) AST->deleteValue(LI); BB->getInstList().erase(LI); } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 795b6bf..f343c38 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -271,7 +271,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, ConstantInt *SimplifyCFGOpt::GetConstantInt(Value *V) { // Normal constant int. ConstantInt *CI = dyn_cast<ConstantInt>(V); - if (CI || !TD || !isa<Constant>(V) || !isa<PointerType>(V->getType())) + if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy()) return CI; // This is some kind of pointer constant. Turn it into a pointer-sized @@ -701,7 +701,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI) { AddPredecessorToBlock(NewSuccessors[i], Pred, BB); // Convert pointer to int before we switch. - if (isa<PointerType>(CV->getType())) { + if (CV->getType()->isPointerTy()) { assert(TD && "Cannot switch on pointer without TargetData"); CV = new PtrToIntInst(CV, TD->getIntPtrType(CV->getContext()), "magicptr", PTI); @@ -915,7 +915,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { case Instruction::Add: case Instruction::Sub: // Not worth doing for vector ops. - if (isa<VectorType>(HInst->getType())) + if (HInst->getType()->isVectorTy()) return false; break; case Instruction::And: @@ -925,7 +925,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { case Instruction::LShr: case Instruction::AShr: // Don't mess with vector operations. - if (isa<VectorType>(HInst->getType())) + if (HInst->getType()->isVectorTy()) return false; break; // These are all cheap and non-trapping instructions. } @@ -1077,7 +1077,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantInt *CB; if ((CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i))) && - CB->getType()->isInteger(1)) { + CB->getType()->isIntegerTy(1)) { // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. BasicBlock *PredBB = PN->getIncomingBlock(i); @@ -2068,7 +2068,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB); // Convert pointer to int before we switch. 
- if (isa<PointerType>(CompVal->getType())) { + if (CompVal->getType()->isPointerTy()) { assert(TD && "Cannot switch on pointer without TargetData"); CompVal = new PtrToIntInst(CompVal, TD->getIntPtrType(CompVal->getContext()), diff --git a/lib/VMCore/Android.mk b/lib/VMCore/Android.mk new file mode 100644 index 0000000..4784684 --- /dev/null +++ b/lib/VMCore/Android.mk @@ -0,0 +1,61 @@ +LOCAL_PATH:= $(call my-dir) + +vmcore_SRC_FILES := \ + AsmWriter.cpp \ + Attributes.cpp \ + AutoUpgrade.cpp \ + BasicBlock.cpp \ + ConstantFold.cpp \ + Constants.cpp \ + Core.cpp \ + Dominators.cpp \ + Function.cpp \ + GVMaterializer.cpp \ + Globals.cpp \ + IRBuilder.cpp \ + InlineAsm.cpp \ + Instruction.cpp \ + Instructions.cpp \ + IntrinsicInst.cpp \ + LLVMContext.cpp \ + LeakDetector.cpp \ + Metadata.cpp \ + Module.cpp \ + Pass.cpp \ + PassManager.cpp \ + PrintModulePass.cpp \ + Type.cpp \ + TypeSymbolTable.cpp \ + Use.cpp \ + Value.cpp \ + ValueSymbolTable.cpp \ + ValueTypes.cpp \ + Verifier.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +REQUIRES_RTTI := 1 + +LOCAL_SRC_FILES := $(vmcore_SRC_FILES) + +LOCAL_MODULE:= libLLVMCore + +include $(LLVM_HOST_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +REQUIRES_RTTI := 1 + +LOCAL_SRC_FILES := $(vmcore_SRC_FILES) + +LOCAL_MODULE:= libLLVMCore + +include $(LLVM_DEVICE_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index ab5f45a..fd74241 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -17,6 +17,7 @@ #include "llvm/Assembly/Writer.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/Assembly/AsmAnnotationWriter.h" +#include "llvm/LLVMContext.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" @@ -239,6 +240,19 @@ void TypePrinting::CalcTypeName(const Type *Ty, OS << '>'; break; } + case Type::UnionTyID: { + const UnionType *UTy = cast<UnionType>(Ty); + OS << "union { "; + for (StructType::element_iterator I = UTy->element_begin(), + E = UTy->element_end(); I != E; ++I) { + CalcTypeName(*I, TypeStack, OS); + if (next(I) != UTy->element_end()) + OS << ','; + OS << ' '; + } + OS << '}'; + break; + } case Type::PointerTyID: { const PointerType *PTy = cast<PointerType>(Ty); CalcTypeName(PTy->getElementType(), TypeStack, OS); @@ -363,8 +377,8 @@ namespace { return; // If this is a structure or opaque type, add a name for the type. - if (((isa<StructType>(Ty) && cast<StructType>(Ty)->getNumElements()) - || isa<OpaqueType>(Ty)) && !TP.hasTypeName(Ty)) { + if (((Ty->isStructTy() && cast<StructType>(Ty)->getNumElements()) + || Ty->isOpaqueTy()) && !TP.hasTypeName(Ty)) { TP.addTypeName(Ty, "%"+utostr(unsigned(NumberedTypes.size()))); NumberedTypes.push_back(Ty); } @@ -418,13 +432,13 @@ static void AddModuleTypesToPrinter(TypePrinting &TP, // they are used too often to have a single useful name. if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) { const Type *PETy = PTy->getElementType(); - if ((PETy->isPrimitiveType() || PETy->isInteger()) && - !isa<OpaqueType>(PETy)) + if ((PETy->isPrimitiveType() || PETy->isIntegerTy()) && + !PETy->isOpaqueTy()) continue; } // Likewise don't insert primitives either. 
- if (Ty->isInteger() || Ty->isPrimitiveType()) + if (Ty->isIntegerTy() || Ty->isPrimitiveType()) continue; // Get the name as a string and insert it into TypeNames. @@ -836,7 +850,7 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) { static void WriteConstantInt(raw_ostream &Out, const Constant *CV, TypePrinting &TypePrinter, SlotTracker *Machine) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - if (CI->getType()->isInteger(1)) { + if (CI->getType()->isIntegerTy(1)) { Out << (CI->getZExtValue() ? "true" : "false"); return; } @@ -1223,7 +1237,6 @@ class AssemblyWriter { TypePrinting TypePrinter; AssemblyAnnotationWriter *AnnotationWriter; std::vector<const Type*> NumberedTypes; - SmallVector<StringRef, 8> MDNames; public: inline AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, @@ -1231,8 +1244,6 @@ public: AssemblyAnnotationWriter *AAW) : Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW) { AddModuleTypesToPrinter(TypePrinter, NumberedTypes, M); - if (M) - M->getMDKindNames(MDNames); } void printMDNodeBody(const MDNode *MD); @@ -1252,15 +1263,14 @@ public: void printArgument(const Argument *FA, Attributes Attrs); void printBasicBlock(const BasicBlock *BB); void printInstruction(const Instruction &I); -private: +private: // printInfoComment - Print a little comment after the instruction indicating // which slot it occupies. void printInfoComment(const Value &V); }; } // end of anonymous namespace - void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) { if (Operand == 0) { Out << "<null operand!>"; @@ -1689,11 +1699,15 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) { if (AnnotationWriter) AnnotationWriter->emitBasicBlockEndAnnot(BB, Out); } - /// printInfoComment - Print a little comment after the instruction indicating /// which slot it occupies. /// void AssemblyWriter::printInfoComment(const Value &V) { + if (AnnotationWriter) { + AnnotationWriter->printInfoComment(V, Out); + return; + } + if (V.getType()->isVoidTy()) return; Out.PadToColumn(50); @@ -1834,8 +1848,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) { // Out << ' '; if (!FTy->isVarArg() && - (!isa<PointerType>(RetTy) || - !isa<FunctionType>(cast<PointerType>(RetTy)->getElementType()))) { + (!RetTy->isPointerTy() || + !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) { TypePrinter.print(RetTy, Out); Out << ' '; writeOperand(Operand, false); @@ -1880,8 +1894,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) { // Out << ' '; if (!FTy->isVarArg() && - (!isa<PointerType>(RetTy) || - !isa<FunctionType>(cast<PointerType>(RetTy)->getElementType()))) { + (!RetTy->isPointerTy() || + !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) { TypePrinter.print(RetTy, Out); Out << ' '; writeOperand(Operand, false); @@ -1972,12 +1986,20 @@ void AssemblyWriter::printInstruction(const Instruction &I) { } // Print Metadata info. - if (!MDNames.empty()) { - SmallVector<std::pair<unsigned, MDNode*>, 4> InstMD; - I.getAllMetadata(InstMD); - for (unsigned i = 0, e = InstMD.size(); i != e; ++i) - Out << ", !" << MDNames[InstMD[i].first] - << " !" 
<< Machine.getMetadataSlot(InstMD[i].second); + SmallVector<std::pair<unsigned, MDNode*>, 4> InstMD; + I.getAllMetadata(InstMD); + if (!InstMD.empty()) { + SmallVector<StringRef, 8> MDNames; + I.getType()->getContext().getMDKindNames(MDNames); + for (unsigned i = 0, e = InstMD.size(); i != e; ++i) { + unsigned Kind = InstMD[i].first; + if (Kind < MDNames.size()) { + Out << ", !" << MDNames[Kind]; + } else { + Out << ", !<unknown kind #" << Kind << ">"; + } + Out << " !" << Machine.getMetadataSlot(InstMD[i].second); + } } printInfoComment(I); } diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp index a371c6f..a000aee 100644 --- a/lib/VMCore/Attributes.cpp +++ b/lib/VMCore/Attributes.cpp @@ -70,6 +70,11 @@ std::string Attribute::getAsString(Attributes Attrs) { Result += "noimplicitfloat "; if (Attrs & Attribute::Naked) Result += "naked "; + if (Attrs & Attribute::StackAlignment) { + Result += "alignstack("; + Result += utostr(Attribute::getStackAlignmentFromAttrs(Attrs)); + Result += ") "; + } if (Attrs & Attribute::Alignment) { Result += "align "; Result += utostr(Attribute::getAlignmentFromAttrs(Attrs)); @@ -84,11 +89,11 @@ std::string Attribute::getAsString(Attributes Attrs) { Attributes Attribute::typeIncompatible(const Type *Ty) { Attributes Incompatible = None; - if (!Ty->isInteger()) + if (!Ty->isIntegerTy()) // Attributes that only apply to integers. Incompatible |= SExt | ZExt; - if (!isa<PointerType>(Ty)) + if (!Ty->isPointerTy()) // Attributes that only apply to pointers. Incompatible |= ByVal | Nest | NoAlias | StructRet | NoCapture; diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp index 5c117d8..549977c 100644 --- a/lib/VMCore/ConstantFold.cpp +++ b/lib/VMCore/ConstantFold.cpp @@ -112,7 +112,7 @@ static Constant *FoldBitCast(Constant *V, const Type *DestTy) { IdxList.push_back(Zero); } else if (const SequentialType *STy = dyn_cast<SequentialType>(ElTy)) { - if (isa<PointerType>(ElTy)) break; // Can't index into pointers! + if (ElTy->isPointerTy()) break; // Can't index into pointers! ElTy = STy->getElementType(); IdxList.push_back(Zero); } else { @@ -155,12 +155,12 @@ static Constant *FoldBitCast(Constant *V, const Type *DestTy) { // Handle integral constant input. if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - if (DestTy->isInteger()) + if (DestTy->isIntegerTy()) // Integral -> Integral. This is a no-op because the bit widths must // be the same. Consequently, we just fold to V. 
return V; - if (DestTy->isFloatingPoint()) + if (DestTy->isFloatingPointTy()) return ConstantFP::get(DestTy->getContext(), APFloat(CI->getValue(), !DestTy->isPPC_FP128Ty())); @@ -189,7 +189,7 @@ static Constant *FoldBitCast(Constant *V, const Type *DestTy) { /// static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart, unsigned ByteSize) { - assert(isa<IntegerType>(C->getType()) && + assert(C->getType()->isIntegerTy() && (cast<IntegerType>(C->getType())->getBitWidth() & 7) == 0 && "Non-byte sized integer input"); unsigned CSize = cast<IntegerType>(C->getType())->getBitWidth()/8; @@ -334,11 +334,7 @@ static Constant *getFoldedSizeOf(const Type *Ty, const Type *DestTy, Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true); return ConstantExpr::getNUWMul(E, N); } - if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) { - Constant *N = ConstantInt::get(DestTy, VTy->getNumElements()); - Constant *E = getFoldedSizeOf(VTy->getElementType(), DestTy, true); - return ConstantExpr::getNUWMul(E, N); - } + if (const StructType *STy = dyn_cast<StructType>(Ty)) if (!STy->isPacked()) { unsigned NumElems = STy->getNumElements(); @@ -361,10 +357,26 @@ static Constant *getFoldedSizeOf(const Type *Ty, const Type *DestTy, } } + if (const UnionType *UTy = dyn_cast<UnionType>(Ty)) { + unsigned NumElems = UTy->getNumElements(); + // Check for a union with all members having the same size. + Constant *MemberSize = + getFoldedSizeOf(UTy->getElementType(0), DestTy, true); + bool AllSame = true; + for (unsigned i = 1; i != NumElems; ++i) + if (MemberSize != + getFoldedSizeOf(UTy->getElementType(i), DestTy, true)) { + AllSame = false; + break; + } + if (AllSame) + return MemberSize; + } + // Pointer size doesn't depend on the pointee type, so canonicalize them // to an arbitrary pointee. if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) - if (!PTy->getElementType()->isInteger(1)) + if (!PTy->getElementType()->isIntegerTy(1)) return getFoldedSizeOf(PointerType::get(IntegerType::get(PTy->getContext(), 1), PTy->getAddressSpace()), @@ -426,10 +438,28 @@ static Constant *getFoldedAlignOf(const Type *Ty, const Type *DestTy, return MemberAlign; } + if (const UnionType *UTy = dyn_cast<UnionType>(Ty)) { + // Union alignment is the maximum alignment of any member. + // Without target data, we can't compare much, but we can check to see + // if all the members have the same alignment. + unsigned NumElems = UTy->getNumElements(); + // Check for a union with all members having the same alignment. + Constant *MemberAlign = + getFoldedAlignOf(UTy->getElementType(0), DestTy, true); + bool AllSame = true; + for (unsigned i = 1; i != NumElems; ++i) + if (MemberAlign != getFoldedAlignOf(UTy->getElementType(i), DestTy, true)) { + AllSame = false; + break; + } + if (AllSame) + return MemberAlign; + } + // Pointer alignment doesn't depend on the pointee type, so canonicalize them // to an arbitrary pointee. 
if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) - if (!PTy->getElementType()->isInteger(1)) + if (!PTy->getElementType()->isIntegerTy(1)) return getFoldedAlignOf(PointerType::get(IntegerType::get(PTy->getContext(), 1), @@ -464,13 +494,7 @@ static Constant *getFoldedOffsetOf(const Type *Ty, Constant *FieldNo, Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true); return ConstantExpr::getNUWMul(E, N); } - if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) { - Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false, - DestTy, false), - FieldNo, DestTy); - Constant *E = getFoldedSizeOf(VTy->getElementType(), DestTy, true); - return ConstantExpr::getNUWMul(E, N); - } + if (const StructType *STy = dyn_cast<StructType>(Ty)) if (!STy->isPacked()) { unsigned NumElems = STy->getNumElements(); @@ -551,7 +575,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, // operating on each element. In the cast of bitcasts, the element // count may be mismatched; don't attempt to handle that here. if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) - if (isa<VectorType>(DestTy) && + if (DestTy->isVectorTy() && cast<VectorType>(DestTy)->getNumElements() == CV->getType()->getNumElements()) { std::vector<Constant*> res; @@ -629,12 +653,12 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, ConstantInt *CI = cast<ConstantInt>(CE->getOperand(2)); if (CI->isOne() && STy->getNumElements() == 2 && - STy->getElementType(0)->isInteger(1)) { + STy->getElementType(0)->isIntegerTy(1)) { return getFoldedAlignOf(STy->getElementType(1), DestTy, false); } } // Handle an offsetof-like expression. - if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)){ + if (Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()){ if (Constant *C = getFoldedOffsetOf(Ty, CE->getOperand(2), DestTy, false)) return C; @@ -885,6 +909,8 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg, unsigned numOps; if (const ArrayType *AR = dyn_cast<ArrayType>(AggTy)) numOps = AR->getNumElements(); + else if (AggTy->isUnionTy()) + numOps = 1; else numOps = cast<StructType>(AggTy)->getNumElements(); @@ -901,6 +927,10 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg, if (const StructType* ST = dyn_cast<StructType>(AggTy)) return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked()); + if (const UnionType* UT = dyn_cast<UnionType>(AggTy)) { + assert(Ops.size() == 1 && "Union can only contain a single value!"); + return ConstantUnion::get(UT, Ops[0]); + } return ConstantArray::get(cast<ArrayType>(AggTy), Ops); } @@ -1099,6 +1129,10 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, return ConstantExpr::getLShr(C1, C2); break; } + } else if (isa<ConstantInt>(C1)) { + // If C1 is a ConstantInt and C2 is not, swap the operands. + if (Instruction::isCommutative(Opcode)) + return ConstantExpr::get(Opcode, C2, C1); } // At this point we know neither constant is an UndefValue. @@ -1358,35 +1392,12 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, } else if (isa<ConstantExpr>(C2)) { // If C2 is a constant expr and C1 isn't, flop them around and fold the // other way if possible. - switch (Opcode) { - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - // No change of opcode required. 
+ if (Instruction::isCommutative(Opcode)) return ConstantFoldBinaryInstruction(Opcode, C2, C1); - - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::SDiv: - case Instruction::UDiv: - case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: - default: // These instructions cannot be flopped around. - break; - } } // i1 can be simplified in many cases. - if (C1->getType()->isInteger(1)) { + if (C1->getType()->isIntegerTy(1)) { switch (Opcode) { case Instruction::Add: case Instruction::Sub: @@ -1421,7 +1432,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, /// isZeroSizedType - This type is zero sized if its an array or structure of /// zero sized types. The only leaf zero sized type is an empty structure. static bool isMaybeZeroSizedType(const Type *Ty) { - if (isa<OpaqueType>(Ty)) return true; // Can't say. + if (Ty->isOpaqueTy()) return true; // Can't say. if (const StructType *STy = dyn_cast<StructType>(Ty)) { // If all of elements have zero size, this does too. @@ -1452,10 +1463,10 @@ static int IdxCompare(Constant *C1, Constant *C2, const Type *ElTy) { // Ok, we have two differing integer indices. Sign extend them to be the same // type. Long is always big enough, so we use it. - if (!C1->getType()->isInteger(64)) + if (!C1->getType()->isIntegerTy(64)) C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(C1->getContext())); - if (!C2->getType()->isInteger(64)) + if (!C2->getType()->isIntegerTy(64)) C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(C1->getContext())); if (C1 == C2) return 0; // They are equal @@ -1661,7 +1672,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, // If the cast is not actually changing bits, and the second operand is a // null pointer, do the comparison with the pre-casted value. if (V2->isNullValue() && - (isa<PointerType>(CE1->getType()) || CE1->getType()->isInteger())) { + (CE1->getType()->isPointerTy() || CE1->getType()->isIntegerTy())) { if (CE1->getOpcode() == Instruction::ZExt) isSigned = false; if (CE1->getOpcode() == Instruction::SExt) isSigned = true; return evaluateICmpRelation(CE1Op0, @@ -1807,7 +1818,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, // Handle some degenerate cases first if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) - return UndefValue::get(ResultTy); + return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred)); // No compile-time operations on this type yet. if (C1->getType()->isPPC_FP128Ty()) @@ -1836,7 +1847,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, } // If the comparison is a comparison between two i1's, simplify it. 
- if (C1->getType()->isInteger(1)) { + if (C1->getType()->isIntegerTy(1)) { switch(pred) { case ICmpInst::ICMP_EQ: if (isa<ConstantInt>(C2)) @@ -1908,7 +1919,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan || R==APFloat::cmpEqual); } - } else if (isa<VectorType>(C1->getType())) { + } else if (C1->getType()->isVectorTy()) { SmallVector<Constant*, 16> C1Elts, C2Elts; C1->getVectorElements(C1Elts); C2->getVectorElements(C2Elts); @@ -1925,7 +1936,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, return ConstantVector::get(&ResElts[0], ResElts.size()); } - if (C1->getType()->isFloatingPoint()) { + if (C1->getType()->isFloatingPointTy()) { int Result = -1; // -1 = unknown, 0 = known false, 1 = known true. switch (evaluateFCmpRelation(C1, C2)) { default: llvm_unreachable("Unknown relation!"); @@ -2059,7 +2070,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, if (ConstantExpr *CE2 = dyn_cast<ConstantExpr>(C2)) { Constant *CE2Op0 = CE2->getOperand(0); if (CE2->getOpcode() == Instruction::BitCast && - isa<VectorType>(CE2->getType())==isa<VectorType>(CE2Op0->getType())) { + CE2->getType()->isVectorTy() == CE2Op0->getType()->isVectorTy()) { Constant *Inverse = ConstantExpr::getBitCast(C1, CE2Op0->getType()); return ConstantExpr::getICmp(pred, Inverse, CE2Op0); } @@ -2067,8 +2078,8 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, // If the left hand side is an extension, try eliminating it. if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) { - if (CE1->getOpcode() == Instruction::SExt || - CE1->getOpcode() == Instruction::ZExt) { + if ((CE1->getOpcode() == Instruction::SExt && ICmpInst::isSigned(pred)) || + (CE1->getOpcode() == Instruction::ZExt && !ICmpInst::isSigned(pred))){ Constant *CE1Op0 = CE1->getOperand(0); Constant *CE1Inverse = ConstantExpr::getTrunc(CE1, CE1Op0->getType()); if (CE1Inverse == CE1Op0) { @@ -2086,27 +2097,8 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, // If C2 is a constant expr and C1 isn't, flip them around and fold the // other way if possible. // Also, if C1 is null and C2 isn't, flip them around. - switch (pred) { - case ICmpInst::ICMP_EQ: - case ICmpInst::ICMP_NE: - // No change of predicate required. - return ConstantExpr::getICmp(pred, C2, C1); - - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_ULE: - case ICmpInst::ICMP_SLE: - case ICmpInst::ICMP_UGE: - case ICmpInst::ICMP_SGE: - // Change the predicate as necessary to swap the operands. - pred = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)pred); - return ConstantExpr::getICmp(pred, C2, C1); - - default: // These predicates cannot be flopped around. - break; - } + pred = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)pred); + return ConstantExpr::getICmp(pred, C2, C1); } } return 0; @@ -2178,7 +2170,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Constant *C, I != E; ++I) LastTy = *I; - if ((LastTy && isa<ArrayType>(LastTy)) || Idx0->isNullValue()) { + if ((LastTy && LastTy->isArrayTy()) || Idx0->isNullValue()) { SmallVector<Value*, 16> NewIndices; NewIndices.reserve(NumIdx + CE->getNumOperands()); for (unsigned i = 1, e = CE->getNumOperands()-1; i != e; ++i) @@ -2260,10 +2252,10 @@ Constant *llvm::ConstantFoldGetElementPtr(Constant *C, // Before adding, extend both operands to i64 to avoid // overflow trouble. 
- if (!PrevIdx->getType()->isInteger(64)) + if (!PrevIdx->getType()->isIntegerTy(64)) PrevIdx = ConstantExpr::getSExt(PrevIdx, Type::getInt64Ty(Div->getContext())); - if (!Div->getType()->isInteger(64)) + if (!Div->getType()->isIntegerTy(64)) Div = ConstantExpr::getSExt(Div, Type::getInt64Ty(Div->getContext())); diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index 2250626..10f8879 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -229,7 +229,7 @@ Constant::PossibleRelocationsTy Constant::getRelocationInfo() const { /// This handles breaking down a vector undef into undef elements, etc. For /// constant exprs and other cases we can't handle, we return an empty vector. void Constant::getVectorElements(SmallVectorImpl<Constant*> &Elts) const { - assert(isa<VectorType>(getType()) && "Not a vector constant!"); + assert(getType()->isVectorTy() && "Not a vector constant!"); if (const ConstantVector *CV = dyn_cast<ConstantVector>(this)) { for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) @@ -404,13 +404,13 @@ ConstantFP* ConstantFP::getNegativeZero(const Type* Ty) { Constant* ConstantFP::getZeroValueForNegation(const Type* Ty) { if (const VectorType *PTy = dyn_cast<VectorType>(Ty)) - if (PTy->getElementType()->isFloatingPoint()) { + if (PTy->getElementType()->isFloatingPointTy()) { std::vector<Constant*> zeros(PTy->getNumElements(), getNegativeZero(PTy->getElementType())); return ConstantVector::get(PTy, zeros); } - if (Ty->isFloatingPoint()) + if (Ty->isFloatingPointTy()) return getNegativeZero(Ty); return Constant::getNullValue(Ty); @@ -585,6 +585,27 @@ Constant* ConstantStruct::get(LLVMContext &Context, return get(Context, std::vector<Constant*>(Vals, Vals+NumVals), Packed); } +ConstantUnion::ConstantUnion(const UnionType *T, Constant* V) + : Constant(T, ConstantUnionVal, + OperandTraits<ConstantUnion>::op_end(this) - 1, 1) { + Use *OL = OperandList; + assert(T->getElementTypeIndex(V->getType()) >= 0 && + "Initializer for union element isn't a member of union type!"); + *OL = V; +} + +// ConstantUnion accessors. +Constant* ConstantUnion::get(const UnionType* T, Constant* V) { + LLVMContextImpl* pImpl = T->getContext().pImpl; + + // Create a ConstantAggregateZero value if all elements are zeros... 
+ if (!V->isNullValue()) + return pImpl->UnionConstants.getOrCreate(T, V); + + return ConstantAggregateZero::get(T); +} + + ConstantVector::ConstantVector(const VectorType *T, const std::vector<Constant*> &V) : Constant(T, ConstantVectorVal, @@ -640,13 +661,13 @@ Constant* ConstantVector::get(Constant* const* Vals, unsigned NumVals) { } Constant* ConstantExpr::getNSWNeg(Constant* C) { - assert(C->getType()->isIntOrIntVector() && + assert(C->getType()->isIntOrIntVectorTy() && "Cannot NEG a nonintegral value!"); return getNSWSub(ConstantFP::getZeroValueForNegation(C->getType()), C); } Constant* ConstantExpr::getNUWNeg(Constant* C) { - assert(C->getType()->isIntOrIntVector() && + assert(C->getType()->isIntOrIntVectorTy() && "Cannot NEG a nonintegral value!"); return getNUWSub(ConstantFP::getZeroValueForNegation(C->getType()), C); } @@ -923,7 +944,7 @@ bool ConstantFP::isValueValidForType(const Type *Ty, const APFloat& Val) { // Factory Function Implementation ConstantAggregateZero* ConstantAggregateZero::get(const Type* Ty) { - assert((isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) && + assert((Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) && "Cannot create an aggregate zero of non-aggregate type!"); LLVMContextImpl *pImpl = Ty->getContext().pImpl; @@ -948,7 +969,7 @@ void ConstantArray::destroyConstant() { /// if the elements of the array are all ConstantInt's. bool ConstantArray::isString() const { // Check the element type for i8... - if (!getType()->getElementType()->isInteger(8)) + if (!getType()->getElementType()->isIntegerTy(8)) return false; // Check the elements to make sure they are all integers, not constant // expressions. @@ -963,7 +984,7 @@ bool ConstantArray::isString() const { /// null bytes except its terminator. bool ConstantArray::isCString() const { // Check the element type for i8... - if (!getType()->getElementType()->isInteger(8)) + if (!getType()->getElementType()->isIntegerTy(8)) return false; // Last element must be a null. @@ -1010,6 +1031,13 @@ void ConstantStruct::destroyConstant() { // destroyConstant - Remove the constant from the constant table... // +void ConstantUnion::destroyConstant() { + getType()->getContext().pImpl->UnionConstants.remove(this); + destroyConstantImpl(); +} + +// destroyConstant - Remove the constant from the constant table... 
+// void ConstantVector::destroyConstant() { getType()->getContext().pImpl->VectorConstants.remove(this); destroyConstantImpl(); @@ -1211,18 +1239,18 @@ Constant *ConstantExpr::getTruncOrBitCast(Constant *C, const Type *Ty) { } Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) { - assert(isa<PointerType>(S->getType()) && "Invalid cast"); - assert((Ty->isInteger() || isa<PointerType>(Ty)) && "Invalid cast"); + assert(S->getType()->isPointerTy() && "Invalid cast"); + assert((Ty->isIntegerTy() || Ty->isPointerTy()) && "Invalid cast"); - if (Ty->isInteger()) + if (Ty->isIntegerTy()) return getCast(Instruction::PtrToInt, S, Ty); return getCast(Instruction::BitCast, S, Ty); } Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty, bool isSigned) { - assert(C->getType()->isIntOrIntVector() && - Ty->isIntOrIntVector() && "Invalid cast"); + assert(C->getType()->isIntOrIntVectorTy() && + Ty->isIntOrIntVectorTy() && "Invalid cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); Instruction::CastOps opcode = @@ -1233,7 +1261,7 @@ Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty, } Constant *ConstantExpr::getFPCast(Constant *C, const Type *Ty) { - assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && "Invalid cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); @@ -1250,8 +1278,8 @@ Constant *ConstantExpr::getTrunc(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isIntOrIntVector() && "Trunc operand must be integer"); - assert(Ty->isIntOrIntVector() && "Trunc produces only integral"); + assert(C->getType()->isIntOrIntVectorTy() && "Trunc operand must be integer"); + assert(Ty->isIntOrIntVectorTy() && "Trunc produces only integral"); assert(C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&& "SrcTy must be larger than DestTy for Trunc!"); @@ -1264,8 +1292,8 @@ Constant *ConstantExpr::getSExt(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isIntOrIntVector() && "SExt operand must be integral"); - assert(Ty->isIntOrIntVector() && "SExt produces only integer"); + assert(C->getType()->isIntOrIntVectorTy() && "SExt operand must be integral"); + assert(Ty->isIntOrIntVectorTy() && "SExt produces only integer"); assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&& "SrcTy must be smaller than DestTy for SExt!"); @@ -1278,8 +1306,8 @@ Constant *ConstantExpr::getZExt(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isIntOrIntVector() && "ZEXt operand must be integral"); - assert(Ty->isIntOrIntVector() && "ZExt produces only integer"); + assert(C->getType()->isIntOrIntVectorTy() && "ZEXt operand must be integral"); + assert(Ty->isIntOrIntVectorTy() && "ZExt produces only integer"); assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&& "SrcTy must be smaller than DestTy for ZExt!"); @@ -1292,7 +1320,7 @@ Constant *ConstantExpr::getFPTrunc(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; 
#endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&& "This is an illegal floating point truncation!"); return getFoldedCast(Instruction::FPTrunc, C, Ty); @@ -1304,7 +1332,7 @@ Constant *ConstantExpr::getFPExtend(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&& "This is an illegal floating point extension!"); return getFoldedCast(Instruction::FPExt, C, Ty); @@ -1316,7 +1344,7 @@ Constant *ConstantExpr::getUIToFP(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isIntOrIntVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() && "This is an illegal uint to floating point cast!"); return getFoldedCast(Instruction::UIToFP, C, Ty); } @@ -1327,7 +1355,7 @@ Constant *ConstantExpr::getSIToFP(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isIntOrIntVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() && "This is an illegal sint to floating point cast!"); return getFoldedCast(Instruction::SIToFP, C, Ty); } @@ -1338,7 +1366,7 @@ Constant *ConstantExpr::getFPToUI(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isFPOrFPVector() && Ty->isIntOrIntVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() && "This is an illegal floating point to uint cast!"); return getFoldedCast(Instruction::FPToUI, C, Ty); } @@ -1349,20 +1377,20 @@ Constant *ConstantExpr::getFPToSI(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isFPOrFPVector() && Ty->isIntOrIntVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() && "This is an illegal floating point to sint cast!"); return getFoldedCast(Instruction::FPToSI, C, Ty); } Constant *ConstantExpr::getPtrToInt(Constant *C, const Type *DstTy) { - assert(isa<PointerType>(C->getType()) && "PtrToInt source must be pointer"); - assert(DstTy->isInteger() && "PtrToInt destination must be integral"); + assert(C->getType()->isPointerTy() && "PtrToInt source must be pointer"); + assert(DstTy->isIntegerTy() && "PtrToInt destination must be integral"); return getFoldedCast(Instruction::PtrToInt, C, DstTy); } Constant *ConstantExpr::getIntToPtr(Constant *C, const Type *DstTy) { - assert(C->getType()->isInteger() && "IntToPtr source must be integral"); - assert(isa<PointerType>(DstTy) && "IntToPtr destination must be a pointer"); + assert(C->getType()->isIntegerTy() && "IntToPtr source must be integral"); + assert(DstTy->isPointerTy() && "IntToPtr 
destination must be a pointer"); return getFoldedCast(Instruction::IntToPtr, C, DstTy); } @@ -1421,7 +1449,7 @@ Constant *ConstantExpr::getCompareTy(unsigned short predicate, Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2, unsigned Flags) { // API compatibility: Adjust integer opcodes to floating-point opcodes. - if (C1->getType()->isFPOrFPVector()) { + if (C1->getType()->isFPOrFPVectorTy()) { if (Opcode == Instruction::Add) Opcode = Instruction::FAdd; else if (Opcode == Instruction::Sub) Opcode = Instruction::FSub; else if (Opcode == Instruction::Mul) Opcode = Instruction::FMul; @@ -1432,51 +1460,51 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2, case Instruction::Sub: case Instruction::Mul: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isIntOrIntVector() && + assert(C1->getType()->isIntOrIntVectorTy() && "Tried to create an integer operation on a non-integer type!"); break; case Instruction::FAdd: case Instruction::FSub: case Instruction::FMul: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isFPOrFPVector() && + assert(C1->getType()->isFPOrFPVectorTy() && "Tried to create a floating-point operation on a " "non-floating-point type!"); break; case Instruction::UDiv: case Instruction::SDiv: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isIntOrIntVector() && + assert(C1->getType()->isIntOrIntVectorTy() && "Tried to create an arithmetic operation on a non-arithmetic type!"); break; case Instruction::FDiv: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isFPOrFPVector() && + assert(C1->getType()->isFPOrFPVectorTy() && "Tried to create an arithmetic operation on a non-arithmetic type!"); break; case Instruction::URem: case Instruction::SRem: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isIntOrIntVector() && + assert(C1->getType()->isIntOrIntVectorTy() && "Tried to create an arithmetic operation on a non-arithmetic type!"); break; case Instruction::FRem: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isFPOrFPVector() && + assert(C1->getType()->isFPOrFPVectorTy() && "Tried to create an arithmetic operation on a non-arithmetic type!"); break; case Instruction::And: case Instruction::Or: case Instruction::Xor: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isIntOrIntVector() && + assert(C1->getType()->isIntOrIntVectorTy() && "Tried to create a logical operation on a non-integral type!"); break; case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isIntOrIntVector() && + assert(C1->getType()->isIntOrIntVectorTy() && "Tried to create a shift operation on a non-integer type!"); break; default: @@ -1564,7 +1592,7 @@ Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C, (Constant**)Idxs, NumIdx)) return FC; // Fold a few common cases... 
- assert(isa<PointerType>(C->getType()) && + assert(C->getType()->isPointerTy() && "Non-pointer type for constant GetElementPtr expression"); // Look up the constant in the table first to ensure uniqueness std::vector<Constant*> ArgVec; @@ -1591,7 +1619,7 @@ Constant *ConstantExpr::getInBoundsGetElementPtrTy(const Type *ReqTy, (Constant**)Idxs, NumIdx)) return FC; // Fold a few common cases... - assert(isa<PointerType>(C->getType()) && + assert(C->getType()->isPointerTy() && "Non-pointer type for constant GetElementPtr expression"); // Look up the constant in the table first to ensure uniqueness std::vector<Constant*> ArgVec; @@ -1699,9 +1727,9 @@ Constant *ConstantExpr::getExtractElementTy(const Type *ReqTy, Constant *Val, } Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) { - assert(isa<VectorType>(Val->getType()) && + assert(Val->getType()->isVectorTy() && "Tried to create extractelement operation on non-vector type!"); - assert(Idx->getType()->isInteger(32) && + assert(Idx->getType()->isIntegerTy(32) && "Extractelement index must be i32 type!"); return getExtractElementTy(cast<VectorType>(Val->getType())->getElementType(), Val, Idx); @@ -1723,11 +1751,11 @@ Constant *ConstantExpr::getInsertElementTy(const Type *ReqTy, Constant *Val, Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt, Constant *Idx) { - assert(isa<VectorType>(Val->getType()) && + assert(Val->getType()->isVectorTy() && "Tried to create insertelement operation on non-vector type!"); assert(Elt->getType() == cast<VectorType>(Val->getType())->getElementType() && "Insertelement types must match!"); - assert(Idx->getType()->isInteger(32) && + assert(Idx->getType()->isIntegerTy(32) && "Insertelement index must be i32 type!"); return getInsertElementTy(Val->getType(), Val, Elt, Idx); } @@ -1811,9 +1839,9 @@ Constant *ConstantExpr::getExtractValue(Constant *Agg, Constant* ConstantExpr::getNeg(Constant* C) { // API compatibility: Adjust integer opcodes to floating-point opcodes. 
- if (C->getType()->isFPOrFPVector()) + if (C->getType()->isFPOrFPVectorTy()) return getFNeg(C); - assert(C->getType()->isIntOrIntVector() && + assert(C->getType()->isIntOrIntVectorTy() && "Cannot NEG a nonintegral value!"); return get(Instruction::Sub, ConstantFP::getZeroValueForNegation(C->getType()), @@ -1821,7 +1849,7 @@ Constant* ConstantExpr::getNeg(Constant* C) { } Constant* ConstantExpr::getFNeg(Constant* C) { - assert(C->getType()->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && "Cannot FNEG a non-floating-point value!"); return get(Instruction::FSub, ConstantFP::getZeroValueForNegation(C->getType()), @@ -1829,7 +1857,7 @@ Constant* ConstantExpr::getFNeg(Constant* C) { } Constant* ConstantExpr::getNot(Constant* C) { - assert(C->getType()->isIntOrIntVector() && + assert(C->getType()->isIntOrIntVectorTy() && "Cannot NOT a nonintegral value!"); return get(Instruction::Xor, C, Constant::getAllOnesValue(C->getType())); } @@ -2083,6 +2111,56 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To, destroyConstant(); } +void ConstantUnion::replaceUsesOfWithOnConstant(Value *From, Value *To, + Use *U) { + assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!"); + Constant *ToC = cast<Constant>(To); + + assert(U == OperandList && "Union constants can only have one use!"); + assert(getNumOperands() == 1 && "Union constants can only have one use!"); + assert(getOperand(0) == From && "ReplaceAllUsesWith broken!"); + + std::pair<LLVMContextImpl::UnionConstantsTy::MapKey, ConstantUnion*> Lookup; + Lookup.first.first = getType(); + Lookup.second = this; + Lookup.first.second = ToC; + + LLVMContext &Context = getType()->getContext(); + LLVMContextImpl *pImpl = Context.pImpl; + + Constant *Replacement = 0; + if (ToC->isNullValue()) { + Replacement = ConstantAggregateZero::get(getType()); + } else { + // Check to see if we have this union type already. + bool Exists; + LLVMContextImpl::UnionConstantsTy::MapTy::iterator I = + pImpl->UnionConstants.InsertOrGetItem(Lookup, Exists); + + if (Exists) { + Replacement = I->second; + } else { + // Okay, the new shape doesn't exist in the system yet. Instead of + // creating a new constant union, inserting it, replaceallusesof'ing the + // old with the new, then deleting the old... just update the current one + // in place! + pImpl->UnionConstants.MoveConstantToNewSlot(this, I); + + // Update to the new value. + setOperand(0, ToC); + return; + } + } + + assert(Replacement != this && "I didn't contain From!"); + + // Everyone using this now uses the replacement. + uncheckedReplaceAllUsesWith(Replacement); + + // Delete the old constant! 
+ destroyConstant(); +} + void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) { assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!"); diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h index 08224e4..c798ba2 100644 --- a/lib/VMCore/ConstantsContext.h +++ b/lib/VMCore/ConstantsContext.h @@ -341,6 +341,13 @@ struct ConstantTraits< std::vector<T, Alloc> > { } }; +template<> +struct ConstantTraits<Constant *> { + static unsigned uses(Constant * const & v) { + return 1; + } +}; + template<class ConstantClass, class TypeClass, class ValType> struct ConstantCreator { static ConstantClass *create(const TypeClass *Ty, const ValType &V) { @@ -470,6 +477,14 @@ struct ConstantKeyData<ConstantStruct> { } }; +template<> +struct ConstantKeyData<ConstantUnion> { + typedef Constant* ValType; + static ValType getValType(ConstantUnion *CU) { + return cast<Constant>(CU->getOperand(0)); + } +}; + // ConstantPointerNull does not take extra "value" argument... template<class ValType> struct ConstantCreator<ConstantPointerNull, PointerType, ValType> { diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index 1755cd2..f4f65c5 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -55,6 +55,15 @@ void LLVMContextDispose(LLVMContextRef C) { delete unwrap(C); } +unsigned LLVMGetMDKindIDInContext(LLVMContextRef C, const char* Name, + unsigned SLen) { + return unwrap(C)->getMDKindID(StringRef(Name, SLen)); +} + +unsigned LLVMGetMDKindID(const char* Name, unsigned SLen) { + return LLVMGetMDKindIDInContext(LLVMGetGlobalContext(), Name, SLen); +} + /*===-- Operations on modules ---------------------------------------------===*/ @@ -141,6 +150,8 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) { return LLVMFunctionTypeKind; case Type::StructTyID: return LLVMStructTypeKind; + case Type::UnionTyID: + return LLVMUnionTypeKind; case Type::ArrayTyID: return LLVMArrayTypeKind; case Type::PointerTyID: @@ -299,6 +310,35 @@ LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy) { return unwrap<StructType>(StructTy)->isPacked(); } +/*--.. Operations on union types ..........................................--*/ + +LLVMTypeRef LLVMUnionTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes, + unsigned ElementCount) { + SmallVector<const Type*, 8> Tys; + for (LLVMTypeRef *I = ElementTypes, + *E = ElementTypes + ElementCount; I != E; ++I) + Tys.push_back(unwrap(*I)); + + return wrap(UnionType::get(&Tys[0], Tys.size())); +} + +LLVMTypeRef LLVMUnionType(LLVMTypeRef *ElementTypes, + unsigned ElementCount, int Packed) { + return LLVMUnionTypeInContext(LLVMGetGlobalContext(), ElementTypes, + ElementCount); +} + +unsigned LLVMCountUnionElementTypes(LLVMTypeRef UnionTy) { + return unwrap<UnionType>(UnionTy)->getNumElements(); +} + +void LLVMGetUnionElementTypes(LLVMTypeRef UnionTy, LLVMTypeRef *Dest) { + UnionType *Ty = unwrap<UnionType>(UnionTy); + for (FunctionType::param_iterator I = Ty->element_begin(), + E = Ty->element_end(); I != E; ++I) + *Dest++ = wrap(*I); +} + /*--.. 
Operations on array, pointer, and vector types (sequence types) .....--*/ LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) { @@ -394,6 +434,18 @@ void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal) { unwrap(OldVal)->replaceAllUsesWith(unwrap(NewVal)); } +int LLVMHasMetadata(LLVMValueRef Inst) { + return unwrap<Instruction>(Inst)->hasMetadata(); +} + +LLVMValueRef LLVMGetMetadata(LLVMValueRef Inst, unsigned KindID) { + return wrap(unwrap<Instruction>(Inst)->getMetadata(KindID)); +} + +void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef MD) { + unwrap<Instruction>(Inst)->setMetadata(KindID, MD? unwrap<MDNode>(MD) : NULL); +} + /*--.. Conversion functions ................................................--*/ #define LLVM_DEFINE_VALUE_CAST(name) \ @@ -404,7 +456,7 @@ void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal) { LLVM_FOR_EACH_VALUE_SUBCLASS(LLVM_DEFINE_VALUE_CAST) /*--.. Operations on Uses ..................................................--*/ -LLVMUseIteratorRef LLVMGetFirstUse(LLVMValueRef Val) { +LLVMUseRef LLVMGetFirstUse(LLVMValueRef Val) { Value *V = unwrap(Val); Value::use_iterator I = V->use_begin(); if (I == V->use_end()) @@ -412,16 +464,19 @@ LLVMUseIteratorRef LLVMGetFirstUse(LLVMValueRef Val) { return wrap(&(I.getUse())); } -LLVMUseIteratorRef LLVMGetNextUse(LLVMUseIteratorRef UR) { - return wrap(unwrap(UR)->getNext()); +LLVMUseRef LLVMGetNextUse(LLVMUseRef U) { + Use *Next = unwrap(U)->getNext(); + if (Next) + return wrap(Next); + return 0; } -LLVMValueRef LLVMGetUser(LLVMUseIteratorRef UR) { - return wrap(unwrap(UR)->getUser()); +LLVMValueRef LLVMGetUser(LLVMUseRef U) { + return wrap(unwrap(U)->getUser()); } -LLVMValueRef LLVMGetUsedValue(LLVMUseIteratorRef UR) { - return wrap(unwrap(UR)->get()); +LLVMValueRef LLVMGetUsedValue(LLVMUseRef U) { + return wrap(unwrap(U)->get()); } /*--.. Operations on Users .................................................--*/ @@ -462,6 +517,26 @@ LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty) { wrap(ConstantPointerNull::get(unwrap<PointerType>(Ty))); } +/*--.. Operations on metadata nodes ........................................--*/ + +LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str, + unsigned SLen) { + return wrap(MDString::get(*unwrap(C), StringRef(Str, SLen))); +} + +LLVMValueRef LLVMMDString(const char *Str, unsigned SLen) { + return LLVMMDStringInContext(LLVMGetGlobalContext(), Str, SLen); +} + +LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals, + unsigned Count) { + return wrap(MDNode::get(*unwrap(C), unwrap<Value>(Vals, Count), Count)); +} + +LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count) { + return LLVMMDNodeInContext(LLVMGetGlobalContext(), Vals, Count); +} + /*--.. Operations on scalar constants ......................................--*/ LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N, @@ -536,11 +611,13 @@ LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count, return LLVMConstStructInContext(LLVMGetGlobalContext(), ConstantVals, Count, Packed); } - LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) { return wrap(ConstantVector::get( unwrap<Constant>(ScalarConstantVals, Size), Size)); } +LLVMValueRef LLVMConstUnion(LLVMTypeRef Ty, LLVMValueRef Val) { + return wrap(ConstantUnion::get(unwrap<UnionType>(Ty), unwrap<Constant>(Val))); +} /*--.. 
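/* Illustrative sketch, not part of this change: the union-type and metadata
   entry points added in the two hunks above compose roughly like this.
   LLVMInt32Type, LLVMFloatType and LLVMConstInt are pre-existing LLVM-C
   helpers; Inst stands for any already-created instruction value. */
#include "llvm-c/Core.h"

static void union_and_metadata_sketch(LLVMValueRef Inst) {
  LLVMTypeRef Elems[2];
  Elems[0] = LLVMInt32Type();
  Elems[1] = LLVMFloatType();
  LLVMTypeRef U = LLVMUnionType(Elems, 2, 0);            /* union of i32 and float; the Packed arg is ignored */
  LLVMValueRef UC = LLVMConstUnion(U, LLVMConstInt(LLVMInt32Type(), 7, 0));
  unsigned NumMembers = LLVMCountUnionElementTypes(U);   /* == 2 */

  unsigned Kind = LLVMGetMDKindID("note", 4);            /* custom metadata kind */
  LLVMValueRef MDOps[1];
  MDOps[0] = LLVMMDString("example note", 12);
  LLVMSetMetadata(Inst, Kind, LLVMMDNode(MDOps, 1));     /* attach !note metadata to Inst */
  (void)UC; (void)NumMembers;
}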
Constant expressions ................................................--*/ @@ -561,6 +638,17 @@ LLVMValueRef LLVMConstNeg(LLVMValueRef ConstantVal) { unwrap<Constant>(ConstantVal))); } +LLVMValueRef LLVMConstNSWNeg(LLVMValueRef ConstantVal) { + return wrap(ConstantExpr::getNSWNeg( + unwrap<Constant>(ConstantVal))); +} + +LLVMValueRef LLVMConstNUWNeg(LLVMValueRef ConstantVal) { + return wrap(ConstantExpr::getNUWNeg( + unwrap<Constant>(ConstantVal))); +} + + LLVMValueRef LLVMConstFNeg(LLVMValueRef ConstantVal) { return wrap(ConstantExpr::getFNeg( unwrap<Constant>(ConstantVal))); @@ -584,6 +672,13 @@ LLVMValueRef LLVMConstNSWAdd(LLVMValueRef LHSConstant, unwrap<Constant>(RHSConstant))); } +LLVMValueRef LLVMConstNUWAdd(LLVMValueRef LHSConstant, + LLVMValueRef RHSConstant) { + return wrap(ConstantExpr::getNUWAdd( + unwrap<Constant>(LHSConstant), + unwrap<Constant>(RHSConstant))); +} + LLVMValueRef LLVMConstFAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getFAdd( unwrap<Constant>(LHSConstant), @@ -596,6 +691,20 @@ LLVMValueRef LLVMConstSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { unwrap<Constant>(RHSConstant))); } +LLVMValueRef LLVMConstNSWSub(LLVMValueRef LHSConstant, + LLVMValueRef RHSConstant) { + return wrap(ConstantExpr::getNSWSub( + unwrap<Constant>(LHSConstant), + unwrap<Constant>(RHSConstant))); +} + +LLVMValueRef LLVMConstNUWSub(LLVMValueRef LHSConstant, + LLVMValueRef RHSConstant) { + return wrap(ConstantExpr::getNUWSub( + unwrap<Constant>(LHSConstant), + unwrap<Constant>(RHSConstant))); +} + LLVMValueRef LLVMConstFSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getFSub(unwrap<Constant>(LHSConstant), unwrap<Constant>(RHSConstant))); @@ -607,6 +716,20 @@ LLVMValueRef LLVMConstMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { unwrap<Constant>(RHSConstant))); } +LLVMValueRef LLVMConstNSWMul(LLVMValueRef LHSConstant, + LLVMValueRef RHSConstant) { + return wrap(ConstantExpr::getNSWMul( + unwrap<Constant>(LHSConstant), + unwrap<Constant>(RHSConstant))); +} + +LLVMValueRef LLVMConstNUWMul(LLVMValueRef LHSConstant, + LLVMValueRef RHSConstant) { + return wrap(ConstantExpr::getNUWMul( + unwrap<Constant>(LHSConstant), + unwrap<Constant>(RHSConstant))); +} + LLVMValueRef LLVMConstFMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getFMul( unwrap<Constant>(LHSConstant), @@ -893,6 +1016,10 @@ LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString, Constraints, HasSideEffects, IsAlignStack)); } +LLVMValueRef LLVMBlockAddress(LLVMValueRef F, LLVMBasicBlockRef BB) { + return wrap(BlockAddress::get(unwrap<Function>(F), unwrap(BB))); +} + /*--.. 
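/* Illustrative sketch, not part of this change: the hunk above fills out the
   nsw/nuw constant-expression wrappers (neg, add, sub, mul).  LLVMInt32Type
   and LLVMConstInt are pre-existing LLVM-C helpers. */
#include "llvm-c/Core.h"

static void wrapping_flag_constants_sketch(void) {
  LLVMValueRef X = LLVMConstInt(LLVMInt32Type(), 40, 0);
  LLVMValueRef Y = LLVMConstInt(LLVMInt32Type(), 2, 0);
  LLVMValueRef A = LLVMConstNUWAdd(X, Y);    /* add nuw */
  LLVMValueRef S = LLVMConstNSWSub(X, Y);    /* sub nsw */
  LLVMValueRef M = LLVMConstNUWMul(X, Y);    /* mul nuw */
  LLVMValueRef N = LLVMConstNSWNeg(X);       /* 0 - X with nsw */
  (void)A; (void)S; (void)M; (void)N;
}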
Operations on global variables, functions, and aliases (globals) ....--*/ LLVMModuleRef LLVMGetGlobalParent(LLVMValueRef Global) { @@ -1029,6 +1156,14 @@ LLVMValueRef LLVMAddGlobal(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name) { GlobalValue::ExternalLinkage, 0, Name)); } +LLVMValueRef LLVMAddGlobalInAddressSpace(LLVMModuleRef M, LLVMTypeRef Ty, + const char *Name, + unsigned AddressSpace) { + return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false, + GlobalValue::ExternalLinkage, 0, Name, 0, + false, AddressSpace)); +} + LLVMValueRef LLVMGetNamedGlobal(LLVMModuleRef M, const char *Name) { return wrap(unwrap(M)->getNamedGlobal(Name)); } @@ -1184,14 +1319,14 @@ void LLVMSetGC(LLVMValueRef Fn, const char *GC) { void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) { Function *Func = unwrap<Function>(Fn); const AttrListPtr PAL = Func->getAttributes(); - const AttrListPtr PALnew = PAL.addAttr(0, PA); + const AttrListPtr PALnew = PAL.addAttr(~0U, PA); Func->setAttributes(PALnew); } void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) { Function *Func = unwrap<Function>(Fn); const AttrListPtr PAL = Func->getAttributes(); - const AttrListPtr PALnew = PAL.removeAttr(0, PA); + const AttrListPtr PALnew = PAL.removeAttr(~0U, PA); Func->setAttributes(PALnew); } @@ -1532,6 +1667,21 @@ void LLVMDisposeBuilder(LLVMBuilderRef Builder) { delete unwrap(Builder); } +/*--.. Metadata builders ...................................................--*/ + +void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L) { + unwrap(Builder)->SetCurrentDebugLocation(L? unwrap<MDNode>(L) : NULL); +} + +LLVMValueRef LLVMGetCurrentDebugLocation(LLVMBuilderRef Builder) { + return wrap(unwrap(Builder)->getCurrentDebugLocation()); +} + +void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst) { + unwrap(Builder)->SetInstDebugLocation(unwrap<Instruction>(Inst)); +} + + /*--.. Instruction builders ................................................--*/ LLVMValueRef LLVMBuildRetVoid(LLVMBuilderRef B) { @@ -1561,6 +1711,11 @@ LLVMValueRef LLVMBuildSwitch(LLVMBuilderRef B, LLVMValueRef V, return wrap(unwrap(B)->CreateSwitch(unwrap(V), unwrap(Else), NumCases)); } +LLVMValueRef LLVMBuildIndirectBr(LLVMBuilderRef B, LLVMValueRef Addr, + unsigned NumDests) { + return wrap(unwrap(B)->CreateIndirectBr(unwrap(Addr), NumDests)); +} + LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn, LLVMValueRef *Args, unsigned NumArgs, LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch, @@ -1583,6 +1738,10 @@ void LLVMAddCase(LLVMValueRef Switch, LLVMValueRef OnVal, unwrap<SwitchInst>(Switch)->addCase(unwrap<ConstantInt>(OnVal), unwrap(Dest)); } +void LLVMAddDestination(LLVMValueRef IndirectBr, LLVMBasicBlockRef Dest) { + unwrap<IndirectBrInst>(IndirectBr)->addDestination(unwrap(Dest)); +} + /*--.. 
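/* Illustrative sketch, not part of this change: LLVMBlockAddress from the
   previous hunk pairs with the LLVMBuildIndirectBr/LLVMAddDestination and
   debug-location setters added above.  Fn, Entry, Target, B and DbgLoc are
   assumed to exist already (a function, two of its blocks, a builder, and an
   MDNode-backed location or NULL); LLVMPositionBuilderAtEnd is a pre-existing
   helper. */
#include "llvm-c/Core.h"

static void indirectbr_sketch(LLVMValueRef Fn, LLVMBasicBlockRef Entry,
                              LLVMBasicBlockRef Target, LLVMBuilderRef B,
                              LLVMValueRef DbgLoc) {
  LLVMValueRef Addr = LLVMBlockAddress(Fn, Target);    /* blockaddress(@Fn, %Target) */
  LLVMPositionBuilderAtEnd(B, Entry);
  LLVMSetCurrentDebugLocation(B, DbgLoc);              /* NULL clears the current location */
  LLVMValueRef IBr = LLVMBuildIndirectBr(B, Addr, 1);  /* reserve space for one destination */
  LLVMAddDestination(IBr, Target);
  LLVMSetInstDebugLocation(B, IBr);                    /* stamp the new instruction */
}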
Arithmetic ..........................................................--*/ LLVMValueRef LLVMBuildAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, @@ -1595,6 +1754,11 @@ LLVMValueRef LLVMBuildNSWAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RH return wrap(unwrap(B)->CreateNSWAdd(unwrap(LHS), unwrap(RHS), Name)); } +LLVMValueRef LLVMBuildNUWAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name) { + return wrap(unwrap(B)->CreateNUWAdd(unwrap(LHS), unwrap(RHS), Name)); +} + LLVMValueRef LLVMBuildFAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateFAdd(unwrap(LHS), unwrap(RHS), Name)); @@ -1605,6 +1769,16 @@ LLVMValueRef LLVMBuildSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, return wrap(unwrap(B)->CreateSub(unwrap(LHS), unwrap(RHS), Name)); } +LLVMValueRef LLVMBuildNSWSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name) { + return wrap(unwrap(B)->CreateNSWSub(unwrap(LHS), unwrap(RHS), Name)); +} + +LLVMValueRef LLVMBuildNUWSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name) { + return wrap(unwrap(B)->CreateNUWSub(unwrap(LHS), unwrap(RHS), Name)); +} + LLVMValueRef LLVMBuildFSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateFSub(unwrap(LHS), unwrap(RHS), Name)); @@ -1615,6 +1789,16 @@ LLVMValueRef LLVMBuildMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, return wrap(unwrap(B)->CreateMul(unwrap(LHS), unwrap(RHS), Name)); } +LLVMValueRef LLVMBuildNSWMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name) { + return wrap(unwrap(B)->CreateNSWMul(unwrap(LHS), unwrap(RHS), Name)); +} + +LLVMValueRef LLVMBuildNUWMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name) { + return wrap(unwrap(B)->CreateNUWMul(unwrap(LHS), unwrap(RHS), Name)); +} + LLVMValueRef LLVMBuildFMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateFMul(unwrap(LHS), unwrap(RHS), Name)); @@ -1685,10 +1869,27 @@ LLVMValueRef LLVMBuildXor(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, return wrap(unwrap(B)->CreateXor(unwrap(LHS), unwrap(RHS), Name)); } +LLVMValueRef LLVMBuildBinOp(LLVMBuilderRef B, LLVMOpcode Op, + LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name) { + return wrap(unwrap(B)->CreateBinOp(Instruction::BinaryOps(Op), unwrap(LHS), + unwrap(RHS), Name)); +} + LLVMValueRef LLVMBuildNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) { return wrap(unwrap(B)->CreateNeg(unwrap(V), Name)); } +LLVMValueRef LLVMBuildNSWNeg(LLVMBuilderRef B, LLVMValueRef V, + const char *Name) { + return wrap(unwrap(B)->CreateNSWNeg(unwrap(V), Name)); +} + +LLVMValueRef LLVMBuildNUWNeg(LLVMBuilderRef B, LLVMValueRef V, + const char *Name) { + return wrap(unwrap(B)->CreateNUWNeg(unwrap(V), Name)); +} + LLVMValueRef LLVMBuildFNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) { return wrap(unwrap(B)->CreateFNeg(unwrap(V), Name)); } @@ -1856,6 +2057,12 @@ LLVMValueRef LLVMBuildTruncOrBitCast(LLVMBuilderRef B, LLVMValueRef Val, Name)); } +LLVMValueRef LLVMBuildCast(LLVMBuilderRef B, LLVMOpcode Op, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name) { + return wrap(unwrap(B)->CreateCast(Instruction::CastOps(Op), unwrap(Val), + unwrap(DestTy), Name)); +} + LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { return 
wrap(unwrap(B)->CreatePointerCast(unwrap(Val), unwrap(DestTy), Name)); diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp index f00f6ee..dbc283e 100644 --- a/lib/VMCore/Function.cpp +++ b/lib/VMCore/Function.cpp @@ -73,35 +73,35 @@ unsigned Argument::getArgNo() const { /// hasByValAttr - Return true if this argument has the byval attribute on it /// in its containing function. bool Argument::hasByValAttr() const { - if (!isa<PointerType>(getType())) return false; + if (!getType()->isPointerTy()) return false; return getParent()->paramHasAttr(getArgNo()+1, Attribute::ByVal); } /// hasNestAttr - Return true if this argument has the nest attribute on /// it in its containing function. bool Argument::hasNestAttr() const { - if (!isa<PointerType>(getType())) return false; + if (!getType()->isPointerTy()) return false; return getParent()->paramHasAttr(getArgNo()+1, Attribute::Nest); } /// hasNoAliasAttr - Return true if this argument has the noalias attribute on /// it in its containing function. bool Argument::hasNoAliasAttr() const { - if (!isa<PointerType>(getType())) return false; + if (!getType()->isPointerTy()) return false; return getParent()->paramHasAttr(getArgNo()+1, Attribute::NoAlias); } /// hasNoCaptureAttr - Return true if this argument has the nocapture attribute /// on it in its containing function. bool Argument::hasNoCaptureAttr() const { - if (!isa<PointerType>(getType())) return false; + if (!getType()->isPointerTy()) return false; return getParent()->paramHasAttr(getArgNo()+1, Attribute::NoCapture); } /// hasSRetAttr - Return true if this argument has the sret attribute on /// it in its containing function. bool Argument::hasStructRetAttr() const { - if (!isa<PointerType>(getType())) return false; + if (!getType()->isPointerTy()) return false; if (this != getParent()->arg_begin()) return false; // StructRet param must be first param return getParent()->paramHasAttr(1, Attribute::StructRet); @@ -155,7 +155,7 @@ Function::Function(const FunctionType *Ty, LinkageTypes Linkage, : GlobalValue(PointerType::getUnqual(Ty), Value::FunctionVal, 0, 0, Linkage, name) { assert(FunctionType::isValidReturnType(getReturnType()) && - !isa<OpaqueType>(getReturnType()) && "invalid return type"); + !getReturnType()->isOpaqueTy() && "invalid return type"); SymTab = new ValueSymbolTable(); // If the function has arguments, mark them as lazily built. diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp index f149c44..489ec65 100644 --- a/lib/VMCore/Globals.cpp +++ b/lib/VMCore/Globals.cpp @@ -44,10 +44,10 @@ static bool removeDeadUsersOfConstant(const Constant *C) { } bool GlobalValue::isMaterializable() const { - return getParent()->isMaterializable(this); + return getParent() && getParent()->isMaterializable(this); } bool GlobalValue::isDematerializable() const { - return getParent()->isDematerializable(this); + return getParent() && getParent()->isDematerializable(this); } bool GlobalValue::Materialize(std::string *ErrInfo) { return getParent()->Materialize(this, ErrInfo); diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp index 4bc3cbb..9f2786e 100644 --- a/lib/VMCore/IRBuilder.cpp +++ b/lib/VMCore/IRBuilder.cpp @@ -19,7 +19,7 @@ using namespace llvm; /// CreateGlobalString - Make a new global variable with an initializer that -/// has array of i8 type filled in the nul terminated string value +/// has array of i8 type filled in with the nul terminated string value /// specified. If Name is specified, it is the name of the global variable /// created. 
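/* Illustrative sketch, not part of this change: the builder wrappers added a
   few hunks up (LLVMBuildNUWAdd, LLVMBuildNSWSub, LLVMBuildBinOp, LLVMBuildCast,
   ...) mirror the constant-expression ones.  B, L and R are assumed to be an
   existing builder and two i32 values, and the LLVMOpcode enumerators LLVMXor
   and LLVMZExt are assumed to be spelled as in llvm-c/Core.h. */
#include "llvm-c/Core.h"

static void builder_flags_sketch(LLVMBuilderRef B, LLVMValueRef L, LLVMValueRef R) {
  LLVMValueRef S1 = LLVMBuildNUWAdd(B, L, R, "s1");          /* add nuw */
  LLVMValueRef S2 = LLVMBuildNSWSub(B, L, R, "s2");          /* sub nsw */
  LLVMValueRef X  = LLVMBuildBinOp(B, LLVMXor, L, R, "x");   /* opcode picked at run time */
  LLVMValueRef Z  = LLVMBuildCast(B, LLVMZExt, S1, LLVMInt64Type(), "z");
  (void)S2; (void)X; (void)Z;
}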
Value *IRBuilderBase::CreateGlobalString(const char *Str, const Twine &Name) { diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp index ec21773..6355834 100644 --- a/lib/VMCore/InlineAsm.cpp +++ b/lib/VMCore/InlineAsm.cpp @@ -220,7 +220,7 @@ bool InlineAsm::Verify(const FunctionType *Ty, StringRef ConstStr) { if (!Ty->getReturnType()->isVoidTy()) return false; break; case 1: - if (isa<StructType>(Ty->getReturnType())) return false; + if (Ty->getReturnType()->isStructTy()) return false; break; default: const StructType *STy = dyn_cast<StructType>(Ty->getReturnType()); diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index 4ec8295..8f4763f 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -562,7 +562,7 @@ static Instruction* createFree(Value* Source, Instruction *InsertBefore, BasicBlock *InsertAtEnd) { assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) && "createFree needs either InsertBefore or InsertAtEnd"); - assert(isa<PointerType>(Source->getType()) && + assert(Source->getType()->isPointerTy() && "Can not free something of nonpointer type!"); BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd; @@ -787,7 +787,7 @@ BasicBlock *UnreachableInst::getSuccessorV(unsigned idx) const { void BranchInst::AssertOK() { if (isConditional()) - assert(getCondition()->getType()->isInteger(1) && + assert(getCondition()->getType()->isIntegerTy(1) && "May only branch on boolean predicates!"); } @@ -892,7 +892,7 @@ static Value *getAISize(LLVMContext &Context, Value *Amt) { else { assert(!isa<BasicBlock>(Amt) && "Passed basic block into allocation size parameter! Use other ctor"); - assert(Amt->getType()->isInteger(32) && + assert(Amt->getType()->isIntegerTy(32) && "Allocation array size is not a 32-bit integer!"); } return Amt; @@ -989,7 +989,7 @@ bool AllocaInst::isStaticAlloca() const { //===----------------------------------------------------------------------===// void LoadInst::AssertOK() { - assert(isa<PointerType>(getOperand(0)->getType()) && + assert(getOperand(0)->getType()->isPointerTy() && "Ptr must have pointer type."); } @@ -1103,7 +1103,7 @@ void LoadInst::setAlignment(unsigned Align) { void StoreInst::AssertOK() { assert(getOperand(0) && getOperand(1) && "Both operands must be non-null!"); - assert(isa<PointerType>(getOperand(1)->getType()) && + assert(getOperand(1)->getType()->isPointerTy() && "Ptr must have pointer type!"); assert(getOperand(0)->getType() == cast<PointerType>(getOperand(1)->getType())->getElementType() @@ -1285,7 +1285,7 @@ static const Type* getIndexedTypeInternal(const Type *Ptr, IndexTy const *Idxs, unsigned CurIdx = 1; for (; CurIdx != NumIdx; ++CurIdx) { const CompositeType *CT = dyn_cast<CompositeType>(Agg); - if (!CT || isa<PointerType>(CT)) return 0; + if (!CT || CT->isPointerTy()) return 0; IndexTy Index = Idxs[CurIdx]; if (!CT->indexValid(Index)) return 0; Agg = CT->getTypeAtIndex(Index); @@ -1391,7 +1391,7 @@ ExtractElementInst::ExtractElementInst(Value *Val, Value *Index, bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) { - if (!isa<VectorType>(Val->getType()) || !Index->getType()->isInteger(32)) + if (!Val->getType()->isVectorTy() || !Index->getType()->isIntegerTy(32)) return false; return true; } @@ -1432,13 +1432,13 @@ InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index, bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt, const Value *Index) { - if 
(!isa<VectorType>(Vec->getType())) + if (!Vec->getType()->isVectorTy()) return false; // First operand of insertelement must be vector type. if (Elt->getType() != cast<VectorType>(Vec->getType())->getElementType()) return false;// Second operand of insertelement must be vector element type. - if (!Index->getType()->isInteger(32)) + if (!Index->getType()->isIntegerTy(32)) return false; // Third operand of insertelement must be i32. return true; } @@ -1485,12 +1485,12 @@ ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask, bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, const Value *Mask) { - if (!isa<VectorType>(V1->getType()) || V1->getType() != V2->getType()) + if (!V1->getType()->isVectorTy() || V1->getType() != V2->getType()) return false; const VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType()); if (!isa<Constant>(Mask) || MaskTy == 0 || - !MaskTy->getElementType()->isInteger(32)) + !MaskTy->getElementType()->isIntegerTy(32)) return false; return true; } @@ -1602,7 +1602,7 @@ const Type* ExtractValueInst::getIndexedType(const Type *Agg, unsigned CurIdx = 0; for (; CurIdx != NumIdx; ++CurIdx) { const CompositeType *CT = dyn_cast<CompositeType>(Agg); - if (!CT || isa<PointerType>(CT) || isa<VectorType>(CT)) return 0; + if (!CT || CT->isPointerTy() || CT->isVectorTy()) return 0; unsigned Index = Idxs[CurIdx]; if (!CT->indexValid(Index)) return 0; Agg = CT->getTypeAtIndex(Index); @@ -1632,7 +1632,7 @@ const Type* ExtractValueInst::getIndexedType(const Type *Agg, static BinaryOperator::BinaryOps AdjustIType(BinaryOperator::BinaryOps iType, const Type *Ty) { // API compatibility: Adjust integer opcodes to floating-point opcodes. - if (Ty->isFPOrFPVector()) { + if (Ty->isFPOrFPVectorTy()) { if (iType == BinaryOperator::Add) iType = BinaryOperator::FAdd; else if (iType == BinaryOperator::Sub) iType = BinaryOperator::FSub; else if (iType == BinaryOperator::Mul) iType = BinaryOperator::FMul; @@ -1678,14 +1678,14 @@ void BinaryOperator::init(BinaryOps iType) { case Mul: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert(getType()->isIntOrIntVector() && + assert(getType()->isIntOrIntVectorTy() && "Tried to create an integer operation on a non-integer type!"); break; case FAdd: case FSub: case FMul: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert(getType()->isFPOrFPVector() && + assert(getType()->isFPOrFPVectorTy() && "Tried to create a floating-point operation on a " "non-floating-point type!"); break; @@ -1693,28 +1693,28 @@ void BinaryOperator::init(BinaryOps iType) { case SDiv: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert((getType()->isInteger() || (isa<VectorType>(getType()) && - cast<VectorType>(getType())->getElementType()->isInteger())) && + assert((getType()->isIntegerTy() || (getType()->isVectorTy() && + cast<VectorType>(getType())->getElementType()->isIntegerTy())) && "Incorrect operand type (not integer) for S/UDIV"); break; case FDiv: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert(getType()->isFPOrFPVector() && + assert(getType()->isFPOrFPVectorTy() && "Incorrect operand type (not floating point) for FDIV"); break; case URem: case SRem: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert((getType()->isInteger() || 
(isa<VectorType>(getType()) && - cast<VectorType>(getType())->getElementType()->isInteger())) && + assert((getType()->isIntegerTy() || (getType()->isVectorTy() && + cast<VectorType>(getType())->getElementType()->isIntegerTy())) && "Incorrect operand type (not integer) for S/UREM"); break; case FRem: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert(getType()->isFPOrFPVector() && + assert(getType()->isFPOrFPVectorTy() && "Incorrect operand type (not floating point) for FREM"); break; case Shl: @@ -1722,18 +1722,18 @@ void BinaryOperator::init(BinaryOps iType) { case AShr: assert(getType() == LHS->getType() && "Shift operation should return same type as operands!"); - assert((getType()->isInteger() || - (isa<VectorType>(getType()) && - cast<VectorType>(getType())->getElementType()->isInteger())) && + assert((getType()->isIntegerTy() || + (getType()->isVectorTy() && + cast<VectorType>(getType())->getElementType()->isIntegerTy())) && "Tried to create a shift operation on a non-integral type!"); break; case And: case Or: case Xor: assert(getType() == LHS->getType() && "Logical operation should return same type as operands!"); - assert((getType()->isInteger() || - (isa<VectorType>(getType()) && - cast<VectorType>(getType())->getElementType()->isInteger())) && + assert((getType()->isIntegerTy() || + (getType()->isVectorTy() && + cast<VectorType>(getType())->getElementType()->isIntegerTy())) && "Tried to create a logical operation on a non-integral type!"); break; default: @@ -1960,7 +1960,8 @@ bool CastInst::isIntegerCast() const { case Instruction::Trunc: return true; case Instruction::BitCast: - return getOperand(0)->getType()->isInteger() && getType()->isInteger(); + return getOperand(0)->getType()->isIntegerTy() && + getType()->isIntegerTy(); } } @@ -1976,8 +1977,8 @@ bool CastInst::isLosslessCast() const { return true; // Pointer to pointer is always lossless. - if (isa<PointerType>(SrcTy)) - return isa<PointerType>(DstTy); + if (SrcTy->isPointerTy()) + return DstTy->isPointerTy(); return false; // Other types have no identity values } @@ -2093,25 +2094,25 @@ unsigned CastInst::isEliminableCastPair( // no-op cast in second op implies firstOp as long as the DestTy // is integer and we are not converting between a vector and a // non vector type. - if (!isa<VectorType>(SrcTy) && DstTy->isInteger()) + if (!SrcTy->isVectorTy() && DstTy->isIntegerTy()) return firstOp; return 0; case 4: // no-op cast in second op implies firstOp as long as the DestTy // is floating point. - if (DstTy->isFloatingPoint()) + if (DstTy->isFloatingPointTy()) return firstOp; return 0; case 5: // no-op cast in first op implies secondOp as long as the SrcTy // is an integer. - if (SrcTy->isInteger()) + if (SrcTy->isIntegerTy()) return secondOp; return 0; case 6: // no-op cast in first op implies secondOp as long as the SrcTy // is a floating point. - if (SrcTy->isFloatingPoint()) + if (SrcTy->isFloatingPointTy()) return secondOp; return 0; case 7: { @@ -2147,12 +2148,12 @@ unsigned CastInst::isEliminableCastPair( case 11: // bitcast followed by ptrtoint is allowed as long as the bitcast // is a pointer to pointer cast. 
- if (isa<PointerType>(SrcTy) && isa<PointerType>(MidTy)) + if (SrcTy->isPointerTy() && MidTy->isPointerTy()) return secondOp; return 0; case 12: // inttoptr, bitcast -> intptr if bitcast is a ptr to ptr cast - if (isa<PointerType>(MidTy) && isa<PointerType>(DstTy)) + if (MidTy->isPointerTy() && DstTy->isPointerTy()) return firstOp; return 0; case 13: { @@ -2273,11 +2274,11 @@ CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty, CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) { - assert(isa<PointerType>(S->getType()) && "Invalid cast"); - assert((Ty->isInteger() || isa<PointerType>(Ty)) && + assert(S->getType()->isPointerTy() && "Invalid cast"); + assert((Ty->isIntegerTy() || Ty->isPointerTy()) && "Invalid cast"); - if (Ty->isInteger()) + if (Ty->isIntegerTy()) return Create(Instruction::PtrToInt, S, Ty, Name, InsertAtEnd); return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd); } @@ -2286,11 +2287,11 @@ CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty, CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore) { - assert(isa<PointerType>(S->getType()) && "Invalid cast"); - assert((Ty->isInteger() || isa<PointerType>(Ty)) && + assert(S->getType()->isPointerTy() && "Invalid cast"); + assert((Ty->isIntegerTy() || Ty->isPointerTy()) && "Invalid cast"); - if (Ty->isInteger()) + if (Ty->isIntegerTy()) return Create(Instruction::PtrToInt, S, Ty, Name, InsertBefore); return Create(Instruction::BitCast, S, Ty, Name, InsertBefore); } @@ -2298,7 +2299,7 @@ CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty, CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, bool isSigned, const Twine &Name, Instruction *InsertBefore) { - assert(C->getType()->isIntOrIntVector() && Ty->isIntOrIntVector() && + assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() && "Invalid integer cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); @@ -2312,7 +2313,7 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, bool isSigned, const Twine &Name, BasicBlock *InsertAtEnd) { - assert(C->getType()->isIntOrIntVector() && Ty->isIntOrIntVector() && + assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() && "Invalid cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); @@ -2326,7 +2327,7 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty, const Twine &Name, Instruction *InsertBefore) { - assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && "Invalid cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); @@ -2339,7 +2340,7 @@ CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty, CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) { - assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && "Invalid cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); @@ -2363,21 +2364,21 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) { unsigned 
DestBits = DestTy->getScalarSizeInBits(); // 0 for ptr // Run through the possibilities ... - if (DestTy->isInteger()) { // Casting to integral - if (SrcTy->isInteger()) { // Casting from integral + if (DestTy->isIntegerTy()) { // Casting to integral + if (SrcTy->isIntegerTy()) { // Casting from integral return true; - } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt + } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt return true; } else if (const VectorType *PTy = dyn_cast<VectorType>(SrcTy)) { // Casting from vector return DestBits == PTy->getBitWidth(); } else { // Casting from something else - return isa<PointerType>(SrcTy); + return SrcTy->isPointerTy(); } - } else if (DestTy->isFloatingPoint()) { // Casting to floating pt - if (SrcTy->isInteger()) { // Casting from integral + } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt + if (SrcTy->isIntegerTy()) { // Casting from integral return true; - } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt + } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt return true; } else if (const VectorType *PTy = dyn_cast<VectorType>(SrcTy)) { // Casting from vector @@ -2393,10 +2394,10 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) { } else { // Casting from something else return DestPTy->getBitWidth() == SrcBits; } - } else if (isa<PointerType>(DestTy)) { // Casting to pointer - if (isa<PointerType>(SrcTy)) { // Casting from pointer + } else if (DestTy->isPointerTy()) { // Casting to pointer + if (SrcTy->isPointerTy()) { // Casting from pointer return true; - } else if (SrcTy->isInteger()) { // Casting from integral + } else if (SrcTy->isIntegerTy()) { // Casting from integral return true; } else { // Casting from something else return false; @@ -2425,8 +2426,8 @@ CastInst::getCastOpcode( "Only first class types are castable!"); // Run through the possibilities ... 
- if (DestTy->isInteger()) { // Casting to integral - if (SrcTy->isInteger()) { // Casting from integral + if (DestTy->isIntegerTy()) { // Casting to integral + if (SrcTy->isIntegerTy()) { // Casting from integral if (DestBits < SrcBits) return Trunc; // int -> smaller int else if (DestBits > SrcBits) { // its an extension @@ -2437,7 +2438,7 @@ CastInst::getCastOpcode( } else { return BitCast; // Same size, No-op cast } - } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt + } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt if (DestIsSigned) return FPToSI; // FP -> sint else @@ -2448,17 +2449,17 @@ CastInst::getCastOpcode( PTy = NULL; return BitCast; // Same size, no-op cast } else { - assert(isa<PointerType>(SrcTy) && + assert(SrcTy->isPointerTy() && "Casting from a value that is not first-class type"); return PtrToInt; // ptr -> int } - } else if (DestTy->isFloatingPoint()) { // Casting to floating pt - if (SrcTy->isInteger()) { // Casting from integral + } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt + if (SrcTy->isIntegerTy()) { // Casting from integral if (SrcIsSigned) return SIToFP; // sint -> FP else return UIToFP; // uint -> FP - } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt + } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt if (DestBits < SrcBits) { return FPTrunc; // FP -> smaller FP } else if (DestBits > SrcBits) { @@ -2485,10 +2486,10 @@ CastInst::getCastOpcode( } else { assert(!"Illegal cast to vector (wrong type or size)"); } - } else if (isa<PointerType>(DestTy)) { - if (isa<PointerType>(SrcTy)) { + } else if (DestTy->isPointerTy()) { + if (SrcTy->isPointerTy()) { return BitCast; // ptr -> ptr - } else if (SrcTy->isInteger()) { + } else if (SrcTy->isIntegerTy()) { return IntToPtr; // int -> ptr } else { assert(!"Casting pointer to other than pointer or int"); @@ -2528,50 +2529,50 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) { switch (op) { default: return false; // This is an input error case Instruction::Trunc: - return SrcTy->isIntOrIntVector() && - DstTy->isIntOrIntVector()&& SrcBitSize > DstBitSize; + return SrcTy->isIntOrIntVectorTy() && + DstTy->isIntOrIntVectorTy()&& SrcBitSize > DstBitSize; case Instruction::ZExt: - return SrcTy->isIntOrIntVector() && - DstTy->isIntOrIntVector()&& SrcBitSize < DstBitSize; + return SrcTy->isIntOrIntVectorTy() && + DstTy->isIntOrIntVectorTy()&& SrcBitSize < DstBitSize; case Instruction::SExt: - return SrcTy->isIntOrIntVector() && - DstTy->isIntOrIntVector()&& SrcBitSize < DstBitSize; + return SrcTy->isIntOrIntVectorTy() && + DstTy->isIntOrIntVectorTy()&& SrcBitSize < DstBitSize; case Instruction::FPTrunc: - return SrcTy->isFPOrFPVector() && - DstTy->isFPOrFPVector() && + return SrcTy->isFPOrFPVectorTy() && + DstTy->isFPOrFPVectorTy() && SrcBitSize > DstBitSize; case Instruction::FPExt: - return SrcTy->isFPOrFPVector() && - DstTy->isFPOrFPVector() && + return SrcTy->isFPOrFPVectorTy() && + DstTy->isFPOrFPVectorTy() && SrcBitSize < DstBitSize; case Instruction::UIToFP: case Instruction::SIToFP: if (const VectorType *SVTy = dyn_cast<VectorType>(SrcTy)) { if (const VectorType *DVTy = dyn_cast<VectorType>(DstTy)) { - return SVTy->getElementType()->isIntOrIntVector() && - DVTy->getElementType()->isFPOrFPVector() && + return SVTy->getElementType()->isIntOrIntVectorTy() && + DVTy->getElementType()->isFPOrFPVectorTy() && SVTy->getNumElements() == DVTy->getNumElements(); } } - return SrcTy->isIntOrIntVector() && 
DstTy->isFPOrFPVector(); + return SrcTy->isIntOrIntVectorTy() && DstTy->isFPOrFPVectorTy(); case Instruction::FPToUI: case Instruction::FPToSI: if (const VectorType *SVTy = dyn_cast<VectorType>(SrcTy)) { if (const VectorType *DVTy = dyn_cast<VectorType>(DstTy)) { - return SVTy->getElementType()->isFPOrFPVector() && - DVTy->getElementType()->isIntOrIntVector() && + return SVTy->getElementType()->isFPOrFPVectorTy() && + DVTy->getElementType()->isIntOrIntVectorTy() && SVTy->getNumElements() == DVTy->getNumElements(); } } - return SrcTy->isFPOrFPVector() && DstTy->isIntOrIntVector(); + return SrcTy->isFPOrFPVectorTy() && DstTy->isIntOrIntVectorTy(); case Instruction::PtrToInt: - return isa<PointerType>(SrcTy) && DstTy->isInteger(); + return SrcTy->isPointerTy() && DstTy->isIntegerTy(); case Instruction::IntToPtr: - return SrcTy->isInteger() && isa<PointerType>(DstTy); + return SrcTy->isIntegerTy() && DstTy->isPointerTy(); case Instruction::BitCast: // BitCast implies a no-op cast of type only. No bits change. // However, you can't cast pointers to anything but pointers. - if (isa<PointerType>(SrcTy) != isa<PointerType>(DstTy)) + if (SrcTy->isPointerTy() != DstTy->isPointerTy()) return false; // Now we know we're not dealing with a pointer/non-pointer mismatch. In all @@ -3149,7 +3150,7 @@ void SwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) { //===----------------------------------------------------------------------===// void IndirectBrInst::init(Value *Address, unsigned NumDests) { - assert(Address && isa<PointerType>(Address->getType()) && + assert(Address && Address->getType()->isPointerTy() && "Address of indirectbr must be a pointer"); ReservedSpace = 1+NumDests; NumOperands = 1; diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h index ccca789..9887f28 100644 --- a/lib/VMCore/LLVMContextImpl.h +++ b/lib/VMCore/LLVMContextImpl.h @@ -116,6 +116,10 @@ public: ConstantStruct, true /*largekey*/> StructConstantsTy; StructConstantsTy StructConstants; + typedef ConstantUniqueMap<Constant*, UnionType, ConstantUnion> + UnionConstantsTy; + UnionConstantsTy UnionConstants; + typedef ConstantUniqueMap<std::vector<Constant*>, VectorType, ConstantVector> VectorConstantsTy; VectorConstantsTy VectorConstants; @@ -159,12 +163,16 @@ public: TypeMap<PointerValType, PointerType> PointerTypes; TypeMap<FunctionValType, FunctionType> FunctionTypes; TypeMap<StructValType, StructType> StructTypes; + TypeMap<UnionValType, UnionType> UnionTypes; TypeMap<IntegerValType, IntegerType> IntegerTypes; // Opaque types are not structurally uniqued, so don't use TypeMap. typedef SmallPtrSet<const OpaqueType*, 8> OpaqueTypesTy; OpaqueTypesTy OpaqueTypes; - + + /// Used as an abstract type that will never be resolved. + OpaqueType *const AlwaysOpaqueTy; + /// ValueHandles - This map keeps track of all of the value handles that are /// watching a Value*. The Value::HasValueHandle bit is used to know @@ -196,7 +204,12 @@ public: Int8Ty(C, 8), Int16Ty(C, 16), Int32Ty(C, 32), - Int64Ty(C, 64) { } + Int64Ty(C, 64), + AlwaysOpaqueTy(new OpaqueType(C)) { + // Make sure the AlwaysOpaqueTy stays alive as long as the Context. 
+ AlwaysOpaqueTy->addRef(); + OpaqueTypes.insert(AlwaysOpaqueTy); + } ~LLVMContextImpl() { ExprConstants.freeConstants(); @@ -216,12 +229,28 @@ public: if (I->second->use_empty()) delete I->second; } - MDNodeSet.clear(); + AlwaysOpaqueTy->dropRef(); for (OpaqueTypesTy::iterator I = OpaqueTypes.begin(), E = OpaqueTypes.end(); I != E; ++I) { (*I)->AbstractTypeUsers.clear(); delete *I; } + // Destroy MDNode operands first. + for (FoldingSetIterator<MDNode> I = MDNodeSet.begin(), E = MDNodeSet.end(); + I != E;) { + MDNode *N = &(*I); + ++I; + N->replaceAllOperandsWithNull(); + } + while (!MDNodeSet.empty()) { + MDNode *N = &(*MDNodeSet.begin()); + N->destroy(); + } + // Destroy MDStrings. + for (StringMap<MDString*>::iterator I = MDStringCache.begin(), + E = MDStringCache.end(); I != E; ++I) { + delete I->second; + } } }; diff --git a/lib/VMCore/Makefile b/lib/VMCore/Makefile index bc5e77d..4395ecf 100644 --- a/lib/VMCore/Makefile +++ b/lib/VMCore/Makefile @@ -30,5 +30,5 @@ $(GENFILE): $(ObjDir)/Intrinsics.gen.tmp changed significantly. ) install-local:: $(GENFILE) - $(Echo) Installing $(PROJ_includedir)/llvm/Intrinsics.gen - $(Verb) $(DataInstall) $(GENFILE) $(PROJ_includedir)/llvm/Intrinsics.gen + $(Echo) Installing $(DESTDIR)$(PROJ_includedir)/llvm/Intrinsics.gen + $(Verb) $(DataInstall) $(GENFILE) $(DESTDIR)$(PROJ_includedir)/llvm/Intrinsics.gen diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index 07a5f3c..a08c454 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -257,6 +257,13 @@ void MDNode::Profile(FoldingSetNodeID &ID) const { ID.AddPointer(getOperand(i)); } +// replaceAllOperandsWithNull - This is used while destroying llvm context to +// gracefully delete all nodes. This method replaces all operands with null. +void MDNode::replaceAllOperandsWithNull() { + for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands; + Op != E; ++Op) + replaceOperand(Op, 0); +} // Replace value from this node's operand list. void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) { diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp index 2b0b235..a782e5a 100644 --- a/lib/VMCore/Pass.cpp +++ b/lib/VMCore/Pass.cpp @@ -194,6 +194,9 @@ PassManagerType BasicBlockPass::getPotentialPassManagerType() const { // namespace { class PassRegistrar { + /// Guards the contents of this class. + mutable sys::SmartMutex<true> Lock; + /// PassInfoMap - Keep track of the passinfo object for each registered llvm /// pass. typedef std::map<intptr_t, const PassInfo*> MapType; @@ -213,16 +216,19 @@ class PassRegistrar { public: const PassInfo *GetPassInfo(intptr_t TI) const { + sys::SmartScopedLock<true> Guard(Lock); MapType::const_iterator I = PassInfoMap.find(TI); return I != PassInfoMap.end() ? I->second : 0; } const PassInfo *GetPassInfo(StringRef Arg) const { + sys::SmartScopedLock<true> Guard(Lock); StringMapType::const_iterator I = PassInfoStringMap.find(Arg); return I != PassInfoStringMap.end() ? 
I->second : 0; } void RegisterPass(const PassInfo &PI) { + sys::SmartScopedLock<true> Guard(Lock); bool Inserted = PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second; assert(Inserted && "Pass registered multiple times!"); Inserted=Inserted; @@ -230,6 +236,7 @@ public: } void UnregisterPass(const PassInfo &PI) { + sys::SmartScopedLock<true> Guard(Lock); MapType::iterator I = PassInfoMap.find(PI.getTypeInfo()); assert(I != PassInfoMap.end() && "Pass registered but not in map!"); @@ -239,6 +246,7 @@ public: } void EnumerateWith(PassRegistrationListener *L) { + sys::SmartScopedLock<true> Guard(Lock); for (MapType::const_iterator I = PassInfoMap.begin(), E = PassInfoMap.end(); I != E; ++I) L->passEnumerate(I->second); @@ -249,6 +257,7 @@ public: void RegisterAnalysisGroup(PassInfo *InterfaceInfo, const PassInfo *ImplementationInfo, bool isDefault) { + sys::SmartScopedLock<true> Guard(Lock); AnalysisGroupInfo &AGI = AnalysisGroupInfoMap[InterfaceInfo]; assert(AGI.Implementations.count(ImplementationInfo) == 0 && "Cannot add a pass to the same analysis group more than once!"); diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index a1d554e..c4dfe14 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -1118,6 +1118,7 @@ bool BBPassManager::runOnFunction(Function &F) { for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { BasicBlockPass *BP = getContainedPass(Index); + bool LocalChanged = false; dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, I->getName()); dumpRequiredSet(BP); @@ -1129,11 +1130,12 @@ bool BBPassManager::runOnFunction(Function &F) { PassManagerPrettyStackEntry X(BP, *I); Timer *T = StartPassTimer(BP); - Changed |= BP->runOnBasicBlock(*I); + LocalChanged |= BP->runOnBasicBlock(*I); StopPassTimer(BP, T); } - if (Changed) + Changed |= LocalChanged; + if (LocalChanged) dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG, I->getName()); dumpPreservedSet(BP); @@ -1220,9 +1222,11 @@ void FunctionPassManager::add(Pass *P) { /// so, return true. 
/// bool FunctionPassManager::run(Function &F) { - std::string errstr; - if (F.Materialize(&errstr)) { - llvm_report_error("Error reading bitcode file: " + errstr); + if (F.isMaterializable()) { + std::string errstr; + if (F.Materialize(&errstr)) { + llvm_report_error("Error reading bitcode file: " + errstr); + } } return FPM->run(F); } @@ -1332,6 +1336,7 @@ bool FPPassManager::runOnFunction(Function &F) { for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { FunctionPass *FP = getContainedPass(Index); + bool LocalChanged = false; dumpPassInfo(FP, EXECUTION_MSG, ON_FUNCTION_MSG, F.getName()); dumpRequiredSet(FP); @@ -1342,11 +1347,12 @@ bool FPPassManager::runOnFunction(Function &F) { PassManagerPrettyStackEntry X(FP, F); Timer *T = StartPassTimer(FP); - Changed |= FP->runOnFunction(F); + LocalChanged |= FP->runOnFunction(F); StopPassTimer(FP, T); } - if (Changed) + Changed |= LocalChanged; + if (LocalChanged) dumpPassInfo(FP, MODIFICATION_MSG, ON_FUNCTION_MSG, F.getName()); dumpPreservedSet(FP); @@ -1405,6 +1411,7 @@ MPPassManager::runOnModule(Module &M) { for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { ModulePass *MP = getContainedPass(Index); + bool LocalChanged = false; dumpPassInfo(MP, EXECUTION_MSG, ON_MODULE_MSG, M.getModuleIdentifier()); dumpRequiredSet(MP); @@ -1414,11 +1421,12 @@ MPPassManager::runOnModule(Module &M) { { PassManagerPrettyStackEntry X(MP, M); Timer *T = StartPassTimer(MP); - Changed |= MP->runOnModule(M); + LocalChanged |= MP->runOnModule(M); StopPassTimer(MP, T); } - if (Changed) + Changed |= LocalChanged; + if (LocalChanged) dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG, M.getModuleIdentifier()); dumpPreservedSet(MP); @@ -1704,8 +1712,13 @@ LLVMPassManagerRef LLVMCreatePassManager() { return wrap(new PassManager()); } +LLVMPassManagerRef LLVMCreateFunctionPassManagerForModule(LLVMModuleRef M) { + return wrap(new FunctionPassManager(unwrap(M))); +} + LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef P) { - return wrap(new FunctionPassManager(unwrap(P))); + return LLVMCreateFunctionPassManagerForModule( + reinterpret_cast<LLVMModuleRef>(P)); } LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) { diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp index 044de4f..9b2c2ca 100644 --- a/lib/VMCore/Type.cpp +++ b/lib/VMCore/Type.cpp @@ -50,8 +50,8 @@ void AbstractTypeUser::setType(Value *V, const Type *NewTy) { /// Because of the way Type subclasses are allocated, this function is necessary /// to use the correct kind of "delete" operator to deallocate the Type object. -/// Some type objects (FunctionTy, StructTy) allocate additional space after -/// the space for their derived type to hold the contained types array of +/// Some type objects (FunctionTy, StructTy, UnionTy) allocate additional space +/// after the space for their derived type to hold the contained types array of /// PATypeHandles. Using this allocation scheme means all the PATypeHandles are /// allocated with the type object, decreasing allocations and eliminating the /// need for a std::vector to be used in the Type class itself. @@ -61,7 +61,8 @@ void Type::destroy() const { // Structures and Functions allocate their contained types past the end of // the type object itself. These need to be destroyed differently than the // other types. 
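/* Illustrative sketch, not part of this change: the PassManager C-binding hunk
   above introduces LLVMCreateFunctionPassManagerForModule and reroutes the old
   module-provider constructor through it.  M is assumed to be an existing
   LLVMModuleRef; LLVMDisposePassManager is a pre-existing helper. */
#include "llvm-c/Core.h"

static void function_pass_manager_sketch(LLVMModuleRef M) {
  LLVMPassManagerRef FPM = LLVMCreateFunctionPassManagerForModule(M);
  /* ... add function passes here and run them over each function ... */
  LLVMDisposePassManager(FPM);
}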
- if (isa<FunctionType>(this) || isa<StructType>(this)) { + if (this->isFunctionTy() || this->isStructTy() || + this->isUnionTy()) { // First, make sure we destruct any PATypeHandles allocated by these // subclasses. They must be manually destructed. for (unsigned i = 0; i < NumContainedTys; ++i) @@ -69,10 +70,12 @@ void Type::destroy() const { // Now call the destructor for the subclass directly because we're going // to delete this as an array of char. - if (isa<FunctionType>(this)) + if (this->isFunctionTy()) static_cast<const FunctionType*>(this)->FunctionType::~FunctionType(); - else + else if (this->isStructTy()) static_cast<const StructType*>(this)->StructType::~StructType(); + else + static_cast<const UnionType*>(this)->UnionType::~UnionType(); // Finally, remove the memory as an array deallocation of the chars it was // constructed from. @@ -124,32 +127,32 @@ const Type *Type::getScalarType() const { return this; } -/// isInteger - Return true if this is an IntegerType of the specified width. -bool Type::isInteger(unsigned Bitwidth) const { - return isInteger() && cast<IntegerType>(this)->getBitWidth() == Bitwidth; +/// isIntegerTy - Return true if this is an IntegerType of the specified width. +bool Type::isIntegerTy(unsigned Bitwidth) const { + return isIntegerTy() && cast<IntegerType>(this)->getBitWidth() == Bitwidth; } -/// isIntOrIntVector - Return true if this is an integer type or a vector of +/// isIntOrIntVectorTy - Return true if this is an integer type or a vector of /// integer types. /// -bool Type::isIntOrIntVector() const { - if (isInteger()) +bool Type::isIntOrIntVectorTy() const { + if (isIntegerTy()) return true; if (ID != Type::VectorTyID) return false; - return cast<VectorType>(this)->getElementType()->isInteger(); + return cast<VectorType>(this)->getElementType()->isIntegerTy(); } -/// isFPOrFPVector - Return true if this is a FP type or a vector of FP types. +/// isFPOrFPVectorTy - Return true if this is a FP type or a vector of FP types. /// -bool Type::isFPOrFPVector() const { +bool Type::isFPOrFPVectorTy() const { if (ID == Type::FloatTyID || ID == Type::DoubleTyID || ID == Type::FP128TyID || ID == Type::X86_FP80TyID || ID == Type::PPC_FP128TyID) return true; if (ID != Type::VectorTyID) return false; - return cast<VectorType>(this)->getElementType()->isFloatingPoint(); + return cast<VectorType>(this)->getElementType()->isFloatingPointTy(); } // canLosslesslyBitCastTo - Return true if this type can be converted to @@ -173,8 +176,8 @@ bool Type::canLosslesslyBitCastTo(const Type *Ty) const { // At this point we have only various mismatches of the first class types // remaining and ptr->ptr. Just select the lossless conversions. Everything // else is not lossless. - if (isa<PointerType>(this)) - return isa<PointerType>(Ty); + if (this->isPointerTy()) + return Ty->isPointerTy(); return false; // Other types have no identity values } @@ -204,7 +207,7 @@ unsigned Type::getScalarSizeInBits() const { int Type::getFPMantissaWidth() const { if (const VectorType *VTy = dyn_cast<VectorType>(this)) return VTy->getElementType()->getFPMantissaWidth(); - assert(isFloatingPoint() && "Not a floating point type!"); + assert(isFloatingPointTy() && "Not a floating point type!"); if (ID == FloatTyID) return 24; if (ID == DoubleTyID) return 53; if (ID == X86_FP80TyID) return 64; @@ -217,7 +220,7 @@ int Type::getFPMantissaWidth() const { /// iff all of the members of the type are sized as well. 
Since asking for /// their size is relatively uncommon, move this operation out of line. bool Type::isSizedDerivedType() const { - if (isa<IntegerType>(this)) + if (this->isIntegerTy()) return true; if (const ArrayType *ATy = dyn_cast<ArrayType>(this)) @@ -226,7 +229,7 @@ bool Type::isSizedDerivedType() const { if (const VectorType *PTy = dyn_cast<VectorType>(this)) return PTy->getElementType()->isSized(); - if (!isa<StructType>(this)) + if (!this->isStructTy() && !this->isUnionTy()) return false; // Okay, our struct is sized if all of the elements are... @@ -285,7 +288,7 @@ std::string Type::getDescription() const { bool StructType::indexValid(const Value *V) const { // Structure indexes require 32-bit integer constants. - if (V->getType()->isInteger(32)) + if (V->getType()->isIntegerTy(32)) if (const ConstantInt *CU = dyn_cast<ConstantInt>(V)) return indexValid(CU->getZExtValue()); return false; @@ -308,6 +311,32 @@ const Type *StructType::getTypeAtIndex(unsigned Idx) const { return ContainedTys[Idx]; } + +bool UnionType::indexValid(const Value *V) const { + // Union indexes require 32-bit integer constants. + if (V->getType()->isIntegerTy(32)) + if (const ConstantInt *CU = dyn_cast<ConstantInt>(V)) + return indexValid(CU->getZExtValue()); + return false; +} + +bool UnionType::indexValid(unsigned V) const { + return V < NumContainedTys; +} + +// getTypeAtIndex - Given an index value into the type, return the type of the +// element. For a structure type, this must be a constant value... +// +const Type *UnionType::getTypeAtIndex(const Value *V) const { + unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue(); + return getTypeAtIndex(Idx); +} + +const Type *UnionType::getTypeAtIndex(unsigned Idx) const { + assert(indexValid(Idx) && "Invalid structure index!"); + return ContainedTys[Idx]; +} + //===----------------------------------------------------------------------===// // Primitive 'Type' data //===----------------------------------------------------------------------===// @@ -418,7 +447,7 @@ bool FunctionType::isValidReturnType(const Type *RetTy) { /// isValidArgumentType - Return true if the specified type is valid as an /// argument type. bool FunctionType::isValidArgumentType(const Type *ArgTy) { - return ArgTy->isFirstClassType() || isa<OpaqueType>(ArgTy); + return ArgTy->isFirstClassType() || ArgTy->isOpaqueTy(); } FunctionType::FunctionType(const Type *Result, @@ -463,6 +492,23 @@ StructType::StructType(LLVMContext &C, setAbstract(isAbstract); } +UnionType::UnionType(LLVMContext &C,const Type* const* Types, unsigned NumTypes) + : CompositeType(C, UnionTyID) { + ContainedTys = reinterpret_cast<PATypeHandle*>(this + 1); + NumContainedTys = NumTypes; + bool isAbstract = false; + for (unsigned i = 0; i < NumTypes; ++i) { + assert(Types[i] && "<null> type for union field!"); + assert(isValidElementType(Types[i]) && + "Invalid type for union element!"); + new (&ContainedTys[i]) PATypeHandle(Types[i], this); + isAbstract |= Types[i]->isAbstract(); + } + + // Calculate whether or not this type is abstract + setAbstract(isAbstract); +} + ArrayType::ArrayType(const Type *ElType, uint64_t NumEl) : SequentialType(ArrayTyID, ElType) { NumElements = NumEl; @@ -507,30 +553,7 @@ void DerivedType::dropAllTypeUses() { if (NumContainedTys != 0) { // The type must stay abstract. To do this, we insert a pointer to a type // that will never get resolved, thus will always be abstract. 
- static Type *AlwaysOpaqueTy = 0; - static PATypeHolder* Holder = 0; - Type *tmp = AlwaysOpaqueTy; - if (llvm_is_multithreaded()) { - sys::MemoryFence(); - if (!tmp) { - llvm_acquire_global_lock(); - tmp = AlwaysOpaqueTy; - if (!tmp) { - tmp = OpaqueType::get(getContext()); - PATypeHolder* tmp2 = new PATypeHolder(tmp); - sys::MemoryFence(); - AlwaysOpaqueTy = tmp; - Holder = tmp2; - } - - llvm_release_global_lock(); - } - } else if (!AlwaysOpaqueTy) { - AlwaysOpaqueTy = OpaqueType::get(getContext()); - Holder = new PATypeHolder(AlwaysOpaqueTy); - } - - ContainedTys[0] = AlwaysOpaqueTy; + ContainedTys[0] = getContext().pImpl->AlwaysOpaqueTy; // Change the rest of the types to be Int32Ty's. It doesn't matter what we // pick so long as it doesn't point back to this type. We choose something @@ -590,7 +613,7 @@ void Type::PromoteAbstractToConcrete() { // Concrete types are leaves in the tree. Since an SCC will either be all // abstract or all concrete, we only need to check one type. if (SCC[0]->isAbstract()) { - if (isa<OpaqueType>(SCC[0])) + if (SCC[0]->isOpaqueTy()) return; // Not going to be concrete, sorry. // If all of the children of all of the types in this SCC are concrete, @@ -637,7 +660,7 @@ static bool TypesEqual(const Type *Ty, const Type *Ty2, std::map<const Type *, const Type *> &EqTypes) { if (Ty == Ty2) return true; if (Ty->getTypeID() != Ty2->getTypeID()) return false; - if (isa<OpaqueType>(Ty)) + if (Ty->isOpaqueTy()) return false; // Two unequal opaque types are never equal std::map<const Type*, const Type*>::iterator It = EqTypes.find(Ty); @@ -667,6 +690,13 @@ static bool TypesEqual(const Type *Ty, const Type *Ty2, if (!TypesEqual(STy->getElementType(i), STy2->getElementType(i), EqTypes)) return false; return true; + } else if (const UnionType *UTy = dyn_cast<UnionType>(Ty)) { + const UnionType *UTy2 = cast<UnionType>(Ty2); + if (UTy->getNumElements() != UTy2->getNumElements()) return false; + for (unsigned i = 0, e = UTy2->getNumElements(); i != e; ++i) + if (!TypesEqual(UTy->getElementType(i), UTy2->getElementType(i), EqTypes)) + return false; + return true; } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { const ArrayType *ATy2 = cast<ArrayType>(Ty2); return ATy->getNumElements() == ATy2->getNumElements() && @@ -858,7 +888,7 @@ ArrayType *ArrayType::get(const Type *ElementType, uint64_t NumElements) { bool ArrayType::isValidElementType(const Type *ElemTy) { return ElemTy->getTypeID() != VoidTyID && ElemTy->getTypeID() != LabelTyID && - ElemTy->getTypeID() != MetadataTyID && !isa<FunctionType>(ElemTy); + ElemTy->getTypeID() != MetadataTyID && !ElemTy->isFunctionTy(); } VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) { @@ -881,8 +911,8 @@ VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) { } bool VectorType::isValidElementType(const Type *ElemTy) { - return ElemTy->isInteger() || ElemTy->isFloatingPoint() || - isa<OpaqueType>(ElemTy); + return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy() || + ElemTy->isOpaqueTy(); } //===----------------------------------------------------------------------===// @@ -924,12 +954,66 @@ StructType *StructType::get(LLVMContext &Context, const Type *type, ...) 
{ } bool StructType::isValidElementType(const Type *ElemTy) { - return ElemTy->getTypeID() != VoidTyID && ElemTy->getTypeID() != LabelTyID && - ElemTy->getTypeID() != MetadataTyID && !isa<FunctionType>(ElemTy); + return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && + !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy(); } //===----------------------------------------------------------------------===// +// Union Type Factory... +// + +UnionType *UnionType::get(const Type* const* Types, unsigned NumTypes) { + assert(NumTypes > 0 && "union must have at least one member type!"); + UnionValType UTV(Types, NumTypes); + UnionType *UT = 0; + + LLVMContextImpl *pImpl = Types[0]->getContext().pImpl; + + UT = pImpl->UnionTypes.get(UTV); + + if (!UT) { + // Value not found. Derive a new type! + UT = (UnionType*) operator new(sizeof(UnionType) + + sizeof(PATypeHandle) * NumTypes); + new (UT) UnionType(Types[0]->getContext(), Types, NumTypes); + pImpl->UnionTypes.add(UTV, UT); + } +#ifdef DEBUG_MERGE_TYPES + DEBUG(dbgs() << "Derived new type: " << *UT << "\n"); +#endif + return UT; +} + +UnionType *UnionType::get(const Type *type, ...) { + va_list ap; + SmallVector<const llvm::Type*, 8> UnionFields; + va_start(ap, type); + while (type) { + UnionFields.push_back(type); + type = va_arg(ap, llvm::Type*); + } + unsigned NumTypes = UnionFields.size(); + assert(NumTypes > 0 && "union must have at least one member type!"); + return llvm::UnionType::get(&UnionFields[0], NumTypes); +} + +bool UnionType::isValidElementType(const Type *ElemTy) { + return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && + !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy(); +} + +int UnionType::getElementTypeIndex(const Type *ElemTy) const { + int index = 0; + for (UnionType::element_iterator I = element_begin(), E = element_end(); + I != E; ++I, ++index) { + if (ElemTy == *I) return index; + } + + return -1; +} + +//===----------------------------------------------------------------------===// // Pointer Type Factory... // @@ -1192,6 +1276,21 @@ void StructType::typeBecameConcrete(const DerivedType *AbsTy) { // concrete - this could potentially change us from an abstract type to a // concrete type. // +void UnionType::refineAbstractType(const DerivedType *OldType, + const Type *NewType) { + LLVMContextImpl *pImpl = OldType->getContext().pImpl; + pImpl->UnionTypes.RefineAbstractType(this, OldType, NewType); +} + +void UnionType::typeBecameConcrete(const DerivedType *AbsTy) { + LLVMContextImpl *pImpl = AbsTy->getContext().pImpl; + pImpl->UnionTypes.TypeBecameConcrete(this, AbsTy); +} + +// refineAbstractType - Called when a contained type is found to be more +// concrete - this could potentially change us from an abstract type to a +// concrete type. 
+// void PointerType::refineAbstractType(const DerivedType *OldType, const Type *NewType) { LLVMContextImpl *pImpl = OldType->getContext().pImpl; @@ -1204,7 +1303,7 @@ void PointerType::typeBecameConcrete(const DerivedType *AbsTy) { } bool SequentialType::indexValid(const Value *V) const { - if (isa<IntegerType>(V->getType())) + if (V->getType()->isIntegerTy()) return true; return false; } diff --git a/lib/VMCore/TypesContext.h b/lib/VMCore/TypesContext.h index 4842845..02ab113 100644 --- a/lib/VMCore/TypesContext.h +++ b/lib/VMCore/TypesContext.h @@ -180,6 +180,32 @@ public: } }; +// UnionValType - Define a class to hold the key that goes into the TypeMap +// +class UnionValType { + std::vector<const Type*> ElTypes; +public: + UnionValType(const Type* const* Types, unsigned NumTypes) + : ElTypes(&Types[0], &Types[NumTypes]) {} + + static UnionValType get(const UnionType *UT) { + std::vector<const Type *> ElTypes; + ElTypes.reserve(UT->getNumElements()); + for (unsigned i = 0, e = UT->getNumElements(); i != e; ++i) + ElTypes.push_back(UT->getElementType(i)); + + return UnionValType(&ElTypes[0], ElTypes.size()); + } + + static unsigned hashTypeStructure(const UnionType *UT) { + return UT->getNumElements(); + } + + inline bool operator<(const UnionValType &UTV) const { + return (ElTypes < UTV.ElTypes); + } +}; + // FunctionValType - Define a class to hold the key that goes into the TypeMap // class FunctionValType { diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp index 3759b8a..a36d262 100644 --- a/lib/VMCore/Value.cpp +++ b/lib/VMCore/Value.cpp @@ -45,11 +45,11 @@ Value::Value(const Type *ty, unsigned scid) UseList(0), Name(0) { if (isa<CallInst>(this) || isa<InvokeInst>(this)) assert((VTy->isFirstClassType() || VTy->isVoidTy() || - isa<OpaqueType>(ty) || VTy->getTypeID() == Type::StructTyID) && + ty->isOpaqueTy() || VTy->isStructTy()) && "invalid CallInst type!"); else if (!isa<Constant>(this) && !isa<BasicBlock>(this)) assert((VTy->isFirstClassType() || VTy->isVoidTy() || - isa<OpaqueType>(ty)) && + ty->isOpaqueTy()) && "Cannot create non-first-class values except for constants!"); } @@ -320,7 +320,7 @@ void Value::replaceAllUsesWith(Value *New) { } Value *Value::stripPointerCasts() { - if (!isa<PointerType>(getType())) + if (!getType()->isPointerTy()) return this; Value *V = this; do { @@ -337,12 +337,12 @@ Value *Value::stripPointerCasts() { } else { return V; } - assert(isa<PointerType>(V->getType()) && "Unexpected operand type!"); + assert(V->getType()->isPointerTy() && "Unexpected operand type!"); } while (1); } Value *Value::getUnderlyingObject(unsigned MaxLookup) { - if (!isa<PointerType>(getType())) + if (!getType()->isPointerTy()) return this; Value *V = this; for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { @@ -357,7 +357,7 @@ Value *Value::getUnderlyingObject(unsigned MaxLookup) { } else { return V; } - assert(isa<PointerType>(V->getType()) && "Unexpected operand type!"); + assert(V->getType()->isPointerTy() && "Unexpected operand type!"); } return V; } diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp index 7f9a6cd..a092cd1 100644 --- a/lib/VMCore/ValueTypes.cpp +++ b/lib/VMCore/ValueTypes.cpp @@ -36,17 +36,17 @@ EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT, bool EVT::isExtendedFloatingPoint() const { assert(isExtended() && "Type is not extended!"); - return LLVMTy->isFPOrFPVector(); + return LLVMTy->isFPOrFPVectorTy(); } bool EVT::isExtendedInteger() const { assert(isExtended() && "Type is not extended!"); - 
return LLVMTy->isIntOrIntVector(); + return LLVMTy->isIntOrIntVectorTy(); } bool EVT::isExtendedVector() const { assert(isExtended() && "Type is not extended!"); - return isa<VectorType>(LLVMTy); + return LLVMTy->isVectorTy(); } bool EVT::isExtended64BitVector() const { @@ -126,6 +126,7 @@ std::string EVT::getEVTString() const { case MVT::v8f32: return "v8f32"; case MVT::v2f64: return "v2f64"; case MVT::v4f64: return "v4f64"; + case MVT::Metadata:return "Metadata"; } } diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index d0e8d30..721e96a 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -161,7 +161,8 @@ namespace { VerifierFailureAction action; // What to do if verification fails. Module *Mod; // Module we are verifying right now - DominatorTree *DT; // Dominator Tree, caution can be null! + LLVMContext *Context; // Context within which we are verifying + DominatorTree *DT; // Dominator Tree, caution can be null! std::string Messages; raw_string_ostream MessagesStr; @@ -178,24 +179,25 @@ namespace { Verifier() : FunctionPass(&ID), Broken(false), RealPass(true), action(AbortProcessAction), - DT(0), MessagesStr(Messages) {} + Mod(0), Context(0), DT(0), MessagesStr(Messages) {} explicit Verifier(VerifierFailureAction ctn) : FunctionPass(&ID), - Broken(false), RealPass(true), action(ctn), DT(0), + Broken(false), RealPass(true), action(ctn), Mod(0), Context(0), DT(0), MessagesStr(Messages) {} explicit Verifier(bool AB) : FunctionPass(&ID), Broken(false), RealPass(true), - action( AB ? AbortProcessAction : PrintMessageAction), DT(0), - MessagesStr(Messages) {} + action( AB ? AbortProcessAction : PrintMessageAction), Mod(0), + Context(0), DT(0), MessagesStr(Messages) {} explicit Verifier(DominatorTree &dt) : FunctionPass(&ID), - Broken(false), RealPass(false), action(PrintMessageAction), - DT(&dt), MessagesStr(Messages) {} + Broken(false), RealPass(false), action(PrintMessageAction), Mod(0), + Context(0), DT(&dt), MessagesStr(Messages) {} bool doInitialization(Module &M) { Mod = &M; + Context = &M.getContext(); verifyTypeSymbolTable(M.getTypeSymbolTable()); // If this is a real pass, in a pass manager, we must abort before @@ -211,6 +213,7 @@ namespace { if (RealPass) DT = &getAnalysis<DominatorTree>(); Mod = F.getParent(); + if (!Context) Context = &F.getContext(); visit(F); InstsInThisBlock.clear(); @@ -314,6 +317,7 @@ namespace { void visitStoreInst(StoreInst &SI); void visitInstruction(Instruction &I); void visitTerminatorInst(TerminatorInst &I); + void visitBranchInst(BranchInst &BI); void visitReturnInst(ReturnInst &RI); void visitSwitchInst(SwitchInst &SI); void visitSelectInst(SelectInst &SI); @@ -429,7 +433,7 @@ void Verifier::visitGlobalValue(GlobalValue &GV) { if (GV.hasAppendingLinkage()) { GlobalVariable *GVar = dyn_cast<GlobalVariable>(&GV); - Assert1(GVar && isa<ArrayType>(GVar->getType()->getElementType()), + Assert1(GVar && GVar->getType()->getElementType()->isArrayTy(), "Only global arrays can have appending linkage!", GVar); } } @@ -596,13 +600,16 @@ void Verifier::visitFunction(Function &F) { const FunctionType *FT = F.getFunctionType(); unsigned NumArgs = F.arg_size(); + Assert1(Context == &F.getContext(), + "Function context does not match Module context!", &F); + Assert1(!F.hasCommonLinkage(), "Functions may not have common linkage", &F); Assert2(FT->getNumParams() == NumArgs, "# formal arguments must match # of arguments for function type!", &F, FT); Assert1(F.getReturnType()->isFirstClassType() || F.getReturnType()->isVoidTy() || - 
isa<StructType>(F.getReturnType()), + F.getReturnType()->isStructTy(), "Functions cannot return aggregate values!", &F); Assert1(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(), @@ -743,6 +750,14 @@ void Verifier::visitTerminatorInst(TerminatorInst &I) { visitInstruction(I); } +void Verifier::visitBranchInst(BranchInst &BI) { + if (BI.isConditional()) { + Assert2(BI.getCondition()->getType()->isIntegerTy(1), + "Branch condition is not 'i1' type!", &BI, BI.getCondition()); + } + visitTerminatorInst(BI); +} + void Verifier::visitReturnInst(ReturnInst &RI) { Function *F = RI.getParent()->getParent(); unsigned N = RI.getNumOperands(); @@ -821,9 +836,9 @@ void Verifier::visitTruncInst(TruncInst &I) { unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); - Assert1(SrcTy->isIntOrIntVector(), "Trunc only operates on integer", &I); - Assert1(DestTy->isIntOrIntVector(), "Trunc only produces integer", &I); - Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy), + Assert1(SrcTy->isIntOrIntVectorTy(), "Trunc only operates on integer", &I); + Assert1(DestTy->isIntOrIntVectorTy(), "Trunc only produces integer", &I); + Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(), "trunc source and destination must both be a vector or neither", &I); Assert1(SrcBitSize > DestBitSize,"DestTy too big for Trunc", &I); @@ -836,9 +851,9 @@ void Verifier::visitZExtInst(ZExtInst &I) { const Type *DestTy = I.getType(); // Get the size of the types in bits, we'll need this later - Assert1(SrcTy->isIntOrIntVector(), "ZExt only operates on integer", &I); - Assert1(DestTy->isIntOrIntVector(), "ZExt only produces an integer", &I); - Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy), + Assert1(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I); + Assert1(DestTy->isIntOrIntVectorTy(), "ZExt only produces an integer", &I); + Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(), "zext source and destination must both be a vector or neither", &I); unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); @@ -857,9 +872,9 @@ void Verifier::visitSExtInst(SExtInst &I) { unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); - Assert1(SrcTy->isIntOrIntVector(), "SExt only operates on integer", &I); - Assert1(DestTy->isIntOrIntVector(), "SExt only produces an integer", &I); - Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy), + Assert1(SrcTy->isIntOrIntVectorTy(), "SExt only operates on integer", &I); + Assert1(DestTy->isIntOrIntVectorTy(), "SExt only produces an integer", &I); + Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(), "sext source and destination must both be a vector or neither", &I); Assert1(SrcBitSize < DestBitSize,"Type too small for SExt", &I); @@ -874,9 +889,9 @@ void Verifier::visitFPTruncInst(FPTruncInst &I) { unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); - Assert1(SrcTy->isFPOrFPVector(),"FPTrunc only operates on FP", &I); - Assert1(DestTy->isFPOrFPVector(),"FPTrunc only produces an FP", &I); - Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy), + Assert1(SrcTy->isFPOrFPVectorTy(),"FPTrunc only operates on FP", &I); + Assert1(DestTy->isFPOrFPVectorTy(),"FPTrunc only produces an FP", &I); + Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(), "fptrunc source and destination must both be a vector or neither",&I); Assert1(SrcBitSize > DestBitSize,"DestTy too big 
for FPTrunc", &I); @@ -892,9 +907,9 @@ void Verifier::visitFPExtInst(FPExtInst &I) { unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); - Assert1(SrcTy->isFPOrFPVector(),"FPExt only operates on FP", &I); - Assert1(DestTy->isFPOrFPVector(),"FPExt only produces an FP", &I); - Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy), + Assert1(SrcTy->isFPOrFPVectorTy(),"FPExt only operates on FP", &I); + Assert1(DestTy->isFPOrFPVectorTy(),"FPExt only produces an FP", &I); + Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(), "fpext source and destination must both be a vector or neither", &I); Assert1(SrcBitSize < DestBitSize,"DestTy too small for FPExt", &I); @@ -906,14 +921,14 @@ void Verifier::visitUIToFPInst(UIToFPInst &I) { const Type *SrcTy = I.getOperand(0)->getType(); const Type *DestTy = I.getType(); - bool SrcVec = isa<VectorType>(SrcTy); - bool DstVec = isa<VectorType>(DestTy); + bool SrcVec = SrcTy->isVectorTy(); + bool DstVec = DestTy->isVectorTy(); Assert1(SrcVec == DstVec, "UIToFP source and dest must both be vector or scalar", &I); - Assert1(SrcTy->isIntOrIntVector(), + Assert1(SrcTy->isIntOrIntVectorTy(), "UIToFP source must be integer or integer vector", &I); - Assert1(DestTy->isFPOrFPVector(), + Assert1(DestTy->isFPOrFPVectorTy(), "UIToFP result must be FP or FP vector", &I); if (SrcVec && DstVec) @@ -929,14 +944,14 @@ void Verifier::visitSIToFPInst(SIToFPInst &I) { const Type *SrcTy = I.getOperand(0)->getType(); const Type *DestTy = I.getType(); - bool SrcVec = isa<VectorType>(SrcTy); - bool DstVec = isa<VectorType>(DestTy); + bool SrcVec = SrcTy->isVectorTy(); + bool DstVec = DestTy->isVectorTy(); Assert1(SrcVec == DstVec, "SIToFP source and dest must both be vector or scalar", &I); - Assert1(SrcTy->isIntOrIntVector(), + Assert1(SrcTy->isIntOrIntVectorTy(), "SIToFP source must be integer or integer vector", &I); - Assert1(DestTy->isFPOrFPVector(), + Assert1(DestTy->isFPOrFPVectorTy(), "SIToFP result must be FP or FP vector", &I); if (SrcVec && DstVec) @@ -952,13 +967,14 @@ void Verifier::visitFPToUIInst(FPToUIInst &I) { const Type *SrcTy = I.getOperand(0)->getType(); const Type *DestTy = I.getType(); - bool SrcVec = isa<VectorType>(SrcTy); - bool DstVec = isa<VectorType>(DestTy); + bool SrcVec = SrcTy->isVectorTy(); + bool DstVec = DestTy->isVectorTy(); Assert1(SrcVec == DstVec, "FPToUI source and dest must both be vector or scalar", &I); - Assert1(SrcTy->isFPOrFPVector(), "FPToUI source must be FP or FP vector", &I); - Assert1(DestTy->isIntOrIntVector(), + Assert1(SrcTy->isFPOrFPVectorTy(), "FPToUI source must be FP or FP vector", + &I); + Assert1(DestTy->isIntOrIntVectorTy(), "FPToUI result must be integer or integer vector", &I); if (SrcVec && DstVec) @@ -974,14 +990,14 @@ void Verifier::visitFPToSIInst(FPToSIInst &I) { const Type *SrcTy = I.getOperand(0)->getType(); const Type *DestTy = I.getType(); - bool SrcVec = isa<VectorType>(SrcTy); - bool DstVec = isa<VectorType>(DestTy); + bool SrcVec = SrcTy->isVectorTy(); + bool DstVec = DestTy->isVectorTy(); Assert1(SrcVec == DstVec, "FPToSI source and dest must both be vector or scalar", &I); - Assert1(SrcTy->isFPOrFPVector(), + Assert1(SrcTy->isFPOrFPVectorTy(), "FPToSI source must be FP or FP vector", &I); - Assert1(DestTy->isIntOrIntVector(), + Assert1(DestTy->isIntOrIntVectorTy(), "FPToSI result must be integer or integer vector", &I); if (SrcVec && DstVec) @@ -997,8 +1013,8 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) { const Type *SrcTy = 
I.getOperand(0)->getType(); const Type *DestTy = I.getType(); - Assert1(isa<PointerType>(SrcTy), "PtrToInt source must be pointer", &I); - Assert1(DestTy->isInteger(), "PtrToInt result must be integral", &I); + Assert1(SrcTy->isPointerTy(), "PtrToInt source must be pointer", &I); + Assert1(DestTy->isIntegerTy(), "PtrToInt result must be integral", &I); visitInstruction(I); } @@ -1008,8 +1024,8 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) { const Type *SrcTy = I.getOperand(0)->getType(); const Type *DestTy = I.getType(); - Assert1(SrcTy->isInteger(), "IntToPtr source must be an integral", &I); - Assert1(isa<PointerType>(DestTy), "IntToPtr result must be a pointer",&I); + Assert1(SrcTy->isIntegerTy(), "IntToPtr source must be an integral", &I); + Assert1(DestTy->isPointerTy(), "IntToPtr result must be a pointer",&I); visitInstruction(I); } @@ -1025,7 +1041,7 @@ void Verifier::visitBitCastInst(BitCastInst &I) { // BitCast implies a no-op cast of type only. No bits change. // However, you can't cast pointers to anything but pointers. - Assert1(isa<PointerType>(DestTy) == isa<PointerType>(DestTy), + Assert1(DestTy->isPointerTy() == DestTy->isPointerTy(), "Bitcast requires both operands to be pointer or neither", &I); Assert1(SrcBitSize == DestBitSize, "Bitcast requires types of same width",&I); @@ -1068,11 +1084,11 @@ void Verifier::visitPHINode(PHINode &PN) { void Verifier::VerifyCallSite(CallSite CS) { Instruction *I = CS.getInstruction(); - Assert1(isa<PointerType>(CS.getCalledValue()->getType()), + Assert1(CS.getCalledValue()->getType()->isPointerTy(), "Called function must be a pointer!", I); const PointerType *FPTy = cast<PointerType>(CS.getCalledValue()->getType()); - Assert1(isa<FunctionType>(FPTy->getElementType()), + Assert1(FPTy->getElementType()->isFunctionTy(), "Called function is not pointer to function type!", I); const FunctionType *FTy = cast<FunctionType>(FPTy->getElementType()); @@ -1151,7 +1167,7 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) { case Instruction::UDiv: case Instruction::SRem: case Instruction::URem: - Assert1(B.getType()->isIntOrIntVector(), + Assert1(B.getType()->isIntOrIntVectorTy(), "Integer arithmetic operators only work with integral types!", &B); Assert1(B.getType() == B.getOperand(0)->getType(), "Integer arithmetic operators must have same type " @@ -1164,7 +1180,7 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) { case Instruction::FMul: case Instruction::FDiv: case Instruction::FRem: - Assert1(B.getType()->isFPOrFPVector(), + Assert1(B.getType()->isFPOrFPVectorTy(), "Floating-point arithmetic operators only work with " "floating-point types!", &B); Assert1(B.getType() == B.getOperand(0)->getType(), @@ -1175,7 +1191,7 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) { case Instruction::And: case Instruction::Or: case Instruction::Xor: - Assert1(B.getType()->isIntOrIntVector(), + Assert1(B.getType()->isIntOrIntVectorTy(), "Logical operators only work with integral types!", &B); Assert1(B.getType() == B.getOperand(0)->getType(), "Logical operators must have same type for operands and result!", @@ -1184,7 +1200,7 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) { case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: - Assert1(B.getType()->isIntOrIntVector(), + Assert1(B.getType()->isIntOrIntVectorTy(), "Shifts only work with integral types!", &B); Assert1(B.getType() == B.getOperand(0)->getType(), "Shift return type must be same as operands!", &B); @@ -1203,7 +1219,7 @@ void 
Verifier::visitICmpInst(ICmpInst& IC) { Assert1(Op0Ty == Op1Ty, "Both operands to ICmp instruction are not of the same type!", &IC); // Check that the operands are the right type - Assert1(Op0Ty->isIntOrIntVector() || isa<PointerType>(Op0Ty), + Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->isPointerTy(), "Invalid operand types for ICmp instruction", &IC); visitInstruction(IC); @@ -1216,7 +1232,7 @@ void Verifier::visitFCmpInst(FCmpInst& FC) { Assert1(Op0Ty == Op1Ty, "Both operands to FCmp instruction are not of the same type!", &FC); // Check that the operands are the right type - Assert1(Op0Ty->isFPOrFPVector(), + Assert1(Op0Ty->isFPOrFPVectorTy(), "Invalid operand types for FCmp instruction", &FC); visitInstruction(FC); } @@ -1270,7 +1286,7 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { GetElementPtrInst::getIndexedType(GEP.getOperand(0)->getType(), Idxs.begin(), Idxs.end()); Assert1(ElTy, "Invalid indices for GEP pointer type!", &GEP); - Assert2(isa<PointerType>(GEP.getType()) && + Assert2(GEP.getType()->isPointerTy() && cast<PointerType>(GEP.getType())->getElementType() == ElTy, "GEP is not of right type for indices!", &GEP, ElTy); visitInstruction(GEP); @@ -1302,7 +1318,7 @@ void Verifier::visitAllocaInst(AllocaInst &AI) { &AI); Assert1(PTy->getElementType()->isSized(), "Cannot allocate unsized type", &AI); - Assert1(AI.getArraySize()->getType()->isInteger(32), + Assert1(AI.getArraySize()->getType()->isIntegerTy(32), "Alloca array size must be i32", &AI); visitInstruction(AI); } @@ -1481,7 +1497,7 @@ void Verifier::visitInstruction(Instruction &I) { void Verifier::VerifyType(const Type *Ty) { if (!Types.insert(Ty)) return; - Assert1(&Mod->getContext() == &Ty->getContext(), + Assert1(Context == &Ty->getContext(), "Type context does not match Module context!", Ty); switch (Ty->getTypeID()) { @@ -1509,6 +1525,15 @@ void Verifier::VerifyType(const Type *Ty) { VerifyType(ElTy); } } break; + case Type::UnionTyID: { + const UnionType *UTy = cast<UnionType>(Ty); + for (unsigned i = 0, e = UTy->getNumElements(); i != e; ++i) { + const Type *ElTy = UTy->getElementType(i); + Assert2(UnionType::isValidElementType(ElTy), + "Union type with invalid element type", ElTy, UTy); + VerifyType(ElTy); + } + } break; case Type::ArrayTyID: { const ArrayType *ATy = cast<ArrayType>(Ty); Assert1(ArrayType::isValidElementType(ATy->getElementType()), @@ -1616,7 +1641,7 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { if (ID == Intrinsic::gcroot) { AllocaInst *AI = dyn_cast<AllocaInst>(CI.getOperand(1)->stripPointerCasts()); - Assert1(AI && isa<PointerType>(AI->getType()->getElementType()), + Assert1(AI && AI->getType()->getElementType()->isPointerTy(), "llvm.gcroot parameter #1 must be a pointer alloca.", &CI); Assert1(isa<Constant>(CI.getOperand(2)), "llvm.gcroot parameter #2 must be a constant.", &CI); @@ -1734,7 +1759,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty, } } } else if (VT == MVT::iAny) { - if (!EltTy->isInteger()) { + if (!EltTy->isIntegerTy()) { CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not " "an integer type.", F); return false; @@ -1759,7 +1784,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty, break; } } else if (VT == MVT::fAny) { - if (!EltTy->isFloatingPoint()) { + if (!EltTy->isFloatingPointTy()) { CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not " "a floating-point type.", F); return false; @@ -1778,7 +1803,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID 
ID, Function *F, const Type *Ty, } Suffix += ".v" + utostr(NumElts) + EVT::getEVT(EltTy).getEVTString(); } else if (VT == MVT::iPTR) { - if (!isa<PointerType>(Ty)) { + if (!Ty->isPointerTy()) { CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not a " "pointer and a pointer is required.", F); return false;
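
The hunks above fold two related changes into lib/VMCore: the isa<FooType>(Ty) checks are replaced throughout by the Ty->isFooTy() predicate methods, and a first-class UnionType is introduced (factory, UnionValType uniquing key in TypesContext.h, verifier support, plus a new Verifier::visitBranchInst check that conditional branches use an i1 condition). The sketch below is not part of the patch; it only illustrates, assuming the LLVM 2.7-era include layout ("llvm/DerivedTypes.h", before the llvm/IR/ reorganization) and the UnionType API declared by these hunks, how client code might exercise the renamed predicates and the new union type. The helper describeType and the main driver are hypothetical names introduced here for illustration.

// Minimal client-side sketch under the assumptions stated above.
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Hypothetical helper: classify a type with the renamed predicates this
// patch switches the library over to (isIntegerTy, isPointerTy, ...).
static void describeType(const Type *Ty) {
  if (Ty->isIntegerTy(32))
    errs() << "i32\n";
  else if (Ty->isIntOrIntVectorTy())
    errs() << "integer or vector of integers\n";
  else if (Ty->isFPOrFPVectorTy())
    errs() << "floating point or vector of FP\n";
  else if (Ty->isPointerTy())
    errs() << "pointer\n";
  else if (Ty->isStructTy() || Ty->isUnionTy())
    errs() << "struct or union\n";
  else
    errs() << "something else\n";
}

int main() {
  LLVMContext Context;

  // Build union { i32, float } with the variadic factory added by this
  // change; the argument list is terminated by a null Type*, mirroring
  // the existing StructType::get overload.
  UnionType *UT = UnionType::get(Type::getInt32Ty(Context),
                                 Type::getFloatTy(Context),
                                 (const Type *)0);

  describeType(UT);                                    // prints "struct or union"
  errs() << "members: " << UT->getNumElements() << "\n";   // 2
  errs() << "float index: "
         << UT->getElementTypeIndex(Type::getFloatTy(Context)) << "\n"; // 1
  return 0;
}

As with struct types, UnionType::get uniques instances through the per-context UnionTypes map keyed by UnionValType, so calling the factory twice with the same member list in the same order should return the same UnionType pointer.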