author     Pirama Arumuga Nainar <pirama@google.com>    2015-04-08 08:55:49 -0700
committer  Pirama Arumuga Nainar <pirama@google.com>    2015-04-09 15:04:38 -0700
commit     4c5e43da7792f75567b693105cc53e3f1992ad98
tree       1b2c9792582e12f5af0b1512e3094425f0dc0df9 /lib
parent     c75239e6119d0f9a74c57099d91cbc9bde56bf33
Update aosp/master llvm for rebase to r233350
Change-Id: I07d935f8793ee8ec6b7da003f6483046594bca49
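Editor's note: the API change driving most of the hunks below is that AliasAnalysis no longer pulls its DataLayout out of a DataLayoutPass; the layout now lives on the Module, and each AliasAnalysis subclass forwards it explicitly during doInitialization. A minimal sketch of the new pattern — "MyAA" is a hypothetical pass name; the real call sites are BasicAliasAnalysis::doInitialization and CFLAliasAnalysis::doInitialization in the diff below:

    // Sketch only, assuming an ImmutablePass-based AA like BasicAA.
    struct MyAA : public ImmutablePass, public AliasAnalysis {
      static char ID;
      MyAA() : ImmutablePass(ID) {}
      // initializePass() is gone; the Module (and thus its DataLayout)
      // arrives through doInitialization instead.
      bool doInitialization(Module &M) override {
        // Was: InitializeAliasAnalysis(this);
        InitializeAliasAnalysis(this, &M.getDataLayout());
        return true;
      }
    };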
Diffstat (limited to 'lib')
821 files changed, 28633 insertions, 23865 deletions
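Most of that churn follows one mechanical recipe: nullable "const DataLayout *" parameters (with "no target data" fallback paths) become "const DataLayout &", and the null checks disappear. Callers fetch the layout from the Module, as the DependenceAnalysis hunks do with F->getParent()->getDataLayout(). A caller-side sketch using the ConstantFolding entry point from this diff (illustrative only; I, F, and TLI are assumed to be in scope):

    // Before: ConstantFoldInstruction(I, /*TD=*/nullptr, TLI) could
    // silently give up for lack of target data.
    const DataLayout &DL = F->getParent()->getDataLayout();
    if (Constant *C = ConstantFoldInstruction(I, DL, TLI))
      I->replaceAllUsesWith(C);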
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 4e95aa0..0b0fd50 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -407,9 +407,10 @@ AliasAnalysis::ModRefResult
 AliasAnalysis::callCapturesBefore(const Instruction *I,
                                   const AliasAnalysis::Location &MemLoc,
                                   DominatorTree *DT) {
-  if (!DT || !DL) return AliasAnalysis::ModRef;
+  if (!DT)
+    return AliasAnalysis::ModRef;
 
-  const Value *Object = GetUnderlyingObject(MemLoc.Ptr, DL);
+  const Value *Object = GetUnderlyingObject(MemLoc.Ptr, *DL);
   if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object) ||
       isa<Constant>(Object))
     return AliasAnalysis::ModRef;
@@ -462,9 +463,8 @@ AliasAnalysis::~AliasAnalysis() {}
 /// InitializeAliasAnalysis - Subclasses must call this method to initialize the
 /// AliasAnalysis interface before any other methods are called.
 ///
-void AliasAnalysis::InitializeAliasAnalysis(Pass *P) {
-  DataLayoutPass *DLP = P->getAnalysisIfAvailable<DataLayoutPass>();
-  DL = DLP ? &DLP->getDataLayout() : nullptr;
+void AliasAnalysis::InitializeAliasAnalysis(Pass *P, const DataLayout *NewDL) {
+  DL = NewDL;
   auto *TLIP = P->getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
   TLI = TLIP ? &TLIP->getTLI() : nullptr;
   AA = &P->getAnalysis<AliasAnalysis>();
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
index b860914..5865259 100644
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -76,7 +77,7 @@ namespace {
     bool runOnModule(Module &M) override {
       this->M = &M;
-      InitializeAliasAnalysis(this);
+      InitializeAliasAnalysis(this, &M.getDataLayout());
       return false;
     }
diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp
index 5d61cf9..f98b578 100644
--- a/lib/Analysis/AliasDebugger.cpp
+++ b/lib/Analysis/AliasDebugger.cpp
@@ -44,7 +44,7 @@ namespace {
     }
 
     bool runOnModule(Module &M) override {
-      InitializeAliasAnalysis(this);                    // set up super class
+      InitializeAliasAnalysis(this, &M.getDataLayout()); // set up super class
 
       for(Module::global_iterator I = M.global_begin(), E = M.global_end();
           I != E; ++I) {
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 1bfb06d..4549c1e 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -49,7 +49,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
   initializeIVUsersPass(Registry);
   initializeInstCountPass(Registry);
   initializeIntervalPartitionPass(Registry);
-  initializeJumpInstrTableInfoPass(Registry);
   initializeLazyValueInfoPass(Registry);
   initializeLibCallAliasAnalysisPass(Registry);
   initializeLintPass(Registry);
diff --git a/lib/Analysis/Android.mk b/lib/Analysis/Android.mk
index e17b870..277956c 100644
--- a/lib/Analysis/Android.mk
+++ b/lib/Analysis/Android.mk
@@ -29,7 +29,6 @@ analysis_SRC_FILES := \
   InstructionSimplify.cpp \
   Interval.cpp \
   IntervalPartition.cpp \
-  JumpInstrTableInfo.cpp \
   LazyCallGraph.cpp \
   LazyValueInfo.cpp \
   LibCallAliasAnalysis.cpp \
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 46ca6ee..be2282f 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -103,7 +103,7 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
                               const TargetLibraryInfo &TLI,
                               bool RoundToAlign = false) {
   uint64_t Size;
-  if (getObjectSize(V, Size, &DL, &TLI, RoundToAlign))
+  if (getObjectSize(V, Size, DL, &TLI, RoundToAlign))
     return Size;
   return AliasAnalysis::UnknownSize;
 }
@@ -221,7 +221,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
     case Instruction::Or:
       // X|C == X+C if all the bits in C are unset in X.  Otherwise we can't
       // analyze it.
-      if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &DL, 0, AC,
+      if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), DL, 0, AC,
                              BOp, DT))
        break;
      // FALL THROUGH.
@@ -292,7 +292,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
 static const Value *
 DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
                        SmallVectorImpl<VariableGEPIndex> &VarIndices,
-                       bool &MaxLookupReached, const DataLayout *DL,
+                       bool &MaxLookupReached, const DataLayout &DL,
                        AssumptionCache *AC, DominatorTree *DT) {
   // Limit recursion depth to limit compile time in crazy cases.
   unsigned MaxLookup = MaxLookupSearchDepth;
@@ -341,16 +341,6 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
     if (!GEPOp->getOperand(0)->getType()->getPointerElementType()->isSized())
       return V;
 
-    // If we are lacking DataLayout information, we can't compute the offets of
-    // elements computed by GEPs.  However, we can handle bitcast equivalent
-    // GEPs.
-    if (!DL) {
-      if (!GEPOp->hasAllZeroIndices())
-        return V;
-      V = GEPOp->getOperand(0);
-      continue;
-    }
-
     unsigned AS = GEPOp->getPointerAddressSpace();
     // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
     gep_type_iterator GTI = gep_type_begin(GEPOp);
@@ -363,30 +353,30 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
         unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
         if (FieldNo == 0)
           continue;
-        BaseOffs += DL->getStructLayout(STy)->getElementOffset(FieldNo);
+        BaseOffs += DL.getStructLayout(STy)->getElementOffset(FieldNo);
         continue;
       }
 
       // For an array/pointer, add the element offset, explicitly scaled.
       if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
         if (CIdx->isZero())
           continue;
-        BaseOffs += DL->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
+        BaseOffs += DL.getTypeAllocSize(*GTI) * CIdx->getSExtValue();
         continue;
       }
 
-      uint64_t Scale = DL->getTypeAllocSize(*GTI);
+      uint64_t Scale = DL.getTypeAllocSize(*GTI);
       ExtensionKind Extension = EK_NotExtended;
 
       // If the integer type is smaller than the pointer size, it is implicitly
       // sign extended to pointer size.
       unsigned Width = Index->getType()->getIntegerBitWidth();
-      if (DL->getPointerSizeInBits(AS) > Width)
+      if (DL.getPointerSizeInBits(AS) > Width)
         Extension = EK_SignExt;
 
       // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
       APInt IndexScale(Width, 0), IndexOffset(Width, 0);
-      Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
-                                  *DL, 0, AC, DT);
+      Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension, DL,
+                                  0, AC, DT);
 
       // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
       // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
@@ -408,7 +398,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
 
       // Make sure that we have a scale that makes sense for this target's
       // pointer size.
-      if (unsigned ShiftBits = 64 - DL->getPointerSizeInBits(AS)) {
+      if (unsigned ShiftBits = 64 - DL.getPointerSizeInBits(AS)) {
         Scale <<= ShiftBits;
         Scale = (int64_t)Scale >> ShiftBits;
       }
@@ -461,9 +451,7 @@ namespace {
       initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry());
     }
 
-    void initializePass() override {
-      InitializeAliasAnalysis(this);
-    }
+    bool doInitialization(Module &M) override;
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.addRequired<AliasAnalysis>();
@@ -612,7 +600,7 @@ BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
   SmallVector<const Value *, 16> Worklist;
   Worklist.push_back(Loc.Ptr);
   do {
-    const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL);
+    const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), *DL);
     if (!Visited.insert(V).second) {
       Visited.clear();
       return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
@@ -815,6 +803,11 @@ static bool isAssumeIntrinsic(ImmutableCallSite CS) {
   return false;
 }
 
+bool BasicAliasAnalysis::doInitialization(Module &M) {
+  InitializeAliasAnalysis(this, &M.getDataLayout());
+  return true;
+}
+
 /// getModRefInfo - Check to see if the specified callsite can clobber the
 /// specified memory object.  Since we only look at local properties of this
 /// function, we really can't say much about this query.  We do, however, use
@@ -825,7 +818,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
   assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
          "AliasAnalysis query involving multiple functions!");
 
-  const Value *Object = GetUnderlyingObject(Loc.Ptr, DL);
+  const Value *Object = GetUnderlyingObject(Loc.Ptr, *DL);
 
   // If this is a tail call and Loc.Ptr points to a stack location, we know that
   // the tail call cannot access or modify the local stack.
@@ -1042,10 +1035,10 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
     SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
     const Value *GEP2BasePtr =
         DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
-                               GEP2MaxLookupReached, DL, AC2, DT);
+                               GEP2MaxLookupReached, *DL, AC2, DT);
     const Value *GEP1BasePtr =
         DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
-                               GEP1MaxLookupReached, DL, AC1, DT);
+                               GEP1MaxLookupReached, *DL, AC1, DT);
     // DecomposeGEPExpression and GetUnderlyingObject should return the
     // same result except when DecomposeGEPExpression has no DataLayout.
     if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
@@ -1074,14 +1067,14 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
     // about the relation of the resulting pointer.
     const Value *GEP1BasePtr =
         DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
-                               GEP1MaxLookupReached, DL, AC1, DT);
+                               GEP1MaxLookupReached, *DL, AC1, DT);
 
     int64_t GEP2BaseOffset;
     bool GEP2MaxLookupReached;
     SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
     const Value *GEP2BasePtr =
         DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
-                               GEP2MaxLookupReached, DL, AC2, DT);
+                               GEP2MaxLookupReached, *DL, AC2, DT);
     // DecomposeGEPExpression and GetUnderlyingObject should return the
     // same result except when DecomposeGEPExpression has no DataLayout.
@@ -1131,7 +1124,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
     const Value *GEP1BasePtr =
         DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
-                               GEP1MaxLookupReached, DL, AC1, DT);
+                               GEP1MaxLookupReached, *DL, AC1, DT);
 
     // DecomposeGEPExpression and GetUnderlyingObject should return the
     // same result except when DecomposeGEPExpression has no DataLayout.
@@ -1200,7 +1193,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
       const Value *V = GEP1VariableIndices[i].V;
 
       bool SignKnownZero, SignKnownOne;
-      ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, DL,
+      ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, *DL,
                      0, AC1, nullptr, DT);
 
       // Zero-extension widens the variable, and so forces the sign
@@ -1409,8 +1402,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
     return NoAlias;  // Scalars cannot alias each other
 
   // Figure out what objects these things are pointing to if we can.
-  const Value *O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth);
-  const Value *O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth);
+  const Value *O1 = GetUnderlyingObject(V1, *DL, MaxLookupSearchDepth);
+  const Value *O2 = GetUnderlyingObject(V2, *DL, MaxLookupSearchDepth);
 
   // Null values in the default address space don't point to any object, so they
   // don't alias any other pointer.
@@ -1533,6 +1526,9 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
   if (!Inst)
     return true;
 
+  if (VisitedPhiBBs.empty())
+    return true;
+
   if (VisitedPhiBBs.size() > MaxNumPhiBBsValueReachabilityCheck)
     return false;
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 8cd6ea4..14800f4 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -21,6 +21,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
diff --git a/lib/Analysis/CFLAliasAnalysis.cpp b/lib/Analysis/CFLAliasAnalysis.cpp
index 82fbfe0..53d748d 100644
--- a/lib/Analysis/CFLAliasAnalysis.cpp
+++ b/lib/Analysis/CFLAliasAnalysis.cpp
@@ -45,9 +45,11 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
 #include <forward_list>
+#include <memory>
 #include <tuple>
 
 using namespace llvm;
@@ -77,7 +79,7 @@ static Optional<Value *> getTargetValue(Instruction *);
 static bool hasUsefulEdges(Instruction *);
 
 const StratifiedIndex StratifiedLink::SetSentinel =
-  std::numeric_limits<StratifiedIndex>::max();
+    std::numeric_limits<StratifiedIndex>::max();
 
 namespace {
 // StratifiedInfo Attribute things.
@@ -85,11 +87,13 @@ typedef unsigned StratifiedAttr;
 LLVM_CONSTEXPR unsigned MaxStratifiedAttrIndex = NumStratifiedAttrs;
 LLVM_CONSTEXPR unsigned AttrAllIndex = 0;
 LLVM_CONSTEXPR unsigned AttrGlobalIndex = 1;
-LLVM_CONSTEXPR unsigned AttrFirstArgIndex = 2;
+LLVM_CONSTEXPR unsigned AttrUnknownIndex = 2;
+LLVM_CONSTEXPR unsigned AttrFirstArgIndex = 3;
 LLVM_CONSTEXPR unsigned AttrLastArgIndex = MaxStratifiedAttrIndex;
 LLVM_CONSTEXPR unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex;
 
 LLVM_CONSTEXPR StratifiedAttr AttrNone = 0;
+LLVM_CONSTEXPR StratifiedAttr AttrUnknown = 1 << AttrUnknownIndex;
 LLVM_CONSTEXPR StratifiedAttr AttrAll = ~AttrNone;
 
 // \brief StratifiedSets call for knowledge of "direction", so this is how we
@@ -144,9 +148,8 @@ struct FunctionInfo {
   // Lots of functions have < 4 returns. Adjust as necessary.
   SmallVector<Value *, 4> ReturnedValues;
 
-  FunctionInfo(StratifiedSets<Value *> &&S,
-               SmallVector<Value *, 4> &&RV)
-    : Sets(std::move(S)), ReturnedValues(std::move(RV)) {}
+  FunctionInfo(StratifiedSets<Value *> &&S, SmallVector<Value *, 4> &&RV)
+      : Sets(std::move(S)), ReturnedValues(std::move(RV)) {}
 };
 
 struct CFLAliasAnalysis;
@@ -229,6 +232,10 @@ public:
 
     // Comparisons between global variables and other constants should be
     // handled by BasicAA.
+    // TODO: ConstantExpr handling -- CFLAA may report NoAlias when comparing
+    // a GlobalValue and ConstantExpr, but every query needs to have at least
+    // one Value tied to a Function, and neither GlobalValues nor ConstantExprs
+    // are.
     if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) {
       return AliasAnalysis::alias(LocA, LocB);
     }
@@ -240,7 +247,7 @@ public:
     return QueryResult;
   }
 
-  void initializePass() override { InitializeAliasAnalysis(this); }
+  bool doInitialization(Module &M) override;
 };
 
 void FunctionHandle::removeSelfFromCache() {
@@ -263,9 +270,19 @@ public:
     llvm_unreachable("Unsupported instruction encountered");
   }
 
+  void visitPtrToIntInst(PtrToIntInst &Inst) {
+    auto *Ptr = Inst.getOperand(0);
+    Output.push_back(Edge(Ptr, Ptr, EdgeType::Assign, AttrUnknown));
+  }
+
+  void visitIntToPtrInst(IntToPtrInst &Inst) {
+    auto *Ptr = &Inst;
+    Output.push_back(Edge(Ptr, Ptr, EdgeType::Assign, AttrUnknown));
+  }
+
   void visitCastInst(CastInst &Inst) {
-    Output.push_back(Edge(&Inst, Inst.getOperand(0), EdgeType::Assign,
-                          AttrNone));
+    Output.push_back(
+        Edge(&Inst, Inst.getOperand(0), EdgeType::Assign, AttrNone));
   }
 
   void visitBinaryOperator(BinaryOperator &Inst) {
@@ -377,7 +394,7 @@ public:
     // I put this here to give us an upper bound on time taken by IPA. Is it
     // really (realistically) needed? Keep in mind that we do have an n^2 algo.
-    if (std::distance(Args.begin(), Args.end()) > (int) MaxSupportedArgs)
+    if (std::distance(Args.begin(), Args.end()) > (int)MaxSupportedArgs)
       return false;
 
     // Exit early if we'll fail anyway
@@ -429,7 +446,7 @@ public:
       }
       if (AddEdge)
         Output.push_back(Edge(FuncValue, ArgVal, EdgeType::Assign,
-                          StratifiedAttrs().flip()));
+                              StratifiedAttrs().flip()));
     }
 
     if (Parameters.size() != Arguments.size())
@@ -571,8 +588,7 @@ private:
     EdgeTypeT Weight;
     Node Other;
 
-    Edge(const EdgeTypeT &W, const Node &N)
-      : Weight(W), Other(N) {}
+    Edge(const EdgeTypeT &W, const Node &N) : Weight(W), Other(N) {}
 
     bool operator==(const Edge &E) const {
       return Weight == E.Weight && Other == E.Other;
@@ -735,6 +751,25 @@ static Level directionOfEdgeType(EdgeType);
 static void buildGraphFrom(CFLAliasAnalysis &, Function *,
                            SmallVectorImpl<Value *> &, NodeMapT &, GraphT &);
 
+// Gets the edges of a ConstantExpr as if it was an Instruction. This
+// function also acts on any nested ConstantExprs, adding the edges
+// of those to the given SmallVector as well.
+static void constexprToEdges(CFLAliasAnalysis &, ConstantExpr &,
+                             SmallVectorImpl<Edge> &);
+
+// Given an Instruction, this will add it to the graph, along with any
+// Instructions that are potentially only available from said Instruction
+// For example, given the following line:
+//   %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2
+// addInstructionToGraph would add both the `load` and `getelementptr`
+// instructions to the graph appropriately.
+static void addInstructionToGraph(CFLAliasAnalysis &, Instruction &,
+                                  SmallVectorImpl<Value *> &, NodeMapT &,
+                                  GraphT &);
+
+// Notes whether it would be pointless to add the given Value to our sets.
+static bool canSkipAddingToSets(Value *Val);
+
 // Builds the graph + StratifiedSets for a function.
 static FunctionInfo buildSetsFrom(CFLAliasAnalysis &, Function *);
 
@@ -806,6 +841,8 @@ static EdgeType flipWeight(EdgeType Initial) {
 
 static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst,
                         SmallVectorImpl<Edge> &Output) {
+  assert(hasUsefulEdges(Inst) &&
+         "Expected instructions to have 'useful' edges");
   GetEdgesVisitor v(Analysis, Output);
   v.visit(Inst);
 }
@@ -822,13 +859,41 @@ static Level directionOfEdgeType(EdgeType Weight) {
   llvm_unreachable("Incomplete switch coverage");
 }
 
-// Aside: We may remove graph construction entirely, because it doesn't really
-// buy us much that we don't already have. I'd like to add interprocedural
-// analysis prior to this however, in case that somehow requires the graph
-// produced by this for efficient execution
-static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn,
-                           SmallVectorImpl<Value *> &ReturnedValues,
-                           NodeMapT &Map, GraphT &Graph) {
+static void constexprToEdges(CFLAliasAnalysis &Analysis,
+                             ConstantExpr &CExprToCollapse,
+                             SmallVectorImpl<Edge> &Results) {
+  SmallVector<ConstantExpr *, 4> Worklist;
+  Worklist.push_back(&CExprToCollapse);
+
+  SmallVector<Edge, 8> ConstexprEdges;
+  while (!Worklist.empty()) {
+    auto *CExpr = Worklist.pop_back_val();
+    std::unique_ptr<Instruction> Inst(CExpr->getAsInstruction());
+
+    if (!hasUsefulEdges(Inst.get()))
+      continue;
+
+    ConstexprEdges.clear();
+    argsToEdges(Analysis, Inst.get(), ConstexprEdges);
+    for (auto &Edge : ConstexprEdges) {
+      if (Edge.From == Inst.get())
+        Edge.From = CExpr;
+      else if (auto *Nested = dyn_cast<ConstantExpr>(Edge.From))
+        Worklist.push_back(Nested);
+
+      if (Edge.To == Inst.get())
+        Edge.To = CExpr;
+      else if (auto *Nested = dyn_cast<ConstantExpr>(Edge.To))
+        Worklist.push_back(Nested);
+    }
+
+    Results.append(ConstexprEdges.begin(), ConstexprEdges.end());
+  }
+}
+
+static void addInstructionToGraph(CFLAliasAnalysis &Analysis, Instruction &Inst,
+                                  SmallVectorImpl<Value *> &ReturnedValues,
+                                  NodeMapT &Map, GraphT &Graph) {
   const auto findOrInsertNode = [&Map, &Graph](Value *Val) {
     auto Pair = Map.insert(std::make_pair(Val, GraphT::Node()));
     auto &Iter = Pair.first;
@@ -839,42 +904,86 @@ static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn,
     return Iter->second;
   };
 
+  // We don't want the edges of most "return" instructions, but we *do* want
+  // to know what can be returned.
+  if (isa<ReturnInst>(&Inst))
+    ReturnedValues.push_back(&Inst);
+
+  if (!hasUsefulEdges(&Inst))
+    return;
+
   SmallVector<Edge, 8> Edges;
-  for (auto &Bb : Fn->getBasicBlockList()) {
-    for (auto &Inst : Bb.getInstList()) {
-      // We don't want the edges of most "return" instructions, but we *do* want
-      // to know what can be returned.
-      if (auto *Ret = dyn_cast<ReturnInst>(&Inst))
-        ReturnedValues.push_back(Ret);
-
-      if (!hasUsefulEdges(&Inst))
-        continue;
+  argsToEdges(Analysis, &Inst, Edges);
+
+  // In the case of an unused alloca (or similar), edges may be empty. Note
+  // that it exists so we can potentially answer NoAlias.
+  if (Edges.empty()) {
+    auto MaybeVal = getTargetValue(&Inst);
+    assert(MaybeVal.hasValue());
+    auto *Target = *MaybeVal;
+    findOrInsertNode(Target);
+    return;
+  }
 
-      Edges.clear();
-      argsToEdges(Analysis, &Inst, Edges);
+  const auto addEdgeToGraph = [&Graph, &findOrInsertNode](const Edge &E) {
+    auto To = findOrInsertNode(E.To);
+    auto From = findOrInsertNode(E.From);
+    auto FlippedWeight = flipWeight(E.Weight);
+    auto Attrs = E.AdditionalAttrs;
+    Graph.addEdge(From, To, std::make_pair(E.Weight, Attrs),
+                  std::make_pair(FlippedWeight, Attrs));
+  };
 
-      // In the case of an unused alloca (or similar), edges may be empty. Note
-      // that it exists so we can potentially answer NoAlias.
-      if (Edges.empty()) {
-        auto MaybeVal = getTargetValue(&Inst);
-        assert(MaybeVal.hasValue());
-        auto *Target = *MaybeVal;
-        findOrInsertNode(Target);
-        continue;
-      }
+  SmallVector<ConstantExpr *, 4> ConstantExprs;
+  for (const Edge &E : Edges) {
+    addEdgeToGraph(E);
+    if (auto *Constexpr = dyn_cast<ConstantExpr>(E.To))
+      ConstantExprs.push_back(Constexpr);
+    if (auto *Constexpr = dyn_cast<ConstantExpr>(E.From))
+      ConstantExprs.push_back(Constexpr);
+  }
 
-      for (const Edge &E : Edges) {
-        auto To = findOrInsertNode(E.To);
-        auto From = findOrInsertNode(E.From);
-        auto FlippedWeight = flipWeight(E.Weight);
-        auto Attrs = E.AdditionalAttrs;
-        Graph.addEdge(From, To, std::make_pair(E.Weight, Attrs),
-                      std::make_pair(FlippedWeight, Attrs));
-      }
-    }
+  for (ConstantExpr *CE : ConstantExprs) {
+    Edges.clear();
+    constexprToEdges(Analysis, *CE, Edges);
+    std::for_each(Edges.begin(), Edges.end(), addEdgeToGraph);
   }
 }
 
+// Aside: We may remove graph construction entirely, because it doesn't really
+// buy us much that we don't already have. I'd like to add interprocedural
+// analysis prior to this however, in case that somehow requires the graph
+// produced by this for efficient execution
+static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn,
+                           SmallVectorImpl<Value *> &ReturnedValues,
+                           NodeMapT &Map, GraphT &Graph) {
+  for (auto &Bb : Fn->getBasicBlockList())
+    for (auto &Inst : Bb.getInstList())
+      addInstructionToGraph(Analysis, Inst, ReturnedValues, Map, Graph);
+}
+
+static bool canSkipAddingToSets(Value *Val) {
+  // Constants can share instances, which may falsely unify multiple
+  // sets, e.g. in
+  //   store i32* null, i32** %ptr1
+  //   store i32* null, i32** %ptr2
+  // clearly ptr1 and ptr2 should not be unified into the same set, so
+  // we should filter out the (potentially shared) instance to
+  // i32* null.
+  if (isa<Constant>(Val)) {
+    bool Container = isa<ConstantVector>(Val) || isa<ConstantArray>(Val) ||
                     isa<ConstantStruct>(Val);
+    // TODO: Because all of these things are constant, we can determine whether
+    // the data is *actually* mutable at graph building time. This will probably
+    // come for free/cheap with offset awareness.
+    bool CanStoreMutableData =
+        isa<GlobalValue>(Val) || isa<ConstantExpr>(Val) || Container;
+    return !CanStoreMutableData;
+  }
+
+  return false;
+}
+
 static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
   NodeMapT Map;
   GraphT Graph;
@@ -906,7 +1015,7 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
   while (!Worklist.empty()) {
     auto Node = Worklist.pop_back_val();
     auto *CurValue = findValueOrDie(Node);
-    if (isa<Constant>(CurValue) && !isa<GlobalValue>(CurValue))
+    if (canSkipAddingToSets(CurValue))
       continue;
 
     for (const auto &EdgeTuple : Graph.edgesFor(Node)) {
@@ -915,7 +1024,7 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
       auto &OtherNode = std::get<1>(EdgeTuple);
       auto *OtherValue = findValueOrDie(OtherNode);
 
-      if (isa<Constant>(OtherValue) && !isa<GlobalValue>(OtherValue))
+      if (canSkipAddingToSets(OtherValue))
         continue;
 
       bool Added;
@@ -931,16 +1040,16 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
         break;
       }
 
-      if (Added) {
-        auto Aliasing = Weight.second;
-        if (auto MaybeCurIndex = valueToAttrIndex(CurValue))
-          Aliasing.set(*MaybeCurIndex);
-        if (auto MaybeOtherIndex = valueToAttrIndex(OtherValue))
-          Aliasing.set(*MaybeOtherIndex);
-        Builder.noteAttributes(CurValue, Aliasing);
-        Builder.noteAttributes(OtherValue, Aliasing);
+      auto Aliasing = Weight.second;
+      if (auto MaybeCurIndex = valueToAttrIndex(CurValue))
+        Aliasing.set(*MaybeCurIndex);
+      if (auto MaybeOtherIndex = valueToAttrIndex(OtherValue))
+        Aliasing.set(*MaybeOtherIndex);
+      Builder.noteAttributes(CurValue, Aliasing);
+      Builder.noteAttributes(OtherValue, Aliasing);
+
+      if (Added)
         Worklist.push_back(OtherNode);
-      }
     }
   }
 }
@@ -950,7 +1059,12 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
   // things that were present during construction being present in the graph.
   // So, we add all present arguments here.
   for (auto &Arg : Fn->args()) {
-    Builder.add(&Arg);
+    if (!Builder.add(&Arg))
+      continue;
+
+    auto Attrs = valueToAttrIndex(&Arg);
+    if (Attrs.hasValue())
+      Builder.noteAttributes(&Arg, *Attrs);
   }
 
   return FunctionInfo(Builder.build(), std::move(ReturnedValues));
@@ -1034,3 +1148,8 @@ CFLAliasAnalysis::query(const AliasAnalysis::Location &LocA,
 
   return AliasAnalysis::NoAlias;
 }
+
+bool CFLAliasAnalysis::doInitialization(Module &M) {
+  InitializeAliasAnalysis(this, &M.getDataLayout());
+  return true;
+}
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index d840037..ae40321 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -27,7 +27,6 @@ add_llvm_library(LLVMAnalysis
   InstructionSimplify.cpp
   Interval.cpp
   IntervalPartition.cpp
-  JumpInstrTableInfo.cpp
   LazyCallGraph.cpp
   LazyValueInfo.cpp
   LibCallAliasAnalysis.cpp
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index fa5683c..46a2c43 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -21,6 +21,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 
 #define DEBUG_TYPE "code-metrics"
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index fcafb41..995465d 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -50,8 +50,7 @@ using namespace llvm;
 /// Constant fold bitcast, symbolically evaluating it with DataLayout.
 /// This always returns a non-null constant, but it may be a
 /// ConstantExpr if unfoldable.
-static Constant *FoldBitCast(Constant *C, Type *DestTy,
-                             const DataLayout &TD) {
+static Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
   // Catch the obvious splat cases.
   if (C->isNullValue() && !DestTy->isX86_MMXTy())
     return Constant::getNullValue(DestTy);
@@ -84,11 +83,11 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
 
     // Now that we know that the input value is a vector of integers, just shift
     // and insert them into our result.
-    unsigned BitShift = TD.getTypeAllocSizeInBits(SrcEltTy);
+    unsigned BitShift = DL.getTypeAllocSizeInBits(SrcEltTy);
     APInt Result(IT->getBitWidth(), 0);
     for (unsigned i = 0; i != NumSrcElts; ++i) {
       Result <<= BitShift;
-      if (TD.isLittleEndian())
+      if (DL.isLittleEndian())
         Result |= CDV->getElementAsInteger(NumSrcElts-i-1);
       else
         Result |= CDV->getElementAsInteger(i);
@@ -106,7 +105,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
   // vector so the code below can handle it uniformly.
   if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) {
     Constant *Ops = C; // don't take the address of C!
-    return FoldBitCast(ConstantVector::get(Ops), DestTy, TD);
+    return FoldBitCast(ConstantVector::get(Ops), DestTy, DL);
   }
 
   // If this is a bitcast from constant vector -> vector, fold it.
@@ -138,7 +137,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
     Type *DestIVTy =
       VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt);
     // Recursively handle this integer conversion, if possible.
-    C = FoldBitCast(C, DestIVTy, TD);
+    C = FoldBitCast(C, DestIVTy, DL);
 
     // Finally, IR can handle this now that #elts line up.
     return ConstantExpr::getBitCast(C, DestTy);
@@ -162,7 +161,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
   // of the same size, and that their #elements is not the same.  Do the
   // conversion here, which depends on whether the input or output has
   // more elements.
-  bool isLittleEndian = TD.isLittleEndian();
+  bool isLittleEndian = DL.isLittleEndian();
 
   SmallVector<Constant*, 32> Result;
   if (NumDstElt < NumSrcElt) {
@@ -198,7 +197,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
 
   // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
   unsigned Ratio = NumDstElt/NumSrcElt;
-  unsigned DstBitSize = TD.getTypeSizeInBits(DstEltTy);
+  unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);
 
   // Loop over each source value, expanding into multiple results.
   for (unsigned i = 0; i != NumSrcElt; ++i) {
@@ -235,10 +234,10 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
 /// If this constant is a constant offset from a global, return the global and
 /// the constant. Because of constantexprs, this function is recursive.
 static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
-                                       APInt &Offset, const DataLayout &TD) {
+                                       APInt &Offset, const DataLayout &DL) {
   // Trivial case, constant is the global.
   if ((GV = dyn_cast<GlobalValue>(C))) {
-    unsigned BitWidth = TD.getPointerTypeSizeInBits(GV->getType());
+    unsigned BitWidth = DL.getPointerTypeSizeInBits(GV->getType());
     Offset = APInt(BitWidth, 0);
     return true;
   }
@@ -251,22 +250,22 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
   if (CE->getOpcode() == Instruction::PtrToInt ||
       CE->getOpcode() == Instruction::BitCast ||
       CE->getOpcode() == Instruction::AddrSpaceCast)
-    return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD);
+    return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL);
 
   // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
   GEPOperator *GEP = dyn_cast<GEPOperator>(CE);
   if (!GEP)
     return false;
 
-  unsigned BitWidth = TD.getPointerTypeSizeInBits(GEP->getType());
+  unsigned BitWidth = DL.getPointerTypeSizeInBits(GEP->getType());
   APInt TmpOffset(BitWidth, 0);
 
   // If the base isn't a global+constant, we aren't either.
-  if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, TD))
+  if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL))
     return false;
 
   // Otherwise, add any offset that our operands provide.
-  if (!GEP->accumulateConstantOffset(TD, TmpOffset))
+  if (!GEP->accumulateConstantOffset(DL, TmpOffset))
     return false;
 
   Offset = TmpOffset;
@@ -276,11 +275,11 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
 /// Recursive helper to read bits out of global. C is the constant being copied
 /// out of. ByteOffset is an offset into C. CurPtr is the pointer to copy
 /// results into and BytesLeft is the number of bytes left in
-/// the CurPtr buffer. TD is the target data.
+/// the CurPtr buffer. DL is the DataLayout.
 static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
                                unsigned char *CurPtr, unsigned BytesLeft,
-                               const DataLayout &TD) {
-  assert(ByteOffset <= TD.getTypeAllocSize(C->getType()) &&
+                               const DataLayout &DL) {
+  assert(ByteOffset <= DL.getTypeAllocSize(C->getType()) &&
          "Out of range access");
 
   // If this element is zero or undefined, we can just return since *CurPtr is
@@ -298,7 +297,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
 
     for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
       int n = ByteOffset;
-      if (!TD.isLittleEndian())
+      if (!DL.isLittleEndian())
         n = IntBytes - n - 1;
       CurPtr[i] = (unsigned char)(Val >> (n * 8));
       ++ByteOffset;
@@ -308,22 +307,22 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
 
   if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
     if (CFP->getType()->isDoubleTy()) {
-      C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), TD);
-      return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+      C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), DL);
+      return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
     }
     if (CFP->getType()->isFloatTy()){
-      C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), TD);
-      return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+      C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), DL);
+      return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
     }
     if (CFP->getType()->isHalfTy()){
-      C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), TD);
-      return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+      C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), DL);
+      return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
     }
     return false;
   }
 
   if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
-    const StructLayout *SL = TD.getStructLayout(CS->getType());
+    const StructLayout *SL = DL.getStructLayout(CS->getType());
     unsigned Index = SL->getElementContainingOffset(ByteOffset);
     uint64_t CurEltOffset = SL->getElementOffset(Index);
     ByteOffset -= CurEltOffset;
@@ -331,11 +330,11 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
     while (1) {
       // If the element access is to the element itself and not to tail padding,
       // read the bytes from the element.
-      uint64_t EltSize = TD.getTypeAllocSize(CS->getOperand(Index)->getType());
+      uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType());
 
       if (ByteOffset < EltSize &&
           !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr,
-                              BytesLeft, TD))
+                              BytesLeft, DL))
         return false;
 
       ++Index;
@@ -362,7 +361,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
   if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
       isa<ConstantDataSequential>(C)) {
     Type *EltTy = C->getType()->getSequentialElementType();
-    uint64_t EltSize = TD.getTypeAllocSize(EltTy);
+    uint64_t EltSize = DL.getTypeAllocSize(EltTy);
     uint64_t Index = ByteOffset / EltSize;
     uint64_t Offset = ByteOffset - Index * EltSize;
     uint64_t NumElts;
@@ -373,7 +372,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
 
     for (; Index != NumElts; ++Index) {
       if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
-                              BytesLeft, TD))
+                              BytesLeft, DL))
         return false;
 
       uint64_t BytesWritten = EltSize - Offset;
@@ -390,9 +389,9 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
 
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
     if (CE->getOpcode() == Instruction::IntToPtr &&
-        CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getType())) {
+        CE->getOperand(0)->getType() == DL.getIntPtrType(CE->getType())) {
       return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
-                                BytesLeft, TD);
+                                BytesLeft, DL);
     }
   }
 
@@ -401,7 +400,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
 }
 
 static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
-                                                 const DataLayout &TD) {
+                                                 const DataLayout &DL) {
   PointerType *PTy = cast<PointerType>(C->getType());
   Type *LoadTy = PTy->getElementType();
   IntegerType *IntType = dyn_cast<IntegerType>(LoadTy);
@@ -423,14 +422,13 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
       MapTy = Type::getInt64PtrTy(C->getContext(), AS);
     else if (LoadTy->isVectorTy()) {
       MapTy = PointerType::getIntNPtrTy(C->getContext(),
-                                        TD.getTypeAllocSizeInBits(LoadTy),
-                                        AS);
+                                        DL.getTypeAllocSizeInBits(LoadTy), AS);
     } else
       return nullptr;
 
-    C = FoldBitCast(C, MapTy, TD);
-    if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, TD))
-      return FoldBitCast(Res, LoadTy, TD);
+    C = FoldBitCast(C, MapTy, DL);
+    if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, DL))
+      return FoldBitCast(Res, LoadTy, DL);
 
     return nullptr;
   }
@@ -440,7 +438,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
 
   GlobalValue *GVal;
   APInt Offset;
-  if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD))
+  if (!IsConstantOffsetFromGlobal(C, GVal, Offset, DL))
    return nullptr;
 
   GlobalVariable *GV = dyn_cast<GlobalVariable>(GVal);
@@ -455,16 +453,16 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
 
   // If we're not accessing anything in this constant, the result is undefined.
   if (Offset.getZExtValue() >=
-      TD.getTypeAllocSize(GV->getInitializer()->getType()))
+      DL.getTypeAllocSize(GV->getInitializer()->getType()))
     return UndefValue::get(IntType);
 
   unsigned char RawBytes[32] = {0};
   if (!ReadDataFromGlobal(GV->getInitializer(), Offset.getZExtValue(), RawBytes,
-                          BytesLoaded, TD))
+                          BytesLoaded, DL))
     return nullptr;
 
   APInt ResultVal = APInt(IntType->getBitWidth(), 0);
-  if (TD.isLittleEndian()) {
+  if (DL.isLittleEndian()) {
     ResultVal = RawBytes[BytesLoaded - 1];
     for (unsigned i = 1; i != BytesLoaded; ++i) {
       ResultVal <<= 8;
@@ -482,9 +480,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
 }
 
 static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE,
-                                                const DataLayout *DL) {
-  if (!DL)
-    return nullptr;
+                                                const DataLayout &DL) {
   auto *DestPtrTy = dyn_cast<PointerType>(CE->getType());
   if (!DestPtrTy)
     return nullptr;
@@ -499,7 +495,7 @@ static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE,
 
   // If the type sizes are the same and a cast is legal, just directly
   // cast the constant.
-  if (DL->getTypeSizeInBits(DestTy) == DL->getTypeSizeInBits(SrcTy)) {
+  if (DL.getTypeSizeInBits(DestTy) == DL.getTypeSizeInBits(SrcTy)) {
     Instruction::CastOps Cast = Instruction::BitCast;
     // If we are going from a pointer to int or vice versa, we spell the cast
     // differently.
@@ -530,7 +526,7 @@ static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE,
 /// Return the value that a load from C would produce if it is constant and
 /// determinable. If this is not determinable, return null.
 Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
-                                             const DataLayout *TD) {
+                                             const DataLayout &DL) {
   // First, try the easy cases:
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
     if (GV->isConstant() && GV->hasDefinitiveInitializer())
@@ -552,13 +548,13 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
   }
 
   if (CE->getOpcode() == Instruction::BitCast)
-    if (Constant *LoadedC = ConstantFoldLoadThroughBitcast(CE, TD))
+    if (Constant *LoadedC = ConstantFoldLoadThroughBitcast(CE, DL))
      return LoadedC;
 
   // Instead of loading constant c string, use corresponding integer value
   // directly if string length is small enough.
   StringRef Str;
-  if (TD && getConstantStringInfo(CE, Str) && !Str.empty()) {
+  if (getConstantStringInfo(CE, Str) && !Str.empty()) {
     unsigned StrLen = Str.size();
     Type *Ty = cast<PointerType>(CE->getType())->getElementType();
     unsigned NumBits = Ty->getPrimitiveSizeInBits();
@@ -568,7 +564,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
         (isa<IntegerType>(Ty) || Ty->isFloatingPointTy())) {
       APInt StrVal(NumBits, 0);
       APInt SingleChar(NumBits, 0);
-      if (TD->isLittleEndian()) {
+      if (DL.isLittleEndian()) {
        for (signed i = StrLen-1; i >= 0; i--) {
          SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
          StrVal = (StrVal << 8) | SingleChar;
@@ -593,7 +589,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
   // If this load comes from anywhere in a constant global, and if the global
   // is all undef or zero, we know what it loads.
   if (GlobalVariable *GV =
-          dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, TD))) {
+          dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, DL))) {
     if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
       Type *ResTy = cast<PointerType>(C->getType())->getElementType();
       if (GV->getInitializer()->isNullValue())
@@ -604,16 +600,15 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
   }
 
   // Try hard to fold loads from bitcasted strange and non-type-safe things.
-  if (TD)
-    return FoldReinterpretLoadFromConstPtr(CE, *TD);
-  return nullptr;
+  return FoldReinterpretLoadFromConstPtr(CE, DL);
 }
 
-static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){
+static Constant *ConstantFoldLoadInst(const LoadInst *LI,
+                                      const DataLayout &DL) {
   if (LI->isVolatile()) return nullptr;
 
   if (Constant *C = dyn_cast<Constant>(LI->getOperand(0)))
-    return ConstantFoldLoadFromConstPtr(C, TD);
+    return ConstantFoldLoadFromConstPtr(C, DL);
 
   return nullptr;
 }
@@ -623,16 +618,16 @@ static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){
 /// these together.  If target data info is available, it is provided as DL,
 /// otherwise DL is null.
 static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
-                                           Constant *Op1, const DataLayout *DL){
+                                           Constant *Op1,
+                                           const DataLayout &DL) {
   // SROA
 
   // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
   // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
   // bits.
 
-
-  if (Opc == Instruction::And && DL) {
-    unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()->getScalarType());
+  if (Opc == Instruction::And) {
+    unsigned BitWidth = DL.getTypeSizeInBits(Op0->getType()->getScalarType());
     APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0);
     APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0);
     computeKnownBits(Op0, KnownZero0, KnownOne0, DL);
@@ -655,14 +650,13 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
 
   // If the constant expr is something like &A[123] - &A[4].f, fold this into a
   // constant.  This happens frequently when iterating over a global array.
-  if (Opc == Instruction::Sub && DL) {
+  if (Opc == Instruction::Sub) {
     GlobalValue *GV1, *GV2;
     APInt Offs1, Offs2;
 
-    if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *DL))
-      if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *DL) &&
-          GV1 == GV2) {
-        unsigned OpSize = DL->getTypeSizeInBits(Op0->getType());
+    if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, DL))
+      if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, DL) && GV1 == GV2) {
+        unsigned OpSize = DL.getTypeSizeInBits(Op0->getType());
 
         // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
         // PtrToInt may change the bitwidth so we have convert to the right size
@@ -677,13 +671,10 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
 
 /// If array indices are not pointer-sized integers, explicitly cast them so
 /// that they aren't implicitly casted by the getelementptr.
-static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
-                                Type *ResultTy, const DataLayout *TD,
+static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Type *ResultTy,
+                                const DataLayout &DL,
                                 const TargetLibraryInfo *TLI) {
-  if (!TD)
-    return nullptr;
-
-  Type *IntPtrTy = TD->getIntPtrType(ResultTy);
+  Type *IntPtrTy = DL.getIntPtrType(ResultTy);
 
   bool Any = false;
   SmallVector<Constant*, 32> NewIdxs;
@@ -708,7 +699,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
 
   Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
-    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
+    if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
      C = Folded;
   }
 
@@ -733,14 +724,14 @@ static Constant* StripPtrCastKeepAS(Constant* Ptr) {
 
 /// If we can symbolically evaluate the GEP constant expression, do so.
 static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
-                                         Type *ResultTy, const DataLayout *TD,
+                                         Type *ResultTy, const DataLayout &DL,
                                          const TargetLibraryInfo *TLI) {
   Constant *Ptr = Ops[0];
-  if (!TD || !Ptr->getType()->getPointerElementType()->isSized() ||
+  if (!Ptr->getType()->getPointerElementType()->isSized() ||
       !Ptr->getType()->isPointerTy())
     return nullptr;
 
-  Type *IntPtrTy = TD->getIntPtrType(Ptr->getType());
+  Type *IntPtrTy = DL.getIntPtrType(Ptr->getType());
   Type *ResultElementTy = ResultTy->getPointerElementType();
 
   // If this is a constant expr gep that is effectively computing an
@@ -760,19 +751,19 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
           Res = ConstantExpr::getSub(Res, CE->getOperand(1));
         Res = ConstantExpr::getIntToPtr(Res, ResultTy);
         if (ConstantExpr *ResCE = dyn_cast<ConstantExpr>(Res))
-          Res = ConstantFoldConstantExpression(ResCE, TD, TLI);
+          Res = ConstantFoldConstantExpression(ResCE, DL, TLI);
         return Res;
       }
     }
     return nullptr;
   }
 
-  unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy);
+  unsigned BitWidth = DL.getTypeSizeInBits(IntPtrTy);
   APInt Offset =
-    APInt(BitWidth, TD->getIndexedOffset(Ptr->getType(),
-                                         makeArrayRef((Value *const*)
-                                                      Ops.data() + 1,
-                                                      Ops.size() - 1)));
+      APInt(BitWidth,
+            DL.getIndexedOffset(
+                Ptr->getType(),
+                makeArrayRef((Value * const *)Ops.data() + 1, Ops.size() - 1)));
   Ptr = StripPtrCastKeepAS(Ptr);
 
   // If this is a GEP of a GEP, fold it all into a single GEP.
@@ -790,8 +781,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
       break;
 
     Ptr = cast<Constant>(GEP->getOperand(0));
-    Offset += APInt(BitWidth,
-                    TD->getIndexedOffset(Ptr->getType(), NestedOps));
+    Offset += APInt(BitWidth, DL.getIndexedOffset(Ptr->getType(), NestedOps));
     Ptr = StripPtrCastKeepAS(Ptr);
   }
 
@@ -831,7 +821,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
     }
 
     // Determine which element of the array the offset points into.
-    APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType()));
+    APInt ElemSize(BitWidth, DL.getTypeAllocSize(ATy->getElementType()));
     if (ElemSize == 0)
       // The element size is 0. This may be [0 x Ty]*, so just use a zero
       // index for this level and proceed to the next level to see if it can
@@ -850,7 +840,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
       // can't re-form this GEP in a regular form, so bail out. The pointer
      // operand likely went through casts that are necessary to make the GEP
      // sensible.
-      const StructLayout &SL = *TD->getStructLayout(STy);
+      const StructLayout &SL = *DL.getStructLayout(STy);
       if (Offset.uge(SL.getSizeInBytes()))
         break;
 
@@ -882,7 +872,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
   // If we ended up indexing a member with a type that doesn't match
   // the type of what the original indices indexed, add a cast.
   if (Ty != ResultElementTy)
-    C = FoldBitCast(C, ResultTy, *TD);
+    C = FoldBitCast(C, ResultTy, DL);
 
   return C;
 }
@@ -898,8 +888,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
 /// Note that this fails if not all of the operands are constant.  Otherwise,
 /// this function can only fail when attempting to fold instructions like loads
 /// and stores, which have no constant expression form.
-Constant *llvm::ConstantFoldInstruction(Instruction *I,
-                                        const DataLayout *TD,
+Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
                                         const TargetLibraryInfo *TLI) {
   // Handle PHI nodes quickly here...
   if (PHINode *PN = dyn_cast<PHINode>(I)) {
@@ -919,7 +908,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
         return nullptr;
       // Fold the PHI's operands.
      if (ConstantExpr *NewC = dyn_cast<ConstantExpr>(C))
-        C = ConstantFoldConstantExpression(NewC, TD, TLI);
+        C = ConstantFoldConstantExpression(NewC, DL, TLI);
      // If the incoming value is a different constant to
      // the one we saw previously, then give up.
      if (CommonValue && C != CommonValue)
@@ -942,17 +931,17 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
 
     // Fold the Instruction's operands.
     if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(Op))
-      Op = ConstantFoldConstantExpression(NewCE, TD, TLI);
+      Op = ConstantFoldConstantExpression(NewCE, DL, TLI);
 
     Ops.push_back(Op);
   }
 
   if (const CmpInst *CI = dyn_cast<CmpInst>(I))
     return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
-                                           TD, TLI);
+                                           DL, TLI);
 
   if (const LoadInst *LI = dyn_cast<LoadInst>(I))
-    return ConstantFoldLoadInst(LI, TD);
+    return ConstantFoldLoadInst(LI, DL);
 
   if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I)) {
     return ConstantExpr::getInsertValue(
@@ -967,11 +956,11 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
                                         EVI->getIndices());
   }
 
-  return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI);
+  return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, DL, TLI);
 }
 
 static Constant *
-ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD,
+ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout &DL,
                                    const TargetLibraryInfo *TLI,
                                    SmallPtrSetImpl<ConstantExpr *> &FoldedOps) {
   SmallVector<Constant *, 8> Ops;
@@ -982,25 +971,25 @@ ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD,
     // a ConstantExpr, we don't have to process it again.
     if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) {
       if (FoldedOps.insert(NewCE).second)
-        NewC = ConstantFoldConstantExpressionImpl(NewCE, TD, TLI, FoldedOps);
+        NewC = ConstantFoldConstantExpressionImpl(NewCE, DL, TLI, FoldedOps);
     }
     Ops.push_back(NewC);
   }
 
   if (CE->isCompare())
     return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
-                                           TD, TLI);
-  return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI);
+                                           DL, TLI);
+  return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, DL, TLI);
 }
 
 /// Attempt to fold the constant expression
 /// using the specified DataLayout.  If successful, the constant result is
 /// result is returned, if not, null is returned.
 Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
-                                               const DataLayout *TD,
+                                               const DataLayout &DL,
                                                const TargetLibraryInfo *TLI) {
   SmallPtrSet<ConstantExpr *, 4> FoldedOps;
-  return ConstantFoldConstantExpressionImpl(CE, TD, TLI, FoldedOps);
+  return ConstantFoldConstantExpressionImpl(CE, DL, TLI, FoldedOps);
 }
 
 /// Attempt to constant fold an instruction with the
@@ -1015,12 +1004,12 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
 ///
 Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
                                          ArrayRef<Constant *> Ops,
-                                         const DataLayout *TD,
+                                         const DataLayout &DL,
                                          const TargetLibraryInfo *TLI) {
   // Handle easy binops first.
   if (Instruction::isBinaryOp(Opcode)) {
     if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1])) {
-      if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD))
+      if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], DL))
         return C;
     }
 
@@ -1040,10 +1029,10 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
     // If the input is a inttoptr, eliminate the pair.  This requires knowing
     // the width of a pointer, so it can't be done in ConstantExpr::getCast.
     if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
-      if (TD && CE->getOpcode() == Instruction::IntToPtr) {
+      if (CE->getOpcode() == Instruction::IntToPtr) {
         Constant *Input = CE->getOperand(0);
         unsigned InWidth = Input->getType()->getScalarSizeInBits();
-        unsigned PtrWidth = TD->getPointerTypeSizeInBits(CE->getType());
+        unsigned PtrWidth = DL.getPointerTypeSizeInBits(CE->getType());
         if (PtrWidth < InWidth) {
           Constant *Mask =
             ConstantInt::get(CE->getContext(),
@@ -1061,15 +1050,15 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
     // This requires knowing the width of a pointer, so it can't be done in
     // ConstantExpr::getCast.
     if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
-      if (TD && CE->getOpcode() == Instruction::PtrToInt) {
+      if (CE->getOpcode() == Instruction::PtrToInt) {
         Constant *SrcPtr = CE->getOperand(0);
-        unsigned SrcPtrSize = TD->getPointerTypeSizeInBits(SrcPtr->getType());
+        unsigned SrcPtrSize = DL.getPointerTypeSizeInBits(SrcPtr->getType());
         unsigned MidIntSize = CE->getType()->getScalarSizeInBits();
 
         if (MidIntSize >= SrcPtrSize) {
           unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace();
           if (SrcAS == DestTy->getPointerAddressSpace())
-            return FoldBitCast(CE->getOperand(0), DestTy, *TD);
+            return FoldBitCast(CE->getOperand(0), DestTy, DL);
         }
       }
     }
@@ -1087,9 +1076,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
   case Instruction::AddrSpaceCast:
     return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
   case Instruction::BitCast:
-    if (TD)
-      return FoldBitCast(Ops[0], DestTy, *TD);
-    return ConstantExpr::getBitCast(Ops[0], DestTy);
+    return FoldBitCast(Ops[0], DestTy, DL);
  case Instruction::Select:
    return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
  case Instruction::ExtractElement:
@@ -1099,9 +1086,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
   case Instruction::ShuffleVector:
     return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
   case Instruction::GetElementPtr:
-    if (Constant *C = CastGEPIndices(Ops, DestTy, TD, TLI))
+    if (Constant *C = CastGEPIndices(Ops, DestTy, DL, TLI))
       return C;
-    if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD, TLI))
+    if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, DL, TLI))
       return C;
 
     return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1));
@@ -1113,43 +1100,44 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
 /// returns a constant expression of the specified operands.
 Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
                                                 Constant *Ops0, Constant *Ops1,
-                                                const DataLayout *TD,
+                                                const DataLayout &DL,
                                                 const TargetLibraryInfo *TLI) {
   // fold: icmp (inttoptr x), null         -> icmp x, 0
   // fold: icmp (ptrtoint x), 0            -> icmp x, null
   // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
   // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
   //
-  // ConstantExpr::getCompare cannot do this, because it doesn't have TD
+  // FIXME: The following comment is out of data and the DataLayout is here now.
+  // ConstantExpr::getCompare cannot do this, because it doesn't have DL
   // around to know if bit truncation is happening.
   if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
-    if (TD && Ops1->isNullValue()) {
+    if (Ops1->isNullValue()) {
       if (CE0->getOpcode() == Instruction::IntToPtr) {
-        Type *IntPtrTy = TD->getIntPtrType(CE0->getType());
+        Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
         // Convert the integer value to the right size to ensure we get the
         // proper extension or truncation.
         Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
                                                    IntPtrTy, false);
         Constant *Null = Constant::getNullValue(C->getType());
-        return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
+        return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
       }
 
       // Only do this transformation if the int is intptrty in size, otherwise
      // there is a truncation or extension that we aren't modeling.
      if (CE0->getOpcode() == Instruction::PtrToInt) {
-        Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType());
+        Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
         if (CE0->getType() == IntPtrTy) {
           Constant *C = CE0->getOperand(0);
           Constant *Null = Constant::getNullValue(C->getType());
-          return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
+          return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
         }
       }
     }
 
     if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
-      if (TD && CE0->getOpcode() == CE1->getOpcode()) {
+      if (CE0->getOpcode() == CE1->getOpcode()) {
         if (CE0->getOpcode() == Instruction::IntToPtr) {
-          Type *IntPtrTy = TD->getIntPtrType(CE0->getType());
+          Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
 
           // Convert the integer value to the right size to ensure we get the
           // proper extension or truncation.
@@ -1157,20 +1145,17 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
                                                       IntPtrTy, false);
           Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
                                                       IntPtrTy, false);
-          return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD, TLI);
+          return ConstantFoldCompareInstOperands(Predicate, C0, C1, DL, TLI);
         }
 
         // Only do this transformation if the int is intptrty in size, otherwise
         // there is a truncation or extension that we aren't modeling.
         if (CE0->getOpcode() == Instruction::PtrToInt) {
-          Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType());
+          Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
           if (CE0->getType() == IntPtrTy &&
               CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) {
-            return ConstantFoldCompareInstOperands(Predicate,
-                                                   CE0->getOperand(0),
-                                                   CE1->getOperand(0),
-                                                   TD,
-                                                   TLI);
+            return ConstantFoldCompareInstOperands(
+                Predicate, CE0->getOperand(0), CE1->getOperand(0), DL, TLI);
           }
         }
       }
@@ -1180,16 +1165,14 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
     // icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0)
     if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
         CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
-      Constant *LHS =
-        ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,
-                                        TD, TLI);
-      Constant *RHS =
-        ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,
-                                        TD, TLI);
+      Constant *LHS = ConstantFoldCompareInstOperands(
+          Predicate, CE0->getOperand(0), Ops1, DL, TLI);
+      Constant *RHS = ConstantFoldCompareInstOperands(
+          Predicate, CE0->getOperand(1), Ops1, DL, TLI);
       unsigned OpC =
        Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
       Constant *Ops[] = { LHS, RHS };
-      return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD, TLI);
+      return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, DL, TLI);
     }
   }
 
@@ -1451,26 +1434,16 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
     default: break;
     case Intrinsic::fabs:
       return ConstantFoldFP(fabs, V, Ty);
-#if HAVE_LOG2
     case Intrinsic::log2:
       return ConstantFoldFP(log2, V, Ty);
-#endif
-#if HAVE_LOG
     case Intrinsic::log:
       return ConstantFoldFP(log, V, Ty);
-#endif
-#if HAVE_LOG10
     case Intrinsic::log10:
      return ConstantFoldFP(log10, V, Ty);
-#endif
-#if HAVE_EXP
    case Intrinsic::exp:
      return ConstantFoldFP(exp, V, Ty);
-#endif
-#if HAVE_EXP2
     case Intrinsic::exp2:
       return ConstantFoldFP(exp2, V, Ty);
-#endif
     case Intrinsic::floor:
       return ConstantFoldFP(floor, V, Ty);
     case Intrinsic::ceil:
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index fda664b..3374b48 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -52,6 +52,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -59,6 +60,7 @@
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -225,13 +227,11 @@ bool Dependence::isScalar(unsigned level) const {
 //===----------------------------------------------------------------------===//
 // FullDependence methods
 
-FullDependence::FullDependence(Instruction *Source,
-                               Instruction *Destination,
+FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
                                bool PossiblyLoopIndependent,
-                               unsigned CommonLevels) :
-  Dependence(Source, Destination),
-  Levels(CommonLevels),
-  LoopIndependent(PossiblyLoopIndependent) {
+                               unsigned CommonLevels)
+    : Dependence(Source, Destination), Levels(CommonLevels),
+      LoopIndependent(PossiblyLoopIndependent) {
   Consistent = true;
   DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr;
 }
@@ -625,14 +625,12 @@ void Dependence::dump(raw_ostream &OS) const {
   OS << "!\n";
 }
 
-
-
-static
-AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
-                                                  const Value *A,
-                                                  const Value *B) {
-  const Value *AObj = GetUnderlyingObject(A);
-  const Value *BObj = GetUnderlyingObject(B);
+static AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
+                                                         const DataLayout &DL,
+                                                         const Value *A,
+                                                         const Value *B) {
+  const Value *AObj = GetUnderlyingObject(A, DL);
+  const Value *BObj = GetUnderlyingObject(B, DL);
   return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()),
                    BObj, AA->getTypeStoreSize(BObj->getType()));
 }
@@ -3314,7 +3312,8 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
   Value *SrcPtr = getPointerOperand(Src);
   Value *DstPtr = getPointerOperand(Dst);
 
-  switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) {
+  switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr,
+                                 SrcPtr)) {
   case AliasAnalysis::MayAlias:
   case AliasAnalysis::PartialAlias:
     // cannot analyse objects if we don't understand their aliasing.
@@ -3347,9 +3346,9 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
     DEBUG(dbgs() << "    SrcPtrSCEV = " << *SrcPtrSCEV << "\n");
     DEBUG(dbgs() << "    DstPtrSCEV = " << *DstPtrSCEV << "\n");
 
-    UsefulGEP =
-      isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
-      isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent()));
+    UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
+                isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) &&
+                (SrcGEP->getNumOperands() == DstGEP->getNumOperands());
   }
   unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
   SmallVector<Subscript, 4> Pair(Pairs);
@@ -3472,8 +3471,7 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
                        LI->getLoopFor(Dst->getParent()),
                        Pair[SI].Loops);
       Result.Consistent = false;
-    }
-    else if (Pair[SI].Classification == Subscript::ZIV) {
+    } else if (Pair[SI].Classification == Subscript::ZIV) {
       // always separable
       Separable.set(SI);
    }
@@ -3525,8 +3523,8 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
       DEBUG(dbgs() << ", SIV\n");
       unsigned Level;
       const SCEV *SplitIter = nullptr;
-      if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
-                  Result, NewConstraint, SplitIter))
+      if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint,
+                  SplitIter))
         return nullptr;
       break;
     }
@@ -3574,8 +3572,8 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
             unsigned Level;
             const SCEV *SplitIter = nullptr;
             DEBUG(dbgs() << "SIV\n");
-            if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
-                        Result, NewConstraint, SplitIter))
+            if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint,
+                        SplitIter))
              return nullptr;
             ConstrainedLevels.set(Level);
             if (intersectConstraints(&Constraints[Level], &NewConstraint)) {
@@ -3651,8 +3649,10 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
 
     // update Result.DV from constraint vector
     DEBUG(dbgs() << "    updating\n");
-    for (int SJ = ConstrainedLevels.find_first();
-         SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) {
+    for (int SJ = ConstrainedLevels.find_first(); SJ >= 0;
+         SJ = ConstrainedLevels.find_next(SJ)) {
+      if (SJ > (int)CommonLevels)
+        break;
       updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
       if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
         return nullptr;
@@ -3759,8 +3759,8 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep,
   assert(isLoadOrStore(Dst));
   Value *SrcPtr = getPointerOperand(Src);
   Value *DstPtr = getPointerOperand(Dst);
-  assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) ==
-         AliasAnalysis::MustAlias);
+  assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr,
+                                SrcPtr) == AliasAnalysis::MustAlias);
 
   // establish loop nesting levels
   establishNestingLevels(Src, Dst);
@@ -3775,9 +3775,9 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep,
       SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) {
     const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand());
     const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand());
-    UsefulGEP =
-      isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
-      isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent()));
+    UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
+                isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) &&
+                (SrcGEP->getNumOperands() == DstGEP->getNumOperands());
   }
   unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
   SmallVector<Subscript, 4> Pair(Pairs);
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index ded1de7..9d607cc 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -49,7 +49,7 @@ public:
   explicit CGPassManager()
     : ModulePass(ID), PMDataManager() { }
 
-  /// run - Execute all of the passes scheduled for execution.  Keep track of
+  /// Execute all of the passes scheduled for execution.  Keep track of
   /// whether any of the passes modifies the module, and if so, return true.
   bool runOnModule(Module &M) override;
 
@@ -142,9 +142,8 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
   FPPassManager *FPP = (FPPassManager*)P;
 
   // Run pass P on all functions in the current SCC.
-  for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
-       I != E; ++I) {
-    if (Function *F = (*I)->getFunction()) {
+  for (CallGraphNode *CGN : CurSCC) {
+    if (Function *F = CGN->getFunction()) {
       dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName());
       {
         TimeRegion PassTimer(getPassTimer(FPP));
@@ -165,7 +164,7 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
 }
 
 
-/// RefreshCallGraph - Scan the functions in the specified CFG and resync the
+/// Scan the functions in the specified CFG and resync the
 /// callgraph with the call sites found in it.  This is used after
 /// FunctionPasses have potentially munged the callgraph, and can be used after
 /// CallGraphSCC passes to verify that they correctly updated the callgraph.
@@ -181,9 +180,8 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size() << " nodes:\n"; - for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); - I != E; ++I) - (*I)->dump(); + for (CallGraphNode *CGN : CurSCC) + CGN->dump(); ); bool MadeChange = false; @@ -357,9 +355,8 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, DEBUG(if (MadeChange) { dbgs() << "CGSCCPASSMGR: Refreshed SCC is now:\n"; - for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); - I != E; ++I) - (*I)->dump(); + for (CallGraphNode *CGN : CurSCC) + CGN->dump(); if (DevirtualizedCall) dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n"; @@ -372,15 +369,15 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, return DevirtualizedCall; } -/// RunAllPassesOnSCC - Execute the body of the entire pass manager on the -/// specified SCC. This keeps track of whether a function pass devirtualizes +/// Execute the body of the entire pass manager on the specified SCC. +/// This keeps track of whether a function pass devirtualizes /// any calls and returns it in DevirtualizedCall. bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, bool &DevirtualizedCall) { bool Changed = false; - // CallGraphUpToDate - Keep track of whether the callgraph is known to be - // up-to-date or not. The CGSSC pass manager runs two types of passes: + // Keep track of whether the callgraph is known to be up-to-date or not. + // The CGSCC pass manager runs two types of passes: // CallGraphSCC Passes and other random function passes. Because other // random function passes are not CallGraph aware, they may clobber the // call graph by introducing new calls or deleting other ones. This flag @@ -433,7 +430,7 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, return Changed; } -/// run - Execute all of the passes scheduled for execution. Keep track of +/// Execute all of the passes scheduled for execution. Keep track of /// whether any of the passes modifies the module, and if so, return true. bool CGPassManager::runOnModule(Module &M) { CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); @@ -519,7 +516,7 @@ bool CGPassManager::doFinalization(CallGraph &CG) { // CallGraphSCC Implementation //===----------------------------------------------------------------------===// -/// ReplaceNode - This informs the SCC and the pass manager that the specified +/// This informs the SCC and the pass manager that the specified /// Old node has been deleted, and New is to be used in its place. void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) { assert(Old != New && "Should not replace node with self"); @@ -578,8 +575,8 @@ void CallGraphSCCPass::assignPassManager(PMStack &PMS, CGP->add(this); } -/// getAnalysisUsage - For this class, we declare that we require and preserve -/// the call graph. If the derived class implements this method, it should +/// For this class, we declare that we require and preserve the call graph. +/// If the derived class implements this method, it should /// always explicitly call the implementation here.
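// (A minimal sketch, not from this patch, of how a derived pass honors the
// contract just stated; MyCGSCCPass and SomeAnalysis are hypothetical names
// used only for illustration:
//
//   void MyCGSCCPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<SomeAnalysis>();          // pass-specific requirements first
//     CallGraphSCCPass::getAnalysisUsage(AU);  // then re-declare the call graph
//   }
// )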
void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<CallGraphWrapperPass>(); @@ -609,9 +606,9 @@ namespace { bool runOnSCC(CallGraphSCC &SCC) override { Out << Banner; - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - if ((*I)->getFunction()) - (*I)->getFunction()->print(Out); + for (CallGraphNode *CGN : SCC) { + if (CGN->getFunction()) + CGN->getFunction()->print(Out); else Out << "\nPrinting <null> Function\n"; } diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index 607c068..2208f32 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -96,7 +96,7 @@ namespace { } bool runOnModule(Module &M) override { - InitializeAliasAnalysis(this); + InitializeAliasAnalysis(this, &M.getDataLayout()); // Find non-addr taken globals. AnalyzeGlobals(M); @@ -322,7 +322,8 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { continue; // Check the value being stored. - Value *Ptr = GetUnderlyingObject(SI->getOperand(0)); + Value *Ptr = GetUnderlyingObject(SI->getOperand(0), + GV->getParent()->getDataLayout()); if (!isAllocLikeFn(Ptr, TLI)) return false; // Too hard to analyze. @@ -481,8 +482,8 @@ AliasAnalysis::AliasResult GlobalsModRef::alias(const Location &LocA, const Location &LocB) { // Get the base object these pointers point to. - const Value *UV1 = GetUnderlyingObject(LocA.Ptr); - const Value *UV2 = GetUnderlyingObject(LocB.Ptr); + const Value *UV1 = GetUnderlyingObject(LocA.Ptr, *DL); + const Value *UV2 = GetUnderlyingObject(LocB.Ptr, *DL); // If either of the underlying values is a global, they may be non-addr-taken // globals, which we can answer queries about. @@ -540,8 +541,9 @@ GlobalsModRef::getModRefInfo(ImmutableCallSite CS, // If we are asking for mod/ref info of a direct call with a pointer to a // global we are tracking, return information if we have it. + const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout(); if (const GlobalValue *GV = - dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr))) + dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL))) if (GV->hasLocalLinkage()) if (const Function *F = CS.getCalledFunction()) if (NonAddressTakenGlobals.count(GV)) diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp index cd494ba..eeb3b87 100644 --- a/lib/Analysis/IPA/InlineCost.cpp +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -45,9 +45,6 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { typedef InstVisitor<CallAnalyzer, bool> Base; friend class InstVisitor<CallAnalyzer, bool>; - // DataLayout if available, or null. - const DataLayout *const DL; - /// The TargetTransformInfo available for this compilation. 
const TargetTransformInfo &TTI; @@ -145,9 +142,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool visitUnreachableInst(UnreachableInst &I); public: - CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI, - AssumptionCacheTracker *ACT, Function &Callee, int Threshold) - : DL(DL), TTI(TTI), ACT(ACT), F(Callee), Threshold(Threshold), Cost(0), + CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT, + Function &Callee, int Threshold) + : TTI(TTI), ACT(ACT), F(Callee), Threshold(Threshold), Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), @@ -244,10 +241,8 @@ bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) { /// Returns false if unable to compute the offset for any reason. Respects any /// simplified values known during the analysis of this callsite. bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { - if (!DL) - return false; - - unsigned IntPtrWidth = DL->getPointerSizeInBits(); + const DataLayout &DL = F.getParent()->getDataLayout(); + unsigned IntPtrWidth = DL.getPointerSizeInBits(); assert(IntPtrWidth == Offset.getBitWidth()); for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); @@ -263,12 +258,12 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { // Handle a struct index, which adds its field offset to the pointer. if (StructType *STy = dyn_cast<StructType>(*GTI)) { unsigned ElementIdx = OpC->getZExtValue(); - const StructLayout *SL = DL->getStructLayout(STy); + const StructLayout *SL = DL.getStructLayout(STy); Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); continue; } - APInt TypeSize(IntPtrWidth, DL->getTypeAllocSize(GTI.getIndexedType())); + APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType())); Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; } return true; @@ -289,9 +284,9 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { // Accumulate the allocated size. if (I.isStaticAlloca()) { + const DataLayout &DL = F.getParent()->getDataLayout(); Type *Ty = I.getAllocatedType(); - AllocatedSize += (DL ? DL->getTypeAllocSize(Ty) : - Ty->getPrimitiveSizeInBits()); + AllocatedSize += DL.getTypeAllocSize(Ty); } // We will happily inline static alloca instructions. @@ -327,7 +322,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { // Try to fold GEPs of constant-offset call site argument pointers. This // requires target data and inbounds GEPs. - if (DL && I.isInBounds()) { + if (I.isInBounds()) { // Check if we have a base + offset for the pointer. Value *Ptr = I.getPointerOperand(); std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr); @@ -396,7 +391,6 @@ bool CallAnalyzer::visitBitCast(BitCastInst &I) { } bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { - const DataLayout *DL = I.getDataLayout(); // Propagate constants through ptrtoint. Constant *COp = dyn_cast<Constant>(I.getOperand(0)); if (!COp) @@ -410,7 +404,8 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { // Track base/offset pairs when converted to a plain integer provided the // integer is large enough to represent the pointer. 
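// (Illustrative aside, not part of the original patch: assuming 64-bit
// pointers, a ptrtoint to i64 or i128 preserves every pointer bit, so a
// known base/offset pair for the operand may be carried over to the integer
// result; a cast to i32 drops the high bits, which is exactly what the width
// comparison below rules out.)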
unsigned IntegerSize = I.getType()->getScalarSizeInBits(); - if (DL && IntegerSize >= DL->getPointerSizeInBits()) { + const DataLayout &DL = F.getParent()->getDataLayout(); + if (IntegerSize >= DL.getPointerSizeInBits()) { std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(I.getOperand(0)); if (BaseAndOffset.first) @@ -433,7 +428,6 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { } bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { - const DataLayout *DL = I.getDataLayout(); // Propagate constants through ptrtoint. Constant *COp = dyn_cast<Constant>(I.getOperand(0)); if (!COp) @@ -448,7 +442,8 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { // modifications provided the integer is not too large. Value *Op = I.getOperand(0); unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); - if (DL && IntegerSize <= DL->getPointerSizeInBits()) { + const DataLayout &DL = F.getParent()->getDataLayout(); + if (IntegerSize <= DL.getPointerSizeInBits()) { std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op); if (BaseAndOffset.first) ConstantOffsetPtrs[&I] = BaseAndOffset; @@ -485,12 +480,14 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { Constant *COp = dyn_cast<Constant>(Operand); if (!COp) COp = SimplifiedValues.lookup(Operand); - if (COp) + if (COp) { + const DataLayout &DL = F.getParent()->getDataLayout(); if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(), COp, DL)) { SimplifiedValues[&I] = C; return true; } + } // Disable any SROA on the argument to arbitrary unary operators. disableSROA(Operand); @@ -595,6 +592,7 @@ bool CallAnalyzer::visitSub(BinaryOperator &I) { bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + const DataLayout &DL = F.getParent()->getDataLayout(); if (!isa<Constant>(LHS)) if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) LHS = SimpleLHS; @@ -623,7 +621,7 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { bool CallAnalyzer::visitLoad(LoadInst &I) { Value *SROAArg; DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { + if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) { if (I.isSimple()) { accumulateSROACost(CostIt, InlineConstants::InstrCost); return true; @@ -638,7 +636,7 @@ bool CallAnalyzer::visitLoad(LoadInst &I) { bool CallAnalyzer::visitStore(StoreInst &I) { Value *SROAArg; DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { + if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) { if (I.isSimple()) { accumulateSROACost(CostIt, InlineConstants::InstrCost); return true; @@ -788,7 +786,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { // during devirtualization and so we want to give it a hefty bonus for // inlining, but cap that bonus in the event that inlining wouldn't pan // out. Pretend to inline the function, with a custom threshold. - CallAnalyzer CA(DL, TTI, ACT, *F, InlineConstants::IndirectCallThreshold); + CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the // bonus we want to apply, but don't go below zero. @@ -976,10 +974,11 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB, /// returns 0 if V is not a pointer, and returns the constant '0' if there are /// no constant offsets applied. 
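// (Editorial illustration under stated assumptions, 64-bit pointers and
// 4-byte i32: for a V that is an inbounds GEP indexing an i32* base %p by
// the constant 3, the walk below strips the GEP, leaves V equal to %p, and
// returns an i64 ConstantInt of 12, i.e. 3 * sizeof(i32).)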
ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { - if (!DL || !V->getType()->isPointerTy()) + if (!V->getType()->isPointerTy()) return nullptr; - unsigned IntPtrWidth = DL->getPointerSizeInBits(); + const DataLayout &DL = F.getParent()->getDataLayout(); + unsigned IntPtrWidth = DL.getPointerSizeInBits(); APInt Offset = APInt::getNullValue(IntPtrWidth); // Even though we don't look through PHI nodes, we could be called on an @@ -1003,7 +1002,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { assert(V->getType()->isPointerTy() && "Unexpected operand type!"); } while (Visited.insert(V).second); - Type *IntPtrTy = DL->getIntPtrType(V->getContext()); + Type *IntPtrTy = DL.getIntPtrType(V->getContext()); return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset)); } @@ -1034,16 +1033,17 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { assert(NumVectorInstructions == 0); FiftyPercentVectorBonus = Threshold; TenPercentVectorBonus = Threshold / 2; + const DataLayout &DL = F.getParent()->getDataLayout(); // Give out bonuses per argument, as the instructions setting them up will // be gone after inlining. for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { - if (DL && CS.isByValArgument(I)) { + if (CS.isByValArgument(I)) { // We approximate the number of loads and stores needed by dividing the // size of the byval type by the target's pointer size. PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); - unsigned TypeSize = DL->getTypeSizeInBits(PTy->getElementType()); - unsigned PointerSize = DL->getPointerSizeInBits(); + unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType()); + unsigned PointerSize = DL.getPointerSizeInBits(); // Ceiling division. unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; @@ -1333,8 +1333,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); - CallAnalyzer CA(Callee->getDataLayout(), TTIWP->getTTI(*Callee), - ACT, *Callee, Threshold); + CallAnalyzer CA(TTIWP->getTTI(*Callee), ACT, *Callee, Threshold); bool ShouldInline = CA.analyzeCall(CS); DEBUG(CA.dump()); diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 140753c..b88b249 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -113,6 +114,8 @@ static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT, /// return true. Otherwise, return false. bool IVUsers::AddUsersImpl(Instruction *I, SmallPtrSetImpl<Loop*> &SimpleLoopNests) { + const DataLayout &DL = I->getModule()->getDataLayout(); + // Add this IV user to the Processed set before returning false to ensure that // all IV users are members of the set. See IVUsers::isIVUserOrOperand. if (!Processed.insert(I).second) @@ -124,14 +127,14 @@ bool IVUsers::AddUsersImpl(Instruction *I, // IVUsers is used by LSR which assumes that all SCEV expressions are safe to // pass to SCEVExpander. Expressions are not safe to expand if they represent // operations that are not safe to speculate, namely integer division. 
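// (Hedged example of the speculation rule just described, not from the
// patch: an IV user such as %q = udiv i64 %n, %iv must be rejected by the
// check below, because SCEVExpander could re-materialize the division at a
// point where the divisor might be zero, introducing a trap the original
// loop never executed.)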
- if (!isa<PHINode>(I) && !isSafeToSpeculativelyExecute(I, DL)) + if (!isa<PHINode>(I) && !isSafeToSpeculativelyExecute(I)) return false; // LSR is not APInt clean, do not touch integers bigger than 64-bits. // Also avoid creating IVs of non-native types. For example, we don't want a // 64-bit IV in 32-bit code just because the loop has one 64-bit cast. uint64_t Width = SE->getTypeSizeInBits(I->getType()); - if (Width > 64 || (DL && !DL->isLegalInteger(Width))) + if (Width > 64 || !DL.isLegalInteger(Width)) return false; // Get the symbolic expression for this instruction. @@ -253,8 +256,6 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); SE = &getAnalysis<ScalarEvolution>(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; // Find all uses of induction variables in this loop, and categorize // them by stride. Start by finding all of the PHI nodes in the header for diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 0cb0982..99c477d 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -45,13 +45,13 @@ STATISTIC(NumReassoc, "Number of reassociations"); namespace { struct Query { - const DataLayout *DL; + const DataLayout &DL; const TargetLibraryInfo *TLI; const DominatorTree *DT; AssumptionCache *AC; const Instruction *CxtI; - Query(const DataLayout *DL, const TargetLibraryInfo *tli, + Query(const DataLayout &DL, const TargetLibraryInfo *tli, const DominatorTree *dt, AssumptionCache *ac = nullptr, const Instruction *cxti = nullptr) : DL(DL), TLI(tli), DT(dt), AC(ac), CxtI(cxti) {} @@ -584,7 +584,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, } Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const DataLayout *DL, const TargetLibraryInfo *TLI, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI), @@ -601,17 +601,11 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, /// This is very similar to GetPointerBaseWithConstantOffset except it doesn't /// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc. /// folding. -static Constant *stripAndComputeConstantOffsets(const DataLayout *DL, - Value *&V, +static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V, bool AllowNonInbounds = false) { assert(V->getType()->getScalarType()->isPointerTy()); - // Without DataLayout, just be conservative for now. Theoretically, more could - // be done in this case. 
- if (!DL) - return ConstantInt::get(IntegerType::get(V->getContext(), 64), 0); - - Type *IntPtrTy = DL->getIntPtrType(V->getType())->getScalarType(); + Type *IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType(); APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth()); // Even though we don't look through PHI nodes, we could be called on an @@ -621,7 +615,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *DL, do { if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { if ((!AllowNonInbounds && !GEP->isInBounds()) || - !GEP->accumulateConstantOffset(*DL, Offset)) + !GEP->accumulateConstantOffset(DL, Offset)) break; V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast) { @@ -646,8 +640,8 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *DL, /// \brief Compute the constant difference between two pointer values. /// If the difference is not a constant, returns zero. -static Constant *computePointerDifference(const DataLayout *DL, - Value *LHS, Value *RHS) { +static Constant *computePointerDifference(const DataLayout &DL, Value *LHS, + Value *RHS) { Constant *LHSOffset = stripAndComputeConstantOffsets(DL, LHS); Constant *RHSOffset = stripAndComputeConstantOffsets(DL, RHS); @@ -783,7 +777,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, } Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const DataLayout *DL, const TargetLibraryInfo *TLI, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI), @@ -962,7 +956,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, } Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout *DL, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -971,7 +965,7 @@ Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, } Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout *DL, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -980,7 +974,7 @@ Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, } Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout *DL, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -988,7 +982,7 @@ Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, RecursionLimit); } -Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *DL, +Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -1092,7 +1086,7 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q, return nullptr; } -Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout *DL, +Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -1110,7 +1104,7 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q, return nullptr; } -Value *llvm::SimplifyUDivInst(Value *Op0, 
Value *Op1, const DataLayout *DL, +Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -1138,7 +1132,7 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, } Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout *DL, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -1217,7 +1211,7 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q, return nullptr; } -Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout *DL, +Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -1235,7 +1229,7 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q, return nullptr; } -Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *DL, +Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -1263,7 +1257,7 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, } Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout *DL, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -1387,7 +1381,7 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, } Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const DataLayout *DL, const TargetLibraryInfo *TLI, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI), @@ -1411,7 +1405,7 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, } Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, - const DataLayout *DL, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -1445,7 +1439,7 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, } Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, - const DataLayout *DL, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -1596,9 +1590,11 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, // A & (-A) = A if A is a power of two or zero. 
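// (Worked example, editorial, in two's complement: A = 8 = 0b01000 gives
// -A = ...11000, so A & -A == 0b01000 == A; A = 0 gives 0 & 0 == 0; but a
// non-power-of-two A = 6 = 0b00110 gives -A = ...11010 and A & -A == 2 != A,
// which is why the isKnownToBeAPowerOfTwo guard below is required.)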
if (match(Op0, m_Neg(m_Specific(Op1))) || match(Op1, m_Neg(m_Specific(Op0)))) { - if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT)) + if (isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, + Q.DT)) return Op0; - if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT)) + if (isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, + Q.DT)) return Op1; } @@ -1643,7 +1639,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, return nullptr; } -Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout *DL, +Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -1831,7 +1827,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, return nullptr; } -Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout *DL, +Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -1888,7 +1884,7 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q, return nullptr; } -Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout *DL, +Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -1948,10 +1944,10 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, // If the C and C++ standards are ever made sufficiently restrictive in this // area, it may be possible to update LLVM's semantics accordingly and reinstate // this optimization. -static Constant *computePointerICmp(const DataLayout *DL, +static Constant *computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI, - CmpInst::Predicate Pred, - Value *LHS, Value *RHS) { + CmpInst::Predicate Pred, Value *LHS, + Value *RHS) { // First, skip past any trivial no-ops. LHS = LHS->stripPointerCasts(); RHS = RHS->stripPointerCasts(); @@ -2369,8 +2365,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input // if the integer type is the same size as the pointer type. - if (MaxRecurse && Q.DL && isa<PtrToIntInst>(LI) && - Q.DL->getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) { + if (MaxRecurse && isa<PtrToIntInst>(LI) && + Q.DL.getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) { if (Constant *RHSC = dyn_cast<Constant>(RHS)) { // Transfer the cast to the constant. 
if (Value *V = SimplifyICmpInst(Pred, SrcOp, @@ -3024,7 +3020,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const DataLayout *DL, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, Instruction *CxtI) { @@ -3054,8 +3050,13 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (Pred == FCmpInst::FCMP_TRUE) return ConstantInt::get(GetCompareTy(LHS), 1); - if (isa<UndefValue>(RHS)) // fcmp pred X, undef -> undef - return UndefValue::get(GetCompareTy(LHS)); + // fcmp pred x, undef and fcmp pred undef, x + // fold to true if unordered, false if ordered + if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) { + // Choosing NaN for the undef will always make unordered comparison succeed + // and ordered comparison fail. + return ConstantInt::get(GetCompareTy(LHS), CmpInst::isUnordered(Pred)); + } // fcmp x,x -> true/false. Not all compares are foldable. if (LHS == RHS) { @@ -3135,7 +3136,7 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, } Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const DataLayout *DL, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -3230,7 +3231,7 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, } Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, - const DataLayout *DL, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -3264,10 +3265,10 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { return Ops[0]; Type *Ty = PtrTy->getElementType(); - if (Q.DL && Ty->isSized()) { + if (Ty->isSized()) { Value *P; uint64_t C; - uint64_t TyAllocSize = Q.DL->getTypeAllocSize(Ty); + uint64_t TyAllocSize = Q.DL.getTypeAllocSize(Ty); // getelementptr P, N -> P if P points to a type of zero size. if (TyAllocSize == 0) return Ops[0]; @@ -3275,7 +3276,7 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { // The following transforms are only safe if the ptrtoint cast // doesn't truncate the pointers. 
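// (Illustrative aside, not part of the original patch: with 64-bit pointers
// in this address space, an index built as a ptrtoint of some pointer to i32
// has already lost the high 32 bits, so rewriting the GEP in terms of that
// pointer could compute a different address; the scalar-size equality test
// below rejects exactly that case.)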
if (Ops[1]->getType()->getScalarSizeInBits() == - Q.DL->getPointerSizeInBits(AS)) { + Q.DL.getPointerSizeInBits(AS)) { auto PtrToIntOrZero = [GEPTy](Value *P) -> Value * { if (match(P, m_Zero())) return Constant::getNullValue(GEPTy); @@ -3320,7 +3321,7 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1)); } -Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout *DL, +Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -3357,7 +3358,7 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, } Value *llvm::SimplifyInsertValueInst( - Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const DataLayout *DL, + Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query(DL, TLI, DT, AC, CxtI), @@ -3405,7 +3406,7 @@ static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) { return nullptr; } -Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *DL, +Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -3502,7 +3503,7 @@ static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, } Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const DataLayout *DL, const TargetLibraryInfo *TLI, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { return ::SimplifyBinOp(Opcode, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), @@ -3510,7 +3511,7 @@ Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, } Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const FastMathFlags &FMF, const DataLayout *DL, + const FastMathFlags &FMF, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -3528,7 +3529,7 @@ static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, } Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const DataLayout *DL, const TargetLibraryInfo *TLI, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { return ::SimplifyCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), @@ -3604,7 +3605,7 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, } Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin, - User::op_iterator ArgEnd, const DataLayout *DL, + User::op_iterator ArgEnd, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT, AC, CxtI), @@ -3612,7 +3613,7 @@ Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin, } Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args, - const DataLayout *DL, const TargetLibraryInfo *TLI, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { return ::SimplifyCall(V, Args.begin(), Args.end(), @@ -3621,7 +3622,7 @@ Value 
*llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args, /// SimplifyInstruction - See if we can compute a simplified version of this /// instruction. If not, this returns null. -Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL, +Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC) { Value *Result; @@ -3769,12 +3770,12 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL, /// This routine returns 'true' only when *it* simplifies something. The passed /// in simplified value does not count toward this. static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, - const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC) { bool Simplified = false; SmallSetVector<Instruction *, 8> Worklist; + const DataLayout &DL = I->getModule()->getDataLayout(); // If we have an explicit value to collapse to, do that round of the // simplification loop by hand initially. @@ -3822,19 +3823,18 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, return Simplified; } -bool llvm::recursivelySimplifyInstruction(Instruction *I, const DataLayout *DL, +bool llvm::recursivelySimplifyInstruction(Instruction *I, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC) { - return replaceAndRecursivelySimplifyImpl(I, nullptr, DL, TLI, DT, AC); + return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT, AC); } bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, - const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC) { assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!"); assert(SimpleV && "Must provide a simplified value."); - return replaceAndRecursivelySimplifyImpl(I, SimpleV, DL, TLI, DT, AC); + return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC); } diff --git a/lib/Analysis/JumpInstrTableInfo.cpp b/lib/Analysis/JumpInstrTableInfo.cpp deleted file mode 100644 index 7aae2a5..0000000 --- a/lib/Analysis/JumpInstrTableInfo.cpp +++ /dev/null @@ -1,55 +0,0 @@ -//===-- JumpInstrTableInfo.cpp: Info for Jump-Instruction Tables ----------===// -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// \brief Information about jump-instruction tables that have been created by -/// JumpInstrTables pass. -/// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "jiti" - -#include "llvm/Analysis/JumpInstrTableInfo.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Type.h" -#include "llvm/Support/MathExtras.h" - -using namespace llvm; - -INITIALIZE_PASS(JumpInstrTableInfo, "jump-instr-table-info", - "Jump-Instruction Table Info", true, true) -char JumpInstrTableInfo::ID = 0; - -ImmutablePass *llvm::createJumpInstrTableInfoPass() { - return new JumpInstrTableInfo(); -} - -ModulePass *llvm::createJumpInstrTableInfoPass(unsigned Bound) { - // This cast is always safe, since Bound is always in a subset of uint64_t. 
- uint64_t B = static_cast<uint64_t>(Bound); - return new JumpInstrTableInfo(B); -} - -JumpInstrTableInfo::JumpInstrTableInfo(uint64_t ByteAlign) - : ImmutablePass(ID), Tables(), ByteAlignment(ByteAlign) { - if (!llvm::isPowerOf2_64(ByteAlign)) { - // Note that we don't explicitly handle overflow here, since we handle the 0 - // case explicitly when a caller actually tries to create jumptable entries, - // and this is the return value on overflow. - ByteAlignment = llvm::NextPowerOf2(ByteAlign); - } - - initializeJumpInstrTableInfoPass(*PassRegistry::getPassRegistry()); -} - -JumpInstrTableInfo::~JumpInstrTableInfo() {} - -void JumpInstrTableInfo::insertEntry(FunctionType *TableFunTy, Function *Target, - Function *Jump) { - Tables[TableFunTy].push_back(JumpPair(Target, Jump)); -} diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 87c31fd..e6f586a 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -191,7 +191,7 @@ public: /// Merge the specified lattice value into this one, updating this /// one and returning true if anything changed. - bool mergeIn(const LVILatticeVal &RHS) { + bool mergeIn(const LVILatticeVal &RHS, const DataLayout &DL) { if (RHS.isUndefined() || isOverdefined()) return false; if (RHS.isOverdefined()) return markOverdefined(); @@ -215,11 +215,9 @@ public: // Unless we can prove that the two Constants are different, we must // move to overdefined. - // FIXME: use DataLayout/TargetLibraryInfo for smarter constant folding. - if (ConstantInt *Res = dyn_cast<ConstantInt>( - ConstantFoldCompareInstOperands(CmpInst::ICMP_NE, - getConstant(), - RHS.getNotConstant()))) + if (ConstantInt *Res = + dyn_cast<ConstantInt>(ConstantFoldCompareInstOperands( + CmpInst::ICMP_NE, getConstant(), RHS.getNotConstant(), DL))) if (Res->isOne()) return markNotConstant(RHS.getNotConstant()); @@ -241,11 +239,9 @@ public: // Unless we can prove that the two Constants are different, we must // move to overdefined. - // FIXME: use DataLayout/TargetLibraryInfo for smarter constant folding. - if (ConstantInt *Res = dyn_cast<ConstantInt>( - ConstantFoldCompareInstOperands(CmpInst::ICMP_NE, - getNotConstant(), - RHS.getConstant()))) + if (ConstantInt *Res = + dyn_cast<ConstantInt>(ConstantFoldCompareInstOperands( + CmpInst::ICMP_NE, getNotConstant(), RHS.getConstant(), DL))) if (Res->isOne()) return false; @@ -346,21 +342,17 @@ namespace { /// Push BV onto BlockValueStack unless it's already in there. /// Returns true on success. bool pushBlockValue(const std::pair<BasicBlock *, Value *> &BV) { - if (BlockValueSet.count(BV)) + if (!BlockValueSet.insert(BV).second) return false; // It's already in the stack. BlockValueStack.push(BV); - BlockValueSet.insert(BV); return true; } - /// A pointer to the cache of @llvm.assume calls. - AssumptionCache *AC; - /// An optional DL pointer. - const DataLayout *DL; - /// An optional DT pointer. - DominatorTree *DT; - + AssumptionCache *AC; ///< A pointer to the cache of @llvm.assume calls. + const DataLayout &DL; ///< A mandatory DataLayout + DominatorTree *DT; ///< An optional DT pointer. 
+ friend struct LVIValueHandle; void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) { @@ -426,7 +418,7 @@ namespace { OverDefinedCache.clear(); } - LazyValueInfoCache(AssumptionCache *AC, const DataLayout *DL = nullptr, + LazyValueInfoCache(AssumptionCache *AC, const DataLayout &DL, DominatorTree *DT = nullptr) : AC(AC), DL(DL), DT(DT) {} }; @@ -579,11 +571,13 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) { if (LoadInst *L = dyn_cast<LoadInst>(I)) { return L->getPointerAddressSpace() == 0 && - GetUnderlyingObject(L->getPointerOperand()) == Ptr; + GetUnderlyingObject(L->getPointerOperand(), + L->getModule()->getDataLayout()) == Ptr; } if (StoreInst *S = dyn_cast<StoreInst>(I)) { return S->getPointerAddressSpace() == 0 && - GetUnderlyingObject(S->getPointerOperand()) == Ptr; + GetUnderlyingObject(S->getPointerOperand(), + S->getModule()->getDataLayout()) == Ptr; } if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { if (MI->isVolatile()) return false; @@ -593,11 +587,13 @@ static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) { if (!Len || Len->isZero()) return false; if (MI->getDestAddressSpace() == 0) - if (GetUnderlyingObject(MI->getRawDest()) == Ptr) + if (GetUnderlyingObject(MI->getRawDest(), + MI->getModule()->getDataLayout()) == Ptr) return true; if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) if (MTI->getSourceAddressSpace() == 0) - if (GetUnderlyingObject(MTI->getRawSource()) == Ptr) + if (GetUnderlyingObject(MTI->getRawSource(), + MTI->getModule()->getDataLayout()) == Ptr) return true; } return false; @@ -614,10 +610,11 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, if (isKnownNonNull(Val)) { NotNull = true; } else { - Value *UnderlyingVal = GetUnderlyingObject(Val); + const DataLayout &DL = BB->getModule()->getDataLayout(); + Value *UnderlyingVal = GetUnderlyingObject(Val, DL); // If 'GetUnderlyingObject' didn't converge, skip it. It won't converge // inside InstructionDereferencesPointer either. - if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, nullptr, 1)) { + if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, DL, 1)) { for (Instruction &I : *BB) { if (InstructionDereferencesPointer(&I, UnderlyingVal)) { NotNull = true; @@ -651,7 +648,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, if (EdgesMissing) continue; - Result.mergeIn(EdgeResult); + Result.mergeIn(EdgeResult, DL); // If we hit overdefined, exit early. The BlockVals entry is already set // to overdefined. @@ -696,7 +693,7 @@ bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV, if (EdgesMissing) continue; - Result.mergeIn(EdgeResult); + Result.mergeIn(EdgeResult, DL); // If we hit overdefined, exit early. The BlockVals entry is already set // to overdefined. 
@@ -735,7 +732,7 @@ void LazyValueInfoCache::mergeAssumeBlockValueConstantRange(Value *Val, if (!AssumeVH) continue; auto *I = cast<CallInst>(AssumeVH); - if (!isValidAssumeForContext(I, BBI, DL, DT)) + if (!isValidAssumeForContext(I, BBI, DT)) continue; Value *C = I->getArgOperand(0); @@ -745,7 +742,7 @@ void LazyValueInfoCache::mergeAssumeBlockValueConstantRange(Value *Val, if (BBLV.isOverdefined()) BBLV = Result; else - BBLV.mergeIn(Result); + BBLV.mergeIn(Result, DL); } } } @@ -857,10 +854,10 @@ bool getValueFromFromCondition(Value *Val, ICmpInst *ICI, ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1)); if (CI && (ICI->getOperand(0) == Val || NegOffset)) { - // Calculate the range of values that would satisfy the comparison. + // Calculate the range of values that are allowed by the comparison ConstantRange CmpRange(CI->getValue()); ConstantRange TrueValues = - ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange); + ConstantRange::makeAllowedICmpRegion(ICI->getPredicate(), CmpRange); if (NegOffset) // Apply the offset from above. TrueValues = TrueValues.subtract(NegOffset->getValue()); @@ -1104,27 +1101,27 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, /// This lazily constructs the LazyValueInfoCache. static LazyValueInfoCache &getCache(void *&PImpl, AssumptionCache *AC, - const DataLayout *DL = nullptr, + const DataLayout *DL, DominatorTree *DT = nullptr) { - if (!PImpl) - PImpl = new LazyValueInfoCache(AC, DL, DT); + if (!PImpl) { + assert(DL && "getCache() called with a null DataLayout"); + PImpl = new LazyValueInfoCache(AC, *DL, DT); + } return *static_cast<LazyValueInfoCache*>(PImpl); } bool LazyValueInfo::runOnFunction(Function &F) { AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + const DataLayout &DL = F.getParent()->getDataLayout(); DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DT = DTWP ? &DTWP->getDomTree() : nullptr; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; - TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); if (PImpl) - getCache(PImpl, AC, DL, DT).clear(); + getCache(PImpl, AC, &DL, DT).clear(); // Fully lazy. return false; @@ -1139,15 +1136,16 @@ void LazyValueInfo::getAnalysisUsage(AnalysisUsage &AU) const { void LazyValueInfo::releaseMemory() { // If the cache was allocated, free it. 
if (PImpl) { - delete &getCache(PImpl, AC); + delete &getCache(PImpl, AC, nullptr); PImpl = nullptr; } } Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB, Instruction *CxtI) { + const DataLayout &DL = BB->getModule()->getDataLayout(); LVILatticeVal Result = - getCache(PImpl, AC, DL, DT).getValueInBlock(V, BB, CxtI); + getCache(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI); if (Result.isConstant()) return Result.getConstant(); @@ -1164,8 +1162,9 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB, Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI) { + const DataLayout &DL = FromBB->getModule()->getDataLayout(); LVILatticeVal Result = - getCache(PImpl, AC, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); + getCache(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); if (Result.isConstant()) return Result.getConstant(); @@ -1177,9 +1176,10 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, return nullptr; } -static LazyValueInfo::Tristate -getPredicateResult(unsigned Pred, Constant *C, LVILatticeVal &Result, - const DataLayout *DL, TargetLibraryInfo *TLI) { +static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C, + LVILatticeVal &Result, + const DataLayout &DL, + TargetLibraryInfo *TLI) { // If we know the value is a constant, evaluate the conditional. Constant *Res = nullptr; @@ -1250,8 +1250,9 @@ LazyValueInfo::Tristate LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI) { + const DataLayout &DL = FromBB->getModule()->getDataLayout(); LVILatticeVal Result = - getCache(PImpl, AC, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); + getCache(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); return getPredicateResult(Pred, C, Result, DL, TLI); } @@ -1259,18 +1260,23 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, LazyValueInfo::Tristate LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C, Instruction *CxtI) { - LVILatticeVal Result = getCache(PImpl, AC, DL, DT).getValueAt(V, CxtI); + const DataLayout &DL = CxtI->getModule()->getDataLayout(); + LVILatticeVal Result = getCache(PImpl, AC, &DL, DT).getValueAt(V, CxtI); return getPredicateResult(Pred, C, Result, DL, TLI); } void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc) { - if (PImpl) - getCache(PImpl, AC, DL, DT).threadEdge(PredBB, OldSucc, NewSucc); + if (PImpl) { + const DataLayout &DL = PredBB->getModule()->getDataLayout(); + getCache(PImpl, AC, &DL, DT).threadEdge(PredBB, OldSucc, NewSucc); + } } void LazyValueInfo::eraseBlock(BasicBlock *BB) { - if (PImpl) - getCache(PImpl, AC, DL, DT).eraseBlock(BB); + if (PImpl) { + const DataLayout &DL = BB->getModule()->getDataLayout(); + getCache(PImpl, AC, &DL, DT).eraseBlock(BB); + } } diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp index 016f8c5..f6025e3 100644 --- a/lib/Analysis/LibCallAliasAnalysis.cpp +++ b/lib/Analysis/LibCallAliasAnalysis.cpp @@ -36,7 +36,11 @@ void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); // Does not transform code } - +bool LibCallAliasAnalysis::runOnFunction(Function &F) { + // set up super class + InitializeAliasAnalysis(this, &F.getParent()->getDataLayout()); + return false; +} /// AnalyzeLibCallDetails - Given a call to a function with the specified /// LibCallFunctionInfo, see if 
we can improve the mod/ref footprint of the call diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp index cf752dd..328b186 100644 --- a/lib/Analysis/LibCallSemantics.cpp +++ b/lib/Analysis/LibCallSemantics.cpp @@ -80,18 +80,6 @@ EHPersonality llvm::classifyEHPersonality(const Value *Pers) { .Default(EHPersonality::Unknown); } -bool llvm::isAsynchronousEHPersonality(EHPersonality Pers) { - // The two SEH personality functions can catch asynch exceptions. We assume - // unknown personalities don't catch asynch exceptions. - switch (Pers) { - case EHPersonality::MSVC_X86SEH: - case EHPersonality::MSVC_Win64SEH: - return true; - default: return false; - } - llvm_unreachable("invalid enum"); -} - bool llvm::canSimplifyInvokeNoUnwind(const InvokeInst *II) { const LandingPadInst *LP = II->getLandingPadInst(); EHPersonality Personality = classifyEHPersonality(LP->getPersonalityFn()); diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 874ed0a..65a90d7 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -59,10 +59,10 @@ using namespace llvm; namespace { namespace MemRef { - static unsigned Read = 1; - static unsigned Write = 2; - static unsigned Callee = 4; - static unsigned Branchee = 8; + static const unsigned Read = 1; + static const unsigned Write = 2; + static const unsigned Callee = 4; + static const unsigned Branchee = 8; } class Lint : public FunctionPass, public InstVisitor<Lint> { @@ -98,8 +98,8 @@ namespace { void visitInsertElementInst(InsertElementInst &I); void visitUnreachableInst(UnreachableInst &I); - Value *findValue(Value *V, bool OffsetOk) const; - Value *findValueImpl(Value *V, bool OffsetOk, + Value *findValue(Value *V, const DataLayout &DL, bool OffsetOk) const; + Value *findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk, SmallPtrSetImpl<Value *> &Visited) const; public: @@ -107,7 +107,6 @@ namespace { AliasAnalysis *AA; AssumptionCache *AC; DominatorTree *DT; - const DataLayout *DL; TargetLibraryInfo *TLI; std::string Messages; @@ -129,27 +128,33 @@ namespace { } void print(raw_ostream &O, const Module *M) const override {} - void WriteValue(const Value *V) { - if (!V) return; - if (isa<Instruction>(V)) { - MessagesStr << *V << '\n'; - } else { - V->printAsOperand(MessagesStr, true, Mod); - MessagesStr << '\n'; + void WriteValues(ArrayRef<const Value *> Vs) { + for (const Value *V : Vs) { + if (!V) + continue; + if (isa<Instruction>(V)) { + MessagesStr << *V << '\n'; + } else { + V->printAsOperand(MessagesStr, true, Mod); + MessagesStr << '\n'; + } } } - // CheckFailed - A check failed, so print out the condition and the message - // that failed. This provides a nice place to put a breakpoint if you want - // to see why something is not correct. - void CheckFailed(const Twine &Message, - const Value *V1 = nullptr, const Value *V2 = nullptr, - const Value *V3 = nullptr, const Value *V4 = nullptr) { - MessagesStr << Message.str() << "\n"; - WriteValue(V1); - WriteValue(V2); - WriteValue(V3); - WriteValue(V4); + /// \brief A check failed, so print out the condition and the message. + /// + /// This provides a nice place to put a breakpoint if you want to see why + /// something is not correct. + void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; } + + /// \brief A check failed (with values to print). + /// + /// This calls the Message-only version so that the above is easier to set + /// a breakpoint on. + template <typename T1, typename...
Ts> + void CheckFailed(const Twine &Message, const T1 &V1, const Ts &...Vs) { + CheckFailed(Message); + WriteValues({V1, Vs...}); } }; } @@ -165,16 +170,8 @@ INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", false, true) // Assert - We know that cond should be true, if not print an error message. -#define Assert(C, M) \ - do { if (!(C)) { CheckFailed(M); return; } } while (0) -#define Assert1(C, M, V1) \ - do { if (!(C)) { CheckFailed(M, V1); return; } } while (0) -#define Assert2(C, M, V1, V2) \ - do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0) -#define Assert3(C, M, V1, V2, V3) \ - do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0) -#define Assert4(C, M, V1, V2, V3, V4) \ - do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0) +#define Assert(C, ...) \ + do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (0) // Lint::run - This is the main Analysis entry point for a // function. @@ -184,8 +181,6 @@ bool Lint::runOnFunction(Function &F) { AA = &getAnalysis<AliasAnalysis>(); AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); visit(F); dbgs() << MessagesStr.str(); @@ -196,8 +191,8 @@ bool Lint::runOnFunction(Function &F) { void Lint::visitFunction(Function &F) { // This isn't undefined behavior, it's just a little unusual, and it's a // fairly common mistake to neglect to name a function. - Assert1(F.hasName() || F.hasLocalLinkage(), - "Unusual: Unnamed function with non-local linkage", &F); + Assert(F.hasName() || F.hasLocalLinkage(), + "Unusual: Unnamed function with non-local linkage", &F); // TODO: Check for irreducible control flow. } @@ -205,27 +200,30 @@ void Lint::visitFunction(Function &F) { void Lint::visitCallSite(CallSite CS) { Instruction &I = *CS.getInstruction(); Value *Callee = CS.getCalledValue(); + const DataLayout &DL = CS->getModule()->getDataLayout(); visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize, 0, nullptr, MemRef::Callee); - if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) { - Assert1(CS.getCallingConv() == F->getCallingConv(), - "Undefined behavior: Caller and callee calling convention differ", - &I); + if (Function *F = dyn_cast<Function>(findValue(Callee, DL, + /*OffsetOk=*/false))) { + Assert(CS.getCallingConv() == F->getCallingConv(), + "Undefined behavior: Caller and callee calling convention differ", + &I); FunctionType *FT = F->getFunctionType(); unsigned NumActualArgs = CS.arg_size(); - Assert1(FT->isVarArg() ? - FT->getNumParams() <= NumActualArgs : - FT->getNumParams() == NumActualArgs, - "Undefined behavior: Call argument count mismatches callee " - "argument count", &I); + Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs + : FT->getNumParams() == NumActualArgs, + "Undefined behavior: Call argument count mismatches callee " + "argument count", + &I); - Assert1(FT->getReturnType() == I.getType(), - "Undefined behavior: Call return type mismatches " - "callee return type", &I); + Assert(FT->getReturnType() == I.getType(), + "Undefined behavior: Call return type mismatches " + "callee return type", + &I); // Check argument types (in case the callee was casted) and attributes. // TODO: Verify that caller and callee attributes are compatible. 
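The Lint.cpp hunks above collapse the fixed-arity Assert1..Assert4 macros into a single variadic Assert built on two CheckFailed overloads. A minimal standalone sketch of the same pattern (hypothetical Checker type, plain std::string instead of Twine and Value*, so it compiles without LLVM) shows why the message-only overload stays a good breakpoint target:

#include <iostream>
#include <sstream>
#include <string>

struct Checker {
  std::ostringstream MessagesStr;

  // Message-only overload: one stable place to set a breakpoint, as the
  // comment on CheckFailed(const Twine &) above describes.
  void CheckFailed(const std::string &Message) {
    MessagesStr << Message << '\n';
  }

  // Values-to-print overload: forwards to the message-only version, then
  // streams each extra argument. One template replaces the four fixed-arity
  // CheckFailed(Message, V1..V4) overloads.
  template <typename T1, typename... Ts>
  void CheckFailed(const std::string &Message, const T1 &V1, const Ts &... Vs) {
    CheckFailed(Message);
    using Expander = int[];
    (void)Expander{0, (MessagesStr << V1 << '\n', 0),
                   (MessagesStr << Vs << '\n', 0)...};
  }

  void visitDiv(int Divisor); // defined below, after the Assert macro
};

// The single variadic macro standing in for Assert1..Assert4.
#define Assert(C, ...) \
  do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (0)

void Checker::visitDiv(int Divisor) {
  Assert(Divisor != 0, "Undefined behavior: Division by zero", Divisor);
  MessagesStr << "div ok\n";
}

int main() {
  Checker C;
  C.visitDiv(0); // records the failure message followed by the value 0
  std::cout << C.MessagesStr.str();
}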
@@ -235,9 +233,10 @@ void Lint::visitCallSite(CallSite CS) { Value *Actual = *AI; if (PI != PE) { Argument *Formal = PI++; - Assert1(Formal->getType() == Actual->getType(), - "Undefined behavior: Call argument type mismatches " - "callee parameter type", &I); + Assert(Formal->getType() == Actual->getType(), + "Undefined behavior: Call argument type mismatches " + "callee parameter type", + &I); // Check that noalias arguments don't alias other arguments. This is // not fully precise because we don't know the sizes of the dereferenced @@ -246,9 +245,9 @@ void Lint::visitCallSite(CallSite CS) { for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) if (AI != BI && (*BI)->getType()->isPointerTy()) { AliasAnalysis::AliasResult Result = AA->alias(*AI, *BI); - Assert1(Result != AliasAnalysis::MustAlias && - Result != AliasAnalysis::PartialAlias, - "Unusual: noalias argument aliases another argument", &I); + Assert(Result != AliasAnalysis::MustAlias && + Result != AliasAnalysis::PartialAlias, + "Unusual: noalias argument aliases another argument", &I); } // Check that an sret argument points to valid memory. @@ -256,8 +255,8 @@ void Lint::visitCallSite(CallSite CS) { Type *Ty = cast<PointerType>(Formal->getType())->getElementType(); visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty), - DL ? DL->getABITypeAlignment(Ty) : 0, - Ty, MemRef::Read | MemRef::Write); + DL.getABITypeAlignment(Ty), Ty, + MemRef::Read | MemRef::Write); } } } @@ -266,10 +265,11 @@ void Lint::visitCallSite(CallSite CS) { if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall()) for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); AI != AE; ++AI) { - Value *Obj = findValue(*AI, /*OffsetOk=*/true); - Assert1(!isa<AllocaInst>(Obj), - "Undefined behavior: Call with \"tail\" keyword references " - "alloca", &I); + Value *Obj = findValue(*AI, DL, /*OffsetOk=*/true); + Assert(!isa<AllocaInst>(Obj), + "Undefined behavior: Call with \"tail\" keyword references " + "alloca", + &I); } @@ -294,13 +294,13 @@ void Lint::visitCallSite(CallSite CS) { // overlap is not distinguished from the case where nothing is known. 
uint64_t Size = 0; if (const ConstantInt *Len = - dyn_cast<ConstantInt>(findValue(MCI->getLength(), - /*OffsetOk=*/false))) + dyn_cast<ConstantInt>(findValue(MCI->getLength(), DL, + /*OffsetOk=*/false))) if (Len->getValue().isIntN(32)) Size = Len->getValue().getZExtValue(); - Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) != - AliasAnalysis::MustAlias, - "Undefined behavior: memcpy source and destination overlap", &I); + Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) != + AliasAnalysis::MustAlias, + "Undefined behavior: memcpy source and destination overlap", &I); break; } case Intrinsic::memmove: { @@ -324,9 +324,9 @@ void Lint::visitCallSite(CallSite CS) { } case Intrinsic::vastart: - Assert1(I.getParent()->getParent()->isVarArg(), - "Undefined behavior: va_start called in a non-varargs function", - &I); + Assert(I.getParent()->getParent()->isVarArg(), + "Undefined behavior: va_start called in a non-varargs function", + &I); visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, 0, nullptr, MemRef::Read | MemRef::Write); @@ -369,14 +369,13 @@ void Lint::visitInvokeInst(InvokeInst &I) { void Lint::visitReturnInst(ReturnInst &I) { Function *F = I.getParent()->getParent(); - Assert1(!F->doesNotReturn(), - "Unusual: Return statement in function with noreturn attribute", - &I); + Assert(!F->doesNotReturn(), + "Unusual: Return statement in function with noreturn attribute", &I); if (Value *V = I.getReturnValue()) { - Value *Obj = findValue(V, /*OffsetOk=*/true); - Assert1(!isa<AllocaInst>(Obj), - "Unusual: Returning alloca value", &I); + Value *Obj = + findValue(V, F->getParent()->getDataLayout(), /*OffsetOk=*/true); + Assert(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I); } } @@ -390,45 +389,47 @@ void Lint::visitMemoryReference(Instruction &I, if (Size == 0) return; - Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true); - Assert1(!isa<ConstantPointerNull>(UnderlyingObject), - "Undefined behavior: Null pointer dereference", &I); - Assert1(!isa<UndefValue>(UnderlyingObject), - "Undefined behavior: Undef pointer dereference", &I); - Assert1(!isa<ConstantInt>(UnderlyingObject) || - !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(), - "Unusual: All-ones pointer dereference", &I); - Assert1(!isa<ConstantInt>(UnderlyingObject) || - !cast<ConstantInt>(UnderlyingObject)->isOne(), - "Unusual: Address one pointer dereference", &I); + Value *UnderlyingObject = + findValue(Ptr, I.getModule()->getDataLayout(), /*OffsetOk=*/true); + Assert(!isa<ConstantPointerNull>(UnderlyingObject), + "Undefined behavior: Null pointer dereference", &I); + Assert(!isa<UndefValue>(UnderlyingObject), + "Undefined behavior: Undef pointer dereference", &I); + Assert(!isa<ConstantInt>(UnderlyingObject) || + !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(), + "Unusual: All-ones pointer dereference", &I); + Assert(!isa<ConstantInt>(UnderlyingObject) || + !cast<ConstantInt>(UnderlyingObject)->isOne(), + "Unusual: Address one pointer dereference", &I); if (Flags & MemRef::Write) { if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject)) - Assert1(!GV->isConstant(), - "Undefined behavior: Write to read-only memory", &I); - Assert1(!isa<Function>(UnderlyingObject) && - !isa<BlockAddress>(UnderlyingObject), - "Undefined behavior: Write to text section", &I); + Assert(!GV->isConstant(), "Undefined behavior: Write to read-only memory", + &I); + Assert(!isa<Function>(UnderlyingObject) && + !isa<BlockAddress>(UnderlyingObject), + 
"Undefined behavior: Write to text section", &I); } if (Flags & MemRef::Read) { - Assert1(!isa<Function>(UnderlyingObject), - "Unusual: Load from function body", &I); - Assert1(!isa<BlockAddress>(UnderlyingObject), - "Undefined behavior: Load from block address", &I); + Assert(!isa<Function>(UnderlyingObject), "Unusual: Load from function body", + &I); + Assert(!isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Load from block address", &I); } if (Flags & MemRef::Callee) { - Assert1(!isa<BlockAddress>(UnderlyingObject), - "Undefined behavior: Call to block address", &I); + Assert(!isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Call to block address", &I); } if (Flags & MemRef::Branchee) { - Assert1(!isa<Constant>(UnderlyingObject) || - isa<BlockAddress>(UnderlyingObject), - "Undefined behavior: Branch to non-blockaddress", &I); + Assert(!isa<Constant>(UnderlyingObject) || + isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Branch to non-blockaddress", &I); } // Check for buffer overflows and misalignment. // Only handles memory references that read/write something simple like an // alloca instruction or a global variable. + auto &DL = I.getModule()->getDataLayout(); int64_t Offset = 0; if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, DL)) { // OK, so the access is to a constant offset from Ptr. Check that Ptr is @@ -439,37 +440,37 @@ void Lint::visitMemoryReference(Instruction &I, if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { Type *ATy = AI->getAllocatedType(); - if (DL && !AI->isArrayAllocation() && ATy->isSized()) - BaseSize = DL->getTypeAllocSize(ATy); + if (!AI->isArrayAllocation() && ATy->isSized()) + BaseSize = DL.getTypeAllocSize(ATy); BaseAlign = AI->getAlignment(); - if (DL && BaseAlign == 0 && ATy->isSized()) - BaseAlign = DL->getABITypeAlignment(ATy); + if (BaseAlign == 0 && ATy->isSized()) + BaseAlign = DL.getABITypeAlignment(ATy); } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { // If the global may be defined differently in another compilation unit // then don't warn about funky memory accesses. if (GV->hasDefinitiveInitializer()) { Type *GTy = GV->getType()->getElementType(); - if (DL && GTy->isSized()) - BaseSize = DL->getTypeAllocSize(GTy); + if (GTy->isSized()) + BaseSize = DL.getTypeAllocSize(GTy); BaseAlign = GV->getAlignment(); - if (DL && BaseAlign == 0 && GTy->isSized()) - BaseAlign = DL->getABITypeAlignment(GTy); + if (BaseAlign == 0 && GTy->isSized()) + BaseAlign = DL.getABITypeAlignment(GTy); } } // Accesses from before the start or after the end of the object are not // defined. - Assert1(Size == AliasAnalysis::UnknownSize || - BaseSize == AliasAnalysis::UnknownSize || - (Offset >= 0 && Offset + Size <= BaseSize), - "Undefined behavior: Buffer overflow", &I); + Assert(Size == AliasAnalysis::UnknownSize || + BaseSize == AliasAnalysis::UnknownSize || + (Offset >= 0 && Offset + Size <= BaseSize), + "Undefined behavior: Buffer overflow", &I); // Accesses that say that the memory is more aligned than it is are not // defined. 
- if (DL && Align == 0 && Ty && Ty->isSized()) - Align = DL->getABITypeAlignment(Ty); - Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), - "Undefined behavior: Memory reference address is misaligned", &I); + if (Align == 0 && Ty && Ty->isSized()) + Align = DL.getABITypeAlignment(Ty); + Assert(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), + "Undefined behavior: Memory reference address is misaligned", &I); } } @@ -487,36 +488,35 @@ void Lint::visitStoreInst(StoreInst &I) { } void Lint::visitXor(BinaryOperator &I) { - Assert1(!isa<UndefValue>(I.getOperand(0)) || - !isa<UndefValue>(I.getOperand(1)), - "Undefined result: xor(undef, undef)", &I); + Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)), + "Undefined result: xor(undef, undef)", &I); } void Lint::visitSub(BinaryOperator &I) { - Assert1(!isa<UndefValue>(I.getOperand(0)) || - !isa<UndefValue>(I.getOperand(1)), - "Undefined result: sub(undef, undef)", &I); + Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)), + "Undefined result: sub(undef, undef)", &I); } void Lint::visitLShr(BinaryOperator &I) { - if (ConstantInt *CI = - dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) - Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), - "Undefined result: Shift count out of range", &I); + if (ConstantInt *CI = dyn_cast<ConstantInt>( + findValue(I.getOperand(1), I.getModule()->getDataLayout(), + /*OffsetOk=*/false))) + Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); } void Lint::visitAShr(BinaryOperator &I) { - if (ConstantInt *CI = - dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) - Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), - "Undefined result: Shift count out of range", &I); + if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue( + I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false))) + Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); } void Lint::visitShl(BinaryOperator &I) { - if (ConstantInt *CI = - dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) - Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), - "Undefined result: Shift count out of range", &I); + if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue( + I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false))) + Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); } static bool @@ -598,9 +598,9 @@ void Lint::visitEHBeginCatch(IntrinsicInst *II) { // The begin catch must occur in a landing pad block or all paths // to it must have come from a landing pad. - Assert1(allPredsCameFromLandingPad(CatchBB, VisitedBlocks), - "llvm.eh.begincatch may be reachable without passing a landingpad", - II); + Assert(allPredsCameFromLandingPad(CatchBB, VisitedBlocks), + "llvm.eh.begincatch may be reachable without passing a landingpad", + II); // Reset the visited block list. 
VisitedBlocks.clear(); @@ -612,13 +612,13 @@ void Lint::visitEHBeginCatch(IntrinsicInst *II) { bool EndCatchFound = allSuccessorsReachEndCatch( CatchBB, std::next(static_cast<BasicBlock::iterator>(II)), &SecondBeginCatch, VisitedBlocks); - Assert2( + Assert( SecondBeginCatch == nullptr, "llvm.eh.begincatch may be called a second time before llvm.eh.endcatch", II, SecondBeginCatch); - Assert1(EndCatchFound, - "Some paths from llvm.eh.begincatch may not reach llvm.eh.endcatch", - II); + Assert(EndCatchFound, + "Some paths from llvm.eh.begincatch may not reach llvm.eh.endcatch", + II); } static bool allPredCameFromBeginCatch( @@ -691,17 +691,16 @@ void Lint::visitEHEndCatch(IntrinsicInst *II) { bool BeginCatchFound = allPredCameFromBeginCatch(EndCatchBB, BasicBlock::reverse_iterator(II), &SecondEndCatch, VisitedBlocks); - Assert2( + Assert( SecondEndCatch == nullptr, "llvm.eh.endcatch may be called a second time after llvm.eh.begincatch", II, SecondEndCatch); - Assert1( - BeginCatchFound, - "llvm.eh.endcatch may be reachable without passing llvm.eh.begincatch", - II); + Assert(BeginCatchFound, + "llvm.eh.endcatch may be reachable without passing llvm.eh.begincatch", + II); } -static bool isZero(Value *V, const DataLayout *DL, DominatorTree *DT, +static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC) { // Assume undef could be zero. if (isa<UndefValue>(V)) @@ -742,30 +741,30 @@ static bool isZero(Value *V, const DataLayout *DL, DominatorTree *DT, } void Lint::visitSDiv(BinaryOperator &I) { - Assert1(!isZero(I.getOperand(1), DL, DT, AC), - "Undefined behavior: Division by zero", &I); + Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), + "Undefined behavior: Division by zero", &I); } void Lint::visitUDiv(BinaryOperator &I) { - Assert1(!isZero(I.getOperand(1), DL, DT, AC), - "Undefined behavior: Division by zero", &I); + Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), + "Undefined behavior: Division by zero", &I); } void Lint::visitSRem(BinaryOperator &I) { - Assert1(!isZero(I.getOperand(1), DL, DT, AC), - "Undefined behavior: Division by zero", &I); + Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), + "Undefined behavior: Division by zero", &I); } void Lint::visitURem(BinaryOperator &I) { - Assert1(!isZero(I.getOperand(1), DL, DT, AC), - "Undefined behavior: Division by zero", &I); + Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), + "Undefined behavior: Division by zero", &I); } void Lint::visitAllocaInst(AllocaInst &I) { if (isa<ConstantInt>(I.getArraySize())) // This isn't undefined behavior, it's just an obvious pessimization. - Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(), - "Pessimization: Static alloca outside of entry block", &I); + Assert(&I.getParent()->getParent()->getEntryBlock() == I.getParent(), + "Pessimization: Static alloca outside of entry block", &I); // TODO: Check for an unusual size (MSB set?) 
} @@ -779,32 +778,33 @@ void Lint::visitIndirectBrInst(IndirectBrInst &I) { visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, nullptr, MemRef::Branchee); - Assert1(I.getNumDestinations() != 0, - "Undefined behavior: indirectbr with no destinations", &I); + Assert(I.getNumDestinations() != 0, + "Undefined behavior: indirectbr with no destinations", &I); } void Lint::visitExtractElementInst(ExtractElementInst &I) { - if (ConstantInt *CI = - dyn_cast<ConstantInt>(findValue(I.getIndexOperand(), - /*OffsetOk=*/false))) - Assert1(CI->getValue().ult(I.getVectorOperandType()->getNumElements()), - "Undefined result: extractelement index out of range", &I); + if (ConstantInt *CI = dyn_cast<ConstantInt>( + findValue(I.getIndexOperand(), I.getModule()->getDataLayout(), + /*OffsetOk=*/false))) + Assert(CI->getValue().ult(I.getVectorOperandType()->getNumElements()), + "Undefined result: extractelement index out of range", &I); } void Lint::visitInsertElementInst(InsertElementInst &I) { - if (ConstantInt *CI = - dyn_cast<ConstantInt>(findValue(I.getOperand(2), - /*OffsetOk=*/false))) - Assert1(CI->getValue().ult(I.getType()->getNumElements()), - "Undefined result: insertelement index out of range", &I); + if (ConstantInt *CI = dyn_cast<ConstantInt>( + findValue(I.getOperand(2), I.getModule()->getDataLayout(), + /*OffsetOk=*/false))) + Assert(CI->getValue().ult(I.getType()->getNumElements()), + "Undefined result: insertelement index out of range", &I); } void Lint::visitUnreachableInst(UnreachableInst &I) { // This isn't undefined behavior, it's merely suspicious. - Assert1(&I == I.getParent()->begin() || - std::prev(BasicBlock::iterator(&I))->mayHaveSideEffects(), - "Unusual: unreachable immediately preceded by instruction without " - "side effects", &I); + Assert(&I == I.getParent()->begin() || + std::prev(BasicBlock::iterator(&I))->mayHaveSideEffects(), + "Unusual: unreachable immediately preceded by instruction without " + "side effects", + &I); } /// findValue - Look through bitcasts and simple memory reference patterns @@ -814,13 +814,13 @@ void Lint::visitUnreachableInst(UnreachableInst &I) { /// Most analysis passes don't require this logic, because instcombine /// will simplify most of these kinds of things away. But it's a goal of /// this Lint pass to be useful even on non-optimized IR. -Value *Lint::findValue(Value *V, bool OffsetOk) const { +Value *Lint::findValue(Value *V, const DataLayout &DL, bool OffsetOk) const { SmallPtrSet<Value *, 4> Visited; - return findValueImpl(V, OffsetOk, Visited); + return findValueImpl(V, DL, OffsetOk, Visited); } /// findValueImpl - Implementation helper for findValue. -Value *Lint::findValueImpl(Value *V, bool OffsetOk, +Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk, SmallPtrSetImpl<Value *> &Visited) const { // Detect self-referential values. 
if (!Visited.insert(V).second) @@ -841,7 +841,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, break; if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(), BB, BBI, 6, AA)) - return findValueImpl(U, OffsetOk, Visited); + return findValueImpl(U, DL, OffsetOk, Visited); if (BBI != BB->begin()) break; BB = BB->getUniquePredecessor(); if (!BB) break; @@ -850,40 +850,38 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, } else if (PHINode *PN = dyn_cast<PHINode>(V)) { if (Value *W = PN->hasConstantValue()) if (W != V) - return findValueImpl(W, OffsetOk, Visited); + return findValueImpl(W, DL, OffsetOk, Visited); } else if (CastInst *CI = dyn_cast<CastInst>(V)) { if (CI->isNoopCast(DL)) - return findValueImpl(CI->getOperand(0), OffsetOk, Visited); + return findValueImpl(CI->getOperand(0), DL, OffsetOk, Visited); } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) { if (Value *W = FindInsertedValue(Ex->getAggregateOperand(), Ex->getIndices())) if (W != V) - return findValueImpl(W, OffsetOk, Visited); + return findValueImpl(W, DL, OffsetOk, Visited); } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { // Same as above, but for ConstantExpr instead of Instruction. if (Instruction::isCast(CE->getOpcode())) { if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()), - CE->getOperand(0)->getType(), - CE->getType(), - DL ? DL->getIntPtrType(V->getType()) : - Type::getInt64Ty(V->getContext()))) - return findValueImpl(CE->getOperand(0), OffsetOk, Visited); + CE->getOperand(0)->getType(), CE->getType(), + DL.getIntPtrType(V->getType()))) + return findValueImpl(CE->getOperand(0), DL, OffsetOk, Visited); } else if (CE->getOpcode() == Instruction::ExtractValue) { ArrayRef<unsigned> Indices = CE->getIndices(); if (Value *W = FindInsertedValue(CE->getOperand(0), Indices)) if (W != V) - return findValueImpl(W, OffsetOk, Visited); + return findValueImpl(W, DL, OffsetOk, Visited); } } // As a last resort, try SimplifyInstruction or constant folding. if (Instruction *Inst = dyn_cast<Instruction>(V)) { if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT, AC)) - return findValueImpl(W, OffsetOk, Visited); + return findValueImpl(W, DL, OffsetOk, Visited); } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { if (Value *W = ConstantFoldConstantExpression(CE, DL, TLI)) if (W != V) - return findValueImpl(W, OffsetOk, Visited); + return findValueImpl(W, DL, OffsetOk, Visited); } return V; diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index 5042eb9..aed3b04 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -19,6 +19,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" using namespace llvm; @@ -62,7 +63,8 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { /// This uses the pointee type to determine how many bytes need to be safe to /// load from the pointer. bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, - unsigned Align, const DataLayout *DL) { + unsigned Align) { + const DataLayout &DL = ScanFrom->getModule()->getDataLayout(); int64_t ByteOffset = 0; Value *Base = V; Base = GetPointerBaseWithConstantOffset(V, ByteOffset, DL); @@ -87,19 +89,19 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, } PointerType *AddrTy = cast<PointerType>(V->getType()); - uint64_t LoadSize = DL ? 
DL->getTypeStoreSize(AddrTy->getElementType()) : 0; + uint64_t LoadSize = DL.getTypeStoreSize(AddrTy->getElementType()); // If we found a base allocated type from either an alloca or global variable, // try to see if we are definitively within the allocated region. We need to // know the size of the base type and the loaded type to do anything in this - // case, so only try this when we have the DataLayout available. - if (BaseType && BaseType->isSized() && DL) { + // case. + if (BaseType && BaseType->isSized()) { if (BaseAlign == 0) - BaseAlign = DL->getPrefTypeAlignment(BaseType); + BaseAlign = DL.getPrefTypeAlignment(BaseType); if (Align <= BaseAlign) { // Check if the load is within the bounds of the underlying object. - if (ByteOffset + LoadSize <= DL->getTypeAllocSize(BaseType) && + if (ByteOffset + LoadSize <= DL.getTypeAllocSize(BaseType) && (Align == 0 || (ByteOffset % Align) == 0)) return true; } @@ -133,16 +135,13 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, else continue; - // Handle trivial cases even w/o DataLayout or other work. + // Handle trivial cases. if (AccessedPtr == V) return true; - if (!DL) - continue; - auto *AccessedTy = cast<PointerType>(AccessedPtr->getType()); if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) && - LoadSize <= DL->getTypeStoreSize(AccessedTy->getElementType())) + LoadSize <= DL.getTypeStoreSize(AccessedTy->getElementType())) return true; } return false; @@ -176,13 +175,10 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType(); - // Try to get the DataLayout for this module. This may be null, in which case - // the optimizations will be limited. - const DataLayout *DL = ScanBB->getDataLayout(); + const DataLayout &DL = ScanBB->getModule()->getDataLayout(); // Try to get the store size for the type. - uint64_t AccessSize = DL ? DL->getTypeStoreSize(AccessTy) - : AA ? AA->getTypeStoreSize(AccessTy) : 0; + uint64_t AccessSize = DL.getTypeStoreSize(AccessTy); Value *StrippedPtr = Ptr->stripPointerCasts(); diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index 7bedd40..1818e93 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -15,11 +15,13 @@ #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/VectorUtils.h" using namespace llvm; @@ -49,6 +51,13 @@ unsigned VectorizerParams::RuntimeMemoryCheckThreshold; /// Maximum SIMD width. const unsigned VectorizerParams::MaxVectorWidth = 64; +/// \brief We collect interesting dependences up to this threshold. 
+static cl::opt<unsigned> MaxInterestingDependence( + "max-interesting-dependences", cl::Hidden, + cl::desc("Maximum number of interesting dependences collected by " + "loop-access analysis (default = 100)"), + cl::init(100)); + bool VectorizerParams::isInterleaveForced() { return ::VectorizationInterleave.getNumOccurrences() > 0; } @@ -120,8 +129,8 @@ void LoopAccessInfo::RuntimePointerCheck::insert( AliasSetId.push_back(ASId); } -bool LoopAccessInfo::RuntimePointerCheck::needsChecking(unsigned I, - unsigned J) const { +bool LoopAccessInfo::RuntimePointerCheck::needsChecking( + unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const { // No need to check if two readonly pointers intersect. if (!IsWritePtr[I] && !IsWritePtr[J]) return false; @@ -134,11 +143,19 @@ bool LoopAccessInfo::RuntimePointerCheck::needsChecking(unsigned I, if (AliasSetId[I] != AliasSetId[J]) return false; + // If PtrPartition is set, omit checks between pointers of the same partition. + // Partition number -1 means that the pointer is used in multiple partitions. + // In this case we can't omit the check. + if (PtrPartition && (*PtrPartition)[I] != -1 && + (*PtrPartition)[I] == (*PtrPartition)[J]) + return false; + return true; } -void LoopAccessInfo::RuntimePointerCheck::print(raw_ostream &OS, - unsigned Depth) const { +void LoopAccessInfo::RuntimePointerCheck::print( + raw_ostream &OS, unsigned Depth, + const SmallVectorImpl<int> *PtrPartition) const { unsigned NumPointers = Pointers.size(); if (NumPointers == 0) return; @@ -147,10 +164,16 @@ void LoopAccessInfo::RuntimePointerCheck::print(raw_ostream &OS, unsigned N = 0; for (unsigned I = 0; I < NumPointers; ++I) for (unsigned J = I + 1; J < NumPointers; ++J) - if (needsChecking(I, J)) { + if (needsChecking(I, J, PtrPartition)) { OS.indent(Depth) << N++ << ":\n"; - OS.indent(Depth + 2) << *Pointers[I] << "\n"; - OS.indent(Depth + 2) << *Pointers[J] << "\n"; + OS.indent(Depth + 2) << *Pointers[I]; + if (PtrPartition) + OS << " (Partition: " << (*PtrPartition)[I] << ")"; + OS << "\n"; + OS.indent(Depth + 2) << *Pointers[J]; + if (PtrPartition) + OS << " (Partition: " << (*PtrPartition)[J] << ")"; + OS << "\n"; } } @@ -165,11 +188,9 @@ public: typedef PointerIntPair<Value *, 1, bool> MemAccessInfo; typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet; - /// \brief Set of potential dependent memory accesses. - typedef EquivalenceClasses<MemAccessInfo> DepCandidates; - - AccessAnalysis(const DataLayout *Dl, AliasAnalysis *AA, DepCandidates &DA) : - DL(Dl), AST(*AA), DepCands(DA), IsRTCheckNeeded(false) {} + AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, + MemoryDepChecker::DepCandidates &DA) + : DL(Dl), AST(*AA), DepCands(DA), IsRTCheckNeeded(false) {} /// \brief Register a load and whether it is only read from. void addLoad(AliasAnalysis::Location &Loc, bool IsReadOnly) { @@ -217,14 +238,14 @@ private: /// Set of all accesses. PtrAccessSet Accesses; + const DataLayout &DL; + /// Set of accesses that need a further dependence check. MemAccessInfoSet CheckDeps; /// Set of pointers that are read only. SmallPtrSet<Value*, 16> ReadOnlyPtr; - const DataLayout *DL; - /// An alias set tracker to partition the access set by underlying object and // intrinsic property (such as TBAA metadata). AliasSetTracker AST; @@ -232,7 +253,7 @@ private: /// Sets of potentially dependent accesses - members of one set share an /// underlying pointer. The set "CheckDeps" identifies which sets really need a /// dependence check.
- DepCandidates &DepCands; + MemoryDepChecker::DepCandidates &DepCands; bool IsRTCheckNeeded; }; @@ -252,8 +273,8 @@ static bool hasComputableBounds(ScalarEvolution *SE, /// \brief Check the stride of the pointer and ensure that it does not wrap in /// the address space. -static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr, - const Loop *Lp, const ValueToValueMap &StridesMap); +static int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, + const ValueToValueMap &StridesMap); bool AccessAnalysis::canCheckPtrAtRT( LoopAccessInfo::RuntimePointerCheck &RtCheck, unsigned &NumComparisons, @@ -289,10 +310,10 @@ bool AccessAnalysis::canCheckPtrAtRT( ++NumReadPtrChecks; if (hasComputableBounds(SE, StridesMap, Ptr) && - // When we run after a failing dependency check we have to make sure we - // don't have wrapping pointers. + // When we run after a failing dependency check we have to make sure + // we don't have wrapping pointers. (!ShouldCheckStride || - isStridedPtr(SE, DL, Ptr, TheLoop, StridesMap) == 1)) { + isStridedPtr(SE, Ptr, TheLoop, StridesMap) == 1)) { // The id of the dependence set. unsigned DepId; @@ -362,7 +383,7 @@ void AccessAnalysis::processMemAccesses() { DEBUG(dbgs() << "LAA: Processing memory accesses...\n"); DEBUG(dbgs() << " AST: "; AST.dump()); - DEBUG(dbgs() << "LAA: Accesses:\n"); + DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n"); DEBUG({ for (auto A : Accesses) dbgs() << "\t" << *A.getPointer() << " (" << @@ -460,124 +481,6 @@ void AccessAnalysis::processMemAccesses() { } } -namespace { -/// \brief Checks memory dependences among accesses to the same underlying -/// object to determine whether vectorization is legal or not (and at -/// which vectorization factor). -/// -/// This class works under the assumption that we already checked that memory -/// locations with different underlying pointers are "must-not alias". -/// We use the ScalarEvolution framework to symbolically evaluate access -/// function pairs. Since we currently don't restructure the loop we can rely -/// on the program order of memory accesses to determine their safety. -/// At the moment we will only deem accesses as safe for: -/// * A negative constant distance assuming program order. -/// -/// Safe: tmp = a[i + 1]; OR a[i + 1] = x; -/// a[i] = tmp; y = a[i]; -/// -/// The latter case is safe because later checks guarantee that there can't -/// be a cycle through a phi node (that is, we check that "x" and "y" are not -/// the same variable: a header phi can only be an induction or a reduction, a -/// reduction can't have a memory sink, an induction can't have a memory -/// source). This is important and must not be violated (or we have to -/// resort to checking for cycles through memory). -/// -/// * A positive constant distance assuming program order that is bigger -/// than the biggest memory access. -/// -/// tmp = a[i] OR b[i] = x -/// a[i+2] = tmp y = b[i+2]; -/// -/// Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively. -/// -/// * Zero distances and all accesses have the same size. -/// -class MemoryDepChecker { -public: - typedef PointerIntPair<Value *, 1, bool> MemAccessInfo; - typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet; - - MemoryDepChecker(ScalarEvolution *Se, const DataLayout *Dl, const Loop *L) - : SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0), - ShouldRetryWithRuntimeCheck(false) {} - - /// \brief Register the location (instructions are given increasing numbers) - /// of a write access.
- void addAccess(StoreInst *SI) { - Value *Ptr = SI->getPointerOperand(); - Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx); - InstMap.push_back(SI); - ++AccessIdx; - } - - /// \brief Register the location (instructions are given increasing numbers) - /// of a read access. - void addAccess(LoadInst *LI) { - Value *Ptr = LI->getPointerOperand(); - Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx); - InstMap.push_back(LI); - ++AccessIdx; - } - - /// \brief Check whether the dependencies between the accesses are safe. - /// - /// Only checks sets with elements in \p CheckDeps. - bool areDepsSafe(AccessAnalysis::DepCandidates &AccessSets, - MemAccessInfoSet &CheckDeps, const ValueToValueMap &Strides); - - /// \brief The maximum number of bytes of a vector register we can vectorize - /// the accesses safely with. - unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; } - - /// \brief In some cases when the dependency check fails we can still - /// vectorize the loop with a dynamic array access check. - bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; } - -private: - ScalarEvolution *SE; - const DataLayout *DL; - const Loop *InnermostLoop; - - /// \brief Maps access locations (ptr, read/write) to program order. - DenseMap<MemAccessInfo, std::vector<unsigned> > Accesses; - - /// \brief Memory access instructions in program order. - SmallVector<Instruction *, 16> InstMap; - - /// \brief The program order index to be used for the next instruction. - unsigned AccessIdx; - - // We can access this many bytes in parallel safely. - unsigned MaxSafeDepDistBytes; - - /// \brief If we see a non-constant dependence distance we can still try to - /// vectorize this loop with runtime checks. - bool ShouldRetryWithRuntimeCheck; - - /// \brief Check whether there is a plausible dependence between the two - /// accesses. - /// - /// Access \p A must happen before \p B in program order. The two indices - /// identify the index into the program order map. - /// - /// This function checks whether there is a plausible dependence (or the - /// absence of such can't be proved) between the two accesses. If there is a - /// plausible dependence but the dependence distance is bigger than one - /// element access, it records this distance in \p MaxSafeDepDistBytes (if this - /// distance is smaller than any other distance encountered so far). - /// Otherwise, this function returns true signaling a possible dependence. - bool isDependent(const MemAccessInfo &A, unsigned AIdx, - const MemAccessInfo &B, unsigned BIdx, - const ValueToValueMap &Strides); - - /// \brief Check whether the data dependence could prevent store-load - /// forwarding. - bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize); -}; - -} // end anonymous namespace - static bool isInBoundsGep(Value *Ptr) { if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) return GEP->isInBounds(); @@ -585,8 +488,8 @@ static bool isInBoundsGep(Value *Ptr) { } /// \brief Check whether the access through \p Ptr has a constant stride.
-static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr, - const Loop *Lp, const ValueToValueMap &StridesMap) { +static int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, + const ValueToValueMap &StridesMap) { const Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); @@ -640,7 +543,8 @@ static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr, return 0; } - int64_t Size = DL->getTypeAllocSize(PtrTy->getElementType()); + auto &DL = Lp->getHeader()->getModule()->getDataLayout(); + int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType()); const APInt &APStepVal = C->getValue()->getValue(); // Huge step value - give up. @@ -665,6 +569,54 @@ static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr, return Stride; } +bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) { + switch (Type) { + case NoDep: + case Forward: + case BackwardVectorizable: + return true; + + case Unknown: + case ForwardButPreventsForwarding: + case Backward: + case BackwardVectorizableButPreventsForwarding: + return false; + } + llvm_unreachable("unexpected DepType!"); +} + +bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) { + switch (Type) { + case NoDep: + case Forward: + return false; + + case BackwardVectorizable: + case Unknown: + case ForwardButPreventsForwarding: + case Backward: + case BackwardVectorizableButPreventsForwarding: + return true; + } + llvm_unreachable("unexpected DepType!"); +} + +bool MemoryDepChecker::Dependence::isPossiblyBackward() const { + switch (Type) { + case NoDep: + case Forward: + case ForwardButPreventsForwarding: + return false; + + case Unknown: + case BackwardVectorizable: + case Backward: + case BackwardVectorizableButPreventsForwarding: + return true; + } + llvm_unreachable("unexpected DepType!"); +} + bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize) { // If loads occur at a distance that is not a multiple of a feasible vector @@ -704,9 +656,10 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance, return false; } -bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, - const MemAccessInfo &B, unsigned BIdx, - const ValueToValueMap &Strides) { +MemoryDepChecker::Dependence::DepType +MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, + const MemAccessInfo &B, unsigned BIdx, + const ValueToValueMap &Strides) { assert (AIdx < BIdx && "Must pass arguments in program order"); Value *APtr = A.getPointer(); @@ -716,18 +669,18 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // Two reads are independent. if (!AIsWrite && !BIsWrite) - return false; + return Dependence::NoDep; // We cannot check pointers in different address spaces. 
if (APtr->getType()->getPointerAddressSpace() != BPtr->getType()->getPointerAddressSpace()) - return true; + return Dependence::Unknown; const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr); const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr); - int StrideAPtr = isStridedPtr(SE, DL, APtr, InnermostLoop, Strides); - int StrideBPtr = isStridedPtr(SE, DL, BPtr, InnermostLoop, Strides); + int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides); + int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides); const SCEV *Src = AScev; const SCEV *Sink = BScev; @@ -756,19 +709,20 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // the address space. if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){ DEBUG(dbgs() << "Non-consecutive pointer access\n"); - return true; + return Dependence::Unknown; } const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist); if (!C) { DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n"); ShouldRetryWithRuntimeCheck = true; - return true; + return Dependence::Unknown; } Type *ATy = APtr->getType()->getPointerElementType(); Type *BTy = BPtr->getType()->getPointerElementType(); - unsigned TypeByteSize = DL->getTypeAllocSize(ATy); + auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout(); + unsigned TypeByteSize = DL.getTypeAllocSize(ATy); // Negative distances are not plausible dependencies. const APInt &Val = C->getValue()->getValue(); @@ -777,19 +731,19 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, if (IsTrueDataDependence && (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) || ATy != BTy)) - return true; + return Dependence::ForwardButPreventsForwarding; DEBUG(dbgs() << "LAA: Dependence is negative: NoDep\n"); - return false; + return Dependence::Forward; } // Write to the same location with the same size. // Could be improved to assert type sizes are the same (i32 == float, etc). if (Val == 0) { if (ATy == BTy) - return false; + return Dependence::NoDep; DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n"); - return true; + return Dependence::Unknown; } assert(Val.isStrictlyPositive() && "Expect a positive value"); @@ -797,7 +751,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, if (ATy != BTy) { DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with different types\n"); - return true; + return Dependence::Unknown; } unsigned Distance = (unsigned) Val.getZExtValue(); @@ -816,7 +770,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, Distance < TypeByteSize * ForcedUnroll * ForcedFactor) { DEBUG(dbgs() << "LAA: Failure because of Positive distance " << Val.getSExtValue() << '\n'); - return true; + return Dependence::Backward; } // Positive distance bigger than max vectorization factor. 
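The isDependent hunks above replace a bare bool result with the Dependence::DepType classification consumed by areDepsSafe and the new printing code. A simplified worked sketch of that classification, under stated assumptions (constant distance, equal access types, and a reduced enum; the store-load-forwarding and forced unroll/interleave refinements of the real code are omitted):

#include <cstdint>
#include <iostream>

// Reduced stand-in for MemoryDepChecker::Dependence::DepType.
enum class DepType { NoDep, Forward, Backward, BackwardVectorizable };

// Classify a constant dependence distance in bytes between two accesses of
// TypeByteSize bytes each. MaxSafeBytes plays the role of MaxSafeDepDistBytes.
DepType classifyDistance(int64_t DistBytes, uint64_t TypeByteSize,
                         uint64_t &MaxSafeBytes) {
  if (DistBytes < 0)
    return DepType::Forward;   // the sink reads an earlier iteration's value
  if (DistBytes == 0)
    return DepType::NoDep;     // same location with the same access size
  // Less than two elements of room would reorder the accesses when vectorized.
  if (static_cast<uint64_t>(DistBytes) < 2 * TypeByteSize)
    return DepType::Backward;
  // Otherwise the distance caps how many bytes one vector iteration may touch.
  if (static_cast<uint64_t>(DistBytes) < MaxSafeBytes)
    MaxSafeBytes = static_cast<uint64_t>(DistBytes);
  return DepType::BackwardVectorizable;
}

int main() {
  uint64_t MaxSafe = UINT64_MAX;
  // a[i+2] = a[i] over i32: distance = 2 * 4 = 8 bytes, matching the
  // "Safe distance: 2 x sizeof(a[0])" example in the class comment, so the
  // maximum safe VF is 8 / 4 = 2 elements.
  bool Vectorizable =
      classifyDistance(8, 4, MaxSafe) == DepType::BackwardVectorizable;
  std::cout << Vectorizable << " maxVF=" << MaxSafe / 4 << '\n'; // 1 maxVF=2
}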
@@ -826,15 +780,15 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, bool IsTrueDataDependence = (!AIsWrite && BIsWrite); if (IsTrueDataDependence && couldPreventStoreLoadForward(Distance, TypeByteSize)) - return true; + return Dependence::BackwardVectorizableButPreventsForwarding; DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() << " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n'); - return false; + return Dependence::BackwardVectorizable; } -bool MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets, +bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, MemAccessInfoSet &CheckDeps, const ValueToValueMap &Strides) { @@ -860,9 +814,33 @@ bool MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets, I1E = Accesses[*AI].end(); I1 != I1E; ++I1) for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(), I2E = Accesses[*OI].end(); I2 != I2E; ++I2) { - if (*I1 < *I2 && isDependent(*AI, *I1, *OI, *I2, Strides)) - return false; - if (*I2 < *I1 && isDependent(*OI, *I2, *AI, *I1, Strides)) + auto A = std::make_pair(&*AI, *I1); + auto B = std::make_pair(&*OI, *I2); + + assert(*I1 != *I2); + if (*I1 > *I2) + std::swap(A, B); + + Dependence::DepType Type = + isDependent(*A.first, A.second, *B.first, B.second, Strides); + SafeForVectorization &= Dependence::isSafeForVectorization(Type); + + // Gather dependences unless we accumulated MaxInterestingDependence + // dependences. In that case return as soon as we find the first + // unsafe dependence. This puts a limit on this quadratic + // algorithm. + if (RecordInterestingDependences) { + if (Dependence::isInterestingDependence(Type)) + InterestingDependences.push_back( + Dependence(A.second, B.second, Type)); + + if (InterestingDependences.size() >= MaxInterestingDependence) { + RecordInterestingDependences = false; + InterestingDependences.clear(); + DEBUG(dbgs() << "Too many dependences, stopped recording\n"); + } + } + if (!RecordInterestingDependences && !SafeForVectorization) return false; } ++OI; @@ -870,7 +848,34 @@ bool MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets, AI++; } } - return true; + + DEBUG(dbgs() << "Total Interesting Dependences: " + << InterestingDependences.size() << "\n"); + return SafeForVectorization; +} + +SmallVector<Instruction *, 4> +MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const { + MemAccessInfo Access(Ptr, isWrite); + auto &IndexVector = Accesses.find(Access)->second; + + SmallVector<Instruction *, 4> Insts; + std::transform(IndexVector.begin(), IndexVector.end(), + std::back_inserter(Insts), + [&](unsigned Idx) { return this->InstMap[Idx]; }); + return Insts; +} + +const char *MemoryDepChecker::Dependence::DepName[] = { + "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward", + "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"}; + +void MemoryDepChecker::Dependence::print( + raw_ostream &OS, unsigned Depth, + const SmallVectorImpl<Instruction *> &Instrs) const { + OS.indent(Depth) << DepName[Type] << ":\n"; + OS.indent(Depth + 2) << *Instrs[Source] << " -> \n"; + OS.indent(Depth + 2) << *Instrs[Destination] << "\n"; } bool LoopAccessInfo::canAnalyzeLoop() { @@ -939,7 +944,6 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { PtrRtCheck.Need = false; const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); - MemoryDepChecker DepChecker(SE, DL, TheLoop); // For each block. 
for (Loop::block_iterator bb = TheLoop->block_begin(), @@ -960,6 +964,12 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { if (Call && getIntrinsicIDForCall(Call, TLI)) continue; + // If the function has an explicit vectorized counterpart, we can safely + // assume that it can be vectorized. + if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() && + TLI->isFunctionVectorizable(Call->getCalledFunction()->getName())) + continue; + LoadInst *Ld = dyn_cast<LoadInst>(it); if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) { emitAnalysis(LoopAccessReport(Ld) @@ -1008,8 +1018,9 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { return; } - AccessAnalysis::DepCandidates DependentAccesses; - AccessAnalysis Accesses(DL, AA, DependentAccesses); + MemoryDepChecker::DepCandidates DependentAccesses; + AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(), + AA, DependentAccesses); // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects // multiple times on the same object. If the ptr is accessed twice, once @@ -1068,8 +1079,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // read a few words, modify, and write a few words, and some of the // words may be written to the same address. bool IsReadOnlyPtr = false; - if (Seen.insert(Ptr).second || - !isStridedPtr(SE, DL, Ptr, TheLoop, Strides)) { + if (Seen.insert(Ptr).second || !isStridedPtr(SE, Ptr, TheLoop, Strides)) { ++NumReads; IsReadOnlyPtr = true; } @@ -1099,7 +1109,6 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. - unsigned NumComparisons = 0; bool CanDoRT = false; if (NeedRTCheck) CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop, @@ -1113,18 +1122,10 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { if (NumComparisons == 0 && NeedRTCheck) NeedRTCheck = false; - // Check that we did not collect too many pointers or found an unsizeable - // pointer. - if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) { - PtrRtCheck.reset(); - CanDoRT = false; - } - - if (CanDoRT) { + // Check that we found the bounds for the pointer. + if (CanDoRT) DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n"); - } - - if (NeedRTCheck && !CanDoRT) { + else if (NeedRTCheck) { emitAnalysis(LoopAccessReport() << "cannot identify array bounds"); DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " << "the array bounds.\n"); @@ -1154,17 +1155,10 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop, Strides, true); - // Check that we did not collect too many pointers or found an unsizeable - // pointer. - if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) { - if (!CanDoRT && NumComparisons > 0) - emitAnalysis(LoopAccessReport() - << "cannot check memory dependencies at runtime"); - else - emitAnalysis(LoopAccessReport() - << NumComparisons << " exceeds limit of " - << RuntimeMemoryCheckThreshold - << " dependent memory operations checked at runtime"); + // Check that we found the bounds for the pointer. 
+ if (!CanDoRT && NumComparisons > 0) { + emitAnalysis(LoopAccessReport() + << "cannot check memory dependencies at runtime"); DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); PtrRtCheck.reset(); CanVecMem = false; return; } } } - if (!CanVecMem) + if (CanVecMem) + DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We" + << (NeedRTCheck ? "" : " don't") + << " need a runtime memory check.\n"); + else { emitAnalysis(LoopAccessReport() << "unsafe dependent memory operations in loop"); - - DEBUG(dbgs() << "LAA: We" << (NeedRTCheck ? "" : " don't") << - " need a runtime memory check.\n"); + DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n"); + } } bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, @@ -1212,8 +1209,8 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V, return nullptr; } -std::pair<Instruction *, Instruction *> -LoopAccessInfo::addRuntimeCheck(Instruction *Loc) const { +std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck( + Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const { Instruction *tnullptr = nullptr; if (!PtrRtCheck.Need) return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr); @@ -1223,7 +1220,7 @@ LoopAccessInfo::addRuntimeCheck(Instruction *Loc) const { SmallVector<TrackingVH<Value> , 2> Ends; LLVMContext &Ctx = Loc->getContext(); - SCEVExpander Exp(*SE, "induction"); + SCEVExpander Exp(*SE, DL, "induction"); Instruction *FirstInst = nullptr; for (unsigned i = 0; i < NumPointers; ++i) { @@ -1254,7 +1251,7 @@ LoopAccessInfo::addRuntimeCheck(Instruction *Loc) const { Value *MemoryRuntimeCheck = nullptr; for (unsigned i = 0; i < NumPointers; ++i) { for (unsigned j = i+1; j < NumPointers; ++j) { - if (!PtrRtCheck.needsChecking(i, j)) + if (!PtrRtCheck.needsChecking(i, j, PtrPartition)) continue; unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace(); @@ -1298,12 +1295,13 @@ LoopAccessInfo::addRuntimeCheck(Instruction *Loc) const { } LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, - const DataLayout *DL, + const DataLayout &DL, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, const ValueToValueMap &Strides) - : TheLoop(L), SE(SE), DL(DL), TLI(TLI), AA(AA), DT(DT), NumLoads(0), - NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false) { + : DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL), + TLI(TLI), AA(AA), DT(DT), NumLoads(0), NumStores(0), + MaxSafeDepDistBytes(-1U), CanVecMem(false) { if (canAnalyzeLoop()) analyzeLoop(Strides); } @@ -1319,7 +1317,14 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { if (Report) OS.indent(Depth) << "Report: " << Report->str() << "\n"; - // FIXME: Print unsafe dependences + if (auto *InterestingDependences = DepChecker.getInterestingDependences()) { + OS.indent(Depth) << "Interesting Dependences:\n"; + for (auto &Dep : *InterestingDependences) { + Dep.print(OS, Depth + 2, DepChecker.getMemoryInstructions()); + OS << "\n"; + } + } else + OS.indent(Depth) << "Too many interesting dependences, not recorded\n"; // List the pairs of accesses that need run-time checks to prove independence.
PtrRtCheck.print(OS, Depth); @@ -1336,6 +1341,7 @@ LoopAccessAnalysis::getInfo(Loop *L, const ValueToValueMap &Strides) { #endif if (!LAI) { + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, Strides); #ifndef NDEBUG LAI->NumSymbolicStrides = Strides.size(); @@ -1360,7 +1366,6 @@ void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const { bool LoopAccessAnalysis::runOnFunction(Function &F) { SE = &getAnalysis<ScalarEvolution>(); - DL = F.getParent()->getDataLayout(); auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); TLI = TLIP ? &TLIP->getTLI() : nullptr; AA = &getAnalysis<AliasAnalysis>(); diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 95f6eb0..6462b06 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -29,6 +29,7 @@ #include "llvm/IR/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> using namespace llvm; diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index a99c949..e9fcf02 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -18,6 +18,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; #define DEBUG_TYPE "loop-pass-manager" diff --git a/lib/Analysis/MemDerefPrinter.cpp b/lib/Analysis/MemDerefPrinter.cpp index 531d75e..6119a3d 100644 --- a/lib/Analysis/MemDerefPrinter.cpp +++ b/lib/Analysis/MemDerefPrinter.cpp @@ -14,6 +14,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -27,7 +28,6 @@ namespace { initializeMemDerefPrinterPass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<DataLayoutPass>(); AU.setPreservesAll(); } bool runOnFunction(Function &F) override; @@ -41,7 +41,6 @@ namespace { char MemDerefPrinter::ID = 0; INITIALIZE_PASS_BEGIN(MemDerefPrinter, "print-memderefs", "Memory Dereferenceability of pointers in function", false, true) -INITIALIZE_PASS_DEPENDENCY(DataLayoutPass) INITIALIZE_PASS_END(MemDerefPrinter, "print-memderefs", "Memory Dereferenceability of pointers in function", false, true) @@ -50,7 +49,7 @@ FunctionPass *llvm::createMemDerefPrinter() { } bool MemDerefPrinter::runOnFunction(Function &F) { - const DataLayout *DL = &getAnalysis<DataLayoutPass>().getDataLayout(); + const DataLayout &DL = F.getParent()->getDataLayout(); for (auto &I: inst_range(F)) { if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { Value *PO = LI->getPointerOperand(); diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 6108af3..8ddac8f 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -206,7 +206,7 @@ const CallInst *llvm::extractMallocCall(const Value *I, return isMallocLikeFn(I, TLI) ?
-static Value *computeArraySize(const CallInst *CI, const DataLayout *DL,
+static Value *computeArraySize(const CallInst *CI, const DataLayout &DL,
                                const TargetLibraryInfo *TLI,
                                bool LookThroughSExt = false) {
   if (!CI)
@@ -214,12 +214,12 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *DL,
 
   // The size of the malloc's result type must be known to determine array size.
   Type *T = getMallocAllocatedType(CI, TLI);
-  if (!T || !T->isSized() || !DL)
+  if (!T || !T->isSized())
     return nullptr;
 
-  unsigned ElementSize = DL->getTypeAllocSize(T);
+  unsigned ElementSize = DL.getTypeAllocSize(T);
   if (StructType *ST = dyn_cast<StructType>(T))
-    ElementSize = DL->getStructLayout(ST)->getSizeInBytes();
+    ElementSize = DL.getStructLayout(ST)->getSizeInBytes();
 
   // If malloc call's arg can be determined to be a multiple of ElementSize,
   // return the multiple.  Otherwise, return NULL.
@@ -232,23 +232,6 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *DL,
   return nullptr;
 }
 
-/// isArrayMalloc - Returns the corresponding CallInst if the instruction
-/// is a call to malloc whose array size can be determined and the array size
-/// is not constant 1.  Otherwise, return NULL.
-const CallInst *llvm::isArrayMalloc(const Value *I,
-                                    const DataLayout *DL,
-                                    const TargetLibraryInfo *TLI) {
-  const CallInst *CI = extractMallocCall(I, TLI);
-  Value *ArraySize = computeArraySize(CI, DL, TLI);
-
-  if (ConstantInt *ConstSize = dyn_cast_or_null<ConstantInt>(ArraySize))
-    if (ConstSize->isOne())
-      return CI;
-
-  // CI is a non-array malloc or we can't figure out that it is an array malloc.
-  return nullptr;
-}
-
 /// getMallocType - Returns the PointerType resulting from the malloc call.
 /// The PointerType depends on the number of bitcast uses of the malloc call:
 ///   0: PointerType is the calls' return type.
@@ -297,7 +280,7 @@ Type *llvm::getMallocAllocatedType(const CallInst *CI,
 /// then return that multiple.  For non-array mallocs, the multiple is
 /// constant 1.  Otherwise, return NULL for mallocs whose array size cannot be
 /// determined.
-Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *DL,
+Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout &DL,
                                 const TargetLibraryInfo *TLI,
                                 bool LookThroughSExt) {
  assert(isMallocLikeFn(CI, TLI) && "getMallocArraySize and not malloc call");
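For context on the computeArraySize hunk above: the array size is malloc's byte count divided evenly by the element size obtained from the DataLayout. A hedged sketch of just that arithmetic; arrayElementCount is an invented name for illustration, not an LLVM function.

#include <cstdint>
#include <optional>

// If Bytes is a whole multiple of ElementSize, return the element count;
// otherwise report failure, mirroring computeArraySize returning null.
std::optional<uint64_t> arrayElementCount(uint64_t Bytes,
                                          uint64_t ElementSize) {
  if (ElementSize == 0 || Bytes % ElementSize != 0)
    return std::nullopt;
  return Bytes / ElementSize; // e.g. malloc(40) of a 4-byte type -> 10
}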
@@ -367,11 +350,8 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
 /// object size in Size if successful, and false otherwise.
 /// If RoundToAlign is true, then Size is rounded up to the aligment of allocas,
 /// byval arguments, and global variables.
-bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *DL,
+bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
                          const TargetLibraryInfo *TLI, bool RoundToAlign) {
-  if (!DL)
-    return false;
-
   ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(), RoundToAlign);
   SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr));
   if (!Visitor.bothKnown(Data))
@@ -399,17 +379,17 @@ APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) {
   return Size;
 }
 
-ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *DL,
+ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout &DL,
                                                  const TargetLibraryInfo *TLI,
                                                  LLVMContext &Context,
                                                  bool RoundToAlign)
-: DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) {
+    : DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) {
   // Pointer size must be rechecked for each object visited since it could have
   // a different address space.
 }
 
 SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
-  IntTyBits = DL->getPointerTypeSizeInBits(V->getType());
+  IntTyBits = DL.getPointerTypeSizeInBits(V->getType());
   Zero = APInt::getNullValue(IntTyBits);
 
   V = V->stripPointerCasts();
@@ -449,7 +429,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
   if (!I.getAllocatedType()->isSized())
     return unknown();
 
-  APInt Size(IntTyBits, DL->getTypeAllocSize(I.getAllocatedType()));
+  APInt Size(IntTyBits, DL.getTypeAllocSize(I.getAllocatedType()));
   if (!I.isArrayAllocation())
     return std::make_pair(align(Size, I.getAlignment()), Zero);
 
@@ -468,7 +448,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
     return unknown();
   }
   PointerType *PT = cast<PointerType>(A.getType());
-  APInt Size(IntTyBits, DL->getTypeAllocSize(PT->getElementType()));
+  APInt Size(IntTyBits, DL.getTypeAllocSize(PT->getElementType()));
   return std::make_pair(align(Size, A.getParamAlignment()), Zero);
 }
 
@@ -541,7 +521,7 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) {
 SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) {
   SizeOffsetType PtrData = compute(GEP.getPointerOperand());
   APInt Offset(IntTyBits, 0);
-  if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*DL, Offset))
+  if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(DL, Offset))
     return unknown();
 
   return std::make_pair(PtrData.first, PtrData.second + Offset);
@@ -557,7 +537,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){
   if (!GV.hasDefinitiveInitializer())
     return unknown();
 
-  APInt Size(IntTyBits, DL->getTypeAllocSize(GV.getType()->getElementType()));
+  APInt Size(IntTyBits, DL.getTypeAllocSize(GV.getType()->getElementType()));
   return std::make_pair(align(Size, GV.getAlignment()), Zero);
 }
 
@@ -593,19 +573,18 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) {
   return unknown();
 }
 
-ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *DL,
-                                                     const TargetLibraryInfo *TLI,
-                                                     LLVMContext &Context,
-                                                     bool RoundToAlign)
-: DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)),
-  RoundToAlign(RoundToAlign) {
+ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(
+    const DataLayout &DL, const TargetLibraryInfo *TLI, LLVMContext &Context,
+    bool RoundToAlign)
+    : DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)),
+      RoundToAlign(RoundToAlign) {
   // IntTy and Zero must be set for each compute() since the address space may
  // be different for later objects.
 }
 
 SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
   // XXX - Are vectors of pointers possible here?
-  IntTy = cast<IntegerType>(DL->getIntPtrType(V->getType()));
+  IntTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
   Zero = ConstantInt::get(IntTy, 0);
 
   SizeOffsetEvalType Result = compute_(V);
@@ -687,7 +666,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) {
   assert(I.isArrayAllocation());
   Value *ArraySize = I.getArraySize();
   Value *Size = ConstantInt::get(ArraySize->getType(),
-                                 DL->getTypeAllocSize(I.getAllocatedType()));
+                                 DL.getTypeAllocSize(I.getAllocatedType()));
   Size = Builder.CreateMul(Size, ArraySize);
   return std::make_pair(Size, Zero);
 }
@@ -739,7 +718,7 @@ ObjectSizeOffsetEvaluator::visitGEPOperator(GEPOperator &GEP) {
   if (!bothKnown(PtrData))
     return unknown();
 
-  Value *Offset = EmitGEPOffset(&Builder, *DL, &GEP, /*NoAssumptions=*/true);
+  Value *Offset = EmitGEPOffset(&Builder, DL, &GEP, /*NoAssumptions=*/true);
   Offset = Builder.CreateAdd(PtrData.second, Offset);
   return std::make_pair(PtrData.first, Offset);
 }
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 6d38863..716e3e6 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -93,8 +93,6 @@ void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
 bool MemoryDependenceAnalysis::runOnFunction(Function &F) {
   AA = &getAnalysis<AliasAnalysis>();
   AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
-  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-  DL = DLP ? &DLP->getDataLayout() : nullptr;
   DominatorTreeWrapperPass *DTWP =
       getAnalysisIfAvailable<DominatorTreeWrapperPass>();
   DT = DTWP ? &DTWP->getDomTree() : nullptr;
@@ -263,22 +261,17 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
 ///
 /// MemLocBase, MemLocOffset are lazily computed here the first time the
 /// base/offs of memloc is needed.
-static bool
-isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
-                                       const Value *&MemLocBase,
-                                       int64_t &MemLocOffs,
-                                       const LoadInst *LI,
-                                       const DataLayout *DL) {
-  // If we have no target data, we can't do this.
-  if (!DL) return false;
+static bool isLoadLoadClobberIfExtendedToFullWidth(
+    const AliasAnalysis::Location &MemLoc, const Value *&MemLocBase,
+    int64_t &MemLocOffs, const LoadInst *LI) {
+  const DataLayout &DL = LI->getModule()->getDataLayout();
 
   // If we haven't already computed the base/offset of MemLoc, do so now.
   if (!MemLocBase)
     MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL);
 
-  unsigned Size = MemoryDependenceAnalysis::
-    getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size,
-                                    LI, *DL);
+  unsigned Size = MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize(
+      MemLocBase, MemLocOffs, MemLoc.Size, LI);
  return Size != 0;
 }
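The next hunk rewrites getLoadLoadClobberFullWidthSize to fetch the DataLayout from the load's own module. The question it answers, roughly: can an integer load be widened, in power-of-two steps, so that it also covers a nearby location with the same base pointer? A simplified sketch under those assumptions; widenedLoadSize is an illustrative stand-in, and the real routine also checks alignment and target safety.

#include <cstdint>

// LoadOffs/MemOffs are byte offsets from one common base pointer; LoadSize
// is assumed to be a nonzero power of two. Returns the smallest power-of-two
// byte width, starting at LoadOffs, that covers both the original load and
// the target location, or 0 if the location begins before the load
// (widening only extends the load forward).
uint64_t widenedLoadSize(int64_t LoadOffs, uint64_t LoadSize,
                         int64_t MemOffs, uint64_t MemSize) {
  if (MemOffs < LoadOffs)
    return 0;
  uint64_t Needed = uint64_t(MemOffs - LoadOffs) + MemSize;
  uint64_t NewSize = LoadSize;
  while (NewSize < Needed)
    NewSize <<= 1; // double the load width until the location is covered
  return NewSize;
}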
@@ -289,10 +282,9 @@ isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
 /// 2) safe for the target, and 3) would provide the specified memory
 /// location value, then this function returns the size in bytes of the
 /// load width to use.  If not, this returns zero.
-unsigned MemoryDependenceAnalysis::
-getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
-                                unsigned MemLocSize, const LoadInst *LI,
-                                const DataLayout &DL) {
+unsigned MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize(
+    const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
+    const LoadInst *LI) {
   // We can only extend simple integer loads.
   if (!isa<IntegerType>(LI->getType()) || !LI->isSimple())
     return 0;
@@ -301,10 +293,12 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
   if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
     return 0;
 
+  const DataLayout &DL = LI->getModule()->getDataLayout();
+
   // Get the base of this load.
   int64_t LIOffs = 0;
   const Value *LIBase =
-    GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &DL);
+      GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);
 
   // If the two pointers are not based on the same pointer, we can't tell that
   // they are related.
@@ -413,14 +407,19 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
   // by every program that can detect any optimisation of that kind: either
   // it is racy (undefined) or there is a release followed by an acquire
   // between the pair of accesses under consideration.
-  bool HasSeenAcquire = false;
 
+  // If the load is invariant, we "know" that it doesn't alias *any* write. We
+  // do want to respect mustalias results since defs are useful for value
+  // forwarding, but any mayalias write can be assumed to be noalias.
+  // Arguably, this logic should be pushed inside AliasAnalysis itself.
   if (isLoad && QueryInst) {
     LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
     if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
       isInvariantLoad = true;
   }
 
+  const DataLayout &DL = BB->getModule()->getDataLayout();
+
   // Walk backwards through the basic block, looking for dependencies.
   while (ScanIt != BB->begin()) {
     Instruction *Inst = --ScanIt;
@@ -472,12 +471,12 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
 
       // Atomic loads have complications involved.
       // A Monotonic (or higher) load is OK if the query inst is itself not atomic.
-      // An Acquire (or higher) load sets the HasSeenAcquire flag, so that any
-      //   release store will know to return getClobber.
       // FIXME: This is overly conservative.
       if (LI->isAtomic() && LI->getOrdering() > Unordered) {
         if (!QueryInst)
           return MemDepResult::getClobber(LI);
+        if (LI->getOrdering() != Monotonic)
+          return MemDepResult::getClobber(LI);
         if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
           if (!QueryLI->isSimple())
             return MemDepResult::getClobber(LI);
@@ -487,9 +486,6 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
         } else if (QueryInst->mayReadOrWriteMemory()) {
           return MemDepResult::getClobber(LI);
         }
-
-        if (isAtLeastAcquire(LI->getOrdering()))
-          HasSeenAcquire = true;
       }
 
       AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
@@ -505,12 +501,12 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
       //   location is 1 byte at P+1).  If so, return it as a load/load
       //   clobber result, allowing the client to decide to widen the load if
      //   it wants to.
-      if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType()))
-        if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() &&
+      if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
+        if (LI->getAlignment() * 8 > ITy->getPrimitiveSizeInBits() &&
             isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
-                                                   MemLocOffset, LI, DL))
+                                                   MemLocOffset, LI))
           return MemDepResult::getClobber(Inst);
-
+      }
       continue;
     }
 
@@ -549,12 +545,12 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
     if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
       // Atomic stores have complications involved.
       // A Monotonic store is OK if the query inst is itself not atomic.
-      // A Release (or higher) store further requires that no acquire load
-      //   has been seen.
       // FIXME: This is overly conservative.
       if (!SI->isUnordered()) {
         if (!QueryInst)
           return MemDepResult::getClobber(SI);
+        if (SI->getOrdering() != Monotonic)
+          return MemDepResult::getClobber(SI);
         if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
           if (!QueryLI->isSimple())
             return MemDepResult::getClobber(SI);
@@ -564,9 +560,6 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
         } else if (QueryInst->mayReadOrWriteMemory()) {
           return MemDepResult::getClobber(SI);
         }
-
-        if (HasSeenAcquire && isAtLeastRelease(SI->getOrdering()))
-          return MemDepResult::getClobber(SI);
       }
 
       // FIXME: this is overly conservative.
@@ -612,6 +605,8 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
       if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr))
         return MemDepResult::getDef(Inst);
 
+      if (isInvariantLoad)
+        continue;
       // Be conservative if the accessed pointer may alias the allocation.
       if (AA->alias(Inst, AccessPtr) != AliasAnalysis::NoAlias)
         return MemDepResult::getClobber(Inst);
@@ -622,6 +617,9 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
       continue;
     }
 
+    if (isInvariantLoad)
+      continue;
+
     // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
     AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc);
     // If necessary, perform additional analysis.
@@ -923,8 +921,7 @@ getNonLocalPointerDependency(Instruction *QueryInst,
                                        const_cast<Value *>(Loc.Ptr)));
     return;
   }
-
-
+  const DataLayout &DL = FromBB->getModule()->getDataLayout();
   PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, AC);
 
  // This is the set of blocks we've inspected, and the pointer we consider in
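The MemoryDependenceAnalysis hunks above replace the HasSeenAcquire bookkeeping with a simpler rule: an atomic access stronger than monotonic is conservatively treated as a clobber unless the query is a simple load or store. A compressed sketch of that decision, with invented stand-in types rather than the LLVM classes:

enum Ordering { NotAtomic, Unordered, Monotonic, Acquire, Release, SeqCst };

struct AtomicAccess { Ordering Ord; };
struct Query {
  bool IsSimpleLoadOrStore; // non-volatile, non-atomic load/store
  bool TouchesMemory;
};

// Rule after this patch: monotonic atomics are tolerated when the query is
// a simple load/store; anything stronger clobbers.
bool atomicClobbers(const AtomicAccess &A, const Query *Q) {
  if (A.Ord <= Unordered)
    return false;            // not an ordering that needs care here
  if (!Q || A.Ord != Monotonic)
    return true;             // unknown query, or acquire/release/seq_cst
  if (Q->IsSimpleLoadOrStore)
    return false;
  return Q->TouchesMemory;   // conservative for everything else
}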
diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp
index f645558..cbc4700 100644
--- a/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -55,28 +55,74 @@ bool ModuleDebugInfoPrinter::runOnModule(Module &M) {
   return false;
 }
 
+static void printFile(raw_ostream &O, StringRef Filename, StringRef Directory,
+                      unsigned Line = 0) {
+  if (Filename.empty())
+    return;
+
+  O << " from ";
+  if (!Directory.empty())
+    O << Directory << "/";
+  O << Filename;
+  if (Line)
+    O << ":" << Line;
+}
+
 void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
+  // Printing the nodes directly isn't particularly helpful (since they
+  // reference other nodes that won't be printed, particularly for the
+  // filenames), so just print a few useful things.
   for (DICompileUnit CU : Finder.compile_units()) {
-    O << "Compile Unit: ";
-    CU.print(O);
+    O << "Compile unit: ";
+    if (const char *Lang = LanguageString(CU.getLanguage()))
+      O << Lang;
+    else
+      O << "unknown-language(" << CU.getLanguage() << ")";
+    printFile(O, CU.getFilename(), CU.getDirectory());
     O << '\n';
   }
 
   for (DISubprogram S : Finder.subprograms()) {
-    O << "Subprogram: ";
-    S.print(O);
+    O << "Subprogram: " << S.getName();
+    printFile(O, S.getFilename(), S.getDirectory(), S.getLineNumber());
+    if (!S.getLinkageName().empty())
+      O << " ('" << S.getLinkageName() << "')";
     O << '\n';
   }
 
   for (DIGlobalVariable GV : Finder.global_variables()) {
-    O << "GlobalVariable: ";
-    GV.print(O);
+    O << "Global variable: " << GV.getName();
+    printFile(O, GV.getFilename(), GV.getDirectory(), GV.getLineNumber());
+    if (!GV.getLinkageName().empty())
+      O << " ('" << GV.getLinkageName() << "')";
     O << '\n';
   }
 
   for (DIType T : Finder.types()) {
-    O << "Type: ";
-    T.print(O);
+    O << "Type:";
+    if (!T.getName().empty())
+      O << ' ' << T.getName();
+    printFile(O, T.getFilename(), T.getDirectory(), T.getLineNumber());
+    if (T.isBasicType()) {
+      DIBasicType BT(T.get());
+      O << " ";
+      if (const char *Encoding =
+              dwarf::AttributeEncodingString(BT.getEncoding()))
+        O << Encoding;
+      else
+        O << "unknown-encoding(" << BT.getEncoding() << ')';
+    } else {
+      O << ' ';
+      if (const char *Tag = dwarf::TagString(T.getTag()))
+        O << Tag;
+      else
+        O << "unknown-tag(" << T.getTag() << ")";
+    }
+    if (T.isCompositeType()) {
+      DICompositeType CT(T.get());
+      if (auto *S = CT.getIdentifier())
+        O << " (identifier: '" << S->getString() << "')";
+    }
     O << '\n';
   }
 }
diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp
index c214d3c..203e1da 100644
--- a/lib/Analysis/NoAliasAnalysis.cpp
+++ b/lib/Analysis/NoAliasAnalysis.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
 using namespace llvm;
 
@@ -33,11 +34,11 @@ namespace {
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {}
 
-    void initializePass() override {
+    bool doInitialization(Module &M) override {
       // Note: NoAA does not call InitializeAliasAnalysis because it's
       // special and does not support chaining.
-      DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-      DL = DLP ? &DLP->getDataLayout() : nullptr;
+      DL = &M.getDataLayout();
+      return true;
     }
 
     AliasResult alias(const Location &LocA, const Location &LocB) override {
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index a534418..177684f 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -404,10 +404,9 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
       GEPOps.push_back(OpVal);
     }
 
-    GetElementPtrInst *Result =
-      GetElementPtrInst::Create(GEPOps[0], makeArrayRef(GEPOps).slice(1),
-                                InVal->getName()+".phi.trans.insert",
-                                PredBB->getTerminator());
+    GetElementPtrInst *Result = GetElementPtrInst::Create(
+        GEP->getSourceElementType(), GEPOps[0], makeArrayRef(GEPOps).slice(1),
+        InVal->getName() + ".phi.trans.insert", PredBB->getTerminator());
     Result->setIsInBounds(GEP->isInBounds());
     NewInsts.push_back(Result);
     return Result;
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index 6fa7b2e..cd1e944 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Analysis/RegionIterator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "regionpassmgr"
@@ -83,9 +84,11 @@ bool RGPassManager::runOnFunction(Function &F) {
     for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
       RegionPass *P = (RegionPass*)getContainedPass(Index);
 
-      dumpPassInfo(P, EXECUTION_MSG, ON_REGION_MSG,
-                   CurrentRegion->getNameStr());
-      dumpRequiredSet(P);
+      if (isPassDebuggingExecutionsOrMore()) {
+        dumpPassInfo(P, EXECUTION_MSG, ON_REGION_MSG,
+                     CurrentRegion->getNameStr());
+        dumpRequiredSet(P);
+      }
 
       initializeAnalysisImpl(P);
 
@@ -96,11 +99,13 @@ bool RGPassManager::runOnFunction(Function &F) {
         Changed |= P->runOnRegion(CurrentRegion, *this);
       }
 
-      if (Changed)
-        dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG,
-                     skipThisRegion ? "<deleted>" :
-                                    CurrentRegion->getNameStr());
-      dumpPreservedSet(P);
+      if (isPassDebuggingExecutionsOrMore()) {
+        if (Changed)
+          dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG,
+                       skipThisRegion ? "<deleted>" :
+                                      CurrentRegion->getNameStr());
+        dumpPreservedSet(P);
+      }
 
       if (!skipThisRegion) {
         // Manually check that this region is still healthy. This is done
@@ -120,8 +125,8 @@ bool RGPassManager::runOnFunction(Function &F) {
       removeNotPreservedAnalysis(P);
       recordAvailableAnalysis(P);
       removeDeadPasses(P,
-                       skipThisRegion ? "<deleted>" :
-                                      CurrentRegion->getNameStr(),
+                       (!isPassDebuggingExecutionsOrMore() || skipThisRegion) ?
+                       "<deleted>" : CurrentRegion->getNameStr(),
                        ON_REGION_MSG);
 
       if (skipThisRegion)
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 9e4eb11..4e713fb 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -1102,13 +1102,14 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
       return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
 
   // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can
-  // eliminate all the truncates.
+  // eliminate all the truncates, or we replace other casts with truncates.
   if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) {
     SmallVector<const SCEV *, 4> Operands;
     bool hasTrunc = false;
     for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) {
       const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty);
-      hasTrunc = isa<SCEVTruncateExpr>(S);
+      if (!isa<SCEVCastExpr>(SA->getOperand(i)))
+        hasTrunc = isa<SCEVTruncateExpr>(S);
       Operands.push_back(S);
     }
     if (!hasTrunc)
@@ -1117,13 +1118,14 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
   }
 
   // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
-  // eliminate all the truncates.
+  // eliminate all the truncates, or we replace other casts with truncates.
   if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) {
     SmallVector<const SCEV *, 4> Operands;
     bool hasTrunc = false;
     for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) {
       const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty);
-      hasTrunc = isa<SCEVTruncateExpr>(S);
+      if (!isa<SCEVCastExpr>(SM->getOperand(i)))
+        hasTrunc = isa<SCEVTruncateExpr>(S);
       Operands.push_back(S);
     }
     if (!hasTrunc)
@@ -1325,6 +1327,85 @@ static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
                         (SE->*GetExtendExpr)(PreStart, Ty));
 }
 
+// Try to prove away overflow by looking at "nearby" add recurrences.  A
+// motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it
+// does not itself wrap then we can conclude that `{1,+,4}` is `nuw`.
+//
+// Formally:
+//
+//     {S,+,X} == {S-T,+,X} + T
+//  => Ext({S,+,X}) == Ext({S-T,+,X} + T)
+//
+//  If ({S-T,+,X} + T) does not overflow  ... (1)
+//
+//  RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T)
+//
+//  If {S-T,+,X} does not overflow  ... (2)
+//
+//  RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T)
+//      == {Ext(S-T)+Ext(T),+,Ext(X)}
+//
+//  If (S-T)+T does not overflow  ... (3)
+//
+//  RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)}
+//      == {Ext(S),+,Ext(X)} == LHS
+//
+// Thus, if (1), (2) and (3) are true for some T, then
+//   Ext({S,+,X}) == {Ext(S),+,Ext(X)}
+//
+// (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T)
+// does not overflow" restricted to the 0th iteration.  Therefore we only need
+// to check for (1) and (2).
+//
+// In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T
+// is `Delta` (defined below).
+//
+template <typename ExtendOpTy>
+bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
+                                                const SCEV *Step,
+                                                const Loop *L) {
+  auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
+
+  // We restrict `Start` to a constant to prevent SCEV from spending too much
+  // time here.  It is correct (but more expensive) to continue with a
+  // non-constant `Start` and do a general SCEV subtraction to compute
+  // `PreStart` below.
+  //
+  const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start);
+  if (!StartC)
+    return false;
+
+  APInt StartAI = StartC->getValue()->getValue();
+
+  for (unsigned Delta : {-2, -1, 1, 2}) {
+    const SCEV *PreStart = getConstant(StartAI - Delta);
+
+    // Give up if we don't already have the add recurrence we need because
+    // actually constructing an add recurrence is relatively expensive.
+    const SCEVAddRecExpr *PreAR = [&]() {
+      FoldingSetNodeID ID;
+      ID.AddInteger(scAddRecExpr);
+      ID.AddPointer(PreStart);
+      ID.AddPointer(Step);
+      ID.AddPointer(L);
+      void *IP = nullptr;
+      return static_cast<SCEVAddRecExpr *>(
+          this->UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+    }();
+
+    if (PreAR && PreAR->getNoWrapFlags(WrapType)) {  // proves (2)
+      const SCEV *DeltaS = getConstant(StartC->getType(), Delta);
+      ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
+      const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(
+          DeltaS, &Pred, this);
+      if (Limit && isKnownPredicate(Pred, PreAR, Limit))  // proves (1)
+        return true;
+    }
+  }
+
+  return false;
+}
+
 const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
                                                Type *Ty) {
   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
@@ -1473,6 +1554,13 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
           }
         }
       }
+
+      if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
+        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
+        return getAddRecExpr(
+            getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+            getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+      }
     }
 
   // The cast wasn't folded; create an explicit cast node.
@@ -1664,6 +1752,13 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
           return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
         }
       }
+
+      if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
+        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+        return getAddRecExpr(
+            getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+            getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+      }
     }
 
  // The cast wasn't folded; create an explicit cast node.
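A concrete instance of the rule proveNoWrapByVaryingStart formalizes, checked by brute force at 8 bits. This is a sketch to build intuition, not part of the patch: if {0,+,4} never wraps and stays ult -1 over the trip count, then {1,+,4} cannot wrap either, so zero-extending it commutes with the recurrence.

#include <cassert>
#include <cstdint>

int main() {
  // Premise (2): {0,+,4} does not wrap for Trips iterations (0+4*60 = 240).
  // Premise (1): {0,+,4} + 1 does not overflow, since {0,+,4} ult 0xFF.
  const unsigned Trips = 60;
  uint8_t Narrow = 1;  // {1,+,4} evaluated in 8 bits
  uint16_t Wide = 1;   // {zext(1),+,zext(4)} evaluated in 16 bits
  for (unsigned I = 0; I < Trips; ++I) {
    assert((uint16_t)Narrow == Wide); // zext({1,+,4}) == {zext(1),+,zext(4)}
    Narrow = (uint8_t)(Narrow + 4);
    Wide = (uint16_t)(Wide + 4);
  }
  return 0;
}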
@@ -3037,39 +3132,23 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
 }
 
 const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
-  // If we have DataLayout, we can bypass creating a target-independent
+  // We can bypass creating a target-independent
   // constant expression and then folding it back into a ConstantInt.
   // This is just a compile-time optimization.
-  if (DL)
-    return getConstant(IntTy, DL->getTypeAllocSize(AllocTy));
-
-  Constant *C = ConstantExpr::getSizeOf(AllocTy);
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
-    if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
-      C = Folded;
-  Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
-  assert(Ty == IntTy && "Effective SCEV type doesn't match");
-  return getTruncateOrZeroExtend(getSCEV(C), Ty);
+  return getConstant(IntTy,
+                     F->getParent()->getDataLayout().getTypeAllocSize(AllocTy));
 }
 
 const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
                                              StructType *STy,
                                              unsigned FieldNo) {
-  // If we have DataLayout, we can bypass creating a target-independent
+  // We can bypass creating a target-independent
   // constant expression and then folding it back into a ConstantInt.
   // This is just a compile-time optimization.
-  if (DL) {
-    return getConstant(IntTy,
-                       DL->getStructLayout(STy)->getElementOffset(FieldNo));
-  }
-
-  Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
-    if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
-      C = Folded;
-
-  Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
-  return getTruncateOrZeroExtend(getSCEV(C), Ty);
+  return getConstant(
+      IntTy,
+      F->getParent()->getDataLayout().getStructLayout(STy)->getElementOffset(
+          FieldNo));
 }
 
 const SCEV *ScalarEvolution::getUnknown(Value *V) {
@@ -3111,19 +3190,7 @@ bool ScalarEvolution::isSCEVable(Type *Ty) const {
 /// for which isSCEVable must return true.
 uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
   assert(isSCEVable(Ty) && "Type is not SCEVable!");
-
-  // If we have a DataLayout, use it!
-  if (DL)
-    return DL->getTypeSizeInBits(Ty);
-
-  // Integer types have fixed sizes.
-  if (Ty->isIntegerTy())
-    return Ty->getPrimitiveSizeInBits();
-
-  // The only other support type is pointer. Without DataLayout, conservatively
-  // assume pointers are 64-bit.
-  assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!");
-  return 64;
+  return F->getParent()->getDataLayout().getTypeSizeInBits(Ty);
 }
 
 /// getEffectiveSCEVType - Return a type with the same bitwidth as
@@ -3139,12 +3206,7 @@ Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
 
   // The only other support type is pointer.
   assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
-
-  if (DL)
-    return DL->getIntPtrType(Ty);
-
-  // Without DataLayout, conservatively assume pointers are 64-bit.
-  return Type::getInt64Ty(getContext());
+  return F->getParent()->getDataLayout().getIntPtrType(Ty);
 }
 
 const SCEV *ScalarEvolution::getCouldNotCompute() {
@@ -3531,10 +3593,12 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
         // If the increment doesn't overflow, then neither the addrec nor
         // the post-increment will overflow.
         if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
-          if (OBO->hasNoUnsignedWrap())
-            Flags = setFlags(Flags, SCEV::FlagNUW);
-          if (OBO->hasNoSignedWrap())
-            Flags = setFlags(Flags, SCEV::FlagNSW);
+          if (OBO->getOperand(0) == PN) {
+            if (OBO->hasNoUnsignedWrap())
+              Flags = setFlags(Flags, SCEV::FlagNUW);
+            if (OBO->hasNoSignedWrap())
+              Flags = setFlags(Flags, SCEV::FlagNSW);
+          }
         } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
           // If the increment is an inbounds GEP, then we know the address
           // space cannot be wrapped around. We cannot make any guarantee
@@ -3542,7 +3606,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
           // unsigned but we may have a negative index from the base
           // pointer. We can guarantee that no unsigned wrap occurs if the
           // indices form a positive value.
-          if (GEP->isInBounds()) {
+          if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
             Flags = setFlags(Flags, SCEV::FlagNW);
 
             const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
@@ -3608,7 +3672,8 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
   // PHI's incoming blocks are in a different loop, in which case doing so
   // risks breaking LCSSA form. Instcombine would normally zap these, but
  // it doesn't have DominatorTree information, so it may miss cases.
-  if (Value *V = SimplifyInstruction(PN, DL, TLI, DT, AC))
+  if (Value *V =
+          SimplifyInstruction(PN, F->getParent()->getDataLayout(), TLI, DT, AC))
     if (LI->replacementPreservesLCSSAForm(PN, V))
       return getSCEV(V);
 
@@ -3740,7 +3805,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
     // For a SCEVUnknown, ask ValueTracking.
     unsigned BitWidth = getTypeSizeInBits(U->getType());
     APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
-    computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT);
+    computeKnownBits(U->getValue(), Zeros, Ones,
+                     F->getParent()->getDataLayout(), 0, AC, nullptr, DT);
     return Zeros.countTrailingOnes();
  }
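The getRange hunk that follows refines conservative ranges using known trailing zeros: a value with TZ low zero bits is a multiple of 2^TZ, so its maximum is rounded down to such a multiple. A small stand-alone illustration of the unsigned case; maxWithTrailingZeros is an invented helper name.

#include <cassert>
#include <cstdint>

// Largest BitWidth-bit unsigned value that is a multiple of 2^TZ; this is
// what APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) computes in the hunk.
uint64_t maxWithTrailingZeros(unsigned BitWidth, unsigned TZ) {
  uint64_t Max = (BitWidth >= 64) ? ~0ULL : ((1ULL << BitWidth) - 1);
  return (Max >> TZ) << TZ;
}

int main() {
  // An 8-bit value known to be a multiple of 4 can be at most 252.
  assert(maxWithTrailingZeros(8, 2) == 252);
  return 0;
}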
@@ -3775,79 +3841,93 @@ static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
   return None;
 }
 
-/// getUnsignedRange - Determine the unsigned range for a particular SCEV.
+/// getRange - Determine the range for a particular SCEV.  If SignHint is
+/// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges
+/// with a "cleaner" unsigned (resp. signed) representation.
 ///
 ConstantRange
-ScalarEvolution::getUnsignedRange(const SCEV *S) {
+ScalarEvolution::getRange(const SCEV *S,
+                          ScalarEvolution::RangeSignHint SignHint) {
+  DenseMap<const SCEV *, ConstantRange> &Cache =
+      SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
+                                                       : SignedRanges;
+
   // See if we've computed this range already.
-  DenseMap<const SCEV *, ConstantRange>::iterator I = UnsignedRanges.find(S);
-  if (I != UnsignedRanges.end())
+  DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S);
+  if (I != Cache.end())
     return I->second;
 
   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
-    return setUnsignedRange(C, ConstantRange(C->getValue()->getValue()));
+    return setRange(C, SignHint, ConstantRange(C->getValue()->getValue()));
 
   unsigned BitWidth = getTypeSizeInBits(S->getType());
   ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
 
-  // If the value has known zeros, the maximum unsigned value will have those
-  // known zeros as well.
+  // If the value has known zeros, the maximum value will have those known zeros
+  // as well.
   uint32_t TZ = GetMinTrailingZeros(S);
-  if (TZ != 0)
-    ConservativeResult =
-      ConstantRange(APInt::getMinValue(BitWidth),
-                    APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
+  if (TZ != 0) {
+    if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED)
+      ConservativeResult =
+          ConstantRange(APInt::getMinValue(BitWidth),
+                        APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
+    else
+      ConservativeResult = ConstantRange(
+          APInt::getSignedMinValue(BitWidth),
+          APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
+  }
 
   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
-    ConstantRange X = getUnsignedRange(Add->getOperand(0));
+    ConstantRange X = getRange(Add->getOperand(0), SignHint);
     for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
-      X = X.add(getUnsignedRange(Add->getOperand(i)));
-    return setUnsignedRange(Add, ConservativeResult.intersectWith(X));
+      X = X.add(getRange(Add->getOperand(i), SignHint));
+    return setRange(Add, SignHint, ConservativeResult.intersectWith(X));
   }
 
   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
-    ConstantRange X = getUnsignedRange(Mul->getOperand(0));
+    ConstantRange X = getRange(Mul->getOperand(0), SignHint);
    for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
-      X = X.multiply(getUnsignedRange(Mul->getOperand(i)));
-    return setUnsignedRange(Mul, ConservativeResult.intersectWith(X));
+      X = X.multiply(getRange(Mul->getOperand(i), SignHint));
+    return setRange(Mul, SignHint, ConservativeResult.intersectWith(X));
   }
 
   if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
-    ConstantRange X = getUnsignedRange(SMax->getOperand(0));
+    ConstantRange X = getRange(SMax->getOperand(0), SignHint);
     for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
-      X = X.smax(getUnsignedRange(SMax->getOperand(i)));
-    return setUnsignedRange(SMax, ConservativeResult.intersectWith(X));
+      X = X.smax(getRange(SMax->getOperand(i), SignHint));
+    return setRange(SMax, SignHint, ConservativeResult.intersectWith(X));
   }
 
   if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
-    ConstantRange X = getUnsignedRange(UMax->getOperand(0));
+    ConstantRange X = getRange(UMax->getOperand(0), SignHint);
     for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
-      X = X.umax(getUnsignedRange(UMax->getOperand(i)));
-    return setUnsignedRange(UMax, ConservativeResult.intersectWith(X));
+      X = X.umax(getRange(UMax->getOperand(i), SignHint));
+    return setRange(UMax, SignHint, ConservativeResult.intersectWith(X));
   }
 
   if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
-    ConstantRange X = getUnsignedRange(UDiv->getLHS());
-    ConstantRange Y = getUnsignedRange(UDiv->getRHS());
-    return setUnsignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
+    ConstantRange X = getRange(UDiv->getLHS(), SignHint);
+    ConstantRange Y = getRange(UDiv->getRHS(), SignHint);
+    return setRange(UDiv, SignHint,
+                    ConservativeResult.intersectWith(X.udiv(Y)));
   }
 
   if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
-    ConstantRange X = getUnsignedRange(ZExt->getOperand());
-    return setUnsignedRange(ZExt,
-      ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
+    ConstantRange X = getRange(ZExt->getOperand(), SignHint);
+    return setRange(ZExt, SignHint,
+                    ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
   }
 
   if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
-    ConstantRange X = getUnsignedRange(SExt->getOperand());
-    return setUnsignedRange(SExt,
-      ConservativeResult.intersectWith(X.signExtend(BitWidth)));
+    ConstantRange X = getRange(SExt->getOperand(), SignHint);
+    return setRange(SExt, SignHint,
+                    ConservativeResult.intersectWith(X.signExtend(BitWidth)));
   }
 
   if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
-    ConstantRange X = getUnsignedRange(Trunc->getOperand());
-    return setUnsignedRange(Trunc,
-      ConservativeResult.intersectWith(X.truncate(BitWidth)));
+    ConstantRange X = getRange(Trunc->getOperand(), SignHint);
+    return setRange(Trunc, SignHint,
+                    ConservativeResult.intersectWith(X.truncate(BitWidth)));
   }
 
   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
@@ -3860,143 +3940,6 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
           ConservativeResult.intersectWith(
             ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)));
 
-    // TODO: non-affine addrec
-    if (AddRec->isAffine()) {
-      Type *Ty = AddRec->getType();
-      const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
-      if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
-          getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
-        MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
-
-        const SCEV *Start = AddRec->getStart();
-        const SCEV *Step = AddRec->getStepRecurrence(*this);
-
-        ConstantRange StartRange = getUnsignedRange(Start);
-        ConstantRange StepRange = getSignedRange(Step);
-        ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
-        ConstantRange EndRange =
-          StartRange.add(MaxBECountRange.multiply(StepRange));
-
-        // Check for overflow. This must be done with ConstantRange arithmetic
-        // because we could be called from within the ScalarEvolution overflow
-        // checking code.
-        ConstantRange ExtStartRange = StartRange.zextOrTrunc(BitWidth*2+1);
-        ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
-        ConstantRange ExtMaxBECountRange =
-          MaxBECountRange.zextOrTrunc(BitWidth*2+1);
-        ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1);
-        if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
-            ExtEndRange)
-          return setUnsignedRange(AddRec, ConservativeResult);
-
-        APInt Min = APIntOps::umin(StartRange.getUnsignedMin(),
-                                   EndRange.getUnsignedMin());
-        APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
-                                   EndRange.getUnsignedMax());
-        if (Min.isMinValue() && Max.isMaxValue())
-          return setUnsignedRange(AddRec, ConservativeResult);
-        return setUnsignedRange(AddRec,
-          ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
-      }
-    }
-
-    return setUnsignedRange(AddRec, ConservativeResult);
-  }
-
-  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
-    // Check if the IR explicitly contains !range metadata.
-    Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
-    if (MDRange.hasValue())
-      ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());
-
-    // For a SCEVUnknown, ask ValueTracking.
-    APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
-    computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT);
-    if (Ones == ~Zeros + 1)
-      return setUnsignedRange(U, ConservativeResult);
-    return setUnsignedRange(U,
-      ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)));
-  }
-
-  return setUnsignedRange(S, ConservativeResult);
-}
-
-/// getSignedRange - Determine the signed range for a particular SCEV.
-///
-ConstantRange
-ScalarEvolution::getSignedRange(const SCEV *S) {
- // See if we've computed this range already.
-  DenseMap<const SCEV *, ConstantRange>::iterator I = SignedRanges.find(S);
-  if (I != SignedRanges.end())
-    return I->second;
-
-  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
-    return setSignedRange(C, ConstantRange(C->getValue()->getValue()));
-
-  unsigned BitWidth = getTypeSizeInBits(S->getType());
-  ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
-
-  // If the value has known zeros, the maximum signed value will have those
-  // known zeros as well.
-  uint32_t TZ = GetMinTrailingZeros(S);
-  if (TZ != 0)
-    ConservativeResult =
-      ConstantRange(APInt::getSignedMinValue(BitWidth),
-                    APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
-
-  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
-    ConstantRange X = getSignedRange(Add->getOperand(0));
-    for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
-      X = X.add(getSignedRange(Add->getOperand(i)));
-    return setSignedRange(Add, ConservativeResult.intersectWith(X));
-  }
-
-  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
-    ConstantRange X = getSignedRange(Mul->getOperand(0));
-    for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
-      X = X.multiply(getSignedRange(Mul->getOperand(i)));
-    return setSignedRange(Mul, ConservativeResult.intersectWith(X));
-  }
-
-  if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
-    ConstantRange X = getSignedRange(SMax->getOperand(0));
-    for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
-      X = X.smax(getSignedRange(SMax->getOperand(i)));
-    return setSignedRange(SMax, ConservativeResult.intersectWith(X));
-  }
-
-  if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
-    ConstantRange X = getSignedRange(UMax->getOperand(0));
-    for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
-      X = X.umax(getSignedRange(UMax->getOperand(i)));
-    return setSignedRange(UMax, ConservativeResult.intersectWith(X));
-  }
-
-  if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
-    ConstantRange X = getSignedRange(UDiv->getLHS());
-    ConstantRange Y = getSignedRange(UDiv->getRHS());
-    return setSignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
-  }
-
-  if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
-    ConstantRange X = getSignedRange(ZExt->getOperand());
-    return setSignedRange(ZExt,
-      ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
-  }
-
-  if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
-    ConstantRange X = getSignedRange(SExt->getOperand());
-    return setSignedRange(SExt,
-      ConservativeResult.intersectWith(X.signExtend(BitWidth)));
-  }
-
-  if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
-    ConstantRange X = getSignedRange(Trunc->getOperand());
-    return setSignedRange(Trunc,
-      ConservativeResult.intersectWith(X.truncate(BitWidth)));
-  }
-
-  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
     // If there's no signed wrap, and all the operands have the same sign or
     // zero, the value won't ever change sign.
     if (AddRec->getNoWrapFlags(SCEV::FlagNSW)) {
@@ -4022,41 +3965,66 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
       const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
       if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
           getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
+
+        // Check for overflow.  This must be done with ConstantRange arithmetic
+        // because we could be called from within the ScalarEvolution overflow
+       // checking code.
+        MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
+        ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
+        ConstantRange ZExtMaxBECountRange =
+            MaxBECountRange.zextOrTrunc(BitWidth * 2 + 1);
 
         const SCEV *Start = AddRec->getStart();
         const SCEV *Step = AddRec->getStepRecurrence(*this);
+        ConstantRange StepSRange = getSignedRange(Step);
+        ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2 + 1);
+
+        ConstantRange StartURange = getUnsignedRange(Start);
+        ConstantRange EndURange =
+            StartURange.add(MaxBECountRange.multiply(StepSRange));
+
+        // Check for unsigned overflow.
+        ConstantRange ZExtStartURange =
+            StartURange.zextOrTrunc(BitWidth * 2 + 1);
+        ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2 + 1);
+        if (ZExtStartURange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
+            ZExtEndURange) {
+          APInt Min = APIntOps::umin(StartURange.getUnsignedMin(),
+                                     EndURange.getUnsignedMin());
+          APInt Max = APIntOps::umax(StartURange.getUnsignedMax(),
+                                     EndURange.getUnsignedMax());
+          bool IsFullRange = Min.isMinValue() && Max.isMaxValue();
+          if (!IsFullRange)
+            ConservativeResult =
+                ConservativeResult.intersectWith(ConstantRange(Min, Max + 1));
+        }
 
-        ConstantRange StartRange = getSignedRange(Start);
-        ConstantRange StepRange = getSignedRange(Step);
-        ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
-        ConstantRange EndRange =
-          StartRange.add(MaxBECountRange.multiply(StepRange));
-
-        // Check for overflow. This must be done with ConstantRange arithmetic
-        // because we could be called from within the ScalarEvolution overflow
-        // checking code.
-        ConstantRange ExtStartRange = StartRange.sextOrTrunc(BitWidth*2+1);
-        ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
-        ConstantRange ExtMaxBECountRange =
-          MaxBECountRange.zextOrTrunc(BitWidth*2+1);
-        ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1);
-        if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
-            ExtEndRange)
-          return setSignedRange(AddRec, ConservativeResult);
-
-        APInt Min = APIntOps::smin(StartRange.getSignedMin(),
-                                   EndRange.getSignedMin());
-        APInt Max = APIntOps::smax(StartRange.getSignedMax(),
-                                   EndRange.getSignedMax());
-        if (Min.isMinSignedValue() && Max.isMaxSignedValue())
-          return setSignedRange(AddRec, ConservativeResult);
-        return setSignedRange(AddRec,
-          ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
+        ConstantRange StartSRange = getSignedRange(Start);
+        ConstantRange EndSRange =
+            StartSRange.add(MaxBECountRange.multiply(StepSRange));
+
+        // Check for signed overflow. This must be done with ConstantRange
+        // arithmetic because we could be called from within the ScalarEvolution
+       // overflow checking code.
+        ConstantRange SExtStartSRange =
+            StartSRange.sextOrTrunc(BitWidth * 2 + 1);
+        ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2 + 1);
+        if (SExtStartSRange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
+            SExtEndSRange) {
+          APInt Min = APIntOps::smin(StartSRange.getSignedMin(),
+                                     EndSRange.getSignedMin());
+          APInt Max = APIntOps::smax(StartSRange.getSignedMax(),
+                                     EndSRange.getSignedMax());
+          bool IsFullRange = Min.isMinSignedValue() && Max.isMaxSignedValue();
+          if (!IsFullRange)
+            ConservativeResult =
+                ConservativeResult.intersectWith(ConstantRange(Min, Max + 1));
+        }
       }
     }
 
-    return setSignedRange(AddRec, ConservativeResult);
+    return setRange(AddRec, SignHint, ConservativeResult);
   }
 
   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
@@ -4065,18 +4033,31 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
     if (MDRange.hasValue())
       ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());
 
-    // For a SCEVUnknown, ask ValueTracking.
-    if (!U->getValue()->getType()->isIntegerTy() && !DL)
-      return setSignedRange(U, ConservativeResult);
-    unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AC, nullptr, DT);
-    if (NS <= 1)
-      return setSignedRange(U, ConservativeResult);
-    return setSignedRange(U, ConservativeResult.intersectWith(
-      ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
-                    APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1)));
+    // Split here to avoid paying the compile-time cost of calling both
+    // computeKnownBits and ComputeNumSignBits.  This restriction can be lifted
+    // if needed.
+    const DataLayout &DL = F->getParent()->getDataLayout();
+    if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {
+      // For a SCEVUnknown, ask ValueTracking.
+      APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
+      computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT);
+      if (Ones != ~Zeros + 1)
+        ConservativeResult =
+            ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));
+    } else {
+      assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&
+             "generalize as needed!");
+      unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AC, nullptr, DT);
+      if (NS > 1)
+        ConservativeResult = ConservativeResult.intersectWith(
+            ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
+                          APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1));
+    }
+
+    return setRange(U, SignHint, ConservativeResult);
   }
 
-  return setSignedRange(S, ConservativeResult);
+  return setRange(S, SignHint, ConservativeResult);
 }
 
 /// createSCEV - We know that there is no SCEV for the specified value.
@@ -4175,8 +4156,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
         unsigned TZ = A.countTrailingZeros();
         unsigned BitWidth = A.getBitWidth();
         APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-        computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL, 0, AC,
-                         nullptr, DT);
+        computeKnownBits(U->getOperand(0), KnownZero, KnownOne,
+                         F->getParent()->getDataLayout(), 0, AC, nullptr, DT);
 
         APInt EffectiveMask =
            APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
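The AddRec range hunks above detect wraparound by redoing Start + MaxBECount * Step at 2*BitWidth+1 bits and comparing against the extension of the narrow result; only when the two agree is the computed [Min, Max] interval trusted. The same trick at fixed scalar widths, as a hedged sketch with invented names:

#include <cstdint>

// Compute End = Start + Trips * Step at 8 bits and report whether that
// 8-bit result is exact, by redoing the math at a comfortably wider width,
// the analogue of the zextOrTrunc(BitWidth * 2 + 1) comparison in the hunk.
bool endWithoutWrap(uint8_t Start, uint8_t Step, uint8_t Trips,
                    uint8_t &End) {
  End = (uint8_t)(Start + Trips * Step); // narrow, mod-256 arithmetic
  uint32_t WideEnd = (uint32_t)Start + (uint32_t)Trips * (uint32_t)Step;
  return WideEnd == End; // equal => the narrow computation did not wrap
}

// Example: Start=200, Step=10, Trips=6 gives narrow End=4 but WideEnd=260,
// so the narrow range wrapped and must be discarded as too optimistic.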
@@ -5327,12 +5308,9 @@ static bool canConstantEvolve(Instruction *I, const Loop *L) {
   if (!L->contains(I)) return false;
 
   if (isa<PHINode>(I)) {
-    if (L->getHeader() == I->getParent())
-      return true;
-    else
-      // We don't currently keep track of the control flow needed to evaluate
-      // PHIs, so we cannot handle PHIs inside of loops.
-      return false;
+    // We don't currently keep track of the control flow needed to evaluate
+    // PHIs, so we cannot handle PHIs inside of loops.
+    return L->getHeader() == I->getParent();
   }
 
   // If we won't be able to constant fold this expression even if the operands
@@ -5403,7 +5381,7 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
 /// reason, return null.
 static Constant *EvaluateExpression(Value *V, const Loop *L,
                                     DenseMap<Instruction *, Constant *> &Vals,
-                                    const DataLayout *DL,
+                                    const DataLayout &DL,
                                     const TargetLibraryInfo *TLI) {
   // Convenient constant check, but redundant for recursive calls.
   if (Constant *C = dyn_cast<Constant>(V)) return C;
@@ -5492,6 +5470,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
 
   unsigned NumIterations = BEs.getZExtValue(); // must be in range
   unsigned IterationNum = 0;
+  const DataLayout &DL = F->getParent()->getDataLayout();
   for (; ; ++IterationNum) {
     if (IterationNum == NumIterations)
       return RetVal = CurrentIterVals[PN];  // Got exit value!
 
     // Compute the value of the PHIs for the next iteration.
     // EvaluateExpression adds non-phi values to the CurrentIterVals map.
     DenseMap<Instruction *, Constant *> NextIterVals;
-    Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL,
-                                           TLI);
+    Constant *NextPHI =
+        EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
     if (!NextPHI) return nullptr;        // Couldn't evaluate!
     NextIterVals[PN] = NextPHI;
@@ -5576,12 +5555,11 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
   // Okay, we find a PHI node that defines the trip count of this loop.  Execute
   // the loop symbolically to determine when the condition gets a value of
   // "ExitWhen".
-  unsigned MaxIterations = MaxBruteForceIterations;   // Limit analysis.
+  const DataLayout &DL = F->getParent()->getDataLayout();
   for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
-    ConstantInt *CondVal =
-      dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, CurrentIterVals,
-                                                       DL, TLI));
+    ConstantInt *CondVal = dyn_cast_or_null<ConstantInt>(
+        EvaluateExpression(Cond, L, CurrentIterVals, DL, TLI));
 
     // Couldn't symbolically evaluate.
     if (!CondVal) return getCouldNotCompute();
@@ -5814,16 +5792,16 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
         // Check to see if getSCEVAtScope actually made an improvement.
         if (MadeImprovement) {
           Constant *C = nullptr;
+          const DataLayout &DL = F->getParent()->getDataLayout();
           if (const CmpInst *CI = dyn_cast<CmpInst>(I))
-            C = ConstantFoldCompareInstOperands(CI->getPredicate(),
-                                                Operands[0], Operands[1], DL,
-                                                TLI);
+            C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
+                                                Operands[1], DL, TLI);
           else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
             if (!LI->isVolatile())
              C = ConstantFoldLoadFromConstPtr(Operands[0], DL);
           } else
-            C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
-                                         Operands, DL, TLI);
+            C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands,
+                                         DL, TLI);
           if (!C) return V;
           return getSCEV(C);
         }
@@ -6105,7 +6083,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
             dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT,
                                                         R1->getValue(),
                                                         R2->getValue()))) {
-          if (CB->getZExtValue() == false)
+          if (!CB->getZExtValue())
            std::swap(R1, R2);   // R1 is the minimum root now.
           // We can only use this value if the chrec ends up with an exact zero
@@ -6815,15 +6793,6 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
   ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
   if (!ICI) return false;
 
-  // Bail if the ICmp's operands' types are wider than the needed type
-  // before attempting to call getSCEV on them. This avoids infinite
-  // recursion, since the analysis of widening casts can require loop
-  // exit condition information for overflow checking, which would
-  // lead back here.
-  if (getTypeSizeInBits(LHS->getType()) <
-      getTypeSizeInBits(ICI->getOperand(0)->getType()))
-    return false;
-
   // Now that we found a conditional branch that dominates the loop or controls
   // the loop latch. Check to see if it is the comparison we are looking for.
   ICmpInst::Predicate FoundPred;
@@ -6835,9 +6804,17 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
   const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
   const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
 
-  // Balance the types. The case where FoundLHS' type is wider than
-  // LHS' type is checked for above.
-  if (getTypeSizeInBits(LHS->getType()) >
+  // Balance the types.
+  if (getTypeSizeInBits(LHS->getType()) <
+      getTypeSizeInBits(FoundLHS->getType())) {
+    if (CmpInst::isSigned(Pred)) {
+      LHS = getSignExtendExpr(LHS, FoundLHS->getType());
+      RHS = getSignExtendExpr(RHS, FoundLHS->getType());
+    } else {
+      LHS = getZeroExtendExpr(LHS, FoundLHS->getType());
+      RHS = getZeroExtendExpr(RHS, FoundLHS->getType());
+    }
+  } else if (getTypeSizeInBits(LHS->getType()) >
       getTypeSizeInBits(FoundLHS->getType())) {
     if (CmpInst::isSigned(FoundPred)) {
       FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
@@ -6963,6 +6940,9 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
                                             const SCEV *LHS, const SCEV *RHS,
                                             const SCEV *FoundLHS,
                                             const SCEV *FoundRHS) {
+  if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))
+    return true;
+
   return isImpliedCondOperandsHelper(Pred, LHS, RHS,
                                      FoundLHS, FoundRHS) ||
          // ~x < ~y --> x > y
@@ -7100,6 +7080,47 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
   return false;
 }
 
+/// isImpliedCondOperandsViaRanges - helper function for isImpliedCondOperands.
+/// Tries to get cases like "X `sgt` 0 => X - 1 `sgt` -1".
+bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
+                                                     const SCEV *LHS,
+                                                     const SCEV *RHS,
+                                                     const SCEV *FoundLHS,
+                                                     const SCEV *FoundRHS) {
+  if (!isa<SCEVConstant>(RHS) || !isa<SCEVConstant>(FoundRHS))
+    // The restriction on `FoundRHS` be lifted easily -- it exists only to
+    // reduce the compile time impact of this optimization.
+    return false;
+
+  const SCEVAddExpr *AddLHS = dyn_cast<SCEVAddExpr>(LHS);
+  if (!AddLHS || AddLHS->getOperand(1) != FoundLHS ||
+      !isa<SCEVConstant>(AddLHS->getOperand(0)))
+    return false;
+
+  APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getValue()->getValue();
+
+  // `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the
+ // antecedent "`FoundLHS` `Pred` `FoundRHS`".
+  ConstantRange FoundLHSRange =
+      ConstantRange::makeAllowedICmpRegion(Pred, ConstFoundRHS);
+
+  // Since `LHS` is `FoundLHS` + `AddLHS->getOperand(0)`, we can compute a range
+  // for `LHS`:
+  APInt Addend =
+      cast<SCEVConstant>(AddLHS->getOperand(0))->getValue()->getValue();
+  ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(Addend));
+
+  // We can also compute the range of values for `LHS` that satisfy the
+  // consequent, "`LHS` `Pred` `RHS`":
+  APInt ConstRHS = cast<SCEVConstant>(RHS)->getValue()->getValue();
+  ConstantRange SatisfyingLHSRange =
+      ConstantRange::makeSatisfyingICmpRegion(Pred, ConstRHS);
+
+  // The antecedent implies the consequent if every value of `LHS` that
+  // satisfies the antecedent also satisfies the consequent.
+  return SatisfyingLHSRange.contains(LHSRange);
+}
+
 // Verify if an linear IV with positive stride can overflow when in a
 // less-than comparison, knowing the invariant term of the comparison, the
 // stride and the knowledge of NSW/NUW flags on the recurrence.
@@ -7428,7 +7449,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
         if (ConstantInt *CB =
             dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
                                                         R1->getValue(),
                                                         R2->getValue()))) {
-          if (CB->getZExtValue() == false)
+          if (!CB->getZExtValue())
            std::swap(R1, R2);   // R1 is the minimum root now.
 
           // Make sure the root is not off by one.  The returned iteration should
@@ -7956,8 +7977,6 @@ bool ScalarEvolution::runOnFunction(Function &F) {
   this->F = &F;
   AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-  DL = DLP ? &DLP->getDataLayout() : nullptr;
   TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   return false;
@@ -8058,6 +8077,12 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
       OS << " --> ";
       const SCEV *SV = SE.getSCEV(&*I);
       SV->print(OS);
+      if (!isa<SCEVCouldNotCompute>(SV)) {
+        OS << " U: ";
+        SE.getUnsignedRange(SV).print(OS);
+        OS << " S: ";
+        SE.getSignedRange(SV).print(OS);
+      }
 
       const Loop *L = LI->getLoopFor((*I).getParent());
 
@@ -8065,6 +8090,12 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
         if (AtUse != SV) {
           OS << "  -->  ";
           AtUse->print(OS);
+          if (!isa<SCEVCouldNotCompute>(AtUse)) {
+            OS << " U: ";
+            SE.getUnsignedRange(AtUse).print(OS);
+            OS << " S: ";
+            SE.getSignedRange(AtUse).print(OS);
+          }
         }
 
         if (L) {
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 5c339ee..ccec0a8 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
 using namespace llvm;
 
@@ -79,7 +80,7 @@ ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
 
 bool
 ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) {
-  InitializeAliasAnalysis(this);
+  InitializeAliasAnalysis(this, &F.getParent()->getDataLayout());
   SE = &getAnalysis<ScalarEvolution>();
   return false;
}
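Working the motivating case of the isImpliedCondOperandsViaRanges helper above, "X sgt 0 implies X - 1 sgt -1", with concrete 8-bit ranges. This is an illustration only; the real code performs the same steps with ConstantRange::makeAllowedICmpRegion and makeSatisfyingICmpRegion on APInts of arbitrary width.

#include <cassert>
#include <cstdint>

int main() {
  // Antecedent: X sgt 0, so at 8 bits X lies in the allowed region [1, 127].
  int8_t XMin = 1, XMax = 127;
  // LHS is X + (-1); shifting the range by the addend gives [0, 126].
  int8_t LMin = (int8_t)(XMin - 1), LMax = (int8_t)(XMax - 1);
  // Consequent "LHS sgt -1" is satisfied exactly on [0, 127].
  int8_t SMin = 0, SMax = 127;
  // The implication holds because [0, 126] is contained in [0, 127].
  assert(SMin <= LMin && LMax <= SMax);
  return 0;
}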
#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -204,11 +205,9 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, /// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made /// unnecessary; in its place, just signed-divide Ops[i] by the scale and /// check to see if the divide was folded. -static bool FactorOutConstant(const SCEV *&S, - const SCEV *&Remainder, - const SCEV *Factor, - ScalarEvolution &SE, - const DataLayout *DL) { +static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder, + const SCEV *Factor, ScalarEvolution &SE, + const DataLayout &DL) { // Everything is divisible by one. if (Factor->isOne()) return true; @@ -248,35 +247,17 @@ static bool FactorOutConstant(const SCEV *&S, // In a Mul, check if there is a constant operand which is a multiple // of the given factor. if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) { - if (DL) { - // With DataLayout, the size is known. Check if there is a constant - // operand which is a multiple of the given factor. If so, we can - // factor it. - const SCEVConstant *FC = cast<SCEVConstant>(Factor); - if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0))) - if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) { - SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end()); - NewMulOps[0] = - SE.getConstant(C->getValue()->getValue().sdiv( - FC->getValue()->getValue())); - S = SE.getMulExpr(NewMulOps); - return true; - } - } else { - // Without DataLayout, check if Factor can be factored out of any of the - // Mul's operands. If so, we can just remove it. - for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { - const SCEV *SOp = M->getOperand(i); - const SCEV *Remainder = SE.getConstant(SOp->getType(), 0); - if (FactorOutConstant(SOp, Remainder, Factor, SE, DL) && - Remainder->isZero()) { - SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end()); - NewMulOps[i] = SOp; - S = SE.getMulExpr(NewMulOps); - return true; - } + // Size is known, check if there is a constant operand which is a multiple + // of the given factor. If so, we can factor it. + const SCEVConstant *FC = cast<SCEVConstant>(Factor); + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0))) + if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) { + SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end()); + NewMulOps[0] = SE.getConstant( + C->getValue()->getValue().sdiv(FC->getValue()->getValue())); + S = SE.getMulExpr(NewMulOps); + return true; } - } } // In an AddRec, check if both start and step are divisible. @@ -393,7 +374,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, PointerType *PTy, Type *Ty, Value *V) { - Type *ElTy = PTy->getElementType(); + Type *OriginalElTy = PTy->getElementType(); + Type *ElTy = OriginalElTy; SmallVector<Value *, 4> GepIndices; SmallVector<const SCEV *, 8> Ops(op_begin, op_end); bool AnyNonZeroIndices = false; @@ -402,9 +384,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // without the other. SplitAddRecs(Ops, Ty, SE); - Type *IntPtrTy = SE.DL - ? SE.DL->getIntPtrType(PTy) - : Type::getInt64Ty(PTy->getContext()); + Type *IntPtrTy = DL.getIntPtrType(PTy); // Descend down the pointer's type and attempt to convert the other // operands into GEP indices, at each level. 
The first index in a GEP @@ -422,7 +402,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, for (unsigned i = 0, e = Ops.size(); i != e; ++i) { const SCEV *Op = Ops[i]; const SCEV *Remainder = SE.getConstant(Ty, 0); - if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.DL)) { + if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) { // Op now has ElSize factored out. ScaledOps.push_back(Op); if (!Remainder->isZero()) @@ -456,43 +436,25 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, bool FoundFieldNo = false; // An empty struct has no fields. if (STy->getNumElements() == 0) break; - if (SE.DL) { - // With DataLayout, field offsets are known. See if a constant offset - // falls within any of the struct fields. - if (Ops.empty()) break; - if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0])) - if (SE.getTypeSizeInBits(C->getType()) <= 64) { - const StructLayout &SL = *SE.DL->getStructLayout(STy); - uint64_t FullOffset = C->getValue()->getZExtValue(); - if (FullOffset < SL.getSizeInBytes()) { - unsigned ElIdx = SL.getElementContainingOffset(FullOffset); - GepIndices.push_back( - ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx)); - ElTy = STy->getTypeAtIndex(ElIdx); - Ops[0] = + // Field offsets are known. See if a constant offset falls within any of + // the struct fields. + if (Ops.empty()) + break; + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0])) + if (SE.getTypeSizeInBits(C->getType()) <= 64) { + const StructLayout &SL = *DL.getStructLayout(STy); + uint64_t FullOffset = C->getValue()->getZExtValue(); + if (FullOffset < SL.getSizeInBytes()) { + unsigned ElIdx = SL.getElementContainingOffset(FullOffset); + GepIndices.push_back( + ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx)); + ElTy = STy->getTypeAtIndex(ElIdx); + Ops[0] = SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx)); - AnyNonZeroIndices = true; - FoundFieldNo = true; - } - } - } else { - // Without DataLayout, just check for an offsetof expression of the - // appropriate struct type. - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) { - Type *CTy; - Constant *FieldNo; - if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) { - GepIndices.push_back(FieldNo); - ElTy = - STy->getTypeAtIndex(cast<ConstantInt>(FieldNo)->getZExtValue()); - Ops[i] = SE.getConstant(Ty, 0); - AnyNonZeroIndices = true; - FoundFieldNo = true; - break; - } + AnyNonZeroIndices = true; + FoundFieldNo = true; } - } + } // If no struct field offsets were found, tentatively assume that // field zero was selected (since the zero offset would obviously // be folded away). @@ -597,7 +559,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, Value *Casted = V; if (V->getType() != PTy) Casted = InsertNoopCastOfTo(Casted, PTy); - Value *GEP = Builder.CreateGEP(Casted, + Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep"); Ops.push_back(SE.getUnknown(GEP)); @@ -1746,7 +1708,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, // Fold constant phis. They may be congruent to other constant phis and // would confuse the logic below that expects proper IVs. 
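// A minimal sketch of the calling convention this patch migrates to
// (hypothetical caller, not part of the patch; TLI/DT/AC assumed in scope):
// DataLayout now lives on the Module, so clients fetch a reference instead
// of probing for DataLayoutPass:
//
//   const DataLayout &DL = F.getParent()->getDataLayout();
//   if (Value *V = SimplifyInstruction(&Inst, DL, TLI, DT, AC))
//     Inst.replaceAllUsesWith(V);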
- if (Value *V = SimplifyInstruction(Phi, SE.DL, SE.TLI, SE.DT, SE.AC)) { + if (Value *V = SimplifyInstruction(Phi, DL, SE.TLI, SE.DT, SE.AC)) { Phi->replaceAllUsesWith(V); DeadInsts.push_back(Phi); ++NumElim; @@ -1811,9 +1773,12 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, << *IsomorphicInc << '\n'); Value *NewInc = OrigInc; if (OrigInc->getType() != IsomorphicInc->getType()) { - Instruction *IP = isa<PHINode>(OrigInc) - ? (Instruction*)L->getHeader()->getFirstInsertionPt() - : OrigInc->getNextNode(); + Instruction *IP = nullptr; + if (PHINode *PN = dyn_cast<PHINode>(OrigInc)) + IP = PN->getParent()->getFirstInsertionPt(); + else + IP = OrigInc->getNextNode(); + IRBuilder<> Builder(IP); Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc()); NewInc = Builder. diff --git a/lib/Analysis/ScopedNoAliasAA.cpp b/lib/Analysis/ScopedNoAliasAA.cpp index c6ea3af..02f8b0b 100644 --- a/lib/Analysis/ScopedNoAliasAA.cpp +++ b/lib/Analysis/ScopedNoAliasAA.cpp @@ -80,7 +80,7 @@ public: initializeScopedNoAliasAAPass(*PassRegistry::getPassRegistry()); } - void initializePass() override { InitializeAliasAnalysis(this); } + bool doInitialization(Module &M) override; /// getAdjustedAnalysisPointer - This method is used when a pass implements /// an analysis interface through multiple inheritance. If needed, it @@ -119,6 +119,11 @@ ImmutablePass *llvm::createScopedNoAliasAAPass() { return new ScopedNoAliasAA(); } +bool ScopedNoAliasAA::doInitialization(Module &M) { + InitializeAliasAnalysis(this, &M.getDataLayout()); + return true; +} + void ScopedNoAliasAA::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp index 91041fc..7e574d5 100644 --- a/lib/Analysis/TargetLibraryInfo.cpp +++ b/lib/Analysis/TargetLibraryInfo.cpp @@ -13,341 +13,22 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/ADT/Triple.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; -const char* TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = - { - "_IO_getc", - "_IO_putc", - "_ZdaPv", - "_ZdaPvRKSt9nothrow_t", - "_ZdaPvj", - "_ZdaPvm", - "_ZdlPv", - "_ZdlPvRKSt9nothrow_t", - "_ZdlPvj", - "_ZdlPvm", - "_Znaj", - "_ZnajRKSt9nothrow_t", - "_Znam", - "_ZnamRKSt9nothrow_t", - "_Znwj", - "_ZnwjRKSt9nothrow_t", - "_Znwm", - "_ZnwmRKSt9nothrow_t", - "__cospi", - "__cospif", - "__cxa_atexit", - "__cxa_guard_abort", - "__cxa_guard_acquire", - "__cxa_guard_release", - "__isoc99_scanf", - "__isoc99_sscanf", - "__memcpy_chk", - "__memmove_chk", - "__memset_chk", - "__sincospi_stret", - "__sincospif_stret", - "__sinpi", - "__sinpif", - "__sqrt_finite", - "__sqrtf_finite", - "__sqrtl_finite", - "__stpcpy_chk", - "__stpncpy_chk", - "__strcpy_chk", - "__strdup", - "__strncpy_chk", - "__strndup", - "__strtok_r", - "abs", - "access", - "acos", - "acosf", - "acosh", - "acoshf", - "acoshl", - "acosl", - "asin", - "asinf", - "asinh", - "asinhf", - "asinhl", - "asinl", - "atan", - "atan2", - "atan2f", - "atan2l", - "atanf", - "atanh", - "atanhf", - "atanhl", - "atanl", - "atof", - "atoi", - "atol", - "atoll", - "bcmp", - "bcopy", - "bzero", - "calloc", - "cbrt", - "cbrtf", - "cbrtl", - "ceil", - "ceilf", - "ceill", - "chmod", - "chown", - "clearerr", - "closedir", - "copysign", - "copysignf", - "copysignl", - "cos", - "cosf", - "cosh", - "coshf", - "coshl", - "cosl", - "ctermid", - "exp", - "exp10", - "exp10f", - "exp10l", - "exp2", - "exp2f", - "exp2l", - "expf", - "expl", - "expm1", - 
"expm1f", - "expm1l", - "fabs", - "fabsf", - "fabsl", - "fclose", - "fdopen", - "feof", - "ferror", - "fflush", - "ffs", - "ffsl", - "ffsll", - "fgetc", - "fgetpos", - "fgets", - "fileno", - "fiprintf", - "flockfile", - "floor", - "floorf", - "floorl", - "fmax", - "fmaxf", - "fmaxl", - "fmin", - "fminf", - "fminl", - "fmod", - "fmodf", - "fmodl", - "fopen", - "fopen64", - "fprintf", - "fputc", - "fputs", - "fread", - "free", - "frexp", - "frexpf", - "frexpl", - "fscanf", - "fseek", - "fseeko", - "fseeko64", - "fsetpos", - "fstat", - "fstat64", - "fstatvfs", - "fstatvfs64", - "ftell", - "ftello", - "ftello64", - "ftrylockfile", - "funlockfile", - "fwrite", - "getc", - "getc_unlocked", - "getchar", - "getenv", - "getitimer", - "getlogin_r", - "getpwnam", - "gets", - "gettimeofday", - "htonl", - "htons", - "iprintf", - "isascii", - "isdigit", - "labs", - "lchown", - "ldexp", - "ldexpf", - "ldexpl", - "llabs", - "log", - "log10", - "log10f", - "log10l", - "log1p", - "log1pf", - "log1pl", - "log2", - "log2f", - "log2l", - "logb", - "logbf", - "logbl", - "logf", - "logl", - "lstat", - "lstat64", - "malloc", - "memalign", - "memccpy", - "memchr", - "memcmp", - "memcpy", - "memmove", - "memrchr", - "memset", - "memset_pattern16", - "mkdir", - "mktime", - "modf", - "modff", - "modfl", - "nearbyint", - "nearbyintf", - "nearbyintl", - "ntohl", - "ntohs", - "open", - "open64", - "opendir", - "pclose", - "perror", - "popen", - "posix_memalign", - "pow", - "powf", - "powl", - "pread", - "printf", - "putc", - "putchar", - "puts", - "pwrite", - "qsort", - "read", - "readlink", - "realloc", - "reallocf", - "realpath", - "remove", - "rename", - "rewind", - "rint", - "rintf", - "rintl", - "rmdir", - "round", - "roundf", - "roundl", - "scanf", - "setbuf", - "setitimer", - "setvbuf", - "sin", - "sinf", - "sinh", - "sinhf", - "sinhl", - "sinl", - "siprintf", - "snprintf", - "sprintf", - "sqrt", - "sqrtf", - "sqrtl", - "sscanf", - "stat", - "stat64", - "statvfs", - "statvfs64", - "stpcpy", - "stpncpy", - "strcasecmp", - "strcat", - "strchr", - "strcmp", - "strcoll", - "strcpy", - "strcspn", - "strdup", - "strlen", - "strncasecmp", - "strncat", - "strncmp", - "strncpy", - "strndup", - "strnlen", - "strpbrk", - "strrchr", - "strspn", - "strstr", - "strtod", - "strtof", - "strtok", - "strtok_r", - "strtol", - "strtold", - "strtoll", - "strtoul", - "strtoull", - "strxfrm", - "system", - "tan", - "tanf", - "tanh", - "tanhf", - "tanhl", - "tanl", - "times", - "tmpfile", - "tmpfile64", - "toascii", - "trunc", - "truncf", - "truncl", - "uname", - "ungetc", - "unlink", - "unsetenv", - "utime", - "utimes", - "valloc", - "vfprintf", - "vfscanf", - "vprintf", - "vscanf", - "vsnprintf", - "vsprintf", - "vsscanf", - "write" - }; +static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary( + "vector-library", cl::Hidden, cl::desc("Vector functions library"), + cl::init(TargetLibraryInfoImpl::NoLibrary), + cl::values(clEnumValN(TargetLibraryInfoImpl::NoLibrary, "none", + "No vector functions library"), + clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate", + "Accelerate framework"), + clEnumValEnd)); + +const char *const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = { +#define TLI_DEFINE_STRING +#include "llvm/Analysis/TargetLibraryInfo.def" +}; static bool hasSinCosPiStret(const Triple &T) { // Only Darwin variants have _stret versions of combined trig functions. @@ -371,7 +52,7 @@ static bool hasSinCosPiStret(const Triple &T) { /// specified target triple. 
This should be carefully written so that a missing /// target triple gets a sane set of defaults. static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, - const char **StandardNames) { + const char *const *StandardNames) { #ifndef NDEBUG // Verify that the StandardNames array is in alphabetical order. for (unsigned F = 1; F < LibFunc::NumLibFuncs; ++F) { @@ -674,6 +355,8 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc::statvfs64); TLI.setUnavailable(LibFunc::tmpfile64); } + + TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary); } TargetLibraryInfoImpl::TargetLibraryInfoImpl() { @@ -693,12 +376,16 @@ TargetLibraryInfoImpl::TargetLibraryInfoImpl(const Triple &T) { TargetLibraryInfoImpl::TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI) : CustomNames(TLI.CustomNames) { memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray)); + VectorDescs = TLI.VectorDescs; + ScalarDescs = TLI.ScalarDescs; } TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI) : CustomNames(std::move(TLI.CustomNames)) { std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray), AvailableArray); + VectorDescs = TLI.VectorDescs; + ScalarDescs = TLI.ScalarDescs; } TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(const TargetLibraryInfoImpl &TLI) { @@ -714,40 +401,32 @@ TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(TargetLibraryInfoImpl && return *this; } -namespace { -struct StringComparator { - /// Compare two strings and return true if LHS is lexicographically less than - /// RHS. Requires that RHS doesn't contain any zero bytes. - bool operator()(const char *LHS, StringRef RHS) const { - // Compare prefixes with strncmp. If prefixes match we know that LHS is - // greater or equal to RHS as RHS can't contain any '\0'. - return std::strncmp(LHS, RHS.data(), RHS.size()) < 0; - } - - // Provided for compatibility with MSVC's debug mode. - bool operator()(StringRef LHS, const char *RHS) const { return LHS < RHS; } - bool operator()(StringRef LHS, StringRef RHS) const { return LHS < RHS; } - bool operator()(const char *LHS, const char *RHS) const { - return std::strcmp(LHS, RHS) < 0; - } -}; -} - -bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, - LibFunc::Func &F) const { - const char **Start = &StandardNames[0]; - const char **End = &StandardNames[LibFunc::NumLibFuncs]; - +static StringRef sanitizeFunctionName(StringRef funcName) { // Filter out empty names and names containing null bytes, those can't be in // our table. if (funcName.empty() || funcName.find('\0') != StringRef::npos) - return false; + return StringRef(); // Check for \01 prefix that is used to mangle __asm declarations and // strip it if present. 
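// Hypothetical example (illustrative, not from the patch): an __asm-renamed
// declaration can reach us spelled "\01_fopen"; stripping the \01 marker
// below leaves "_fopen" for the table lookup, i.e.
//   sanitizeFunctionName("\01_fopen") == "_fopen"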
if (funcName.front() == '\01') funcName = funcName.substr(1); - const char **I = std::lower_bound(Start, End, funcName, StringComparator()); + return funcName; +} + +bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, + LibFunc::Func &F) const { + const char *const *Start = &StandardNames[0]; + const char *const *End = &StandardNames[LibFunc::NumLibFuncs]; + + funcName = sanitizeFunctionName(funcName); + if (funcName.empty()) + return false; + + const char *const *I = std::lower_bound( + Start, End, funcName, [](const char *LHS, StringRef RHS) { + return std::strncmp(LHS, RHS.data(), RHS.size()) < 0; + }); if (I != End && *I == funcName) { F = (LibFunc::Func)(I - Start); return true; @@ -759,6 +438,94 @@ void TargetLibraryInfoImpl::disableAllFunctions() { memset(AvailableArray, 0, sizeof(AvailableArray)); } +static bool compareByScalarFnName(const VecDesc &LHS, const VecDesc &RHS) { + return std::strncmp(LHS.ScalarFnName, RHS.ScalarFnName, + std::strlen(RHS.ScalarFnName)) < 0; +} + +static bool compareByVectorFnName(const VecDesc &LHS, const VecDesc &RHS) { + return std::strncmp(LHS.VectorFnName, RHS.VectorFnName, + std::strlen(RHS.VectorFnName)) < 0; +} + +static bool compareWithScalarFnName(const VecDesc &LHS, StringRef S) { + return std::strncmp(LHS.ScalarFnName, S.data(), S.size()) < 0; +} + +static bool compareWithVectorFnName(const VecDesc &LHS, StringRef S) { + return std::strncmp(LHS.VectorFnName, S.data(), S.size()) < 0; +} + +void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef<VecDesc> Fns) { + VectorDescs.insert(VectorDescs.end(), Fns.begin(), Fns.end()); + std::sort(VectorDescs.begin(), VectorDescs.end(), compareByScalarFnName); + + ScalarDescs.insert(ScalarDescs.end(), Fns.begin(), Fns.end()); + std::sort(ScalarDescs.begin(), ScalarDescs.end(), compareByVectorFnName); +} + +void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( + enum VectorLibrary VecLib) { + switch (VecLib) { + case Accelerate: { + const VecDesc VecFuncs[] = { + {"expf", "vexpf", 4}, + {"llvm.exp.f32", "vexpf", 4}, + {"logf", "vlogf", 4}, + {"llvm.log.f32", "vlogf", 4}, + {"sqrtf", "vsqrtf", 4}, + {"llvm.sqrt.f32", "vsqrtf", 4}, + {"fabsf", "vfabsf", 4}, + {"llvm.fabs.f32", "vfabsf", 4}, + }; + addVectorizableFunctions(VecFuncs); + break; + } + case NoLibrary: + break; + } +} + +bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const { + funcName = sanitizeFunctionName(funcName); + if (funcName.empty()) + return false; + + std::vector<VecDesc>::const_iterator I = std::lower_bound( + VectorDescs.begin(), VectorDescs.end(), funcName, + compareWithScalarFnName); + return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName; +} + +StringRef TargetLibraryInfoImpl::getVectorizedFunction(StringRef F, + unsigned VF) const { + F = sanitizeFunctionName(F); + if (F.empty()) + return F; + std::vector<VecDesc>::const_iterator I = std::lower_bound( + VectorDescs.begin(), VectorDescs.end(), F, compareWithScalarFnName); + while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) { + if (I->VectorizationFactor == VF) + return I->VectorFnName; + ++I; + } + return StringRef(); +} + +StringRef TargetLibraryInfoImpl::getScalarizedFunction(StringRef F, + unsigned &VF) const { + F = sanitizeFunctionName(F); + if (F.empty()) + return F; + + std::vector<VecDesc>::const_iterator I = std::lower_bound( + ScalarDescs.begin(), ScalarDescs.end(), F, compareWithVectorFnName); + if (I == ScalarDescs.end() || StringRef(I->VectorFnName) != F) + return StringRef(); + VF = 
I->VectorizationFactor; + return I->ScalarFnName; +} + TargetLibraryInfo TargetLibraryAnalysis::run(Module &M) { if (PresetInfoImpl) return TargetLibraryInfo(*PresetInfoImpl); diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 7ff29b0..f51c7f54 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -143,6 +143,10 @@ bool TargetTransformInfo::shouldBuildLookupTables() const { return TTIImpl->shouldBuildLookupTables(); } +bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const { + return TTIImpl->enableAggressiveInterleaving(LoopHasReductions); +} + TargetTransformInfo::PopcntSupportKind TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const { return TTIImpl->getPopcntSupport(IntTyWidthInBit); @@ -233,6 +237,11 @@ TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, return TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys); } +unsigned TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy, + ArrayRef<Type *> Tys) const { + return TTIImpl->getCallInstrCost(F, RetTy, Tys); +} + unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const { return TTIImpl->getNumberOfParts(Tp); } @@ -277,7 +286,7 @@ TargetIRAnalysis::Result TargetIRAnalysis::run(Function &F) { char TargetIRAnalysis::PassID; TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(Function &F) { - return Result(F.getParent()->getDataLayout()); + return Result(&F.getParent()->getDataLayout()); } // Register the basic pass. diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp index ff89558..1158725 100644 --- a/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -129,6 +129,7 @@ #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" +#include "llvm/ADT/SetVector.h" using namespace llvm; // A handy option for disabling TBAA functionality. The same effect can also be @@ -282,9 +283,7 @@ namespace { initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry()); } - void initializePass() override { - InitializeAliasAnalysis(this); - } + bool doInitialization(Module &M) override; /// getAdjustedAnalysisPointer - This method is used when a pass implements /// an analysis interface through multiple inheritance. If needed, it @@ -321,6 +320,11 @@ ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() { return new TypeBasedAliasAnalysis(); } +bool TypeBasedAliasAnalysis::doInitialization(Module &M) { + InitializeAliasAnalysis(this, &M.getDataLayout()); + return true; +} + void TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -575,18 +579,22 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { if (!B) return nullptr; } - SmallVector<MDNode *, 4> PathA; + SmallSetVector<MDNode *, 4> PathA; MDNode *T = A; while (T) { - PathA.push_back(T); + if (PathA.count(T)) + report_fatal_error("Cycle found in TBAA metadata."); + PathA.insert(T); T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : nullptr; } - SmallVector<MDNode *, 4> PathB; + SmallSetVector<MDNode *, 4> PathB; T = B; while (T) { - PathB.push_back(T); + if (PathB.count(T)) + report_fatal_error("Cycle found in TBAA metadata."); + PathB.insert(T); T = T->getNumOperands() >= 2 ? 
cast_or_null<MDNode>(T->getOperand(1)) : nullptr; } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 0458d28..f329e3a 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -39,13 +39,41 @@ using namespace llvm::PatternMatch; const unsigned MaxDepth = 6; +/// Enable an experimental feature to leverage information about dominating +/// conditions to compute known bits. The individual options below control how +/// hard we search. The defaults are chosen to be fairly aggressive. If you +/// run into compile time problems when testing, scale them back and report +/// your findings. +static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions", + cl::Hidden, cl::init(false)); + +// This is expensive, so we only do it for the top level query value. +// (TODO: evaluate cost vs profit, consider higher thresholds) +static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth", + cl::Hidden, cl::init(1)); + +/// How many dominating blocks should be scanned looking for dominating +/// conditions? +static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks", + cl::Hidden, + cl::init(20000)); + +// Controls the number of uses of the value searched for possible +// dominating comparisons. +static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses", + cl::Hidden, cl::init(2000)); + +// If true, only consider compares whose sole use is a branch. +static cl::opt<bool> DomConditionsSingleCmpUse("dom-conditions-single-cmp-use", + cl::Hidden, cl::init(false)); + /// Returns the bitwidth of the given scalar or pointer type (if unknown, returns /// 0). For vector types, returns the element type's bitwidth. -static unsigned getBitWidth(Type *Ty, const DataLayout *TD) { +static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { if (unsigned BitWidth = Ty->getScalarSizeInBits()) return BitWidth; - return TD ? 
TD->getPointerTypeSizeInBits(Ty) : 0; + return DL.getPointerTypeSizeInBits(Ty); } // Many of these functions have internal versions that take an assumption @@ -97,73 +125,73 @@ static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) { } static void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, - const DataLayout *TD, unsigned Depth, - const Query &Q); + const DataLayout &DL, unsigned Depth, + const Query &Q); void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, - const DataLayout *TD, unsigned Depth, + const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - ::computeKnownBits(V, KnownZero, KnownOne, TD, Depth, + ::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT)); } static void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, - const DataLayout *TD, unsigned Depth, - const Query &Q); + const DataLayout &DL, unsigned Depth, + const Query &Q); void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, - const DataLayout *TD, unsigned Depth, + const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - ::ComputeSignBit(V, KnownZero, KnownOne, TD, Depth, + ::ComputeSignBit(V, KnownZero, KnownOne, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT)); } static bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, - const Query &Q); + const Query &Q, const DataLayout &DL); -bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, - AssumptionCache *AC, const Instruction *CxtI, +bool llvm::isKnownToBeAPowerOfTwo(Value *V, const DataLayout &DL, bool OrZero, + unsigned Depth, AssumptionCache *AC, + const Instruction *CxtI, const DominatorTree *DT) { return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth, - Query(AC, safeCxtI(V, CxtI), DT)); + Query(AC, safeCxtI(V, CxtI), DT), DL); } -static bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, +static bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, const Query &Q); -bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, +bool llvm::isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - return ::isKnownNonZero(V, TD, Depth, Query(AC, safeCxtI(V, CxtI), DT)); + return ::isKnownNonZero(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT)); } -static bool MaskedValueIsZero(Value *V, const APInt &Mask, - const DataLayout *TD, unsigned Depth, - const Query &Q); +static bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL, + unsigned Depth, const Query &Q); -bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout *TD, +bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - return ::MaskedValueIsZero(V, Mask, TD, Depth, + return ::MaskedValueIsZero(V, Mask, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT)); } -static unsigned ComputeNumSignBits(Value *V, const DataLayout *TD, +static unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth, const Query &Q); -unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, +unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - return ::ComputeNumSignBits(V, TD, Depth, Query(AC, 
safeCxtI(V, CxtI), DT)); + return ::ComputeNumSignBits(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT)); } static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2, APInt &KnownOne2, - const DataLayout *TD, unsigned Depth, + const DataLayout &DL, unsigned Depth, const Query &Q) { if (!Add) { if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) { @@ -175,7 +203,7 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1, Q); + computeKnownBits(Op1, KnownZero2, KnownOne2, DL, Depth + 1, Q); // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is @@ -194,8 +222,8 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, // If an initial sequence of bits in the result is not needed, the // corresponding bits in the operands are not needed. APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1, Q); - computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1, Q); + computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, DL, Depth + 1, Q); + computeKnownBits(Op1, KnownZero2, KnownOne2, DL, Depth + 1, Q); // Carry in a 1 for a subtract, rather than a 0. APInt CarryIn(BitWidth, 0); @@ -243,11 +271,11 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2, APInt &KnownOne2, - const DataLayout *TD, unsigned Depth, + const DataLayout &DL, unsigned Depth, const Query &Q) { unsigned BitWidth = KnownZero.getBitWidth(); - computeKnownBits(Op1, KnownZero, KnownOne, TD, Depth+1, Q); - computeKnownBits(Op0, KnownZero2, KnownOne2, TD, Depth+1, Q); + computeKnownBits(Op1, KnownZero, KnownOne, DL, Depth + 1, Q); + computeKnownBits(Op0, KnownZero2, KnownOne2, DL, Depth + 1, Q); bool isKnownNegative = false; bool isKnownNonNegative = false; @@ -268,9 +296,9 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW, // negative or zero. 
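// (Illustrative numbers, hypothetical: (-3) * 4 = -12 and (-3) * 0 = 0, so
// concluding strict negativity additionally requires the non-negative
// operand to be known non-zero -- which is what the isKnownNonZero calls
// below establish.)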
if (!isKnownNonNegative) isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 && - isKnownNonZero(Op0, TD, Depth, Q)) || + isKnownNonZero(Op0, DL, Depth, Q)) || (isKnownNegativeOp0 && isKnownNonNegativeOp1 && - isKnownNonZero(Op1, TD, Depth, Q)); + isKnownNonZero(Op1, DL, Depth, Q)); } } @@ -382,8 +410,7 @@ static bool isAssumeLikeIntrinsic(const Instruction *I) { return false; } -static bool isValidAssumeForContext(Value *V, const Query &Q, - const DataLayout *DL) { +static bool isValidAssumeForContext(Value *V, const Query &Q) { Instruction *Inv = cast<Instruction>(V); // There are two restrictions on the use of an assume: @@ -403,8 +430,7 @@ static bool isValidAssumeForContext(Value *V, const Query &Q, for (BasicBlock::const_iterator I = std::next(BasicBlock::const_iterator(Q.CxtI)), IE(Inv); I != IE; ++I) - if (!isSafeToSpeculativelyExecute(I, DL) && - !isAssumeLikeIntrinsic(I)) + if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I)) return false; return !isEphemeralValueOf(Inv, Q.CxtI); @@ -428,8 +454,7 @@ static bool isValidAssumeForContext(Value *V, const Query &Q, for (BasicBlock::const_iterator I = std::next(BasicBlock::const_iterator(Q.CxtI)), IE(Inv); I != IE; ++I) - if (!isSafeToSpeculativelyExecute(I, DL) && - !isAssumeLikeIntrinsic(I)) + if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I)) return false; return !isEphemeralValueOf(Inv, Q.CxtI); @@ -440,10 +465,9 @@ static bool isValidAssumeForContext(Value *V, const Query &Q, bool llvm::isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, - const DataLayout *DL, const DominatorTree *DT) { - return ::isValidAssumeForContext(const_cast<Instruction*>(I), - Query(nullptr, CxtI, DT), DL); + return ::isValidAssumeForContext(const_cast<Instruction *>(I), + Query(nullptr, CxtI, DT)); } template<typename LHS, typename RHS> @@ -474,9 +498,181 @@ m_c_Xor(const LHS &L, const RHS &R) { return m_CombineOr(m_Xor(L, R), m_Xor(R, L)); } +/// Compute known bits in 'V' under the assumption that the condition 'Cmp' is +/// true (at the context instruction.) This is mostly a utility function for +/// the prototype dominating conditions reasoning below. +static void computeKnownBitsFromTrueCondition(Value *V, ICmpInst *Cmp, + APInt &KnownZero, + APInt &KnownOne, + const DataLayout &DL, + unsigned Depth, const Query &Q) { + Value *LHS = Cmp->getOperand(0); + Value *RHS = Cmp->getOperand(1); + // TODO: We could potentially be more aggressive here. This would be worth + // evaluating. If we can, explore commoning this code with the assume + // handling logic. + if (LHS != V && RHS != V) + return; + + const unsigned BitWidth = KnownZero.getBitWidth(); + + switch (Cmp->getPredicate()) { + default: + // We know nothing from this condition + break; + // TODO: implement unsigned bound from below (known one bits) + // TODO: common condition check implementations with assumes + // TODO: implement other patterns from assume (e.g. V & B == A) + case ICmpInst::ICMP_SGT: + if (LHS == V) { + APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0); + computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q); + if (KnownOneTemp.isAllOnesValue() || KnownZeroTemp.isNegative()) { + // We know that the sign bit is zero. 
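// (Illustrative reasoning: the guard above fires when RHS is known to be
// exactly -1, or RHS's sign bit is known zero so RHS >= 0; either way
// LHS s> RHS forces LHS >= 0, and the next line records the clear sign bit.)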
+ KnownZero |= APInt::getSignBit(BitWidth); + } + } + break; + case ICmpInst::ICMP_EQ: + if (LHS == V) + computeKnownBits(RHS, KnownZero, KnownOne, DL, Depth + 1, Q); + else if (RHS == V) + computeKnownBits(LHS, KnownZero, KnownOne, DL, Depth + 1, Q); + else + llvm_unreachable("missing use?"); + break; + case ICmpInst::ICMP_ULE: + if (LHS == V) { + APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0); + computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q); + // The known zero bits carry over + unsigned SignBits = KnownZeroTemp.countLeadingOnes(); + KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits); + } + break; + case ICmpInst::ICMP_ULT: + if (LHS == V) { + APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0); + computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q); + // Whatever high bits in rhs are zero are known to be zero (if rhs is a + // power of 2, then one more). + unsigned SignBits = KnownZeroTemp.countLeadingOnes(); + if (isKnownToBeAPowerOfTwo(RHS, false, Depth + 1, Query(Q, Cmp), DL)) + SignBits++; + KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits); + } + break; + }; +} + +/// Compute known bits in 'V' from conditions which are known to be true along +/// all paths leading to the context instruction. In particular, look for +/// cases where one branch of an interesting condition dominates the context +/// instruction. This does not do general dataflow. +/// NOTE: This code is EXPERIMENTAL and currently off by default. +static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero, + APInt &KnownOne, + const DataLayout &DL, + unsigned Depth, + const Query &Q) { + // Need both the dominator tree and the query location to do anything useful + if (!Q.DT || !Q.CxtI) + return; + Instruction *Cxt = const_cast<Instruction *>(Q.CxtI); + + // Avoid useless work + if (auto VI = dyn_cast<Instruction>(V)) + if (VI->getParent() == Cxt->getParent()) + return; + + // Note: We currently implement two options. It's not clear which of these + // will survive long term; we need data for that. + // Option 1 - Try walking the dominator tree looking for conditions which + // might apply. This works well for local conditions (loop guards, etc.), + // but not as well for things far from the context instruction (presuming a + // low max blocks explored). If we can set a high enough limit, this would + // be all we need. + // Option 2 - We restrict our search to those conditions which are uses of + // the value we're interested in. This is independent of dom structure, + // but is slightly less powerful without looking through lots of use chains. + // It does handle conditions far from the context instruction (e.g. early + // function exits on entry) really well though. + + // Option 1 - Search the dom tree + unsigned NumBlocksExplored = 0; + BasicBlock *Current = Cxt->getParent(); + while (true) { + // Stop searching if we've gone too far up the chain + if (NumBlocksExplored >= DomConditionsMaxDomBlocks) + break; + NumBlocksExplored++; + + if (!Q.DT->getNode(Current)->getIDom()) + break; + Current = Q.DT->getNode(Current)->getIDom()->getBlock(); + if (!Current) + // found function entry + break; + + BranchInst *BI = dyn_cast<BranchInst>(Current->getTerminator()); + if (!BI || BI->isUnconditional()) + continue; + ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getCondition()); + if (!Cmp) + continue; + + // We're looking for conditions that are guaranteed to hold at the context + // instruction. 
Finding a condition where one path dominates the context + // isn't enough because both the true and false cases could merge before + // the context instruction we're actually interested in. Instead, we need + // to ensure that the taken *edge* dominates the context instruction. + BasicBlock *BB0 = BI->getSuccessor(0); + BasicBlockEdge Edge(BI->getParent(), BB0); + if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent())) + continue; + + computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, DL, Depth, + Q); + } + + // Option 2 - Search the other uses of V + unsigned NumUsesExplored = 0; + for (auto U : V->users()) { + // Avoid massive lists + if (NumUsesExplored >= DomConditionsMaxUses) + break; + NumUsesExplored++; + // Consider only compare instructions uniquely controlling a branch + ICmpInst *Cmp = dyn_cast<ICmpInst>(U); + if (!Cmp) + continue; + + if (DomConditionsSingleCmpUse && !Cmp->hasOneUse()) + continue; + + for (auto *CmpU : Cmp->users()) { + BranchInst *BI = dyn_cast<BranchInst>(CmpU); + if (!BI || BI->isUnconditional()) + continue; + // We're looking for conditions that are guaranteed to hold at the + // context instruction. Finding a condition where one path dominates + // the context isn't enough because both the true and false cases could + // merge before the context instruction we're actually interested in. + // Instead, we need to ensure that the taken *edge* dominates the context + // instruction. + BasicBlock *BB0 = BI->getSuccessor(0); + BasicBlockEdge Edge(BI->getParent(), BB0); + if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent())) + continue; + + computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, DL, Depth, + Q); + } + } +} + static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, - APInt &KnownOne, - const DataLayout *DL, + APInt &KnownOne, const DataLayout &DL, unsigned Depth, const Query &Q) { // Use of assumptions is context-sensitive. If we don't have a context, we // cannot use them! 
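// Illustrative sketch of the simplest pattern handled below (hypothetical
// IR, not part of the patch):
//   %cmp = icmp eq i32 %x, 24
//   call void @llvm.assume(i1 %cmp)
// For a query on %x whose context instruction validly follows the assume,
// the "assume(v = a)" case below copies the known bits of the constant 24
// into %x, leaving every bit known.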
@@ -504,8 +700,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, Value *Arg = I->getArgOperand(0); - if (Arg == V && - isValidAssumeForContext(I, Q, DL)) { + if (Arg == V && isValidAssumeForContext(I, Q)) { assert(BitWidth == 1 && "assume operand is not i1?"); KnownZero.clearAllBits(); KnownOne.setAllBits(); @@ -525,15 +720,15 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, ConstantInt *C; // assume(v = a) if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); KnownZero |= RHSKnownZero; KnownOne |= RHSKnownOne; // assume(v & b = a) - } else if (match(Arg, m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), - m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + } else if (match(Arg, + m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) && + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0); @@ -546,7 +741,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // assume(~(v & b) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0); @@ -557,9 +752,9 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, KnownZero |= RHSKnownOne & MaskKnownOne; KnownOne |= RHSKnownZero & MaskKnownOne; // assume(v | b = a) - } else if (match(Arg, m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), - m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + } else if (match(Arg, + m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) && + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); @@ -572,7 +767,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // assume(~(v | b) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); @@ -583,9 +778,9 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, KnownZero |= RHSKnownOne & BKnownZero; KnownOne |= RHSKnownZero & BKnownZero; // assume(v ^ b = a) - } else if (match(Arg, m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), - m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + } else if (match(Arg, + m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) && + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { APInt 
RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); @@ -601,7 +796,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // assume(~(v ^ b) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); @@ -617,7 +812,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // assume(v << c = a) } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known @@ -627,7 +822,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // assume(~(v << c) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted @@ -637,10 +832,9 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // assume(v >> c = a) } else if (match(Arg, m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)), - m_AShr(m_V, - m_ConstantInt(C))), - m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + m_AShr(m_V, m_ConstantInt(C))), + m_Value(A))) && + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known @@ -649,10 +843,10 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, KnownOne |= RHSKnownOne << C->getZExtValue(); // assume(~(v >> c) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_CombineOr( - m_LShr(m_V, m_ConstantInt(C)), - m_AShr(m_V, m_ConstantInt(C)))), + m_LShr(m_V, m_ConstantInt(C)), + m_AShr(m_V, m_ConstantInt(C)))), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted @@ -661,8 +855,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, KnownOne |= RHSKnownZero << C->getZExtValue(); // assume(v >=_s c) where c is non-negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_SGE && - isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_SGE && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, 
RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); @@ -672,8 +865,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, } // assume(v >_s c) where c is at least -1. } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_SGT && - isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_SGT && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); @@ -683,8 +875,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, } // assume(v <=_s c) where c is negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_SLE && - isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_SLE && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); @@ -694,8 +885,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, } // assume(v <_s c) where c is non-positive } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_SLT && - isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_SLT && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); @@ -705,8 +895,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, } // assume(v <=_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_ULE && - isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_ULE && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); @@ -715,14 +904,13 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()); // assume(v <_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_ULT && - isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_ULT && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); // Whatever high bits in c are zero are known to be zero (if c is a power // of 2, then one more). - if (isKnownToBeAPowerOfTwo(A, false, Depth+1, Query(Q, I))) + if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I), DL)) KnownZero |= APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()+1); else @@ -743,13 +931,12 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, /// this won't lose us code quality. /// /// This function is defined on values with integer type, values with pointer -/// type (but only if TD is non-null), and vectors of integers. In the case +/// type, and vectors of integers. In the case /// where V is a vector, the known zero and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. 
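/// (Illustrative example, hypothetical: for a <2 x i32> constant with
/// elements 4 (0b100) and 6 (0b110), bit 2 is known one and bit 0 is known
/// zero, but bit 1 is left unknown because it differs between the elements.)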
void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, - const DataLayout *TD, unsigned Depth, - const Query &Q) { + const DataLayout &DL, unsigned Depth, const Query &Q) { assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); unsigned BitWidth = KnownZero.getBitWidth(); @@ -757,8 +944,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, assert((V->getType()->isIntOrIntVectorTy() || V->getType()->getScalarType()->isPointerTy()) && "Not integer or pointer type!"); - assert((!TD || - TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && + assert((DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && (!V->getType()->isIntOrIntVectorTy() || V->getType()->getScalarSizeInBits() == BitWidth) && KnownZero.getBitWidth() == BitWidth && @@ -797,7 +983,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // The address of an aligned GlobalValue has trailing zeros. if (auto *GO = dyn_cast<GlobalObject>(V)) { unsigned Align = GO->getAlignment(); - if (Align == 0 && TD) { + if (Align == 0) { if (auto *GVar = dyn_cast<GlobalVariable>(GO)) { Type *ObjectType = GVar->getType()->getElementType(); if (ObjectType->isSized()) { @@ -805,9 +991,9 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // it the preferred alignment. Otherwise, we have to assume that it // may only have the minimum ABI alignment. if (!GVar->isDeclaration() && !GVar->isWeakForLinker()) - Align = TD->getPreferredAlignment(GVar); + Align = DL.getPreferredAlignment(GVar); else - Align = TD->getABITypeAlignment(ObjectType); + Align = DL.getABITypeAlignment(ObjectType); } } } @@ -823,11 +1009,11 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (Argument *A = dyn_cast<Argument>(V)) { unsigned Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0; - if (!Align && TD && A->hasStructRetAttr()) { + if (!Align && A->hasStructRetAttr()) { // An sret parameter has at least the ABI alignment of the return type. Type *EltTy = cast<PointerType>(A->getType())->getElementType(); if (EltTy->isSized()) - Align = TD->getABITypeAlignment(EltTy); + Align = DL.getABITypeAlignment(EltTy); } if (Align) @@ -838,7 +1024,12 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // Don't give up yet... there might be an assumption that provides more // information... - computeKnownBitsFromAssume(V, KnownZero, KnownOne, TD, Depth, Q); + computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q); + + // Or a dominating condition for that matter + if (EnableDomConditions && Depth <= DomConditionsMaxDepth) + computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, + Depth, Q); return; } @@ -854,12 +1045,18 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // the bits of its aliasee. if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { if (!GA->mayBeOverridden()) - computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth + 1, Q); + computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, DL, Depth + 1, Q); return; } // Check whether a nearby assume intrinsic can determine some known bits. - computeKnownBitsFromAssume(V, KnownZero, KnownOne, TD, Depth, Q); + computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q); + + // Check whether there's a dominating condition which implies something about + // this value at the given context. 
+ if (EnableDomConditions && Depth <= DomConditionsMaxDepth) + computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, Depth, + Q); Operator *I = dyn_cast<Operator>(V); if (!I) return; @@ -873,8 +1070,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q); - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q); + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q); // Output known-1 bits are only known if set in both the LHS & RHS. KnownOne &= KnownOne2; @@ -883,8 +1080,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; } case Instruction::Or: { - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q); - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q); + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q); // Output known-0 bits are only known if clear in both the LHS & RHS. KnownZero &= KnownZero2; @@ -893,8 +1090,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; } case Instruction::Xor: { - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q); - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q); + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q); // Output known-0 bits are known if clear or set in both the LHS & RHS. APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); @@ -905,21 +1102,20 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, } case Instruction::Mul: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); - computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, - KnownZero, KnownOne, KnownZero2, KnownOne2, TD, - Depth, Q); + computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, KnownZero, + KnownOne, KnownZero2, KnownOne2, DL, Depth, Q); break; } case Instruction::UDiv: { // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q); unsigned LeadZ = KnownZero2.countLeadingOnes(); KnownOne2.clearAllBits(); KnownZero2.clearAllBits(); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, DL, Depth + 1, Q); unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, @@ -929,8 +1125,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; } case Instruction::Select: - computeKnownBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1, Q); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q); + computeKnownBits(I->getOperand(2), KnownZero, KnownOne, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, DL, Depth + 1, Q); // Only known if known in both the LHS and RHS. 
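// (Illustrative example: if bit 0 is known zero in both select operands, it
// is known zero in the result regardless of which way the condition goes.)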
KnownOne &= KnownOne2; @@ -946,8 +1142,6 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::AddrSpaceCast: // Pointers could be different sizes. - // We can't handle these if we don't know the pointer size. - if (!TD) break; // FALL THROUGH and handle them the same as zext/trunc. case Instruction::ZExt: case Instruction::Trunc: { @@ -956,17 +1150,12 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, unsigned SrcBitWidth; // Note that we handle pointer operands here because of inttoptr/ptrtoint // which fall through here. - if(TD) { - SrcBitWidth = TD->getTypeSizeInBits(SrcTy->getScalarType()); - } else { - SrcBitWidth = SrcTy->getScalarSizeInBits(); - if (!SrcBitWidth) break; - } + SrcBitWidth = DL.getTypeSizeInBits(SrcTy->getScalarType()); assert(SrcBitWidth && "SrcBitWidth can't be zero"); KnownZero = KnownZero.zextOrTrunc(SrcBitWidth); KnownOne = KnownOne.zextOrTrunc(SrcBitWidth); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); KnownZero = KnownZero.zextOrTrunc(BitWidth); KnownOne = KnownOne.zextOrTrunc(BitWidth); // Any top bits are known to be zero. @@ -980,7 +1169,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) !I->getType()->isVectorTy()) { - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); break; } break; @@ -991,7 +1180,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownZero = KnownZero.trunc(SrcBitWidth); KnownOne = KnownOne.trunc(SrcBitWidth); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); @@ -1007,7 +1196,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); KnownZero <<= ShiftAmt; KnownOne <<= ShiftAmt; KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0 @@ -1020,7 +1209,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); // Unsigned shift right. - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); // high bits known zero. @@ -1034,7 +1223,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Signed shift right. 
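The Shl case above translates both known-bit sets and marks the vacated low positions as known zero; in isolation (illustrative helper, not LLVM's):

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;

    // Known bits move left with the value; bits shifted in are zero.
    static void shlKnownBits(APInt &KZ, APInt &KO, unsigned ShiftAmt) {
      unsigned BW = KZ.getBitWidth();
      KZ <<= ShiftAmt;
      KO <<= ShiftAmt;
      KZ |= APInt::getLowBitsSet(BW, ShiftAmt);
    }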
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); @@ -1048,15 +1237,15 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, case Instruction::Sub: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, - KnownZero, KnownOne, KnownZero2, KnownOne2, TD, - Depth, Q); + KnownZero, KnownOne, KnownZero2, KnownOne2, DL, + Depth, Q); break; } case Instruction::Add: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, - KnownZero, KnownOne, KnownZero2, KnownOne2, TD, - Depth, Q); + KnownZero, KnownOne, KnownZero2, KnownOne2, DL, + Depth, Q); break; } case Instruction::SRem: @@ -1064,8 +1253,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, - Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, + Q); // The low bits of the first operand are unchanged by the srem. KnownZero = KnownZero2 & LowBits; @@ -1089,8 +1278,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // remainder is zero. if (KnownZero.isNonNegative()) { APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD, - Depth+1, Q); + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, DL, + Depth + 1, Q); // If it's known zero, our sign bit is also zero. if (LHSKnownZero.isNegative()) KnownZero.setBit(BitWidth - 1); @@ -1102,8 +1291,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, APInt RA = Rem->getValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, - Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, + Q); KnownZero |= ~LowBits; KnownOne &= LowBits; break; @@ -1112,8 +1301,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, DL, Depth + 1, Q); unsigned Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); @@ -1125,8 +1314,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, case Instruction::Alloca: { AllocaInst *AI = cast<AllocaInst>(V); unsigned Align = AI->getAlignment(); - if (Align == 0 && TD) - Align = TD->getABITypeAlignment(AI->getType()->getElementType()); + if (Align == 0) + Align = DL.getABITypeAlignment(AI->getType()->getElementType()); if (Align > 0) KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); @@ -1136,8 +1325,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // Analyze all of the subscripts of this getelementptr instruction // to determine if we can prove known low zero bits. 
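Both remainder cases above reduce to bit masking once the divisor is a power of two, since x urem C == x & (C - 1); a sketch of the URem rule (helper name is ours):

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;

    // For RA a power of two: bits above the mask are known zero, and
    // known-one bits survive only inside the mask.
    static void uremPow2KnownBits(APInt &KZ, APInt &KO, const APInt &RA) {
      APInt LowBits = RA - 1;
      KZ |= ~LowBits;
      KO &= LowBits;
    }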
APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0); - computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD, - Depth+1, Q); + computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, DL, + Depth + 1, Q); unsigned TrailZ = LocalKnownZero.countTrailingOnes(); gep_type_iterator GTI = gep_type_begin(I); @@ -1145,10 +1334,6 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, Value *Index = I->getOperand(i); if (StructType *STy = dyn_cast<StructType>(*GTI)) { // Handle struct member offset arithmetic. - if (!TD) { - TrailZ = 0; - break; - } // Handle case when index is vector zeroinitializer Constant *CIndex = cast<Constant>(Index); @@ -1159,7 +1344,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, Index = CIndex->getSplatValue(); unsigned Idx = cast<ConstantInt>(Index)->getZExtValue(); - const StructLayout *SL = TD->getStructLayout(STy); + const StructLayout *SL = DL.getStructLayout(STy); uint64_t Offset = SL->getElementOffset(Idx); TrailZ = std::min<unsigned>(TrailZ, countTrailingZeros(Offset)); @@ -1171,9 +1356,10 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; } unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits(); - uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1; + uint64_t TypeSize = DL.getTypeAllocSize(IndexedTy); LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0); - computeKnownBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1, Q); + computeKnownBits(Index, LocalKnownZero, LocalKnownOne, DL, Depth + 1, + Q); TrailZ = std::min(TrailZ, unsigned(countTrailingZeros(TypeSize) + LocalKnownZero.countTrailingOnes())); @@ -1215,11 +1401,11 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; // Ok, we have a PHI of the form L op= R. Check for low // zero bits. - computeKnownBits(R, KnownZero2, KnownOne2, TD, Depth+1, Q); + computeKnownBits(R, KnownZero2, KnownOne2, DL, Depth + 1, Q); // We need to take the minimum number of known bits APInt KnownZero3(KnownZero), KnownOne3(KnownOne); - computeKnownBits(L, KnownZero3, KnownOne3, TD, Depth+1, Q); + computeKnownBits(L, KnownZero3, KnownOne3, DL, Depth + 1, Q); KnownZero = APInt::getLowBitsSet(BitWidth, std::min(KnownZero2.countTrailingOnes(), @@ -1250,8 +1436,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownOne2 = APInt(BitWidth, 0); // Recurse, but cap the recursion to one level, because we don't // want to waste time spinning around in loops. 
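The array-index step above tracks divisibility: an index that is a multiple of 2^k, scaled by an element size that is a multiple of 2^m, keeps the offset a multiple of 2^(k+m). The arithmetic in isolation (the real code additionally takes the minimum against the running TrailZ and caps it at BitWidth):

    #include <cstdint>
    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/MathExtras.h"

    // Trailing zeros contributed by one GEP array index (a sketch).
    static unsigned gepIndexTrailZ(uint64_t TypeSize,
                                   const llvm::APInt &IdxKnownZero) {
      return unsigned(llvm::countTrailingZeros(TypeSize)) +
             IdxKnownZero.countTrailingOnes();
    }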
- computeKnownBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD, - MaxDepth-1, Q); + computeKnownBits(P->getIncomingValue(i), KnownZero2, KnownOne2, DL, + MaxDepth - 1, Q); KnownZero &= KnownZero2; KnownOne &= KnownOne2; // If all bits have been ruled out, there's no need to check @@ -1303,19 +1489,19 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, case Intrinsic::sadd_with_overflow: computeKnownBitsAddSub(true, II->getArgOperand(0), II->getArgOperand(1), false, KnownZero, - KnownOne, KnownZero2, KnownOne2, TD, Depth, Q); + KnownOne, KnownZero2, KnownOne2, DL, Depth, Q); break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: computeKnownBitsAddSub(false, II->getArgOperand(0), II->getArgOperand(1), false, KnownZero, - KnownOne, KnownZero2, KnownOne2, TD, Depth, Q); + KnownOne, KnownZero2, KnownOne2, DL, Depth, Q); break; case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: - computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), - false, KnownZero, KnownOne, - KnownZero2, KnownOne2, TD, Depth, Q); + computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false, + KnownZero, KnownOne, KnownZero2, KnownOne2, DL, + Depth, Q); break; } } @@ -1328,9 +1514,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, /// Determine whether the sign bit is known to be zero or one. /// Convenience wrapper around computeKnownBits. void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, - const DataLayout *TD, unsigned Depth, - const Query &Q) { - unsigned BitWidth = getBitWidth(V->getType(), TD); + const DataLayout &DL, unsigned Depth, const Query &Q) { + unsigned BitWidth = getBitWidth(V->getType(), DL); if (!BitWidth) { KnownZero = false; KnownOne = false; @@ -1338,7 +1523,7 @@ void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, } APInt ZeroBits(BitWidth, 0); APInt OneBits(BitWidth, 0); - computeKnownBits(V, ZeroBits, OneBits, TD, Depth, Q); + computeKnownBits(V, ZeroBits, OneBits, DL, Depth, Q); KnownOne = OneBits[BitWidth - 1]; KnownZero = ZeroBits[BitWidth - 1]; } @@ -1348,7 +1533,7 @@ void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, /// be a power of two when defined. Supports values with integer or pointer /// types and vectors of integers. bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, - const Query &Q) { + const Query &Q, const DataLayout &DL) { if (Constant *C = dyn_cast<Constant>(V)) { if (C->isNullValue()) return OrZero; @@ -1375,20 +1560,19 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, // A shift of a power of two is a power of two or zero. if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) || match(V, m_Shr(m_Value(X), m_Value())))) - return isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth, Q); + return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q, DL); if (ZExtInst *ZI = dyn_cast<ZExtInst>(V)) - return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q); + return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q, DL); if (SelectInst *SI = dyn_cast<SelectInst>(V)) - return - isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) && - isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q); + return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q, DL) && + isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q, DL); if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) { // A power of two and'd with anything is a power of two or zero. 
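ComputeSignBit above is a thin projection of the known-bits pair onto the top bit; its core, standalone (hypothetical helper):

    #include "llvm/ADT/APInt.h"

    // Read sign-bit knowledge out of a computed (KnownZero, KnownOne) pair.
    static void signBitFromKnown(const llvm::APInt &KZ, const llvm::APInt &KO,
                                 bool &KnownZero, bool &KnownOne) {
      unsigned BW = KZ.getBitWidth();
      KnownZero = KZ[BW - 1];
      KnownOne = KO[BW - 1];
    }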
- if (isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth, Q) || - isKnownToBeAPowerOfTwo(Y, /*OrZero*/true, Depth, Q)) + if (isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q, DL) || + isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, Depth, Q, DL)) return true; // X & (-X) is always a power of two or zero. if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X)))) @@ -1403,19 +1587,19 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) { if (match(X, m_And(m_Specific(Y), m_Value())) || match(X, m_And(m_Value(), m_Specific(Y)))) - if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q)) + if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q, DL)) return true; if (match(Y, m_And(m_Specific(X), m_Value())) || match(Y, m_And(m_Value(), m_Specific(X)))) - if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q)) + if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q, DL)) return true; unsigned BitWidth = V->getType()->getScalarSizeInBits(); APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0); - computeKnownBits(X, LHSZeroBits, LHSOneBits, nullptr, Depth, Q); + computeKnownBits(X, LHSZeroBits, LHSOneBits, DL, Depth, Q); APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0); - computeKnownBits(Y, RHSZeroBits, RHSOneBits, nullptr, Depth, Q); + computeKnownBits(Y, RHSZeroBits, RHSOneBits, DL, Depth, Q); // If i8 V is a power of two or zero: // ZeroBits: 1 1 1 0 1 1 1 1 // ~ZeroBits: 0 0 0 1 0 0 0 0 @@ -1433,7 +1617,7 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) || match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) { return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero, - Depth, Q); + Depth, Q, DL); } return false; @@ -1445,7 +1629,7 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, /// to be non-null. /// /// Currently this routine does not support vector GEPs. -static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL, +static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout &DL, unsigned Depth, const Query &Q) { if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0) return false; @@ -1458,10 +1642,6 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL, if (isKnownNonZero(GEP->getPointerOperand(), DL, Depth, Q)) return true; - // Past this, if we don't have DataLayout, we can't do much. - if (!DL) - return false; - // Walk the GEP operands and see if any operand introduces a non-zero offset. // If so, then the GEP cannot produce a null pointer, as doing so would // inherently violate the inbounds contract within address space zero. @@ -1471,7 +1651,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL, if (StructType *STy = dyn_cast<StructType>(*GTI)) { ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand()); unsigned ElementIdx = OpC->getZExtValue(); - const StructLayout *SL = DL->getStructLayout(STy); + const StructLayout *SL = DL.getStructLayout(STy); uint64_t ElementOffset = SL->getElementOffset(ElementIdx); if (ElementOffset > 0) return true; @@ -1479,7 +1659,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL, } // If we have a zero-sized type, the index doesn't matter. Keep looping. 
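The X & (-X) fact used above holds because two's-complement negation flips every bit above the lowest set bit, so the AND isolates that bit or gives zero. A quick check of the invariant (illustrative):

    #include "llvm/ADT/APInt.h"

    // Always true: X & -X is zero or a single set bit.
    static bool lowestBitIsolated(const llvm::APInt &X) {
      llvm::APInt L = X & (-X);
      return L == 0 || L.isPowerOf2();
    }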
- if (DL->getTypeAllocSize(GTI.getIndexedType()) == 0) + if (DL.getTypeAllocSize(GTI.getIndexedType()) == 0) continue; // Fast path the constant operand case both for efficiency and so we don't @@ -1528,7 +1708,7 @@ static bool rangeMetadataExcludesValue(MDNode* Ranges, /// For vectors return true if every element is known to be non-zero when /// defined. Supports values with integer or pointer type and vectors of /// integers. -bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, +bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, const Query &Q) { if (Constant *C = dyn_cast<Constant>(V)) { if (C->isNullValue()) @@ -1561,21 +1741,20 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, if (isKnownNonNull(V)) return true; if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) - if (isGEPKnownNonNull(GEP, TD, Depth, Q)) + if (isGEPKnownNonNull(GEP, DL, Depth, Q)) return true; } - unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), TD); + unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), DL); // X | Y != 0 if X != 0 or Y != 0. Value *X = nullptr, *Y = nullptr; if (match(V, m_Or(m_Value(X), m_Value(Y)))) - return isKnownNonZero(X, TD, Depth, Q) || - isKnownNonZero(Y, TD, Depth, Q); + return isKnownNonZero(X, DL, Depth, Q) || isKnownNonZero(Y, DL, Depth, Q); // ext X != 0 if X != 0. if (isa<SExtInst>(V) || isa<ZExtInst>(V)) - return isKnownNonZero(cast<Instruction>(V)->getOperand(0), TD, Depth, Q); + return isKnownNonZero(cast<Instruction>(V)->getOperand(0), DL, Depth, Q); // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined // if the lowest bit is shifted off the end. @@ -1583,11 +1762,11 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, // shl nuw can't remove any non-zero bits. OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V); if (BO->hasNoUnsignedWrap()) - return isKnownNonZero(X, TD, Depth, Q); + return isKnownNonZero(X, DL, Depth, Q); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - computeKnownBits(X, KnownZero, KnownOne, TD, Depth, Q); + computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q); if (KnownOne[0]) return true; } @@ -1597,29 +1776,28 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, // shr exact can only shift out zero bits. PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V); if (BO->isExact()) - return isKnownNonZero(X, TD, Depth, Q); + return isKnownNonZero(X, DL, Depth, Q); bool XKnownNonNegative, XKnownNegative; - ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth, Q); + ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q); if (XKnownNegative) return true; } // div exact can only produce a zero if the dividend is zero. else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) { - return isKnownNonZero(X, TD, Depth, Q); + return isKnownNonZero(X, DL, Depth, Q); } // X + Y. else if (match(V, m_Add(m_Value(X), m_Value(Y)))) { bool XKnownNonNegative, XKnownNegative; bool YKnownNonNegative, YKnownNegative; - ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth, Q); - ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth, Q); + ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q); + ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, DL, Depth, Q); // If X and Y are both non-negative (as signed values) then their sum is not // zero unless both X and Y are zero. 
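As of this change, isKnownNonZero (like the rest of ValueTracking) takes the DataLayout by reference rather than by possibly-null pointer; a usage sketch against the post-change public API, fetching the layout from the Module:

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IR/Value.h"

    // Depth and the analysis-context parameters are defaulted in the
    // public wrapper, so callers only supply the DataLayout.
    static bool provablyNonZero(llvm::Value *V, const llvm::Module &M) {
      return llvm::isKnownNonZero(V, M.getDataLayout());
    }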
if (XKnownNonNegative && YKnownNonNegative) - if (isKnownNonZero(X, TD, Depth, Q) || - isKnownNonZero(Y, TD, Depth, Q)) + if (isKnownNonZero(X, DL, Depth, Q) || isKnownNonZero(Y, DL, Depth, Q)) return true; // If X and Y are both negative (as signed values) then their sum is not @@ -1630,22 +1808,22 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, APInt Mask = APInt::getSignedMaxValue(BitWidth); // The sign bit of X is set. If some other bit is set then X is not equal // to INT_MIN. - computeKnownBits(X, KnownZero, KnownOne, TD, Depth, Q); + computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q); if ((KnownOne & Mask) != 0) return true; // The sign bit of Y is set. If some other bit is set then Y is not equal // to INT_MIN. - computeKnownBits(Y, KnownZero, KnownOne, TD, Depth, Q); + computeKnownBits(Y, KnownZero, KnownOne, DL, Depth, Q); if ((KnownOne & Mask) != 0) return true; } // The sum of a non-negative number and a power of two is not zero. if (XKnownNonNegative && - isKnownToBeAPowerOfTwo(Y, /*OrZero*/false, Depth, Q)) + isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q, DL)) return true; if (YKnownNonNegative && - isKnownToBeAPowerOfTwo(X, /*OrZero*/false, Depth, Q)) + isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q, DL)) return true; } // X * Y. @@ -1654,21 +1832,20 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, // If X and Y are non-zero then so is X * Y as long as the multiplication // does not overflow. if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) && - isKnownNonZero(X, TD, Depth, Q) && - isKnownNonZero(Y, TD, Depth, Q)) + isKnownNonZero(X, DL, Depth, Q) && isKnownNonZero(Y, DL, Depth, Q)) return true; } // (C ? X : Y) != 0 if X != 0 and Y != 0. else if (SelectInst *SI = dyn_cast<SelectInst>(V)) { - if (isKnownNonZero(SI->getTrueValue(), TD, Depth, Q) && - isKnownNonZero(SI->getFalseValue(), TD, Depth, Q)) + if (isKnownNonZero(SI->getTrueValue(), DL, Depth, Q) && + isKnownNonZero(SI->getFalseValue(), DL, Depth, Q)) return true; } if (!BitWidth) return false; APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q); + computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Q); return KnownOne != 0; } @@ -1677,15 +1854,14 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, /// cannot have. /// /// This function is defined on values with integer type, values with pointer -/// type (but only if TD is non-null), and vectors of integers. In the case +/// type, and vectors of integers. In the case /// where V is a vector, the mask, known zero, and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. -bool MaskedValueIsZero(Value *V, const APInt &Mask, - const DataLayout *TD, unsigned Depth, - const Query &Q) { +bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL, + unsigned Depth, const Query &Q) { APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); - computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q); + computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Q); return (KnownZero & Mask) == Mask; } @@ -1699,14 +1875,9 @@ bool MaskedValueIsZero(Value *V, const APInt &Mask, /// /// 'Op' must have a scalar integer type. 
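MaskedValueIsZero above bottoms out in one subset test over the computed known-zero bits; the test by itself:

    #include "llvm/ADT/APInt.h"

    // True iff every bit selected by Mask is a known-zero bit.
    static bool maskBitsKnownZero(const llvm::APInt &KnownZero,
                                  const llvm::APInt &Mask) {
      return (KnownZero & Mask) == Mask;
    }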
/// -unsigned ComputeNumSignBits(Value *V, const DataLayout *TD, - unsigned Depth, const Query &Q) { - assert((TD || V->getType()->isIntOrIntVectorTy()) && - "ComputeNumSignBits requires a DataLayout object to operate " - "on non-integer values!"); - Type *Ty = V->getType(); - unsigned TyBits = TD ? TD->getTypeSizeInBits(V->getType()->getScalarType()) : - Ty->getScalarSizeInBits(); +unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth, + const Query &Q) { + unsigned TyBits = DL.getTypeSizeInBits(V->getType()->getScalarType()); unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; @@ -1721,10 +1892,63 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD, default: break; case Instruction::SExt: Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); - return ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q) + Tmp; + return ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q) + Tmp; + + case Instruction::SDiv: { + const APInt *Denominator; + // sdiv X, C -> adds log(C) sign bits. + if (match(U->getOperand(1), m_APInt(Denominator))) { + + // Ignore non-positive denominator. + if (!Denominator->isStrictlyPositive()) + break; + + // Calculate the incoming numerator bits. + unsigned NumBits = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q); + + // Add floor(log(C)) bits to the numerator bits. + return std::min(TyBits, NumBits + Denominator->logBase2()); + } + break; + } + + case Instruction::SRem: { + const APInt *Denominator; + // srem X, C -> we know that the result is within [-C+1,C) when C is a + // positive constant. This let us put a lower bound on the number of sign + // bits. + if (match(U->getOperand(1), m_APInt(Denominator))) { + + // Ignore non-positive denominator. + if (!Denominator->isStrictlyPositive()) + break; + + // Calculate the incoming numerator bits. SRem by a positive constant + // can't lower the number of sign bits. + unsigned NumrBits = + ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q); + + // Calculate the leading sign bit constraints by examining the + // denominator. Given that the denominator is positive, there are two + // cases: + // + // 1. the numerator is positive. The result range is [0,C) and [0,C) u< + // (1 << ceilLogBase2(C)). + // + // 2. the numerator is negative. Then the result range is (-C,0] and + // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)). + // + // Thus a lower bound on the number of sign bits is `TyBits - + // ceilLogBase2(C)`. + + unsigned ResBits = TyBits - Denominator->ceilLogBase2(); + return std::max(NumrBits, ResBits); + } + break; + } case Instruction::AShr: { - Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q); + Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q); // ashr X, C -> adds C sign bits. Vectors too. const APInt *ShAmt; if (match(U->getOperand(1), m_APInt(ShAmt))) { @@ -1737,7 +1961,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD, const APInt *ShAmt; if (match(U->getOperand(1), m_APInt(ShAmt))) { // shl destroys sign bits. - Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q); + Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q); Tmp2 = ShAmt->getZExtValue(); if (Tmp2 >= TyBits || // Bad shift. Tmp2 >= Tmp) break; // Shifted all sign bits out. @@ -1749,9 +1973,9 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD, case Instruction::Or: case Instruction::Xor: // NOT is handled here. // Logical binary ops preserve the number of sign bits at the worst. 
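The new SDiv case above gains floor(log2(C)) sign bits for a positive constant divisor, since dividing by C shrinks the magnitude by at least that many powers of two. A worked instance: an i32 numerator with 3 known sign bits, sdiv'd by 16, yields min(32, 3 + 4) = 7 sign bits.

    #include <algorithm>
    #include "llvm/ADT/APInt.h"

    // Sign bits after sdiv by a positive constant C (a sketch; C > 0).
    static unsigned sdivSignBits(unsigned TyBits, unsigned NumBits,
                                 const llvm::APInt &C) {
      return std::min(TyBits, NumBits + C.logBase2());
    }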
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q); + Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q); if (Tmp != 1) { - Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q); + Tmp2 = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q); FirstAnswer = std::min(Tmp, Tmp2); // We computed what we know about the sign bits as our first // answer. Now proceed to the generic code that uses @@ -1760,22 +1984,23 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD, break; case Instruction::Select: - Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q); + Tmp = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q); if (Tmp == 1) return 1; // Early out. - Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1, Q); + Tmp2 = ComputeNumSignBits(U->getOperand(2), DL, Depth + 1, Q); return std::min(Tmp, Tmp2); case Instruction::Add: // Add can have at most one carry bit. Thus we know that the output // is, at worst, one more bit than the inputs. - Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q); + Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q); if (Tmp == 1) return 1; // Early out. // Special case decrementing a value (ADD X, -1): if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1))) if (CRHS->isAllOnesValue()) { APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - computeKnownBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); + computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, + Q); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. @@ -1788,19 +2013,20 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD, return Tmp; } - Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q); + Tmp2 = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q); if (Tmp2 == 1) return 1; return std::min(Tmp, Tmp2)-1; case Instruction::Sub: - Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q); + Tmp2 = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q); if (Tmp2 == 1) return 1; // Handle NEG. if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0))) if (CLHS->isNullValue()) { APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - computeKnownBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q); + computeKnownBits(U->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, + Q); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) @@ -1816,7 +2042,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD, // Sub can have at most one carry bit. Thus we know that the output // is, at worst, one more bit than the inputs. - Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q); + Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q); if (Tmp == 1) return 1; // Early out. return std::min(Tmp, Tmp2)-1; @@ -1830,12 +2056,11 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD, // Take the minimum of all incoming values. This can't infinitely loop // because of our depth threshold. 
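The Add and Sub cases above both end in the same bound: one possible carry costs at most one sign bit, so the result keeps min(LHS, RHS) - 1. In isolation:

    #include <algorithm>

    // A sum or difference has at worst one fewer sign bit than the
    // weaker of its operands (the single carry/borrow bit).
    static unsigned addSubSignBits(unsigned LHSBits, unsigned RHSBits) {
      if (LHSBits == 1 || RHSBits == 1)
        return 1;
      return std::min(LHSBits, RHSBits) - 1;
    }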
- Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1, Q); + Tmp = ComputeNumSignBits(PN->getIncomingValue(0), DL, Depth + 1, Q); for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) { if (Tmp == 1) return Tmp; - Tmp = std::min(Tmp, - ComputeNumSignBits(PN->getIncomingValue(i), TD, - Depth+1, Q)); + Tmp = std::min( + Tmp, ComputeNumSignBits(PN->getIncomingValue(i), DL, Depth + 1, Q)); } return Tmp; } @@ -1850,7 +2075,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD, // use this information. APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); APInt Mask; - computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q); + computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Q); if (KnownZero.isNegative()) { // sign bit is 0 Mask = KnownZero; @@ -2132,9 +2357,7 @@ Value *llvm::isBytewiseValue(Value *V) { if (CI->getBitWidth() % 8 == 0) { assert(CI->getBitWidth() > 8 && "8 bits should be handled above!"); - // We can check that all bytes of an integer are equal by making use of a - // little trick: rotate by 8 and check if it's still the same value. - if (CI->getValue() != CI->getValue().rotl(8)) + if (!CI->getValue().isSplat(8)) return nullptr; return ConstantInt::get(V->getContext(), CI->getValue().trunc(8)); } @@ -2335,23 +2558,19 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, /// Analyze the specified pointer to see if it can be expressed as a base /// pointer plus a constant offset. Return the base and offset to the caller. Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, - const DataLayout *DL) { - // Without DataLayout, conservatively assume 64-bit offsets, which is - // the widest we support. - unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(Ptr->getType()) : 64; + const DataLayout &DL) { + unsigned BitWidth = DL.getPointerTypeSizeInBits(Ptr->getType()); APInt ByteOffset(BitWidth, 0); while (1) { if (Ptr->getType()->isVectorTy()) break; if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) { - if (DL) { - APInt GEPOffset(BitWidth, 0); - if (!GEP->accumulateConstantOffset(*DL, GEPOffset)) - break; + APInt GEPOffset(BitWidth, 0); + if (!GEP->accumulateConstantOffset(DL, GEPOffset)) + break; - ByteOffset += GEPOffset; - } + ByteOffset += GEPOffset; Ptr = GEP->getPointerOperand(); } else if (Operator::getOpcode(Ptr) == Instruction::BitCast || @@ -2380,7 +2599,7 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, // Look through bitcast instructions and geps. V = V->stripPointerCasts(); - // If the value is a GEP instructionor constant expression, treat it as an + // If the value is a GEP instruction or constant expression, treat it as an // offset. if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { // Make sure the GEP has exactly three arguments. @@ -2407,7 +2626,8 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, StartIdx = CI->getZExtValue(); else return false; - return getConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset); + return getConstantStringInfo(GEP->getOperand(0), Str, StartIdx + Offset, + TrimAtNul); } // The GEP instruction, constant or instruction, must reference a global @@ -2517,8 +2737,8 @@ uint64_t llvm::GetStringLength(Value *V) { return Len == ~0ULL ? 
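The isBytewiseValue change above replaces the rotate-by-8 trick with APInt::isSplat(8); they agree because a value whose bytes are all equal is exactly one left invariant by an 8-bit rotation:

    #include "llvm/ADT/APInt.h"

    // For bit widths that are multiples of 8, equivalent to V.isSplat(8).
    static bool bytesAllEqual(const llvm::APInt &V) {
      return V == V.rotl(8);
    }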
1 : Len; } -Value * -llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) { +Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, + unsigned MaxLookup) { if (!V->getType()->isPointerTy()) return V; for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { @@ -2535,7 +2755,7 @@ llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) { // See if InstructionSimplify knows any relevant tricks. if (Instruction *I = dyn_cast<Instruction>(V)) // TODO: Acquire a DominatorTree and AssumptionCache and use them. - if (Value *Simplified = SimplifyInstruction(I, TD, nullptr)) { + if (Value *Simplified = SimplifyInstruction(I, DL, nullptr)) { V = Simplified; continue; } @@ -2547,17 +2767,14 @@ llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) { return V; } -void -llvm::GetUnderlyingObjects(Value *V, - SmallVectorImpl<Value *> &Objects, - const DataLayout *TD, - unsigned MaxLookup) { +void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects, + const DataLayout &DL, unsigned MaxLookup) { SmallPtrSet<Value *, 4> Visited; SmallVector<Value *, 4> Worklist; Worklist.push_back(V); do { Value *P = Worklist.pop_back_val(); - P = GetUnderlyingObject(P, TD, MaxLookup); + P = GetUnderlyingObject(P, DL, MaxLookup); if (!Visited.insert(P).second) continue; @@ -2591,8 +2808,7 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { return true; } -bool llvm::isSafeToSpeculativelyExecute(const Value *V, - const DataLayout *TD) { +bool llvm::isSafeToSpeculativelyExecute(const Value *V) { const Operator *Inst = dyn_cast<Operator>(V); if (!Inst) return false; @@ -2638,7 +2854,8 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, // Speculative load may create a race that did not exist in the source. LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread)) return false; - return LI->getPointerOperand()->isDereferenceablePointer(TD); + const DataLayout &DL = LI->getModule()->getDataLayout(); + return LI->getPointerOperand()->isDereferenceablePointer(DL); } case Instruction::Call: { if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { @@ -2730,7 +2947,7 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { } OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS, - const DataLayout *DL, + const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { @@ -2780,7 +2997,7 @@ OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS, } OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS, - const DataLayout *DL, + const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 9e7354e..103c8c4 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -2270,13 +2270,13 @@ bool LLParser::PerFunctionState::SetInstName(int NameID, /// forward reference record if needed. 
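GetUnderlyingObject and friends above now take the DataLayout by reference as well; a usage sketch against the post-change signature (MaxLookup defaults to a small cap in the header):

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IR/Value.h"

    // Strip GEPs, casts, and simplifiable instructions down to the
    // object a pointer is based on.
    static llvm::Value *baseObject(llvm::Value *Ptr, const llvm::Module &M) {
      return llvm::GetUnderlyingObject(Ptr, M.getDataLayout());
    }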
BasicBlock *LLParser::PerFunctionState::GetBB(const std::string &Name, LocTy Loc) { - return cast_or_null<BasicBlock>(GetVal(Name, - Type::getLabelTy(F.getContext()), Loc)); + return dyn_cast_or_null<BasicBlock>(GetVal(Name, + Type::getLabelTy(F.getContext()), Loc)); } BasicBlock *LLParser::PerFunctionState::GetBB(unsigned ID, LocTy Loc) { - return cast_or_null<BasicBlock>(GetVal(ID, - Type::getLabelTy(F.getContext()), Loc)); + return dyn_cast_or_null<BasicBlock>(GetVal(ID, + Type::getLabelTy(F.getContext()), Loc)); } /// DefineBB - Define the specified basic block, which is either named or @@ -2512,7 +2512,12 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { if (!F) { // Make a global variable as a placeholder for this reference. - GlobalValue *&FwdRef = ForwardRefBlockAddresses[Fn][Label]; + GlobalValue *&FwdRef = + ForwardRefBlockAddresses.insert(std::make_pair( + std::move(Fn), + std::map<ValID, GlobalValue *>())) + .first->second.insert(std::make_pair(std::move(Label), nullptr)) + .first->second; if (!FwdRef) FwdRef = new GlobalVariable(*M, Type::getInt8Ty(Context), false, GlobalValue::InternalLinkage, nullptr, ""); @@ -2772,11 +2777,23 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { unsigned Opc = Lex.getUIntVal(); SmallVector<Constant*, 16> Elts; bool InBounds = false; + Type *Ty; Lex.Lex(); + if (Opc == Instruction::GetElementPtr) InBounds = EatIfPresent(lltok::kw_inbounds); - if (ParseToken(lltok::lparen, "expected '(' in constantexpr") || - ParseGlobalValueVector(Elts) || + + if (ParseToken(lltok::lparen, "expected '(' in constantexpr")) + return true; + + LocTy ExplicitTypeLoc = Lex.getLoc(); + if (Opc == Instruction::GetElementPtr) { + if (ParseType(Ty) || + ParseToken(lltok::comma, "expected comma after getelementptr's type")) + return true; + } + + if (ParseGlobalValueVector(Elts) || ParseToken(lltok::rparen, "expected ')' in constantexpr")) return true; @@ -2787,6 +2804,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { Type *BaseType = Elts[0]->getType(); auto *BasePointerType = cast<PointerType>(BaseType->getScalarType()); + if (Ty != BasePointerType->getElementType()) + return Error( + ExplicitTypeLoc, + "explicit pointee type doesn't match operand's pointee type"); ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end()); for (Constant *Val : Indices) { @@ -2805,7 +2826,9 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { } } - if (!Indices.empty() && !BasePointerType->getElementType()->isSized()) + SmallPtrSet<const Type*, 4> Visited; + if (!Indices.empty() && + !BasePointerType->getElementType()->isSized(&Visited)) return Error(ID.Loc, "base element of getelementptr must be sized"); if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(), Indices)) @@ -2976,6 +2999,8 @@ struct ColumnField : public MDUnsignedField { }; struct DwarfTagField : public MDUnsignedField { DwarfTagField() : MDUnsignedField(0, dwarf::DW_TAG_hi_user) {} + DwarfTagField(dwarf::Tag DefaultTag) + : MDUnsignedField(DefaultTag, dwarf::DW_TAG_hi_user) {} }; struct DwarfAttEncodingField : public MDUnsignedField { DwarfAttEncodingField() : MDUnsignedField(0, dwarf::DW_ATE_hi_user) {} @@ -3323,8 +3348,8 @@ bool LLParser::ParseMDLocation(MDNode *&Result, bool IsDistinct) { PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS - auto get = (IsDistinct ? 
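The GetBB change above is the classic cast_or_null versus dyn_cast_or_null distinction: on a non-null value of the wrong type, cast_or_null asserts, while dyn_cast_or_null returns a nullptr that the parser can turn into a diagnostic (see the new error in ParseBasicBlock below):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Value.h"
    #include "llvm/Support/Casting.h"

    // nullptr both for a null input and for a non-BasicBlock input.
    static llvm::BasicBlock *asBlockOrNull(llvm::Value *V) {
      return llvm::dyn_cast_or_null<llvm::BasicBlock>(V);
    }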
MDLocation::getDistinct : MDLocation::get); - Result = get(Context, line.Val, column.Val, scope.Val, inlinedAt.Val); + Result = GET_OR_DISTINCT( + MDLocation, (Context, line.Val, column.Val, scope.Val, inlinedAt.Val)); return false; } @@ -3373,7 +3398,7 @@ bool LLParser::ParseMDEnumerator(MDNode *&Result, bool IsDistinct) { /// ::= !MDBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32) bool LLParser::ParseMDBasicType(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ - REQUIRED(tag, DwarfTagField, ); \ + OPTIONAL(tag, DwarfTagField, (dwarf::DW_TAG_base_type)); \ OPTIONAL(name, MDStringField, ); \ OPTIONAL(size, MDUnsignedField, (0, UINT64_MAX)); \ OPTIONAL(align, MDUnsignedField, (0, UINT64_MAX)); \ @@ -3509,7 +3534,7 @@ bool LLParser::ParseMDCompileUnit(MDNode *&Result, bool IsDistinct) { bool LLParser::ParseMDSubprogram(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ OPTIONAL(scope, MDField, ); \ - REQUIRED(name, MDStringField, ); \ + OPTIONAL(name, MDStringField, ); \ OPTIONAL(linkageName, MDStringField, ); \ OPTIONAL(file, MDField, ); \ OPTIONAL(line, LineField, ); \ @@ -3604,9 +3629,9 @@ bool LLParser::ParseMDTemplateTypeParameter(MDNode *&Result, bool IsDistinct) { /// name: "V", type: !1, value: i32 7) bool LLParser::ParseMDTemplateValueParameter(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ - REQUIRED(tag, DwarfTagField, ); \ + OPTIONAL(tag, DwarfTagField, (dwarf::DW_TAG_template_value_parameter)); \ OPTIONAL(name, MDStringField, ); \ - REQUIRED(type, MDField, ); \ + OPTIONAL(type, MDField, ); \ REQUIRED(value, MDField, ); PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS @@ -3624,7 +3649,7 @@ bool LLParser::ParseMDTemplateValueParameter(MDNode *&Result, bool IsDistinct) { bool LLParser::ParseMDGlobalVariable(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ OPTIONAL(scope, MDField, ); \ - REQUIRED(name, MDStringField, ); \ + OPTIONAL(name, MDStringField, ); \ OPTIONAL(linkageName, MDStringField, ); \ OPTIONAL(file, MDField, ); \ OPTIONAL(line, LineField, ); \ @@ -3710,7 +3735,7 @@ bool LLParser::ParseMDExpression(MDNode *&Result, bool IsDistinct) { /// getter: "getFoo", attributes: 7, type: !2) bool LLParser::ParseMDObjCProperty(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ - REQUIRED(name, MDStringField, ); \ + OPTIONAL(name, MDStringField, ); \ OPTIONAL(file, MDField, ); \ OPTIONAL(line, LineField, ); \ OPTIONAL(setter, MDStringField, ); \ @@ -4297,7 +4322,9 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) { } BasicBlock *BB = PFS.DefineBB(Name, NameLoc); - if (!BB) return true; + if (!BB) + return Error(NameLoc, + "unable to create block named '" + Name + "'"); std::string NameStr; @@ -5032,7 +5059,7 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) { ParseTypeAndValue(PersFn, PersFnLoc, PFS)) return true; - LandingPadInst *LP = LandingPadInst::Create(Ty, PersFn, 0); + std::unique_ptr<LandingPadInst> LP(LandingPadInst::Create(Ty, PersFn, 0)); LP->setCleanup(EatIfPresent(lltok::kw_cleanup)); while (Lex.getKind() == lltok::kw_catch || Lex.getKind() == lltok::kw_filter){ @@ -5046,10 +5073,8 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) { Value *V; LocTy VLoc; - if (ParseTypeAndValue(V, VLoc, PFS)) { - delete LP; + if (ParseTypeAndValue(V, VLoc, PFS)) return true; - } // A 'catch' type expects a non-array constant. 
A filter clause expects an // array constant. @@ -5061,10 +5086,13 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) { Error(VLoc, "'filter' clause has an invalid type"); } - LP->addClause(cast<Constant>(V)); + Constant *CV = dyn_cast<Constant>(V); + if (!CV) + return Error(VLoc, "clause argument must be a constant"); + LP->addClause(CV); } - Inst = LP; + Inst = LP.release(); return false; } @@ -5241,7 +5269,11 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { Lex.Lex(); } - if (ParseTypeAndValue(Val, Loc, PFS) || + Type *Ty = nullptr; + LocTy ExplicitTypeLoc = Lex.getLoc(); + if (ParseType(Ty) || + ParseToken(lltok::comma, "expected comma after load's type") || + ParseTypeAndValue(Val, Loc, PFS) || ParseScopeAndOrdering(isAtomic, Scope, Ordering) || ParseOptionalCommaAlign(Alignment, AteExtraComma)) return true; @@ -5254,6 +5286,10 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { if (Ordering == Release || Ordering == AcquireRelease) return Error(Loc, "atomic load cannot use Release ordering"); + if (Ty != cast<PointerType>(Val->getType())->getElementType()) + return Error(ExplicitTypeLoc, + "explicit pointee type doesn't match operand's pointee type"); + Inst = new LoadInst(Val, "", isVolatile, Alignment, Ordering, Scope); return AteExtraComma ? InstExtraComma : InstNormal; } @@ -5440,13 +5476,22 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { bool InBounds = EatIfPresent(lltok::kw_inbounds); - if (ParseTypeAndValue(Ptr, Loc, PFS)) return true; + Type *Ty = nullptr; + LocTy ExplicitTypeLoc = Lex.getLoc(); + if (ParseType(Ty) || + ParseToken(lltok::comma, "expected comma after getelementptr's type") || + ParseTypeAndValue(Ptr, Loc, PFS)) + return true; Type *BaseType = Ptr->getType(); PointerType *BasePointerType = dyn_cast<PointerType>(BaseType->getScalarType()); if (!BasePointerType) return Error(Loc, "base of getelementptr must be a pointer"); + if (Ty != BasePointerType->getElementType()) + return Error(ExplicitTypeLoc, + "explicit pointee type doesn't match operand's pointee type"); + SmallVector<Value*, 16> Indices; bool AteExtraComma = false; while (EatIfPresent(lltok::comma)) { @@ -5469,12 +5514,14 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { Indices.push_back(Val); } - if (!Indices.empty() && !BasePointerType->getElementType()->isSized()) + SmallPtrSet<const Type*, 4> Visited; + if (!Indices.empty() && + !BasePointerType->getElementType()->isSized(&Visited)) return Error(Loc, "base element of getelementptr must be sized"); if (!GetElementPtrInst::getIndexedType(BaseType, Indices)) return Error(Loc, "invalid getelementptr indices"); - Inst = GetElementPtrInst::Create(Ptr, Indices); + Inst = GetElementPtrInst::Create(Ty, Ptr, Indices); if (InBounds) cast<GetElementPtrInst>(Inst)->setIsInBounds(true); return AteExtraComma ? 
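The ParseLoad and ParseGetElementPtr hunks above implement the new explicit pointee type in textual IR; illustrative before/after strings (value names are hypothetical):

    // Old and new syntax accepted by the parser for these hunks:
    static const char *OldIR =
        "%p = getelementptr inbounds i32* %buf, i64 1\n"
        "%v = load i32* %p\n";
    static const char *NewIR =
        "%p = getelementptr inbounds i32, i32* %buf, i64 1\n"
        "%v = load i32, i32* %p\n";

On a mismatch between the leading type and the pointer operand's pointee type, the parser now reports "explicit pointee type doesn't match operand's pointee type" at the type's location.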
InstExtraComma : InstNormal; diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp index ed1a753..2e76c0e 100644 --- a/lib/AsmParser/Parser.cpp +++ b/lib/AsmParser/Parser.cpp @@ -13,6 +13,7 @@ #include "llvm/AsmParser/Parser.h" #include "LLParser.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/IR/Module.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 92af0f8..84753ff 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -8,34 +8,375 @@ //===----------------------------------------------------------------------===// #include "llvm/Bitcode/ReaderWriter.h" -#include "BitcodeReader.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" +#include "llvm/Bitcode/BitstreamReader.h" #include "llvm/Bitcode/LLVMBitCodes.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/GVMaterializer.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/OperandTraits.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Support/DataStream.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" - +#include <deque> using namespace llvm; +namespace { enum { SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex }; +class BitcodeReaderValueList { + std::vector<WeakVH> ValuePtrs; + + /// ResolveConstants - As we resolve forward-referenced constants, we add + /// information about them to this vector. This allows us to resolve them in + /// bulk instead of resolving each reference at a time. See the code in + /// ResolveConstantForwardRefs for more information about this. + /// + /// The key of this vector is the placeholder constant, the value is the slot + /// number that holds the resolved value. + typedef std::vector<std::pair<Constant*, unsigned> > ResolveConstantsTy; + ResolveConstantsTy ResolveConstants; + LLVMContext &Context; +public: + BitcodeReaderValueList(LLVMContext &C) : Context(C) {} + ~BitcodeReaderValueList() { + assert(ResolveConstants.empty() && "Constants not resolved?"); + } + + // vector compatibility methods + unsigned size() const { return ValuePtrs.size(); } + void resize(unsigned N) { ValuePtrs.resize(N); } + void push_back(Value *V) { + ValuePtrs.push_back(V); + } + + void clear() { + assert(ResolveConstants.empty() && "Constants not resolved?"); + ValuePtrs.clear(); + } + + Value *operator[](unsigned i) const { + assert(i < ValuePtrs.size()); + return ValuePtrs[i]; + } + + Value *back() const { return ValuePtrs.back(); } + void pop_back() { ValuePtrs.pop_back(); } + bool empty() const { return ValuePtrs.empty(); } + void shrinkTo(unsigned N) { + assert(N <= size() && "Invalid shrinkTo request!"); + ValuePtrs.resize(N); + } + + Constant *getConstantFwdRef(unsigned Idx, Type *Ty); + Value *getValueFwdRef(unsigned Idx, Type *Ty); + + void AssignValue(Value *V, unsigned Idx); + + /// ResolveConstantForwardRefs - Once all constants are read, this method bulk + /// resolves any forward references. 
+ void ResolveConstantForwardRefs(); +}; + +class BitcodeReaderMDValueList { + unsigned NumFwdRefs; + bool AnyFwdRefs; + unsigned MinFwdRef; + unsigned MaxFwdRef; + std::vector<TrackingMDRef> MDValuePtrs; + + LLVMContext &Context; +public: + BitcodeReaderMDValueList(LLVMContext &C) + : NumFwdRefs(0), AnyFwdRefs(false), Context(C) {} + + // vector compatibility methods + unsigned size() const { return MDValuePtrs.size(); } + void resize(unsigned N) { MDValuePtrs.resize(N); } + void push_back(Metadata *MD) { MDValuePtrs.emplace_back(MD); } + void clear() { MDValuePtrs.clear(); } + Metadata *back() const { return MDValuePtrs.back(); } + void pop_back() { MDValuePtrs.pop_back(); } + bool empty() const { return MDValuePtrs.empty(); } + + Metadata *operator[](unsigned i) const { + assert(i < MDValuePtrs.size()); + return MDValuePtrs[i]; + } + + void shrinkTo(unsigned N) { + assert(N <= size() && "Invalid shrinkTo request!"); + MDValuePtrs.resize(N); + } + + Metadata *getValueFwdRef(unsigned Idx); + void AssignValue(Metadata *MD, unsigned Idx); + void tryToResolveCycles(); +}; + +class BitcodeReader : public GVMaterializer { + LLVMContext &Context; + DiagnosticHandlerFunction DiagnosticHandler; + Module *TheModule; + std::unique_ptr<MemoryBuffer> Buffer; + std::unique_ptr<BitstreamReader> StreamFile; + BitstreamCursor Stream; + DataStreamer *LazyStreamer; + uint64_t NextUnreadBit; + bool SeenValueSymbolTable; + + std::vector<Type*> TypeList; + BitcodeReaderValueList ValueList; + BitcodeReaderMDValueList MDValueList; + std::vector<Comdat *> ComdatList; + SmallVector<Instruction *, 64> InstructionList; + + std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits; + std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits; + std::vector<std::pair<Function*, unsigned> > FunctionPrefixes; + std::vector<std::pair<Function*, unsigned> > FunctionPrologues; + + SmallVector<Instruction*, 64> InstsWithTBAATag; + + /// MAttributes - The set of attributes by index. Index zero in the + /// file is for null, and is thus not represented here. As such all indices + /// are off by one. + std::vector<AttributeSet> MAttributes; + + /// \brief The set of attribute groups. + std::map<unsigned, AttributeSet> MAttributeGroups; + + /// FunctionBBs - While parsing a function body, this is a list of the basic + /// blocks for the function. + std::vector<BasicBlock*> FunctionBBs; + + // When reading the module header, this list is populated with functions that + // have bodies later in the file. + std::vector<Function*> FunctionsWithBodies; + + // When intrinsic functions are encountered which require upgrading they are + // stored here with their replacement function. + typedef std::vector<std::pair<Function*, Function*> > UpgradedIntrinsicMap; + UpgradedIntrinsicMap UpgradedIntrinsics; + + // Map the bitcode's custom MDKind ID to the Module's MDKind ID. + DenseMap<unsigned, unsigned> MDKindMap; + + // Several operations happen after the module header has been read, but + // before function bodies are processed. This keeps track of whether + // we've done this yet. + bool SeenFirstFunctionBody; + + /// DeferredFunctionInfo - When function bodies are initially scanned, this + /// map contains info about where to find deferred function body in the + /// stream. + DenseMap<Function*, uint64_t> DeferredFunctionInfo; + + /// When Metadata block is initially scanned when parsing the module, we may + /// choose to defer parsing of the metadata. This vector contains info about + /// which Metadata blocks are deferred. 
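BitcodeReaderMDValueList above keeps a forward-reference window (MinFwdRef, MaxFwdRef, NumFwdRefs) so cycle resolution can revisit only the indices that might still hold temporary nodes; the bookkeeping on each new forward reference looks roughly like this (a sketch of the state updates, not the LLVM code verbatim):

    #include <algorithm>

    // Widen the [Min, Max] window and bump the outstanding-ref count.
    static void noteFwdRef(unsigned Idx, bool &AnyFwdRefs,
                           unsigned &MinFwdRef, unsigned &MaxFwdRef,
                           unsigned &NumFwdRefs) {
      if (!AnyFwdRefs) {
        AnyFwdRefs = true;
        MinFwdRef = MaxFwdRef = Idx;
      } else {
        MinFwdRef = std::min(MinFwdRef, Idx);
        MaxFwdRef = std::max(MaxFwdRef, Idx);
      }
      ++NumFwdRefs;
    }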
+ std::vector<uint64_t> DeferredMetadataInfo; + + /// These are basic blocks forward-referenced by block addresses. They are + /// inserted lazily into functions when they're loaded. The basic block ID is + /// its index into the vector. + DenseMap<Function *, std::vector<BasicBlock *>> BasicBlockFwdRefs; + std::deque<Function *> BasicBlockFwdRefQueue; + + /// UseRelativeIDs - Indicates that we are using a new encoding for + /// instruction operands where most operands in the current + /// FUNCTION_BLOCK are encoded relative to the instruction number, + /// for a more compact encoding. Some instruction operands are not + /// relative to the instruction ID: basic block numbers, and types. + /// Once the old style function blocks have been phased out, we would + /// not need this flag. + bool UseRelativeIDs; + + /// True if all functions will be materialized, negating the need to process + /// (e.g.) blockaddress forward references. + bool WillMaterializeAllForwardRefs; + + /// Functions that have block addresses taken. This is usually empty. + SmallPtrSet<const Function *, 4> BlockAddressesTaken; + + /// True if any Metadata block has been materialized. + bool IsMetadataMaterialized; + +public: + std::error_code Error(BitcodeError E, const Twine &Message); + std::error_code Error(BitcodeError E); + std::error_code Error(const Twine &Message); + + explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C, + DiagnosticHandlerFunction DiagnosticHandler); + explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C, + DiagnosticHandlerFunction DiagnosticHandler); + ~BitcodeReader() { FreeState(); } + + std::error_code materializeForwardReferencedFunctions(); + + void FreeState(); + + void releaseBuffer(); + + bool isDematerializable(const GlobalValue *GV) const override; + std::error_code materialize(GlobalValue *GV) override; + std::error_code MaterializeModule(Module *M) override; + std::vector<StructType *> getIdentifiedStructTypes() const override; + void Dematerialize(GlobalValue *GV) override; + + /// @brief Main interface to parsing a bitcode buffer. + /// @returns true if an error occurred. + std::error_code ParseBitcodeInto(Module *M, + bool ShouldLazyLoadMetadata = false); + + /// @brief Cheap mechanism to just extract module triple + /// @returns true if an error occurred. + ErrorOr<std::string> parseTriple(); + + static uint64_t decodeSignRotatedValue(uint64_t V); + + /// Materialize any deferred Metadata block. + std::error_code materializeMetadata() override; + +private: + std::vector<StructType *> IdentifiedStructTypes; + StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name); + StructType *createIdentifiedStructType(LLVMContext &Context); + + Type *getTypeByID(unsigned ID); + Value *getFnValueByID(unsigned ID, Type *Ty) { + if (Ty && Ty->isMetadataTy()) + return MetadataAsValue::get(Ty->getContext(), getFnMetadataByID(ID)); + return ValueList.getValueFwdRef(ID, Ty); + } + Metadata *getFnMetadataByID(unsigned ID) { + return MDValueList.getValueFwdRef(ID); + } + BasicBlock *getBasicBlock(unsigned ID) const { + if (ID >= FunctionBBs.size()) return nullptr; // Invalid ID + return FunctionBBs[ID]; + } + AttributeSet getAttributes(unsigned i) const { + if (i-1 < MAttributes.size()) + return MAttributes[i-1]; + return AttributeSet(); + } + + /// getValueTypePair - Read a value/type pair out of the specified record from + /// slot 'Slot'. Increment Slot past the number of slots used in the record. + /// Return true on failure. 
+ bool getValueTypePair(SmallVectorImpl<uint64_t> &Record, unsigned &Slot, + unsigned InstNum, Value *&ResVal) { + if (Slot == Record.size()) return true; + unsigned ValNo = (unsigned)Record[Slot++]; + // Adjust the ValNo, if it was encoded relative to the InstNum. + if (UseRelativeIDs) + ValNo = InstNum - ValNo; + if (ValNo < InstNum) { + // If this is not a forward reference, just return the value we already + // have. + ResVal = getFnValueByID(ValNo, nullptr); + return ResVal == nullptr; + } else if (Slot == Record.size()) { + return true; + } + + unsigned TypeNo = (unsigned)Record[Slot++]; + ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo)); + return ResVal == nullptr; + } + + /// popValue - Read a value out of the specified record from slot 'Slot'. + /// Increment Slot past the number of slots used by the value in the record. + /// Return true if there is an error. + bool popValue(SmallVectorImpl<uint64_t> &Record, unsigned &Slot, + unsigned InstNum, Type *Ty, Value *&ResVal) { + if (getValue(Record, Slot, InstNum, Ty, ResVal)) + return true; + // All values currently take a single record slot. + ++Slot; + return false; + } + + /// getValue -- Like popValue, but does not increment the Slot number. + bool getValue(SmallVectorImpl<uint64_t> &Record, unsigned Slot, + unsigned InstNum, Type *Ty, Value *&ResVal) { + ResVal = getValue(Record, Slot, InstNum, Ty); + return ResVal == nullptr; + } + + /// getValue -- Version of getValue that returns ResVal directly, + /// or 0 if there is an error. + Value *getValue(SmallVectorImpl<uint64_t> &Record, unsigned Slot, + unsigned InstNum, Type *Ty) { + if (Slot == Record.size()) return nullptr; + unsigned ValNo = (unsigned)Record[Slot]; + // Adjust the ValNo, if it was encoded relative to the InstNum. + if (UseRelativeIDs) + ValNo = InstNum - ValNo; + return getFnValueByID(ValNo, Ty); + } + + /// getValueSigned -- Like getValue, but decodes signed VBRs. + Value *getValueSigned(SmallVectorImpl<uint64_t> &Record, unsigned Slot, + unsigned InstNum, Type *Ty) { + if (Slot == Record.size()) return nullptr; + unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]); + // Adjust the ValNo, if it was encoded relative to the InstNum. + if (UseRelativeIDs) + ValNo = InstNum - ValNo; + return getFnValueByID(ValNo, Ty); + } + + /// Converts alignment exponent (i.e. power of two (or zero)) to the + /// corresponding alignment to use. If alignment is too large, returns + /// a corresponding error code. + std::error_code parseAlignmentValue(uint64_t Exponent, unsigned &Alignment); + std::error_code ParseAttrKind(uint64_t Code, Attribute::AttrKind *Kind); + std::error_code ParseModule(bool Resume, bool ShouldLazyLoadMetadata = false); + std::error_code ParseAttributeBlock(); + std::error_code ParseAttributeGroupBlock(); + std::error_code ParseTypeTable(); + std::error_code ParseTypeTableBody(); + + std::error_code ParseValueSymbolTable(); + std::error_code ParseConstants(); + std::error_code RememberAndSkipFunctionBody(); + /// Save the positions of the Metadata blocks and skip parsing the blocks. 
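getValueTypePair and the getValue helpers above all decode the relative-ID scheme described at UseRelativeIDs: operands are stored as InstNum - ValNo, so references to recently defined values encode as small integers. The decode step alone (hypothetical helper):

    #include <cstdint>

    // Undo the relative encoding when the function block uses it.
    static unsigned decodeOperandID(uint64_t Stored, unsigned InstNum,
                                    bool UseRelativeIDs) {
      unsigned ValNo = (unsigned)Stored;
      return UseRelativeIDs ? InstNum - ValNo : ValNo;
    }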
+ std::error_code rememberAndSkipMetadata(); + std::error_code ParseFunctionBody(Function *F); + std::error_code GlobalCleanup(); + std::error_code ResolveGlobalAndAliasInits(); + std::error_code ParseMetadata(); + std::error_code ParseMetadataAttachment(); + ErrorOr<std::string> parseModuleTriple(); + std::error_code ParseUseLists(); + std::error_code InitStream(); + std::error_code InitStreamFromBuffer(); + std::error_code InitLazyStream(); + std::error_code FindFunctionInStream( + Function *F, + DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator); +}; +} // namespace + BitcodeDiagnosticInfo::BitcodeDiagnosticInfo(std::error_code EC, DiagnosticSeverity Severity, const Twine &Msg) @@ -81,7 +422,7 @@ BitcodeReader::BitcodeReader(MemoryBuffer *buffer, LLVMContext &C, TheModule(nullptr), Buffer(buffer), LazyStreamer(nullptr), NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C), MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false), - WillMaterializeAllForwardRefs(false) {} + WillMaterializeAllForwardRefs(false), IsMetadataMaterialized(false) {} BitcodeReader::BitcodeReader(DataStreamer *streamer, LLVMContext &C, DiagnosticHandlerFunction DiagnosticHandler) @@ -89,7 +430,7 @@ BitcodeReader::BitcodeReader(DataStreamer *streamer, LLVMContext &C, TheModule(nullptr), Buffer(nullptr), LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C), MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false), - WillMaterializeAllForwardRefs(false) {} + WillMaterializeAllForwardRefs(false), IsMetadataMaterialized(false) {} std::error_code BitcodeReader::materializeForwardReferencedFunctions() { if (WillMaterializeAllForwardRefs) @@ -135,6 +476,7 @@ void BitcodeReader::FreeState() { std::vector<BasicBlock*>().swap(FunctionBBs); std::vector<Function*>().swap(FunctionsWithBodies); DeferredFunctionInfo.clear(); + DeferredMetadataInfo.clear(); MDKindMap.clear(); assert(BasicBlockFwdRefs.empty() && "Unresolved blockaddress fwd references"); @@ -1198,6 +1540,7 @@ std::error_code BitcodeReader::ParseValueSymbolTable() { static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; } std::error_code BitcodeReader::ParseMetadata() { + IsMetadataMaterialized = true; unsigned NextMDValueNo = MDValueList.size(); if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID)) @@ -1348,14 +1691,15 @@ std::error_code BitcodeReader::ParseMetadata() { if (Record.size() != 5) return Error("Invalid record"); - auto get = Record[0] ? MDLocation::getDistinct : MDLocation::get; unsigned Line = Record[1]; unsigned Column = Record[2]; MDNode *Scope = cast<MDNode>(MDValueList.getValueFwdRef(Record[3])); Metadata *InlinedAt = Record[4] ? 
MDValueList.getValueFwdRef(Record[4] - 1) : nullptr; - MDValueList.AssignValue(get(Context, Line, Column, Scope, InlinedAt), - NextMDValueNo++); + MDValueList.AssignValue( + GET_OR_DISTINCT(MDLocation, Record[0], + (Context, Line, Column, Scope, InlinedAt)), + NextMDValueNo++); break; } case bitc::METADATA_GENERIC_DEBUG: { @@ -1952,19 +2296,26 @@ std::error_code BitcodeReader::ParseConstants() { } case bitc::CST_CODE_CE_INBOUNDS_GEP: case bitc::CST_CODE_CE_GEP: { // CE_GEP: [n x operands] - if (Record.size() & 1) - return Error("Invalid record"); + unsigned OpNum = 0; + Type *PointeeType = nullptr; + if (Record.size() % 2) + PointeeType = getTypeByID(Record[OpNum++]); SmallVector<Constant*, 16> Elts; - for (unsigned i = 0, e = Record.size(); i != e; i += 2) { - Type *ElTy = getTypeByID(Record[i]); + while (OpNum != Record.size()) { + Type *ElTy = getTypeByID(Record[OpNum++]); if (!ElTy) return Error("Invalid record"); - Elts.push_back(ValueList.getConstantFwdRef(Record[i+1], ElTy)); + Elts.push_back(ValueList.getConstantFwdRef(Record[OpNum++], ElTy)); } + ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end()); V = ConstantExpr::getGetElementPtr(Elts[0], Indices, BitCode == bitc::CST_CODE_CE_INBOUNDS_GEP); + if (PointeeType && + PointeeType != cast<GEPOperator>(V)->getSourceElementType()) + return Error("Explicit gep operator type does not match pointee type " + "of pointer operand"); break; } case bitc::CST_CODE_CE_SELECT: { // CE_SELECT: [opval#, opval#, opval#] @@ -2234,6 +2585,30 @@ std::error_code BitcodeReader::ParseUseLists() { } } +/// When we see the block for metadata, remember where it is and then skip it. +/// This lets us lazily deserialize the metadata. +std::error_code BitcodeReader::rememberAndSkipMetadata() { + // Save the current stream state. + uint64_t CurBit = Stream.GetCurrentBitNo(); + DeferredMetadataInfo.push_back(CurBit); + + // Skip over the block for now. + if (Stream.SkipBlock()) + return Error("Invalid record"); + return std::error_code(); +} + +std::error_code BitcodeReader::materializeMetadata() { + for (uint64_t BitPos : DeferredMetadataInfo) { + // Move the bit stream to the saved position. + Stream.JumpToBit(BitPos); + if (std::error_code EC = ParseMetadata()) + return EC; + } + DeferredMetadataInfo.clear(); + return std::error_code(); +} + /// RememberAndSkipFunctionBody - When we see the block for a function body, /// remember where it is and then skip it. This lets us lazily deserialize the /// functions. 
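One small helper in the metadata hunks above deserves a worked example: unrotateSign decodes the sign-rotated integers used in metadata records, where bit 0 carries the sign so that small negative numbers stay small as unsigned VBRs. Even codes decode to non-negative values, odd codes to negative ones. A self-contained check of the mapping (the helper body is copied verbatim from the hunk above):

    #include <cassert>
    #include <cstdint>

    static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; }

    int main() {
      assert(unrotateSign(0) == 0);  // even codes 0, 2, 4, ... ->  0, 1, 2, ...
      assert(unrotateSign(4) == 2);
      assert(unrotateSign(1) == -1); // odd codes  1, 3, 5, ... -> -1, -2, -3, ...
      assert(unrotateSign(5) == -3);
    }

Note that this is a different mapping from decodeSignRotatedValue, which the reader uses for instruction operands and which special-cases the minimum value.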
@@ -2284,7 +2659,8 @@ std::error_code BitcodeReader::GlobalCleanup() { return std::error_code(); } -std::error_code BitcodeReader::ParseModule(bool Resume) { +std::error_code BitcodeReader::ParseModule(bool Resume, + bool ShouldLazyLoadMetadata) { if (Resume) Stream.JumpToBit(NextUnreadBit); else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) @@ -2338,6 +2714,12 @@ std::error_code BitcodeReader::ParseModule(bool Resume) { return EC; break; case bitc::METADATA_BLOCK_ID: + if (ShouldLazyLoadMetadata && !IsMetadataMaterialized) { + if (std::error_code EC = rememberAndSkipMetadata()) + return EC; + break; + } + assert(DeferredMetadataInfo.empty() && "Unexpected deferred metadata"); if (std::error_code EC = ParseMetadata()) return EC; break; @@ -2652,7 +3034,8 @@ std::error_code BitcodeReader::ParseModule(bool Resume) { } } -std::error_code BitcodeReader::ParseBitcodeInto(Module *M) { +std::error_code BitcodeReader::ParseBitcodeInto(Module *M, + bool ShouldLazyLoadMetadata) { TheModule = nullptr; if (std::error_code EC = InitStream()) @@ -2693,7 +3076,7 @@ std::error_code BitcodeReader::ParseBitcodeInto(Module *M) { if (TheModule) return Error("Invalid multiple blocks"); TheModule = M; - if (std::error_code EC = ParseModule(false)) + if (std::error_code EC = ParseModule(false, ShouldLazyLoadMetadata)) return EC; if (LazyStreamer) return std::error_code(); @@ -3082,6 +3465,13 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr)) return Error("Invalid record"); + if (Ty && + Ty != + cast<SequentialType>(BasePtr->getType()->getScalarType()) + ->getElementType()) + return Error( + "Explicit gep type does not match pointee type of pointer operand"); + SmallVector<Value*, 16> GEPIdx; while (OpNum != Record.size()) { Value *Op; @@ -3090,8 +3480,8 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { GEPIdx.push_back(Op); } - I = GetElementPtrInst::Create(BasePtr, GEPIdx); - assert(!Ty || Ty == cast<GetElementPtrInst>(I)->getSourceElementType()); + I = GetElementPtrInst::Create(Ty, BasePtr, GEPIdx); + InstructionList.push_back(I); if (InBounds) cast<GetElementPtrInst>(I)->setIsInBounds(true); @@ -3600,8 +3990,9 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { return EC; I = new LoadInst(Op, "", Record[OpNum+1], Align); - assert((!Ty || Ty == I->getType()) && - "Explicit type doesn't match pointee type of the first operand"); + if (Ty && Ty != I->getType()) + return Error("Explicit load type does not match pointee type of " + "pointer operand"); InstructionList.push_back(I); break; @@ -3631,6 +4022,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { return EC; I = new LoadInst(Op, "", Record[OpNum+1], Align, Ordering, SynchScope); + (void)Ty; assert((!Ty || Ty == I->getType()) && "Explicit type doesn't match pointee type of the first operand"); @@ -3890,6 +4282,9 @@ std::error_code BitcodeReader::FindFunctionInStream( void BitcodeReader::releaseBuffer() { Buffer.release(); } std::error_code BitcodeReader::materialize(GlobalValue *GV) { + if (std::error_code EC = materializeMetadata()) + return EC; + Function *F = dyn_cast<Function>(GV); // If it's not a function or is already material, ignore the request. 
if (!F || !F->isMaterializable()) @@ -3957,6 +4352,9 @@ std::error_code BitcodeReader::MaterializeModule(Module *M) { assert(M == TheModule && "Can only Materialize the Module this BitcodeReader is attached to."); + if (std::error_code EC = materializeMetadata()) + return EC; + // Promise to materialize all forward references. WillMaterializeAllForwardRefs = true; @@ -4097,7 +4495,8 @@ const std::error_category &llvm::BitcodeErrorCategory() { static ErrorOr<Module *> getLazyBitcodeModuleImpl(std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context, bool WillMaterializeAll, - DiagnosticHandlerFunction DiagnosticHandler) { + DiagnosticHandlerFunction DiagnosticHandler, + bool ShouldLazyLoadMetadata = false) { Module *M = new Module(Buffer->getBufferIdentifier(), Context); BitcodeReader *R = new BitcodeReader(Buffer.get(), Context, DiagnosticHandler); @@ -4109,7 +4508,8 @@ getLazyBitcodeModuleImpl(std::unique_ptr<MemoryBuffer> &&Buffer, return EC; }; - if (std::error_code EC = R->ParseBitcodeInto(M)) + // Delay parsing Metadata if ShouldLazyLoadMetadata is true. + if (std::error_code EC = R->ParseBitcodeInto(M, ShouldLazyLoadMetadata)) return cleanupOnError(EC); if (!WillMaterializeAll) @@ -4124,9 +4524,10 @@ getLazyBitcodeModuleImpl(std::unique_ptr<MemoryBuffer> &&Buffer, ErrorOr<Module *> llvm::getLazyBitcodeModule(std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context, - DiagnosticHandlerFunction DiagnosticHandler) { + DiagnosticHandlerFunction DiagnosticHandler, + bool ShouldLazyLoadMetadata) { return getLazyBitcodeModuleImpl(std::move(Buffer), Context, false, - DiagnosticHandler); + DiagnosticHandler, ShouldLazyLoadMetadata); } ErrorOr<std::unique_ptr<Module>> diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h deleted file mode 100644 index 9803e78..0000000 --- a/lib/Bitcode/Reader/BitcodeReader.h +++ /dev/null @@ -1,369 +0,0 @@ -//===- BitcodeReader.h - Internal BitcodeReader impl ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This header defines the BitcodeReader class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_BITCODE_READER_BITCODEREADER_H -#define LLVM_LIB_BITCODE_READER_BITCODEREADER_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/Bitcode/BitstreamReader.h" -#include "llvm/Bitcode/LLVMBitCodes.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/GVMaterializer.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/OperandTraits.h" -#include "llvm/IR/TrackingMDRef.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/ValueHandle.h" -#include <deque> -#include <system_error> -#include <vector> - -namespace llvm { - class Comdat; - class MemoryBuffer; - class LLVMContext; - -//===----------------------------------------------------------------------===// -// BitcodeReaderValueList Class -//===----------------------------------------------------------------------===// - -class BitcodeReaderValueList { - std::vector<WeakVH> ValuePtrs; - - /// ResolveConstants - As we resolve forward-referenced constants, we add - /// information about them to this vector. This allows us to resolve them in - /// bulk instead of resolving each reference at a time. See the code in - /// ResolveConstantForwardRefs for more information about this. 
- /// - /// The key of this vector is the placeholder constant, the value is the slot - /// number that holds the resolved value. - typedef std::vector<std::pair<Constant*, unsigned> > ResolveConstantsTy; - ResolveConstantsTy ResolveConstants; - LLVMContext &Context; -public: - BitcodeReaderValueList(LLVMContext &C) : Context(C) {} - ~BitcodeReaderValueList() { - assert(ResolveConstants.empty() && "Constants not resolved?"); - } - - // vector compatibility methods - unsigned size() const { return ValuePtrs.size(); } - void resize(unsigned N) { ValuePtrs.resize(N); } - void push_back(Value *V) { - ValuePtrs.push_back(V); - } - - void clear() { - assert(ResolveConstants.empty() && "Constants not resolved?"); - ValuePtrs.clear(); - } - - Value *operator[](unsigned i) const { - assert(i < ValuePtrs.size()); - return ValuePtrs[i]; - } - - Value *back() const { return ValuePtrs.back(); } - void pop_back() { ValuePtrs.pop_back(); } - bool empty() const { return ValuePtrs.empty(); } - void shrinkTo(unsigned N) { - assert(N <= size() && "Invalid shrinkTo request!"); - ValuePtrs.resize(N); - } - - Constant *getConstantFwdRef(unsigned Idx, Type *Ty); - Value *getValueFwdRef(unsigned Idx, Type *Ty); - - void AssignValue(Value *V, unsigned Idx); - - /// ResolveConstantForwardRefs - Once all constants are read, this method bulk - /// resolves any forward references. - void ResolveConstantForwardRefs(); -}; - - -//===----------------------------------------------------------------------===// -// BitcodeReaderMDValueList Class -//===----------------------------------------------------------------------===// - -class BitcodeReaderMDValueList { - unsigned NumFwdRefs; - bool AnyFwdRefs; - unsigned MinFwdRef; - unsigned MaxFwdRef; - std::vector<TrackingMDRef> MDValuePtrs; - - LLVMContext &Context; -public: - BitcodeReaderMDValueList(LLVMContext &C) - : NumFwdRefs(0), AnyFwdRefs(false), Context(C) {} - - // vector compatibility methods - unsigned size() const { return MDValuePtrs.size(); } - void resize(unsigned N) { MDValuePtrs.resize(N); } - void push_back(Metadata *MD) { MDValuePtrs.emplace_back(MD); } - void clear() { MDValuePtrs.clear(); } - Metadata *back() const { return MDValuePtrs.back(); } - void pop_back() { MDValuePtrs.pop_back(); } - bool empty() const { return MDValuePtrs.empty(); } - - Metadata *operator[](unsigned i) const { - assert(i < MDValuePtrs.size()); - return MDValuePtrs[i]; - } - - void shrinkTo(unsigned N) { - assert(N <= size() && "Invalid shrinkTo request!"); - MDValuePtrs.resize(N); - } - - Metadata *getValueFwdRef(unsigned Idx); - void AssignValue(Metadata *MD, unsigned Idx); - void tryToResolveCycles(); -}; - -class BitcodeReader : public GVMaterializer { - LLVMContext &Context; - DiagnosticHandlerFunction DiagnosticHandler; - Module *TheModule; - std::unique_ptr<MemoryBuffer> Buffer; - std::unique_ptr<BitstreamReader> StreamFile; - BitstreamCursor Stream; - DataStreamer *LazyStreamer; - uint64_t NextUnreadBit; - bool SeenValueSymbolTable; - - std::vector<Type*> TypeList; - BitcodeReaderValueList ValueList; - BitcodeReaderMDValueList MDValueList; - std::vector<Comdat *> ComdatList; - SmallVector<Instruction *, 64> InstructionList; - - std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits; - std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits; - std::vector<std::pair<Function*, unsigned> > FunctionPrefixes; - std::vector<std::pair<Function*, unsigned> > FunctionPrologues; - - SmallVector<Instruction*, 64> InstsWithTBAATag; - - /// MAttributes - The set of 
attributes by index. Index zero in the - /// file is for null, and is thus not represented here. As such all indices - /// are off by one. - std::vector<AttributeSet> MAttributes; - - /// \brief The set of attribute groups. - std::map<unsigned, AttributeSet> MAttributeGroups; - - /// FunctionBBs - While parsing a function body, this is a list of the basic - /// blocks for the function. - std::vector<BasicBlock*> FunctionBBs; - - // When reading the module header, this list is populated with functions that - // have bodies later in the file. - std::vector<Function*> FunctionsWithBodies; - - // When intrinsic functions are encountered which require upgrading they are - // stored here with their replacement function. - typedef std::vector<std::pair<Function*, Function*> > UpgradedIntrinsicMap; - UpgradedIntrinsicMap UpgradedIntrinsics; - - // Map the bitcode's custom MDKind ID to the Module's MDKind ID. - DenseMap<unsigned, unsigned> MDKindMap; - - // Several operations happen after the module header has been read, but - // before function bodies are processed. This keeps track of whether - // we've done this yet. - bool SeenFirstFunctionBody; - - /// DeferredFunctionInfo - When function bodies are initially scanned, this - /// map contains info about where to find deferred function body in the - /// stream. - DenseMap<Function*, uint64_t> DeferredFunctionInfo; - - /// These are basic blocks forward-referenced by block addresses. They are - /// inserted lazily into functions when they're loaded. The basic block ID is - /// its index into the vector. - DenseMap<Function *, std::vector<BasicBlock *>> BasicBlockFwdRefs; - std::deque<Function *> BasicBlockFwdRefQueue; - - /// UseRelativeIDs - Indicates that we are using a new encoding for - /// instruction operands where most operands in the current - /// FUNCTION_BLOCK are encoded relative to the instruction number, - /// for a more compact encoding. Some instruction operands are not - /// relative to the instruction ID: basic block numbers, and types. - /// Once the old style function blocks have been phased out, we would - /// not need this flag. - bool UseRelativeIDs; - - /// True if all functions will be materialized, negating the need to process - /// (e.g.) blockaddress forward references. - bool WillMaterializeAllForwardRefs; - - /// Functions that have block addresses taken. This is usually empty. - SmallPtrSet<const Function *, 4> BlockAddressesTaken; - -public: - std::error_code Error(BitcodeError E, const Twine &Message); - std::error_code Error(BitcodeError E); - std::error_code Error(const Twine &Message); - - explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C, - DiagnosticHandlerFunction DiagnosticHandler); - explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C, - DiagnosticHandlerFunction DiagnosticHandler); - ~BitcodeReader() { FreeState(); } - - std::error_code materializeForwardReferencedFunctions(); - - void FreeState(); - - void releaseBuffer(); - - bool isDematerializable(const GlobalValue *GV) const override; - std::error_code materialize(GlobalValue *GV) override; - std::error_code MaterializeModule(Module *M) override; - std::vector<StructType *> getIdentifiedStructTypes() const override; - void Dematerialize(GlobalValue *GV) override; - - /// @brief Main interface to parsing a bitcode buffer. - /// @returns true if an error occurred. - std::error_code ParseBitcodeInto(Module *M); - - /// @brief Cheap mechanism to just extract module triple - /// @returns true if an error occurred. 
- ErrorOr<std::string> parseTriple(); - - static uint64_t decodeSignRotatedValue(uint64_t V); - -private: - std::vector<StructType *> IdentifiedStructTypes; - StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name); - StructType *createIdentifiedStructType(LLVMContext &Context); - - Type *getTypeByID(unsigned ID); - Value *getFnValueByID(unsigned ID, Type *Ty) { - if (Ty && Ty->isMetadataTy()) - return MetadataAsValue::get(Ty->getContext(), getFnMetadataByID(ID)); - return ValueList.getValueFwdRef(ID, Ty); - } - Metadata *getFnMetadataByID(unsigned ID) { - return MDValueList.getValueFwdRef(ID); - } - BasicBlock *getBasicBlock(unsigned ID) const { - if (ID >= FunctionBBs.size()) return nullptr; // Invalid ID - return FunctionBBs[ID]; - } - AttributeSet getAttributes(unsigned i) const { - if (i-1 < MAttributes.size()) - return MAttributes[i-1]; - return AttributeSet(); - } - - /// getValueTypePair - Read a value/type pair out of the specified record from - /// slot 'Slot'. Increment Slot past the number of slots used in the record. - /// Return true on failure. - bool getValueTypePair(SmallVectorImpl<uint64_t> &Record, unsigned &Slot, - unsigned InstNum, Value *&ResVal) { - if (Slot == Record.size()) return true; - unsigned ValNo = (unsigned)Record[Slot++]; - // Adjust the ValNo, if it was encoded relative to the InstNum. - if (UseRelativeIDs) - ValNo = InstNum - ValNo; - if (ValNo < InstNum) { - // If this is not a forward reference, just return the value we already - // have. - ResVal = getFnValueByID(ValNo, nullptr); - return ResVal == nullptr; - } else if (Slot == Record.size()) { - return true; - } - - unsigned TypeNo = (unsigned)Record[Slot++]; - ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo)); - return ResVal == nullptr; - } - - /// popValue - Read a value out of the specified record from slot 'Slot'. - /// Increment Slot past the number of slots used by the value in the record. - /// Return true if there is an error. - bool popValue(SmallVectorImpl<uint64_t> &Record, unsigned &Slot, - unsigned InstNum, Type *Ty, Value *&ResVal) { - if (getValue(Record, Slot, InstNum, Ty, ResVal)) - return true; - // All values currently take a single record slot. - ++Slot; - return false; - } - - /// getValue -- Like popValue, but does not increment the Slot number. - bool getValue(SmallVectorImpl<uint64_t> &Record, unsigned Slot, - unsigned InstNum, Type *Ty, Value *&ResVal) { - ResVal = getValue(Record, Slot, InstNum, Ty); - return ResVal == nullptr; - } - - /// getValue -- Version of getValue that returns ResVal directly, - /// or 0 if there is an error. - Value *getValue(SmallVectorImpl<uint64_t> &Record, unsigned Slot, - unsigned InstNum, Type *Ty) { - if (Slot == Record.size()) return nullptr; - unsigned ValNo = (unsigned)Record[Slot]; - // Adjust the ValNo, if it was encoded relative to the InstNum. - if (UseRelativeIDs) - ValNo = InstNum - ValNo; - return getFnValueByID(ValNo, Ty); - } - - /// getValueSigned -- Like getValue, but decodes signed VBRs. - Value *getValueSigned(SmallVectorImpl<uint64_t> &Record, unsigned Slot, - unsigned InstNum, Type *Ty) { - if (Slot == Record.size()) return nullptr; - unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]); - // Adjust the ValNo, if it was encoded relative to the InstNum. - if (UseRelativeIDs) - ValNo = InstNum - ValNo; - return getFnValueByID(ValNo, Ty); - } - - /// Converts alignment exponent (i.e. power of two (or zero)) to the - /// corresponding alignment to use. 
If alignment is too large, returns - /// a corresponding error code. - std::error_code parseAlignmentValue(uint64_t Exponent, unsigned &Alignment); - std::error_code ParseAttrKind(uint64_t Code, Attribute::AttrKind *Kind); - std::error_code ParseModule(bool Resume); - std::error_code ParseAttributeBlock(); - std::error_code ParseAttributeGroupBlock(); - std::error_code ParseTypeTable(); - std::error_code ParseTypeTableBody(); - - std::error_code ParseValueSymbolTable(); - std::error_code ParseConstants(); - std::error_code RememberAndSkipFunctionBody(); - std::error_code ParseFunctionBody(Function *F); - std::error_code GlobalCleanup(); - std::error_code ResolveGlobalAndAliasInits(); - std::error_code ParseMetadata(); - std::error_code ParseMetadataAttachment(); - ErrorOr<std::string> parseModuleTriple(); - std::error_code ParseUseLists(); - std::error_code InitStream(); - std::error_code InitStreamFromBuffer(); - std::error_code InitLazyStream(); - std::error_code FindFunctionInStream( - Function *F, - DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator); -}; - -} // End llvm namespace - -#endif diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp index ca68257..beaaf7a 100644 --- a/lib/Bitcode/Reader/BitstreamReader.cpp +++ b/lib/Bitcode/Reader/BitstreamReader.cpp @@ -245,7 +245,7 @@ void BitstreamCursor::ReadAbbrevRecord() { BitCodeAbbrev *Abbv = new BitCodeAbbrev(); unsigned NumOpInfo = ReadVBR(5); for (unsigned i = 0; i != NumOpInfo; ++i) { - bool IsLiteral = Read(1) ? true : false; + bool IsLiteral = Read(1); if (IsLiteral) { Abbv->Add(BitCodeAbbrevOp(ReadVBR64(8))); continue; diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index ecb6f7c..0123fb2 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1205,6 +1205,8 @@ static void WriteModuleMetadata(const Module *M, SmallVector<uint64_t, 64> Record; for (const Metadata *MD : MDs) { if (const MDNode *N = dyn_cast<MDNode>(MD)) { + assert(N->isResolved() && "Expected forward references to be resolved"); + switch (N->getMetadataID()) { default: llvm_unreachable("Invalid MDNode subclass"); @@ -1522,15 +1524,18 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, Record.push_back(Flags); } break; - case Instruction::GetElementPtr: + case Instruction::GetElementPtr: { Code = bitc::CST_CODE_CE_GEP; - if (cast<GEPOperator>(C)->isInBounds()) + const auto *GO = cast<GEPOperator>(C); + if (GO->isInBounds()) Code = bitc::CST_CODE_CE_INBOUNDS_GEP; + Record.push_back(VE.getTypeID(GO->getSourceElementType())); for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) { Record.push_back(VE.getTypeID(C->getOperand(i)->getType())); Record.push_back(VE.getValueID(C->getOperand(i))); } break; + } case Instruction::Select: Code = bitc::CST_CODE_CE_SELECT; Record.push_back(VE.getValueID(C->getOperand(0))); diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 8ab2d6e..ce10998 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -18,3 +18,4 @@ add_subdirectory(AsmParser) add_subdirectory(LineEditor) add_subdirectory(ProfileData) add_subdirectory(Fuzzer) +add_subdirectory(Passes) diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index e50b846..8e11fe1 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -312,8 +312,7 @@ static const Value *getNoopInput(const Value *V, // previous aggregate. Combine the two paths to obtain the true address of // our element. 
ArrayRef<unsigned> ExtractLoc = EVI->getIndices(); - std::copy(ExtractLoc.rbegin(), ExtractLoc.rend(), - std::back_inserter(ValLoc)); + ValLoc.append(ExtractLoc.rbegin(), ExtractLoc.rend()); NoopInput = Op; } // Terminate if we couldn't find anything to look through. @@ -601,10 +600,8 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F, // The manipulations performed when we're looking through an insertvalue or // an extractvalue would happen at the front of the RetPath list, so since // we have to copy it anyway it's more efficient to create a reversed copy. - using std::copy; - SmallVector<unsigned, 4> TmpRetPath, TmpCallPath; - copy(RetPath.rbegin(), RetPath.rend(), std::back_inserter(TmpRetPath)); - copy(CallPath.rbegin(), CallPath.rend(), std::back_inserter(TmpCallPath)); + SmallVector<unsigned, 4> TmpRetPath(RetPath.rbegin(), RetPath.rend()); + SmallVector<unsigned, 4> TmpCallPath(CallPath.rbegin(), CallPath.rend()); // Finally, we can check whether the value produced by the tail call at this // index is compatible with the value we return. diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk index ec3cd77..2827d73 100644 --- a/lib/CodeGen/Android.mk +++ b/lib/CodeGen/Android.mk @@ -21,7 +21,6 @@ codegen_SRC_FILES := \ ExecutionDepsFix.cpp \ ExpandISelPseudos.cpp \ ExpandPostRAPseudos.cpp \ - ForwardControlFlowIntegrity.cpp \ GCMetadata.cpp \ GCMetadataPrinter.cpp \ GCRootLowering.cpp \ @@ -31,7 +30,6 @@ codegen_SRC_FILES := \ InlineSpiller.cpp \ InterferenceCache.cpp \ IntrinsicLowering.cpp \ - JumpInstrTables.cpp \ LatencyPriorityQueue.cpp \ LexicalScopes.cpp \ LiveDebugVariables.cpp \ @@ -53,6 +51,7 @@ codegen_SRC_FILES := \ MachineCombiner.cpp \ MachineCopyPropagation.cpp \ MachineCSE.cpp \ + MachineDominanceFrontier.cpp \ MachineDominators.cpp \ MachineFunctionAnalysis.cpp \ MachineFunction.cpp \ @@ -66,6 +65,7 @@ codegen_SRC_FILES := \ MachineModuleInfoImpls.cpp \ MachinePassRegistry.cpp \ MachinePostDominators.cpp \ + MachineRegionInfo.cpp \ MachineRegisterInfo.cpp \ MachineScheduler.cpp \ MachineSink.cpp \ diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 6fe75ad..9a16e15 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -36,8 +36,7 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -ARMException::ARMException(AsmPrinter *A) - : EHStreamer(A), shouldEmitCFI(false) {} +ARMException::ARMException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {} ARMException::~ARMException() {} @@ -53,13 +52,9 @@ void ARMException::endModule() { Asm->OutStreamer.EmitCFISections(false, true); } -/// beginFunction - Gather pre-function exception information. Assumes it's -/// being emitted immediately after the function entry point. void ARMException::beginFunction(const MachineFunction *MF) { if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM) getTargetStreamer().emitFnStart(); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", - Asm->getFunctionNumber())); // See if we need call frame info. AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves(); assert(MoveType != AsmPrinter::CFI_M_EH && @@ -72,20 +67,12 @@ void ARMException::beginFunction(const MachineFunction *MF) { /// endFunction - Gather and emit post-function exception information. /// -void ARMException::endFunction(const MachineFunction *) { - if (shouldEmitCFI) - Asm->OutStreamer.EmitCFIEndProc(); - - // Map all labels and get rid of any dead landing pads. 
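The rewrite of that function just below shows the pattern this commit applies across the exception handlers: the per-handler eh_func_begin/eh_func_end temporary labels disappear, and every consumer now shares the CurrentFnBegin/CurrentFnEnd symbols that AsmPrinter itself creates (see the createTempSymbol("func_begin") and createTempSymbol("func_end") hunks in AsmPrinter.cpp further down). The .cfi_endproc emission and the TidyLandingPads call also leave endFunction; their new home is not shown in this hunk, though the markFunctionEnd hook added later in this diff is the natural fit. A stand-in model of the resulting label flow (illustrative only; the .Lfunc_begin spelling is an assumption about the ELF private prefix, not something this hunk shows):

    #include <cstdio>
    #include <string>

    int main() {
      unsigned FnNum = 7;
      // Before: each consumer minted its own pair, e.g. eh_func_begin7.
      // After: one shared pair per function serves size, EH, and debug info.
      std::string Begin = ".Lfunc_begin" + std::to_string(FnNum); // assumed name
      std::string End = ".Lfunc_end" + std::to_string(FnNum);     // assumed name
      std::printf("%s:\n  # ...function body...\n%s:\n", Begin.c_str(),
                  End.c_str());
      std::printf("  .long %s-%s  # one range, many consumers\n", End.c_str(),
                  Begin.c_str());
    }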
- MMI->TidyLandingPads(); - +void ARMException::endFunction(const MachineFunction *MF) { ARMTargetStreamer &ATS = getTargetStreamer(); if (!Asm->MF->getFunction()->needsUnwindTableEntry() && MMI->getLandingPads().empty()) ATS.emitCantUnwind(); else { - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", - Asm->getFunctionNumber())); if (!MMI->getLandingPads().empty()) { // Emit references to personality. if (const Function *Personality = MMI->getPersonality()) { diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 988381d..07d6731 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -77,11 +77,11 @@ static gcp_map_type &getGCMap(void *&P) { /// getGVAlignmentLog2 - Return the alignment to use for the specified global /// value in log2 form. This rounds up to the preferred alignment if possible /// and legal. -static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD, +static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL, unsigned InBits = 0) { unsigned NumBits = 0; if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) - NumBits = TD.getPreferredAlignmentLog(GVar); + NumBits = DL.getPreferredAlignmentLog(GVar); // If InBits is specified, round it to it. if (InBits > NumBits) @@ -103,12 +103,14 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD, AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer) : MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()), OutContext(Streamer->getContext()), OutStreamer(*Streamer.release()), - LastMI(nullptr), LastFn(0), Counter(~0U), SetCounter(0) { + LastMI(nullptr), LastFn(0), Counter(~0U) { DD = nullptr; MMI = nullptr; LI = nullptr; MF = nullptr; - CurrentFnSym = CurrentFnSymForSize = nullptr; + CurExceptionSym = CurrentFnSym = CurrentFnSymForSize = nullptr; + CurrentFnBegin = nullptr; + CurrentFnEnd = nullptr; GCMetadataPrinters = nullptr; VerboseAsm = OutStreamer.isVerboseAsm(); } @@ -219,9 +221,13 @@ bool AsmPrinter::doInitialization(Module &M) { // Emit module-level inline asm if it exists. if (!M.getModuleInlineAsm().empty()) { + // We're at the module level. Construct MCSubtarget from the default CPU + // and target triple. 
+ std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo( + TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString())); OutStreamer.AddComment("Start of file scope inline assembly"); OutStreamer.AddBlankLine(); - EmitInlineAsm(M.getModuleInlineAsm()+"\n"); + EmitInlineAsm(M.getModuleInlineAsm()+"\n", *STI); OutStreamer.AddComment("End of file scope inline assembly"); OutStreamer.AddBlankLine(); } @@ -525,7 +531,8 @@ void AsmPrinter::EmitFunctionHeader() { EmitVisibility(CurrentFnSym, F->getVisibility()); EmitLinkage(F, CurrentFnSym); - EmitAlignment(MF->getAlignment(), F); + if (MAI->hasFunctionAlignment()) + EmitAlignment(MF->getAlignment(), F); if (MAI->hasDotTypeDotSizeDirective()) OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction); @@ -554,6 +561,17 @@ void AsmPrinter::EmitFunctionHeader() { OutStreamer.EmitLabel(DeadBlockSyms[i]); } + if (CurrentFnBegin) { + if (MAI->useAssignmentForEHBegin()) { + MCSymbol *CurPos = OutContext.CreateTempSymbol(); + OutStreamer.EmitLabel(CurPos); + OutStreamer.EmitAssignment(CurrentFnBegin, + MCSymbolRefExpr::Create(CurPos, OutContext)); + } else { + OutStreamer.EmitLabel(CurrentFnBegin); + } + } + // Emit pre-function debug and/or EH information. for (const HandlerInfo &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); @@ -764,6 +782,8 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) { /// EmitFunctionBody - This method emits the body and trailer for a /// function. void AsmPrinter::EmitFunctionBody() { + EmitFunctionHeader(); + // Emit target-specific gunk before the function body. EmitFunctionBodyStart(); @@ -867,32 +887,41 @@ void AsmPrinter::EmitFunctionBody() { // Emit target-specific gunk after the function body. EmitFunctionBodyEnd(); + if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() || + MAI->hasDotTypeDotSizeDirective()) { + // Create a symbol for the end of function. + CurrentFnEnd = createTempSymbol("func_end"); + OutStreamer.EmitLabel(CurrentFnEnd); + } + // If the target wants a .size directive for the size of the function, emit // it. if (MAI->hasDotTypeDotSizeDirective()) { - // Create a symbol for the end of function, so we can get the size as - // difference between the function label and the temp label. - MCSymbol *FnEndLabel = OutContext.CreateTempSymbol(); - OutStreamer.EmitLabel(FnEndLabel); - + // We can get the size as difference between the function label and the + // temp label. const MCExpr *SizeExp = - MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext), + MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(CurrentFnEnd, OutContext), MCSymbolRefExpr::Create(CurrentFnSymForSize, OutContext), OutContext); OutStreamer.EmitELFSize(CurrentFnSym, SizeExp); } - // Emit post-function debug and/or EH information. for (const HandlerInfo &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); - HI.Handler->endFunction(MF); + HI.Handler->markFunctionEnd(); } - MMI->EndFunction(); // Print out jump tables referenced by the function. EmitJumpTableInfo(); + // Emit post-function debug and/or EH information. 
+ for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); + HI.Handler->endFunction(MF); + } + MMI->EndFunction(); + OutStreamer.AddBlankLine(); } @@ -928,7 +957,7 @@ static bool isGOTEquivalentCandidate(const GlobalVariable *GV, // To be a got equivalent, at least one of its users need to be a constant // expression used by another global variable. for (auto *U : GV->users()) - NumGOTEquivUsers += getNumGlobalVariableUses(cast<Constant>(U)); + NumGOTEquivUsers += getNumGlobalVariableUses(dyn_cast<Constant>(U)); return NumGOTEquivUsers > 0; } @@ -961,17 +990,25 @@ void AsmPrinter::emitGlobalGOTEquivs() { if (!getObjFileLowering().supportIndirectSymViaGOTPCRel()) return; - while (!GlobalGOTEquivs.empty()) { - DenseMap<const MCSymbol *, GOTEquivUsePair>::iterator I = - GlobalGOTEquivs.begin(); - const MCSymbol *S = I->first; - const GlobalVariable *GV = I->second.first; - GlobalGOTEquivs.erase(S); - EmitGlobalVariable(GV); + SmallVector<const GlobalVariable *, 8> FailedCandidates; + for (auto &I : GlobalGOTEquivs) { + const GlobalVariable *GV = I.second.first; + unsigned Cnt = I.second.second; + if (Cnt) + FailedCandidates.push_back(GV); } + GlobalGOTEquivs.clear(); + + for (auto *GV : FailedCandidates) + EmitGlobalVariable(GV); } bool AsmPrinter::doFinalization(Module &M) { + // Set the MachineFunction to nullptr so that we can catch attempted + // accesses to MF specific features at the module level and so that + // we can conditionalize accesses based on whether or not it is nullptr. + MF = nullptr; + // Gather all GOT equivalent globals in the module. We really need two // passes over the globals: one to compute and another to avoid its emission // in EmitGlobalVariable, otherwise we would not be able to handle cases @@ -997,59 +1034,6 @@ bool AsmPrinter::doFinalization(Module &M) { EmitVisibility(Name, V, false); } - // Get information about jump-instruction tables to print. - JumpInstrTableInfo *JITI = getAnalysisIfAvailable<JumpInstrTableInfo>(); - - if (JITI && !JITI->getTables().empty()) { - // Since we're at the module level we can't use a function specific - // MCSubtargetInfo - instead create one with the module defaults. - std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo( - TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString())); - unsigned Arch = Triple(getTargetTriple()).getArch(); - bool IsThumb = (Arch == Triple::thumb || Arch == Triple::thumbeb); - const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo(); - MCInst TrapInst; - TII->getTrap(TrapInst); - unsigned LogAlignment = llvm::Log2_64(JITI->entryByteAlignment()); - - // Emit the right section for these functions. - OutStreamer.SwitchSection(OutContext.getObjectFileInfo()->getTextSection()); - for (const auto &KV : JITI->getTables()) { - uint64_t Count = 0; - for (const auto &FunPair : KV.second) { - // Emit the function labels to make this be a function entry point. - MCSymbol *FunSym = - OutContext.GetOrCreateSymbol(FunPair.second->getName()); - EmitAlignment(LogAlignment); - if (IsThumb) - OutStreamer.EmitThumbFunc(FunSym); - if (MAI->hasDotTypeDotSizeDirective()) - OutStreamer.EmitSymbolAttribute(FunSym, MCSA_ELF_TypeFunction); - OutStreamer.EmitLabel(FunSym); - - // Emit the jump instruction to transfer control to the original - // function. 
- MCInst JumpToFun;
- MCSymbol *TargetSymbol =
- OutContext.GetOrCreateSymbol(FunPair.first->getName());
- const MCSymbolRefExpr *TargetSymRef =
- MCSymbolRefExpr::Create(TargetSymbol, MCSymbolRefExpr::VK_PLT,
- OutContext);
- TII->getUnconditionalBranch(JumpToFun, TargetSymRef);
- OutStreamer.EmitInstruction(JumpToFun, *STI);
- ++Count;
- }
-
- // Emit enough padding instructions to fill up to the next power of two.
- uint64_t Remaining = NextPowerOf2(Count) - Count;
- for (uint64_t C = 0; C < Remaining; ++C) {
- EmitAlignment(LogAlignment);
- OutStreamer.EmitInstruction(TrapInst, *STI);
- }
-
- }
- }
-
// Emit module flags.
SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
M.getModuleFlagsMetadata(ModuleFlags);
@@ -1152,11 +1136,26 @@ bool AsmPrinter::doFinalization(Module &M) {
return false;
}
+MCSymbol *AsmPrinter::getCurExceptionSym() {
+ if (!CurExceptionSym)
+ CurExceptionSym = createTempSymbol("exception");
+ return CurExceptionSym;
+}
+
void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
this->MF = &MF;
// Get the function symbol.
CurrentFnSym = getSymbol(MF.getFunction());
CurrentFnSymForSize = CurrentFnSym;
+ CurrentFnBegin = nullptr;
+ CurExceptionSym = nullptr;
+ bool NeedsLocalForSize = MAI->needsLocalForSize();
+ if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() ||
+ NeedsLocalForSize) {
+ CurrentFnBegin = createTempSymbol("func_begin");
+ if (NeedsLocalForSize)
+ CurrentFnSymForSize = CurrentFnBegin;
+ }
if (isVerbose())
LI = &getAnalysis<MachineLoopInfo>();
@@ -1273,10 +1272,8 @@ void AsmPrinter::EmitJumpTableInfo() {
bool JTInDiffSection = !TLOF.shouldPutJumpTableInFunctionSection(
MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32, *F);
- if (!JTInDiffSection) {
- OutStreamer.SwitchSection(TLOF.SectionForGlobal(F, *Mang, TM));
- } else {
- // Otherwise, drop it in the readonly section.
+ if (JTInDiffSection) {
+ // Drop it in the readonly section.
const MCSection *ReadOnlySection =
TLOF.getSectionForJumpTable(*F, *Mang, TM);
OutStreamer.SwitchSection(ReadOnlySection);
@@ -1585,7 +1582,7 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
}
// Otherwise, emit with .set (aka assignment).
- MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
+ MCSymbol *SetLabel = createTempSymbol("set");
OutStreamer.EmitAssignment(SetLabel, Diff);
OutStreamer.EmitSymbolValue(SetLabel, Size);
}
@@ -1667,8 +1664,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
- if (Constant *C = ConstantFoldConstantExpression(
- CE, TM.getDataLayout()))
+ if (Constant *C = ConstantFoldConstantExpression(CE, *TM.getDataLayout()))
if (C != CE)
return lowerConstant(C);
@@ -2112,9 +2108,15 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
//
// gotpcrelcst := <offset from @foo base> + <cst>
//
+ // If gotpcrelcst is positive it means that we can safely fold the pc rel
+ // displacement into the GOTPCREL. We can also have an extra offset <cst>
+ // if the target knows how to encode it.
+ // int64_t GOTPCRelCst = Offset + MV.getConstant(); if (GOTPCRelCst < 0) return; + if (!AP.getObjFileLowering().supportGOTPCRelWithOffset() && GOTPCRelCst != 0) + return; // Emit the GOT PC relative to replace the got equivalent global, i.e.: // @@ -2134,18 +2136,16 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, // AsmPrinter::GOTEquivUsePair Result = AP.GlobalGOTEquivs[GOTEquivSym]; const GlobalVariable *GV = Result.first; - unsigned NumUses = Result.second; + int NumUses = (int)Result.second; const GlobalValue *FinalGV = dyn_cast<GlobalValue>(GV->getOperand(0)); const MCSymbol *FinalSym = AP.getSymbol(FinalGV); - *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel(FinalSym, - GOTPCRelCst); + *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel( + FinalSym, MV, Offset, AP.MMI, AP.OutStreamer); // Update GOT equivalent usage information --NumUses; - if (NumUses) + if (NumUses >= 0) AP.GlobalGOTEquivs[GOTEquivSym] = std::make_pair(GV, NumUses); - else - AP.GlobalGOTEquivs.erase(GOTEquivSym); } static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP, @@ -2206,7 +2206,7 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP, // If the constant expression's size is greater than 64-bits, then we have // to emit the value in chunks. Try to constant fold the value and emit it // that way. - Constant *New = ConstantFoldConstantExpression(CE, DL); + Constant *New = ConstantFoldConstantExpression(CE, *DL); if (New && New != CE) return emitGlobalConstantImpl(New, AP); } @@ -2257,23 +2257,10 @@ void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const { // Symbol Lowering Routines. //===----------------------------------------------------------------------===// -/// GetTempSymbol - Return the MCSymbol corresponding to the assembler -/// temporary label with the specified stem and unique ID. -MCSymbol *AsmPrinter::GetTempSymbol(const Twine &Name, unsigned ID) const { - const DataLayout *DL = TM.getDataLayout(); - return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) + - Name + Twine(ID)); -} - -/// GetTempSymbol - Return an assembler temporary label with the specified -/// stem. -MCSymbol *AsmPrinter::GetTempSymbol(const Twine &Name) const { - const DataLayout *DL = TM.getDataLayout(); - return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ - Name); +MCSymbol *AsmPrinter::createTempSymbol(const Twine &Name) const { + return OutContext.createTempSymbol(Name, true); } - MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const { return MMI->getAddrLabelSymbol(BA->getBasicBlock()); } @@ -2523,3 +2510,5 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) { /// Pin vtable to this file. 
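The vtable anchor just below picks up a companion in this commit: a no-op default implementation of the new markFunctionEnd hook. The ordering contract is visible in the EmitFunctionBody hunk earlier in this diff: markFunctionEnd runs for every handler before the jump tables are printed, so a handler can close its frame (for example with .cfi_endproc) while still in the function's section, and endFunction runs afterwards for material that may switch sections. A compilable model of just that sequencing (stand-in types, not LLVM's interfaces):

    #include <cstdio>

    struct Handler {
      // Must not switch sections; still inside the function's text.
      void markFunctionEnd() { std::puts("  .cfi_endproc"); }
      // May switch sections to emit EH/debug tables.
      void endFunction() { std::puts("  # EH and debug tables"); }
    };

    int main() {
      Handler H;
      H.markFunctionEnd();          // 1. close the frame first
      std::puts("  # jump tables"); // 2. then EmitJumpTableInfo()
      H.endFunction();              // 3. then post-function information
    }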
AsmPrinterHandler::~AsmPrinterHandler() {} + +void AsmPrinterHandler::markFunctionEnd() {} diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index d0958c1..9de36da 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -12,9 +12,12 @@ //===----------------------------------------------------------------------===// #include "ByteStreamer.h" +#include "DwarfDebug.h" #include "DwarfExpression.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" @@ -27,29 +30,11 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "asm-printer" -void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) { - BS.EmitInt8( - Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op) - : dwarf::OperationEncodingString(Op)); -} - -void DebugLocDwarfExpression::EmitSigned(int Value) { - BS.EmitSLEB128(Value, Twine(Value)); -} - -void DebugLocDwarfExpression::EmitUnsigned(unsigned Value) { - BS.EmitULEB128(Value, Twine(Value)); -} - -bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) { - // This information is not available while emitting .debug_loc entries. - return false; -} - //===----------------------------------------------------------------------===// // Dwarf Emission Helper Routines //===----------------------------------------------------------------------===// @@ -178,57 +163,28 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV, /// /// SectionLabel is a temporary label emitted at the start of the section that /// Label lives in. -void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, - const MCSymbol *SectionLabel) const { +void AsmPrinter::emitSectionOffset(const MCSymbol *Label) const { // On COFF targets, we have to emit the special .secrel32 directive. if (MAI->needsDwarfSectionOffsetDirective()) { OutStreamer.EmitCOFFSecRel32(Label); return; } - // Get the section that we're referring to, based on SectionLabel. - const MCSection &Section = SectionLabel->getSection(); - - // If Label has already been emitted, verify that it is in the same section as - // section label for sanity. - assert((!Label->isInSection() || &Label->getSection() == &Section) && - "Section offset using wrong section base for label"); - - // If the section in question will end up with an address of 0 anyway, we can - // just emit an absolute reference to save a relocation. - if (Section.isBaseAddressKnownZero()) { + // If the format uses relocations with dwarf, refer to the symbol directly. + if (MAI->doesDwarfUseRelocationsAcrossSections()) { OutStreamer.EmitSymbolValue(Label, 4); return; } // Otherwise, emit it as a label difference from the start of the section. - EmitLabelDifference(Label, SectionLabel, 4); -} - -// Some targets do not provide a DWARF register number for every -// register. This function attempts to emit a DWARF register by -// emitting a piece of a super-register or by piecing together -// multiple subregisters that alias the register. 
-void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer, - const MachineLocation &MLoc, - unsigned PieceSizeInBits, - unsigned PieceOffsetInBits) const { - assert(MLoc.isReg() && "MLoc must be a register"); - DebugLocDwarfExpression Expr(*this, Streamer); - Expr.AddMachineRegPiece(MLoc.getReg(), PieceSizeInBits, PieceOffsetInBits); -} - -void AsmPrinter::EmitDwarfOpPiece(ByteStreamer &Streamer, - unsigned PieceSizeInBits, - unsigned PieceOffsetInBits) const { - DebugLocDwarfExpression Expr(*this, Streamer); - Expr.AddOpPiece(PieceSizeInBits, PieceOffsetInBits); + EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4); } /// EmitDwarfRegOp - Emit dwarf register operation. void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer, const MachineLocation &MLoc) const { - DebugLocDwarfExpression Expr(*this, Streamer); + DebugLocDwarfExpression Expr(*MF->getSubtarget().getRegisterInfo(), + getDwarfDebug()->getDwarfVersion(), Streamer); const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo(); int Reg = MRI->getDwarfRegNum(MLoc.getReg(), false); if (Reg < 0) { @@ -285,3 +241,60 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { break; } } + +void AsmPrinter::emitDwarfDIE(const DIE &Die) const { + // Get the abbreviation for this DIE. + const DIEAbbrev &Abbrev = Die.getAbbrev(); + + // Emit the code (index) for the abbreviation. + if (isVerbose()) + OutStreamer.AddComment("Abbrev [" + Twine(Abbrev.getNumber()) + + "] 0x" + Twine::utohexstr(Die.getOffset()) + + ":0x" + Twine::utohexstr(Die.getSize()) + " " + + dwarf::TagString(Abbrev.getTag())); + EmitULEB128(Abbrev.getNumber()); + + const SmallVectorImpl<DIEValue *> &Values = Die.getValues(); + const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); + + // Emit the DIE attribute values. + for (unsigned i = 0, N = Values.size(); i < N; ++i) { + dwarf::Attribute Attr = AbbrevData[i].getAttribute(); + dwarf::Form Form = AbbrevData[i].getForm(); + assert(Form && "Too many attributes for DIE (check abbreviation)"); + + if (isVerbose()) { + OutStreamer.AddComment(dwarf::AttributeString(Attr)); + if (Attr == dwarf::DW_AT_accessibility) + OutStreamer.AddComment(dwarf::AccessibilityString( + cast<DIEInteger>(Values[i])->getValue())); + } + + // Emit an attribute using the defined form. + Values[i]->EmitValue(this, Form); + } + + // Emit the DIE children if any. + if (Abbrev.hasChildren()) { + for (auto &Child : Die.getChildren()) + emitDwarfDIE(*Child); + + OutStreamer.AddComment("End Of Children Mark"); + EmitInt8(0); + } +} + +void +AsmPrinter::emitDwarfAbbrevs(const std::vector<DIEAbbrev *>& Abbrevs) const { + // For each abbrevation. + for (const DIEAbbrev *Abbrev : Abbrevs) { + // Emit the abbrevations code (base 1 index.) + EmitULEB128(Abbrev->getNumber(), "Abbreviation Code"); + + // Emit the abbreviations data. + Abbrev->Emit(this); + } + + // Mark end of abbreviations. + EmitULEB128(0, "EOM(3)"); +} diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h index 31867dd..f1efe9d 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h +++ b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h @@ -41,6 +41,10 @@ public: /// call. virtual void beginFunction(const MachineFunction *MF) = 0; + // \brief Emit any of function marker (like .cfi_endproc). This is called + // before endFunction and cannot switch sections. + virtual void markFunctionEnd(); + /// \brief Gather post-function debug information. 
/// Please note that some AsmPrinter implementations may not call /// beginFunction at all. diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index e6e7c97..bf63b1b 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -73,7 +73,8 @@ static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { } /// EmitInlineAsm - Emit a blob of inline asm to the output streamer. -void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, +void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, + const MDNode *LocMDNode, InlineAsm::AsmDialect Dialect) const { assert(!Str.empty() && "Can't emit empty inline asm block"); @@ -93,17 +94,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, !OutStreamer.isIntegratedAssemblerRequired()) { emitInlineAsmStart(); OutStreamer.EmitRawText(Str); - // If we have a machine function then grab the MCSubtarget off of that, - // otherwise we're at the module level and want to construct one from - // the default CPU and target triple. - if (MF) { - emitInlineAsmEnd(MF->getSubtarget<MCSubtargetInfo>(), nullptr); - } else { - std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo( - TM.getTargetTriple(), TM.getTargetCPU(), - TM.getTargetFeatureString())); - emitInlineAsmEnd(*STI, nullptr); - } + emitInlineAsmEnd(STI, nullptr); return; } @@ -135,19 +126,11 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, std::unique_ptr<MCAsmParser> Parser( createMCAsmParser(SrcMgr, OutContext, OutStreamer, *MAI)); - // Initialize the parser with a fresh subtarget info. It is better to use a - // new STI here because the parser may modify it and we do not want those - // modifications to persist after parsing the inlineasm. The modifications - // made by the parser will be seen by the code emitters because it passes - // the current STI down to the EncodeInstruction() method. - std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo( - TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString())); - - // Preserve a copy of the original STI because the parser may modify it. For - // example, when switching between arm and thumb mode. If the target needs to - // emit code to return to the original state it can do so in + // Create a temporary copy of the original STI because the parser may modify + // it. For example, when switching between arm and thumb mode. If the target + // needs to emit code to return to the original state it can do so in // emitInlineAsmEnd(). - MCSubtargetInfo STIOrig = *STI; + MCSubtargetInfo TmpSTI = STI; // We create a new MCInstrInfo here since we might be at the module level // and not have a MachineFunction to initialize the TargetInstrInfo from and @@ -155,7 +138,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, // because it's not subtarget dependent. std::unique_ptr<MCInstrInfo> MII(TM.getTarget().createMCInstrInfo()); std::unique_ptr<MCTargetAsmParser> TAP(TM.getTarget().createMCAsmParser( - *STI, *Parser, *MII, TM.Options.MCOptions)); + TmpSTI, *Parser, *MII, TM.Options.MCOptions)); if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); @@ -170,7 +153,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, // Don't implicitly switch to the text section before the asm. 
int Res = Parser->Run(/*NoInitialTextSection*/ true, /*NoFinalize*/ true); - emitInlineAsmEnd(STIOrig, STI.get()); + emitInlineAsmEnd(STI, &TmpSTI); if (Res && !HasDiagHandler) report_fatal_error("Error parsing inline asm\n"); } @@ -505,7 +488,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { else EmitMSInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AP, LocCookie, OS); - EmitInlineAsm(OS.str(), LocMD, MI->getInlineAsmDialect()); + EmitInlineAsm(OS.str(), getSubtargetInfo(), LocMD, MI->getInlineAsmDialect()); // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't // enabled, so we use emitRawComment. diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h index 42be114..179a4d4 100644 --- a/lib/CodeGen/AsmPrinter/ByteStreamer.h +++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -19,6 +19,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/Support/LEB128.h" +#include <string> namespace llvm { class ByteStreamer { @@ -66,6 +68,33 @@ class HashingByteStreamer : public ByteStreamer { Hash.addULEB128(DWord); } }; + +class BufferByteStreamer : public ByteStreamer { +private: + SmallVectorImpl<char> &Buffer; + // FIXME: This is actually only needed for textual asm output. + SmallVectorImpl<std::string> &Comments; + +public: + BufferByteStreamer(SmallVectorImpl<char> &Buffer, + SmallVectorImpl<std::string> &Comments) + : Buffer(Buffer), Comments(Comments) {} + void EmitInt8(uint8_t Byte, const Twine &Comment) override { + Buffer.push_back(Byte); + Comments.push_back(Comment.str()); + } + void EmitSLEB128(uint64_t DWord, const Twine &Comment) override { + raw_svector_ostream OSE(Buffer); + encodeSLEB128(DWord, OSE); + Comments.push_back(Comment.str()); + } + void EmitULEB128(uint64_t DWord, const Twine &Comment) override { + raw_svector_ostream OSE(Buffer); + encodeULEB128(DWord, OSE); + Comments.push_back(Comment.str()); + } +}; + } #endif diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 64ba56b..1a706f7 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MD5.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -60,7 +61,7 @@ void DIEAbbrev::Profile(FoldingSetNodeID &ID) const { /// Emit - Print the abbreviation using the specified asm printer. /// -void DIEAbbrev::Emit(AsmPrinter *AP) const { +void DIEAbbrev::Emit(const AsmPrinter *AP) const { // Emit its Dwarf tag type. AP->EmitULEB128(Tag, dwarf::TagString(Tag)); @@ -204,7 +205,7 @@ void DIEValue::dump() const { /// EmitValue - Emit integer of appropriate size. 
/// -void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { +void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { unsigned Size = ~0U; switch (Form) { case dwarf::DW_FORM_flag_present: @@ -218,6 +219,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_ref2: // Fall thru case dwarf::DW_FORM_data2: Size = 2; break; case dwarf::DW_FORM_sec_offset: // Fall thru + case dwarf::DW_FORM_strp: // Fall thru case dwarf::DW_FORM_ref4: // Fall thru case dwarf::DW_FORM_data4: Size = 4; break; case dwarf::DW_FORM_ref8: // Fall thru @@ -229,6 +231,9 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return; case dwarf::DW_FORM_addr: Size = Asm->getDataLayout().getPointerSize(); break; + case dwarf::DW_FORM_ref_addr: + Size = SizeOf(Asm, dwarf::DW_FORM_ref_addr); + break; default: llvm_unreachable("DIE Value form not supported yet"); } Asm->OutStreamer.EmitIntValue(Integer, Size); @@ -236,7 +241,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { /// SizeOf - Determine size of integer value in bytes. /// -unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_flag_present: return 0; case dwarf::DW_FORM_flag: // Fall thru @@ -245,6 +250,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_ref2: // Fall thru case dwarf::DW_FORM_data2: return sizeof(int16_t); case dwarf::DW_FORM_sec_offset: // Fall thru + case dwarf::DW_FORM_strp: // Fall thru case dwarf::DW_FORM_ref4: // Fall thru case dwarf::DW_FORM_data4: return sizeof(int32_t); case dwarf::DW_FORM_ref8: // Fall thru @@ -255,6 +261,10 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_udata: return getULEB128Size(Integer); case dwarf::DW_FORM_sdata: return getSLEB128Size(Integer); case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize(); + case dwarf::DW_FORM_ref_addr: + if (AP->OutStreamer.getContext().getDwarfVersion() == 2) + return AP->getDataLayout().getPointerSize(); + return sizeof(int32_t); default: llvm_unreachable("DIE Value form not supported yet"); } } @@ -272,13 +282,13 @@ void DIEInteger::print(raw_ostream &O) const { /// EmitValue - Emit expression value. /// -void DIEExpr::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { +void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { AP->OutStreamer.EmitValue(Expr, SizeOf(AP, Form)); } /// SizeOf - Determine size of expression value in bytes. /// -unsigned DIEExpr::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEExpr::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; @@ -298,7 +308,7 @@ void DIEExpr::print(raw_ostream &O) const { /// EmitValue - Emit label value. /// -void DIELabel::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { +void DIELabel::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { AP->EmitLabelReference(Label, SizeOf(AP, Form), Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_sec_offset || @@ -307,7 +317,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { /// SizeOf - Determine size of label value in bytes. 
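These DIEInteger hunks add DW_FORM_strp to the four-byte bucket and teach EmitValue/SizeOf about DW_FORM_ref_addr, whose width depends on the DWARF version in use. A compact sketch of the resulting size mapping, assuming the 32-bit DWARF format (form codes are the values from the DWARF specification):

    #include <cstdint>

    // A subset of DWARF form codes, with values from the DWARF specification.
    enum Form : uint16_t {
      DW_FORM_data1 = 0x0b, DW_FORM_data2 = 0x05, DW_FORM_data4 = 0x06,
      DW_FORM_data8 = 0x07, DW_FORM_strp = 0x0e, DW_FORM_sec_offset = 0x17,
      DW_FORM_ref_addr = 0x10,
    };

    // Width in bytes of a fixed-size form, mirroring the fall-through
    // buckets in DIEInteger::SizeOf. Assumes the 32-bit DWARF format, where
    // strp and sec_offset are 4-byte section offsets.
    unsigned formSize(Form F, unsigned DwarfVersion, unsigned PointerSize) {
      switch (F) {
      case DW_FORM_data1:          return 1;
      case DW_FORM_data2:          return 2;
      case DW_FORM_strp:
      case DW_FORM_sec_offset:
      case DW_FORM_data4:          return 4;
      case DW_FORM_data8:          return 8;
      case DW_FORM_ref_addr:
        // DWARF 2 made ref_addr address-sized; later versions make it a
        // 4-byte offset (8 in 64-bit DWARF, not modeled here).
        return DwarfVersion == 2 ? PointerSize : 4;
      }
      return 0; // unreachable for the forms modeled here
    }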
/// -unsigned DIELabel::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; @@ -326,13 +336,13 @@ void DIELabel::print(raw_ostream &O) const { /// EmitValue - Emit delta value. /// -void DIEDelta::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { +void DIEDelta::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form)); } /// SizeOf - Determine size of delta value in bytes. /// -unsigned DIEDelta::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; @@ -351,13 +361,13 @@ void DIEDelta::print(raw_ostream &O) const { /// EmitValue - Emit string value. /// -void DIEString::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { +void DIEString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { Access->EmitValue(AP, Form); } /// SizeOf - Determine size of delta value in bytes. /// -unsigned DIEString::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { return Access->SizeOf(AP, Form); } @@ -372,32 +382,9 @@ void DIEString::print(raw_ostream &O) const { // DIEEntry Implementation //===----------------------------------------------------------------------===// -/// Emit something like ".long Hi+Offset-Lo" where the size in bytes of the -/// directive is specified by Size and Hi/Lo specify the labels. -static void emitLabelOffsetDifference(MCStreamer &Streamer, const MCSymbol *Hi, - uint64_t Offset, const MCSymbol *Lo, - unsigned Size) { - MCContext &Context = Streamer.getContext(); - - // Emit Hi+Offset - Lo - // Get the Hi+Offset expression. - const MCExpr *Plus = - MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, Context), - MCConstantExpr::Create(Offset, Context), Context); - - // Get the Hi+Offset-Lo expression. - const MCExpr *Diff = MCBinaryExpr::CreateSub( - Plus, MCSymbolRefExpr::Create(Lo, Context), Context); - - // Otherwise, emit with .set (aka assignment). - MCSymbol *SetLabel = Context.CreateTempSymbol(); - Streamer.EmitAssignment(SetLabel, Diff); - Streamer.EmitSymbolValue(SetLabel, Size); -} - /// EmitValue - Emit debug information entry offset. 
/// -void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { +void DIEEntry::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_ref_addr) { const DwarfDebug *DD = AP->getDwarfDebug(); @@ -413,14 +400,12 @@ void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { AP->EmitLabelPlusOffset(CU->getSectionSym(), Addr, DIEEntry::getRefAddrSize(AP)); else - emitLabelOffsetDifference(AP->OutStreamer, CU->getSectionSym(), Addr, - CU->getSectionSym(), - DIEEntry::getRefAddrSize(AP)); + AP->OutStreamer.EmitIntValue(Addr, DIEEntry::getRefAddrSize(AP)); } else AP->EmitInt32(Entry.getOffset()); } -unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) { +unsigned DIEEntry::getRefAddrSize(const AsmPrinter *AP) { // DWARF4: References that use the attribute form DW_FORM_ref_addr are // specified to be four bytes in the DWARF 32-bit format and eight bytes // in the DWARF 64-bit format, while DWARF Version 2 specifies that such @@ -441,7 +426,7 @@ void DIEEntry::print(raw_ostream &O) const { //===----------------------------------------------------------------------===// // DIETypeSignature Implementation //===----------------------------------------------------------------------===// -void DIETypeSignature::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { +void DIETypeSignature::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { assert(Form == dwarf::DW_FORM_ref_sig8); Asm->OutStreamer.EmitIntValue(Unit.getTypeSignature(), 8); } @@ -460,7 +445,7 @@ void DIETypeSignature::dump() const { print(dbgs()); } /// ComputeSize - calculate the size of the location expression. /// -unsigned DIELoc::ComputeSize(AsmPrinter *AP) const { +unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const { if (!Size) { const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); for (unsigned i = 0, N = Values.size(); i < N; ++i) @@ -472,7 +457,7 @@ unsigned DIELoc::ComputeSize(AsmPrinter *AP) const { /// EmitValue - Emit location data. /// -void DIELoc::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { +void DIELoc::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { switch (Form) { default: llvm_unreachable("Improper form for block"); case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; @@ -490,7 +475,7 @@ void DIELoc::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { /// SizeOf - Determine size of location data in bytes. /// -unsigned DIELoc::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIELoc::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); @@ -515,7 +500,7 @@ void DIELoc::print(raw_ostream &O) const { /// ComputeSize - calculate the size of the block. /// -unsigned DIEBlock::ComputeSize(AsmPrinter *AP) const { +unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const { if (!Size) { const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); for (unsigned i = 0, N = Values.size(); i < N; ++i) @@ -527,7 +512,7 @@ unsigned DIEBlock::ComputeSize(AsmPrinter *AP) const { /// EmitValue - Emit block data. 
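DIELoc and DIEBlock above emit a location expression as a length prefix followed by raw bytes, with the prefix width chosen by the block form. A small sketch of that framing, assuming little-endian output for brevity (the real streamer honors the target byte order):

    #include <cstdint>
    #include <vector>

    // Append Value as an integer of Size bytes, little-endian for brevity.
    void emitInt(std::vector<uint8_t> &Out, uint64_t Value, unsigned Size) {
      for (unsigned i = 0; i != Size; ++i)
        Out.push_back(uint8_t(Value >> (8 * i)));
    }

    // A DWARF location/block attribute is a length prefix followed by the
    // raw expression bytes. DW_FORM_block1/2/4 use 1/2/4-byte lengths;
    // DW_FORM_block (not modeled here) uses a ULEB128 length instead.
    void emitBlock(std::vector<uint8_t> &Out, const std::vector<uint8_t> &Body,
                   unsigned LenBytes /* 1, 2, or 4 */) {
      emitInt(Out, Body.size(), LenBytes); // length prefix
      Out.insert(Out.end(), Body.begin(), Body.end());
    }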
/// -void DIEBlock::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { +void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { switch (Form) { default: llvm_unreachable("Improper form for block"); case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; @@ -543,7 +528,7 @@ void DIEBlock::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { /// SizeOf - Determine size of block data in bytes. /// -unsigned DIEBlock::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); @@ -564,7 +549,7 @@ void DIEBlock::print(raw_ostream &O) const { // DIELocList Implementation //===----------------------------------------------------------------------===// -unsigned DIELocList::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) @@ -574,14 +559,14 @@ unsigned DIELocList::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { /// EmitValue - Emit label value. /// -void DIELocList::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { +void DIELocList::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { DwarfDebug *DD = AP->getDwarfDebug(); MCSymbol *Label = DD->getDebugLocEntries()[Index].Label; if (AP->MAI->doesDwarfUseRelocationsAcrossSections() && !DD->useSplitDwarf()) - AP->EmitSectionOffset(Label, DD->getDebugLocSym()); + AP->emitSectionOffset(Label); else - AP->EmitLabelDifference(Label, DD->getDebugLocSym(), 4); + AP->EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4); } #ifndef NDEBUG diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp index 1e2ba2c..da7252a 100644 --- a/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -510,7 +510,7 @@ uint64_t DIEHash::computeDIEODRSignature(const DIE &Die) { // ... take the least significant 8 bytes and return those. Our MD5 // implementation always returns its results in little endian, swap bytes // appropriately. - return *reinterpret_cast<support::ulittle64_t *>(Result + 8); + return support::endian::read64le(Result + 8); } /// This is based on the type signature computation given in section 7.27 of the @@ -531,7 +531,7 @@ uint64_t DIEHash::computeCUSignature(const DIE &Die) { // ... take the least significant 8 bytes and return those. Our MD5 // implementation always returns its results in little endian, swap bytes // appropriately. - return *reinterpret_cast<support::ulittle64_t *>(Result + 8); + return support::endian::read64le(Result + 8); } /// This is based on the type signature computation given in section 7.27 of the @@ -555,5 +555,5 @@ uint64_t DIEHash::computeTypeSignature(const DIE &Die) { // ... take the least significant 8 bytes and return those. Our MD5 // implementation always returns its results in little endian, swap bytes // appropriately. 
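Here and in the following hunk, DIEHash stops reinterpret-casting into the MD5 result buffer and reads the low eight bytes through an endian helper instead. A standalone sketch of what read64le computes; assembling the value byte-by-byte is endian-neutral and avoids the alignment and strict-aliasing hazards of casting the raw buffer to a 64-bit pointer:

    #include <cstdint>

    // Read 8 bytes as a little-endian 64-bit value, one byte at a time,
    // independent of the host's native byte order.
    uint64_t read64le(const unsigned char *P) {
      uint64_t V = 0;
      for (int i = 7; i >= 0; --i)
        V = (V << 8) | P[i];
      return V;
    }

    // Usage, mirroring the hash computations above (Result is 16 bytes):
    //   uint64_t Signature = read64le(Result + 8);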
- return *reinterpret_cast<support::ulittle64_t *>(Result + 8); + return support::endian::read64le(Result + 8); } diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp index 0c2a5e5..bbdf237 100644 --- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp +++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> #include <map> diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h index 6d55c03..6914bbe 100644 --- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -9,22 +9,24 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H +#include "llvm/ADT/SmallString.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MachineLocation.h" namespace llvm { +class AsmPrinter; class MDNode; /// \brief This struct describes location entries emitted in the .debug_loc /// section. class DebugLocEntry { - // Begin and end symbols for the address range that this location is valid. + /// Begin and end symbols for the address range that this location is valid. const MCSymbol *Begin; const MCSymbol *End; public: - /// A single location or constant. + /// \brief A single location or constant. struct Value { Value(const MDNode *Var, const MDNode *Expr, int64_t i) : Variable(Var), Expression(Expr), EntryKind(E_Integer) { @@ -41,20 +43,20 @@ public: Value(const MDNode *Var, const MDNode *Expr, MachineLocation Loc) : Variable(Var), Expression(Expr), EntryKind(E_Location), Loc(Loc) { assert(DIVariable(Var).Verify()); - assert(DIExpression(Expr).Verify()); + assert(DIExpression(Expr)->isValid()); } - // The variable to which this location entry corresponds. + /// The variable to which this location entry corresponds. const MDNode *Variable; - // Any complex address location expression for this Value. + /// Any complex address location expression for this Value. const MDNode *Expression; - // Type of entry that this represents. + /// Type of entry that this represents. enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt }; enum EntryType EntryKind; - // Either a constant, + /// Either a constant, union { int64_t Int; const ConstantFP *CFP; @@ -84,6 +86,8 @@ private: /// A nonempty list of locations/constants belonging to this entry, /// sorted by offset. SmallVector<Value, 1> Values; + SmallString<8> DWARFBytes; + SmallVector<std::string, 1> Comments; public: DebugLocEntry(const MCSymbol *B, const MCSymbol *E, Value Val) @@ -92,9 +96,9 @@ public: } /// \brief If this and Next are describing different pieces of the same - // variable, merge them by appending Next's values to the current - // list of values. - // Return true if the merge was successful. + /// variable, merge them by appending Next's values to the current + /// list of values. + /// Return true if the merge was successful. bool MergeValues(const DebugLocEntry &Next) { if (Begin == Next.Begin) { DIExpression Expr(Values[0].Expression); @@ -135,7 +139,7 @@ public: }) && "value must be a piece"); } - // Sort the pieces by offset. + // \brief Sort the pieces by offset. // Remove any duplicate entries by dropping all but the first. 
void sortUniqueValues() { std::sort(Values.begin(), Values.end()); @@ -146,9 +150,18 @@ public: }), Values.end()); } + + /// \brief Lower this entry into a DWARF expression. + void finalize(const AsmPrinter &AP, + const DITypeIdentifierMap &TypeIdentifierMap); + + /// \brief Return the lowered DWARF expression. + StringRef getDWARFBytes() const { return DWARFBytes; } + /// \brief Return the assembler comments for the lowered DWARF expression. + const SmallVectorImpl<std::string> &getComments() const { return Comments; } }; -/// Compare two Values for equality. +/// \brief Compare two Values for equality. inline bool operator==(const DebugLocEntry::Value &A, const DebugLocEntry::Value &B) { if (A.EntryKind != B.EntryKind) @@ -173,7 +186,7 @@ inline bool operator==(const DebugLocEntry::Value &A, llvm_unreachable("unhandled EntryKind"); } -/// Compare two pieces based on their offset. +/// \brief Compare two pieces based on their offset. inline bool operator<(const DebugLocEntry::Value &A, const DebugLocEntry::Value &B) { return A.getExpression().getBitPieceOffset() < diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index a71f35e..f64338e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -54,7 +54,7 @@ void DwarfAccelTable::ComputeBucketCount(void) { // Then compute the bucket size, minimum of 1 bucket. if (num > 1024) Header.bucket_count = num / 4; - if (num > 16) + else if (num > 16) Header.bucket_count = num / 2; else Header.bucket_count = num > 0 ? num : 1; @@ -70,6 +70,7 @@ static bool compareDIEs(const DwarfAccelTable::HashDataContents *A, void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) { // Create the individual hash data outputs. + Data.reserve(Entries.size()); for (StringMap<DataArray>::iterator EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { @@ -95,8 +96,17 @@ void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) { for (size_t i = 0, e = Data.size(); i < e; ++i) { uint32_t bucket = Data[i]->HashValue % Header.bucket_count; Buckets[bucket].push_back(Data[i]); - Data[i]->Sym = Asm->GetTempSymbol(Prefix, i); + Data[i]->Sym = Asm->createTempSymbol(Prefix); } + + // Sort the contents of the buckets by hash value so that hash + // collisions end up together. Stable sort makes testing easier and + // doesn't cost much more. + for (size_t i = 0; i < Buckets.size(); ++i) + std::stable_sort(Buckets[i].begin(), Buckets[i].end(), + [] (HashData *LHS, HashData *RHS) { + return LHS->HashValue < RHS->HashValue; + }); } // Emits the header for the table via the AsmPrinter. @@ -136,19 +146,32 @@ void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) { Asm->EmitInt32(index); else Asm->EmitInt32(UINT32_MAX); - index += Buckets[i].size(); + // Buckets point in the list of hashes, not to the data. Do not + // increment the index multiple times in case of hash collisions. + uint64_t PrevHash = UINT64_MAX; + for (auto *HD : Buckets[i]) { + uint32_t HashValue = HD->HashValue; + if (PrevHash != HashValue) + ++index; + PrevHash = HashValue; + } } } // Walk through the buckets and emit the individual hashes for each // bucket. 
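Two accelerator-table fixes land above: ComputeBucketCount gains the missing else (previously a count above 1024 also matched the greater-than-16 test, so num/4 was immediately overwritten by num/2), and each bucket is stable-sorted by hash so collisions sit together. A standalone sketch of both, with HashData as a trimmed-down stand-in for the real entry type:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Bucket-count heuristic with the restored else-if chain.
    unsigned computeBucketCount(unsigned Num) {
      unsigned BucketCount;
      if (Num > 1024)
        BucketCount = Num / 4;
      else if (Num > 16) // the fix: without `else`, this overwrote Num / 4
        BucketCount = Num / 2;
      else
        BucketCount = Num > 0 ? Num : 1;
      return BucketCount;
    }

    // Trimmed-down stand-in for the real accelerator-table entry.
    struct HashData { uint32_t HashValue; };

    // Keep hash collisions adjacent within a bucket. A stable sort keeps
    // the pre-existing order among equal hashes, so the output stays
    // deterministic and easy to test.
    void sortBucket(std::vector<HashData *> &Bucket) {
      std::stable_sort(Bucket.begin(), Bucket.end(),
                       [](const HashData *L, const HashData *R) {
                         return L->HashValue < R->HashValue;
                       });
    }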
void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { + uint64_t PrevHash = UINT64_MAX; for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), HE = Buckets[i].end(); HI != HE; ++HI) { + uint32_t HashValue = (*HI)->HashValue; + if (PrevHash == HashValue) + continue; Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i)); - Asm->EmitInt32((*HI)->HashValue); + Asm->EmitInt32(HashValue); + PrevHash = HashValue; } } } @@ -157,11 +180,16 @@ void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { // element in each bucket. This is done via a symbol subtraction from the // beginning of the section. The non-section symbol will be output later // when we emit the actual data. -void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { +void DwarfAccelTable::emitOffsets(AsmPrinter *Asm, const MCSymbol *SecBegin) { + uint64_t PrevHash = UINT64_MAX; for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), HE = Buckets[i].end(); HI != HE; ++HI) { + uint32_t HashValue = (*HI)->HashValue; + if (PrevHash == HashValue) + continue; + PrevHash = HashValue; Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i)); MCContext &Context = Asm->OutStreamer.getContext(); const MCExpr *Sub = MCBinaryExpr::CreateSub( @@ -175,17 +203,20 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { // Walk through the buckets and emit the full data for each element in // the bucket. For the string case emit the dies and the various offsets. // Terminate each HashData bucket with 0. -void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D, - MCSymbol *StrSym) { - uint64_t PrevHash = UINT64_MAX; +void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) { for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + uint64_t PrevHash = UINT64_MAX; for (HashList::const_iterator HI = Buckets[i].begin(), HE = Buckets[i].end(); HI != HE; ++HI) { + // Terminate the previous entry if there is no hash collision + // with the current one. + if (PrevHash != UINT64_MAX && PrevHash != (*HI)->HashValue) + Asm->EmitInt32(0); // Remember to emit the label for our offset. Asm->OutStreamer.EmitLabel((*HI)->Sym); Asm->OutStreamer.AddComment((*HI)->Str); - Asm->EmitSectionOffset((*HI)->Data.StrSym, StrSym); + Asm->emitSectionOffset((*HI)->Data.StrSym); Asm->OutStreamer.AddComment("Num DIEs"); Asm->EmitInt32((*HI)->Data.Values.size()); for (HashDataContents *HD : (*HI)->Data.Values) { @@ -200,17 +231,17 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D, Asm->EmitInt8(HD->Flags); } } - // Emit a 0 to terminate the data unless we have a hash collision. - if (PrevHash != (*HI)->HashValue) - Asm->EmitInt32(0); PrevHash = (*HI)->HashValue; } + // Emit the final end marker for the bucket. + if (!Buckets[i].empty()) + Asm->EmitInt32(0); } } // Emit the entire data structure to the output file. -void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfDebug *D, - MCSymbol *StrSym) { +void DwarfAccelTable::emit(AsmPrinter *Asm, const MCSymbol *SecBegin, + DwarfDebug *D) { // Emit the header. EmitHeader(Asm); @@ -221,10 +252,10 @@ void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfDebug *D, EmitHashes(Asm); // Emit the offsets. - EmitOffsets(Asm, SecBegin); + emitOffsets(Asm, SecBegin); // Emit the hash data. 
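EmitBuckets, EmitHashes, and emitOffsets above all share one idea: colliding entries carry equal hash values, so each distinct hash is emitted (and the running index advanced) exactly once, tracked with a PrevHash sentinel. A minimal sketch of that dedup-by-previous-value scan:

    #include <cstdint>
    #include <vector>

    struct HashData { uint32_t HashValue; };

    // Count the distinct hashes a bucket contributes. Buckets index into
    // the flat list of emitted hashes, so colliding entries (equal hash
    // values) must advance the index only once; UINT64_MAX is a sentinel
    // that can never equal a 32-bit hash.
    unsigned countUniqueHashes(const std::vector<HashData *> &Bucket) {
      unsigned N = 0;
      uint64_t PrevHash = UINT64_MAX;
      for (const HashData *HD : Bucket) {
        if (HD->HashValue != PrevHash)
          ++N;
        PrevHash = HD->HashValue;
      }
      return N;
    }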
- EmitData(Asm, D, StrSym); + EmitData(Asm, D); } #ifndef NDEBUG diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 74963da..e6fdf08 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -222,8 +222,8 @@ private: void EmitHeader(AsmPrinter *); void EmitBuckets(AsmPrinter *); void EmitHashes(AsmPrinter *); - void EmitOffsets(AsmPrinter *, MCSymbol *); - void EmitData(AsmPrinter *, DwarfDebug *D, MCSymbol *StrSym); + void emitOffsets(AsmPrinter *, const MCSymbol *); + void EmitData(AsmPrinter *, DwarfDebug *D); // Allocator for HashData and HashDataContents. BumpPtrAllocator Allocator; @@ -248,7 +248,7 @@ public: void AddName(StringRef Name, MCSymbol *StrSym, const DIE *Die, char Flags = 0); void FinalizeTable(AsmPrinter *, StringRef); - void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *, MCSymbol *StrSym); + void emit(AsmPrinter *, const MCSymbol *, DwarfDebug *); #ifndef NDEBUG void print(raw_ostream &O); void dump() { print(dbgs()); } diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index f45b24c..1bee367 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -39,9 +39,24 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A) + : EHStreamer(A), shouldEmitCFI(false) {} + +void DwarfCFIExceptionBase::markFunctionEnd() { + if (shouldEmitCFI) + Asm->OutStreamer.EmitCFIEndProc(); + + if (MMI->getLandingPads().empty()) + return; + + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); +} + DwarfCFIException::DwarfCFIException(AsmPrinter *A) - : EHStreamer(A), shouldEmitPersonality(false), shouldEmitLSDA(false), - shouldEmitMoves(false), moveTypeModule(AsmPrinter::CFI_M_None) {} + : DwarfCFIExceptionBase(A), shouldEmitPersonality(false), + shouldEmitLSDA(false), shouldEmitMoves(false), + moveTypeModule(AsmPrinter::CFI_M_None) {} DwarfCFIException::~DwarfCFIException() {} @@ -72,8 +87,6 @@ void DwarfCFIException::endModule() { } } -/// beginFunction - Gather pre-function exception information. Assumes it's -/// being emitted immediately after the function entry point. void DwarfCFIException::beginFunction(const MachineFunction *MF) { shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; @@ -100,7 +113,8 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { shouldEmitLSDA = shouldEmitPersonality && LSDAEncoding != dwarf::DW_EH_PE_omit; - if (!shouldEmitPersonality && !shouldEmitMoves) + shouldEmitCFI = shouldEmitPersonality || shouldEmitMoves; + if (!shouldEmitCFI) return; Asm->OutStreamer.EmitCFIStartProc(/*IsSimple=*/false); @@ -113,43 +127,18 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding); - MCSymbol *EHBegin = - Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber()); - if (Asm->MAI->useAssignmentForEHBegin()) { - MCContext &Ctx = Asm->OutContext; - MCSymbol *CurPos = Ctx.CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(CurPos); - Asm->OutStreamer.EmitAssignment(EHBegin, - MCSymbolRefExpr::Create(CurPos, Ctx)); - } else { - Asm->OutStreamer.EmitLabel(EHBegin); - } - // Provide LSDA information. 
if (!shouldEmitLSDA) return; - Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception", - Asm->getFunctionNumber()), - LSDAEncoding); + Asm->OutStreamer.EmitCFILsda(Asm->getCurExceptionSym(), LSDAEncoding); } /// endFunction - Gather and emit post-function exception information. /// void DwarfCFIException::endFunction(const MachineFunction *) { - if (!shouldEmitPersonality && !shouldEmitMoves) - return; - - Asm->OutStreamer.EmitCFIEndProc(); - if (!shouldEmitPersonality) return; - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", - Asm->getFunctionNumber())); - - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); - emitExceptionTable(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index dcc5fe4..eee5fc5 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -19,7 +19,7 @@ DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DICompileUnit Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU) : DwarfUnit(UID, dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), - Skeleton(nullptr), LabelBegin(nullptr), BaseAddress(nullptr) { + Skeleton(nullptr), BaseAddress(nullptr) { insertDIE(Node, &getUnitDie()); } @@ -164,24 +164,17 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) { addUInt(*Loc, dwarf::DW_FORM_udata, DD->getAddressPool().getIndex(Sym, /* TLS */ true)); } - // 3) followed by a custom OP to make the debugger do a TLS lookup. - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address); + // 3) followed by an OP to make the debugger do a TLS lookup. + addUInt(*Loc, dwarf::DW_FORM_data1, + DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address + : dwarf::DW_OP_form_tls_address); } else { DD->addArangeLabel(SymbolCU(this, Sym)); addOpAddress(*Loc, Sym); } addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); - // Add the linkage name. - StringRef LinkageName = GV.getLinkageName(); - if (!LinkageName.empty()) - // From DWARF4: DIEs to which DW_AT_linkage_name may apply include: - // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and - // TAG_variable. - addString(*VariableDIE, - DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name - : dwarf::DW_AT_MIPS_linkage_name, - GlobalValue::getRealLinkageName(LinkageName)); + addLinkageName(*VariableDIE, GV.getLinkageName()); } else if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(GV.getConstant())) { addConstantValue(*VariableDIE, CI, GTy); @@ -243,7 +236,7 @@ void DwarfCompileUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute, addSectionDelta(Die, Attribute, Label, Sec); } -void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) { +void DwarfCompileUnit::initStmtList() { // Define start line table label for each Compile Unit. MCSymbol *LineTableStartSym = Asm->OutStreamer.getDwarfLineTableSymbol(getUniqueID()); @@ -255,8 +248,9 @@ void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) { // left in the skeleton CU and so not included. // The line table entries are not always emitted in assembly, so it // is not okay to use line_table_start here. 
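The removed lines above open-coded the choice between the DWARF 4 standard attribute and the older MIPS vendor attribute for mangled names; the new addLinkageName helper centralizes it. The underlying selection, as a sketch (attribute codes are the values from the DWARF specification):

    #include <cstdint>

    // Attribute codes, with values from the DWARF specification.
    enum Attribute : uint16_t {
      DW_AT_MIPS_linkage_name = 0x2007, // vendor attribute, pre-DWARF-4 era
      DW_AT_linkage_name = 0x6e,        // standardized in DWARF 4
    };

    // Mangled names move to the standard attribute once the producer emits
    // DWARF 4 or later; older consumers expect the MIPS vendor form.
    Attribute linkageNameAttr(unsigned DwarfVersion) {
      return DwarfVersion >= 4 ? DW_AT_linkage_name : DW_AT_MIPS_linkage_name;
    }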
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym, - DwarfLineSectionSym); + TLOF.getDwarfLineSection()->getBeginSymbol()); } void DwarfCompileUnit::applyStmtList(DIE &D) { @@ -285,7 +279,7 @@ void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin, DIE &DwarfCompileUnit::updateSubprogramScopeDIE(DISubprogram SP) { DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes()); - attachLowHighPC(*SPDie, DD->getFunctionBeginSym(), DD->getFunctionEndSym()); + attachLowHighPC(*SPDie, Asm->getFunctionBegin(), Asm->getFunctionEnd()); if (!DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim( *DD->getCurrentFunction())) addFlag(*SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr); @@ -378,13 +372,14 @@ void DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute, void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, SmallVector<RangeSpan, 2> Range) { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + // Emit offset in .debug_range as a relocatable label. emitDIE will handle // emitting it appropriately. - auto *RangeSectionSym = DD->getRangeSectionSym(); + const MCSymbol *RangeSectionSym = + TLOF.getDwarfRangesSection()->getBeginSymbol(); - RangeSpanList List( - Asm->GetTempSymbol("debug_ranges", DD->getNextRangeNumber()), - std::move(Range)); + RangeSpanList List(Asm->createTempSymbol("debug_ranges"), std::move(Range)); // Under fission, ranges are specified by constant offsets relative to the // CU's DW_AT_GNU_ranges_base. @@ -709,12 +704,14 @@ void DwarfCompileUnit::collectDeadVariables(DISubprogram SP) { } } -void DwarfCompileUnit::emitHeader(const MCSymbol *ASectionSym) const { +void DwarfCompileUnit::emitHeader(bool UseOffsets) { // Don't bother labeling the .dwo unit, as its offset isn't used. - if (!Skeleton) + if (!Skeleton) { + LabelBegin = Asm->createTempSymbol("cu_begin"); Asm->OutStreamer.EmitLabel(LabelBegin); + } - DwarfUnit::emitHeader(ASectionSym); + DwarfUnit::emitHeader(UseOffsets); } /// addGlobalName - Add a new global name to the compile unit. diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index c66af65..9484bb6 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -36,9 +36,6 @@ class DwarfCompileUnit : public DwarfUnit { /// Skeleton unit associated with this unit. DwarfCompileUnit *Skeleton; - /// A label at the start of the non-dwo section related to this unit. - MCSymbol *SectionSym; - /// The start of the unit within its section. MCSymbol *LabelBegin; @@ -76,7 +73,7 @@ public: return Skeleton; } - void initStmtList(MCSymbol *DwarfLineSectionSym); + void initStmtList(); /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE. void applyStmtList(DIE &D); @@ -168,22 +165,9 @@ public: /// Set the skeleton unit associated with this unit. void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; } - MCSymbol *getSectionSym() const { + const MCSymbol *getSectionSym() const { assert(Section); - return SectionSym; - } - - /// Pass in the SectionSym even though we could recreate it in every compile - /// unit (type units will have actually distinct symbols once they're in - /// comdat sections). - void initSection(const MCSection *Section, MCSymbol *SectionSym) { - DwarfUnit::initSection(Section); - this->SectionSym = SectionSym; - - // Don't bother labeling the .dwo unit, as its offset isn't used. 
- if (!Skeleton) - LabelBegin = - Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID()); + return Section->getBeginSymbol(); } unsigned getLength() { @@ -191,7 +175,7 @@ public: getHeaderSize() + UnitDie.getSize(); } - void emitHeader(const MCSymbol *ASectionSym) const override; + void emitHeader(bool UseOffsets) override; MCSymbol *getLabelBegin() const { assert(Section); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index aa1f79f..e9ebd97 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -45,6 +45,7 @@ #include "llvm/Support/MD5.h" #include "llvm/Support/Path.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -105,6 +106,25 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, static const char *const DWARFGroupName = "DWARF Emission"; static const char *const DbgTimerName = "DWARF Debug Writer"; +void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) { + BS.EmitInt8( + Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op) + : dwarf::OperationEncodingString(Op)); +} + +void DebugLocDwarfExpression::EmitSigned(int64_t Value) { + BS.EmitSLEB128(Value, Twine(Value)); +} + +void DebugLocDwarfExpression::EmitUnsigned(uint64_t Value) { + BS.EmitULEB128(Value, Twine(Value)); +} + +bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) { + // This information is not available while emitting .debug_loc entries. + return false; +} + //===----------------------------------------------------------------------===// /// resolve - Look in the DwarfDebug map for the MDNode that @@ -169,11 +189,12 @@ static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = { DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)}; DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) - : Asm(A), MMI(Asm->MMI), PrevLabel(nullptr), GlobalRangeCount(0), - InfoHolder(A, *this, "info_string", DIEValueAllocator), + : Asm(A), MMI(Asm->MMI), PrevLabel(nullptr), + InfoHolder(A, "info_string", DIEValueAllocator), UsedNonDefaultText(false), - SkeletonHolder(A, *this, "skel_string", DIEValueAllocator), + SkeletonHolder(A, "skel_string", DIEValueAllocator), IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()), + IsPS4(Triple(A->getTargetTriple()).isPS4()), AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)), AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, @@ -182,17 +203,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) dwarf::DW_FORM_data4)), AccelTypes(TypeAtoms) { - DwarfInfoSectionSym = DwarfAbbrevSectionSym = DwarfStrSectionSym = nullptr; - DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = nullptr; - DwarfLineSectionSym = nullptr; - DwarfAddrSectionSym = nullptr; - DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = nullptr; - FunctionBeginSym = FunctionEndSym = nullptr; CurFn = nullptr; CurMI = nullptr; // Turn on accelerator tables for Darwin by default, pubnames by - // default for non-Darwin, and handle split dwarf. + // default for non-Darwin/PS4, and handle split dwarf. 
if (DwarfAccelTables == Default) HasDwarfAccelTables = IsDarwin; else @@ -204,7 +219,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) HasSplitDwarf = SplitDwarf == Enable; if (DwarfPubSections == Default) - HasDwarfPubSections = !IsDarwin; + HasDwarfPubSections = !IsDarwin && !IsPS4; else HasDwarfPubSections = DwarfPubSections == Enable; @@ -212,6 +227,10 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber : MMI->getModule()->getDwarfVersion(); + // Darwin and PS4 use the standard TLS opcode (defined in DWARF 3). + // Everybody else uses GNU's. + UseGNUTLSOpcode = !(IsDarwin || IsPS4) || DwarfVersion < 3; + Asm->OutStreamer.getContext().setDwarfVersion(DwarfVersion); { @@ -223,19 +242,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) // Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h. DwarfDebug::~DwarfDebug() { } -// Switch to the specified MCSection and emit an assembler -// temporary label to it if SymbolStem is specified. -static MCSymbol *emitSectionSym(AsmPrinter *Asm, const MCSection *Section, - const char *SymbolStem = nullptr) { - Asm->OutStreamer.SwitchSection(Section); - if (!SymbolStem) - return nullptr; - - MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem); - Asm->OutStreamer.EmitLabel(TmpSym); - return TmpSym; -} - static bool isObjCClass(StringRef Name) { return Name.startswith("+") || Name.startswith("-"); } @@ -264,13 +270,6 @@ static StringRef getObjCMethodName(StringRef In) { return In.slice(In.find(' ') + 1, In.find(']')); } -// Helper for sorting sections into a stable output order. -static bool SectionSort(const MCSection *A, const MCSection *B) { - std::string LA = (A ? A->getLabelBeginName() : ""); - std::string LB = (B ? B->getLabelBeginName() : ""); - return LA < LB; -} - // Add the various names to the Dwarf accelerator table names. // TODO: Determine whether or not we should add names for programs // that do not have a DW_AT_name or DW_AT_linkage_name field - this @@ -388,7 +387,7 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { NewCU.addString(Die, dwarf::DW_AT_name, FN); if (!useSplitDwarf()) { - NewCU.initStmtList(DwarfLineSectionSym); + NewCU.initStmtList(); // If we're using split dwarf the compilation dir is going to be in the // skeleton CU and so we don't need to duplicate it here. @@ -410,11 +409,9 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { dwarf::DW_FORM_data1, RVer); if (useSplitDwarf()) - NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection(), - DwarfInfoDWOSectionSym); + NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection()); else - NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(), - DwarfInfoSectionSym); + NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection()); CUMap.insert(std::make_pair(DIUnit, &NewCU)); CUDieMap.insert(std::make_pair(&Die, &NewCU)); @@ -445,9 +442,6 @@ void DwarfDebug::beginModule() { return; TypeIdentifierMap = generateDITypeIdentifierMap(CU_Nodes); - // Emit initial sections so we can reference labels later. 
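UseGNUTLSOpcode above encodes a small policy: Darwin and PS4 consumers take DW_OP_form_tls_address, standardized in DWARF 3, while everything else (or pre-DWARF-3 output) gets the GNU extension opcode. As a standalone sketch (opcode values per the DWARF spec and the GNU extension):

    #include <cstdint>

    // Expression opcodes: the standard one from DWARF 3, and the GNU
    // extension that predates it.
    constexpr uint8_t DW_OP_form_tls_address = 0x9b;
    constexpr uint8_t DW_OP_GNU_push_tls_address = 0xe0;

    // Mirrors the policy above: Darwin and PS4 debuggers understand the
    // standard opcode; everyone else, or pre-DWARF-3 output, gets GNU's.
    uint8_t tlsLookupOpcode(bool IsDarwin, bool IsPS4, unsigned DwarfVersion) {
      bool UseGNU = !(IsDarwin || IsPS4) || DwarfVersion < 3;
      return UseGNU ? DW_OP_GNU_push_tls_address : DW_OP_form_tls_address;
    }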
- emitSectionLabels(); - SingleCU = CU_Nodes->getNumOperands() == 1; for (MDNode *N : CU_Nodes->operands()) { @@ -458,8 +452,11 @@ void DwarfDebug::beginModule() { ScopesWithImportedEntities.push_back(std::make_pair( DIImportedEntity(ImportedEntities.getElement(i)).getContext(), ImportedEntities.getElement(i))); - std::sort(ScopesWithImportedEntities.begin(), - ScopesWithImportedEntities.end(), less_first()); + // Stable sort to preserve the order of appearance of imported entities. + // This is to avoid out-of-order processing of interdependent declarations + // within the same scope, e.g. { namespace A = base; namespace B = A; } + std::stable_sort(ScopesWithImportedEntities.begin(), + ScopesWithImportedEntities.end(), less_first()); DIArray GVs = CUNode.getGlobalVariables(); for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) CU.getOrCreateGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i))); @@ -541,6 +538,8 @@ void DwarfDebug::collectDeadVariables() { } void DwarfDebug::finalizeModuleInfo() { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + finishSubprogramDefinitions(); finishVariableDefinitions(); @@ -570,13 +569,16 @@ void DwarfDebug::finalizeModuleInfo() { // We don't keep track of which addresses are used in which CU so this // is a bit pessimistic under LTO. - if (!AddrPool.isEmpty()) + if (!AddrPool.isEmpty()) { + const MCSymbol *Sym = TLOF.getDwarfAddrSection()->getBeginSymbol(); SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_addr_base, - DwarfAddrSectionSym, DwarfAddrSectionSym); - if (!SkCU->getRangeLists().empty()) + Sym, Sym); + } + if (!SkCU->getRangeLists().empty()) { + const MCSymbol *Sym = TLOF.getDwarfRangesSection()->getBeginSymbol(); SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base, - DwarfDebugRangeSectionSym, - DwarfDebugRangeSectionSym); + Sym, Sym); + } } // If we have code split among multiple sections or non-contiguous @@ -613,7 +615,7 @@ void DwarfDebug::endModule() { // If we aren't actually generating debug info (check beginModule - // conditionalized on !DisableDebugInfoPrinting and the presence of the // llvm.dbg.cu metadata node) - if (!DwarfInfoSectionSym) + if (!MMI->hasDebugInfo()) return; // Finalize the debug info for the module. @@ -621,12 +623,18 @@ void DwarfDebug::endModule() { emitDebugStr(); - // Emit all the DIEs into a debug info section. - emitDebugInfo(); + if (useSplitDwarf()) + emitDebugLocDWO(); + else + // Emit info into a debug loc section. + emitDebugLoc(); // Corresponding abbreviations into a abbrev section. emitAbbreviations(); + // Emit all the DIEs into a debug info section. + emitDebugInfo(); + // Emit info into a debug aranges section. if (GenerateARangeSection) emitDebugARanges(); @@ -639,12 +647,9 @@ void DwarfDebug::endModule() { emitDebugInfoDWO(); emitDebugAbbrevDWO(); emitDebugLineDWO(); - emitDebugLocDWO(); // Emit DWO addresses. AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection()); - } else - // Emit info into a debug loc section. - emitDebugLoc(); + } // Emit info into the dwarf accelerator table sections. 
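The switch from std::sort to std::stable_sort above is not cosmetic: stable sorting keeps imported entities in their order of appearance within a scope, so interdependent declarations such as `namespace A = base; namespace B = A;` are processed in source order. A sketch of the idea with a hypothetical ScopedEntity pair type:

    #include <algorithm>
    #include <functional>
    #include <string>
    #include <utility>
    #include <vector>

    // Hypothetical stand-in: a (scope, entity) pair, in order of appearance.
    using ScopedEntity = std::pair<const void *, std::string>;

    // Sort by scope only; stability preserves source order among entries
    // with the same scope, which a plain std::sort is allowed to permute.
    void sortScopes(std::vector<ScopedEntity> &Entities) {
      std::stable_sort(Entities.begin(), Entities.end(),
                       [](const ScopedEntity &L, const ScopedEntity &R) {
                         return std::less<const void *>()(L.first, R.first);
                       });
    }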
if (useDwarfAccelTables()) { @@ -828,7 +833,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, if (End != nullptr) EndLabel = getLabelAfterInsn(End); else if (std::next(I) == Ranges.end()) - EndLabel = FunctionEndSym; + EndLabel = Asm->getFunctionEnd(); else EndLabel = getLabelBeforeInsn(std::next(I)->first); assert(EndLabel && "Forgot label after instruction ending a range!"); @@ -922,11 +927,13 @@ DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP, DotDebugLocEntries.resize(DotDebugLocEntries.size() + 1); DebugLocList &LocList = DotDebugLocEntries.back(); LocList.CU = &TheCU; - LocList.Label = - Asm->GetTempSymbol("debug_loc", DotDebugLocEntries.size() - 1); + LocList.Label = Asm->createTempSymbol("debug_loc"); // Build the location list for this variable. buildLocationList(LocList.List, Ranges); + // Finalize the entry by lowering it into a DWARF bytestream. + for (auto &Entry : LocList.List) + Entry.finalize(*Asm, TypeIdentifierMap); } // Collect info for variables that were optimized out. @@ -964,23 +971,25 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { // Check if source location changes, but ignore DBG_VALUE locations. if (!MI->isDebugValue()) { DebugLoc DL = MI->getDebugLoc(); - if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) { - unsigned Flags = 0; - PrevInstLoc = DL; - if (DL == PrologEndLoc) { - Flags |= DWARF2_FLAG_PROLOGUE_END; - PrologEndLoc = DebugLoc(); - Flags |= DWARF2_FLAG_IS_STMT; - } - if (DL.getLine() != - Asm->OutStreamer.getContext().getCurrentDwarfLoc().getLine()) - Flags |= DWARF2_FLAG_IS_STMT; - + if (DL != PrevInstLoc) { if (!DL.isUnknown()) { + unsigned Flags = 0; + PrevInstLoc = DL; + if (DL == PrologEndLoc) { + Flags |= DWARF2_FLAG_PROLOGUE_END; + PrologEndLoc = DebugLoc(); + Flags |= DWARF2_FLAG_IS_STMT; + } + if (DL.getLine() != + Asm->OutStreamer.getContext().getCurrentDwarfLoc().getLine()) + Flags |= DWARF2_FLAG_IS_STMT; + const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); - } else + } else if (UnknownLocations) { + PrevInstLoc = DL; recordSourceLine(0, 0, nullptr, 0); + } } } @@ -1116,11 +1125,6 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { else Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); - // Emit a label for the function so that we have a beginning address. - FunctionBeginSym = Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()); - // Assumes in correct section after the entry point. - Asm->OutStreamer.EmitLabel(FunctionBeginSym); - // Calculate history for local variables. calculateDbgValueHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(), DbgValues); @@ -1131,12 +1135,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (Ranges.empty()) continue; - // The first mention of a function argument gets the FunctionBeginSym + // The first mention of a function argument gets the CurrentFnBegin // label, so arguments are visible when breaking at function entry. DIVariable DIVar(Ranges.front().first->getDebugVariable()); if (DIVar.isVariable() && DIVar.getTag() == dwarf::DW_TAG_arg_variable && getDISubprogram(DIVar.getContext()).describes(MF->getFunction())) { - LabelsBeforeInsn[Ranges.front().first] = FunctionBeginSym; + LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin(); if (Ranges.front().first->getDebugExpression().isBitPiece()) { // Mark all non-overlapping initial pieces. 
for (auto I = Ranges.begin(); I != Ranges.end(); ++I) { @@ -1145,7 +1149,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { [&](DbgValueHistoryMap::InstrRange Pred) { return !piecesOverlap(Piece, Pred.first->getDebugExpression()); })) - LabelsBeforeInsn[I->first] = FunctionBeginSym; + LabelsBeforeInsn[I->first] = Asm->getFunctionBegin(); else break; } @@ -1160,7 +1164,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { } PrevInstLoc = DebugLoc(); - PrevLabel = FunctionBeginSym; + PrevLabel = Asm->getFunctionBegin(); // Record beginning of function. PrologEndLoc = findPrologueEndLoc(MF); @@ -1191,11 +1195,6 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { return; } - // Define end label for subprogram. - FunctionEndSym = Asm->GetTempSymbol("func_end", Asm->getFunctionNumber()); - // Assumes in correct section after the entry point. - Asm->OutStreamer.EmitLabel(FunctionEndSym); - // Set DwarfDwarfCompileUnitID in MCContext to default value. Asm->OutStreamer.getContext().setDwarfCompileUnitID(0); @@ -1207,7 +1206,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { collectVariableInfo(TheCU, SP, ProcessedVars); // Add the range of this function to the list of ranges for the CU. - TheCU.addRange(RangeSpan(FunctionBeginSym, FunctionEndSym)); + TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd())); // Under -gmlt, skip building the subprogram if there are no inlined // subroutines inside it. @@ -1290,103 +1289,10 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, // Emit Methods //===----------------------------------------------------------------------===// -// Emit initial Dwarf sections with a label at the start of each one. -void DwarfDebug::emitSectionLabels() { - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - - // Dwarf sections base addresses. 
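The piecesOverlap predicate used above is plain interval arithmetic on bit ranges. A self-contained version, assuming each piece is described by a bit offset and a bit size:

    // Two bit-pieces [Off, Off+Size) overlap iff neither one ends before
    // the other begins.
    bool piecesOverlap(unsigned Off1, unsigned Size1,
                       unsigned Off2, unsigned Size2) {
      return Off1 < Off2 + Size2 && Off2 < Off1 + Size1;
    }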
- DwarfInfoSectionSym = - emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info"); - if (useSplitDwarf()) { - DwarfInfoDWOSectionSym = - emitSectionSym(Asm, TLOF.getDwarfInfoDWOSection(), "section_info_dwo"); - DwarfTypesDWOSectionSym = emitSectionSym( - Asm, TLOF.getDwarfTypesDWOSection(), "section_types_dwo"); - } - DwarfAbbrevSectionSym = - emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev"); - if (useSplitDwarf()) - DwarfAbbrevDWOSectionSym = emitSectionSym( - Asm, TLOF.getDwarfAbbrevDWOSection(), "section_abbrev_dwo"); - if (GenerateARangeSection) - emitSectionSym(Asm, TLOF.getDwarfARangesSection()); - - DwarfLineSectionSym = - emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); - if (GenerateGnuPubSections) { - DwarfGnuPubNamesSectionSym = - emitSectionSym(Asm, TLOF.getDwarfGnuPubNamesSection()); - DwarfGnuPubTypesSectionSym = - emitSectionSym(Asm, TLOF.getDwarfGnuPubTypesSection()); - } else if (HasDwarfPubSections) { - emitSectionSym(Asm, TLOF.getDwarfPubNamesSection()); - emitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); - } - - DwarfStrSectionSym = - emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string"); - if (useSplitDwarf()) { - DwarfStrDWOSectionSym = - emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string"); - DwarfAddrSectionSym = - emitSectionSym(Asm, TLOF.getDwarfAddrSection(), "addr_sec"); - DwarfDebugLocSectionSym = - emitSectionSym(Asm, TLOF.getDwarfLocDWOSection(), "skel_loc"); - } else - DwarfDebugLocSectionSym = - emitSectionSym(Asm, TLOF.getDwarfLocSection(), "section_debug_loc"); - DwarfDebugRangeSectionSym = - emitSectionSym(Asm, TLOF.getDwarfRangesSection(), "debug_range"); -} - -// Recursively emits a debug information entry. -void DwarfDebug::emitDIE(DIE &Die) { - // Get the abbreviation for this DIE. - const DIEAbbrev &Abbrev = Die.getAbbrev(); - - // Emit the code (index) for the abbreviation. - if (Asm->isVerbose()) - Asm->OutStreamer.AddComment("Abbrev [" + Twine(Abbrev.getNumber()) + - "] 0x" + Twine::utohexstr(Die.getOffset()) + - ":0x" + Twine::utohexstr(Die.getSize()) + " " + - dwarf::TagString(Abbrev.getTag())); - Asm->EmitULEB128(Abbrev.getNumber()); - - const SmallVectorImpl<DIEValue *> &Values = Die.getValues(); - const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); - - // Emit the DIE attribute values. - for (unsigned i = 0, N = Values.size(); i < N; ++i) { - dwarf::Attribute Attr = AbbrevData[i].getAttribute(); - dwarf::Form Form = AbbrevData[i].getForm(); - assert(Form && "Too many attributes for DIE (check abbreviation)"); - - if (Asm->isVerbose()) { - Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr)); - if (Attr == dwarf::DW_AT_accessibility) - Asm->OutStreamer.AddComment(dwarf::AccessibilityString( - cast<DIEInteger>(Values[i])->getValue())); - } - - // Emit an attribute using the defined form. - Values[i]->EmitValue(Asm, Form); - } - - // Emit the DIE children if any. - if (Abbrev.hasChildren()) { - for (auto &Child : Die.getChildren()) - emitDIE(*Child); - - Asm->OutStreamer.AddComment("End Of Children Mark"); - Asm->EmitInt8(0); - } -} - // Emit the debug info section. void DwarfDebug::emitDebugInfo() { DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; - - Holder.emitUnits(DwarfAbbrevSectionSym); + Holder.emitUnits(/* UseOffsets */ false); } // Emit the abbreviation section. 
@@ -1396,65 +1302,39 @@ void DwarfDebug::emitAbbreviations() { Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection()); } -// Emit the last address of the section and the end of the line matrix. -void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { - // Define last address of section. - Asm->OutStreamer.AddComment("Extended Op"); - Asm->EmitInt8(0); - - Asm->OutStreamer.AddComment("Op size"); - Asm->EmitInt8(Asm->getDataLayout().getPointerSize() + 1); - Asm->OutStreamer.AddComment("DW_LNE_set_address"); - Asm->EmitInt8(dwarf::DW_LNE_set_address); - - Asm->OutStreamer.AddComment("Section end label"); - - Asm->OutStreamer.EmitSymbolValue( - Asm->GetTempSymbol("section_end", SectionEnd), - Asm->getDataLayout().getPointerSize()); - - // Mark end of matrix. - Asm->OutStreamer.AddComment("DW_LNE_end_sequence"); - Asm->EmitInt8(0); - Asm->EmitInt8(1); - Asm->EmitInt8(1); -} - void DwarfDebug::emitAccel(DwarfAccelTable &Accel, const MCSection *Section, - StringRef TableName, StringRef SymName) { + StringRef TableName) { Accel.FinalizeTable(Asm, TableName); Asm->OutStreamer.SwitchSection(Section); - auto *SectionBegin = Asm->GetTempSymbol(SymName); - Asm->OutStreamer.EmitLabel(SectionBegin); // Emit the full data. - Accel.Emit(Asm, SectionBegin, this, DwarfStrSectionSym); + Accel.emit(Asm, Section->getBeginSymbol(), this); } // Emit visible names into a hashed accelerator table section. void DwarfDebug::emitAccelNames() { emitAccel(AccelNames, Asm->getObjFileLowering().getDwarfAccelNamesSection(), - "Names", "names_begin"); + "Names"); } // Emit objective C classes and categories into a hashed accelerator table // section. void DwarfDebug::emitAccelObjC() { emitAccel(AccelObjC, Asm->getObjFileLowering().getDwarfAccelObjCSection(), - "ObjC", "objc_begin"); + "ObjC"); } // Emit namespace dies into a hashed accelerator table. void DwarfDebug::emitAccelNamespaces() { emitAccel(AccelNamespace, Asm->getObjFileLowering().getDwarfAccelNamespaceSection(), - "namespac", "namespac_begin"); + "namespac"); } // Emit type dies into a hashed accelerator table. void DwarfDebug::emitAccelTypes() { emitAccel(AccelTypes, Asm->getObjFileLowering().getDwarfAccelTypesSection(), - "types", "types_begin"); + "types"); } // Public name handling. @@ -1537,15 +1417,14 @@ void DwarfDebug::emitDebugPubSection( if (auto *Skeleton = TheU->getSkeleton()) TheU = Skeleton; - unsigned ID = TheU->getUniqueID(); // Start the dwarf pubnames section. Asm->OutStreamer.SwitchSection(PSec); // Emit the header. Asm->OutStreamer.AddComment("Length of Public " + Name + " Info"); - MCSymbol *BeginLabel = Asm->GetTempSymbol("pub" + Name + "_begin", ID); - MCSymbol *EndLabel = Asm->GetTempSymbol("pub" + Name + "_end", ID); + MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin"); + MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end"); Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); Asm->OutStreamer.EmitLabel(BeginLabel); @@ -1554,7 +1433,7 @@ void DwarfDebug::emitDebugPubSection( Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION); Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); - Asm->EmitSectionOffset(TheU->getLabelBegin(), TheU->getSectionSym()); + Asm->emitSectionOffset(TheU->getLabelBegin()); Asm->OutStreamer.AddComment("Compilation Unit Length"); Asm->EmitInt32(TheU->getLength()); @@ -1600,62 +1479,27 @@ void DwarfDebug::emitDebugStr() { Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection()); } -/// Emits an optimal (=sorted) sequence of DW_OP_pieces. 
-void DwarfDebug::emitLocPieces(ByteStreamer &Streamer, - const DITypeIdentifierMap &Map, - ArrayRef<DebugLocEntry::Value> Values) { - assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) { - return P.isBitPiece(); - }) && "all values are expected to be pieces"); - assert(std::is_sorted(Values.begin(), Values.end()) && - "pieces are expected to be sorted"); - - unsigned Offset = 0; - for (auto Piece : Values) { - DIExpression Expr = Piece.getExpression(); - unsigned PieceOffset = Expr.getBitPieceOffset(); - unsigned PieceSize = Expr.getBitPieceSize(); - assert(Offset <= PieceOffset && "overlapping or duplicate pieces"); - if (Offset < PieceOffset) { - // The DWARF spec seriously mandates pieces with no locations for gaps. - Asm->EmitDwarfOpPiece(Streamer, PieceOffset-Offset); - Offset += PieceOffset-Offset; - } - Offset += PieceSize; - -#ifndef NDEBUG - DIVariable Var = Piece.getVariable(); - unsigned VarSize = Var.getSizeInBits(Map); - assert(PieceSize+PieceOffset <= VarSize - && "piece is larger than or outside of variable"); - assert(PieceSize != VarSize - && "piece covers entire variable"); -#endif - emitDebugLocValue(Streamer, Piece, PieceOffset); - } -} - void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocEntry &Entry) { - const DebugLocEntry::Value Value = Entry.getValues()[0]; - if (Value.isBitPiece()) - // Emit all pieces that belong to the same variable and range. - return emitLocPieces(Streamer, TypeIdentifierMap, Entry.getValues()); - - assert(Entry.getValues().size() == 1 && "only pieces may have >1 value"); - emitDebugLocValue(Streamer, Value); + auto Comment = Entry.getComments().begin(); + auto End = Entry.getComments().end(); + for (uint8_t Byte : Entry.getDWARFBytes()) + Streamer.EmitInt8(Byte, Comment != End ? *(Comment++) : ""); } -void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer, - const DebugLocEntry::Value &Value, - unsigned PieceOffsetInBits) { +static void emitDebugLocValue(const AsmPrinter &AP, + const DITypeIdentifierMap &TypeIdentifierMap, + ByteStreamer &Streamer, + const DebugLocEntry::Value &Value, + unsigned PieceOffsetInBits) { DIVariable DV = Value.getVariable(); - DebugLocDwarfExpression DwarfExpr(*Asm, Streamer); - + DebugLocDwarfExpression DwarfExpr(*AP.MF->getSubtarget().getRegisterInfo(), + AP.getDwarfDebug()->getDwarfVersion(), + Streamer); // Regular entry. if (Value.isInt()) { - DIBasicType BTy(resolve(DV.getType())); + DIBasicType BTy(DV.getType().resolve(TypeIdentifierMap)); if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed || BTy.getEncoding() == dwarf::DW_ATE_signed_char)) DwarfExpr.AddSignedConstant(Value.getInt()); @@ -1666,7 +1510,7 @@ void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer, DIExpression Expr = Value.getExpression(); if (!Expr || (Expr.getNumElements() == 0)) // Regular entry. - Asm->EmitDwarfRegOp(Streamer, Loc); + AP.EmitDwarfRegOp(Streamer, Loc); else { // Complex address entry. if (Loc.getOffset()) { @@ -1682,6 +1526,52 @@ void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer, // FIXME: ^ } + +void DebugLocEntry::finalize(const AsmPrinter &AP, + const DITypeIdentifierMap &TypeIdentifierMap) { + BufferByteStreamer Streamer(DWARFBytes, Comments); + const DebugLocEntry::Value Value = Values[0]; + if (Value.isBitPiece()) { + // Emit all pieces that belong to the same variable and range. 
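With locations pre-lowered into per-entry byte buffers, emitDebugLocEntry above reduces to replaying bytes while pairing each one with a recorded comment; the comment list can be shorter than the byte list, since a single LEB128 emission records one comment for several bytes. A standalone sketch, with emitInt8 as a hypothetical sink standing in for the assembler streamer:

    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <vector>

    // Hypothetical sink standing in for ByteStreamer::EmitInt8.
    void emitInt8(uint8_t Byte, const std::string &Comment) {
      std::printf(".byte 0x%02x // %s\n", Byte, Comment.c_str());
    }

    // Replay a pre-lowered DWARF bytestream; comments may run out because
    // multi-byte emissions recorded only one comment, hence the guard.
    void emitDebugLocEntry(const std::vector<uint8_t> &Bytes,
                           const std::vector<std::string> &Comments) {
      auto Comment = Comments.begin();
      for (uint8_t Byte : Bytes)
        emitInt8(Byte, Comment != Comments.end() ? *Comment++ : "");
    }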
+ assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) { + return P.isBitPiece(); + }) && "all values are expected to be pieces"); + assert(std::is_sorted(Values.begin(), Values.end()) && + "pieces are expected to be sorted"); + + unsigned Offset = 0; + for (auto Piece : Values) { + DIExpression Expr = Piece.getExpression(); + unsigned PieceOffset = Expr.getBitPieceOffset(); + unsigned PieceSize = Expr.getBitPieceSize(); + assert(Offset <= PieceOffset && "overlapping or duplicate pieces"); + if (Offset < PieceOffset) { + // The DWARF spec seriously mandates pieces with no locations for gaps. + DebugLocDwarfExpression Expr(*AP.MF->getSubtarget().getRegisterInfo(), + AP.getDwarfDebug()->getDwarfVersion(), + Streamer); + Expr.AddOpPiece(PieceOffset-Offset, 0); + Offset += PieceOffset-Offset; + } + Offset += PieceSize; + +#ifndef NDEBUG + DIVariable Var = Piece.getVariable(); + unsigned VarSize = Var.getSizeInBits(TypeIdentifierMap); + assert(PieceSize+PieceOffset <= VarSize + && "piece is larger than or outside of variable"); + assert(PieceSize != VarSize + && "piece covers entire variable"); +#endif + emitDebugLocValue(AP, TypeIdentifierMap, Streamer, Piece, PieceOffset); + } + } else { + assert(Values.size() == 1 && "only pieces may have >1 value"); + emitDebugLocValue(AP, TypeIdentifierMap, Streamer, Value, 0); + } +} + + void DwarfDebug::emitDebugLocEntryLocation(const DebugLocEntry &Entry) { Asm->OutStreamer.AddComment("Loc expr size"); MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol(); @@ -1752,10 +1642,7 @@ struct ArangeSpan { // address we can tie back to a CU. void DwarfDebug::emitDebugARanges() { // Provides a unique id per text section. - DenseMap<const MCSection *, SmallVector<SymbolCU, 8>> SectionMap; - - // Prime section data. - SectionMap[Asm->getObjFileLowering().getTextSection()]; + MapVector<const MCSection *, SmallVector<SymbolCU, 8>> SectionMap; // Filter labels by section. for (const SymbolCU &SCU : ArangeLabels) { @@ -1772,31 +1659,13 @@ void DwarfDebug::emitDebugARanges() { } } - // Build a list of sections used. - std::vector<const MCSection *> Sections; - for (const auto &it : SectionMap) { - const MCSection *Section = it.first; - Sections.push_back(Section); - } - - // Sort the sections into order. - // This is only done to ensure consistent output order across different runs. - std::sort(Sections.begin(), Sections.end(), SectionSort); - // Add terminating symbols for each section. - for (unsigned ID = 0, E = Sections.size(); ID != E; ID++) { - const MCSection *Section = Sections[ID]; + for (const auto &I : SectionMap) { + const MCSection *Section = I.first; MCSymbol *Sym = nullptr; - if (Section) { - // We can't call MCSection::getLabelEndName, as it's only safe to do so - // if we know the section name up-front. For user-created sections, the - // resulting label may not be valid to use as a label. (section names can - // use a greater set of characters on some systems) - Sym = Asm->GetTempSymbol("debug_end", ID); - Asm->OutStreamer.SwitchSection(Section); - Asm->OutStreamer.EmitLabel(Sym); - } + if (Section) + Sym = Asm->OutStreamer.endSection(Section); // Insert a final terminator. 
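// ---------------------------------------------------------------------------
// Illustration, not part of this commit: the SectionMap change above swaps
// DenseMap for MapVector precisely so the sort-for-determinism code removed
// above became unnecessary; MapVector iterates in insertion order. Roughly
// the pair of containers it combines, sketched with std types:
#include <cstddef>
#include <map>
#include <utility>
#include <vector>

template <typename K, typename V> class InsertionOrderMap {
  std::map<K, std::size_t> Index;       // key -> slot in Storage
  std::vector<std::pair<K, V>> Storage; // remembers insertion order
public:
  V &operator[](const K &Key) {
    auto It = Index.find(Key);
    if (It == Index.end()) {            // first time we see this key
      Index.emplace(Key, Storage.size());
      Storage.emplace_back(Key, V());
      return Storage.back().second;
    }
    return Storage[It->second].second;
  }
  auto begin() { return Storage.begin(); } // deterministic iteration
  auto end() { return Storage.end(); }
};
// ---------------------------------------------------------------------------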
SectionMap[Section].push_back(SymbolCU(nullptr, Sym)); @@ -1804,8 +1673,9 @@ void DwarfDebug::emitDebugARanges() { DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> Spans; - for (const MCSection *Section : Sections) { - SmallVector<SymbolCU, 8> &List = SectionMap[Section]; + for (auto &I : SectionMap) { + const MCSection *Section = I.first; + SmallVector<SymbolCU, 8> &List = I.second; if (List.size() < 2) continue; @@ -1902,7 +1772,7 @@ void DwarfDebug::emitDebugARanges() { Asm->OutStreamer.AddComment("DWARF Arange version number"); Asm->EmitInt16(dwarf::DW_ARANGES_VERSION); Asm->OutStreamer.AddComment("Offset Into Debug Info Section"); - Asm->EmitSectionOffset(CU->getLabelBegin(), CU->getSectionSym()); + Asm->emitSectionOffset(CU->getLabelBegin()); Asm->OutStreamer.AddComment("Address Size (in bytes)"); Asm->EmitInt8(PtrSize); Asm->OutStreamer.AddComment("Segment Size (in bytes)"); @@ -1998,10 +1868,9 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { auto OwnedUnit = make_unique<DwarfCompileUnit>( CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder); DwarfCompileUnit &NewCU = *OwnedUnit; - NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(), - DwarfInfoSectionSym); + NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection()); - NewCU.initStmtList(DwarfLineSectionSym); + NewCU.initStmtList(); initSkeletonUnit(CU, NewCU.getUnitDie(), std::move(OwnedUnit)); @@ -2012,9 +1881,8 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { // compile units that would normally be in debug_info. void DwarfDebug::emitDebugInfoDWO() { assert(useSplitDwarf() && "No split dwarf debug info?"); - // Don't pass an abbrev symbol, using a constant zero instead so as not to - // emit relocations into the dwo file. - InfoHolder.emitUnits(/* AbbrevSymbol */ nullptr); + // Don't emit relocations into the dwo file. + InfoHolder.emitUnits(/* UseOffsets */ true); } // Emit the .debug_abbrev.dwo section for separated dwarf. This contains the @@ -2058,7 +1926,7 @@ static uint64_t makeTypeSignature(StringRef Identifier) { // appropriately. MD5::MD5Result Result; Hash.final(Result); - return *reinterpret_cast<support::ulittle64_t *>(Result + 8); + return support::endian::read64le(Result + 8); } void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 1c0e163..74db3ef 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -88,7 +88,8 @@ public: : Var(V), Expr(1, E), TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(nullptr), DD(DD) { FrameIndex.push_back(FI); - assert(Var.Verify() && E.Verify()); + assert(Var.Verify()); + assert(!E || E->isValid()); } /// Construct a DbgVariable from a DEBUG_VALUE. @@ -243,25 +244,10 @@ class DwarfDebug : public AsmPrinterHandler { // If nonnull, stores the CU in which the previous subprogram was contained. const DwarfCompileUnit *PrevCU; - // Section Symbols: these are assembler temporary labels that are emitted at - // the beginning of each supported dwarf section. These are used to form - // section offsets and are created by EmitSectionLabels. 
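// ---------------------------------------------------------------------------
// Illustration, not part of this commit: makeTypeSignature above now reads
// the upper half of the MD5 result with support::endian::read64le instead of
// reinterpret_casting the buffer, so the type-unit signature is identical on
// big- and little-endian hosts. A standalone equivalent of such a helper:
#include <cstdint>

static uint64_t read64le(const void *P) {
  const unsigned char *B = static_cast<const unsigned char *>(P);
  uint64_t V = 0;
  for (int I = 0; I < 8; ++I)
    V |= static_cast<uint64_t>(B[I]) << (8 * I); // byte 0 is least significant
  return V;
}
// ---------------------------------------------------------------------------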
- MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym; - MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym; - MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym; - MCSymbol *FunctionBeginSym, *FunctionEndSym; - MCSymbol *DwarfInfoDWOSectionSym, *DwarfAbbrevDWOSectionSym; - MCSymbol *DwarfTypesDWOSectionSym; - MCSymbol *DwarfStrDWOSectionSym; - MCSymbol *DwarfGnuPubNamesSectionSym, *DwarfGnuPubTypesSectionSym; - // As an optimization, there is no need to emit an entry in the directory // table for the same directory as DW_AT_comp_dir. StringRef CompilationDir; - // Counter for assigning globally unique IDs for ranges. - unsigned GlobalRangeCount; - // Holder for the file specific debug information. DwarfFile InfoHolder; @@ -290,6 +276,9 @@ class DwarfDebug : public AsmPrinterHandler { // text. bool UsedNonDefaultText; + // Whether to use the GNU TLS opcode (instead of the standard opcode). + bool UseGNUTLSOpcode; + // Version of dwarf we're emitting. unsigned DwarfVersion; @@ -318,6 +307,7 @@ class DwarfDebug : public AsmPrinterHandler { // True iff there are multiple CUs in this module. bool SingleCU; bool IsDarwin; + bool IsPS4; AddressPool AddrPool; @@ -347,9 +337,6 @@ class DwarfDebug : public AsmPrinterHandler { /// \brief Construct a DIE for this abstract scope. void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); - /// \brief Emit initial Dwarf sections with a label at the start of each one. - void emitSectionLabels(); - /// \brief Compute the size and offset of a DIE given an incoming Offset. unsigned computeSizeAndOffset(DIE *Die, unsigned Offset); @@ -373,13 +360,9 @@ class DwarfDebug : public AsmPrinterHandler { /// \brief Emit the abbreviation section. void emitAbbreviations(); - /// \brief Emit the last address of the section and the end of - /// the line matrix. - void emitEndOfLineMatrix(unsigned SectionEnd); - /// \brief Emit a specified accelerator table. void emitAccel(DwarfAccelTable &Accel, const MCSection *Section, - StringRef TableName, StringRef SymName); + StringRef TableName); /// \brief Emit visible names into a hashed accelerator table section. void emitAccelNames(); @@ -540,8 +523,9 @@ public: SymSize[Sym] = Size; } - /// \brief Recursively Emits a debug information entry. - void emitDIE(DIE &Die); + /// \brief Returns whether to use DW_OP_GNU_push_tls_address, instead of the + /// standard DW_OP_form_tls_address opcode + bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; } // Experimental DWARF5 features. @@ -556,15 +540,6 @@ public: /// Returns the Dwarf Version. unsigned getDwarfVersion() const { return DwarfVersion; } - /// Returns the section symbol for the .debug_loc section. - MCSymbol *getDebugLocSym() const { return DwarfDebugLocSectionSym; } - - /// Returns the section symbol for the .debug_str section. - MCSymbol *getDebugStrSym() const { return DwarfStrSectionSym; } - - /// Returns the section symbol for the .debug_ranges section. - MCSymbol *getRangeSectionSym() const { return DwarfDebugRangeSectionSym; } - /// Returns the previous CU that was being updated const DwarfCompileUnit *getPrevCU() const { return PrevCU; } void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; } @@ -577,7 +552,8 @@ public: /// \brief Emit an entry for the debug loc section. This can be used to /// handle an entry that's going to be emitted into the debug loc section. 
- void emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocEntry &Entry); + void emitDebugLocEntry(ByteStreamer &Streamer, + const DebugLocEntry &Entry); /// \brief emit a single value for the debug loc section. void emitDebugLocValue(ByteStreamer &Streamer, const DebugLocEntry::Value &Value, @@ -621,8 +597,6 @@ public: void addAccelType(StringRef Name, const DIE &Die, char Flags); const MachineFunction *getCurrentFunction() const { return CurFn; } - const MCSymbol *getFunctionBeginSym() const { return FunctionBeginSym; } - const MCSymbol *getFunctionEndSym() const { return FunctionEndSym; } iterator_range<ImportedEntityMap::const_iterator> findImportedEntitiesForScope(const MDNode *Scope) const { @@ -642,12 +616,6 @@ public: /// \brief Return Label immediately following the instruction. MCSymbol *getLabelAfterInsn(const MachineInstr *MI); - // FIXME: Consider rolling ranges up into DwarfDebug since we use a single - // range_base anyway, so there's no need to keep them as separate per-CU range - // lists. (though one day we might end up with a range.dwo section, in which - // case it'd go to DwarfFile) - unsigned getNextRangeNumber() { return GlobalRangeCount++; } - // FIXME: Sink these functions down into DwarfFile/Dwarf*Unit. SmallPtrSet<const MDNode *, 16> &getProcessedSPNodes() { diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index e8867c0..6eaf707 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -21,17 +21,24 @@ namespace llvm { class MachineFunction; class ARMTargetStreamer; -class DwarfCFIException : public EHStreamer { - /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality - /// should be emitted. +class DwarfCFIExceptionBase : public EHStreamer { +protected: + DwarfCFIExceptionBase(AsmPrinter *A); + + /// Per-function flag to indicate if frame CFI info should be emitted. + bool shouldEmitCFI; + + void markFunctionEnd() override; +}; + +class DwarfCFIException : public DwarfCFIExceptionBase { + /// Per-function flag to indicate if .cfi_personality should be emitted. bool shouldEmitPersonality; - /// shouldEmitLSDA - Per-function flag to indicate if .cfi_lsda - /// should be emitted. + /// Per-function flag to indicate if .cfi_lsda should be emitted. bool shouldEmitLSDA; - /// shouldEmitMoves - Per-function flag to indicate if frame moves info - /// should be emitted. + /// Per-function flag to indicate if frame moves info should be emitted. bool shouldEmitMoves; AsmPrinter::CFIMoveType moveTypeModule; @@ -43,26 +50,21 @@ public: DwarfCFIException(AsmPrinter *A); virtual ~DwarfCFIException(); - /// endModule - Emit all exception information that should come after the - /// content. + /// Emit all exception information that should come after the content. void endModule() override; - /// beginFunction - Gather pre-function exception information. Assumes being - /// emitted immediately after the function entry point. + /// Gather pre-function exception information. Assumes being emitted + /// immediately after the function entry point. void beginFunction(const MachineFunction *MF) override; - /// endFunction - Gather and emit post-function exception information. + /// Gather and emit post-function exception information. 
void endFunction(const MachineFunction *) override; }; -class ARMException : public EHStreamer { +class ARMException : public DwarfCFIExceptionBase { void emitTypeInfos(unsigned TTypeEncoding) override; ARMTargetStreamer &getTargetStreamer(); - /// shouldEmitCFI - Per-function flag to indicate if frame CFI info - /// should be emitted. - bool shouldEmitCFI; - public: //===--------------------------------------------------------------------===// // Main entry points. @@ -70,15 +72,14 @@ public: ARMException(AsmPrinter *A); virtual ~ARMException(); - /// endModule - Emit all exception information that should come after the - /// content. + /// Emit all exception information that should come after the content. void endModule() override; - /// beginFunction - Gather pre-function exception information. Assumes being - /// emitted immediately after the function entry point. + /// Gather pre-function exception information. Assumes being emitted + /// immediately after the function entry point. void beginFunction(const MachineFunction *MF) override; - /// endFunction - Gather and emit post-function exception information. + /// Gather and emit post-function exception information. void endFunction(const MachineFunction *) override; }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index fcab067..489e455 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -22,14 +22,6 @@ using namespace llvm; -const TargetRegisterInfo *DwarfExpression::getTRI() const { - return AP.TM.getSubtargetImpl()->getRegisterInfo(); -} - -unsigned DwarfExpression::getDwarfVersion() const { - return AP.getDwarfDebug()->getDwarfVersion(); -} - void DwarfExpression::AddReg(int DwarfReg, const char *Comment) { assert(DwarfReg >= 0 && "invalid negative dwarf register number"); if (DwarfReg < 32) { @@ -74,28 +66,28 @@ void DwarfExpression::AddShr(unsigned ShiftBy) { } bool DwarfExpression::AddMachineRegIndirect(unsigned MachineReg, int Offset) { - int DwarfReg = getTRI()->getDwarfRegNum(MachineReg, false); - if (DwarfReg < 0) - return false; - if (isFrameRegister(MachineReg)) { // If variable offset is based in frame register then use fbreg. EmitOp(dwarf::DW_OP_fbreg); EmitSigned(Offset); - } else { - AddRegIndirect(DwarfReg, Offset); + return true; } + + int DwarfReg = TRI.getDwarfRegNum(MachineReg, false); + if (DwarfReg < 0) + return false; + + AddRegIndirect(DwarfReg, Offset); return true; } bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg, unsigned PieceSizeInBits, unsigned PieceOffsetInBits) { - const TargetRegisterInfo *TRI = getTRI(); - if (!TRI->isPhysicalRegister(MachineReg)) + if (!TRI.isPhysicalRegister(MachineReg)) return false; - int Reg = TRI->getDwarfRegNum(MachineReg, false); + int Reg = TRI.getDwarfRegNum(MachineReg, false); // If this is a valid register number, emit it. if (Reg >= 0) { @@ -107,12 +99,12 @@ bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg, // Walk up the super-register chain until we find a valid number. // For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0. 
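// ---------------------------------------------------------------------------
// Illustration, not part of this commit: AddReg above chooses between the
// two DWARF register encodings. Registers 0-31 get the compact one-byte
// opcodes DW_OP_reg0 + N; larger numbers need DW_OP_regx followed by the
// register number as a ULEB128. A self-contained sketch of that encoding:
#include <cstdint>
#include <vector>

static void emitULEB128(std::vector<uint8_t> &Out, uint64_t Value) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value)
      Byte |= 0x80;               // high bit set: more bytes follow
    Out.push_back(Byte);
  } while (Value);
}

static void addReg(std::vector<uint8_t> &Out, unsigned DwarfReg) {
  const uint8_t DW_OP_reg0 = 0x50, DW_OP_regx = 0x90; // DWARF constants
  if (DwarfReg < 32) {
    Out.push_back(static_cast<uint8_t>(DW_OP_reg0 + DwarfReg)); // 1-byte form
  } else {
    Out.push_back(DW_OP_regx);    // opcode, then ULEB128 operand
    emitULEB128(Out, DwarfReg);
  }
}
// ---------------------------------------------------------------------------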
- for (MCSuperRegIterator SR(MachineReg, TRI); SR.isValid(); ++SR) { - Reg = TRI->getDwarfRegNum(*SR, false); + for (MCSuperRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) { + Reg = TRI.getDwarfRegNum(*SR, false); if (Reg >= 0) { - unsigned Idx = TRI->getSubRegIndex(*SR, MachineReg); - unsigned Size = TRI->getSubRegIdxSize(Idx); - unsigned RegOffset = TRI->getSubRegIdxOffset(Idx); + unsigned Idx = TRI.getSubRegIndex(*SR, MachineReg); + unsigned Size = TRI.getSubRegIdxSize(Idx); + unsigned RegOffset = TRI.getSubRegIdxOffset(Idx); AddReg(Reg, "super-register"); if (PieceOffsetInBits == RegOffset) { AddOpPiece(Size, RegOffset); @@ -136,15 +128,15 @@ bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg, // efficient DW_OP_piece. unsigned CurPos = PieceOffsetInBits; // The size of the register in bits, assuming 8 bits per byte. - unsigned RegSize = TRI->getMinimalPhysRegClass(MachineReg)->getSize() * 8; + unsigned RegSize = TRI.getMinimalPhysRegClass(MachineReg)->getSize() * 8; // Keep track of the bits in the register we already emitted, so we // can avoid emitting redundant aliasing subregs. SmallBitVector Coverage(RegSize, false); - for (MCSubRegIterator SR(MachineReg, TRI); SR.isValid(); ++SR) { - unsigned Idx = TRI->getSubRegIndex(MachineReg, *SR); - unsigned Size = TRI->getSubRegIdxSize(Idx); - unsigned Offset = TRI->getSubRegIdxOffset(Idx); - Reg = TRI->getDwarfRegNum(*SR, false); + for (MCSubRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) { + unsigned Idx = TRI.getSubRegIndex(MachineReg, *SR); + unsigned Size = TRI.getSubRegIdxSize(Idx); + unsigned Offset = TRI.getSubRegIdxOffset(Idx); + Reg = TRI.getDwarfRegNum(*SR, false); // Intersection between the bits we already emitted and the bits // covered by this subregister. @@ -180,7 +172,7 @@ void DwarfExpression::AddSignedConstant(int Value) { // value, so the producers and consumers started to rely on heuristics // to disambiguate the value vs. location status of the expression. // See PR21176 for more details. - if (getDwarfVersion() >= 4) + if (DwarfVersion >= 4) EmitOp(dwarf::DW_OP_stack_value); } @@ -188,7 +180,7 @@ void DwarfExpression::AddUnsignedConstant(unsigned Value) { EmitOp(dwarf::DW_OP_constu); EmitUnsigned(Value); // cf. comment in DwarfExpression::AddSignedConstant(). - if (getDwarfVersion() >= 4) + if (DwarfVersion >= 4) EmitOp(dwarf::DW_OP_stack_value); } @@ -204,11 +196,12 @@ bool DwarfExpression::AddMachineRegExpression(DIExpression Expr, unsigned MachineReg, unsigned PieceOffsetInBits) { auto I = Expr.begin(); - // Pattern-match combinations for which more efficient representations exist - // first. - if (I == Expr.end()) + auto E = Expr.end(); + if (I == E) return AddMachineRegPiece(MachineReg); + // Pattern-match combinations for which more efficient representations exist + // first. bool ValidReg = false; switch (*I) { case dwarf::DW_OP_bit_piece: { @@ -218,20 +211,23 @@ bool DwarfExpression::AddMachineRegExpression(DIExpression Expr, return AddMachineRegPiece(MachineReg, SizeInBits, getOffsetOrZero(OffsetInBits, PieceOffsetInBits)); } - case dwarf::DW_OP_plus: + case dwarf::DW_OP_plus: { // [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset]. 
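// ---------------------------------------------------------------------------
// Illustration, not part of this commit: the fix below replaces an unchecked
// I->getNext() comparison with an explicit end test before dereferencing the
// successor. The general shape of a safe one-element lookahead:
#include <iterator>

template <typename Iter, typename Pred>
bool nextMatches(Iter I, Iter E, Pred P) {
  auto N = std::next(I);    // candidate successor element
  return N != E && P(*N);   // dereference only after the bounds check
}
// ---------------------------------------------------------------------------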
- if (I->getNext() == dwarf::DW_OP_deref) { + auto N = I->getNext(); + if ((N != E) && (*N == dwarf::DW_OP_deref)) { unsigned Offset = I->getArg(1); ValidReg = AddMachineRegIndirect(MachineReg, Offset); std::advance(I, 2); break; } else ValidReg = AddMachineRegPiece(MachineReg); - case dwarf::DW_OP_deref: - // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg]. - ValidReg = AddMachineRegIndirect(MachineReg); - ++I; - break; + } + case dwarf::DW_OP_deref: { + // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg]. + ValidReg = AddMachineRegIndirect(MachineReg); + ++I; + break; + } default: llvm_unreachable("unsupported operand"); } @@ -240,7 +236,7 @@ bool DwarfExpression::AddMachineRegExpression(DIExpression Expr, return false; // Emit remaining elements of the expression. - AddExpression(I, Expr.end(), PieceOffsetInBits); + AddExpression(I, E, PieceOffsetInBits); return true; } diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h index b90b7b6..985d52c 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -30,21 +30,22 @@ class DIELoc; /// entry. class DwarfExpression { protected: - const AsmPrinter &AP; // Various convenience accessors that extract things out of AsmPrinter. - const TargetRegisterInfo *getTRI() const; - unsigned getDwarfVersion() const; + const TargetRegisterInfo &TRI; + unsigned DwarfVersion; public: - DwarfExpression(const AsmPrinter &AP) : AP(AP) {} + DwarfExpression(const TargetRegisterInfo &TRI, + unsigned DwarfVersion) + : TRI(TRI), DwarfVersion(DwarfVersion) {} virtual ~DwarfExpression() {} /// Output a dwarf operand and an optional assembler comment. virtual void EmitOp(uint8_t Op, const char *Comment = nullptr) = 0; /// Emit a raw signed value. - virtual void EmitSigned(int Value) = 0; + virtual void EmitSigned(int64_t Value) = 0; /// Emit a raw unsigned value. - virtual void EmitUnsigned(unsigned Value) = 0; + virtual void EmitUnsigned(uint64_t Value) = 0; /// Return whether the given machine register is the frame register in the /// current function. virtual bool isFrameRegister(unsigned MachineReg) = 0; @@ -105,27 +106,27 @@ class DebugLocDwarfExpression : public DwarfExpression { ByteStreamer &BS; public: - DebugLocDwarfExpression(const AsmPrinter &AP, ByteStreamer &BS) - : DwarfExpression(AP), BS(BS) {} + DebugLocDwarfExpression(const TargetRegisterInfo &TRI, + unsigned DwarfVersion, ByteStreamer &BS) + : DwarfExpression(TRI, DwarfVersion), BS(BS) {} void EmitOp(uint8_t Op, const char *Comment = nullptr) override; - void EmitSigned(int Value) override; - void EmitUnsigned(unsigned Value) override; + void EmitSigned(int64_t Value) override; + void EmitUnsigned(uint64_t Value) override; bool isFrameRegister(unsigned MachineReg) override; }; /// DwarfExpression implementation for singular DW_AT_location. 
class DIEDwarfExpression : public DwarfExpression { +const AsmPrinter &AP; DwarfUnit &DU; DIELoc &DIE; public: - DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE) - : DwarfExpression(AP), DU(DU), DIE(DIE) {} - + DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE); void EmitOp(uint8_t Op, const char *Comment = nullptr) override; - void EmitSigned(int Value) override; - void EmitUnsigned(unsigned Value) override; + void EmitSigned(int64_t Value) override; + void EmitUnsigned(uint64_t Value) override; bool isFrameRegister(unsigned MachineReg) override; }; } diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp index 3988f0d..60acc58e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -17,9 +17,8 @@ #include "llvm/Target/TargetLoweringObjectFile.h" namespace llvm { -DwarfFile::DwarfFile(AsmPrinter *AP, DwarfDebug &DD, StringRef Pref, - BumpPtrAllocator &DA) - : Asm(AP), DD(DD), StrPool(DA, *Asm, Pref) {} +DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA) + : Asm(AP), StrPool(DA, *Asm, Pref) {} DwarfFile::~DwarfFile() {} @@ -48,15 +47,15 @@ void DwarfFile::addUnit(std::unique_ptr<DwarfUnit> U) { // Emit the various dwarf units to the unit section USection with // the abbreviations going into ASection. -void DwarfFile::emitUnits(const MCSymbol *ASectionSym) { +void DwarfFile::emitUnits(bool UseOffsets) { for (const auto &TheU : CUs) { DIE &Die = TheU->getUnitDie(); const MCSection *USection = TheU->getSection(); Asm->OutStreamer.SwitchSection(USection); - TheU->emitHeader(ASectionSym); + TheU->emitHeader(UseOffsets); - DD.emitDIE(Die); + Asm->emitDwarfDIE(Die); } } @@ -120,23 +119,13 @@ unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) { Die.setSize(Offset - Die.getOffset()); return Offset; } + void DwarfFile::emitAbbrevs(const MCSection *Section) { // Check to see if it is worth the effort. if (!Abbreviations.empty()) { // Start the debug abbrev section. Asm->OutStreamer.SwitchSection(Section); - - // For each abbrevation. - for (const DIEAbbrev *Abbrev : Abbreviations) { - // Emit the abbrevations code (base 1 index.) - Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code"); - - // Emit the abbreviations data. - Abbrev->Emit(Asm); - } - - // Mark end of abbreviations. - Asm->EmitULEB128(0, "EOM(3)"); + Asm->emitDwarfAbbrevs(Abbreviations); } } diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h index 35bf33a..c9de666 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -37,8 +37,6 @@ class DwarfFile { // Target of Dwarf emission, used for sizing of abbreviations. AsmPrinter *Asm; - DwarfDebug &DD; - // Used to uniquely define abbreviations. FoldingSet<DIEAbbrev> AbbreviationsSet; @@ -62,8 +60,7 @@ class DwarfFile { DenseMap<const MDNode *, DIE *> MDTypeNodeToDieMap; public: - DwarfFile(AsmPrinter *AP, DwarfDebug &DD, StringRef Pref, - BumpPtrAllocator &DA); + DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA); ~DwarfFile(); @@ -83,7 +80,7 @@ public: /// \brief Emit all of the units to the section listed with the given /// abbreviation section. - void emitUnits(const MCSymbol *ASectionSym); + void emitUnits(bool UseOffsets); /// \brief Emit a set of abbreviations to the specific section.
void emitAbbrevs(const MCSection *); diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp index d76b66c..165ef16 100644 --- a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -19,7 +19,7 @@ getEntry(AsmPrinter &Asm, std::pair<MCSymbol *, unsigned> &Entry = Pool[Str]; if (!Entry.first) { Entry.second = Pool.size() - 1; - Entry.first = Asm.GetTempSymbol(Prefix, Entry.second); + Entry.first = Asm.createTempSymbol(Prefix); } return Entry; } diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index b0c7d48..f6af73f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -17,6 +17,7 @@ #include "DwarfDebug.h" #include "DwarfExpression.h" #include "llvm/ADT/APFloat.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" @@ -43,17 +44,23 @@ GenerateDwarfTypeUnits("generate-type-units", cl::Hidden, cl::desc("Generate DWARF4 type units."), cl::init(false)); +DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, + DIELoc &DIE) + : DwarfExpression(*AP.MF->getSubtarget().getRegisterInfo(), + AP.getDwarfDebug()->getDwarfVersion()), + AP(AP), DU(DU), DIE(DIE) {} + void DIEDwarfExpression::EmitOp(uint8_t Op, const char* Comment) { DU.addUInt(DIE, dwarf::DW_FORM_data1, Op); } -void DIEDwarfExpression::EmitSigned(int Value) { +void DIEDwarfExpression::EmitSigned(int64_t Value) { DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value); } -void DIEDwarfExpression::EmitUnsigned(unsigned Value) { +void DIEDwarfExpression::EmitUnsigned(uint64_t Value) { DU.addUInt(DIE, dwarf::DW_FORM_udata, Value); } bool DIEDwarfExpression::isFrameRegister(unsigned MachineReg) { - return MachineReg == getTRI()->getFrameRegister(*AP.MF); + return MachineReg == TRI.getFrameRegister(*AP.MF); } @@ -257,12 +264,14 @@ void DwarfUnit::addIndexedString(DIE &Die, dwarf::Attribute Attribute, /// to be in the local string pool instead of indirected. void DwarfUnit::addLocalString(DIE &Die, dwarf::Attribute Attribute, StringRef String) { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); MCSymbol *Symb = DU->getStringPool().getSymbol(*Asm, String); DIEValue *Value; if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) Value = new (DIEValueAllocator) DIELabel(Symb); else - Value = new (DIEValueAllocator) DIEDelta(Symb, DD->getDebugStrSym()); + Value = new (DIEValueAllocator) + DIEDelta(Symb, TLOF.getDwarfStrSection()->getBeginSymbol()); DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String); Die.addValue(Attribute, dwarf::DW_FORM_strp, Str); } @@ -750,6 +759,15 @@ void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) { addBlock(Die, dwarf::DW_AT_const_value, Block); } +// Add a linkage name to the DIE. +void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) { + if (!LinkageName.empty()) + addString(Die, + DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name + : dwarf::DW_AT_MIPS_linkage_name, + GlobalValue::getRealLinkageName(LinkageName)); +} + /// addTemplateParams - Add template parameters into buffer. void DwarfUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { // Add template parameters. 
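// ---------------------------------------------------------------------------
// Illustration, not part of this commit: DwarfStringPool::getEntry above
// interns a string the first time it is requested, assigning the next pool
// index; the diff also switches its label from an index-suffixed temp symbol
// to a context-allocated one. The interning idiom itself, sketched with std
// containers and a plain string standing in for MCSymbol:
#include <cstddef>
#include <map>
#include <string>

struct PoolEntry {
  std::string Symbol; // stand-in for the MCSymbol labelling the string
  std::size_t Index = 0;
  bool Assigned = false;
};

PoolEntry &getEntry(std::map<std::string, PoolEntry> &Pool,
                    const std::string &Str, const std::string &Prefix) {
  PoolEntry &Entry = Pool[Str];        // default-constructed on first use
  if (!Entry.Assigned) {
    Entry.Index = Pool.size() - 1;     // next free index, in order of use
    Entry.Symbol = Prefix + std::to_string(Entry.Index); // any unique label
    Entry.Assigned = true;
  }
  return Entry;
}
// ---------------------------------------------------------------------------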
@@ -1269,9 +1287,8 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(DISubprogram SP, assert(((LinkageName.empty() || DeclLinkageName.empty()) || LinkageName == DeclLinkageName) && "decl has a linkage name and it is different"); - if (!LinkageName.empty() && DeclLinkageName.empty()) - addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, - GlobalValue::getRealLinkageName(LinkageName)); + if (DeclLinkageName.empty()) + addLinkageName(SPDie, LinkageName); if (!DeclDie) return false; @@ -1344,9 +1361,8 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie, if (SP.isOptimized()) addFlag(SPDie, dwarf::DW_AT_APPLE_optimized); - if (unsigned isa = Asm->getISAEncoding(SP.getFunction())) { + if (unsigned isa = Asm->getISAEncoding()) addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); - } if (SP.isLValueReference()) addFlag(SPDie, dwarf::DW_AT_reference); @@ -1597,7 +1613,7 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { return &StaticMemberDIE; } -void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const { +void DwarfUnit::emitHeader(bool UseOffsets) { // Emit size of content not including length itself Asm->OutStreamer.AddComment("Length of Unit"); Asm->EmitInt32(getHeaderSize() + UnitDie.getSize()); @@ -1605,14 +1621,16 @@ void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const { Asm->OutStreamer.AddComment("DWARF version number"); Asm->EmitInt16(DD->getDwarfVersion()); Asm->OutStreamer.AddComment("Offset Into Abbrev. Section"); + // We share one abbreviations table across all units so it's always at the // start of the section. Use a relocatable offset where needed to ensure // linking doesn't invalidate that offset. - if (ASectionSym) - Asm->EmitSectionOffset(ASectionSym, ASectionSym); + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + if (!UseOffsets) + Asm->emitSectionOffset(TLOF.getDwarfAbbrevSection()->getBeginSymbol()); else - // Use a constant value when no symbol is provided. Asm->EmitInt32(0); + Asm->OutStreamer.AddComment("Address Size (in bytes)"); Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); } @@ -1622,8 +1640,8 @@ void DwarfUnit::initSection(const MCSection *Section) { this->Section = Section; } -void DwarfTypeUnit::emitHeader(const MCSymbol *ASectionSym) const { - DwarfUnit::emitHeader(ASectionSym); +void DwarfTypeUnit::emitHeader(bool UseOffsets) { + DwarfUnit::emitHeader(UseOffsets); Asm->OutStreamer.AddComment("Type Signature"); Asm->OutStreamer.EmitIntValue(TypeSignature, sizeof(TypeSignature)); Asm->OutStreamer.AddComment("Type DIE Offset"); diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index 7a5e47d..81c5821 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -120,7 +120,6 @@ protected: DwarfUnit(unsigned UID, dwarf::Tag, DICompileUnit CU, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU); - void initSection(const MCSection *Section); /// Add a string attribute data and value. void addLocalString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); @@ -132,6 +131,8 @@ protected: public: virtual ~DwarfUnit(); + void initSection(const MCSection *Section); + const MCSection *getSection() const { assert(Section); return Section; @@ -251,6 +252,9 @@ public: void addConstantFPValue(DIE &Die, const MachineOperand &MO); void addConstantFPValue(DIE &Die, const ConstantFP *CFP); + /// \brief Add a linkage name, if it isn't empty. 
+ void addLinkageName(DIE &Die, StringRef LinkageName); + /// addTemplateParams - Add template parameters in buffer. void addTemplateParams(DIE &Buffer, DIArray TParams); @@ -321,7 +325,7 @@ public: } /// Emit the header for this unit, not including the initial length field. - virtual void emitHeader(const MCSymbol *ASectionSym) const; + virtual void emitHeader(bool UseOffsets); virtual DwarfCompileUnit &getCU() = 0; @@ -423,12 +427,11 @@ public: void setType(const DIE *Ty) { this->Ty = Ty; } /// Emit the header for this unit, not including the initial length field. - void emitHeader(const MCSymbol *ASectionSym) const override; + void emitHeader(bool UseOffsets) override; unsigned getHeaderSize() const override { return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature sizeof(uint32_t); // Type DIE Offset } - using DwarfUnit::initSection; DwarfCompileUnit &getCU() override { return CU; } }; } // end llvm namespace diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 4841814..14df4c9 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -436,12 +436,7 @@ void EHStreamer::emitExceptionTable() { Asm->OutContext.GetOrCreateSymbol(Twine("GCC_except_table")+ Twine(Asm->getFunctionNumber())); Asm->OutStreamer.EmitLabel(GCCETSym); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("exception", - Asm->getFunctionNumber())); - - if (IsSJLJ) - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("_LSDA_", - Asm->getFunctionNumber())); + Asm->OutStreamer.EmitLabel(Asm->getCurExceptionSym()); // Emit the LSDA header. Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); @@ -552,16 +547,14 @@ void EHStreamer::emitExceptionTable() { I = CallSites.begin(), E = CallSites.end(); I != E; ++I) { const CallSiteEntry &S = *I; - MCSymbol *EHFuncBeginSym = - Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber()); + MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin(); MCSymbol *BeginLabel = S.BeginLabel; if (!BeginLabel) BeginLabel = EHFuncBeginSym; MCSymbol *EndLabel = S.EndLabel; if (!EndLabel) - EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber()); - + EndLabel = Asm->getFunctionEnd(); // Offset of the call site relative to the previous call site, counted in // number of 16-byte bundles. The first call site is counted relative to @@ -689,19 +682,3 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { Asm->EmitULEB128(TypeID); } } - -/// Emit all exception information that should come after the content. -void EHStreamer::endModule() { - llvm_unreachable("Should be implemented"); -} - -/// Gather pre-function exception information. Assumes it's being emitted -/// immediately after the function entry point. -void EHStreamer::beginFunction(const MachineFunction *MF) { - llvm_unreachable("Should be implemented"); -} - -/// Gather and emit post-function exception information. -void EHStreamer::endFunction(const MachineFunction *) { - llvm_unreachable("Should be implemented"); -} diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h index 9b316ff..94d0585 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -125,16 +125,6 @@ public: EHStreamer(AsmPrinter *A); virtual ~EHStreamer(); - /// Emit all exception information that should come after the content. - void endModule() override; - - /// Gather pre-function exception information. Assumes being emitted - /// immediately after the function entry point. 
- void beginFunction(const MachineFunction *MF) override; - - /// Gather and emit post-function exception information. - void endFunction(const MachineFunction *) override; - // Unused. void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} void beginInstruction(const MachineInstr *MI) override {} diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp index 2b03877..7d76ead 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -48,8 +48,6 @@ Win64Exception::~Win64Exception() {} void Win64Exception::endModule() { } -/// beginFunction - Gather pre-function exception information. Assumes it's -/// being emitted immediately after the function entry point. void Win64Exception::beginFunction(const MachineFunction *MF) { shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; @@ -80,9 +78,6 @@ void Win64Exception::beginFunction(const MachineFunction *MF) { const MCSymbol *PersHandlerSym = TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); Asm->OutStreamer.EmitWinEHHandler(PersHandlerSym, true, true); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", - Asm->getFunctionNumber())); } /// endFunction - Gather and emit post-function exception information. @@ -91,9 +86,6 @@ void Win64Exception::endFunction(const MachineFunction *) { if (!shouldEmitPersonality && !shouldEmitMoves) return; - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", - Asm->getFunctionNumber())); - // Map all labels and get rid of any dead landing pads. MMI->TidyLandingPads(); @@ -170,10 +162,8 @@ void Win64Exception::emitCSpecificHandlerTable() { SmallVector<CallSiteEntry, 64> CallSites; computeCallSiteTable(CallSites, LandingPads, FirstActions); - MCSymbol *EHFuncBeginSym = - Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber()); - MCSymbol *EHFuncEndSym = - Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber()); + MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin(); + MCSymbol *EHFuncEndSym = Asm->getFunctionEnd(); // Emit the number of table entries. unsigned NumEntries = 0; diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp index b5e0929..d2b4eec 100644 --- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp @@ -190,8 +190,11 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { return; assert(FI.End && "Don't know where the function ends?"); - StringRef FuncName = getDISubprogram(GV).getDisplayName(), - GVName = GV->getName(); + StringRef GVName = GV->getName(); + StringRef FuncName; + if (DISubprogram SP = getDISubprogram(GV)) + FuncName = SP.getDisplayName(); + // FIXME Clang currently sets DisplayName to "bar" for a C++ // "namespace_foo::bar" function, see PR21528. Luckily, dbghelp.dll is trying // to demangle display names anyways, so let's just put a mangled name into @@ -364,10 +367,7 @@ void WinCodeViewLineTables::endFunction(const MachineFunction *MF) { FnDebugInfo.erase(GV); VisitedFunctions.pop_back(); } else { - // Define end label for subprogram. 
- MCSymbol *FunctionEndSym = Asm->OutStreamer.getContext().CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(FunctionEndSym); - CurFn->End = FunctionEndSym; + CurFn->End = Asm->getFunctionEnd(); } CurFn = nullptr; } diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp index 4b64be0..fa17108 100644 --- a/lib/CodeGen/AtomicExpandPass.cpp +++ b/lib/CodeGen/AtomicExpandPass.cpp @@ -48,7 +48,7 @@ namespace { bool expandAtomicLoadToLL(LoadInst *LI); bool expandAtomicLoadToCmpXchg(LoadInst *LI); bool expandAtomicStore(StoreInst *SI); - bool expandAtomicRMW(AtomicRMWInst *AI); + bool tryExpandAtomicRMW(AtomicRMWInst *AI); bool expandAtomicRMWToLLSC(AtomicRMWInst *AI); bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI); bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); @@ -135,9 +135,12 @@ bool AtomicExpand::runOnFunction(Function &F) { // - into a load if it is idempotent // - into a Cmpxchg/LL-SC loop otherwise // we try them in that order. - MadeChange |= - (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) || - (TLI->shouldExpandAtomicRMWInIR(RMWI) && expandAtomicRMW(RMWI)); + + if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) { + MadeChange = true; + } else { + MadeChange |= tryExpandAtomicRMW(RMWI); + } } else if (CASI && TLI->hasLoadLinkedStoreConditional()) { MadeChange |= expandAtomicCmpXchg(CASI); } @@ -211,7 +214,7 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) { // atomic if implemented as a native store. So we replace them by an // atomic swap, that can be implemented for example as a ldrex/strex on ARM // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes. - // It is the responsibility of the target to only return true in + // It is the responsibility of the target to only signal expansion via // shouldExpandAtomicRMW in cases where this is required and possible. IRBuilder<> Builder(SI); AtomicRMWInst *AI = @@ -220,14 +223,26 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) { SI->eraseFromParent(); // Now we have an appropriate swap instruction, lower it as usual. 
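// ---------------------------------------------------------------------------
// Illustration, not part of this commit: tryExpandAtomicRMW below turns a
// boolean "does the target have LL/SC?" test into an exhaustive switch over
// a target-chosen expansion kind. The shape of that dispatch, with
// hypothetical stand-in names:
enum class AtomicRMWExpansionKind { None, LLSC, CmpXChg };

bool tryExpand(AtomicRMWExpansionKind Kind) {
  switch (Kind) {
  case AtomicRMWExpansionKind::None:
    return false; // target lowers the RMW natively; nothing to do
  case AtomicRMWExpansionKind::LLSC:
    return true;  // would expand to a load-linked/store-conditional loop
  case AtomicRMWExpansionKind::CmpXChg:
    return true;  // would expand to a compare-exchange loop
  }
  return false;   // unreachable for a covered enum (llvm_unreachable in LLVM)
}
// ---------------------------------------------------------------------------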
- return expandAtomicRMW(AI); + return tryExpandAtomicRMW(AI); } -bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) { - if (TLI->hasLoadLinkedStoreConditional()) +bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { + switch (TLI->shouldExpandAtomicRMWInIR(AI)) { + case TargetLoweringBase::AtomicRMWExpansionKind::None: + return false; + case TargetLoweringBase::AtomicRMWExpansionKind::LLSC: { + assert(TLI->hasLoadLinkedStoreConditional() && + "TargetLowering requested we expand AtomicRMW instruction into " + "load-linked/store-conditional combos, but such instructions aren't " + "supported"); + return expandAtomicRMWToLLSC(AI); - else + } + case TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg: { return expandAtomicRMWToCmpXchg(AI); + } + } + llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); } /// Emit IR to implement the given atomicrmw operation on values in registers, diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index b8f05cd..abe7ca1 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -727,6 +728,62 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, return true; } +static bool hasIdenticalMMOs(const MachineInstr *MI1, const MachineInstr *MI2) { + auto I1 = MI1->memoperands_begin(), E1 = MI1->memoperands_end(); + auto I2 = MI2->memoperands_begin(), E2 = MI2->memoperands_end(); + if ((E1 - I1) != (E2 - I2)) + return false; + for (; I1 != E1; ++I1, ++I2) { + if (**I1 != **I2) + return false; + } + return true; +} + +static void +removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos, + MachineBasicBlock &MBBCommon) { + // Remove MMOs from memory operations in the common block + // when they do not match the ones from the block being tail-merged. + // This ensures later passes conservatively compute dependencies. + MachineBasicBlock *MBB = MBBIStartPos->getParent(); + // Note CommonTailLen does not necessarily matches the size of + // the common BB nor all its instructions because of debug + // instructions differences. + unsigned CommonTailLen = 0; + for (auto E = MBB->end(); MBBIStartPos != E; ++MBBIStartPos) + ++CommonTailLen; + + MachineBasicBlock::reverse_iterator MBBI = MBB->rbegin(); + MachineBasicBlock::reverse_iterator MBBIE = MBB->rend(); + MachineBasicBlock::reverse_iterator MBBICommon = MBBCommon.rbegin(); + MachineBasicBlock::reverse_iterator MBBIECommon = MBBCommon.rend(); + + while (CommonTailLen--) { + assert(MBBI != MBBIE && "Reached BB end within common tail length!"); + (void)MBBIE; + + if (MBBI->isDebugValue()) { + ++MBBI; + continue; + } + + while ((MBBICommon != MBBIECommon) && MBBICommon->isDebugValue()) + ++MBBICommon; + + assert(MBBICommon != MBBIECommon && + "Reached BB end within common tail length!"); + assert(MBBICommon->isIdenticalTo(&*MBBI) && "Expected matching MIIs!"); + + if (MBBICommon->mayLoad() || MBBICommon->mayStore()) + if (!hasIdenticalMMOs(&*MBBI, &*MBBICommon)) + MBBICommon->clearMemRefs(); + + ++MBBI; + ++MBBICommon; + } +} + // See if any of the blocks in MergePotentials (which all have a common single // successor, or all have no successor) can be tail-merged. 
If there is a // successor, any blocks in MergePotentials that are not tail-merged and @@ -761,7 +818,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // Sort by hash value so that blocks with identical end sequences sort // together. - std::stable_sort(MergePotentials.begin(), MergePotentials.end()); + array_pod_sort(MergePotentials.begin(), MergePotentials.end()); // Walk through equivalence sets looking for actual exact matches. while (MergePotentials.size() > 1) { @@ -840,6 +897,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, continue; DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber() << (i == e-1 ? "" : ", ")); + // Remove MMOs from memory operations as needed. + removeMMOsFromMemoryOperations(SameTails[i].getTailStartPos(), *MBB); // Hack the end off BB i, making it jump to BB commonTailIndex instead. ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB); // BB i is no longer a predecessor of SuccBB; remove it from the worklist. diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index f21d4d2..ef57638 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -19,7 +19,6 @@ add_llvm_library(LLVMCodeGen ExecutionDepsFix.cpp ExpandISelPseudos.cpp ExpandPostRAPseudos.cpp - ForwardControlFlowIntegrity.cpp GCMetadata.cpp GCMetadataPrinter.cpp GCRootLowering.cpp @@ -29,7 +28,6 @@ add_llvm_library(LLVMCodeGen InlineSpiller.cpp InterferenceCache.cpp IntrinsicLowering.cpp - JumpInstrTables.cpp LLVMTargetMachine.cpp LatencyPriorityQueue.cpp LexicalScopes.cpp diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 7c0068e..da66639 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -24,9 +24,10 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeBranchFolderPassPass(Registry); initializeCodeGenPreparePass(Registry); initializeDeadMachineInstructionElimPass(Registry); + initializeDwarfEHPreparePass(Registry); initializeEarlyIfConverterPass(Registry); - initializeExpandPostRAPass(Registry); initializeExpandISelPseudosPass(Registry); + initializeExpandPostRAPass(Registry); initializeFinalizeMachineBundlesPass(Registry); initializeGCMachineCodeAnalysisPass(Registry); initializeGCModuleInfoPass(Registry); @@ -36,31 +37,34 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeLiveStacksPass(Registry); initializeLiveVariablesPass(Registry); initializeLocalStackSlotPassPass(Registry); + initializeLowerIntrinsicsPass(Registry); initializeMachineBlockFrequencyInfoPass(Registry); initializeMachineBlockPlacementPass(Registry); initializeMachineBlockPlacementStatsPass(Registry); - initializeMachineCopyPropagationPass(Registry); - initializeMachineCombinerPass(Registry); initializeMachineCSEPass(Registry); + initializeMachineCombinerPass(Registry); + initializeMachineCopyPropagationPass(Registry); initializeMachineDominatorTreePass(Registry); - initializeMachinePostDominatorTreePass(Registry); + initializeMachineFunctionPrinterPassPass(Registry); initializeMachineLICMPass(Registry); initializeMachineLoopInfoPass(Registry); initializeMachineModuleInfoPass(Registry); + initializeMachinePostDominatorTreePass(Registry); initializeMachineSchedulerPass(Registry); initializeMachineSinkingPass(Registry); initializeMachineVerifierPassPass(Registry); initializeOptimizePHIsPass(Registry); + initializePEIPass(Registry); initializePHIEliminationPass(Registry); initializePeepholeOptimizerPass(Registry); initializePostMachineSchedulerPass(Registry); 
initializePostRASchedulerPass(Registry); initializeProcessImplicitDefsPass(Registry); - initializePEIPass(Registry); initializeRegisterCoalescerPass(Registry); initializeSlotIndexesPass(Registry); - initializeStackProtectorPass(Registry); initializeStackColoringPass(Registry); + initializeStackMapLivenessPass(Registry); + initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); initializeTailDuplicatePassPass(Registry); initializeTargetPassConfigPass(Registry); @@ -70,9 +74,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeUnreachableMachineBlockElimPass(Registry); initializeVirtRegMapPass(Registry); initializeVirtRegRewriterPass(Registry); - initializeLowerIntrinsicsPass(Registry); - initializeMachineFunctionPrinterPassPass(Registry); - initializeStackMapLivenessPass(Registry); + initializeWinEHPreparePass(Registry); } void LLVMInitializeCodeGen(LLVMPassRegistryRef R) { diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index c0d7dca..6c9d048 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -124,7 +124,6 @@ class TypePromotionTransaction; const TargetLowering *TLI; const TargetTransformInfo *TTI; const TargetLibraryInfo *TLInfo; - DominatorTree *DT; /// CurInstIterator - As we scan instructions optimizing them, this is the /// next instruction to optimize. Xforms that can invalidate this should @@ -142,8 +141,7 @@ class TypePromotionTransaction; /// promotion for the current function. InstrToOrigTy PromotedInsts; - /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to - /// be updated. + /// ModifiedDT - If CFG is modified in anyway. bool ModifiedDT; /// OptSize - True if optimizing for size. @@ -186,7 +184,7 @@ class TypePromotionTransaction; bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, Instruction *&Inst, const SmallVectorImpl<Instruction *> &Exts, - unsigned CreatedInst); + unsigned CreatedInstCost); bool splitBranchCondition(Function &F); bool simplifyOffsetableRelocate(Instruction &I); }; @@ -214,9 +212,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) { TLI = TM->getSubtargetImpl(F)->getTargetLowering(); TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - DominatorTreeWrapperPass *DTWP = - getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DT = DTWP ? &DTWP->getDomTree() : nullptr; OptSize = F.hasFnAttribute(Attribute::OptimizeForSize); /// This optimization identifies DIV instructions that can be @@ -255,7 +250,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) { MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration); // Restart BB iteration if the dominator tree of the Function was changed - ModifiedDT |= ModifiedDTOnIteration; if (ModifiedDTOnIteration) break; } @@ -298,8 +292,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) { if (EverMadeChange || MadeChange) MadeChange |= EliminateFallThrough(F); - if (MadeChange) - ModifiedDT = true; EverMadeChange |= MadeChange; } @@ -313,9 +305,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) { EverMadeChange |= simplifyOffsetableRelocate(*I); } - if (ModifiedDT && DT) - DT->recalculate(F); - return EverMadeChange; } @@ -341,7 +330,7 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) { // Remember if SinglePred was the entry block of the function. // If so, we will need to move BB back to the entry position. 
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); - MergeBasicBlockIntoOnlyPred(BB, DT); + MergeBasicBlockIntoOnlyPred(BB, nullptr); if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); @@ -481,7 +470,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // Remember if SinglePred was the entry block of the function. If so, we // will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); - MergeBasicBlockIntoOnlyPred(DestBB, DT); + MergeBasicBlockIntoOnlyPred(DestBB, nullptr); if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); @@ -523,13 +512,6 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. BB->replaceAllUsesWith(DestBB); - if (DT && !ModifiedDT) { - BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); - BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); - BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); - DT->changeImmediateDominator(DestBB, NewIDom); - DT->eraseNode(BB); - } BB->eraseFromParent(); ++NumBlocksElim; @@ -561,12 +543,15 @@ static void computeBaseDerivedRelocateMap( IntrinsicInst *I = Item.second; auto BaseKey = std::make_pair(Key.first, Key.first); - IntrinsicInst *Base = RelocateIdxMap[BaseKey]; - if (!Base) + + // We're iterating over RelocateIdxMap so we cannot modify it. + auto MaybeBase = RelocateIdxMap.find(BaseKey); + if (MaybeBase == RelocateIdxMap.end()) // TODO: We might want to insert a new base object relocate and gep off // that, if there are enough derived object relocates. continue; - RelocateInstMap[Base].push_back(I); + + RelocateInstMap[MaybeBase->second].push_back(I); } } @@ -615,8 +600,8 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase, // Create a Builder and replace the target callsite with a gep IRBuilder<> Builder(ToReplace); Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc()); - Value *Replacement = - Builder.CreateGEP(RelocatedBase, makeArrayRef(OffsetV)); + Value *Replacement = Builder.CreateGEP( + Derived->getSourceElementType(), RelocatedBase, makeArrayRef(OffsetV)); Instruction *ReplacementInst = cast<Instruction>(Replacement); ReplacementInst->removeFromParent(); ReplacementInst->insertAfter(RelocatedBase); @@ -1225,6 +1210,42 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { return true; } + const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr; + + // Align the pointer arguments to this call if the target thinks it's a good + // idea + unsigned MinSize, PrefAlign; + if (TLI && TD && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) { + for (auto &Arg : CI->arg_operands()) { + // We want to align both objects whose address is used directly and + // objects whose address is used in casts and GEPs, though it only makes + // sense for GEPs if the offset is a multiple of the desired alignment and + // if size - offset meets the size threshold. 
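// ---------------------------------------------------------------------------
// Illustration, not part of this commit: computeBaseDerivedRelocateMap above
// switches from RelocateIdxMap[BaseKey] to find(), because operator[] on a
// map inserts a default value for missing keys, and growing a hashed map
// while iterating over it invalidates the iterators. The safe-lookup idiom:
template <typename Map, typename Key>
typename Map::mapped_type *lookup(Map &M, const Key &K) {
  auto It = M.find(K); // never inserts, never rehashes
  return It == M.end() ? nullptr : &It->second;
}
// Usage: std::unordered_map<int, int> M; if (auto *V = lookup(M, 42)) { ... }
// ---------------------------------------------------------------------------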
+ if (!Arg->getType()->isPointerTy()) + continue; + APInt Offset(TD->getPointerSizeInBits( + cast<PointerType>(Arg->getType())->getAddressSpace()), 0); + Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset); + uint64_t Offset2 = Offset.getLimitedValue(); + AllocaInst *AI; + if ((Offset2 & (PrefAlign-1)) == 0 && + (AI = dyn_cast<AllocaInst>(Val)) && + AI->getAlignment() < PrefAlign && + TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) + AI->setAlignment(PrefAlign); + // TODO: Also align GlobalVariables + } + // If this is a memcpy (or similar) then we may be able to improve the + // alignment + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) { + unsigned Align = getKnownAlignment(MI->getDest(), *TD); + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) + Align = std::min(Align, getKnownAlignment(MTI->getSource(), *TD)); + if (Align > MI->getAlignment()) + MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align)); + } + } + IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); if (II) { switch (II->getIntrinsicID()) { @@ -1241,8 +1262,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { WeakVH IterHandle(CurInstIterator); replaceAndRecursivelySimplify(CI, RetVal, - TLI ? TLI->getDataLayout() : nullptr, - TLInfo, ModifiedDT ? nullptr : DT); + TLInfo, nullptr); // If the iterator instruction was recursively deleted, start over at the // start of the block. @@ -1284,15 +1304,11 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { // From here on out we're working with named functions. if (!CI->getCalledFunction()) return false; - // We'll need DataLayout from here on out. - const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr; - if (!TD) return false; - // Lower all default uses of _chk calls. This is very similar // to what InstCombineCalls does, but here we are only lowering calls // to fortified library functions (e.g. __memcpy_chk) that have the default // "don't know" as the objectsize. Anything else should be left alone. - FortifiedLibCallSimplifier Simplifier(TD, TLInfo, true); + FortifiedLibCallSimplifier Simplifier(TLInfo, true); if (Value *V = Simplifier.optimizeCall(CI)) { CI->replaceAllUsesWith(V); CI->eraseFromParent(); @@ -2025,7 +2041,7 @@ private: ExtAddrMode &AMBefore, ExtAddrMode &AMAfter); bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); - bool IsPromotionProfitable(unsigned MatchedSize, unsigned SizeWithPromotion, + bool IsPromotionProfitable(unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const; }; @@ -2159,7 +2175,7 @@ class TypePromotionHelper { /// \brief Utility function to promote the operand of \p Ext when this /// operand is a promotable trunc or sext or zext. /// \p PromotedInsts maps the instructions to their type before promotion. - /// \p CreatedInsts[out] contains how many non-free instructions have been + /// \p CreatedInstsCost[out] contains the cost of all instructions /// created to promote the operand of Ext. /// Newly added extensions are inserted in \p Exts. /// Newly added truncates are inserted in \p Truncs. @@ -2167,53 +2183,55 @@ class TypePromotionHelper { /// \return The promoted value which is used instead of Ext. 
static Value *promoteOperandForTruncAndAnyExt( Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs); + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI); /// \brief Utility function to promote the operand of \p Ext when this /// operand is promotable and is not a supported trunc or sext. /// \p PromotedInsts maps the instructions to their type before promotion. - /// \p CreatedInsts[out] contains how many non-free instructions have been + /// \p CreatedInstsCost[out] contains the cost of all the instructions /// created to promote the operand of Ext. /// Newly added extensions are inserted in \p Exts. /// Newly added truncates are inserted in \p Truncs. /// Should never be called directly. /// \return The promoted value which is used instead of Ext. - static Value * - promoteOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, - SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs, bool IsSExt); + static Value *promoteOperandForOther(Instruction *Ext, + TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, + unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, + const TargetLowering &TLI, bool IsSExt); /// \see promoteOperandForOther. - static Value * - signExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, - unsigned &CreatedInsts, - SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs) { - return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts, - Truncs, true); + static Value *signExtendOperandForOther( + Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { + return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost, + Exts, Truncs, TLI, true); } /// \see promoteOperandForOther. - static Value * - zeroExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, - unsigned &CreatedInsts, - SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs) { - return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts, - Truncs, false); + static Value *zeroExtendOperandForOther( + Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { + return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost, + Exts, Truncs, TLI, false); } public: /// Type for the utility function that promotes the operand of Ext. typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + InstrToOrigTy &PromotedInsts, + unsigned &CreatedInstsCost, SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs); + SmallVectorImpl<Instruction *> *Truncs, + const TargetLowering &TLI); /// \brief Given a sign/zero extend instruction \p Ext, return the appropriate /// action to promote the operand of \p Ext instead of using Ext.
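All the promotion routines here are funneled through the single Action function-pointer typedef, which is why threading const TargetLowering & through meant editing every signature in lock-step. A stripped-down sketch of the same dispatch shape; the names are illustrative, not LLVM's:

#include <cstdio>

struct Ctx { int Cost = 0; };

// One typedef describes every promotion routine; callers pick an
// implementation at runtime and invoke it uniformly.
using Action = int (*)(int Operand, Ctx &C);

int signExtend(int Operand, Ctx &C) { C.Cost += 1; return Operand; }
int zeroExtend(int Operand, Ctx &C) { C.Cost += 1; return Operand & 0xff; }

// Adding a parameter to Action forces every implementation to change too.
Action pickAction(bool IsSigned) { return IsSigned ? signExtend : zeroExtend; }

int main() {
  Ctx C;
  Action A = pickAction(/*IsSigned=*/false);
  std::printf("%d (cost %d)\n", A(0x1ff, C), C.Cost);
}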
/// \return NULL if no promotable action is possible with the current @@ -2330,16 +2348,18 @@ TypePromotionHelper::Action TypePromotionHelper::getAction( Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( llvm::Instruction *SExt, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs) { + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { // By construction, the operand of SExt is an instruction. Otherwise we cannot // get through it and this method should not be called. Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0)); Value *ExtVal = SExt; + bool HasMergedNonFreeExt = false; if (isa<ZExtInst>(SExtOpnd)) { // Replace s|zext(zext(opnd)) // => zext(opnd). + HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd); Value *ZExt = TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType()); TPT.replaceAllUsesWith(SExt, ZExt); @@ -2350,7 +2370,7 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( // => z|sext(opnd). TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0)); } - CreatedInsts = 0; + CreatedInstsCost = 0; // Remove dead code. if (SExtOpnd->use_empty()) @@ -2359,8 +2379,11 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( // Check if the extension is still needed. Instruction *ExtInst = dyn_cast<Instruction>(ExtVal); if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) { - if (ExtInst && Exts) - Exts->push_back(ExtInst); + if (ExtInst) { + if (Exts) + Exts->push_back(ExtInst); + CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt; + } return ExtVal; } @@ -2373,13 +2396,14 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( Value *TypePromotionHelper::promoteOperandForOther( Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs, bool IsSExt) { + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI, + bool IsSExt) { // By construction, the operand of Ext is an instruction. Otherwise we cannot // get through it and this method should not be called. Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0)); - CreatedInsts = 0; + CreatedInstsCost = 0; if (!ExtOpnd->hasOneUse()) { // ExtOpnd will be promoted. // All its uses, but Ext, will need to use a truncated value of the @@ -2454,7 +2478,6 @@ Value *TypePromotionHelper::promoteOperandForOther( continue; } ExtForOpnd = cast<Instruction>(ValForExtOpnd); - ++CreatedInsts; } if (Exts) Exts->push_back(ExtForOpnd); @@ -2463,6 +2486,7 @@ Value *TypePromotionHelper::promoteOperandForOther( // Move the sign extension before the insertion point. TPT.moveBefore(ExtForOpnd, ExtOpnd); TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd); + CreatedInstsCost += !TLI.isExtFree(ExtForOpnd); // If more sext are required, new instructions will have to be created. ExtForOpnd = nullptr; } @@ -2475,22 +2499,22 @@ Value *TypePromotionHelper::promoteOperandForOther( /// IsPromotionProfitable - Check whether or not promoting an instruction /// to a wider type was profitable. -/// \p MatchedSize gives the number of instructions that have been matched -/// in the addressing mode after the promotion was applied. 
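The body above charges for a new extension only when the target reports it as not free, and waives the charge when the rewrite merged away a non-free extension. Schematically, with isExtFree standing in for the TargetLowering hook:

#include <cstdio>

// Hypothetical stand-in for TargetLowering::isExtFree: pretend kind 0 is an
// extension this target folds for free.
static bool isExtFree(int ExtKind) { return ExtKind == 0; }

// Cost charged for a freshly created extension: zero if the target folds it
// for free, and zero again if creating it let us delete a non-free one.
static unsigned chargeForNewExt(int ExtKind, bool MergedNonFreeExt) {
  return !isExtFree(ExtKind) && !MergedNonFreeExt;
}

int main() {
  std::printf("%u %u\n", chargeForNewExt(1, false), chargeForNewExt(1, true));
}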
-/// \p SizeWithPromotion gives the number of created instructions for -/// the promotion plus the number of instructions that have been -/// matched in the addressing mode before the promotion. +/// \p NewCost gives the cost of extension instructions created by the +/// promotion. +/// \p OldCost gives the cost of extension instructions before the promotion +/// plus the number of instructions that have been +/// matched in the addressing mode before the promotion. /// \p PromotedOperand is the value that has been promoted. /// \return True if the promotion is profitable, false otherwise. -bool -AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize, - unsigned SizeWithPromotion, - Value *PromotedOperand) const { - // We folded less instructions than what we created to promote the operand. +bool AddressingModeMatcher::IsPromotionProfitable( + unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const { + DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n'); + // The cost of the new extensions is greater than the cost of the + // old extension plus what we folded. // This is not profitable. - if (MatchedSize < SizeWithPromotion) + if (NewCost > OldCost) return false; - if (MatchedSize > SizeWithPromotion) + if (NewCost < OldCost) return true; // The promotion is neutral but it may help folding the sign extension in // loads for instance. @@ -2688,9 +2712,10 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); - unsigned CreatedInsts = 0; + unsigned CreatedInstsCost = 0; + unsigned ExtCost = !TLI.isExtFree(Ext); Value *PromotedOperand = - TPH(Ext, TPT, PromotedInsts, CreatedInsts, nullptr, nullptr); + TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI); // SExt has been moved away. // Thus either it will be rematched later in the recursive calls or it is // gone. Anyway, we must not fold it into the addressing mode at this point. @@ -2712,7 +2737,12 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, unsigned OldSize = AddrModeInsts.size(); if (!MatchAddr(PromotedOperand, Depth) || - !IsPromotionProfitable(AddrModeInsts.size(), OldSize + CreatedInsts, + // The total of the new cost is equal to the cost of the created + // instructions. + // The total of the old cost is equal to the cost of the extension plus + // what we have saved in the addressing mode. + !IsPromotionProfitable(CreatedInstsCost, + ExtCost + (AddrModeInsts.size() - OldSize), PromotedOperand)) { AddrMode = BackupAddrMode; AddrModeInsts.resize(OldSize); @@ -3472,7 +3502,7 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) { bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, Instruction *&Inst, const SmallVectorImpl<Instruction *> &Exts, - unsigned CreatedInsts = 0) { + unsigned CreatedInstsCost = 0) { // Iterate over all the extensions to see if one forms an ext(load). for (auto I : Exts) { // Check if we directly have ext(load). @@ -3494,10 +3524,11 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); SmallVector<Instruction *, 4> NewExts; - unsigned NewCreatedInsts = 0; + unsigned NewCreatedInstsCost = 0; + unsigned ExtCost = !TLI->isExtFree(I); // Promote.
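With both sides expressed as costs, the profitability test reduces to a three-way comparison: strictly cheaper wins, strictly dearer loses, and a tie defers to a secondary heuristic. A sketch, with the tie-breaker supplied by the caller:

#include <functional>

// NewCost: extensions created by the promotion.
// OldCost: the original extension plus instructions folded into the
// addressing mode. Ties defer to a caller-provided heuristic.
bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
                           const std::function<bool()> &TieBreaker) {
  if (NewCost > OldCost)
    return false;      // we pay more than we save
  if (NewCost < OldCost)
    return true;       // strictly cheaper
  return TieBreaker(); // neutral: e.g. may still expose ext(load) folding
}

int main() {
  return isPromotionProfitable(1, 2, [] { return false; }) ? 0 : 1;
}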
- Value *PromotedVal = - TPH(I, TPT, PromotedInsts, NewCreatedInsts, &NewExts, nullptr); + Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost, + &NewExts, nullptr, *TLI); assert(PromotedVal && "TypePromotionHelper should have filtered out those cases"); @@ -3507,9 +3538,10 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, // With exactly 2, the transformation is neutral, because we will merge // one extension but leave one. However, we optimistically keep going, // because the new extension may be removed too. - unsigned TotalCreatedInsts = CreatedInsts + NewCreatedInsts; + long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost; + TotalCreatedInstsCost -= ExtCost; if (!StressExtLdPromotion && - (TotalCreatedInsts > 1 || + (TotalCreatedInstsCost > 1 || !isPromotedInstructionLegal(*TLI, PromotedVal))) { // The promotion is not profitable, rollback to the previous state. TPT.rollback(LastKnownGood); @@ -3517,8 +3549,8 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, } // The promotion is profitable. // Check if it exposes an ext(load). - (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInsts); - if (LI && (StressExtLdPromotion || NewCreatedInsts == 0 || + (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost); + if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost || // If we have created a new extension, i.e., now we have two // extensions. We must make sure one of them is merged with // the load, otherwise we may degrade the code quality. @@ -4193,8 +4225,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { // It is possible for very late stage optimizations (such as SimplifyCFG) // to introduce PHI nodes too late to be cleaned up. If we detect such a // trivial PHI, go ahead and zap it here. - if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : nullptr, - TLInfo, DT)) { + const DataLayout &DL = I->getModule()->getDataLayout(); + if (Value *V = SimplifyInstruction(P, DL, TLInfo, nullptr)) { P->replaceAllUsesWith(V); P->eraseFromParent(); ++NumPHIsElim; @@ -4463,8 +4495,7 @@ static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { /// FIXME: Remove the (equivalent?) implementation in SelectionDAG. /// bool CodeGenPrepare::splitBranchCondition(Function &F) { - if (!TM || TM->Options.EnableFastISel != true || - !TLI || TLI->isJumpExpensive()) + if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive()) return false; bool MadeChange = false; @@ -4625,10 +4656,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { } } - // Request DOM Tree update. // Note: No point in getting fancy here, since the DT info is never - // available to CodeGenPrepare and the existing update code is broken - // anyways. + // available to CodeGenPrepare. 
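The switch to long long for TotalCreatedInstsCost matters: subtracting ExtCost can push the running total below zero, and an unsigned accumulator would wrap to a huge value and wrongly fail the > 1 test. A compact illustration:

#include <cstdio>

int main() {
  unsigned CreatedInstsCost = 0, NewCreatedInstsCost = 0, ExtCost = 1;

  // Unsigned arithmetic wraps: 0 - 1 becomes 4294967295, which compares > 1.
  unsigned Wrapped = CreatedInstsCost + NewCreatedInstsCost - ExtCost;

  // Signed arithmetic keeps the intended -1, which compares <= 1.
  long long Total = CreatedInstsCost + NewCreatedInstsCost;
  Total -= ExtCost;

  std::printf("unsigned: %u, signed: %lld\n", Wrapped, Total);
}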
ModifiedDT = true; MadeChange = true; diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 7b47a48..42656fb 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -13,13 +13,19 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "dwarfehprepare" @@ -33,18 +39,28 @@ namespace { // RewindFunction - _Unwind_Resume or the target equivalent. Constant *RewindFunction; + DominatorTree *DT; + const TargetLowering *TLI; + bool InsertUnwindResumeCalls(Function &Fn); Value *GetExceptionObject(ResumeInst *RI); + size_t + pruneUnreachableResumes(Function &Fn, + SmallVectorImpl<ResumeInst *> &Resumes, + SmallVectorImpl<LandingPadInst *> &CleanupLPads); public: static char ID; // Pass identification, replacement for typeid. // INITIALIZE_TM_PASS requires a default constructor, but it isn't used in // practice. - DwarfEHPrepare() : FunctionPass(ID), TM(nullptr), RewindFunction(nullptr) {} + DwarfEHPrepare() + : FunctionPass(ID), TM(nullptr), RewindFunction(nullptr), DT(nullptr), + TLI(nullptr) {} DwarfEHPrepare(const TargetMachine *TM) - : FunctionPass(ID), TM(TM), RewindFunction(nullptr) {} + : FunctionPass(ID), TM(TM), RewindFunction(nullptr), DT(nullptr), + TLI(nullptr) {} bool runOnFunction(Function &Fn) override; @@ -53,6 +69,8 @@ namespace { return false; } + void getAnalysisUsage(AnalysisUsage &AU) const override; + const char *getPassName() const override { return "Exception handling preparation"; } @@ -60,13 +78,22 @@ namespace { } // end anonymous namespace char DwarfEHPrepare::ID = 0; -INITIALIZE_TM_PASS(DwarfEHPrepare, "dwarfehprepare", "Prepare DWARF exceptions", - false, false) +INITIALIZE_TM_PASS_BEGIN(DwarfEHPrepare, "dwarfehprepare", + "Prepare DWARF exceptions", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_TM_PASS_END(DwarfEHPrepare, "dwarfehprepare", + "Prepare DWARF exceptions", false, false) FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) { return new DwarfEHPrepare(TM); } +void DwarfEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); +} + /// GetExceptionObject - Return the exception object from the value passed into /// the 'resume' instruction (typically an aggregate). Clean up any dead /// instructions, including the 'resume' instruction. @@ -107,21 +134,81 @@ Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { return ExnObj; } +/// Replace resumes that are not reachable from a cleanup landing pad with +/// unreachable and then simplify those blocks. 
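The DwarfEHPrepare hunks above wire in the dominator tree and TTI the standard legacy-pass-manager way: declare the dependency in getAnalysisUsage, then fetch it in runOnFunction. A minimal skeleton of that pattern; the pass name and body are placeholders, not part of this commit:

#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"

using namespace llvm;

namespace {
struct ExamplePass : public FunctionPass {
  static char ID;
  ExamplePass() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Tell the pass manager to schedule this analysis before us.
    AU.addRequired<DominatorTreeWrapperPass>();
  }

  bool runOnFunction(Function &F) override {
    // Only safe to query because it was declared required above.
    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    (void)DT;
    return false; // placeholder: no transformation performed
  }
};
}

char ExamplePass::ID = 0;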
+size_t DwarfEHPrepare::pruneUnreachableResumes( + Function &Fn, SmallVectorImpl<ResumeInst *> &Resumes, + SmallVectorImpl<LandingPadInst *> &CleanupLPads) { + BitVector ResumeReachable(Resumes.size()); + size_t ResumeIndex = 0; + for (auto *RI : Resumes) { + for (auto *LP : CleanupLPads) { + if (isPotentiallyReachable(LP, RI, DT)) { + ResumeReachable.set(ResumeIndex); + break; + } + } + ++ResumeIndex; + } + + // If everything is reachable, there is no change. + if (ResumeReachable.all()) + return Resumes.size(); + + const TargetTransformInfo &TTI = + getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn); + LLVMContext &Ctx = Fn.getContext(); + + // Otherwise, insert unreachable instructions and call simplifycfg. + size_t ResumesLeft = 0; + for (size_t I = 0, E = Resumes.size(); I < E; ++I) { + ResumeInst *RI = Resumes[I]; + if (ResumeReachable[I]) { + Resumes[ResumesLeft++] = RI; + } else { + BasicBlock *BB = RI->getParent(); + new UnreachableInst(Ctx, RI); + RI->eraseFromParent(); + SimplifyCFG(BB, TTI, 1); + } + } + Resumes.resize(ResumesLeft); + return ResumesLeft; +} + /// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present /// into calls to the appropriate _Unwind_Resume function. bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { SmallVector<ResumeInst*, 16> Resumes; + SmallVector<LandingPadInst*, 16> CleanupLPads; + bool FoundLP = false; for (BasicBlock &BB : Fn) { if (auto *RI = dyn_cast<ResumeInst>(BB.getTerminator())) Resumes.push_back(RI); + if (auto *LP = BB.getLandingPadInst()) { + if (LP->isCleanup()) + CleanupLPads.push_back(LP); + // Check the personality on the first landingpad. Don't do anything if + // it's for MSVC. + if (!FoundLP) { + FoundLP = true; + EHPersonality Pers = classifyEHPersonality(LP->getPersonalityFn()); + if (isMSVCEHPersonality(Pers)) + return false; + } + } } if (Resumes.empty()) return false; - // Find the rewind function if we didn't already. - const TargetLowering *TLI = TM->getSubtargetImpl(Fn)->getTargetLowering(); LLVMContext &Ctx = Fn.getContext(); + + size_t ResumesLeft = pruneUnreachableResumes(Fn, Resumes, CleanupLPads); + if (ResumesLeft == 0) + return true; // We pruned them all. + + // Find the rewind function if we didn't already. if (!RewindFunction) { FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), false); @@ -130,9 +217,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { } // Create the basic block where the _Unwind_Resume call will live. - unsigned ResumesSize = Resumes.size(); - - if (ResumesSize == 1) { + if (ResumesLeft == 1) { // Instead of creating a new BB and PHI node, just append the call to // _Unwind_Resume to the end of the single resume block. 
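pruneUnreachableResumes compacts the surviving resumes in place with a write cursor (Resumes[ResumesLeft++] = RI) rather than building a second vector, then shrinks once at the end. The same idiom on a plain std::vector:

#include <cstdio>
#include <vector>

// Keep elements whose bit is set, preserving order, with no extra allocation.
void compact(std::vector<int> &V, const std::vector<bool> &Keep) {
  size_t Left = 0;
  for (size_t I = 0, E = V.size(); I < E; ++I)
    if (Keep[I])
      V[Left++] = V[I]; // write cursor trails the read cursor
  V.resize(Left);       // one shrink at the end
}

int main() {
  std::vector<int> V{1, 2, 3, 4};
  compact(V, {true, false, true, false});
  for (int X : V)
    std::printf("%d ", X); // prints: 1 3
}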
ResumeInst *RI = Resumes.front(); @@ -149,7 +234,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { } BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn); - PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesSize, + PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesLeft, "exn.obj", UnwindBB); // Extract the exception object from the ResumeInst and add it to the PHI node @@ -175,6 +260,10 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { bool DwarfEHPrepare::runOnFunction(Function &Fn) { assert(TM && "DWARF EH preparation requires a target machine"); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + TLI = TM->getSubtargetImpl(Fn)->getTargetLowering(); bool Changed = InsertUnwindResumeCalls(Fn); + DT = nullptr; + TLI = nullptr; return Changed; } diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index b3a22c8..5b09cf1 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -113,7 +113,7 @@ struct DomainValue { } namespace { -/// LiveReg - Information about a live register. +/// Information about a live register. struct LiveReg { /// Value currently in this register, or NULL when no value is being tracked. /// This counts as a DomainValue reference. @@ -125,7 +125,7 @@ struct LiveReg { /// will be a negative number. int Def; }; -} // anonynous namespace +} // anonymous namespace namespace { class ExeDepsFix : public MachineFunctionPass { @@ -174,7 +174,7 @@ public: private: iterator_range<SmallVectorImpl<int>::const_iterator> - regIndizes(unsigned Reg) const; + regIndices(unsigned Reg) const; // DomainValue allocation. DomainValue *alloc(int domain = -1); @@ -205,10 +205,10 @@ private: char ExeDepsFix::ID = 0; -/// Translate TRI register number to a list of indizes into our stmaller tables +/// Translate TRI register number to a list of indices into our smaller tables /// of interesting registers. iterator_range<SmallVectorImpl<int>::const_iterator> -ExeDepsFix::regIndizes(unsigned Reg) const { +ExeDepsFix::regIndices(unsigned Reg) const { assert(Reg < AliasMap.size() && "Invalid register"); const auto &Entry = AliasMap[Reg]; return make_range(Entry.begin(), Entry.end()); @@ -225,7 +225,7 @@ DomainValue *ExeDepsFix::alloc(int domain) { return dv; } -/// release - Release a reference to DV. When the last reference is released, +/// Release a reference to DV. When the last reference is released, /// collapse if needed. void ExeDepsFix::release(DomainValue *DV) { while (DV) { @@ -245,8 +245,8 @@ void ExeDepsFix::release(DomainValue *DV) { } } -/// resolve - Follow the chain of dead DomainValues until a live DomainValue is -/// reached. Update the referenced pointer when necessary. +/// Follow the chain of dead DomainValues until a live DomainValue is reached. +/// Update the referenced pointer when necessary. DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) { DomainValue *DV = DVRef; if (!DV || !DV->Next) @@ -325,8 +325,7 @@ void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) { setLiveReg(rx, alloc(domain)); } -/// Merge - All instructions and registers in B are moved to A, and B is -/// released. +/// All instructions and registers in B are moved to A, and B is released. 
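The regIndices accessor above returns an iterator_range so callers can range-for over the alias table without knowing its storage type. A std-only equivalent of that small range adapter:

#include <cstdio>
#include <vector>

// Minimal stand-in for llvm::iterator_range: a pair of iterators that
// range-based for loops accept directly.
template <typename It> struct Range {
  It B, E;
  It begin() const { return B; }
  It end() const { return E; }
};

template <typename It> Range<It> makeRange(It B, It E) { return {B, E}; }

int main() {
  std::vector<int> AliasMapEntry{3, 7, 9};
  for (int rx : makeRange(AliasMapEntry.begin(), AliasMapEntry.end()))
    std::printf("%d ", rx);
}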
bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { assert(!A->isCollapsed() && "Cannot merge into collapsed"); assert(!B->isCollapsed() && "Cannot merge from collapsed"); @@ -352,7 +351,7 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { return true; } -// enterBasicBlock - Set up LiveRegs by merging predecessor live-out values. +/// Set up LiveRegs by merging predecessor live-out values. void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { // Detect back-edges from predecessors we haven't processed yet. SeenUnknownBackEdge = false; @@ -378,7 +377,7 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { if (MBB->pred_empty()) { for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), e = MBB->livein_end(); i != e; ++i) { - for (int rx : regIndizes(*i)) { + for (int rx : regIndices(*i)) { // Treat function live-ins as if they were defined just before the first // instruction. Usually, function arguments are set up immediately // before the call. @@ -475,7 +474,7 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) { bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, unsigned Pref) { unsigned reg = MI->getOperand(OpIdx).getReg(); - for (int rx : regIndizes(reg)) { + for (int rx : regIndices(reg)) { unsigned Clearance = CurInstr - LiveRegs[rx].Def; DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); @@ -521,7 +520,7 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { break; if (MO.isUse()) continue; - for (int rx : regIndizes(MO.getReg())) { + for (int rx : regIndices(MO.getReg())) { // This instruction explicitly defines rx. DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr << '\t' << *MI); @@ -587,7 +586,7 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - for (int rx : regIndizes(mo.getReg())) { + for (int rx : regIndices(mo.getReg())) { force(rx, domain); } } @@ -596,7 +595,7 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - for (int rx : regIndizes(mo.getReg())) { + for (int rx : regIndices(mo.getReg())) { kill(rx); force(rx, domain); } @@ -616,7 +615,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - for (int rx : regIndizes(mo.getReg())) { + for (int rx : regIndices(mo.getReg())) { DomainValue *dv = LiveRegs[rx].Value; if (dv == nullptr) continue; @@ -712,7 +711,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { ii != ee; ++ii) { MachineOperand &mo = *ii; if (!mo.isReg()) continue; - for (int rx : regIndizes(mo.getReg())) { + for (int rx : regIndices(mo.getReg())) { if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) { kill(rx); setLiveReg(rx, dv); diff --git a/lib/CodeGen/ForwardControlFlowIntegrity.cpp b/lib/CodeGen/ForwardControlFlowIntegrity.cpp deleted file mode 100644 index 63c3699..0000000 --- a/lib/CodeGen/ForwardControlFlowIntegrity.cpp +++ /dev/null @@ -1,374 +0,0 @@ -//===-- ForwardControlFlowIntegrity.cpp: Forward-Edge CFI -----------------===// -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// -/// \file -/// \brief A pass that instruments code with fast checks for indirect calls and -/// hooks for a function to check violations. -/// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "cfi" - -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/JumpInstrTableInfo.h" -#include "llvm/CodeGen/ForwardControlFlowIntegrity.h" -#include "llvm/CodeGen/JumpInstrTables.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InlineAsm.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Verifier.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -STATISTIC(NumCFIIndirectCalls, - "Number of indirect call sites rewritten by the CFI pass"); - -char ForwardControlFlowIntegrity::ID = 0; -INITIALIZE_PASS_BEGIN(ForwardControlFlowIntegrity, "forward-cfi", - "Control-Flow Integrity", true, true) -INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo); -INITIALIZE_PASS_DEPENDENCY(JumpInstrTables); -INITIALIZE_PASS_END(ForwardControlFlowIntegrity, "forward-cfi", - "Control-Flow Integrity", true, true) - -ModulePass *llvm::createForwardControlFlowIntegrityPass() { - return new ForwardControlFlowIntegrity(); -} - -ModulePass *llvm::createForwardControlFlowIntegrityPass( - JumpTable::JumpTableType JTT, CFIntegrity CFIType, bool CFIEnforcing, - StringRef CFIFuncName) { - return new ForwardControlFlowIntegrity(JTT, CFIType, CFIEnforcing, - CFIFuncName); -} - -// Checks to see if a given CallSite is making an indirect call, including -// cases where the indirect call is made through a bitcast. -static bool isIndirectCall(CallSite &CS) { - if (CS.getCalledFunction()) - return false; - - // Check the value to see if it is merely a bitcast of a function. In - // this case, it will translate to a direct function call in the resulting - // assembly, so we won't treat it as an indirect call here. 
- const Value *V = CS.getCalledValue(); - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { - return !(CE->isCast() && isa<Function>(CE->getOperand(0))); - } - - // Otherwise, since we know it's a call, it must be an indirect call - return true; -} - -static const char cfi_failure_func_name[] = "__llvm_cfi_pointer_warning"; - -ForwardControlFlowIntegrity::ForwardControlFlowIntegrity() - : ModulePass(ID), IndirectCalls(), JTType(JumpTable::Single), - CFIType(CFIntegrity::Sub), CFIEnforcing(false), CFIFuncName("") { - initializeForwardControlFlowIntegrityPass(*PassRegistry::getPassRegistry()); -} - -ForwardControlFlowIntegrity::ForwardControlFlowIntegrity( - JumpTable::JumpTableType JTT, CFIntegrity CFIType, bool CFIEnforcing, - std::string CFIFuncName) - : ModulePass(ID), IndirectCalls(), JTType(JTT), CFIType(CFIType), - CFIEnforcing(CFIEnforcing), CFIFuncName(CFIFuncName) { - initializeForwardControlFlowIntegrityPass(*PassRegistry::getPassRegistry()); -} - -ForwardControlFlowIntegrity::~ForwardControlFlowIntegrity() {} - -void ForwardControlFlowIntegrity::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<JumpInstrTableInfo>(); - AU.addRequired<JumpInstrTables>(); -} - -void ForwardControlFlowIntegrity::getIndirectCalls(Module &M) { - // To get the indirect calls, we iterate over all functions and iterate over - // the list of basic blocks in each. We extract a total list of indirect calls - // before modifying any of them, since our modifications will modify the list - // of basic blocks. - for (Function &F : M) { - for (BasicBlock &BB : F) { - for (Instruction &I : BB) { - CallSite CS(&I); - if (!(CS && isIndirectCall(CS))) - continue; - - Value *CalledValue = CS.getCalledValue(); - - // Don't rewrite this instruction if the indirect call is actually just - // inline assembly, since our transformation will generate an invalid - // module in that case. - if (isa<InlineAsm>(CalledValue)) - continue; - - IndirectCalls.push_back(&I); - } - } - } -} - -void ForwardControlFlowIntegrity::updateIndirectCalls(Module &M, - CFITables &CFIT) { - Type *Int64Ty = Type::getInt64Ty(M.getContext()); - for (Instruction *I : IndirectCalls) { - CallSite CS(I); - Value *CalledValue = CS.getCalledValue(); - - // Get the function type for this call and look it up in the tables. - Type *VTy = CalledValue->getType(); - PointerType *PTy = dyn_cast<PointerType>(VTy); - Type *EltTy = PTy->getElementType(); - FunctionType *FunTy = dyn_cast<FunctionType>(EltTy); - FunctionType *TransformedTy = JumpInstrTables::transformType(JTType, FunTy); - ++NumCFIIndirectCalls; - Constant *JumpTableStart = nullptr; - Constant *JumpTableMask = nullptr; - Constant *JumpTableSize = nullptr; - - // Some call sites have function types that don't correspond to any - // address-taken function in the module. This happens when function pointers - // are passed in from external code. - auto it = CFIT.find(TransformedTy); - if (it == CFIT.end()) { - // In this case, make sure that the function pointer will change by - // setting the mask and the start to be 0 so that the transformed - // function is 0. 
- JumpTableStart = ConstantInt::get(Int64Ty, 0); - JumpTableMask = ConstantInt::get(Int64Ty, 0); - JumpTableSize = ConstantInt::get(Int64Ty, 0); - } else { - JumpTableStart = it->second.StartValue; - JumpTableMask = it->second.MaskValue; - JumpTableSize = it->second.Size; - } - - rewriteFunctionPointer(M, I, CalledValue, JumpTableStart, JumpTableMask, - JumpTableSize); - } - - return; -} - -bool ForwardControlFlowIntegrity::runOnModule(Module &M) { - JumpInstrTableInfo *JITI = &getAnalysis<JumpInstrTableInfo>(); - Type *Int64Ty = Type::getInt64Ty(M.getContext()); - Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); - - // JumpInstrTableInfo stores information about the alignment of each entry. - // The alignment returned by JumpInstrTableInfo is alignment in bytes, not - // in the exponent. - ByteAlignment = JITI->entryByteAlignment(); - LogByteAlignment = llvm::Log2_64(ByteAlignment); - - // Set up tables for control-flow integrity based on information about the - // jump-instruction tables. - CFITables CFIT; - for (const auto &KV : JITI->getTables()) { - uint64_t Size = static_cast<uint64_t>(KV.second.size()); - uint64_t TableSize = NextPowerOf2(Size); - - int64_t MaskValue = ((TableSize << LogByteAlignment) - 1) & -ByteAlignment; - Constant *JumpTableMaskValue = ConstantInt::get(Int64Ty, MaskValue); - Constant *JumpTableSize = ConstantInt::get(Int64Ty, Size); - - // The base of the table is defined to be the first jumptable function in - // the table. - Function *First = KV.second.begin()->second; - Constant *JumpTableStartValue = ConstantExpr::getBitCast(First, VoidPtrTy); - CFIT[KV.first].StartValue = JumpTableStartValue; - CFIT[KV.first].MaskValue = JumpTableMaskValue; - CFIT[KV.first].Size = JumpTableSize; - } - - if (CFIT.empty()) - return false; - - getIndirectCalls(M); - - if (!CFIEnforcing) { - addWarningFunction(M); - } - - // Update the instructions with the check and the indirect jump through our - // table. - updateIndirectCalls(M, CFIT); - - return true; -} - -void ForwardControlFlowIntegrity::addWarningFunction(Module &M) { - PointerType *CharPtrTy = Type::getInt8PtrTy(M.getContext()); - - // Get the type of the Warning Function: void (i8*, i8*), - // where the first argument is the name of the function in which the violation - // occurs, and the second is the function pointer that violates CFI. - SmallVector<Type *, 2> WarningFunArgs; - WarningFunArgs.push_back(CharPtrTy); - WarningFunArgs.push_back(CharPtrTy); - FunctionType *WarningFunTy = - FunctionType::get(Type::getVoidTy(M.getContext()), WarningFunArgs, false); - - if (!CFIFuncName.empty()) { - Constant *FailureFun = M.getOrInsertFunction(CFIFuncName, WarningFunTy); - if (!FailureFun) - report_fatal_error("Could not get or insert the function specified by" - " -cfi-func-name"); - } else { - // The default warning function swallows the warning and lets the call - // continue, since there's no generic way for it to print out this - // information. 
- Function *WarningFun = M.getFunction(cfi_failure_func_name); - if (!WarningFun) { - WarningFun = - Function::Create(WarningFunTy, GlobalValue::LinkOnceAnyLinkage, - cfi_failure_func_name, &M); - } - - BasicBlock *Entry = - BasicBlock::Create(M.getContext(), "entry", WarningFun, 0); - ReturnInst::Create(M.getContext(), Entry); - } -} - -void ForwardControlFlowIntegrity::rewriteFunctionPointer( - Module &M, Instruction *I, Value *FunPtr, Constant *JumpTableStart, - Constant *JumpTableMask, Constant *JumpTableSize) { - IRBuilder<> TempBuilder(I); - - Type *OrigFunType = FunPtr->getType(); - - BasicBlock *CurBB = cast<BasicBlock>(I->getParent()); - Function *CurF = cast<Function>(CurBB->getParent()); - Type *Int64Ty = Type::getInt64Ty(M.getContext()); - - Value *TI = TempBuilder.CreatePtrToInt(FunPtr, Int64Ty); - Value *TStartInt = TempBuilder.CreatePtrToInt(JumpTableStart, Int64Ty); - - Value *NewFunPtr = nullptr; - Value *Check = nullptr; - switch (CFIType) { - case CFIntegrity::Sub: { - // This is the subtract, mask, and add version. - // Subtract from the base. - Value *Sub = TempBuilder.CreateSub(TI, TStartInt); - - // Mask the difference to force this to be a table offset. - Value *And = TempBuilder.CreateAnd(Sub, JumpTableMask); - - // Add it back to the base. - Value *Result = TempBuilder.CreateAdd(And, TStartInt); - - // Convert it back into a function pointer that we can call. - NewFunPtr = TempBuilder.CreateIntToPtr(Result, OrigFunType); - break; - } - case CFIntegrity::Ror: { - // This is the subtract and rotate version. - // Rotate right by the alignment value. The optimizer should recognize - // this sequence as a rotation. - - // This cast is safe, since unsigned is always a subset of uint64_t. - uint64_t LogByteAlignment64 = static_cast<uint64_t>(LogByteAlignment); - Constant *RightShift = ConstantInt::get(Int64Ty, LogByteAlignment64); - Constant *LeftShift = ConstantInt::get(Int64Ty, 64 - LogByteAlignment64); - - // Subtract from the base. - Value *Sub = TempBuilder.CreateSub(TI, TStartInt); - - // Create the equivalent of a rotate-right instruction. - Value *Shr = TempBuilder.CreateLShr(Sub, RightShift); - Value *Shl = TempBuilder.CreateShl(Sub, LeftShift); - Value *Or = TempBuilder.CreateOr(Shr, Shl); - - // Perform unsigned comparison to check for inclusion in the table. - Check = TempBuilder.CreateICmpULT(Or, JumpTableSize); - NewFunPtr = FunPtr; - break; - } - case CFIntegrity::Add: { - // This is the mask and add version. - // Mask the function pointer to turn it into an offset into the table. - Value *And = TempBuilder.CreateAnd(TI, JumpTableMask); - - // Then or this offset to the base and get the pointer value. - Value *Result = TempBuilder.CreateAdd(And, TStartInt); - - // Convert it back into a function pointer that we can call. - NewFunPtr = TempBuilder.CreateIntToPtr(Result, OrigFunType); - break; - } - } - - if (!CFIEnforcing) { - // If a check hasn't been added (in the rotation version), then check to see - // if it's the same as the original function. This check determines whether - // or not we call the CFI failure function. - if (!Check) - Check = TempBuilder.CreateICmpEQ(NewFunPtr, FunPtr); - BasicBlock *InvalidPtrBlock = - BasicBlock::Create(M.getContext(), "invalid.ptr", CurF, 0); - BasicBlock *ContinuationBB = CurBB->splitBasicBlock(I); - - // Remove the unconditional branch that connects the two blocks. - TerminatorInst *TermInst = CurBB->getTerminator(); - TermInst->eraseFromParent(); - - // Add a conditional branch that depends on the Check above. 
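The Ror case above assembles a rotate from two shifts and an OR, relying on instruction selection to pattern-match the pair into a single rotate. The emitted shape as ordinary C++, valid only for 0 < R < 64 (which holds here since LogByteAlignment is a small nonzero constant):

#include <cstdint>
#include <cstdio>

// (x >> r) | (x << (64 - r)) is the canonical rotate-right idiom that
// optimizers and instruction selectors recognize.
uint64_t rotr64(uint64_t X, unsigned R) {
  return (X >> R) | (X << (64 - R)); // requires 0 < R < 64
}

int main() {
  std::printf("%llx\n", (unsigned long long)rotr64(0x1, 4));
  // prints: 1000000000000000
}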
- BranchInst::Create(ContinuationBB, InvalidPtrBlock, Check, CurBB); - - // Call the warning function for this pointer, then continue. - Instruction *BI = BranchInst::Create(ContinuationBB, InvalidPtrBlock); - insertWarning(M, InvalidPtrBlock, BI, FunPtr); - } else { - // Modify the instruction to call this value. - CallSite CS(I); - CS.setCalledFunction(NewFunPtr); - } -} - -void ForwardControlFlowIntegrity::insertWarning(Module &M, BasicBlock *Block, - Instruction *I, Value *FunPtr) { - Function *ParentFun = cast<Function>(Block->getParent()); - - // Get the function to call right before the instruction. - Function *WarningFun = nullptr; - if (CFIFuncName.empty()) { - WarningFun = M.getFunction(cfi_failure_func_name); - } else { - WarningFun = M.getFunction(CFIFuncName); - } - - assert(WarningFun && "Could not find the CFI failure function"); - - Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); - - IRBuilder<> WarningInserter(I); - // Create a mergeable GlobalVariable containing the name of the function. - Value *ParentNameGV = - WarningInserter.CreateGlobalString(ParentFun->getName()); - Value *ParentNamePtr = WarningInserter.CreateBitCast(ParentNameGV, VoidPtrTy); - Value *FunVoidPtr = WarningInserter.CreateBitCast(FunPtr, VoidPtrTy); - WarningInserter.CreateCall2(WarningFun, ParentNamePtr, FunVoidPtr); -} diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 7a29569..b8799a5 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -247,7 +247,7 @@ namespace { return true; else if (Incr1 == Incr2) { // Favors subsumption. - if (C1->NeedSubsumption == false && C2->NeedSubsumption == true) + if (!C1->NeedSubsumption && C2->NeedSubsumption) return true; else if (C1->NeedSubsumption == C2->NeedSubsumption) { // Favors diamond over triangle, etc. @@ -726,6 +726,12 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, if (BBI.IsDone || BBI.IsUnpredicable) return false; + // If it is already predicated but we couldn't analyze its terminator, the + // latter might fallthrough, but we can't determine where to. + // Conservatively avoid if-converting again. + if (BBI.Predicate.size() && !BBI.IsBrAnalyzable) + return false; + // If it is already predicated, check if the new predicate subsumes // its predicate. 
if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate)) @@ -1555,7 +1561,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI, UpdatePredRedefs(I, Redefs); } - std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate)); + BBI.Predicate.append(Cond.begin(), Cond.end()); BBI.IsAnalyzed = false; BBI.NonPredSize = 0; @@ -1620,9 +1626,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, } } - std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(), - std::back_inserter(ToBBI.Predicate)); - std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate)); + ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end()); + ToBBI.Predicate.append(Cond.begin(), Cond.end()); ToBBI.ClobbersPred |= FromBBI.ClobbersPred; ToBBI.IsAnalyzed = false; @@ -1661,8 +1666,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { if (NBB && !FromBBI.BB->isSuccessor(NBB)) FromBBI.BB->addSuccessor(NBB); - std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(), - std::back_inserter(ToBBI.Predicate)); + ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end()); FromBBI.Predicate.clear(); ToBBI.NonPredSize += FromBBI.NonPredSize; diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp index 187e015..fd5749b 100644 --- a/lib/CodeGen/InterferenceCache.cpp +++ b/lib/CodeGen/InterferenceCache.cpp @@ -21,7 +21,8 @@ using namespace llvm; #define DEBUG_TYPE "regalloc" // Static member used for null interference cursors. -InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference; +const InterferenceCache::BlockInterference + InterferenceCache::Cursor::NoInterference; // Initializes PhysRegEntries (instead of a SmallVector, PhysRegEntries is a // buffer of size NumPhysRegs to speed up alloc/clear for targets with large diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h index 1791afb..6519a80 100644 --- a/lib/CodeGen/InterferenceCache.h +++ b/lib/CodeGen/InterferenceCache.h @@ -170,8 +170,8 @@ public: /// Cursor - The primary query interface for the block interference cache. class Cursor { Entry *CacheEntry; - BlockInterference *Current; - static BlockInterference NoInterference; + const BlockInterference *Current; + static const BlockInterference NoInterference; void setEntry(Entry *E) { Current = nullptr; diff --git a/lib/CodeGen/JumpInstrTables.cpp b/lib/CodeGen/JumpInstrTables.cpp deleted file mode 100644 index 75fa261..0000000 --- a/lib/CodeGen/JumpInstrTables.cpp +++ /dev/null @@ -1,296 +0,0 @@ -//===-- JumpInstrTables.cpp: Jump-Instruction Tables ----------------------===// -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// \brief An implementation of jump-instruction tables. 
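The IfConversion cleanups above swap std::copy through a back_inserter for SmallVector::append, which sees the whole source range and grows the buffer once instead of element by element. The same contrast with std::vector, where the bulk form is the ranged insert:

#include <algorithm>
#include <iterator>
#include <vector>

int main() {
  std::vector<int> Pred{1, 2}, Cond{3, 4, 5};

  // Element-wise: each push_back re-checks capacity and may reallocate.
  std::copy(Cond.begin(), Cond.end(), std::back_inserter(Pred));

  // Bulk: the container sees the whole range and reserves once up front.
  Pred.insert(Pred.end(), Cond.begin(), Cond.end());
}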
-/// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "jt" - -#include "llvm/CodeGen/JumpInstrTables.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/JumpInstrTableInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Verifier.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include <vector> - -using namespace llvm; - -char JumpInstrTables::ID = 0; - -INITIALIZE_PASS_BEGIN(JumpInstrTables, "jump-instr-tables", - "Jump-Instruction Tables", true, true) -INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo); -INITIALIZE_PASS_END(JumpInstrTables, "jump-instr-tables", - "Jump-Instruction Tables", true, true) - -STATISTIC(NumJumpTables, "Number of indirect call tables generated"); -STATISTIC(NumFuncsInJumpTables, "Number of functions in the jump tables"); - -ModulePass *llvm::createJumpInstrTablesPass() { - // The default implementation uses a single table for all functions. - return new JumpInstrTables(JumpTable::Single); -} - -ModulePass *llvm::createJumpInstrTablesPass(JumpTable::JumpTableType JTT) { - return new JumpInstrTables(JTT); -} - -namespace { -static const char jump_func_prefix[] = "__llvm_jump_instr_table_"; -static const char jump_section_prefix[] = ".jump.instr.table.text."; - -// Checks to see if a given CallSite is making an indirect call, including -// cases where the indirect call is made through a bitcast. -bool isIndirectCall(CallSite &CS) { - if (CS.getCalledFunction()) - return false; - - // Check the value to see if it is merely a bitcast of a function. In - // this case, it will translate to a direct function call in the resulting - // assembly, so we won't treat it as an indirect call here. - const Value *V = CS.getCalledValue(); - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { - return !(CE->isCast() && isa<Function>(CE->getOperand(0))); - } - - // Otherwise, since we know it's a call, it must be an indirect call - return true; -} - -// Replaces Functions and GlobalAliases with a different Value. -bool replaceGlobalValueIndirectUse(GlobalValue *GV, Value *V, Use *U) { - User *Us = U->getUser(); - if (!Us) - return false; - if (Instruction *I = dyn_cast<Instruction>(Us)) { - CallSite CS(I); - - // Don't do the replacement if this use is a direct call to this function. - // If the use is not the called value, then replace it. - if (CS && (isIndirectCall(CS) || CS.isCallee(U))) { - return false; - } - - U->set(V); - } else if (Constant *C = dyn_cast<Constant>(Us)) { - // Don't replace calls to bitcasts of function symbols, since they get - // translated to direct calls. - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Us)) { - if (CE->getOpcode() == Instruction::BitCast) { - // This bitcast must have exactly one user. - if (CE->user_begin() != CE->user_end()) { - User *ParentUs = *CE->user_begin(); - if (CallInst *CI = dyn_cast<CallInst>(ParentUs)) { - CallSite CS(CI); - Use &CEU = *CE->use_begin(); - if (CS.isCallee(&CEU)) { - return false; - } - } - } - } - } - - // GlobalAlias doesn't support replaceUsesOfWithOnConstant. And the verifier - // requires alias to point to a defined function. 
So, GlobalAlias is handled - // as a separate case in runOnModule. - if (!isa<GlobalAlias>(C)) - C->replaceUsesOfWithOnConstant(GV, V, U); - } else { - llvm_unreachable("The Use of a Function symbol is neither an instruction " - "nor a constant"); - } - - return true; -} - -// Replaces all replaceable address-taken uses of GV with a pointer to a -// jump-instruction table entry. -void replaceValueWithFunction(GlobalValue *GV, Function *F) { - // Go through all uses of this function and replace the uses of GV with the - // jump-table version of the function. Get the uses as a vector before - // replacing them, since replacing them changes the use list and invalidates - // the iterator otherwise. - for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E;) { - Use &U = *I++; - - // Replacement of constants replaces all instances in the constant. So, some - // uses might have already been handled by the time we reach them here. - if (U.get() == GV) - replaceGlobalValueIndirectUse(GV, F, &U); - } - - return; -} -} // end anonymous namespace - -JumpInstrTables::JumpInstrTables() - : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0), - JTType(JumpTable::Single) { - initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry()); -} - -JumpInstrTables::JumpInstrTables(JumpTable::JumpTableType JTT) - : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0), JTType(JTT) { - initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry()); -} - -JumpInstrTables::~JumpInstrTables() {} - -void JumpInstrTables::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<JumpInstrTableInfo>(); -} - -Function *JumpInstrTables::insertEntry(Module &M, Function *Target) { - FunctionType *OrigFunTy = Target->getFunctionType(); - FunctionType *FunTy = transformType(JTType, OrigFunTy); - - JumpMap::iterator it = Metadata.find(FunTy); - if (Metadata.end() == it) { - struct TableMeta Meta; - Meta.TableNum = TableCount; - Meta.Count = 0; - Metadata[FunTy] = Meta; - it = Metadata.find(FunTy); - ++NumJumpTables; - ++TableCount; - } - - it->second.Count++; - - std::string NewName(jump_func_prefix); - NewName += (Twine(it->second.TableNum) + "_" + Twine(it->second.Count)).str(); - Function *JumpFun = - Function::Create(OrigFunTy, GlobalValue::ExternalLinkage, NewName, &M); - // The section for this table - JumpFun->setSection((jump_section_prefix + Twine(it->second.TableNum)).str()); - JITI->insertEntry(FunTy, Target, JumpFun); - - ++NumFuncsInJumpTables; - return JumpFun; -} - -bool JumpInstrTables::hasTable(FunctionType *FunTy) { - FunctionType *TransTy = transformType(JTType, FunTy); - return Metadata.end() != Metadata.find(TransTy); -} - -FunctionType *JumpInstrTables::transformType(JumpTable::JumpTableType JTT, - FunctionType *FunTy) { - // Returning nullptr forces all types into the same table, since all types map - // to the same type - Type *VoidPtrTy = Type::getInt8PtrTy(FunTy->getContext()); - - // Ignore the return type. 
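replaceValueWithFunction, in the file being removed, walks a use list that its own replacements mutate, so it advances the iterator before touching the current element. The *I++ snapshot idiom on a std::list, where erasing through a stale iterator would otherwise break the loop:

#include <cstdio>
#include <list>

int main() {
  std::list<int> Uses{1, 2, 3, 4};
  for (auto I = Uses.begin(), E = Uses.end(); I != E;) {
    auto Cur = I++;    // advance first: Cur stays valid, I has already moved on
    if (*Cur % 2 == 0)
      Uses.erase(Cur); // safe: only Cur is invalidated, not I
  }
  for (int U : Uses)
    std::printf("%d ", U); // prints: 1 3
}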
- Type *RetTy = VoidPtrTy; - bool IsVarArg = FunTy->isVarArg(); - std::vector<Type *> ParamTys(FunTy->getNumParams()); - FunctionType::param_iterator PI, PE; - int i = 0; - - std::vector<Type *> EmptyParams; - Type *Int32Ty = Type::getInt32Ty(FunTy->getContext()); - FunctionType *VoidFnTy = FunctionType::get( - Type::getVoidTy(FunTy->getContext()), EmptyParams, false); - switch (JTT) { - case JumpTable::Single: - - return FunctionType::get(RetTy, EmptyParams, false); - case JumpTable::Arity: - // Transform all types to void* so that all functions with the same arity - // end up in the same table. - for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE; - PI++, i++) { - ParamTys[i] = VoidPtrTy; - } - - return FunctionType::get(RetTy, ParamTys, IsVarArg); - case JumpTable::Simplified: - // Project all parameters types to one of 3 types: composite, integer, and - // function, matching the three subclasses of Type. - for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE; - ++PI, ++i) { - assert((isa<IntegerType>(*PI) || isa<FunctionType>(*PI) || - isa<CompositeType>(*PI)) && - "This type is not an Integer or a Composite or a Function"); - if (isa<CompositeType>(*PI)) { - ParamTys[i] = VoidPtrTy; - } else if (isa<FunctionType>(*PI)) { - ParamTys[i] = VoidFnTy; - } else if (isa<IntegerType>(*PI)) { - ParamTys[i] = Int32Ty; - } - } - - return FunctionType::get(RetTy, ParamTys, IsVarArg); - case JumpTable::Full: - // Don't transform this type at all. - return FunTy; - } - - return nullptr; -} - -bool JumpInstrTables::runOnModule(Module &M) { - JITI = &getAnalysis<JumpInstrTableInfo>(); - - // Get the set of jumptable-annotated functions that have their address taken. - DenseMap<Function *, Function *> Functions; - for (Function &F : M) { - if (F.hasFnAttribute(Attribute::JumpTable) && F.hasAddressTaken()) { - assert(F.hasUnnamedAddr() && - "Attribute 'jumptable' requires 'unnamed_addr'"); - Functions[&F] = nullptr; - } - } - - // Create the jump-table functions. - for (auto &KV : Functions) { - Function *F = KV.first; - KV.second = insertEntry(M, F); - } - - // GlobalAlias is a special case, because the target of an alias statement - // must be a defined function. So, instead of replacing a given function in - // the alias, we replace all uses of aliases that target jumptable functions. - // Note that there's no need to create these functions, since only aliases - // that target known jumptable functions are replaced, and there's no way to - // put the jumptable annotation on a global alias. - DenseMap<GlobalAlias *, Function *> Aliases; - for (GlobalAlias &GA : M.aliases()) { - Constant *Aliasee = GA.getAliasee(); - if (Function *F = dyn_cast<Function>(Aliasee)) { - auto it = Functions.find(F); - if (it != Functions.end()) { - Aliases[&GA] = it->second; - } - } - } - - // Replace each address taken function with its jump-instruction table entry. 
- for (auto &KV : Functions) - replaceValueWithFunction(KV.first, KV.second); - - for (auto &KV : Aliases) - replaceValueWithFunction(KV.first, KV.second); - - return !Functions.empty(); -} diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 9c23368..0fb0c46 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -12,12 +12,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetMachine.h" -#include "llvm/Analysis/JumpInstrTableInfo.h" #include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/BasicTTIImpl.h" -#include "llvm/CodeGen/ForwardControlFlowIntegrity.h" -#include "llvm/CodeGen/JumpInstrTables.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" @@ -33,12 +30,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Scalar.h" using namespace llvm; @@ -50,8 +43,16 @@ EnableFastISelOption("fast-isel", cl::Hidden, cl::desc("Enable the \"fast\" instruction selector")); void LLVMTargetMachine::initAsmInfo() { - MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo( - *getSubtargetImpl()->getRegisterInfo(), getTargetTriple()); + MRI = TheTarget.createMCRegInfo(getTargetTriple()); + MII = TheTarget.createMCInstrInfo(); + // FIXME: Having an MCSubtargetInfo on the target machine is a hack due + // to some backends having subtarget feature dependent module level + // code generation. This is similar to the hack in the AsmPrinter for + // module level assembly etc. + STI = TheTarget.createMCSubtargetInfo(getTargetTriple(), getTargetCPU(), + getTargetFeatureString()); + + MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(*MRI, getTargetTriple()); // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, // and if the old one gets included then MCAsmInfo will be NULL and // we'll crash later. @@ -69,12 +70,13 @@ void LLVMTargetMachine::initAsmInfo() { AsmInfo = TmpAsmInfo; } -LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, - StringRef CPU, StringRef FS, - TargetOptions Options, +LLVMTargetMachine::LLVMTargetMachine(const Target &T, + StringRef DataLayoutString, + StringRef Triple, StringRef CPU, + StringRef FS, TargetOptions Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : TargetMachine(T, Triple, CPU, FS, Options) { + : TargetMachine(T, DataLayoutString, Triple, CPU, FS, Options) { CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL); } @@ -115,8 +117,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, // Install a MachineModuleInfo class, which is an immutable pass that holds // all the per-module stuff we're generating, including MCContext. MachineModuleInfo *MMI = new MachineModuleInfo( - *TM->getMCAsmInfo(), *TM->getSubtargetImpl()->getRegisterInfo(), - TM->getObjFileLowering()); + *TM->getMCAsmInfo(), *TM->getMCRegisterInfo(), TM->getObjFileLowering()); PM.add(MMI); // Set up a MachineFunction for the rest of CodeGen to work on. 
@@ -145,16 +146,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) { - // Passes to handle jumptable function annotations. These can't be handled at - // JIT time, so we don't add them directly to addPassesToGenerateCode. - PM.add(createJumpInstrTableInfoPass( - getSubtargetImpl()->getInstrInfo()->getJumpInstrTableEntryBound())); - PM.add(createJumpInstrTablesPass(Options.JTType)); - if (Options.FCFI) - PM.add(createForwardControlFlowIntegrityPass( - Options.JTType, Options.CFIType, Options.CFIEnforcing, - Options.getCFIFuncName())); - // Add common CodeGen passes. MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, StartAfter, StopAfter); @@ -174,22 +165,22 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, if (Options.MCOptions.MCSaveTempLabels) Context->setAllowTemporaryLabels(false); - const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); + const MCSubtargetInfo &STI = *getMCSubtargetInfo(); const MCAsmInfo &MAI = *getMCAsmInfo(); - const MCRegisterInfo &MRI = *getSubtargetImpl()->getRegisterInfo(); - const MCInstrInfo &MII = *getSubtargetImpl()->getInstrInfo(); + const MCRegisterInfo &MRI = *getMCRegisterInfo(); + const MCInstrInfo &MII = *getMCInstrInfo(); + std::unique_ptr<MCStreamer> AsmStreamer; switch (FileType) { case CGFT_AssemblyFile: { - MCInstPrinter *InstPrinter = - getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, - MII, MRI, STI); + MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter( + MAI.getAssemblerDialect(), MAI, MII, MRI, STI); // Create a code emitter if asked to show the encoding. MCCodeEmitter *MCE = nullptr; if (Options.MCOptions.ShowMCEncoding) - MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, *Context); + MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); @@ -203,17 +194,16 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. - MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, - *Context); + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); if (!MCE || !MAB) return true; - AsmStreamer.reset( - getTarget() - .createMCObjectStreamer(getTargetTriple(), *Context, *MAB, Out, MCE, - STI, Options.MCOptions.MCRelaxAll)); + Triple T(getTargetTriple()); + AsmStreamer.reset(getTarget().createMCObjectStreamer( + T, *Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + /*DWARFMustBeAtTheEnd*/ true)); break; } case CGFT_Null: @@ -253,18 +243,19 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, // Create the code emitter for the target if it exists. If not, .o file // emission fails. 
- const MCRegisterInfo &MRI = *getSubtargetImpl()->getRegisterInfo(); - const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); - MCCodeEmitter *MCE = getTarget().createMCCodeEmitter( - *getSubtargetImpl()->getInstrInfo(), MRI, STI, *Ctx); + const MCRegisterInfo &MRI = *getMCRegisterInfo(); + MCCodeEmitter *MCE = + getTarget().createMCCodeEmitter(*getMCInstrInfo(), MRI, *Ctx); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); if (!MCE || !MAB) return true; + Triple T(getTargetTriple()); + const MCSubtargetInfo &STI = *getMCSubtargetInfo(); std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer( - getTargetTriple(), *Ctx, *MAB, Out, MCE, STI, - Options.MCOptions.MCRelaxAll)); + T, *Ctx, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + /*DWARFMustBeAtTheEnd*/ true)); // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. FunctionPass *Printer = diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index cdf505e..4321849 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -138,16 +138,3 @@ void LatencyPriorityQueue::remove(SUnit *SU) { std::swap(*I, Queue.back()); Queue.pop_back(); } - -#ifdef NDEBUG -void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {} -#else -void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const { - LatencyPriorityQueue q = *this; - while (!q.empty()) { - SUnit *su = q.pop(); - dbgs() << "Height " << su->getHeight() << ": "; - su->dump(DAG); - } -} -#endif diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index dc936a3..e3791be 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/Value.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -276,7 +277,7 @@ public: /// getDebugLoc - Return DebugLoc of this UserValue. 
DebugLoc getDebugLoc() { return dl;} - void print(raw_ostream&, const TargetMachine*); + void print(raw_ostream &, const TargetRegisterInfo *); }; } // namespace @@ -362,7 +363,7 @@ public: }; } // namespace -void UserValue::print(raw_ostream &OS, const TargetMachine *TM) { +void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { DIVariable DV(Variable); OS << "!\""; DV.printExtendedName(OS); @@ -378,7 +379,7 @@ void UserValue::print(raw_ostream &OS, const TargetMachine *TM) { } for (unsigned i = 0, e = locations.size(); i != e; ++i) { OS << " Loc" << i << '='; - locations[i].print(OS, TM); + locations[i].print(OS, TRI); } OS << '\n'; } @@ -386,7 +387,7 @@ void UserValue::print(raw_ostream &OS, const TargetMachine *TM) { void LDVImpl::print(raw_ostream &OS) { OS << "********** DEBUG VARIABLES **********\n"; for (unsigned i = 0, e = userValues.size(); i != e; ++i) - userValues[i]->print(OS, &MF->getTarget()); + userValues[i]->print(OS, TRI); } void UserValue::coalesceLocation(unsigned LocNo) { @@ -1004,7 +1005,7 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { return; const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); for (unsigned i = 0, e = userValues.size(); i != e; ++i) { - DEBUG(userValues[i]->print(dbgs(), &MF->getTarget())); + DEBUG(userValues[i]->print(dbgs(), TRI)); userValues[i]->rewriteLocations(*VRM, *TRI); userValues[i]->emitDebugValues(VRM, *LIS, *TII); } diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index fd7516d..2afd7fa 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -32,6 +32,7 @@ #include <algorithm> using namespace llvm; +namespace { //===----------------------------------------------------------------------===// // Implementation of various methods necessary for calculation of live ranges. // The implementation of the methods abstracts from the concrete type of the @@ -293,6 +294,7 @@ private: return I; } }; +} // namespace //===----------------------------------------------------------------------===// // LiveRange methods @@ -567,13 +569,9 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End, /// Also remove the value# from value# list. void LiveRange::removeValNo(VNInfo *ValNo) { if (empty()) return; - iterator I = end(); - iterator E = begin(); - do { - --I; - if (I->valno == ValNo) - segments.erase(I); - } while (I != E); + segments.erase(std::remove_if(begin(), end(), [ValNo](const Segment &S) { + return S.valno == ValNo; + }), end()); // Now that ValNo is dead, remove it. markValNoForDeletion(ValNo); } @@ -747,7 +745,6 @@ void LiveRange::flushSegmentSet() { segments.empty() && "segment set can be used only initially before switching to the array"); segments.append(segmentSet->begin(), segmentSet->end()); - delete segmentSet; segmentSet = nullptr; verify(); } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index cc08045..adca4cc 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -199,7 +199,7 @@ void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { assert(LRCalc && "LRCalc not initialized."); assert(LI.empty() && "Should only compute empty intervals."); LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); - LRCalc->calculate(LI); + LRCalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg)); computeDeadValues(LI, nullptr); } @@ -466,7 +466,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, // Is the register live before? 
Otherwise we may have to add a read-undef // flag for subregister defs. - if (MRI->tracksSubRegLiveness()) { + if (MRI->shouldTrackSubRegLiveness(LI.reg)) { if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) { MachineInstr *MI = getInstructionFromIndex(Def); MI->addRegisterDefReadUndef(LI.reg); @@ -662,7 +662,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end))); } - if (MRI->tracksSubRegLiveness()) { + if (MRI->subRegLivenessEnabled()) { SRs.clear(); for (const LiveInterval::SubRange &SR : LI.subranges()) { SRs.push_back(std::make_pair(&SR, SR.find(LI.begin()->end))); @@ -700,7 +700,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { goto CancelKill; } - if (MRI->tracksSubRegLiveness()) { + if (MRI->subRegLivenessEnabled()) { // When reading a partial undefined value we must not add a kill flag. // The regalloc might have used the undef lane for something else. // Example: diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp index 7efd941..89567ef 100644 --- a/lib/CodeGen/LivePhysRegs.cpp +++ b/lib/CodeGen/LivePhysRegs.cpp @@ -16,6 +16,7 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index d804b39..45e7265 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -50,7 +50,7 @@ static void createDeadDef(SlotIndexes &Indexes, VNInfo::Allocator &Alloc, LR.createDeadDef(DefIdx, Alloc); } -void LiveRangeCalc::calculate(LiveInterval &LI) { +void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) { assert(MRI && Indexes && "call reset() first"); // Step 1: Create minimal live segments for every definition of Reg. @@ -63,7 +63,7 @@ void LiveRangeCalc::calculate(LiveInterval &LI) { continue; unsigned SubReg = MO.getSubReg(); - if (LI.hasSubRanges() || (SubReg != 0 && MRI->tracksSubRegLiveness())) { + if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) { unsigned Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg) : MRI->getMaxLaneMaskForVReg(Reg); diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h index 90bf971..34d9953 100644 --- a/lib/CodeGen/LiveRangeCalc.h +++ b/lib/CodeGen/LiveRangeCalc.h @@ -187,7 +187,7 @@ public: /// Calculates liveness for the register specified in live interval @p LI. /// Creates subregister live ranges as needed if subreg liveness tracking is /// enabled. - void calculate(LiveInterval &LI); + void calculate(LiveInterval &LI, bool TrackSubRegs); //===--------------------------------------------------------------------===// // Low-level interface. 
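The LiveInterval.cpp hunk above rewrites removeValNo() from a backwards erase-as-you-go loop into the standard erase/remove_if idiom. A self-contained sketch of that idiom, using a stand-in Segment type rather than LLVM's:

    #include <algorithm>
    #include <vector>

    struct Segment { int valno; };

    // Drop every segment belonging to a dead value number in one linear
    // pass: remove_if compacts the survivors, erase trims the tail.
    void removeValNo(std::vector<Segment> &segments, int deadValNo) {
      segments.erase(std::remove_if(segments.begin(), segments.end(),
                                    [deadValNo](const Segment &S) {
                                      return S.valno == deadValNo;
                                    }),
                     segments.end());
    }

Besides being shorter, this stays O(n) however many segments die, whereas repeated mid-vector erase() calls can degrade to O(n^2).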
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp index 8a6ac25..5c9c679 100644 --- a/lib/CodeGen/LiveStackAnalysis.cpp +++ b/lib/CodeGen/LiveStackAnalysis.cpp @@ -61,8 +61,10 @@ LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) { assert(Slot >= 0 && "Spill slot index must be >= 0"); SS2IntervalMap::iterator I = S2IMap.find(Slot); if (I == S2IMap.end()) { - I = S2IMap.insert(I, std::make_pair(Slot, - LiveInterval(TargetRegisterInfo::index2StackSlot(Slot), 0.0F))); + I = S2IMap.emplace(std::piecewise_construct, std::forward_as_tuple(Slot), + std::forward_as_tuple( + TargetRegisterInfo::index2StackSlot(Slot), 0.0F)) + .first; S2RCMap.insert(std::make_pair(Slot, RC)); } else { // Use the largest common subclass register class. diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index c4bca5f..11deb81 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -36,6 +36,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include <algorithm> using namespace llvm; diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index e8bf687..8378429 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -252,7 +252,8 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { } static inline bool -lookupCandidateBaseReg(int64_t BaseOffset, +lookupCandidateBaseReg(unsigned BaseReg, + int64_t BaseOffset, int64_t FrameSizeAdjust, int64_t LocalFrameOffset, const MachineInstr *MI, @@ -260,7 +261,7 @@ lookupCandidateBaseReg(int64_t BaseOffset, // Check if the relative offset from where the base register references // to the target address is in range for the instruction. int64_t Offset = FrameSizeAdjust + LocalFrameOffset - BaseOffset; - return TRI->isFrameOffsetLegal(MI, Offset); + return TRI->isFrameOffsetLegal(MI, BaseReg, Offset); } bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { @@ -362,8 +363,9 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // instruction itself will be taken into account by the target, // so we don't have to adjust for it here when reusing a base // register. - if (UsedBaseReg && lookupCandidateBaseReg(BaseOffset, FrameSizeAdjust, - LocalOffset, MI, TRI)) { + if (UsedBaseReg && lookupCandidateBaseReg(BaseReg, BaseOffset, + FrameSizeAdjust, LocalOffset, MI, + TRI)) { DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n"); // We found a register to reuse. Offset = FrameSizeAdjust + LocalOffset - BaseOffset; @@ -382,7 +384,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // then don't bother creating it.
if (ref + 1 >= e || !lookupCandidateBaseReg( - BaseOffset, FrameSizeAdjust, + BaseReg, BaseOffset, FrameSizeAdjust, FrameReferenceInsns[ref + 1].getLocalOffset(), FrameReferenceInsns[ref + 1].getMachineInstr(), TRI)) { BaseOffset = PrevBaseOffset; diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 3c73905..98359b1 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -307,7 +307,7 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { OS << '\t'; if (I->isInsideBundle()) OS << " * "; - I->print(OS, &getParent()->getTarget()); + I->print(OS); } // Print the successors of this block according to the CFG. diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 1b5c1f1..ecc50c9 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -40,13 +41,14 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> using namespace llvm; -#define DEBUG_TYPE "block-placement2" +#define DEBUG_TYPE "block-placement" STATISTIC(NumCondBranches, "Number of conditional branches"); STATISTIC(NumUncondBranches, "Number of unconditional branches"); @@ -61,11 +63,23 @@ static cl::opt<unsigned> AlignAllBlock("align-all-blocks", cl::init(0), cl::Hidden); // FIXME: Find a good default for this flag and remove the flag. -static cl::opt<unsigned> -ExitBlockBias("block-placement-exit-block-bias", - cl::desc("Block frequency percentage a loop exit block needs " - "over the original exit to be considered the new exit."), - cl::init(0), cl::Hidden); +static cl::opt<unsigned> ExitBlockBias( + "block-placement-exit-block-bias", + cl::desc("Block frequency percentage a loop exit block needs " + "over the original exit to be considered the new exit."), + cl::init(0), cl::Hidden); + +static cl::opt<bool> OutlineOptionalBranches( + "outline-optional-branches", + cl::desc("Put completely optional branches, i.e. branches with a common " + "post dominator, out of line."), + cl::init(false), cl::Hidden); + +static cl::opt<unsigned> OutlineOptionalThreshold( + "outline-optional-threshold", + cl::desc("Don't outline optional branches that are a single block with an " + "instruction count below this threshold"), + cl::init(4), cl::Hidden); namespace { class BlockChain; @@ -107,7 +121,7 @@ public: /// function. It also registers itself as the chain that block participates /// in with the BlockToChain mapping. BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB) - : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) { + : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) { assert(BB && "Cannot create a chain with a null basic block"); BlockToChain[BB] = this; } @@ -144,19 +158,18 @@ public: // Update the incoming blocks to point to this chain, and add them to the // chain structure.
- for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end(); - BI != BE; ++BI) { - Blocks.push_back(*BI); - assert(BlockToChain[*BI] == Chain && "Incoming blocks not in chain"); - BlockToChain[*BI] = this; + for (MachineBasicBlock *ChainBB : *Chain) { + Blocks.push_back(ChainBB); + assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain"); + BlockToChain[ChainBB] = this; } } #ifndef NDEBUG /// \brief Dump the blocks in this chain. LLVM_DUMP_METHOD void dump() { - for (iterator I = begin(), E = end(); I != E; ++I) - (*I)->dump(); + for (MachineBasicBlock *MBB : *this) + MBB->dump(); } #endif // NDEBUG @@ -188,6 +201,13 @@ class MachineBlockPlacement : public MachineFunctionPass { /// \brief A handle to the target's lowering info. const TargetLoweringBase *TLI; + /// \brief A handle to the dominator tree. + MachineDominatorTree *MDT; + + /// \brief A set of blocks that are unavoidably executed, i.e. they dominate + /// all terminators of the MachineFunction. + SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks; + /// \brief Allocator and owner of BlockChain structures. /// /// We build BlockChains lazily while processing the loop structure of /// @@ -205,28 +225,26 @@ class MachineBlockPlacement : public MachineFunctionPass { /// between basic blocks. DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain; - void markChainSuccessors(BlockChain &Chain, - MachineBasicBlock *LoopHeaderBB, + void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB, BlockChain &Chain, const BlockFilterSet *BlockFilter); - MachineBasicBlock *selectBestCandidateBlock( - BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList, - const BlockFilterSet *BlockFilter); - MachineBasicBlock *getFirstUnplacedBlock( - MachineFunction &F, - const BlockChain &PlacedChain, - MachineFunction::iterator &PrevUnplacedBlockIt, - const BlockFilterSet *BlockFilter); + MachineBasicBlock * + selectBestCandidateBlock(BlockChain &Chain, + SmallVectorImpl<MachineBasicBlock *> &WorkList, + const BlockFilterSet *BlockFilter); + MachineBasicBlock * + getFirstUnplacedBlock(MachineFunction &F, const BlockChain &PlacedChain, + MachineFunction::iterator &PrevUnplacedBlockIt, + const BlockFilterSet *BlockFilter); void buildChain(MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *findBestLoopTop(MachineLoop &L, const BlockFilterSet &LoopBlockSet); - MachineBasicBlock *findBestLoopExit(MachineFunction &F, - MachineLoop &L, + MachineBasicBlock *findBestLoopExit(MachineFunction &F, MachineLoop &L, const BlockFilterSet &LoopBlockSet); void buildLoopChains(MachineFunction &F, MachineLoop &L); void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB, @@ -244,6 +262,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBranchProbabilityInfo>(); AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -252,12 +271,13 @@ public: char MachineBlockPlacement::ID = 0; char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID; -INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2", +INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement", "Branch Probability Basic Block Placement", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2", +INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement", "Branch Probability Basic Block Placement", false, false) #ifndef NDEBUG @@ -267,8 +287,8 @@ INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2", static std::string getBlockName(MachineBasicBlock *BB) { std::string Result; raw_string_ostream OS(Result); - OS << "BB#" << BB->getNumber() - << " (derived from LLVM BB '" << BB->getName() << "')"; + OS << "BB#" << BB->getNumber(); + OS << " (derived from LLVM BB '" << BB->getName() << "')"; OS.flush(); return Result; } @@ -292,26 +312,22 @@ static std::string getBlockNum(MachineBasicBlock *BB) { /// having one fewer active predecessor. It also adds any successors of this /// chain which reach the zero-predecessor state to the worklist passed in. void MachineBlockPlacement::markChainSuccessors( - BlockChain &Chain, - MachineBasicBlock *LoopHeaderBB, + BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter) { // Walk all the blocks in this chain, marking their successors as having // a predecessor placed. - for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end(); - CBI != CBE; ++CBI) { + for (MachineBasicBlock *MBB : Chain) { // Add any successors for which this is the only un-placed in-loop // predecessor to the worklist as a viable candidate for CFG-neutral // placement. No subsequent placement of this block will violate the CFG // shape, so we get to use heuristics to choose a favorable placement. - for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(), - SE = (*CBI)->succ_end(); - SI != SE; ++SI) { - if (BlockFilter && !BlockFilter->count(*SI)) + for (MachineBasicBlock *Succ : MBB->successors()) { + if (BlockFilter && !BlockFilter->count(Succ)) continue; - BlockChain &SuccChain = *BlockToChain[*SI]; + BlockChain &SuccChain = *BlockToChain[Succ]; // Disregard edges within a fixed chain, or edges to the loop header. - if (&Chain == &SuccChain || *SI == LoopHeaderBB) + if (&Chain == &SuccChain || Succ == LoopHeaderBB) continue; // This is a cross-chain edge that is within the loop, so decrement the @@ -331,9 +347,10 @@ void MachineBlockPlacement::markChainSuccessors( /// very hot successor edges. /// /// \returns The best successor block found, or null if none are viable. -MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( - MachineBasicBlock *BB, BlockChain &Chain, - const BlockFilterSet *BlockFilter) { +MachineBasicBlock * +MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, + BlockChain &Chain, + const BlockFilterSet *BlockFilter) { const BranchProbability HotProb(4, 5); // 80% MachineBasicBlock *BestSucc = nullptr; @@ -363,6 +380,30 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ); BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); + // If we outline optional branches, check whether Succ is unavoidable, i.e. + // dominates all terminators of the MachineFunction. If it does, other + // successors must be optional. Don't do this for cold branches.
+ if (OutlineOptionalBranches && SuccProb > HotProb.getCompl() && + UnavoidableBlocks.count(Succ) > 0) { + auto HasShortOptionalBranch = [&]() { + for (MachineBasicBlock *Pred : Succ->predecessors()) { + // Check whether there is an unplaced optional branch. + if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) || + BlockToChain[Pred] == &Chain) + continue; + // Check whether the optional branch has exactly one BB. + if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB) + continue; + // Check whether the optional branch is small. + if (Pred->size() < OutlineOptionalThreshold) + return true; + } + return false; + }; + if (!HasShortOptionalBranch()) + return Succ; + } + // Only consider successors which are either "hot", or wouldn't violate // any CFG constraints. if (SuccChain.LoopPredecessors != 0) { @@ -426,29 +467,26 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( // some code complexity) into the loop below. WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(), [&](MachineBasicBlock *BB) { - return BlockToChain.lookup(BB) == &Chain; - }), + return BlockToChain.lookup(BB) == &Chain; + }), WorkList.end()); MachineBasicBlock *BestBlock = nullptr; BlockFrequency BestFreq; - for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(), - WBE = WorkList.end(); - WBI != WBE; ++WBI) { - BlockChain &SuccChain = *BlockToChain[*WBI]; + for (MachineBasicBlock *MBB : WorkList) { + BlockChain &SuccChain = *BlockToChain[MBB]; if (&SuccChain == &Chain) { - DEBUG(dbgs() << " " << getBlockName(*WBI) - << " -> Already merged!\n"); + DEBUG(dbgs() << " " << getBlockName(MBB) << " -> Already merged!\n"); continue; } assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block"); - BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI); - DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> "; - MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); + BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB); + DEBUG(dbgs() << " " << getBlockName(MBB) << " -> "; + MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); if (BestBlock && BestFreq >= CandidateFreq) continue; - BestBlock = *WBI; + BestBlock = MBB; BestFreq = CandidateFreq; } return BestBlock; @@ -481,8 +519,7 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( } void MachineBlockPlacement::buildChain( - MachineBasicBlock *BB, - BlockChain &Chain, + MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter) { assert(BB); @@ -509,8 +546,8 @@ void MachineBlockPlacement::buildChain( BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter); if (!BestSucc) { - BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, - BlockFilter); + BestSucc = + getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, BlockFilter); if (!BestSucc) break; @@ -523,8 +560,8 @@ void MachineBlockPlacement::buildChain( // Zero out LoopPredecessors for the successor we're about to merge in case // we selected a successor that didn't fit naturally into the CFG. 
SuccChain.LoopPredecessors = 0; - DEBUG(dbgs() << "Merging from " << getBlockNum(BB) - << " to " << getBlockNum(BestSucc) << "\n"); + DEBUG(dbgs() << "Merging from " << getBlockNum(BB) << " to " + << getBlockNum(BestSucc) << "\n"); markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter); Chain.merge(BestSucc, &SuccChain); BB = *std::prev(Chain.end()); @@ -554,20 +591,17 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L, if (!LoopBlockSet.count(*HeaderChain.begin())) return L.getHeader(); - DEBUG(dbgs() << "Finding best loop top for: " - << getBlockName(L.getHeader()) << "\n"); + DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(L.getHeader()) + << "\n"); BlockFrequency BestPredFreq; MachineBasicBlock *BestPred = nullptr; - for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(), - PE = L.getHeader()->pred_end(); - PI != PE; ++PI) { - MachineBasicBlock *Pred = *PI; + for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) { if (!LoopBlockSet.count(Pred)) continue; DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", " << Pred->succ_size() << " successors, "; - MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); + MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); if (Pred->succ_size() > 1) continue; @@ -594,15 +628,13 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L, return BestPred; } - /// \brief Find the best loop exiting block for layout. /// /// This routine implements the logic to analyze the loop looking for the best /// block to layout at the top of the loop. Typically this is done to maximize /// fallthrough opportunities. MachineBasicBlock * -MachineBlockPlacement::findBestLoopExit(MachineFunction &F, - MachineLoop &L, +MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, const BlockFilterSet &LoopBlockSet) { // We don't want to layout the loop linearly in all cases. If the loop header // is just a normal basic block in the loop, we want to look for what block @@ -624,15 +656,13 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, // blocks where rotating to exit with that block will reach an outer loop. SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop; - DEBUG(dbgs() << "Finding best loop exit for: " - << getBlockName(L.getHeader()) << "\n"); - for (MachineLoop::block_iterator I = L.block_begin(), - E = L.block_end(); - I != E; ++I) { - BlockChain &Chain = *BlockToChain[*I]; + DEBUG(dbgs() << "Finding best loop exit for: " << getBlockName(L.getHeader()) + << "\n"); + for (MachineBasicBlock *MBB : L.getBlocks()) { + BlockChain &Chain = *BlockToChain[MBB]; // Ensure that this block is at the end of a chain; otherwise it could be // mid-way through an inner loop or a successor of an analyzable branch. - if (*I != *std::prev(Chain.end())) + if (MBB != *std::prev(Chain.end())) continue; // Now walk the successors. We need to establish whether this has a viable @@ -646,43 +676,40 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, // the MBPI analysis, we use the internal weights and manually compute the // probabilities to avoid quadratic behavior. 
uint32_t WeightScale = 0; - uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale); - for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(), - SE = (*I)->succ_end(); - SI != SE; ++SI) { - if ((*SI)->isLandingPad()) + uint32_t SumWeight = MBPI->getSumForBlock(MBB, WeightScale); + for (MachineBasicBlock *Succ : MBB->successors()) { + if (Succ->isLandingPad()) continue; - if (*SI == *I) + if (Succ == MBB) continue; - BlockChain &SuccChain = *BlockToChain[*SI]; + BlockChain &SuccChain = *BlockToChain[Succ]; // Don't split chains, either this chain or the successor's chain. if (&Chain == &SuccChain) { - DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " - << getBlockName(*SI) << " (chain conflict)\n"); + DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " + << getBlockName(Succ) << " (chain conflict)\n"); continue; } - uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI); - if (LoopBlockSet.count(*SI)) { - DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> " - << getBlockName(*SI) << " (" << SuccWeight << ")\n"); + uint32_t SuccWeight = MBPI->getEdgeWeight(MBB, Succ); + if (LoopBlockSet.count(Succ)) { + DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> " + << getBlockName(Succ) << " (" << SuccWeight << ")\n"); HasLoopingSucc = true; continue; } unsigned SuccLoopDepth = 0; - if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) { + if (MachineLoop *ExitLoop = MLI->getLoopFor(Succ)) { SuccLoopDepth = ExitLoop->getLoopDepth(); if (ExitLoop->contains(&L)) - BlocksExitingToOuterLoop.insert(*I); + BlocksExitingToOuterLoop.insert(MBB); } BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); - BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb; - DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " - << getBlockName(*SI) << " [L:" << SuccLoopDepth - << "] ("; - MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n"); + BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb; + DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " + << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] ("; + MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n"); // Note that we bias this toward an existing layout successor to retain // incoming order in the absence of better information. The exit must have // a frequency higher than the current exit before we consider breaking @@ -690,10 +717,10 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, BranchProbability Bias(100 - ExitBlockBias, 100); if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth || ExitEdgeFreq > BestExitEdgeFreq || - ((*I)->isLayoutSuccessor(*SI) && + (MBB->isLayoutSuccessor(Succ) && !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) { BestExitEdgeFreq = ExitEdgeFreq; - ExitingBB = *I; + ExitingBB = MBB; } } @@ -734,12 +761,10 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, MachineBasicBlock *Top = *LoopChain.begin(); bool ViableTopFallthrough = false; - for (MachineBasicBlock::pred_iterator PI = Top->pred_begin(), - PE = Top->pred_end(); - PI != PE; ++PI) { - BlockChain *PredChain = BlockToChain[*PI]; - if (!LoopBlockSet.count(*PI) && - (!PredChain || *PI == *std::prev(PredChain->end()))) { + for (MachineBasicBlock *Pred : Top->predecessors()) { + BlockChain *PredChain = BlockToChain[Pred]; + if (!LoopBlockSet.count(Pred) && + (!PredChain || Pred == *std::prev(PredChain->end()))) { ViableTopFallthrough = true; break; } @@ -750,18 +775,16 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, // introduce an unnecessary branch. 
if (ViableTopFallthrough) { MachineBasicBlock *Bottom = *std::prev(LoopChain.end()); - for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(), - SE = Bottom->succ_end(); - SI != SE; ++SI) { - BlockChain *SuccChain = BlockToChain[*SI]; - if (!LoopBlockSet.count(*SI) && - (!SuccChain || *SI == *SuccChain->begin())) + for (MachineBasicBlock *Succ : Bottom->successors()) { + BlockChain *SuccChain = BlockToChain[Succ]; + if (!LoopBlockSet.count(Succ) && + (!SuccChain || Succ == *SuccChain->begin())) return; } } - BlockChain::iterator ExitIt = std::find(LoopChain.begin(), LoopChain.end(), - ExitingBB); + BlockChain::iterator ExitIt = + std::find(LoopChain.begin(), LoopChain.end(), ExitingBB); if (ExitIt == LoopChain.end()) return; @@ -778,8 +801,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, MachineLoop &L) { // First recurse through any nested loops, building chains for those inner // loops. - for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI) - buildLoopChains(F, **LI); + for (MachineLoop *InnerLoop : L) + buildLoopChains(F, *InnerLoop); SmallVector<MachineBasicBlock *, 16> BlockWorkList; BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end()); @@ -805,21 +828,16 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, SmallPtrSet<BlockChain *, 4> UpdatedPreds; assert(LoopChain.LoopPredecessors == 0); UpdatedPreds.insert(&LoopChain); - for (MachineLoop::block_iterator BI = L.block_begin(), - BE = L.block_end(); - BI != BE; ++BI) { - BlockChain &Chain = *BlockToChain[*BI]; + for (MachineBasicBlock *LoopBB : L.getBlocks()) { + BlockChain &Chain = *BlockToChain[LoopBB]; if (!UpdatedPreds.insert(&Chain).second) continue; assert(Chain.LoopPredecessors == 0); - for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end(); - BCI != BCE; ++BCI) { - assert(BlockToChain[*BCI] == &Chain); - for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(), - PE = (*BCI)->pred_end(); - PI != PE; ++PI) { - if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI)) + for (MachineBasicBlock *ChainBB : Chain) { + assert(BlockToChain[ChainBB] == &Chain); + for (MachineBasicBlock *Pred : ChainBB->predecessors()) { + if (BlockToChain[Pred] == &Chain || !LoopBlockSet.count(Pred)) continue; ++Chain.LoopPredecessors; } @@ -841,29 +859,26 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, << " Loop header: " << getBlockName(*L.block_begin()) << "\n" << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"; } - for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end(); - BCI != BCE; ++BCI) { - dbgs() << " ... " << getBlockName(*BCI) << "\n"; - if (!LoopBlockSet.erase(*BCI)) { + for (MachineBasicBlock *ChainBB : LoopChain) { + dbgs() << " ... " << getBlockName(ChainBB) << "\n"; + if (!LoopBlockSet.erase(ChainBB)) { // We don't mark the loop as bad here because there are real situations // where this can occur. For example, with an unanalyzable fallthrough // from a loop block to a non-loop block or vice versa. 
dbgs() << "Loop chain contains a block not contained by the loop!\n" << " Loop header: " << getBlockName(*L.block_begin()) << "\n" << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" - << " Bad block: " << getBlockName(*BCI) << "\n"; + << " Bad block: " << getBlockName(ChainBB) << "\n"; } } if (!LoopBlockSet.empty()) { BadLoop = true; - for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(), - LBE = LoopBlockSet.end(); - LBI != LBE; ++LBI) + for (MachineBasicBlock *LoopBB : LoopBlockSet) dbgs() << "Loop contains blocks never placed into a chain!\n" << " Loop header: " << getBlockName(*L.block_begin()) << "\n" << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" - << " Bad block: " << getBlockName(*LBI) << "\n"; + << " Bad block: " << getBlockName(LoopBB) << "\n"; } assert(!BadLoop && "Detected problems with the placement of this loop."); }); @@ -875,8 +890,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch. for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { MachineBasicBlock *BB = FI; - BlockChain *Chain - = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); + BlockChain *Chain = + new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); // Also, merge any blocks which we cannot reason about and must preserve // the exact fallthrough behavior for. for (;;) { @@ -899,28 +914,44 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { } } + if (OutlineOptionalBranches) { + // Find the nearest common dominator of all of F's terminators. + MachineBasicBlock *Terminator = nullptr; + for (MachineBasicBlock &MBB : F) { + if (MBB.succ_size() == 0) { + if (Terminator == nullptr) + Terminator = &MBB; + else + Terminator = MDT->findNearestCommonDominator(Terminator, &MBB); + } + } + + // MBBs dominating this common dominator are unavoidable. + UnavoidableBlocks.clear(); + for (MachineBasicBlock &MBB : F) { + if (MDT->dominates(&MBB, Terminator)) { + UnavoidableBlocks.insert(&MBB); + } + } + } + // Build any loop-based chains. - for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE; - ++LI) - buildLoopChains(F, **LI); + for (MachineLoop *L : *MLI) + buildLoopChains(F, *L); SmallVector<MachineBasicBlock *, 16> BlockWorkList; SmallPtrSet<BlockChain *, 4> UpdatedPreds; - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { - MachineBasicBlock *BB = &*FI; - BlockChain &Chain = *BlockToChain[BB]; + for (MachineBasicBlock &MBB : F) { + BlockChain &Chain = *BlockToChain[&MBB]; if (!UpdatedPreds.insert(&Chain).second) continue; assert(Chain.LoopPredecessors == 0); - for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end(); - BCI != BCE; ++BCI) { - assert(BlockToChain[*BCI] == &Chain); - for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(), - PE = (*BCI)->pred_end(); - PI != PE; ++PI) { - if (BlockToChain[*PI] == &Chain) + for (MachineBasicBlock *ChainBB : Chain) { + assert(BlockToChain[ChainBB] == &Chain); + for (MachineBasicBlock *Pred : ChainBB->predecessors()) { + if (BlockToChain[Pred] == &Chain) continue; ++Chain.LoopPredecessors; } @@ -940,46 +971,40 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Crash at the end so we get all of the debugging output first. 
bool BadFunc = false; FunctionBlockSetType FunctionBlockSet; - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) - FunctionBlockSet.insert(FI); + for (MachineBasicBlock &MBB : F) + FunctionBlockSet.insert(&MBB); - for (BlockChain::iterator BCI = FunctionChain.begin(), - BCE = FunctionChain.end(); - BCI != BCE; ++BCI) - if (!FunctionBlockSet.erase(*BCI)) { + for (MachineBasicBlock *ChainBB : FunctionChain) + if (!FunctionBlockSet.erase(ChainBB)) { BadFunc = true; dbgs() << "Function chain contains a block not in the function!\n" - << " Bad block: " << getBlockName(*BCI) << "\n"; + << " Bad block: " << getBlockName(ChainBB) << "\n"; } if (!FunctionBlockSet.empty()) { BadFunc = true; - for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(), - FBE = FunctionBlockSet.end(); - FBI != FBE; ++FBI) + for (MachineBasicBlock *RemainingBB : FunctionBlockSet) dbgs() << "Function contains blocks never placed into a chain!\n" - << " Bad block: " << getBlockName(*FBI) << "\n"; + << " Bad block: " << getBlockName(RemainingBB) << "\n"; } assert(!BadFunc && "Detected problems with the block placement."); }); // Splice the blocks into place. MachineFunction::iterator InsertPos = F.begin(); - for (BlockChain::iterator BI = FunctionChain.begin(), - BE = FunctionChain.end(); - BI != BE; ++BI) { - DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain " - : " ... ") - << getBlockName(*BI) << "\n"); - if (InsertPos != MachineFunction::iterator(*BI)) - F.splice(InsertPos, *BI); + for (MachineBasicBlock *ChainBB : FunctionChain) { + DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain " : " ... ") + << getBlockName(ChainBB) << "\n"); + if (InsertPos != MachineFunction::iterator(ChainBB)) + F.splice(InsertPos, ChainBB); else ++InsertPos; // Update the terminator of the previous block. - if (BI == FunctionChain.begin()) + if (ChainBB == *FunctionChain.begin()) continue; - MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(*BI)); + MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(ChainBB)); // FIXME: It would be awesome if updateTerminator would just return rather // than assert when the branch cannot be analyzed in order to remove this @@ -989,16 +1014,16 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { // The "PrevBB" is not yet updated to reflect current code layout, so, // o. it may fall-through to a block without an explicit "goto" instruction - // before layout, and no longer fall-through it after layout; or + // before layout, and no longer fall-through it after layout; or // o. just the opposite. - // + // // AnalyzeBranch() may return erroneous value for FBB when these two // situations take place. For the first scenario FBB is mistakenly set // NULL; for the 2nd scenario, the FBB, which is expected to be NULL, // is mistakenly pointing to "*BI".
// bool needUpdateBr = true; - if (!Cond.empty() && (!FBB || FBB == *BI)) { + if (!Cond.empty() && (!FBB || FBB == ChainBB)) { PrevBB->updateTerminator(); needUpdateBr = false; Cond.clear(); @@ -1018,7 +1043,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { << getBlockName(PrevBB) << "\n"); DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB) << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n"); - DebugLoc dl; // FIXME: this is nowhere + DebugLoc dl; // FIXME: this is nowhere TII->RemoveBranch(*PrevBB); TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); needUpdateBr = true; @@ -1042,29 +1067,30 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { if (F.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) return; if (FunctionChain.begin() == FunctionChain.end()) - return; // Empty chain. + return; // Empty chain. const BranchProbability ColdProb(1, 5); // 20% BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin()); BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb; - for (BlockChain::iterator BI = std::next(FunctionChain.begin()), - BE = FunctionChain.end(); - BI != BE; ++BI) { + for (MachineBasicBlock *ChainBB : FunctionChain) { + if (ChainBB == *FunctionChain.begin()) + continue; + // Don't align non-looping basic blocks. These are unlikely to execute // enough times to matter in practice. Note that we'll still handle // unnatural CFGs inside of a natural outer loop (the common case) and // rotated loops. - MachineLoop *L = MLI->getLoopFor(*BI); + MachineLoop *L = MLI->getLoopFor(ChainBB); if (!L) continue; unsigned Align = TLI->getPrefLoopAlignment(L); if (!Align) - continue; // Don't care about loop alignment. + continue; // Don't care about loop alignment. // If the block is cold relative to the function entry don't waste space // aligning it. - BlockFrequency Freq = MBFI->getBlockFreq(*BI); + BlockFrequency Freq = MBFI->getBlockFreq(ChainBB); if (Freq < WeightedEntryFreq) continue; @@ -1077,12 +1103,13 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Check for the existence of a non-layout predecessor which would benefit // from aligning this block. - MachineBasicBlock *LayoutPred = *std::prev(BI); + MachineBasicBlock *LayoutPred = + &*std::prev(MachineFunction::iterator(ChainBB)); // Force alignment if all the predecessors are jumps. We already checked // that the block isn't cold above. - if (!LayoutPred->isSuccessor(*BI)) { - (*BI)->setAlignment(Align); + if (!LayoutPred->isSuccessor(ChainBB)) { + ChainBB->setAlignment(Align); continue; } @@ -1090,10 +1117,11 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // cold relative to the block. When this is true, other predecessors make up // all of the hot entries into the block and thus alignment is likely to be // important. 
- BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI); + BranchProbability LayoutProb = + MBPI->getEdgeProbability(LayoutPred, ChainBB); BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb; if (LayoutEdgeFreq <= (Freq * ColdProb)) - (*BI)->setAlignment(Align); + ChainBB->setAlignment(Align); } } @@ -1110,6 +1138,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { MLI = &getAnalysis<MachineLoopInfo>(); TII = F.getSubtarget().getInstrInfo(); TLI = F.getSubtarget().getTargetLowering(); + MDT = &getAnalysis<MachineDominatorTree>(); assert(BlockToChain.empty()); buildCFGChains(F); @@ -1119,9 +1148,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { if (AlignAllBlock) // Align all of the blocks in the function to a specific alignment. - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); - FI != FE; ++FI) - FI->setAlignment(AlignAllBlock); + for (MachineBasicBlock &MBB : F) + MBB.setAlignment(AlignAllBlock); // We always return true as we have no way to track whether the final order // differs from the original order. @@ -1176,20 +1204,19 @@ bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) { MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); - for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { - BlockFrequency BlockFreq = MBFI->getBlockFreq(I); - Statistic &NumBranches = (I->succ_size() > 1) ? NumCondBranches - : NumUncondBranches; - Statistic &BranchTakenFreq = (I->succ_size() > 1) ? CondBranchTakenFreq - : UncondBranchTakenFreq; - for (MachineBasicBlock::succ_iterator SI = I->succ_begin(), - SE = I->succ_end(); - SI != SE; ++SI) { + for (MachineBasicBlock &MBB : F) { + BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB); + Statistic &NumBranches = + (MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches; + Statistic &BranchTakenFreq = + (MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq; + for (MachineBasicBlock *Succ : MBB.successors()) { // Skip if this successor is a fallthrough. - if (I->isLayoutSuccessor(*SI)) + if (MBB.isLayoutSuccessor(Succ)) continue; - BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(I, *SI); + BlockFrequency EdgeFreq = + BlockFreq * MBPI->getEdgeProbability(&MBB, Succ); ++NumBranches; BranchTakenFreq += EdgeFreq.getFrequency(); } diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 21b9c5a..f72d72a 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index cbd6272..9611122 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -75,10 +75,9 @@ MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg, I != E; ++I) { unsigned MappedDef = *I; // Source of copy is no longer available for propagation. 
- if (AvailCopyMap.erase(MappedDef)) { - for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR) - AvailCopyMap.erase(*SR); - } + AvailCopyMap.erase(MappedDef); + for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR) + AvailCopyMap.erase(*SR); } } } diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp index df60cf3..467a2e4 100644 --- a/lib/CodeGen/MachineDominators.cpp +++ b/lib/CodeGen/MachineDominators.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/SmallBitVector.h" using namespace llvm; @@ -59,3 +60,68 @@ void MachineDominatorTree::releaseMemory() { void MachineDominatorTree::print(raw_ostream &OS, const Module*) const { DT->print(OS); } + +void MachineDominatorTree::applySplitCriticalEdges() const { + // Bail out early if there is nothing to do. + if (CriticalEdgesToSplit.empty()) + return; + + // For each element in CriticalEdgesToSplit, remember whether or not that + // element is the new immediate dominator of its successor. The mapping is done by + // index, i.e., the information for the ith element of CriticalEdgesToSplit is + // the ith element of IsNewIDom. + SmallBitVector IsNewIDom(CriticalEdgesToSplit.size(), true); + size_t Idx = 0; + + // Collect all the dominance properties info, before invalidating + // the underlying DT. + for (CriticalEdge &Edge : CriticalEdgesToSplit) { + // Update dominator information. + MachineBasicBlock *Succ = Edge.ToBB; + MachineDomTreeNode *SuccDTNode = DT->getNode(Succ); + + for (MachineBasicBlock *PredBB : Succ->predecessors()) { + if (PredBB == Edge.NewBB) + continue; + // If we are in this situation: + // FromBB1 FromBB2 + // + + + // + + + + + // + + + + + // ... Split1 Split2 ... + // + + + // + + + // + + // Succ + // Instead of checking the dominance property with Split2, we check it with + // FromBB2 since Split2 is still unknown to the underlying DT structure. + if (NewBBs.count(PredBB)) { + assert(PredBB->pred_size() == 1 && "A basic block resulting from a " + "critical edge split has more " + "than one predecessor!"); + PredBB = *PredBB->pred_begin(); + } + if (!DT->dominates(SuccDTNode, DT->getNode(PredBB))) { + IsNewIDom[Idx] = false; + break; + } + } + ++Idx; + } + + // Now, update DT with the collected dominance properties info. + Idx = 0; + for (CriticalEdge &Edge : CriticalEdgesToSplit) { + // We know FromBB dominates NewBB. + MachineDomTreeNode *NewDTNode = DT->addNewBlock(Edge.NewBB, Edge.FromBB); + + // If all the other predecessors of "Succ" are dominated by "Succ" itself + // then the new block is the new immediate dominator of "Succ". Otherwise, + // the new block doesn't dominate anything.
+ if (IsNewIDom[Idx]) + DT->changeImmediateDominator(DT->getNode(Edge.ToBB), NewDTNode); + ++Idx; + } + NewBBs.clear(); + CriticalEdgesToSplit.clear(); +} diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 151a260..6ceace8 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -54,7 +54,7 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, unsigned FunctionNum, MachineModuleInfo &mmi) - : Fn(F), Target(TM), STI(TM.getSubtargetImpl()), Ctx(mmi.getContext()), + : Fn(F), Target(TM), STI(TM.getSubtargetImpl(*F)), Ctx(mmi.getContext()), MMI(mmi) { if (STI->getRegisterInfo()) RegInfo = new (Allocator) MachineRegisterInfo(this); @@ -584,14 +584,6 @@ int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, return -++NumFixedObjects; } -int MachineFrameInfo::CreateFrameAllocation(uint64_t Size) { - // Force the use of a frame pointer. The intention is that this intrinsic be - // used in conjunction with unwind mechanisms that leak the frame pointer. - setFrameAddressIsTaken(true); - Size = RoundUpToAlignment(Size, StackAlignment); - return CreateStackObject(Size, StackAlignment, false); -} - BitVector MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { assert(MBB && "MBB must be valid"); @@ -903,16 +895,16 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, // DataLayout. if (isa<PointerType>(A->getType())) A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, - const_cast<Constant*>(A), TD); + const_cast<Constant *>(A), *TD); else if (A->getType() != IntTy) A = ConstantFoldInstOperands(Instruction::BitCast, IntTy, - const_cast<Constant*>(A), TD); + const_cast<Constant *>(A), *TD); if (isa<PointerType>(B->getType())) B = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, - const_cast<Constant*>(B), TD); + const_cast<Constant *>(B), *TD); else if (B->getType() != IntTy) B = ConstantFoldInstOperands(Instruction::BitCast, IntTy, - const_cast<Constant*>(B), TD); + const_cast<Constant *>(B), *TD); return A == B; } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 981e4a3..1240efb 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -276,17 +276,8 @@ hash_code llvm::hash_value(const MachineOperand &MO) { /// print - Print the specified machine operand. /// -void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { - // If the instruction is embedded into a basic block, we can find the - // target info for the instruction. - if (!TM) - if (const MachineInstr *MI = getParent()) - if (const MachineBasicBlock *MBB = MI->getParent()) - if (const MachineFunction *MF = MBB->getParent()) - TM = &MF->getTarget(); - const TargetRegisterInfo *TRI = - TM ? TM->getSubtargetImpl()->getRegisterInfo() : nullptr; - +void MachineOperand::print(raw_ostream &OS, + const TargetRegisterInfo *TRI) const { switch (getType()) { case MachineOperand::MO_Register: OS << PrintReg(getReg(), TRI, getSubReg()); @@ -1512,23 +1503,19 @@ void MachineInstr::dump() const { #endif } -static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, - raw_ostream &CommentOS) { - const LLVMContext &Ctx = MF->getFunction()->getContext(); - DL.print(Ctx, CommentOS); -} - -void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, - bool SkipOpers) const { - // We can be a bit tidier if we know the TargetMachine and/or MachineFunction. 
+void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const { + // We can be a bit tidier if we know the MachineFunction. const MachineFunction *MF = nullptr; + const TargetRegisterInfo *TRI = nullptr; const MachineRegisterInfo *MRI = nullptr; + const TargetInstrInfo *TII = nullptr; if (const MachineBasicBlock *MBB = getParent()) { MF = MBB->getParent(); - if (!TM && MF) - TM = &MF->getTarget(); - if (MF) + if (MF) { MRI = &MF->getRegInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + TII = MF->getSubtarget().getInstrInfo(); + } } // Save a list of virtual registers. @@ -1541,7 +1528,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, !getOperand(StartOp).isImplicit(); ++StartOp) { if (StartOp != 0) OS << ", "; - getOperand(StartOp).print(OS, TM); + getOperand(StartOp).print(OS, TRI); unsigned Reg = getOperand(StartOp).getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) VirtRegs.push_back(Reg); @@ -1551,8 +1538,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, OS << " = "; // Print the opcode name. - if (TM && TM->getSubtargetImpl()->getInstrInfo()) - OS << TM->getSubtargetImpl()->getInstrInfo()->getName(getOpcode()); + if (TII) + OS << TII->getName(getOpcode()); else OS << "UNKNOWN"; @@ -1568,7 +1555,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, if (isInlineAsm() && e >= InlineAsm::MIOp_FirstOperand) { // Print asm string. OS << " "; - getOperand(InlineAsm::MIOp_AsmString).print(OS, TM); + getOperand(InlineAsm::MIOp_AsmString).print(OS, TRI); // Print HasSideEffects, MayLoad, MayStore, IsAlignStack unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); @@ -1606,9 +1593,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, if (TargetRegisterInfo::isPhysicalRegister(Reg)) { if (MRI->use_empty(Reg)) { bool HasAliasLive = false; - for (MCRegAliasIterator AI( - Reg, TM->getSubtargetImpl()->getRegisterInfo(), true); - AI.isValid(); ++AI) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { unsigned AliasReg = *AI; if (!MRI->use_empty(AliasReg)) { HasAliasLive = true; @@ -1641,10 +1626,9 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, if (DI.isVariable() && !DIV.getName().empty()) OS << "!\"" << DIV.getName() << '\"'; else - MO.print(OS, TM); - } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) { - OS << TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIndexName( - MO.getImm()); + MO.print(OS, TRI); + } else if (TRI && (isInsertSubreg() || isRegSequence()) && MO.isImm()) { + OS << TRI->getSubRegIndexName(MO.getImm()); } else if (i == AsmDescOp && MO.isImm()) { // Pretty print the inline asm operand descriptor. OS << '$' << AsmOpCount++; @@ -1661,11 +1645,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, unsigned RCID = 0; if (InlineAsm::hasRegClassConstraint(Flag, RCID)) { - if (TM) { - const TargetRegisterInfo *TRI = - TM->getSubtargetImpl()->getRegisterInfo(); - OS << ':' - << TRI->getRegClassName(TRI->getRegClass(RCID)); + if (TRI) { + OS << ':' << TRI->getRegClassName(TRI->getRegClass(RCID)); } else OS << ":RC" << RCID; } @@ -1679,7 +1660,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, // Compute the index of the next operand descriptor. AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag); } else - MO.print(OS, TM); + MO.print(OS, TRI); } // Briefly indicate whether any call clobbers were omitted. 
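The printing hunks above drop the TargetMachine parameter: MachineInstr::print() now recovers target info by walking its parent links, and MachineOperand::print() takes a TargetRegisterInfo directly. The defensive shape of that lookup, reduced to a sketch (MI stands for any instruction; the null checks are the point, since an unparented instruction must still print, just with less detail):

    const TargetRegisterInfo *TRI = nullptr;
    const TargetInstrInfo *TII = nullptr;
    if (const MachineBasicBlock *MBB = MI.getParent())
      if (const MachineFunction *MF = MBB->getParent()) {
        TRI = MF->getSubtarget().getRegisterInfo();
        TII = MF->getSubtarget().getInstrInfo();
      }
    // Every later use tolerates null, e.g.:
    //   OS << (TII ? TII->getName(Opcode) : "UNKNOWN");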
@@ -1715,7 +1696,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, if (!HaveSemi) OS << ";"; HaveSemi = true; for (unsigned i = 0; i != VirtRegs.size(); ++i) { const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]); - OS << " " << MRI->getTargetRegisterInfo()->getRegClassName(RC) + OS << " " << TRI->getRegClassName(RC) << ':' << PrintReg(VirtRegs[i]); for (unsigned j = i+1; j != VirtRegs.size();) { if (MRI->getRegClass(VirtRegs[j]) != RC) { @@ -1738,7 +1719,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt); if (!InlinedAtDL.isUnknown() && MF) { OS << " inlined @[ "; - printDebugLoc(InlinedAtDL, MF, OS); + InlinedAtDL.print(OS); OS << " ]"; } } @@ -1747,7 +1728,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, } else if (!debugLoc.isUnknown() && MF) { if (!HaveSemi) OS << ";"; OS << " dbg:"; - printDebugLoc(debugLoc, MF, OS); + debugLoc.print(OS); } OS << '\n'; diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 64d0932..2f65a2e 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -54,6 +54,12 @@ HoistCheapInsts("hoist-cheap-insts", cl::desc("MachineLICM should hoist even cheap instructions"), cl::init(false), cl::Hidden); +static cl::opt<bool> +SinkInstsToAvoidSpills("sink-insts-to-avoid-spills", + cl::desc("MachineLICM should sink instructions into " + "loops to avoid register spills"), + cl::init(false), cl::Hidden); + STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); STATISTIC(NumLowRP, @@ -243,6 +249,11 @@ namespace { void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode); void HoistRegion(MachineDomTreeNode *N, bool IsHeader); + /// SinkIntoLoop - Sink instructions into loops if profitable. This + /// especially tries to prevent register spills caused by register pressure + /// if there is little to no overhead moving instructions into loops. + void SinkIntoLoop(); + /// getRegisterClassIDAndCost - For a given MI, register, and the operand /// index, return the ID and cost of its representative register class by /// reference. @@ -381,6 +392,9 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { FirstInLoop = true; HoistOutOfLoop(N); CSEMap.clear(); + + if (SinkInstsToAvoidSpills) + SinkIntoLoop(); } } @@ -771,6 +785,53 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { } } +void MachineLICM::SinkIntoLoop() { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + return; + + SmallVector<MachineInstr *, 8> Candidates; + for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin(); + I != Preheader->instr_end(); ++I) { + // We need to ensure that we can safely move this instruction into the loop. + // As such, it must not have side-effects, e.g. as a call does. + if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(I)) + Candidates.push_back(I); + } + + for (MachineInstr *I : Candidates) { + const MachineOperand &MO = I->getOperand(0); + if (!MO.isDef() || !MO.isReg() || !MO.getReg()) + continue; + if (!MRI->hasOneDef(MO.getReg())) + continue; + bool CanSink = true; + MachineBasicBlock *B = nullptr; + for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { + // FIXME: Come up with a proper cost model that estimates whether sinking + // the instruction (and thus possibly executing it on every loop + // iteration) is more expensive than a register. + // For now, assume that copies are cheap and thus almost always worth it.
+ if (!MI.isCopy()) { + CanSink = false; + break; + } + if (!B) { + B = MI.getParent(); + continue; + } + B = DT->findNearestCommonDominator(B, MI.getParent()); + if (!B) { + CanSink = false; + break; + } + } + if (!CanSink || !B || B == Preheader) + continue; + B->splice(B->getFirstNonPHI(), Preheader, I); + } +} + static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); } diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index 89054d4..ce6abdd 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; // Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops. diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 32b7db1..278a8f2 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -65,7 +65,7 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg) { const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); const TargetRegisterClass *OldRC = getRegClass(Reg); const TargetRegisterClass *NewRC = - getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC); + getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC, *MF); // Stop early if there is no room to grow. if (NewRC == OldRC) diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 89ac6a8..7a3c80b 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -209,6 +209,11 @@ static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched); +static cl::opt<bool> EnableMachineSched( + "enable-misched", + cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), + cl::Hidden); + /// Forward declare the standard machine scheduler. This will be used as the /// default scheduler if the target does not set a default. static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C); @@ -304,6 +309,12 @@ ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() { /// design would be to split blocks at scheduling boundaries, but LLVM has a /// general bias against block splitting purely for implementation simplicity. bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { + if (EnableMachineSched.getNumOccurrences()) { + if (!EnableMachineSched) + return false; + } else if (!mf.getSubtarget().enableMachineScheduler()) + return false; + DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs())); // Initialize the context of the pass. 
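SinkIntoLoop() above collects loop-invariant preheader instructions whose only users are copies, then moves each candidate to the nearest common dominator of those users, giving up when that block degenerates to the preheader itself. Here is a toy model of just the placement decision, with an integer-indexed immediate-dominator array standing in for MachineDominatorTree; the encoding and names are illustrative only.

#include <iostream>
#include <vector>

// Toy dominator tree: IDom[b] is the immediate dominator of block b,
// and the entry block is its own immediate dominator.
struct DomTree {
  std::vector<int> IDom;

  int depth(int B) const {
    int D = 0;
    while (IDom[B] != B) { B = IDom[B]; ++D; }
    return D;
  }

  // Classic walk-up NCD query, as MachineDominatorTree provides.
  int findNearestCommonDominator(int A, int B) const {
    int DA = depth(A), DB = depth(B);
    while (DA > DB) { A = IDom[A]; --DA; }
    while (DB > DA) { B = IDom[B]; --DB; }
    while (A != B) { A = IDom[A]; B = IDom[B]; }
    return A;
  }
};

// Mirrors the core of SinkIntoLoop(): the sink point is the nearest common
// dominator of all use blocks; sinking is pointless if that is the preheader.
int chooseSinkBlock(const DomTree &DT, const std::vector<int> &UseBlocks,
                    int Preheader) {
  int B = -1;
  for (int UB : UseBlocks)
    B = (B < 0) ? UB : DT.findNearestCommonDominator(B, UB);
  return (B < 0 || B == Preheader) ? -1 : B;
}

int main() {
  // Block 0 is the preheader, 1 the loop header dominating blocks 2 and 3.
  DomTree DT{{0, 0, 1, 1}};
  std::cout << chooseSinkBlock(DT, {2, 3}, 0) << '\n'; // 1: sink to the header
  std::cout << chooseSinkBlock(DT, {2}, 0) << '\n';    // 2: sink to the use
}

As the FIXME in the hunk admits, the profitability test is crude: a real cost model would weigh re-executing the instruction on every iteration against the register pressure the sink relieves.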
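The other notable change in this span is the new EnableMachineSched gate in MachineScheduler.cpp, which (together with the applyOverride machinery removed from Passes.cpp further down) establishes a simple precedence: an explicit -enable-misched setting always wins, and only in its absence does the subtarget's enableMachineScheduler() default decide. A minimal sketch of that rule, where Flag is a hypothetical stand-in for cl::opt<bool> and its getNumOccurrences():

#include <iostream>
#include <optional>

// Hypothetical stand-in for cl::opt<bool>: it remembers whether the user
// set the flag at all, which is what getNumOccurrences() reports.
struct Flag {
  std::optional<bool> UserValue; // engaged only if given on the command line
  bool occurred() const { return UserValue.has_value(); }
  bool value() const { return UserValue.value_or(false); }
};

// Mirrors the new gate in MachineScheduler::runOnMachineFunction().
bool shouldRunMISched(const Flag &EnableMachineSched, bool SubtargetDefault) {
  if (EnableMachineSched.occurred())
    return EnableMachineSched.value(); // explicit user choice wins
  return SubtargetDefault;             // otherwise the target decides
}

int main() {
  Flag Unset;            // flag not given: defer to the subtarget
  Flag ForcedOff{false}; // -enable-misched=false
  Flag ForcedOn{true};   // -enable-misched=true
  std::cout << shouldRunMISched(Unset, true) << '\n';     // 1
  std::cout << shouldRunMISched(ForcedOff, true) << '\n'; // 0
  std::cout << shouldRunMISched(ForcedOn, false) << '\n'; // 1
}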
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index bdb094f..991241e 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -397,7 +397,7 @@ void MachineVerifier::report(const char *msg, assert(MO); report(msg, MO->getParent()); errs() << "- operand " << MONum << ": "; - MO->print(errs(), TM); + MO->print(errs(), TRI); errs() << "\n"; } @@ -739,7 +739,7 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) { if (!isUInt<5>(MI->getOperand(1).getImm())) report("Unknown asm flags", &MI->getOperand(1), 1); - assert(InlineAsm::MIOp_FirstOperand == 2 && "Asm format changed"); + static_assert(InlineAsm::MIOp_FirstOperand == 2, "Asm format changed"); unsigned OpNo = InlineAsm::MIOp_FirstOperand; unsigned NumOps; @@ -927,7 +927,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { TII->getRegClass(MCID, MONum, TRI, *MF)) { if (SubIdx) { const TargetRegisterClass *SuperRC = - TRI->getLargestLegalSuperClass(RC); + TRI->getLargestLegalSuperClass(RC, *MF); if (!SuperRC) { report("No largest legal super class exists.", MO, MONum); return; @@ -1573,7 +1573,8 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (!hasRead) { // When tracking subregister liveness, the main range must start new // values on partial register writes, even if there is no read. - if (!MRI->tracksSubRegLiveness() || LaneMask != 0 || !hasSubRegDef) { + if (!MRI->shouldTrackSubRegLiveness(Reg) || LaneMask != 0 || + !hasSubRegDef) { report("Instruction ending live segment doesn't read the register", MI); errs() << S << " in " << LR << '\n'; @@ -1649,40 +1650,35 @@ void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg, } void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { - verifyLiveRange(LI, LI.reg); - unsigned Reg = LI.reg; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - unsigned Mask = 0; - unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg); - for (const LiveInterval::SubRange &SR : LI.subranges()) { - if ((Mask & SR.LaneMask) != 0) - report("Lane masks of sub ranges overlap in live interval", MF, LI); - if ((SR.LaneMask & ~MaxMask) != 0) - report("Subrange lanemask is invalid", MF, LI); - Mask |= SR.LaneMask; - verifyLiveRange(SR, LI.reg, SR.LaneMask); - if (!LI.covers(SR)) - report("A Subrange is not covered by the main range", MF, LI); - } - } else if (LI.hasSubRanges()) { - report("subregister liveness only allowed for virtual registers", MF, LI); + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + verifyLiveRange(LI, Reg); + + unsigned Mask = 0; + unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg); + for (const LiveInterval::SubRange &SR : LI.subranges()) { + if ((Mask & SR.LaneMask) != 0) + report("Lane masks of sub ranges overlap in live interval", MF, LI); + if ((SR.LaneMask & ~MaxMask) != 0) + report("Subrange lanemask is invalid", MF, LI); + Mask |= SR.LaneMask; + verifyLiveRange(SR, LI.reg, SR.LaneMask); + if (!LI.covers(SR)) + report("A Subrange is not covered by the main range", MF, LI); } // Check the LI only has one connected component. 
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - ConnectedVNInfoEqClasses ConEQ(*LiveInts); - unsigned NumComp = ConEQ.Classify(&LI); - if (NumComp > 1) { - report("Multiple connected components in live interval", MF, LI); - for (unsigned comp = 0; comp != NumComp; ++comp) { - errs() << comp << ": valnos"; - for (LiveInterval::const_vni_iterator I = LI.vni_begin(), - E = LI.vni_end(); I!=E; ++I) - if (comp == ConEQ.getEqClass(*I)) - errs() << ' ' << (*I)->id; - errs() << '\n'; - } + ConnectedVNInfoEqClasses ConEQ(*LiveInts); + unsigned NumComp = ConEQ.Classify(&LI); + if (NumComp > 1) { + report("Multiple connected components in live interval", MF, LI); + for (unsigned comp = 0; comp != NumComp; ++comp) { + errs() << comp << ": valnos"; + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), + E = LI.vni_end(); I!=E; ++I) + if (comp == ConEQ.getEqClass(*I)) + errs() << ' ' << (*I)->id; + errs() << '\n'; } } } diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index def2e3d..d514190 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> @@ -46,6 +47,10 @@ SplitAllCriticalEdges("phi-elim-split-all-critical-edges", cl::init(false), cl::Hidden, cl::desc("Split all critical edges during " "PHI elimination")); +static cl::opt<bool> NoPhiElimLiveOutEarlyExit( + "no-phi-elim-live-out-early-exit", cl::init(false), cl::Hidden, + cl::desc("Do not use an early exit if isLiveOutPastPHIs returns true.")); + namespace { class PHIElimination : public MachineFunctionPass { MachineRegisterInfo *MRI; // Machine register information @@ -573,12 +578,14 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // there is a risk it may not be coalesced away. // // If the copy would be a kill, there is no need to split the edge. - if (!isLiveOutPastPHIs(Reg, PreMBB) && !SplitAllCriticalEdges) + bool ShouldSplit = isLiveOutPastPHIs(Reg, PreMBB); + if (!ShouldSplit && !NoPhiElimLiveOutEarlyExit) continue; - - DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#" - << PreMBB->getNumber() << " -> BB#" << MBB.getNumber() - << ": " << *BBI); + if (ShouldSplit) { + DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#" + << PreMBB->getNumber() << " -> BB#" << MBB.getNumber() + << ": " << *BBI); + } // If Reg is not live-in to MBB, it means it must be live-in to some // other PreMBB successor, and we can avoid the interference by splitting @@ -588,7 +595,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // is likely to be left after coalescing. If we are looking at a loop // exiting edge, split it so we won't insert code in the loop, otherwise // don't bother. - bool ShouldSplit = !isLiveIn(Reg, &MBB) || SplitAllCriticalEdges; + ShouldSplit = ShouldSplit && !isLiveIn(Reg, &MBB); // Check for a loop exiting edge. if (!ShouldSplit && CurLoop != PreLoop) { @@ -603,7 +610,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // Split unless this edge is entering CurLoop from an outer loop. 
ShouldSplit = PreLoop && !PreLoop->contains(CurLoop); } - if (!ShouldSplit) + if (!ShouldSplit && !SplitAllCriticalEdges) continue; if (!PreMBB->SplitCriticalEdge(&MBB, this)) { DEBUG(dbgs() << "Failed to split critical edge.\n"); diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 272d068..c128414 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -23,8 +23,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" @@ -55,9 +54,6 @@ static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden, static cl::opt<cl::boolOrDefault> OptimizeRegAlloc("optimize-regalloc", cl::Hidden, cl::desc("Enable optimized register allocation compilation path.")); -static cl::opt<cl::boolOrDefault> -EnableMachineSched("enable-misched", - cl::desc("Enable the machine instruction scheduling pass.")); static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm", cl::Hidden, cl::desc("Disable Machine LICM")); @@ -116,28 +112,6 @@ static IdentifyingPassPtr applyDisable(IdentifyingPassPtr PassID, return PassID; } -/// Allow Pass selection to be overriden by command line options. This supports -/// flags with ternary conditions. TargetID is passed through by default. The -/// pass is suppressed when the option is false. When the option is true, the -/// StandardID is selected if the target provides no default. -static IdentifyingPassPtr applyOverride(IdentifyingPassPtr TargetID, - cl::boolOrDefault Override, - AnalysisID StandardID) { - switch (Override) { - case cl::BOU_UNSET: - return TargetID; - case cl::BOU_TRUE: - if (TargetID.isValid()) - return TargetID; - if (StandardID == nullptr) - report_fatal_error("Target cannot enable pass"); - return StandardID; - case cl::BOU_FALSE: - return IdentifyingPassPtr(); - } - llvm_unreachable("Invalid command line option state"); -} - /// Allow standard passes to be disabled by the command line, regardless of who /// is adding the pass. /// @@ -182,9 +156,6 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID, if (StandardID == &MachineCSEID) return applyDisable(TargetID, DisableMachineCSE); - if (StandardID == &MachineSchedulerID) - return applyOverride(TargetID, EnableMachineSched, StandardID); - if (StandardID == &TargetPassConfig::PostRAMachineLICMID) return applyDisable(TargetID, DisablePostRAMachineLICM); @@ -249,11 +220,6 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) // Substitute Pseudo Pass IDs for real ones. substitutePass(&EarlyTailDuplicateID, &TailDuplicateID); substitutePass(&PostRAMachineLICMID, &MachineLICMID); - - // Temporarily disable experimental passes. - const TargetSubtargetInfo &ST = *TM->getSubtargetImpl(); - if (!ST.useMachineScheduler()) - disablePass(&MachineSchedulerID); } /// Insert InsertedPassID pass after TargetPassID. @@ -409,10 +375,8 @@ void TargetPassConfig::addIRPasses() { // Before running any passes, run the verifier to determine if the input // coming from the front-end and/or optimizer is valid. - if (!DisableVerify) { + if (!DisableVerify) addPass(createVerifierPass()); - addPass(createDebugInfoVerifierPass()); - } // Run loop strength reduction before anything else. 
if (getOptLevel() != CodeGenOpt::None && !DisableLSR) { @@ -455,7 +419,11 @@ void TargetPassConfig::addPassesToHandleExceptions() { addPass(createDwarfEHPass(TM)); break; case ExceptionHandling::WinEH: + // We support using both GCC-style and MSVC-style exceptions on Windows, so + // add both preparation passes. Each pass will only actually run if it + // recognizes the personality function. addPass(createWinEHPass(TM)); + addPass(createDwarfEHPass(TM)); break; case ExceptionHandling::None: addPass(createLowerInvokePass()); @@ -479,12 +447,6 @@ void TargetPassConfig::addCodeGenPrepare() { void TargetPassConfig::addISelPrepare() { addPreISel(); - // Need to verify DebugInfo *before* creating the stack protector analysis. - // It's a function pass, and verifying between it and its users causes a - // crash. - if (!DisableVerify) - addPass(createDebugInfoVerifierPass()); - addPass(createStackProtectorPass(TM)); if (PrintISelInput) diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 283d1f2..ebe05e3 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -76,6 +76,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -411,8 +412,7 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, if (ExtendLife && !ExtendedUses.empty()) // Extend the liveness of the extension result. - std::copy(ExtendedUses.begin(), ExtendedUses.end(), - std::back_inserter(Uses)); + Uses.append(ExtendedUses.begin(), ExtendedUses.end()); // Now replace all uses. bool Changed = false; @@ -916,7 +916,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { // => v0 = COPY v1 // Currently we haven't seen motivating example for that and we // want to avoid untested code. - NumRewrittenCopies += Changed == true; + NumRewrittenCopies += Changed; return Changed; } diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 6d29b98..e073e6a 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#include "PrologEpilogInserter.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -28,6 +27,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/DiagnosticInfo.h" @@ -48,6 +48,53 @@ using namespace llvm; #define DEBUG_TYPE "pei" +namespace { +class PEI : public MachineFunctionPass { +public: + static char ID; + PEI() : MachineFunctionPass(ID) { + initializePEIPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// runOnMachineFunction - Insert prolog/epilog code and replace abstract + /// frame indexes with appropriate references. + /// + bool runOnMachineFunction(MachineFunction &Fn) override; + +private: + RegScavenger *RS; + + // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved + // stack frame indexes. 
+ unsigned MinCSFrameIndex, MaxCSFrameIndex; + + // Entry and return blocks of the current function. + MachineBasicBlock *EntryBlock; + SmallVector<MachineBasicBlock *, 4> ReturnBlocks; + + // Flag to control whether to use the register scavenger to resolve + // frame index materialization registers. Set according to + // TRI->requiresFrameIndexScavenging() for the current function. + bool FrameIndexVirtualScavenging; + + void calculateSets(MachineFunction &Fn); + void calculateCallsInformation(MachineFunction &Fn); + void calculateCalleeSavedRegisters(MachineFunction &Fn); + void insertCSRSpillsAndRestores(MachineFunction &Fn); + void calculateFrameObjectOffsets(MachineFunction &Fn); + void replaceFrameIndices(MachineFunction &Fn); + void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, + int &SPAdj); + void scavengeFrameVirtualRegs(MachineFunction &Fn); + void insertPrologEpilogCode(MachineFunction &Fn); + + // Convenience for recognizing return blocks. + bool isReturnBlock(MachineBasicBlock *MBB); +}; +} // namespace + char PEI::ID = 0; char &llvm::PrologEpilogCodeInserterID = PEI::ID; @@ -810,17 +857,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, continue; } - // Frame allocations are target independent. Simply swap the index with - // the offset. - if (MI->getOpcode() == TargetOpcode::FRAME_ALLOC) { - assert(TFI->hasFP(Fn) && "frame alloc requires FP"); - MachineOperand &FI = MI->getOperand(i); - unsigned Reg; - int FrameOffset = TFI->getFrameIndexReference(Fn, FI.getIndex(), Reg); - FI.ChangeToImmediate(FrameOffset); - continue; - } - // Some instructions (e.g. inline asm instructions) can have // multiple frame indices and/or cause eliminateFrameIndex // to insert more than one instruction. We need the register diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h deleted file mode 100644 index f88b8ef..0000000 --- a/lib/CodeGen/PrologEpilogInserter.h +++ /dev/null @@ -1,78 +0,0 @@ -//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -*---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass is responsible for finalizing the functions frame layout, saving -// callee saved registers, and for emitting prolog & epilog code for the -// function. -// -// This pass must be run after register allocation. After this pass is -// executed, it is illegal to construct MO_FrameIndex operands. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_CODEGEN_PROLOGEPILOGINSERTER_H -#define LLVM_LIB_CODEGEN_PROLOGEPILOGINSERTER_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SparseBitVector.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetRegisterInfo.h" - -namespace llvm { - class RegScavenger; - class MachineBasicBlock; - - class PEI : public MachineFunctionPass { - public: - static char ID; - PEI() : MachineFunctionPass(ID) { - initializePEIPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override; - - /// runOnMachineFunction - Insert prolog/epilog code and replace abstract - /// frame indexes with appropriate references. 
- /// - bool runOnMachineFunction(MachineFunction &Fn) override; - - private: - RegScavenger *RS; - - // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved - // stack frame indexes. - unsigned MinCSFrameIndex, MaxCSFrameIndex; - - // Entry and return blocks of the current function. - MachineBasicBlock* EntryBlock; - SmallVector<MachineBasicBlock*, 4> ReturnBlocks; - - // Flag to control whether to use the register scavenger to resolve - // frame index materialization registers. Set according to - // TRI->requiresFrameIndexScavenging() for the curren function. - bool FrameIndexVirtualScavenging; - - void calculateSets(MachineFunction &Fn); - void calculateCallsInformation(MachineFunction &Fn); - void calculateCalleeSavedRegisters(MachineFunction &Fn); - void insertCSRSpillsAndRestores(MachineFunction &Fn); - void calculateFrameObjectOffsets(MachineFunction &Fn); - void replaceFrameIndices(MachineFunction &Fn); - void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, - int &SPAdj); - void scavengeFrameVirtualRegs(MachineFunction &Fn); - void insertPrologEpilogCode(MachineFunction &Fn); - - // Convenience for recognizing return blocks. - bool isReturnBlock(MachineBasicBlock* MBB); - }; -} // End llvm namespace -#endif diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp index 6b346f4..16ff48e 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -27,6 +27,7 @@ #endif #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Timer.h" diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index edc3294..e94f1bb 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -1554,7 +1554,8 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n"); - const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC); + const TargetRegisterClass *SuperRC = + TRI->getLargestLegalSuperClass(CurRC, *MF); unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC); // Split around every non-copy instruction if this split will relax // the constraints on the virtual register. 
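The deleted PrologEpilogInserter.h above existed only so that one .cpp file could name the PEI class; the patch folds the declaration into an anonymous namespace inside PrologEpilogInserter.cpp, and the address of PEI::ID, re-exported through PrologEpilogCodeInserterID, remains the only externally visible handle. A compressed, free-standing sketch of that idiom (hypothetical code, not the real pass machinery):

#include <iostream>

namespace {
// Internal linkage: no other translation unit can name this class any more,
// which is exactly what deleting the private header achieves.
class PEI {
public:
  static char ID; // the *address* of ID serves as a unique pass identity
};
} // namespace

char PEI::ID = 0;

// The single exported symbol, mirroring llvm::PrologEpilogCodeInserterID:
// clients compare or store the address, never the char value.
char &PrologEpilogCodeInserterID = PEI::ID;

int main() {
  std::cout << static_cast<const void *>(&PrologEpilogCodeInserterID) << '\n';
}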
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 77a42b3..eeff73d 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -178,8 +178,40 @@ class Interference : public PBQPRAConstraint { private: typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr; - typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IMatrixKey; - typedef DenseMap<IMatrixKey, PBQPRAGraph::MatrixPtr> IMatrixCache; + typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IKey; + typedef DenseMap<IKey, PBQPRAGraph::MatrixPtr> IMatrixCache; + typedef DenseSet<IKey> DisjointAllowedRegsCache; + typedef std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId> IEdgeKey; + typedef DenseSet<IEdgeKey> IEdgeCache; + + bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId, + PBQPRAGraph::NodeId MId, + const DisjointAllowedRegsCache &D) const { + const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs(); + const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs(); + + if (NRegs == MRegs) + return false; + + if (NRegs < MRegs) + return D.count(IKey(NRegs, MRegs)) > 0; + + return D.count(IKey(MRegs, NRegs)) > 0; + } + + void setDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId, + PBQPRAGraph::NodeId MId, + DisjointAllowedRegsCache &D) { + const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs(); + const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs(); + + assert(NRegs != MRegs && "AllowedRegs can not be disjoint with itself"); + + if (NRegs < MRegs) + D.insert(IKey(NRegs, MRegs)); + else + D.insert(IKey(MRegs, NRegs)); + } // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required // for the fast interference graph construction algorithm. The last is there @@ -247,6 +279,13 @@ public: // and uniquing them. IMatrixCache C; + // Finding an edge is expensive in the worst case (O(max_clique(G))). So + // cache locally edges we have already seen. + IEdgeCache EC; + + // Cache known disjoint allowed registers pairs + DisjointAllowedRegsCache D; + typedef std::set<IntervalInfo, decltype(&lowestEndPoint)> IntervalSet; typedef std::priority_queue<IntervalInfo, std::vector<IntervalInfo>, decltype(&lowestStartPoint)> IntervalQueue; @@ -290,14 +329,21 @@ public: for (const auto &A : Active) { PBQP::GraphBase::NodeId MId = getNodeId(A); + // Do not add an edge when the nodes' allowed registers do not + // intersect: there is obviously no interference. + if (haveDisjointAllowedRegs(G, NId, MId, D)) + continue; + // Check that we haven't already added this edge - // FIXME: findEdge is expensive in the worst case (O(max_clique(G))). - // It might be better to replace this with a local bit-matrix. - if (G.findEdge(NId, MId) != PBQPRAGraph::invalidEdgeId()) + IEdgeKey EK(std::min(NId, MId), std::max(NId, MId)); + if (EC.count(EK)) continue; // This is a new edge - add it to the graph. - createInterferenceEdge(G, NId, MId, C); + if (!createInterferenceEdge(G, NId, MId, C)) + setDisjointAllowedRegs(G, NId, MId, D); + else + EC.insert(EK); } // Finally, add Cur to the Active set. @@ -307,35 +353,48 @@ public: private: - void createInterferenceEdge(PBQPRAGraph &G, PBQPRAGraph::NodeId NId, - PBQPRAGraph::NodeId MId, IMatrixCache &C) { + // Create an Interference edge and add it to the graph, unless it is + // a null matrix, meaning the nodes' allowed registers do not have any + // interference. This case occurs frequently between integer and floating + // point registers for example. 
+ // Return true iff both nodes interfere. + bool createInterferenceEdge(PBQPRAGraph &G, + PBQPRAGraph::NodeId NId, PBQPRAGraph::NodeId MId, + IMatrixCache &C) { const TargetRegisterInfo &TRI = *G.getMetadata().MF.getSubtarget().getRegisterInfo(); - const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs(); const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs(); // Try looking the edge costs up in the IMatrixCache first. - IMatrixKey K(&NRegs, &MRegs); + IKey K(&NRegs, &MRegs); IMatrixCache::iterator I = C.find(K); if (I != C.end()) { G.addEdgeBypassingCostAllocator(NId, MId, I->second); - return; + return true; } PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0); + bool NodesInterfere = false; for (unsigned I = 0; I != NRegs.size(); ++I) { unsigned PRegN = NRegs[I]; for (unsigned J = 0; J != MRegs.size(); ++J) { unsigned PRegM = MRegs[J]; - if (TRI.regsOverlap(PRegN, PRegM)) + if (TRI.regsOverlap(PRegN, PRegM)) { M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity(); + NodesInterfere = true; + } } } + if (!NodesInterfere) + return false; + PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M)); C[K] = G.getEdgeCostsPtr(EId); + + return true; } }; diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index ab33672..178fa18 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -131,7 +131,8 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { RCI.NumRegs = StressRA; // Check if RC is a proper sub-class. - if (const TargetRegisterClass *Super = TRI->getLargestLegalSuperClass(RC)) + if (const TargetRegisterClass *Super = + TRI->getLargestLegalSuperClass(RC, *MF)) if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs) RCI.ProperSubClass = true; @@ -175,6 +176,6 @@ unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const { } compute(RC); unsigned NReserved = RC->getNumRegs() - getNumAllocatableRegs(RC); - return TRI->getRegPressureSetLimit(Idx) - - TRI->getRegClassWeight(RC).RegWeight * NReserved; + return TRI->getRegPressureSetLimit(*MF, Idx) - + TRI->getRegClassWeight(RC).RegWeight * NReserved; } diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 1e4cfe8..9e3cf41 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -58,6 +58,10 @@ EnableJoining("join-liveintervals", cl::desc("Coalesce copies (default=true)"), cl::init(true)); +static cl::opt<bool> UseTerminalRule("terminal-rule", + cl::desc("Apply the terminal rule"), + cl::init(false)); + /// Temporary flag to test critical edge unsplitting. static cl::opt<bool> EnableJoinSplits("join-splitedges", @@ -160,12 +164,14 @@ namespace { /// LaneMask are split as necessary. @p LaneMask are the lanes that /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange /// lanemasks already adjusted to the coalesced register. - void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, + /// @returns false if live range conflicts couldn't get resolved. + bool mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, unsigned LaneMask, CoalescerPair &CP); /// Join the liveranges of two subregisters. Joins @p RRange into /// @p LRange, @p RRange may be invalid afterwards. - void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, + /// @returns false if live range conflicts couldn't get resolved.
+ bool joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, unsigned LaneMask, const CoalescerPair &CP); /// We found a non-trivially-coalescable copy. If the source value number is @@ -204,6 +210,20 @@ namespace { /// Returns true if @p CopyMI was a copy of an undef value and eliminated. bool eliminateUndefCopy(MachineInstr *CopyMI); + /// Check whether or not we should apply the terminal rule on the + /// destination (Dst) of \p Copy. + /// When the terminal rule applies, Copy is not profitable to + /// coalesce. + /// Dst is terminal if it has exactly one affinity (Dst, Src) and + /// at least one interference (Dst, Dst2). If Dst is terminal, the + /// terminal rule consists in checking that at least one of + /// interfering node, say Dst2, has an affinity of equal or greater + /// weight with Src. + /// In that case, Dst2 and Dst will not be able to be both coalesced + /// with Src. Since Dst2 exposes more coalescing opportunities than + /// Dst, we can drop \p Copy. + bool applyTerminalRule(const MachineInstr &Copy) const; + public: static char ID; ///< Class identification, replacement for typeinfo RegisterCoalescer() : MachineFunctionPass(ID) { @@ -1143,7 +1163,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, // A subreg use of a partially undef (super) register may be a complete // undef use now and then has to be marked that way. - if (SubIdx != 0 && MO.isUse() && MRI->tracksSubRegLiveness()) { + if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) { if (!DstInt->hasSubRanges()) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); unsigned Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg); @@ -1756,6 +1776,9 @@ public: void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, SmallVectorImpl<unsigned> &ShrinkRegs); + /// Remove liverange defs at places where implicit defs will be removed. + void removeImplicitDefs(); + /// Get the value assignments suitable for passing to LiveInterval::join. const int *getAssignments() const { return Assignments.data(); } }; @@ -1856,7 +1879,11 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { assert(DefMI != nullptr); if (SubRangeJoin) { // We don't care about the lanes when joining subregister ranges. 
- V.ValidLanes = V.WriteLanes = 1; + V.WriteLanes = V.ValidLanes = 1; + if (DefMI->isImplicitDef()) { + V.ValidLanes = 0; + V.ErasableImplicitDef = true; + } } else { bool Redef = false; V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef); @@ -2339,6 +2366,18 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask) LI.removeEmptySubRanges(); } +void JoinVals::removeImplicitDefs() { + for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { + Val &V = Vals[i]; + if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned) + continue; + + VNInfo *VNI = LR.getValNumInfo(i); + VNI->markUnused(); + LR.removeValNo(VNI); + } +} + void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, SmallVectorImpl<unsigned> &ShrinkRegs) { for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { @@ -2382,7 +2421,7 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, } } -void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, +bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, unsigned LaneMask, const CoalescerPair &CP) { SmallVector<VNInfo*, 16> NewVNInfo; @@ -2392,12 +2431,19 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, NewVNInfo, CP, LIS, TRI, true, true); // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs()) - // Conflicts should already be resolved so the mapping/resolution should - // always succeed. - if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) - llvm_unreachable("Can't join subrange although main ranges are compatible"); - if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals)) - llvm_unreachable("Can't join subrange although main ranges are compatible"); + // We should be able to resolve all conflicts here as we could successfully do + // it on the mainrange already. There is however a problem when multiple + // ranges get mapped to the "overflow" lane mask bit which creates unexpected + // interferences. + if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) { + DEBUG(dbgs() << "*** Couldn't join subrange!\n"); + return false; + } + if (!LHSVals.resolveConflicts(RHSVals) || + !RHSVals.resolveConflicts(LHSVals)) { + DEBUG(dbgs() << "*** Couldn't join subrange!\n"); + return false; + } // The merging algorithm in LiveInterval::join() can't handle conflicting // value mappings, so we need to remove any live ranges that overlap a @@ -2407,6 +2453,9 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, LHSVals.pruneValues(RHSVals, EndPoints, false); RHSVals.pruneValues(LHSVals, EndPoints, false); + LHSVals.removeImplicitDefs(); + RHSVals.removeImplicitDefs(); + LRange.verify(); RRange.verify(); @@ -2416,16 +2465,17 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n"); if (EndPoints.empty()) - return; + return true; // Recompute the parts of the live range we had to remove because of // CR_Replace conflicts. 
DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: " << LRange << '\n'); LIS->extendToIndices(LRange, EndPoints); + return true; } -void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, +bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, unsigned LaneMask, CoalescerPair &CP) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); @@ -2453,7 +2503,8 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, CommonRange = &R; } LiveRange RangeCopy(ToMerge, Allocator); - joinSubRegRanges(*CommonRange, RangeCopy, Common, CP); + if (!joinSubRegRanges(*CommonRange, RangeCopy, Common, CP)) + return false; LaneMask &= ~RMask; } @@ -2461,13 +2512,14 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, DEBUG(dbgs() << format("\t\tNew Lane %04X\n", LaneMask)); LI.createSubRangeFrom(Allocator, LaneMask, ToMerge); } + return true; } bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { SmallVector<VNInfo*, 16> NewVNInfo; LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); LiveInterval &LHS = LIS->getInterval(CP.getDstReg()); - bool TrackSubRegLiveness = MRI->tracksSubRegLiveness(); + bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC()); JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), 0, NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness); JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), 0, NewVNInfo, CP, LIS, @@ -2511,22 +2563,40 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { // Determine lanemasks of RHS in the coalesced register and merge subranges. unsigned SrcIdx = CP.getSrcIdx(); + bool Abort = false; if (!RHS.hasSubRanges()) { unsigned Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask() : TRI->getSubRegIndexLaneMask(SrcIdx); - mergeSubRangeInto(LHS, RHS, Mask, CP); + if (!mergeSubRangeInto(LHS, RHS, Mask, CP)) + Abort = true; } else { // Pair up subranges and merge. for (LiveInterval::SubRange &R : RHS.subranges()) { unsigned Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask); - mergeSubRangeInto(LHS, R, Mask, CP); + if (!mergeSubRangeInto(LHS, R, Mask, CP)) { + Abort = true; + break; + } } } + if (Abort) { + // This shouldn't have happened :-( + // However, we are aware of at least one existing problem where we + // can't merge subranges when multiple ranges end up in the + // "overflow bit" 32. As a workaround we drop all subregister ranges, + // which means we lose some precision but are back to a well-defined + // state. + assert((CP.getNewRC()->getLaneMask() & 0x80000000u) + && "SubRange merge should only fail when merging into bit 32."); + DEBUG(dbgs() << "\tSubrange join aborted!\n"); + LHS.clearSubRanges(); + RHS.clearSubRanges(); + } else { + DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); - DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); - - LHSVals.pruneSubRegValues(LHS, ShrinkMask); - RHSVals.pruneSubRegValues(LHS, ShrinkMask); + LHSVals.pruneSubRegValues(LHS, ShrinkMask); + RHSVals.pruneSubRegValues(LHS, ShrinkMask); + } } // The merging algorithm in LiveInterval::join() can't handle conflicting @@ -2645,6 +2715,58 @@ copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) { return Progress; } +/// Check if DstReg is a terminal node. +/// I.e., it does not have any affinity other than \p Copy. +static bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy, + const MachineRegisterInfo *MRI) { + assert(Copy.isCopyLike()); + // Check if the destination of this copy has any other affinity.
+ for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg)) + if (&MI != &Copy && MI.isCopyLike()) + return false; + return true; +} + +bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { + assert(Copy.isCopyLike()); + if (!UseTerminalRule) + return false; + // Check if the destination of this copy has any other affinity. + unsigned DstReg = Copy.getOperand(0).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(DstReg) || + !isTerminalReg(DstReg, Copy, MRI)) + return false; + + // DstReg is a terminal node. Check if it interferes with any other + // copy involving SrcReg. + unsigned SrcReg = Copy.getOperand(1).getReg(); + const MachineBasicBlock *OrigBB = Copy.getParent(); + const LiveInterval &DstLI = LIS->getInterval(DstReg); + for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) { + // Technically we should check if the weight of the new copy is + // interesting compared to the other one and update the weight + // of the copies accordingly. However, this would only work if + // we would gather all the copies first then coalesce, whereas + // right now we interleave both actions. + // For now, just consider the copies that are in the same block. + if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB) + continue; + unsigned OtherReg = MI.getOperand(0).getReg(); + if (OtherReg == SrcReg) + OtherReg = MI.getOperand(1).getReg(); + // Check if OtherReg is a non-terminal. + if (TargetRegisterInfo::isPhysicalRegister(OtherReg) || + isTerminalReg(OtherReg, MI, MRI)) + continue; + // Check that OtherReg interferes with DstReg. + if (LIS->getInterval(OtherReg).overlaps(DstLI)) { + DEBUG(dbgs() << "Apply terminal rule for: " << PrintReg(DstReg) << '\n'); + return true; + } + } + return false; +} + void RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { DEBUG(dbgs() << MBB->getName() << ":\n"); @@ -2659,7 +2781,7 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { // cmp+jmp macro fusion. for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) { - if (!MII->isCopyLike()) + if (!MII->isCopyLike() || applyTerminalRule(*MII)) continue; if (isLocalCopy(&(*MII), LIS)) LocalWorkList.push_back(&(*MII)); @@ -2670,7 +2792,7 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { else { for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) - if (MII->isCopyLike()) + if (MII->isCopyLike() && !applyTerminalRule(*MII)) WorkList.push_back(MII); } // Try coalescing the collected copies immediately, and remove the nulls. @@ -2741,7 +2863,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { AA = &getAnalysis<AliasAnalysis>(); Loops = &getAnalysis<MachineLoopInfo>(); if (EnableGlobalCopies == cl::BOU_UNSET) - JoinGlobalCopies = STI.useMachineScheduler(); + JoinGlobalCopies = STI.enableJoinGlobalCopies(); else JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE); diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 9925efb..3634103 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -304,6 +304,7 @@ static bool containsReg(ArrayRef<unsigned> RegUnits, unsigned RegUnit) { return std::find(RegUnits.begin(), RegUnits.end(), RegUnit) != RegUnits.end(); } +namespace { /// Collect this instruction's unique uses and defs into SmallVectors for /// processing defs and uses in order.
/// @@ -354,6 +355,7 @@ protected: } } }; +} // namespace /// Collect physical and virtual register operands. static void collectOperands(const MachineInstr *MI, diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 78bfd23..17dd729 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -96,14 +96,15 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) { /// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects /// and adds support for basic ptrtoint+arithmetic+inttoptr sequences. static void getUnderlyingObjects(const Value *V, - SmallVectorImpl<Value *> &Objects) { + SmallVectorImpl<Value *> &Objects, + const DataLayout &DL) { SmallPtrSet<const Value *, 16> Visited; SmallVector<const Value *, 4> Working(1, V); do { V = Working.pop_back_val(); SmallVector<Value *, 4> Objs; - GetUnderlyingObjects(const_cast<Value *>(V), Objs); + GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL); for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); I != IE; ++I) { @@ -132,7 +133,8 @@ UnderlyingObjectsVector; /// object, return the Value for that object. static void getUnderlyingObjectsForInstr(const MachineInstr *MI, const MachineFrameInfo *MFI, - UnderlyingObjectsVector &Objects) { + UnderlyingObjectsVector &Objects, + const DataLayout &DL) { if (!MI->hasOneMemOperand() || (!(*MI->memoperands_begin())->getValue() && !(*MI->memoperands_begin())->getPseudoValue()) || @@ -156,7 +158,7 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, return; SmallVector<Value *, 4> Objs; - getUnderlyingObjects(V, Objs); + getUnderlyingObjects(V, Objs, DL); for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); I != IE; ++I) { @@ -468,7 +470,8 @@ static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { // This MI might have either incomplete info, or known to be unsafe // to deal with (i.e. volatile object). static inline bool isUnsafeMemoryObject(MachineInstr *MI, - const MachineFrameInfo *MFI) { + const MachineFrameInfo *MFI, + const DataLayout &DL) { if (!MI || MI->memoperands_empty()) return true; // We purposefully do no check for hasOneMemOperand() here @@ -491,7 +494,7 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, return true; SmallVector<Value *, 4> Objs; - getUnderlyingObjects(V, Objs); + getUnderlyingObjects(V, Objs, DL); for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); I != IE; ++I) { // Does this pointer refer to a distinct and identifiable object? @@ -508,7 +511,7 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, /// these two MIs be reordered during scheduling from memory dependency /// point of view. 
static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, - MachineInstr *MIa, + const DataLayout &DL, MachineInstr *MIa, MachineInstr *MIb) { const MachineFunction *MF = MIa->getParent()->getParent(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -527,7 +530,7 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) return true; - if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI)) + if (isUnsafeMemoryObject(MIa, MFI, DL) || isUnsafeMemoryObject(MIb, MFI, DL)) return true; // If we are dealing with two "normal" loads, we do not need an edge @@ -579,10 +582,10 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, /// This recursive function iterates over chain deps of SUb looking for /// "latest" node that needs a chain edge to SUa. -static unsigned -iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, - SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth, - SmallPtrSetImpl<const SUnit*> &Visited) { +static unsigned iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, + const DataLayout &DL, SUnit *SUa, SUnit *SUb, + SUnit *ExitSU, unsigned *Depth, + SmallPtrSetImpl<const SUnit *> &Visited) { if (!SUa || !SUb || SUb == ExitSU) return *Depth; @@ -607,7 +610,7 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, // add that edge to the predecessors chain of SUb, // and stop descending. if (*Depth > 200 || - MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { + MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) { SUb->addPred(SDep(SUa, SDep::MayAliasMem)); return *Depth; } @@ -617,7 +620,7 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end(); I != E; ++I) if (I->isNormalMemoryOrBarrier()) - iterateChainSucc (AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited); + iterateChainSucc(AA, MFI, DL, SUa, I->getSUnit(), ExitSU, Depth, Visited); return *Depth; } @@ -626,7 +629,8 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, /// checks whether SU can be aliasing any node dominated /// by it. static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI, - SUnit *SU, SUnit *ExitSU, std::set<SUnit *> &CheckList, + const DataLayout &DL, SUnit *SU, SUnit *ExitSU, + std::set<SUnit *> &CheckList, unsigned LatencyToLoad) { if (!SU) return; @@ -638,7 +642,7 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI, I != IE; ++I) { if (SU == *I) continue; - if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) { + if (MIsNeedChainEdge(AA, MFI, DL, SU->getInstr(), (*I)->getInstr())) { SDep Dep(SU, SDep::MayAliasMem); Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0); (*I)->addPred(Dep); @@ -649,22 +653,22 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI, for (SUnit::const_succ_iterator J = (*I)->Succs.begin(), JE = (*I)->Succs.end(); J != JE; ++J) if (J->isNormalMemoryOrBarrier()) - iterateChainSucc (AA, MFI, SU, J->getSUnit(), - ExitSU, &Depth, Visited); + iterateChainSucc(AA, MFI, DL, SU, J->getSUnit(), ExitSU, &Depth, + Visited); } } /// Check whether two objects need a chain edge, if so, add it /// otherwise remember the rejected SU. 
-static inline -void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI, - SUnit *SUa, SUnit *SUb, - std::set<SUnit *> &RejectList, - unsigned TrueMemOrderLatency = 0, - bool isNormalMemory = false) { +static inline void addChainDependency(AliasAnalysis *AA, + const MachineFrameInfo *MFI, + const DataLayout &DL, SUnit *SUa, + SUnit *SUb, std::set<SUnit *> &RejectList, + unsigned TrueMemOrderLatency = 0, + bool isNormalMemory = false) { // If this is a false dependency, // do not add the edge, but rememeber the rejected node. - if (MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { + if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) { SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier); Dep.setLatency(TrueMemOrderLatency); SUb->addPred(Dep); @@ -883,7 +887,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, BarrierChain = SU; // This is a barrier event that acts as a pivotal node in the DAG, // so it is safe to clear list of exposed nodes. - adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, + adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); RejectMemNodes.clear(); NonAliasMemDefs.clear(); @@ -896,25 +900,27 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, unsigned ChainLatency = 0; if (AliasChain->getInstr()->mayLoad()) ChainLatency = TrueMemOrderLatency; - addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes, - ChainLatency); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain, + RejectMemNodes, ChainLatency); } AliasChain = SU; for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes, + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + I->second[i], RejectMemNodes); } for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes, - TrueMemOrderLatency); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + I->second[i], RejectMemNodes, TrueMemOrderLatency); } - adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, + adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); PendingLoads.clear(); AliasMemDefs.clear(); @@ -928,7 +934,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, BarrierChain->addPred(SDep(SU, SDep::Barrier)); UnderlyingObjectsVector Objs; - getUnderlyingObjectsForInstr(MI, MFI, Objs); + getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout()); if (Objs.empty()) { // Treat all other stores conservatively. @@ -952,8 +958,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? 
AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes, - 0, true); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + I->second[i], RejectMemNodes, 0, true); // If we're not using AA, then we only need one store per object. if (!AAForDep) @@ -977,7 +983,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); if (J != JE) { for (unsigned i = 0, e = J->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, J->second[i], RejectMemNodes, + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + J->second[i], RejectMemNodes, TrueMemOrderLatency, true); J->second.clear(); } @@ -986,13 +993,15 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // Add dependencies from all the PendingLoads, i.e. loads // with no underlying object. for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes, + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); // Add dependence on alias chain, if needed. if (AliasChain) - addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain, + RejectMemNodes); } - adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, + adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); } else if (MI->mayLoad()) { bool MayAlias = true; @@ -1000,7 +1009,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // Invariant load, no chain dependencies needed! } else { UnderlyingObjectsVector Objs; - getUnderlyingObjectsForInstr(MI, MFI, Objs); + getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout()); if (Objs.empty()) { // A load with no underlying object. Depend on all @@ -1008,8 +1017,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, I->second[i], - RejectMemNodes); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + I->second[i], RejectMemNodes); PendingLoads.push_back(SU); MayAlias = true; @@ -1032,18 +1041,20 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, I->second[i], - RejectMemNodes, 0, true); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + I->second[i], RejectMemNodes, 0, true); if (ThisMayAlias) AliasMemUses[V].push_back(SU); else NonAliasMemUses[V].push_back(SU); } if (MayAlias) - adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0); + adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, + RejectMemNodes, /*Latency=*/0); // Add dependencies on alias and barrier chains, if needed. 
if (MayAlias && AliasChain) - addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain, + RejectMemNodes); if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Barrier)); } @@ -1211,7 +1222,7 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { else if (SU == &ExitSU) oss << "<exit>"; else - SU->getInstr()->print(oss, &TM, /*SkipOpers=*/true); + SU->getInstr()->print(oss, /*SkipOpers=*/true); return oss.str(); } diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6129401..a1c84c5 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -246,10 +246,11 @@ namespace { SDValue visitSDIVREM(SDNode *N); SDValue visitUDIVREM(SDNode *N); SDValue visitAND(SDNode *N); + SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitOR(SDNode *N); + SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitXOR(SDNode *N); SDValue SimplifyVBinOp(SDNode *N); - SDValue SimplifyVUnaryOp(SDNode *N); SDValue visitSHL(SDNode *N); SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); @@ -302,6 +303,7 @@ namespace { SDValue visitCONCAT_VECTORS(SDNode *N); SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); + SDValue visitSCALAR_TO_VECTOR(SDNode *N); SDValue visitINSERT_SUBVECTOR(SDNode *N); SDValue visitMLOAD(SDNode *N); SDValue visitMSTORE(SDNode *N); @@ -713,6 +715,22 @@ static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) { return nullptr; } +static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) { + if (isa<ConstantSDNode>(N)) + return N.getNode(); + if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) + return N.getNode(); + return nullptr; +} + +static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { + if (isa<ConstantFPSDNode>(N)) + return N.getNode(); + if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) + return N.getNode(); + return nullptr; +} + // \brief Returns the SDNode if it is a constant splat BuildVector or constant // int. static ConstantSDNode *isConstOrConstSplat(SDValue N) { @@ -1180,11 +1198,6 @@ void DAGCombiner::Run(CombineLevel AtLevel) { LegalOperations = Level >= AfterLegalizeVectorOps; LegalTypes = Level >= AfterLegalizeTypes; - // Early exit if this basic block is in an optnone function. - if (DAG.getMachineFunction().getFunction()->hasFnAttribute( - Attribute::OptimizeNone)) - return; - // Add all the dag nodes to the worklist. for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I) @@ -1369,6 +1382,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); + case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N); case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); case ISD::MLOAD: return visitMLOAD(N); case ISD::MSTORE: return visitMSTORE(N); @@ -2685,6 +2699,109 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { return SDValue(); } +/// This contains all DAGCombine rules which reduce two values combined by +/// an And operation to a single value. This makes them reusable in the context +/// of visitSELECT(). Rules involving constants are not included as +/// visitSELECT() already handles those cases. 
+SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, + SDNode *LocReference) { + EVT VT = N1.getValueType(); + + // fold (and x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) + SDValue LL, LR, RL, RR, CC0, CC1; + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && + LL.getValueType().isInteger()) { + // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) + if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } + // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ANDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + } + // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } + } + // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) + if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && + Op0 == Op1 && LL.getValueType().isInteger() && + Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() && + cast<ConstantSDNode>(RR)->isAllOnesValue()) || + (cast<ConstantSDNode>(LR)->isAllOnesValue() && + cast<ConstantSDNode>(RR)->isNullValue()))) { + SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), + LL, DAG.getConstant(1, LL.getValueType())); + AddToWorklist(ADDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode, + DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || + (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, + getSetCCResultType(N0.getSimpleValueType()))))) + return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), + LL, LR, Result); + } + } + + if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && + VT.getSizeInBits() <= 64) { + if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + APInt ADDC = ADDI->getAPIntValue(); + if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal + // immediate for an add, but it is legal if its top c2 bits are set, + // transform the ADD so the immediate doesn't need to be materialized + // in a register. 
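// ---- Editor's note: illustrative sketch, not part of the patch ----
// The (and (add x, c1), (srl y, c2)) rewrite completed just below rests
// on this arithmetic fact: (y >> c2) has its top c2 bits clear, so the
// AND discards the top c2 bits of the sum, and ORing those bits into c1
// cannot change the result. A standalone check of the identity:
#include <cassert>
#include <cstdint>
int main() {
  const unsigned c2 = 8;
  const uint32_t Mask = ~uint32_t(0) << (32 - c2);   // top c2 bits set
  const uint32_t c1 = 0x00ABCDEFu;                   // top c2 bits clear
  for (uint32_t x = 0; x < 100000; x += 977)
    for (uint32_t y = 0; y < 100000; y += 1213)
      assert(((x + c1) & (y >> c2)) == ((x + (c1 | Mask)) & (y >> c2)));
}
// ---- end note ------------------------------------------------------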
+ if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { + APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), + SRLI->getZExtValue()); + if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { + ADDC |= Mask; + if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + SDValue NewAdd = + DAG.getNode(ISD::ADD, SDLoc(N0), VT, + N0.getOperand(0), DAG.getConstant(ADDC, VT)); + CombineTo(N0.getNode(), NewAdd); + // Return N so it doesn't get rechecked! + return SDValue(LocReference, 0); + } + } + } + } + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2716,9 +2833,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return N0; } - // fold (and x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); // fold (and c1, c2) -> c1&c2 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); @@ -2808,9 +2922,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SplatBitSize = SplatBitSize * 2) SplatValue |= SplatValue.shl(SplatBitSize); - Constant = APInt::getAllOnesValue(BitWidth); - for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) - Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); + // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a + // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value. + if (SplatBitSize % BitWidth == 0) { + Constant = APInt::getAllOnesValue(BitWidth); + for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) + Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); + } } } @@ -2863,118 +2981,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} } - // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) - SDValue LL, LR, RL, RR, CC0, CC1; - if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ - ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); - ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); - - if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && - LL.getValueType().isInteger()) { - // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) - if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); - } - // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) - if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { - SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ANDNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); - } - // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) - if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); - } - } - // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) - if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && - Op0 == Op1 && LL.getValueType().isInteger() && - Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() && - cast<ConstantSDNode>(RR)->isAllOnesValue()) || - (cast<ConstantSDNode>(LR)->isAllOnesValue() && - cast<ConstantSDNode>(RR)->isNullValue()))) { - SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), - LL, DAG.getConstant(1, LL.getValueType())); - AddToWorklist(ADDNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ADDNode, - DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); - } - // canonicalize equivalent to ll == rl - if (LL == RR && LR == RL) { - Op1 = ISD::getSetCCSwappedOperands(Op1); - std::swap(RL, RR); - } - if (LL == RL && LR == RR) { - bool isInteger = LL.getValueType().isInteger(); - ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); - if (Result != ISD::SETCC_INVALID && - (!LegalOperations || - (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && - TLI.isOperationLegal(ISD::SETCC, - getSetCCResultType(N0.getSimpleValueType()))))) - return DAG.getSetCC(SDLoc(N), N0.getValueType(), - LL, LR, Result); - } - } - - // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) - if (N0.getOpcode() == N1.getOpcode()) { - SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); - if (Tmp.getNode()) return Tmp; - } - - // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) - // fold (and (sra)) -> (and (srl)) when possible. - if (!VT.isVector() && - SimplifyDemandedBits(SDValue(N, 0))) - return SDValue(N, 0); - - // fold (zext_inreg (extload x)) -> (zextload x) - if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - EVT MemVT = LN0->getMemoryVT(); - // If we zero all the possible extended bits, then we can turn this into - // a zextload if we are running before legalize or the operation is legal. 
- unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); - if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getScalarType().getSizeInBits())) && - ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, - LN0->getChain(), LN0->getBasePtr(), - MemVT, LN0->getMemOperand()); - AddToWorklist(N); - CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } - // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use - if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && - N0.hasOneUse()) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - EVT MemVT = LN0->getMemoryVT(); - // If we zero all the possible extended bits, then we can turn this into - // a zextload if we are running before legalize or the operation is legal. - unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); - if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getScalarType().getSizeInBits())) && - ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, - LN0->getChain(), LN0->getBasePtr(), - MemVT, LN0->getMemOperand()); - AddToWorklist(N); - CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } // fold (and (load x), 255) -> (zextload x, i8) // fold (and (extload x, i16), 255) -> (zextload x, i8) @@ -3046,33 +3052,60 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } - if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && - VT.getSizeInBits() <= 64) { - if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - APInt ADDC = ADDI->getAPIntValue(); - if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { - // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal - // immediate for an add, but it is legal if its top c2 bits are set, - // transform the ADD so the immediate doesn't need to be materialized - // in a register. - if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { - APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), - SRLI->getZExtValue()); - if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { - ADDC |= Mask; - if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { - SDValue NewAdd = - DAG.getNode(ISD::ADD, SDLoc(N0), VT, - N0.getOperand(0), DAG.getConstant(ADDC, VT)); - CombineTo(N0.getNode(), NewAdd); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } - } - } - } + if (SDValue Combined = visitANDLike(N0, N1, N)) + return Combined; + + // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.getNode()) return Tmp; } + // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) + // fold (and (sra)) -> (and (srl)) when possible. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (zext_inreg (extload x)) -> (zextload x) + if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. 
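// ---- Editor's note: illustrative sketch, not part of the patch ----
// Both zext_inreg folds here depend on the same fact: once the mask
// clears every bit above the memory width, the result equals a plain
// zero-extension of the narrow value, regardless of what a sign- or
// any-extending load left in the high bits. Checked exhaustively for i8:
#include <cassert>
#include <cstdint>
int main() {
  for (int v = -128; v < 128; ++v) {
    int8_t Mem = static_cast<int8_t>(v);
    uint32_t SExtLoad = static_cast<uint32_t>(static_cast<int32_t>(Mem));
    uint32_t ZExtLoad = static_cast<uint8_t>(Mem);
    assert((SExtLoad & 0xFFu) == ZExtLoad);   // masking == zero-extending
  }
}
// ---- end note ------------------------------------------------------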
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - MemVT.getScalarType().getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, + LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); + AddToWorklist(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use + if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - MemVT.getScalarType().getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, + LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); + AddToWorklist(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), @@ -3338,6 +3371,98 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt)); } +/// This contains all DAGCombine rules which reduce two values combined by +/// an Or operation to a single value \see visitANDLike(). +SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { + EVT VT = N1.getValueType(); + // fold (or x, undef) -> -1 + if (!LegalOperations && + (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { + EVT EltVT = VT.isVector() ? 
VT.getVectorElementType() : VT; + return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); + } + // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) + SDValue LL, LR, RL, RR, CC0, CC1; + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && + LL.getValueType().isInteger()) { + // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) + // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) + if (cast<ConstantSDNode>(LR)->isNullValue() && + (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } + // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) + // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && + (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), + LR.getValueType(), LL, RL); + AddToWorklist(ANDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + } + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || + (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, + getSetCCResultType(N0.getValueType()))))) + return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), + LL, LR, Result); + } + } + + // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. + if (N0.getOpcode() == ISD::AND && + N1.getOpcode() == ISD::AND && + N0.getOperand(1).getOpcode() == ISD::Constant && + N1.getOperand(1).getOpcode() == ISD::Constant && + // Don't increase # computations. + (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + // We can only do this xform if we know that bits from X that are set in C2 + // but not in C1 are already zero. Likewise for Y. + const APInt &LHSMask = + cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + const APInt &RHSMask = + cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue(); + + if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && + DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, + N0.getOperand(0), N1.getOperand(0)); + return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, X, + DAG.getConstant(LHSMask | RHSMask, VT)); + } + } + + // (or (and X, M), (and X, N)) -> (and X, (or M, N)) + if (N0.getOpcode() == ISD::AND && + N1.getOpcode() == ISD::AND && + N0.getOperand(0) == N1.getOperand(0) && + // Don't increase # computations. 
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, + N0.getOperand(1), N1.getOperand(1)); + return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X); + } + + return SDValue(); +} + SDValue DAGCombiner::visitOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -3425,12 +3550,6 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } } - // fold (or x, undef) -> -1 - if (!LegalOperations && - (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { - EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; - return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); - } // fold (or c1, c2) -> c1|c2 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); @@ -3449,6 +3568,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) return N1; + if (SDValue Combined = visitORLike(N0, N1, N)) + return Combined; + // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) SDValue BSwap = MatchBSwapHWord(N, N0, N1); if (BSwap.getNode()) @@ -3474,91 +3596,12 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return SDValue(); } } - // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) - SDValue LL, LR, RL, RR, CC0, CC1; - if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ - ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); - ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); - - if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && - LL.getValueType().isInteger()) { - // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) - // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) - if (cast<ConstantSDNode>(LR)->isNullValue() && - (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); - } - // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) - // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) - if (cast<ConstantSDNode>(LR)->isAllOnesValue() && - (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { - SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), - LR.getValueType(), LL, RL); - AddToWorklist(ANDNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); - } - } - // canonicalize equivalent to ll == rl - if (LL == RR && LR == RL) { - Op1 = ISD::getSetCCSwappedOperands(Op1); - std::swap(RL, RR); - } - if (LL == RL && LR == RR) { - bool isInteger = LL.getValueType().isInteger(); - ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); - if (Result != ISD::SETCC_INVALID && - (!LegalOperations || - (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && - TLI.isOperationLegal(ISD::SETCC, - getSetCCResultType(N0.getValueType()))))) - return DAG.getSetCC(SDLoc(N), N0.getValueType(), - LL, LR, Result); - } - } - // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) if (N0.getOpcode() == N1.getOpcode()) { SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); if (Tmp.getNode()) return Tmp; } - // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. - if (N0.getOpcode() == ISD::AND && - N1.getOpcode() == ISD::AND && - N0.getOperand(1).getOpcode() == ISD::Constant && - N1.getOperand(1).getOpcode() == ISD::Constant && - // Don't increase # computations. 
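// ---- Editor's note: illustrative sketch, not part of the patch ----
// The two OR-of-ANDs folds moved into visitORLike above are backed by
// these identities: (X & M) | (X & N) == X & (M | N) unconditionally,
// while (X & C1) | (Y & C2) == (X | Y) & (C1 | C2) needs the
// MaskedValueIsZero preconditions. A standalone check:
#include <cassert>
#include <cstdint>
int main() {
  for (uint32_t X = 0; X < 256; ++X)
    assert(((X & 0xF0u) | (X & 0x3Cu)) == (X & (0xF0u | 0x3Cu)));

  const uint32_t C1 = 0xFF00u, C2 = 0x00FFu;
  for (uint32_t X = 0; X < 0x10000u; X += 256)  // X has no bits in C2&~C1
    for (uint32_t Y = 0; Y < 256; ++Y)          // Y has no bits in C1&~C2
      assert(((X & C1) | (Y & C2)) == ((X | Y) & (C1 | C2)));
}
// ---- end note ------------------------------------------------------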
- (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { - // We can only do this xform if we know that bits from X that are set in C2 - // but not in C1 are already zero. Likewise for Y. - const APInt &LHSMask = - cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - const APInt &RHSMask = - cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue(); - - if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && - DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { - SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, - N0.getOperand(0), N1.getOperand(0)); - return DAG.getNode(ISD::AND, SDLoc(N), VT, X, - DAG.getConstant(LHSMask | RHSMask, VT)); - } - } - - // (or (and X, M), (and X, N)) -> (and X, (or M, N)) - if (N0.getOpcode() == ISD::AND && - N1.getOpcode() == ISD::AND && - N0.getOperand(0) == N1.getOperand(0) && - // Don't increase # computations. - (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { - SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, - N0.getOperand(1), N1.getOperand(1)); - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), X); - } - // See if this is some rotate idiom. if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) return SDValue(Rot, 0); @@ -3947,6 +3990,32 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (N0 == N1) return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); + // fold (xor (shl 1, x), -1) -> (rotl ~1, x) + // Here is a concrete example of this equivalence: + // i16 x == 14 + // i16 shl == 1 << 14 == 16384 == 0b0100000000000000 + // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111 + // + // => + // + // i16 ~1 == 0b1111111111111110 + // i16 rol(~1, 14) == 0b1011111111111111 + // + // Some additional tips to help conceptualize this transform: + // - Try to see the operation as placing a single zero in a value of all ones. + // - There exists no value for x which would allow the result to contain zero. + // - Values of x larger than the bitwidth are undefined and do not require a + // consistent result. + // - Pushing the zero left requires shifting one bits in from the right. + // A rotate left of ~1 is a nice way of achieving the desired result. + if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) + if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) + if (N0.getOpcode() == ISD::SHL) + if (auto *ShlLHS = dyn_cast<ConstantSDNode>(N0.getOperand(0))) + if (N1C->isAllOnesValue() && ShlLHS->isOne()) + return DAG.getNode(ISD::ROTL, SDLoc(N), VT, DAG.getConstant(~1, VT), + N0.getOperand(1)); + // Simplify: xor (op x...), (op y...) 
-> (op (xor x, y)) if (N0.getOpcode() == N1.getOpcode()) { SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); @@ -4792,6 +4861,69 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return SimplifySelect(SDLoc(N), N0, N1, N2); } + if (VT0 == MVT::i1) { + if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + // select (and Cond0, Cond1), X, Y + // -> select Cond0, (select Cond1, X, Y), Y + if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, + InnerSelect, N2); + } + // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) + if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, + InnerSelect); + } + } + + // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y + if (N1->getOpcode() == ISD::SELECT) { + SDValue N1_0 = N1->getOperand(0); + SDValue N1_1 = N1->getOperand(1); + SDValue N1_2 = N1->getOperand(2); + if (N1_2 == N2) { + // Create the actual and node if we can generate good code for it. + if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), + N0, N1_0); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, + N1_1, N2); + } + // Otherwise see if we can optimize the "and" to a better pattern. + if (SDValue Combined = visitANDLike(N0, N1_0, N)) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, + N1_1, N2); + } + } + // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y + if (N2->getOpcode() == ISD::SELECT) { + SDValue N2_0 = N2->getOperand(0); + SDValue N2_1 = N2->getOperand(1); + SDValue N2_2 = N2->getOperand(2); + if (N2_1 == N1) { + // Create the actual or node if we can generate good code for it. + if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), + N0, N2_0); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, + N1, N2_2); + } + // Otherwise see if we can optimize to a better pattern. + if (SDValue Combined = visitORLike(N0, N2_0, N)) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, + N1, N2_2); + } + } + } + return SDValue(); } @@ -6440,7 +6572,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (N0.getValueType() == N->getValueType(0)) return N0; // fold (truncate c1) -> c1 - if (isa<ConstantSDNode>(N0)) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); // fold (truncate (truncate x)) -> (truncate x) if (N0.getOpcode() == ISD::TRUNCATE) @@ -7453,14 +7585,23 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // Fold scalars or any vector constants (not just splats). // This fold is done in general by InstCombine, but extra fmul insts // may have been generated during lowering. 
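// ---- Editor's note: illustrative sketch, not part of the patch ----
// The visitSELECT hunk above rewrites select-of-and/or conditions into
// nested selects (and back to the flat form when
// shouldNormalizeToSelectSequence says that is cheaper). Both directions
// are sound because of this truth-table identity, checked over all
// boolean combinations:
#include <cassert>
int main() {
  for (int a = 0; a <= 1; ++a)
    for (int b = 0; b <= 1; ++b)
      for (int x = 0; x <= 3; ++x)
        for (int y = 0; y <= 3; ++y) {
          assert((((a && b) ? x : y)) == (a ? (b ? x : y) : y));
          assert((((a || b) ? x : y)) == (a ? x : (b ? x : y)));
        }
}
// ---- end note ------------------------------------------------------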
+ SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); + auto *BV00 = dyn_cast<BuildVectorSDNode>(N00); auto *BV01 = dyn_cast<BuildVectorSDNode>(N01); - if ((N1CFP && isConstOrConstSplatFP(N01)) || - (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { - SDLoc SL(N); - SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1); - return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts); + + // Check 1: Make sure that the first operand of the inner multiply is NOT + // a constant. Otherwise, we may induce infinite looping. + if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) { + // Check 2: Make sure that the second operand of the inner multiply and + // the second operand of the outer multiply are constants. + if ((N1CFP && isConstOrConstSplatFP(N01)) || + (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { + SDLoc SL(N); + SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1); + return DAG.getNode(ISD::FMUL, SL, VT, N00, MulConsts); + } } } @@ -7821,8 +7962,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - if (N0C && + if (isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -7874,8 +8014,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - if (N0C && + if (isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -8033,7 +8172,6 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. @@ -8042,7 +8180,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { return SDValue(); // fold (fp_extend c1fp) -> c1fp - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0); // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the @@ -8117,14 +8255,9 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVUnaryOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } - // Constant fold FNEG. 
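// ---- Editor's note: illustrative sketch, not part of the patch ----
// Check 1 in the fmul hunk above exists to keep the combiner from
// looping: if the inner multiply's first operand were itself constant,
// the rewrite (fmul (fmul c0, c1), c2) -> (fmul c0, (fmul c1, c2))
// would produce the same fmul-of-fmul-with-constants shape and match
// again forever. A hypothetical miniature of the two guards:
#include <cassert>
struct Mul { bool LHSIsConst, RHSIsConst; };
bool shouldReassociate(const Mul &Inner, bool OuterRHSIsConst) {
  if (Inner.LHSIsConst) return false;            // Check 1: break the cycle
  return Inner.RHSIsConst && OuterRHSIsConst;    // Check 2: fold constants
}
int main() {
  assert(shouldReassociate({false, true}, true));   // (x*c1)*c2 -> fold
  assert(!shouldReassociate({true, true}, true));   // (c0*c1)*c2 -> skip
}
// ---- end note ------------------------------------------------------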
- if (isa<ConstantFPSDNode>(N0)) - return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N->getOperand(0)); + if (isConstantFPBuildVectorOrConstantFP(N0)) + return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), &DAG.getTarget().Options)) @@ -8219,13 +8352,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVUnaryOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } - // fold (fabs c1) -> fabs(c1) - if (isa<ConstantFPSDNode>(N0)) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); // fold (fabs (fabs x)) -> (fabs x) @@ -8941,7 +9069,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), Align, LD->getAAInfo()); - return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); + if (NewLoad.getNode() != N) + return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); } } } @@ -9106,9 +9235,6 @@ struct LoadedSlice { unsigned Shift = 0, SelectionDAG *DAG = nullptr) : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} - LoadedSlice(const LoadedSlice &LS) - : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {} - /// \brief Get the bits used in a chunk of bits \p BitWidth large. /// \return Result is \p BitWidth and has used bits set to 1 and /// not used bits set to 0. @@ -9855,6 +9981,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { return SDValue(); } +namespace { /// Helper struct to parse and store a memory address as base + index + offset. /// We ignore sign extensions when it is safe to do so. /// The following two expressions are not equivalent. To differentiate we need @@ -9942,6 +10069,7 @@ struct BaseIndexOffset { return BaseIndexOffset(Base, Index, Off, IsIndexSignExt); } }; +} // namespace bool DAGCombiner::MergeStoresOfConstantsOrVecElts( SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, @@ -10575,11 +10703,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Try to infer better alignment information than the store already has. if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > ST->getAlignment()) - return DAG.getTruncStore(Chain, SDLoc(N), Value, + if (Align > ST->getAlignment()) { + SDValue NewStore = + DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), ST->getMemoryVT(), ST->isVolatile(), ST->isNonTemporal(), Align, ST->getAAInfo()); + if (NewStore.getNode() != N) + return CombineTo(ST, NewStore, true); + } } } @@ -11226,12 +11358,10 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); - SDValue V = reduceBuildVecExtToExtBuildVec(N); - if (V.getNode()) + if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) return V; - V = reduceBuildVecConvertToConvertBuildVec(N); - if (V.getNode()) + if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) return V; // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT @@ -11352,7 +11482,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) { // If the input vector is too large, try to split it. // We don't support having two input vectors that are too large. - if (VecIn2.getNode()) + // If the zero vector was used, we can not split the vector, + // since we'd need 3 inputs. 
+ if (UsesZeroVector || VecIn2.getNode()) return SDValue(); if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements())) @@ -11364,7 +11496,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy())); VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, DAG.getConstant(0, TLI.getVectorIdxTy())); - UsesZeroVector = false; } else return SDValue(); } @@ -11465,14 +11596,12 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { unsigned NumElts = OpVT.getVectorNumElements(); if (ISD::UNDEF == Op.getOpcode()) - for (unsigned i = 0; i != NumElts; ++i) - Opnds.push_back(DAG.getUNDEF(MinVT)); + Opnds.append(NumElts, DAG.getUNDEF(MinVT)); if (ISD::BUILD_VECTOR == Op.getOpcode()) { if (SVT.isFloatingPoint()) { assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch"); - for (unsigned i = 0; i != NumElts; ++i) - Opnds.push_back(Op.getOperand(i)); + Opnds.append(Op->op_begin(), Op->op_begin() + NumElts); } else { for (unsigned i = 0; i != NumElts; ++i) Opnds.push_back( @@ -11850,7 +11979,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // We may have jumped through bitcasts, so the type of the // BUILD_VECTOR may not match the type of the shuffle. if (V->getValueType(0) != VT) - NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV); + NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV); return NewBV; } } @@ -11872,6 +12001,81 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return V; } + // If this shuffle only has a single input that is a bitcasted shuffle, + // attempt to merge the 2 shuffles and suitably bitcast the inputs/output + // back to their original types. + if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && + N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps && + TLI.isTypeLegal(VT)) { + + // Peek through the bitcast only if there is one user. + SDValue BC0 = N0; + while (BC0.getOpcode() == ISD::BITCAST) { + if (!BC0.hasOneUse()) + break; + BC0 = BC0.getOperand(0); + } + + auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) { + if (Scale == 1) + return SmallVector<int, 8>(Mask.begin(), Mask.end()); + + SmallVector<int, 8> NewMask; + for (int M : Mask) + for (int s = 0; s != Scale; ++s) + NewMask.push_back(M < 0 ? -1 : Scale * M + s); + return NewMask; + }; + + if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) { + EVT SVT = VT.getScalarType(); + EVT InnerVT = BC0->getValueType(0); + EVT InnerSVT = InnerVT.getScalarType(); + + // Determine which shuffle works with the smaller scalar type. + EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT; + EVT ScaleSVT = ScaleVT.getScalarType(); + + if (TLI.isTypeLegal(ScaleVT) && + 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) && + 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) { + + int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits(); + int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits(); + + // Scale the shuffle masks to the smaller scalar type. + ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0); + SmallVector<int, 8> InnerMask = + ScaleShuffleMask(InnerSVN->getMask(), InnerScale); + SmallVector<int, 8> OuterMask = + ScaleShuffleMask(SVN->getMask(), OuterScale); + + // Merge the shuffle masks. + SmallVector<int, 8> NewMask; + for (int M : OuterMask) + NewMask.push_back(M < 0 ? -1 : InnerMask[M]); + + // Test for shuffle mask legality over both commutations. 
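// ---- Editor's note: illustrative sketch, not part of the patch ----
// A standalone copy of the ScaleShuffleMask lambda above: rewriting a
// shuffle of wide elements as a shuffle of narrow elements multiplies
// each index by Scale and emits Scale consecutive entries per lane
// (undef lanes stay -1). For example, scaling {1, -1} by 2:
#include <cassert>
#include <vector>
std::vector<int> scaleShuffleMask(const std::vector<int> &Mask, int Scale) {
  std::vector<int> NewMask;
  for (int M : Mask)
    for (int s = 0; s != Scale; ++s)
      NewMask.push_back(M < 0 ? -1 : Scale * M + s);
  return NewMask;
}
int main() {
  // One v2i64-style lane index maps onto two v4i32-style lane indices.
  assert(scaleShuffleMask({1, -1}, 2) == (std::vector<int>{2, 3, -1, -1}));
}
// ---- end note ------------------------------------------------------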
+ SDValue SV0 = BC0->getOperand(0); + SDValue SV1 = BC0->getOperand(1); + bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); + if (!LegalMask) { + std::swap(SV0, SV1); + ShuffleVectorSDNode::commuteMask(NewMask); + LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); + } + + if (LegalMask) { + SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0); + SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1); + return DAG.getNode( + ISD::BITCAST, SDLoc(N), VT, + DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask)); + } + } + } + } + // Canonicalize shuffles according to rules: // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A) // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B) @@ -11981,16 +12185,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // Avoid introducing shuffles with illegal mask. if (!TLI.isShuffleMaskLegal(Mask, VT)) { - // Compute the commuted shuffle mask and test again. - for (unsigned i = 0; i != NumElts; ++i) { - int idx = Mask[i]; - if (idx < 0) - continue; - else if (idx < (int)NumElts) - Mask[i] = idx + NumElts; - else - Mask[i] = idx - NumElts; - } + ShuffleVectorSDNode::commuteMask(Mask); if (!TLI.isShuffleMaskLegal(Mask, VT)) return SDValue(); @@ -12010,6 +12205,34 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { + SDValue InVal = N->getOperand(0); + EVT VT = N->getValueType(0); + + // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern + // with a VECTOR_SHUFFLE. + if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + SDValue InVec = InVal->getOperand(0); + SDValue EltNo = InVal->getOperand(1); + + // FIXME: We could support implicit truncation if the shuffle can be + // scaled to a smaller vector scalar type. + ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo); + if (C0 && VT == InVec.getValueType() && + VT.getScalarType() == InVal.getValueType()) { + SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1); + int Elt = C0->getZExtValue(); + NewMask[0] = Elt; + + if (TLI.isShuffleMaskLegal(NewMask, VT)) + return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT), + NewMask); + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N2 = N->getOperand(2); @@ -12043,44 +12266,51 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { EVT VT = N->getValueType(0); - SDLoc dl(N); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - if (N->getOpcode() == ISD::AND) { - if (RHS.getOpcode() == ISD::BITCAST) - RHS = RHS.getOperand(0); - if (RHS.getOpcode() == ISD::BUILD_VECTOR) { - SmallVector<int, 8> Indices; - unsigned NumElts = RHS.getNumOperands(); - for (unsigned i = 0; i != NumElts; ++i) { - SDValue Elt = RHS.getOperand(i); - if (!isa<ConstantSDNode>(Elt)) - return SDValue(); + SDLoc dl(N); - if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) - Indices.push_back(i); - else if (cast<ConstantSDNode>(Elt)->isNullValue()) - Indices.push_back(NumElts+i); - else - return SDValue(); - } + // Make sure we're not running after operation legalization where it + // may have custom lowered the vector shuffles. 
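// ---- Editor's note: illustrative sketch, not part of the patch ----
// The new visitSCALAR_TO_VECTOR above: extracting lane C0 and placing
// it in lane 0 of a fresh vector is exactly a shuffle of the original
// vector with mask {C0, -1, -1, ...}. Emulated on plain arrays, with
// undef (-1) lanes modeled as 0:
#include <array>
#include <cassert>
int main() {
  std::array<int, 4> V = {10, 20, 30, 40};
  const int Elt = 2;
  std::array<int, 4> ScalarToVec = {V[Elt], 0, 0, 0};
  std::array<int, 4> Mask = {Elt, -1, -1, -1};
  std::array<int, 4> Shuffled{};
  for (int i = 0; i != 4; ++i)
    Shuffled[i] = Mask[i] < 0 ? 0 : V[Mask[i]];
  assert(ScalarToVec == Shuffled);
}
// ---- end note ------------------------------------------------------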
+ if (LegalOperations) + return SDValue(); + + if (N->getOpcode() != ISD::AND) + return SDValue(); - // Let's see if the target supports this vector_shuffle and make sure - // we're not running after operation legalization where it may have - // custom lowered the vector shuffles. - EVT RVT = RHS.getValueType(); - if (LegalOperations || !TLI.isVectorClearMaskLegal(Indices, RVT)) + if (RHS.getOpcode() == ISD::BITCAST) + RHS = RHS.getOperand(0); + + if (RHS.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<int, 8> Indices; + unsigned NumElts = RHS.getNumOperands(); + + for (unsigned i = 0; i != NumElts; ++i) { + SDValue Elt = RHS.getOperand(i); + if (!isa<ConstantSDNode>(Elt)) return SDValue(); - // Return the new VECTOR_SHUFFLE node. - EVT EltVT = RVT.getVectorElementType(); - SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), - DAG.getConstant(0, EltVT)); - SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps); - LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); - SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); - return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); + if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) + Indices.push_back(i); + else if (cast<ConstantSDNode>(Elt)->isNullValue()) + Indices.push_back(NumElts+i); + else + return SDValue(); } + + // Let's see if the target supports this vector_shuffle. + EVT RVT = RHS.getValueType(); + if (!TLI.isVectorClearMaskLegal(Indices, RVT)) + return SDValue(); + + // Return the new VECTOR_SHUFFLE node. + EVT EltVT = RVT.getVectorElementType(); + SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), + DAG.getConstant(0, EltVT)); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps); + LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); + SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); + return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); } return SDValue(); @@ -12093,8 +12323,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - SDValue Shuffle = XformToShuffleWithZero(N); - if (Shuffle.getNode()) return Shuffle; + + if (SDValue Shuffle = XformToShuffleWithZero(N)) + return Shuffle; // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold // this operation. @@ -12172,38 +12403,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { return SDValue(); } -/// Visit a binary vector operation, like FABS/FNEG. -SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { - assert(N->getValueType(0).isVector() && - "SimplifyVUnaryOp only works on vectors!"); - - SDValue N0 = N->getOperand(0); - - if (N0.getOpcode() != ISD::BUILD_VECTOR) - return SDValue(); - - // Operand is a BUILD_VECTOR node, see if we can constant fold it. 
- SmallVector<SDValue, 8> Ops; - for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { - SDValue Op = N0.getOperand(i); - if (Op.getOpcode() != ISD::UNDEF && - Op.getOpcode() != ISD::ConstantFP) - break; - EVT EltVT = Op.getValueType(); - SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(N0), EltVT, Op); - if (FoldOp.getOpcode() != ISD::UNDEF && - FoldOp.getOpcode() != ISD::ConstantFP) - break; - Ops.push_back(FoldOp); - AddToWorklist(FoldOp.getNode()); - } - - if (Ops.size() != N0.getNumOperands()) - return SDValue(); - - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), Ops); -} - SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2){ assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 1df4a1d..223a149 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -62,6 +62,7 @@ #include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" @@ -497,7 +498,7 @@ bool FastISel::selectGetElementPtr(const User *I) { OI != E; ++OI) { const Value *Idx = *OI; if (auto *StTy = dyn_cast<StructType>(Ty)) { - unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); + uint64_t Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { // N = N + Offset TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); @@ -518,8 +519,8 @@ bool FastISel::selectGetElementPtr(const User *I) { if (CI->isZero()) continue; // N = N + Offset - TotalOffs += - DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue(); + uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue(); + TotalOffs += DL.getTypeAllocSize(Ty) * IdxN; if (TotalOffs >= MaxOffs) { N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); if (!N) // Unhandled operand. Halt "fast" selection and bail. @@ -801,7 +802,8 @@ bool FastISel::selectPatchpoint(const CallInst *I) { return false; // Push the register mask info. - Ops.push_back(MachineOperand::CreateRegMask(TRI.getCallPreservedMask(CC))); + Ops.push_back(MachineOperand::CreateRegMask( + TRI.getCallPreservedMask(*FuncInfo.MF, CC))); // Add scratch registers as implicit def and early clobber. 
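// ---- Editor's note: illustrative sketch, not part of the patch ----
// The selectGetElementPtr fix above widens the index math (uint64_t
// Field; CI->getValue().sextOrTrunc(64)) before multiplying by the
// element size. One way narrow or unsigned index arithmetic corrupts
// offsets, shown with a negative array index on a 64-bit target:
#include <cassert>
#include <cstdint>
int main() {
  int32_t Idx = -1;
  uint64_t ElemSize = 8;
  uint64_t Wrong = ElemSize * static_cast<uint32_t>(Idx);  // zero-extends
  uint64_t Right = ElemSize * static_cast<int64_t>(Idx);   // sign-extends
  assert(Wrong == uint64_t(0xFFFFFFFFu) * 8);
  assert(Right == static_cast<uint64_t>(-8));              // intended offset
  assert(Wrong != Right);
}
// ---- end note ------------------------------------------------------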
const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 7e72dc6..291b583 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -31,6 +31,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 61c0a6f..ece38f3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1442,13 +1442,27 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy()); StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); + SDValue NewLoad; + if (Op.getValueType().isVector()) - return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(), - false, false, false, 0); - return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, - MachinePointerInfo(), - Vec.getValueType().getVectorElementType(), - false, false, false, 0); + NewLoad = DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, + MachinePointerInfo(), false, false, false, 0); + else + NewLoad = DAG.getExtLoad( + ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(), + Vec.getValueType().getVectorElementType(), false, false, false, 0); + + // Replace the chain going out of the store, by the one out of the load. + DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1)); + + // We introduced a cycle though, so update the loads operands, making sure + // to use the original store's chain as an incoming chain. 
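// ---- Editor's note: illustrative sketch, not part of the patch ----
// A toy model of the chain repair described in the comment above: after
// ReplaceAllUsesOfValueWith points every user of the old chain at the
// new load, the load's own incoming chain points at itself; patching
// its operand back to the original chain breaks the cycle.
#include <cassert>
#include <vector>
struct Node { std::vector<Node*> Ops; };
int main() {
  Node Ch, Load, OtherUser;
  Load.Ops = {&Ch};
  OtherUser.Ops = {&Ch};
  Node *Users[] = {&Load, &OtherUser};
  for (Node *N : Users)                 // ReplaceAllUsesOfValueWith(Ch, Load)
    for (Node *&Op : N->Ops)
      if (Op == &Ch) Op = &Load;
  assert(Load.Ops[0] == &Load);         // cycle introduced
  Load.Ops[0] = &Ch;                    // UpdateNodeOperands: re-anchor load
  assert(Load.Ops[0] == &Ch && OtherUser.Ops[0] == &Load);
}
// ---- end note ------------------------------------------------------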
+ SmallVector<SDValue, 6> NewLoadOperands(NewLoad->op_begin(), + NewLoad->op_end()); + NewLoadOperands[0] = Ch; + NewLoad = + SDValue(DAG.UpdateNodeOperands(NewLoad.getNode(), NewLoadOperands), 0); + return NewLoad; } SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { @@ -2817,132 +2831,8 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { unsigned Opc = Node->getOpcode(); MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); - RTLIB::Libcall LC; - - switch (Opc) { - default: - llvm_unreachable("Unhandled atomic intrinsic Expand!"); - case ISD::ATOMIC_SWAP: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break; - case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; - case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; - case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; - case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break; - } - break; - case ISD::ATOMIC_CMP_SWAP: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break; - case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; - case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; - case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; - case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break; - } - break; - case ISD::ATOMIC_LOAD_ADD: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break; - } - break; - case ISD::ATOMIC_LOAD_SUB: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break; - } - break; - case ISD::ATOMIC_LOAD_AND: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break; - } - break; - case ISD::ATOMIC_LOAD_OR: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break; - } - break; - case ISD::ATOMIC_LOAD_XOR: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; - case 
MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break; - } - break; - case ISD::ATOMIC_LOAD_NAND: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break; - } - break; - case ISD::ATOMIC_LOAD_MAX: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MAX_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MAX_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MAX_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MAX_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MAX_16;break; - } - break; - case ISD::ATOMIC_LOAD_UMAX: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMAX_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMAX_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMAX_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMAX_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMAX_16;break; - } - break; - case ISD::ATOMIC_LOAD_MIN: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MIN_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MIN_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MIN_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MIN_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MIN_16;break; - } - break; - case ISD::ATOMIC_LOAD_UMIN: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMIN_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMIN_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMIN_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMIN_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMIN_16;break; - } - break; - } + RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); return ExpandChainLibCall(LC, Node, false); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 5507c70..25e80b9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1116,7 +1116,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){ - assert(OpNo == 2 && "Only know how to promote the mask!"); SDValue DataOp = N->getValue(); EVT DataVT = DataOp.getValueType(); SDValue Mask = N->getMask(); @@ -1127,7 +1126,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpN if (!TLI.isTypeLegal(DataVT)) { if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) { DataOp = GetPromotedInteger(DataOp); - Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); + if (!TLI.isTypeLegal(MaskVT)) + Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); TruncateStore = true; } else { @@ -1323,92 +1323,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { std::pair 
<SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { unsigned Opc = Node->getOpcode(); MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); - RTLIB::Libcall LC; - - switch (Opc) { - default: - llvm_unreachable("Unhandled atomic intrinsic Expand!"); - case ISD::ATOMIC_SWAP: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break; - case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; - case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; - case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; - case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break; - } - break; - case ISD::ATOMIC_CMP_SWAP: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break; - case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; - case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; - case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; - case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break; - } - break; - case ISD::ATOMIC_LOAD_ADD: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break; - } - break; - case ISD::ATOMIC_LOAD_SUB: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break; - } - break; - case ISD::ATOMIC_LOAD_AND: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break; - } - break; - case ISD::ATOMIC_LOAD_OR: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break; - } - break; - case ISD::ATOMIC_LOAD_XOR: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break; - } - break; - case ISD::ATOMIC_LOAD_NAND: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; - case MVT::i32: LC = 
RTLIB::SYNC_FETCH_AND_NAND_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break; - } - break; - } + RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); return ExpandChainLibCall(LC, Node, false); } @@ -1417,12 +1333,19 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { /// and the shift amount is a constant 'Amt'. Expand the operation. void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi) { - assert(Amt && "Expected zero shifts to be already optimized away."); SDLoc DL(N); // Expand the incoming operand to be shifted, so that we have its parts SDValue InL, InH; GetExpandedInteger(N->getOperand(0), InL, InH); + // Though Amt shouldn't usually be 0, it's possible. E.g. when legalization + // split a vector shift, like this: <op1, op2> SHL <0, 2>. + if (!Amt) { + Lo = InL; + Hi = InH; + return; + } + EVT NVT = InL.getValueType(); unsigned VTBits = N->getValueType(0).getSizeInBits(); unsigned NVTBits = NVT.getSizeInBits(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 63671f7..f7e4557 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2553,6 +2553,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { assert(InVT.isVector() && "can not widen non-vector type"); EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenNumElts); + + // The input and output types often differ here, and it could be that while + // we'd prefer to widen the result type, the input operands have been split. + // In this case, we also need to split the result of this node as well. + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) { + SDValue SplitVSetCC = SplitVecOp_VSETCC(N); + SDValue Res = ModifyToType(SplitVSetCC, WidenVT); + return Res; + } + InOp1 = GetWidenedVector(InOp1); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index db38b76..6303422 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -47,7 +47,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) TRI = STI.getRegisterInfo(); TLI = IS->TLI; TII = STI.getInstrInfo(); - ResourcesModel = TII->CreateTargetScheduleState(STI); + ResourcesModel.reset(TII->CreateTargetScheduleState(STI)); // This hard requirement could be relaxed, but for now // do not let it proceed.
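The two ExpandAtomic hunks above (LegalizeDAG.cpp and LegalizeIntegerTypes.cpp) collapse roughly a hundred lines of nested switches into one RTLIB::getATOMIC(Opc, VT) lookup plus an assert. The trick such a helper can use is to declare the per-size libcall enumerators contiguously, so the lookup reduces to a base enumerator plus a size index. A standalone sketch of that scheme; the enum layout here is illustrative, not LLVM's actual one:

// Hypothetical mirror of the RTLIB layout: for each atomic operation the
// _1/_2/_4/_8/_16 libcalls are declared consecutively.
enum Libcall {
  UNKNOWN_LIBCALL,
  SYNC_FETCH_AND_ADD_1, SYNC_FETCH_AND_ADD_2, SYNC_FETCH_AND_ADD_4,
  SYNC_FETCH_AND_ADD_8, SYNC_FETCH_AND_ADD_16,
  SYNC_FETCH_AND_SUB_1, SYNC_FETCH_AND_SUB_2, SYNC_FETCH_AND_SUB_4,
  SYNC_FETCH_AND_SUB_8, SYNC_FETCH_AND_SUB_16,
};

enum class AtomicOp { LoadAdd, LoadSub };

Libcall getAtomicLibcall(AtomicOp Op, unsigned Bits) {
  int SizeIdx;
  switch (Bits) {                    // i8/i16/i32/i64/i128 -> index 0..4
  case 8:   SizeIdx = 0; break;
  case 16:  SizeIdx = 1; break;
  case 32:  SizeIdx = 2; break;
  case 64:  SizeIdx = 3; break;
  case 128: SizeIdx = 4; break;
  default:  return UNKNOWN_LIBCALL;  // caller asserts on this
  }
  switch (Op) {
  case AtomicOp::LoadAdd: return Libcall(SYNC_FETCH_AND_ADD_1 + SizeIdx);
  case AtomicOp::LoadSub: return Libcall(SYNC_FETCH_AND_SUB_1 + SizeIdx);
  }
  return UNKNOWN_LIBCALL;
}

The caller-side contract then matches the diff: a single assert on UNKNOWN_LIBCALL replaces every per-type llvm_unreachable.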
assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); @@ -637,17 +637,3 @@ void ResourcePriorityQueue::remove(SUnit *SU) { Queue.pop_back(); } - - -#ifdef NDEBUG -void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {} -#else -void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const { - ResourcePriorityQueue q = *this; - while (!q.empty()) { - SUnit *su = q.pop(); - dbgs() << "Height " << su->getHeight() << ": "; - su->dump(DAG); - } -} -#endif diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 9466f4d..b52f648 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -196,6 +196,22 @@ bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) { return true; } +/// \brief Return true if the specified node is a BUILD_VECTOR node of +/// all ConstantFPSDNode or undef. +bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) { + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + if (Op.getOpcode() == ISD::UNDEF) + continue; + if (!isa<ConstantFPSDNode>(Op)) + return false; + } + return true; +} + /// isScalarToVector - Return true if the specified node is a /// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low /// element is not an undef. @@ -1446,13 +1462,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { // N2 to point at N1. static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) { std::swap(N1, N2); - int NElts = M.size(); - for (int i = 0; i != NElts; ++i) { - if (M[i] >= NElts) - M[i] -= NElts; - else if (M[i] >= 0) - M[i] += NElts; - } + ShuffleVectorSDNode::commuteMask(M); } SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, @@ -1625,19 +1635,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { MVT VT = SV.getSimpleValueType(0); - unsigned NumElems = VT.getVectorNumElements(); - SmallVector<int, 8> MaskVec; - - for (unsigned i = 0; i != NumElems; ++i) { - int Idx = SV.getMaskElt(i); - if (Idx >= 0) { - if (Idx < (int)NumElems) - Idx += NumElems; - else - Idx -= NumElems; - } - MaskVec.push_back(Idx); - } + SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end()); + ShuffleVectorSDNode::commuteMask(MaskVec); SDValue Op0 = SV.getOperand(0); SDValue Op1 = SV.getOperand(1); @@ -2844,7 +2843,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, } } - // Constant fold unary operations with a vector integer operand. + // Constant fold unary operations with a vector integer or float operand. if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) { if (BV->isConstant()) { switch (Opcode) { @@ -2852,18 +2851,25 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, // FIXME: Entirely reasonable to perform folding of other unary // operations here as the need arises. break; + case ISD::FNEG: + case ISD::FABS: + case ISD::FP_EXTEND: + case ISD::TRUNCATE: case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { + // Let the above scalar folding handle the folding of each element. SmallVector<SDValue, 8> Ops; for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { SDValue OpN = BV->getOperand(i); - // Let the above scalar folding handle the conversion of each - // element. 
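Both shuffle hunks above (commuteShuffle and getCommutedVectorShuffle) replace hand-rolled index-rewriting loops with the shared ShuffleVectorSDNode::commuteMask. What that transform does, as a standalone sketch: once the two vector operands swap, every mask index must flip which operand it reads from, while negative (undef) lanes stay untouched.

#include <vector>

// Commute a shuffle mask for <N x T> vectors: indices 0..N-1 select from
// the first operand, N..2N-1 from the second, negative means undef.
void commuteMask(std::vector<int> &Mask) {
  const int NumElts = static_cast<int>(Mask.size());
  for (int &Idx : Mask) {
    if (Idx < 0)
      continue;                                    // undef lane: nothing to remap
    Idx = Idx < NumElts ? Idx + NumElts : Idx - NumElts;
  }
}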
- OpN = getNode(ISD::SINT_TO_FP, DL, VT.getVectorElementType(), - OpN); + OpN = getNode(Opcode, DL, VT.getVectorElementType(), OpN); + if (OpN.getOpcode() != ISD::UNDEF && + OpN.getOpcode() != ISD::Constant && + OpN.getOpcode() != ISD::ConstantFP) + break; Ops.push_back(OpN); } - return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + if (Ops.size() == VT.getVectorNumElements()) + return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); } } } @@ -5418,17 +5424,9 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) { assert(N->getNumOperands() == NumOps && "Update with wrong number of operands"); - // Check to see if there is no change. - bool AnyChange = false; - for (unsigned i = 0; i != NumOps; ++i) { - if (Ops[i] != N->getOperand(i)) { - AnyChange = true; - break; - } - } - - // No operands changed, just return the input node. - if (!AnyChange) return N; + // If no operands changed just return the input node. + if (Ops.empty() || std::equal(Ops.begin(), Ops.end(), N->op_begin())) + return N; // See if the modified node already exists. void *InsertPos = nullptr; @@ -6673,8 +6671,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType()); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); - llvm::computeKnownBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, - TLI->getDataLayout()); + llvm::computeKnownBits(const_cast<GlobalValue *>(GV), KnownZero, KnownOne, + *TLI->getDataLayout()); unsigned AlignBits = KnownZero.countTrailingOnes(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; if (Align) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 097b618..6c14e79 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1016,6 +1016,24 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, } } +/// getCopyFromRegs - If there was virtual register allocated for the value V +/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. +SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { + DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); + SDValue res; + + if (It != FuncInfo.ValueMap.end()) { + unsigned InReg = It->second; + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg, + Ty); + SDValue Chain = DAG.getEntryNode(); + res = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); + resolveDanglingDebugInfo(V, res); + } + + return res; +} + /// getValue - Return an SDValue for the given Value. SDValue SelectionDAGBuilder::getValue(const Value *V) { // If we already have an SDValue for this value, use it. It's important @@ -1026,15 +1044,9 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { // If there's a virtual register allocated and initialized for this // value, use it. 
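The constant-folding hunk above widens the BUILD_VECTOR unary fold from the two int-to-FP conversions to FNEG, FABS, FP_EXTEND and TRUNCATE, and adds a guard: each lane is folded independently, and the vector is rebuilt only if every lane actually folded to a constant or undef. A scalar model of that fold-or-bail pattern (a standalone sketch, not the DAG code):

#include <optional>
#include <vector>

// One lane either folds to a constant or it doesn't; nullopt models a
// lane the folder could not evaluate.
using Lane = std::optional<long>;

std::optional<std::vector<long>> foldAllLanes(const std::vector<Lane> &In) {
  std::vector<long> Out;
  Out.reserve(In.size());
  for (const Lane &L : In) {
    if (!L)
      return std::nullopt;   // one unfoldable lane abandons the whole fold
    Out.push_back(-*L);      // model a per-lane FNEG
  }
  return Out;                // only now is the folded vector built
}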
- DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); - if (It != FuncInfo.ValueMap.end()) { - unsigned InReg = It->second; - RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg, - V->getType()); - SDValue Chain = DAG.getEntryNode(); - N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); - resolveDanglingDebugInfo(V, N); - return N; + SDValue copyFromReg = getCopyFromRegs(V, V->getType()); + if (copyFromReg.getNode()) { + return copyFromReg; } // Otherwise create a new SDValue and remember it. @@ -1573,19 +1585,13 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // Update machine-CFG edges. MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; - // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = BrMBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - if (I.isUnconditional()) { // Update machine-CFG edges. BrMBB->addSuccessor(Succ0MBB); // If this is not a fall-through branch or optimizations are switched off, // emit the branch. - if (Succ0MBB != NextBlock || TM.getOptLevel() == CodeGenOpt::None) + if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None) DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Succ0MBB))); @@ -1682,7 +1688,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); - const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); + const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); SDValue CmpOp = getValue(CB.CmpMHS); EVT VT = CmpOp.getValueType(); @@ -1705,16 +1711,9 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, if (CB.TrueBB != CB.FalseBB) addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = SwitchBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - // If the lhs block is the next block, invert the condition so that we can // fall through to the lhs instead of the rhs block. - if (CB.TrueBB == NextBlock) { + if (CB.TrueBB == NextBlock(SwitchBB)) { std::swap(CB.TrueBB, CB.FalseBB); SDValue True = DAG.getConstant(1, Cond.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); @@ -1781,19 +1780,12 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, Sub.getValueType()), Sub, DAG.getConstant(JTH.Last - JTH.First, VT), ISD::SETUGT); - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = SwitchBB; - - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), MVT::Other, CopyTo, CMP, DAG.getBasicBlock(JT.Default)); - if (JT.MBB != NextBlock) + // Avoid emitting unnecessary branches to the next block. 
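Several copies of the "increment the MachineBasicBlock iterator, compare with end()" boilerplate disappear in these hunks in favor of a single NextBlock(MBB) helper, defined at the end of this file's diff. The comparison exists for branch elision: an unconditional branch to the block laid out immediately after the current one can simply fall through, as in the guard that follows this comment. A minimal model of that decision:

#include <cstddef>
#include <cstdio>
#include <vector>

struct Block { int Id; };

// Return the block laid out after index I, or nullptr at function end.
const Block *nextBlock(const std::vector<Block> &Fn, std::size_t I) {
  return I + 1 < Fn.size() ? &Fn[I + 1] : nullptr;
}

void emitBranch(const std::vector<Block> &Fn, std::size_t Cur, const Block *Succ) {
  // Only emit a branch when the successor is not the fall-through block.
  if (Succ != nextBlock(Fn, Cur))
    std::printf("  br block_%d\n", Succ->Id);
}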
+ if (JT.MBB != NextBlock(SwitchBB)) BrCond = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrCond, DAG.getBasicBlock(JT.MBB)); @@ -1922,13 +1914,6 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), B.Reg, Sub); - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = SwitchBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - MachineBasicBlock* MBB = B.Cases[0].ThisBB; addSuccessorWithWeight(SwitchBB, B.Default); @@ -1938,7 +1923,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MVT::Other, CopyTo, RangeCmp, DAG.getBasicBlock(B.Default)); - if (MBB != NextBlock) + // Avoid emitting unnecessary branches to the next block. + if (MBB != NextBlock(SwitchBB)) BrRange = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, CopyTo, DAG.getBasicBlock(MBB)); @@ -1991,14 +1977,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, MVT::Other, getControlRoot(), Cmp, DAG.getBasicBlock(B.TargetBB)); - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = SwitchBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - - if (NextMBB != NextBlock) + // Avoid emitting unnecessary branches to the next block. + if (NextMBB != NextBlock(SwitchBB)) BrAnd = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrAnd, DAG.getBasicBlock(NextMBB)); @@ -2027,13 +2007,20 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { case Intrinsic::experimental_patchpoint_i64: visitPatchpoint(&I, LandingPad); break; + case Intrinsic::experimental_gc_statepoint: + LowerStatepoint(ImmutableStatepoint(&I), LandingPad); + break; } } else LowerCallTo(&I, getValue(Callee), false, LandingPad); // If the value of the invoke is used outside of its defining block, make it // available as a virtual register. - CopyToExportRegsIfNeeded(&I); + // We already took care of the exported value for the statepoint instruction + // during call to the LowerStatepoint. + if (!isStatepoint(I)) { + CopyToExportRegsIfNeeded(&I); + } // Update successor info addSuccessorWithWeight(InvokeMBB, Return); @@ -2128,11 +2115,10 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, MachineFunction *CurMF = FuncInfo.MF; // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = nullptr; + MachineBasicBlock *NextMBB = nullptr; MachineFunction::iterator BBI = CR.CaseBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; + NextMBB = BBI; BranchProbabilityInfo *BPI = FuncInfo.BPI; // If any two of the cases has the same destination, and if one value @@ -2146,8 +2132,8 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, Case &Big = *(CR.Range.second-1); if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) { - const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue(); - const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue(); + const APInt& SmallValue = Small.Low->getValue(); + const APInt& BigValue = Big.Low->getValue(); // Check that there is only one bit different. 
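The handleSmallSwitchRange check this comment introduces (continued just below) rests on a popcount trick: two case values that branch to the same block can be dispatched with a single comparison when they differ in exactly one bit, because or-ing that bit into the switch value and comparing against the larger case covers both. A freestanding version of the precondition, using C++20 <bit>:

#include <bit>
#include <cstdint>

// Big must be Small plus exactly one extra set bit; then
// (x | (Big ^ Small)) == Big dispatches both cases in one compare.
bool canShareCompare(uint64_t Small, uint64_t Big) {
  return std::popcount(Big) == std::popcount(Small) + 1 &&
         std::has_single_bit(Small ^ Big);
}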
if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 && @@ -2205,13 +2191,12 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, } // Rearrange the case blocks so that the last one falls through if possible. Case &BackCase = *(CR.Range.second-1); - if (Size > 1 && - NextBlock && Default != NextBlock && BackCase.BB != NextBlock) { - // The last case block won't fall through into 'NextBlock' if we emit the + if (Size > 1 && NextMBB && Default != NextMBB && BackCase.BB != NextMBB) { + // The last case block won't fall through into 'NextMBB' if we emit the // branches in this order. See if rearranging a case value would help. // We start at the bottom as it's the case with the least weight. for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I) - if (I->BB == NextBlock) { + if (I->BB == NextMBB) { std::swap(*I, BackCase); break; } @@ -2287,8 +2272,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, Case& FrontCase = *CR.Range.first; Case& BackCase = *(CR.Range.second-1); - const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue(); - const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue(); + const APInt &First = FrontCase.Low->getValue(); + const APInt &Last = BackCase.High->getValue(); APInt TSize(First.getBitWidth(), 0); for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) @@ -2338,8 +2323,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, std::vector<MachineBasicBlock*> DestBBs; APInt TEI = First; for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) { - const APInt &Low = cast<ConstantInt>(I->Low)->getValue(); - const APInt &High = cast<ConstantInt>(I->High)->getValue(); + const APInt &Low = I->Low->getValue(); + const APInt &High = I->High->getValue(); if (Low.sle(TEI) && TEI.sle(High)) { DestBBs.push_back(I->BB); @@ -2352,26 +2337,19 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, // Calculate weight for each unique destination in CR. DenseMap<MachineBasicBlock*, uint32_t> DestWeights; - if (FuncInfo.BPI) - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { - DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = - DestWeights.find(I->BB); - if (Itr != DestWeights.end()) - Itr->second += I->ExtraWeight; - else - DestWeights[I->BB] = I->ExtraWeight; - } + if (FuncInfo.BPI) { + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) + DestWeights[I->BB] += I->ExtraWeight; + } // Update successor info. Add one edge to each unique successor. BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs()); - for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(), - E = DestBBs.end(); I != E; ++I) { - if (!SuccsHandled[(*I)->getNumber()]) { - SuccsHandled[(*I)->getNumber()] = true; - DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = - DestWeights.find(*I); - addSuccessorWithWeight(JumpTableBB, *I, - Itr != DestWeights.end() ? Itr->second : 0); + for (MachineBasicBlock *DestBB : DestBBs) { + if (!SuccsHandled[DestBB->getNumber()]) { + SuccsHandled[DestBB->getNumber()] = true; + auto I = DestWeights.find(DestBB); + addSuccessorWithWeight(JumpTableBB, DestBB, + I != DestWeights.end() ? I->second : 0); } } @@ -2403,8 +2381,8 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, // Size is the number of Cases represented by this range. 
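The DestWeights cleanup above leans on a map property that makes find-then-insert loops redundant: operator[] value-initializes a missing entry (to 0 for integers), so accumulating a weight is a single line. The same holds for the standard containers, as this small check shows:

#include <cstdint>
#include <unordered_map>

int main() {
  std::unordered_map<const void *, uint32_t> Weights;
  int BB1, BB2;            // stand-ins for basic blocks; only addresses used
  Weights[&BB1] += 10;     // missing entry starts at 0
  Weights[&BB1] += 5;      // now 15
  Weights[&BB2] += 7;      // now 7
  return Weights[&BB1] == 15 && Weights[&BB2] == 7 ? 0 : 1;
}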
unsigned Size = CR.Range.second - CR.Range.first; - const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue(); - const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue(); + const APInt &First = FrontCase.Low->getValue(); + const APInt &Last = BackCase.High->getValue(); double FMetric = 0; CaseItr Pivot = CR.Range.first + Size/2; @@ -2423,8 +2401,8 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second; J!=E; ++I, ++J) { - const APInt &LEnd = cast<ConstantInt>(I->High)->getValue(); - const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue(); + const APInt &LEnd = I->High->getValue(); + const APInt &RBegin = J->Low->getValue(); APInt Range = ComputeRange(LEnd, RBegin); assert((Range - 2ULL).isNonNegative() && "Invalid case distance"); @@ -2479,7 +2457,7 @@ void SelectionDAGBuilder::splitSwitchCase(CaseRec &CR, CaseItr Pivot, CaseRange LHSR(CR.Range.first, Pivot); CaseRange RHSR(Pivot, CR.Range.second); - const Constant *C = Pivot->Low; + const ConstantInt *C = Pivot->Low; MachineBasicBlock *FalseBB = nullptr, *TrueBB = nullptr; // We know that we branch to the LHS if the Value being switched on is @@ -2489,8 +2467,7 @@ void SelectionDAGBuilder::splitSwitchCase(CaseRec &CR, CaseItr Pivot, // Pivot's Value, then we can branch directly to the LHS's Target, // rather than creating a leaf node for it. if ((LHSR.second - LHSR.first) == 1 && LHSR.first->High == CR.GE && - cast<ConstantInt>(C)->getValue() == - (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) { + C->getValue() == (CR.GE->getValue() + 1LL)) { TrueBB = LHSR.first->BB; } else { TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB); @@ -2506,8 +2483,7 @@ void SelectionDAGBuilder::splitSwitchCase(CaseRec &CR, CaseItr Pivot, // is CR.LT - 1, then we can branch directly to the target block for // the current Case Value, rather than emitting a RHS leaf node for it. if ((RHSR.second - RHSR.first) == 1 && CR.LT && - cast<ConstantInt>(RHSR.first->Low)->getValue() == - (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) { + RHSR.first->Low->getValue() == (CR.LT->getValue() - 1LL)) { FalseBB = RHSR.first->BB; } else { FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB); @@ -2571,8 +2547,8 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, << "Total number of comparisons: " << numCmps << '\n'); // Compute span of values. 
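The handleBTSplitSwitchCase and splitSwitchCase hunks above pick a pivot and split the sorted case list into two subtrees, so the emitted branches perform a binary search over disjoint [Low, High] ranges. What that tree of comparisons computes, written as a direct evaluator (sketch):

#include <cstddef>
#include <vector>

struct CaseRange { long Low, High; int Target; };

// Binary search over sorted, disjoint case ranges; -1 stands for the
// default destination. This is the dispatch the branch tree encodes.
int dispatch(const std::vector<CaseRange> &Cases, long X) {
  std::size_t Lo = 0, Hi = Cases.size();
  while (Lo < Hi) {
    std::size_t Mid = Lo + (Hi - Lo) / 2;
    if (X < Cases[Mid].Low)
      Hi = Mid;
    else if (X > Cases[Mid].High)
      Lo = Mid + 1;
    else
      return Cases[Mid].Target;
  }
  return -1;
}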
- const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue(); - const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue(); + const APInt& minValue = FrontCase.Low->getValue(); + const APInt& maxValue = BackCase.High->getValue(); APInt cmpRange = maxValue - minValue; DEBUG(dbgs() << "Compare range: " << cmpRange << '\n' @@ -2612,8 +2588,8 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, count++; } - const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue(); - const APInt& highValue = cast<ConstantInt>(I->High)->getValue(); + const APInt& lowValue = I->Low->getValue(); + const APInt& highValue = I->High->getValue(); uint64_t lo = (lowValue - lowBound).getZExtValue(); uint64_t hi = (highValue - lowBound).getZExtValue(); @@ -2663,45 +2639,42 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, return true; } -/// Clusterify - Transform simple list of Cases into list of CaseRange's -void SelectionDAGBuilder::Clusterify(CaseVector& Cases, - const SwitchInst& SI) { +void SelectionDAGBuilder::Clusterify(CaseVector &Cases, const SwitchInst *SI) { BranchProbabilityInfo *BPI = FuncInfo.BPI; - // Start with "simple" cases. - for (SwitchInst::ConstCaseIt i : SI.cases()) { - const BasicBlock *SuccBB = i.getCaseSuccessor(); - MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; - - uint32_t ExtraWeight = - BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0; - - Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(), - SMBB, ExtraWeight)); - } - std::sort(Cases.begin(), Cases.end(), CaseCmp()); - - // Merge case into clusters - if (Cases.size() >= 2) - // Must recompute end() each iteration because it may be - // invalidated by erase if we hold on to it - for (CaseItr I = Cases.begin(), J = std::next(Cases.begin()); - J != Cases.end(); ) { - const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); - const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); - MachineBasicBlock* nextBB = J->BB; - MachineBasicBlock* currentBB = I->BB; - - // If the two neighboring cases go to the same destination, merge them - // into a single case. - if ((nextValue - currentValue == 1) && (currentBB == nextBB)) { - I->High = J->High; - I->ExtraWeight += J->ExtraWeight; - J = Cases.erase(J); - } else { - I = J++; - } + + // Extract cases from the switch and sort them. + typedef std::pair<const ConstantInt*, unsigned> CasePair; + std::vector<CasePair> Sorted; + Sorted.reserve(SI->getNumCases()); + for (auto I : SI->cases()) + Sorted.push_back(std::make_pair(I.getCaseValue(), I.getSuccessorIndex())); + std::sort(Sorted.begin(), Sorted.end(), [](CasePair a, CasePair b) { + return a.first->getValue().slt(b.first->getValue()); + }); + + // Merge adjacent cases with the same destination, build Cases vector. + assert(Cases.empty() && "Cases should be empty before Clusterify;"); + Cases.reserve(SI->getNumCases()); + MachineBasicBlock *PreviousSucc = nullptr; + for (CasePair &CP : Sorted) { + const ConstantInt *CaseVal = CP.first; + unsigned SuccIndex = CP.second; + MachineBasicBlock *Succ = FuncInfo.MBBMap[SI->getSuccessor(SuccIndex)]; + uint32_t Weight = BPI ? BPI->getEdgeWeight(SI->getParent(), SuccIndex) : 0; + + if (PreviousSucc == Succ && + (CaseVal->getValue() - Cases.back().High->getValue()) == 1) { + // If this case has the same successor and is a neighbour, merge it into + // the previous cluster. 
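The Clusterify rewrite in progress here (its merge branch continues just below) separates the two concerns: first sort the raw (value, successor) pairs, then walk them once, growing the previous cluster whenever the next value is adjacent and branches to the same successor. A standalone sketch of that sort-then-merge pass, with unit weights standing in for the BPI edge weights:

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

struct Cluster { long Low, High; int Succ; uint32_t Weight; };

std::vector<Cluster> clusterify(std::vector<std::pair<long, int>> Cases) {
  std::sort(Cases.begin(), Cases.end());       // sort by case value
  std::vector<Cluster> Out;
  for (auto [Val, Succ] : Cases) {
    if (!Out.empty() && Out.back().Succ == Succ &&
        Val == Out.back().High + 1) {
      Out.back().High = Val;                   // adjacent, same target: widen
      Out.back().Weight += 1;                  // weights accumulate on merge
    } else {
      Out.push_back({Val, Val, Succ, 1});
    }
  }
  return Out;
}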
+ Cases.back().High = CaseVal; + Cases.back().ExtraWeight += Weight; + } else { + Cases.push_back(Case(CaseVal, CaseVal, Succ, Weight)); } + PreviousSucc = Succ; + } + DEBUG({ size_t numCmps = 0; for (auto &I : Cases) @@ -2729,16 +2702,10 @@ void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { MachineBasicBlock *SwitchMBB = FuncInfo.MBB; - // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = nullptr; - if (SwitchMBB + 1 != FuncInfo.MF->end()) - NextBlock = SwitchMBB + 1; - - // Create a vector of Cases, sorted so that we can efficiently create a binary // search tree from them. CaseVector Cases; - Clusterify(Cases, SI); + Clusterify(Cases, &SI); // Get the default destination MBB. MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; @@ -2775,7 +2742,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { SwitchMBB->addSuccessor(Default); // If this is not a fall-through branch, emit the branch. - if (Default != NextBlock) { + if (Default != NextBlock(SwitchMBB)) { DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Default))); } @@ -3429,30 +3396,21 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Ty = StTy->getElementType(Field); } else { Ty = cast<SequentialType>(Ty)->getElementType(); + MVT PtrTy = DAG.getTargetLoweringInfo().getPointerTy(AS); + unsigned PtrSize = PtrTy.getSizeInBits(); + APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty)); // If this is a constant subscript, handle it quickly. - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { - if (CI->isZero()) continue; - uint64_t Offs = - DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); - SDValue OffsVal; - EVT PTy = TLI.getPointerTy(AS); - unsigned PtrBits = PTy.getSizeInBits(); - if (PtrBits < 64) - OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy, - DAG.getConstant(Offs, MVT::i64)); - else - OffsVal = DAG.getConstant(Offs, PTy); - - N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, - OffsVal); + if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { + if (CI->isZero()) + continue; + APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); + SDValue OffsVal = DAG.getConstant(Offs, PtrTy); + N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, OffsVal); continue; } // N = N + Idx * ElementSize; - APInt ElementSize = - APInt(TLI.getPointerSizeInBits(AS), DL->getTypeAllocSize(Ty)); SDValue IdxN = getValue(Idx); // If the index is smaller or larger than intptr_t, truncate or extend @@ -3988,6 +3946,93 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) { MVT::f32); } +static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, + SelectionDAG &DAG) { + // IntegerPartOfX = ((int32_t)(t0); + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); + + // FractionalPartOfX = t0 - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode( + ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, DAG.getTargetLoweringInfo().getPointerTy())); + + SDValue TwoToFractionalPartOfX; + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // TwoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * 
x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + } else if (LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // error 0.000107046256, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + } else { // LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // error 2.47208000*10^(-7), which is better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); + } + + // Add the exponent into the result in integer domain. + SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, + DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX)); +} + /// expandExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. 
static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, @@ -3999,92 +4044,10 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, // final result: // // #define LOG2OFe 1.4426950f - // IntegerPartOfX = ((int32_t)(X * LOG2OFe)); + // t0 = Op * LOG2OFe SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, getF32Constant(DAG, 0x3fb8aa3b)); - SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); - - // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX; - SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); - SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); - - // IntegerPartOfX <<= 23; - IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, - DAG.getConstant(23, TLI.getPointerTy())); - - SDValue TwoToFracPartOfX; - if (LimitFloatPrecision <= 6) { - // For floating-point precision of 6: - // - // TwoToFractionalPartOfX = - // 0.997535578f + - // (0.735607626f + 0.252464424f * x) * x; - // - // error 0.0144103317, which is 6 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3e814304)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f3c50c8)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); - } else if (LimitFloatPrecision <= 12) { - // For floating-point precision of 12: - // - // TwoToFractionalPartOfX = - // 0.999892986f + - // (0.696457318f + - // (0.224338339f + 0.792043434e-1f * x) * x) * x; - // - // 0.000107046256 error, which is 13 to 14 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3da235e3)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3e65b8f3)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f324b07)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); - } else { // LimitFloatPrecision <= 18 - // For floating-point precision of 18: - // - // TwoToFractionalPartOfX = - // 0.999999982f + - // (0.693148872f + - // (0.240227044f + - // (0.554906021e-1f + - // (0.961591928e-2f + - // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; - // - // error 2.47208000*10^(-7), which is better than 18 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3924b03e)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3ab24b87)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3c1d8c17)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3d634a1d)); - SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); - SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3e75fe14)); - SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); - SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, - getF32Constant(DAG, 0x3f317234)); - SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); - TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); - } - - // Add the exponent into the result in integer domain. 
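expandExp, expandExp2 and expandPow previously each carried a private copy of the polynomial blocks being deleted here; the new getLimitedPrecisionExp2 above is the single shared version. The underlying algorithm: split x into integer and fractional parts, approximate 2^frac with a small polynomial, then add the integer part straight into the exponent field of the IEEE float. A scalar model of the 6-bit-precision path, reusing the coefficients from the diff (exp2Approx is a hypothetical name):

#include <cstdint>
#include <cstring>

// ~6-bit-accurate 2^x for float: 2^x = 2^int(x) * 2^frac(x), where the
// power of two is applied by adding int(x) to the exponent bits.
float exp2Approx(float x) {
  int32_t IntPart = static_cast<int32_t>(x);   // IntegerPartOfX
  float f = x - static_cast<float>(IntPart);   // FractionalPartOfX
  // Degree-2 polynomial for 2^f; max error 0.0144103317 (about 6 bits).
  float Poly = 0.997535578f + (0.735607626f + 0.252464424f * f) * f;
  int32_t Bits;
  std::memcpy(&Bits, &Poly, sizeof(Bits));     // BITCAST f32 -> i32
  Bits += IntPart << 23;   // IntegerPartOfX <<= 23 (two's complement add)
  float Result;
  std::memcpy(&Result, &Bits, sizeof(Result)); // BITCAST i32 -> f32
  return Result;
}

The 12- and 18-bit paths differ only in the polynomial degree; everything around them, including the exponent-domain add, is identical, which is what made the three copies safe to merge.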
- SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFracPartOfX); - return DAG.getNode(ISD::BITCAST, dl, MVT::f32, - DAG.getNode(ISD::ADD, dl, MVT::i32, - t13, IntegerPartOfX)); + return getLimitedPrecisionExp2(t0, dl, DAG); } // No special expansion. @@ -4375,91 +4338,8 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && - LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); - - // FractionalPartOfX = x - (float)IntegerPartOfX; - SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); - SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1); - - // IntegerPartOfX <<= 23; - IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, - DAG.getConstant(23, TLI.getPointerTy())); - - SDValue TwoToFractionalPartOfX; - if (LimitFloatPrecision <= 6) { - // For floating-point precision of 6: - // - // TwoToFractionalPartOfX = - // 0.997535578f + - // (0.735607626f + 0.252464424f * x) * x; - // - // error 0.0144103317, which is 6 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3e814304)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f3c50c8)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); - } else if (LimitFloatPrecision <= 12) { - // For floating-point precision of 12: - // - // TwoToFractionalPartOfX = - // 0.999892986f + - // (0.696457318f + - // (0.224338339f + 0.792043434e-1f * x) * x) * x; - // - // error 0.000107046256, which is 13 to 14 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3da235e3)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3e65b8f3)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f324b07)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); - } else { // LimitFloatPrecision <= 18 - // For floating-point precision of 18: - // - // TwoToFractionalPartOfX = - // 0.999999982f + - // (0.693148872f + - // (0.240227044f + - // (0.554906021e-1f + - // (0.961591928e-2f + - // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; - // error 2.47208000*10^(-7), which is better than 18 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3924b03e)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3ab24b87)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3c1d8c17)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3d634a1d)); - SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); - SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3e75fe14)); - SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); - SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, - getF32Constant(DAG, 0x3f317234)); - SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); - TwoToFractionalPartOfX = 
DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); - } - - // Add the exponent into the result in integer domain. - SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, - TwoToFractionalPartOfX); - return DAG.getNode(ISD::BITCAST, dl, MVT::f32, - DAG.getNode(ISD::ADD, dl, MVT::i32, - t13, IntegerPartOfX)); - } + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) + return getLimitedPrecisionExp2(Op, dl, DAG); // No special expansion. return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op); @@ -4483,90 +4363,10 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, // final result: // // #define LOG2OF10 3.3219281f - // IntegerPartOfX = (int32_t)(x * LOG2OF10); + // t0 = Op * LOG2OF10; SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS, getF32Constant(DAG, 0x40549a78)); - SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); - - // FractionalPartOfX = x - (float)IntegerPartOfX; - SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); - SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); - - // IntegerPartOfX <<= 23; - IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, - DAG.getConstant(23, TLI.getPointerTy())); - - SDValue TwoToFractionalPartOfX; - if (LimitFloatPrecision <= 6) { - // For floating-point precision of 6: - // - // twoToFractionalPartOfX = - // 0.997535578f + - // (0.735607626f + 0.252464424f * x) * x; - // - // error 0.0144103317, which is 6 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3e814304)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f3c50c8)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); - } else if (LimitFloatPrecision <= 12) { - // For floating-point precision of 12: - // - // TwoToFractionalPartOfX = - // 0.999892986f + - // (0.696457318f + - // (0.224338339f + 0.792043434e-1f * x) * x) * x; - // - // error 0.000107046256, which is 13 to 14 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3da235e3)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3e65b8f3)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f324b07)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); - } else { // LimitFloatPrecision <= 18 - // For floating-point precision of 18: - // - // TwoToFractionalPartOfX = - // 0.999999982f + - // (0.693148872f + - // (0.240227044f + - // (0.554906021e-1f + - // (0.961591928e-2f + - // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; - // error 2.47208000*10^(-7), which is better than 18 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3924b03e)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3ab24b87)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3c1d8c17)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3d634a1d)); - SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); - SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - 
getF32Constant(DAG, 0x3e75fe14)); - SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); - SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, - getF32Constant(DAG, 0x3f317234)); - SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); - } - - SDValue t13 = DAG.getNode(ISD::BITCAST, dl,MVT::i32,TwoToFractionalPartOfX); - return DAG.getNode(ISD::BITCAST, dl, MVT::f32, - DAG.getNode(ISD::ADD, dl, MVT::i32, - t13, IntegerPartOfX)); + return getLimitedPrecisionExp2(t0, dl, DAG); } // No special expansion. @@ -5114,34 +4914,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, Res); return nullptr; } - case Intrinsic::x86_avx_vinsertf128_pd_256: - case Intrinsic::x86_avx_vinsertf128_ps_256: - case Intrinsic::x86_avx_vinsertf128_si_256: - case Intrinsic::x86_avx2_vinserti128: { - EVT DestVT = TLI.getValueType(I.getType()); - EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); - uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) * - ElVT.getVectorNumElements(); - Res = - DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT, - getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), - DAG.getConstant(Idx, TLI.getVectorIdxTy())); - setValue(&I, Res); - return nullptr; - } - case Intrinsic::x86_avx_vextractf128_pd_256: - case Intrinsic::x86_avx_vextractf128_ps_256: - case Intrinsic::x86_avx_vextractf128_si_256: - case Intrinsic::x86_avx2_vextracti128: { - EVT DestVT = TLI.getValueType(I.getType()); - uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) * - DestVT.getVectorNumElements(); - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT, - getValue(I.getArgOperand(0)), - DAG.getConstant(Idx, TLI.getVectorIdxTy())); - setValue(&I, Res); - return nullptr; - } case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: @@ -5539,7 +5311,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; SmallVector<Value *, 4> Allocas; - GetUnderlyingObjects(I.getArgOperand(1), Allocas, DL); + GetUnderlyingObjects(I.getArgOperand(1), Allocas, *DL); for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), E = Allocas.end(); Object != E; ++Object) { @@ -5618,45 +5390,47 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::instrprof_increment: llvm_unreachable("instrprof failed to lower an increment"); - case Intrinsic::frameallocate: { + case Intrinsic::frameescape: { MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); - // Do the allocation and map it as a normal value. - // FIXME: Maybe we should add this to the alloca map so that we don't have - // to register allocate it? - uint64_t Size = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(); - int Alloc = MF.getFrameInfo()->CreateFrameAllocation(Size); - MVT PtrVT = TLI.getPointerTy(0); - SDValue FIVal = DAG.getFrameIndex(Alloc, PtrVT); - setValue(&I, FIVal); - - // Directly emit a FRAME_ALLOC machine instr. Label assignment emission is - // the same on all targets. - MCSymbol *FrameAllocSym = - MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName()); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, - TII->get(TargetOpcode::FRAME_ALLOC)) - .addSym(FrameAllocSym) - .addFrameIndex(Alloc); + // Directly emit some FRAME_ALLOC machine instrs. 
Label assignment emission + // is the same on all targets. + for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) { + AllocaInst *Slot = + cast<AllocaInst>(I.getArgOperand(Idx)->stripPointerCasts()); + assert(FuncInfo.StaticAllocaMap.count(Slot) && + "can only escape static allocas"); + int FI = FuncInfo.StaticAllocaMap[Slot]; + MCSymbol *FrameAllocSym = + MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName(), + Idx); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, + TII->get(TargetOpcode::FRAME_ALLOC)) + .addSym(FrameAllocSym) + .addFrameIndex(FI); + } return nullptr; } case Intrinsic::framerecover: { - // i8* @llvm.framerecover(i8* %fn, i8* %fp) + // i8* @llvm.framerecover(i8* %fn, i8* %fp, i32 %idx) MachineFunction &MF = DAG.getMachineFunction(); MVT PtrVT = TLI.getPointerTy(0); // Get the symbol that defines the frame offset. - Function *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); + auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); + auto *Idx = cast<ConstantInt>(I.getArgOperand(2)); + unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX)); MCSymbol *FrameAllocSym = - MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName()); + MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName(), + IdxVal); // Create a TargetExternalSymbol for the label to avoid any target lowering // that would make this PC relative. StringRef Name = FrameAllocSym->getName(); - assert(Name.size() == strlen(Name.data()) && "not null terminated"); + assert(Name.data()[Name.size()] == '\0' && "not null terminated"); SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT); SDValue OffsetVal = DAG.getNode(ISD::FRAME_ALLOC_RECOVER, sdl, PtrVT, OffsetSym); @@ -5672,6 +5446,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_begincatch: case Intrinsic::eh_endcatch: llvm_unreachable("begin/end catch intrinsics not lowered in codegen"); + case Intrinsic::eh_unwindhelp: { + AllocaInst *Slot = + cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts()); + assert(FuncInfo.StaticAllocaMap.count(Slot) && + "can only use static allocas with llvm.eh.unwindhelp"); + int FI = FuncInfo.StaticAllocaMap[Slot]; + // TODO: Save this in the not-yet-existant WinEHFuncInfo struct. + (void)FI; + return nullptr; + } } } @@ -5805,9 +5589,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), PointerType::getUnqual(LoadTy)); - if (const Constant *LoadCst = - ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput), - Builder.DL)) + if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr( + const_cast<Constant *>(LoadInput), *Builder.DL)) return Builder.getValue(LoadCst); } @@ -6748,10 +6531,15 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Memory output, or 'other' output (e.g. 'X' constraint). assert(OpInfo.isIndirect && "Memory output must be indirect operand"); + unsigned ConstraintID = + TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); + assert(ConstraintID != InlineAsm::Constraint_Unknown && + "Failed to convert memory constraint code to constraint id."); + // Add information to the INLINEASM node to know about this output. 
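The visitInlineAsm hunks here and below stop encoding memory operands with a pointer-typed constant and an implicit 'm' constraint; the flag word becomes a plain i32 that also carries a memory-constraint ID, which SelectInlineAsmMemoryOperands (later in this diff) can recover. The flag word is just a small bitfield; an illustrative packing, with field widths and kind values that are hypothetical rather than copied from InlineAsm.h:

#include <cassert>

// Illustrative operand flag word: kind in the low bits, operand count in
// the middle, memory constraint ID in the high bits.
enum OperandKind : unsigned { Kind_Reg = 1, Kind_Imm = 2, Kind_Mem = 3 };

unsigned getFlagWord(OperandKind Kind, unsigned NumOps) {
  return Kind | (NumOps << 3);
}

unsigned getFlagWordForMem(unsigned Flag, unsigned ConstraintID) {
  assert((Flag & 7) == Kind_Mem && "only memory operands carry a constraint");
  return Flag | (ConstraintID << 16);
}

unsigned getMemoryConstraintID(unsigned Flag) {
  return Flag >> 16;
}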
unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); - AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, - TLI.getPointerTy())); + OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID); + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, MVT::i32)); AsmNodeOperands.push_back(OpInfo.CallOperand); break; } @@ -6855,6 +6643,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { "Unexpected number of operands"); // Add information to the INLINEASM node to know about this input. // See InlineAsm.h isUseOperandTiedToDef. + OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag); OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, OpInfo.getMatchedOperand()); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, @@ -6894,10 +6683,15 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { assert(InOperandVal.getValueType() == TLI.getPointerTy() && "Memory operands expect pointer values"); + unsigned ConstraintID = + TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); + assert(ConstraintID != InlineAsm::Constraint_Unknown && + "Failed to convert memory constraint code to constraint id."); + // Add information to the INLINEASM node to know about this input. unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); - AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - TLI.getPointerTy())); + ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID); + AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, MVT::i32)); AsmNodeOperands.push_back(InOperandVal); break; } @@ -7901,8 +7695,8 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; - // Check successor nodes' PHI nodes that expect a constant to be available - // from this block. + // Check PHI nodes in successors that expect a value to be available from this + // block. for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { const BasicBlock *SuccBB = TI->getSuccessor(succ); if (!isa<PHINode>(SuccBB->begin())) continue; @@ -7989,3 +7783,10 @@ AddSuccessorMBB(const BasicBlock *BB, SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely)); return SuccMBB; } + +MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) { + MachineFunction::iterator I = MBB; + if (++I == FuncInfo.MF->end()) + return nullptr; + return I; +} diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index ad7411f..30240d8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -137,19 +137,19 @@ private: /// Case - A struct to record the Value for a switch case, and the /// case's target basic block. 
struct Case { - const Constant *Low; - const Constant *High; + const ConstantInt *Low; + const ConstantInt *High; MachineBasicBlock* BB; uint32_t ExtraWeight; Case() : Low(nullptr), High(nullptr), BB(nullptr), ExtraWeight(0) { } - Case(const Constant *low, const Constant *high, MachineBasicBlock *bb, + Case(const ConstantInt *low, const ConstantInt *high, MachineBasicBlock *bb, uint32_t extraweight) : Low(low), High(high), BB(bb), ExtraWeight(extraweight) { } APInt size() const { - const APInt &rHigh = cast<ConstantInt>(High)->getValue(); - const APInt &rLow = cast<ConstantInt>(Low)->getValue(); + const APInt &rHigh = High->getValue(); + const APInt &rLow = Low->getValue(); return (rHigh - rLow + 1ULL); } }; @@ -173,7 +173,7 @@ private: /// CaseRec - A struct with ctor used in lowering switches to a binary tree /// of conditional branches. struct CaseRec { - CaseRec(MachineBasicBlock *bb, const Constant *lt, const Constant *ge, + CaseRec(MachineBasicBlock *bb, const ConstantInt *lt, const ConstantInt *ge, CaseRange r) : CaseBB(bb), LT(lt), GE(ge), Range(r) {} @@ -181,8 +181,8 @@ private: MachineBasicBlock *CaseBB; /// LT, GE - If nonzero, we know the current case value must be less-than or /// greater-than-or-equal-to these Constants. - const Constant *LT; - const Constant *GE; + const ConstantInt *LT; + const ConstantInt *GE; /// Range - A pair of iterators representing the range of case values to be /// processed at this point in the binary search tree. CaseRange Range; @@ -190,24 +190,15 @@ private: typedef std::vector<CaseRec> CaseRecVector; - /// The comparison function for sorting the switch case values in the vector. - /// WARNING: Case ranges should be disjoint! - struct CaseCmp { - bool operator()(const Case &C1, const Case &C2) { - assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High)); - const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); - const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); - return CI1->getValue().slt(CI2->getValue()); - } - }; - struct CaseBitsCmp { bool operator()(const CaseBits &C1, const CaseBits &C2) { return C1.Bits > C2.Bits; } }; - void Clusterify(CaseVector &Cases, const SwitchInst &SI); + /// Populate Cases with the cases in SI, clustering adjacent cases with the + /// same destination together. + void Clusterify(CaseVector &Cases, const SwitchInst *SI); /// CaseBlock - This structure is used to communicate between /// SelectionDAGBuilder and SDISel for the code generation of additional basic @@ -606,6 +597,10 @@ public: void visit(unsigned Opcode, const User &I); + /// getCopyFromRegs - If there was virtual register allocated for the value V + /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. + SDValue getCopyFromRegs(const Value *V, Type *Ty); + // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, // generate the debug data structures now that we've seen its definition. void resolveDanglingDebugInfo(const Value *V, SDValue Val); @@ -622,8 +617,7 @@ public: void removeValue(const Value *V) { // This is to support hack in lowerCallFromStatepoint // Should be removed when hack is resolved - if (NodeMap.count(V)) - NodeMap.erase(V); + NodeMap.erase(V); } void setUnusedArgValue(const Value *V, SDValue NewN) { @@ -662,7 +656,9 @@ public: void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); // This function is responsible for the whole statepoint lowering process. 
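One small cleanup in removeValue above is worth a note: guarding erase with count() performs the lookup twice. DenseMap::erase, like std::map::erase, is already a no-op for a missing key, so the guard can simply go:

#include <map>

int main() {
  std::map<int, int> NodeMap{{1, 10}};
  // erase(key) returns the number of elements removed (0 or 1) and is
  // safe to call whether or not the key is present.
  NodeMap.erase(1);                          // removes the entry
  NodeMap.erase(2);                          // no-op, no count-then-erase dance
  return static_cast<int>(NodeMap.size());   // 0
}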
- void LowerStatepoint(ImmutableStatepoint Statepoint); + // It uniformly handles invoke and call statepoints. + void LowerStatepoint(ImmutableStatepoint Statepoint, + MachineBasicBlock *LandingPad = nullptr); private: std::pair<SDValue, SDValue> lowerInvokable( TargetLowering::CallLoweringInfo &CLI, @@ -830,6 +826,9 @@ private: bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, MDNode *Expr, int64_t Offset, bool IsIndirect, const SDValue &N); + + /// Return the next block after MBB, or nullptr if there is none. + MachineBasicBlock *NextBlock(MachineBasicBlock *MBB); }; } // end namespace llvm diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 17eff94..5898da4 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -95,6 +95,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; case ISD::RETURNADDR: return "RETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::FRAME_ALLOC_RECOVER: return "FRAME_ALLOC_RECOVER"; case ISD::READ_REGISTER: return "READ_REGISTER"; case ISD::WRITE_REGISTER: return "WRITE_REGISTER"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 5e867cf..4d2af3f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -168,14 +168,13 @@ static cl::opt<bool> EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, cl::desc("Enable verbose messages in the \"fast\" " "instruction selector")); -static cl::opt<bool> -EnableFastISelAbort("fast-isel-abort", cl::Hidden, - cl::desc("Enable abort calls when \"fast\" instruction selection " - "fails to lower an instruction")); -static cl::opt<bool> -EnableFastISelAbortArgs("fast-isel-abort-args", cl::Hidden, - cl::desc("Enable abort calls when \"fast\" instruction selection " - "fails to lower a formal argument")); +static cl::opt<int> EnableFastISelAbort( + "fast-isel-abort", cl::Hidden, + cl::desc("Enable abort calls when \"fast\" instruction selection " + "fails to lower an instruction: 0 disable the abort, 1 will " + "abort but for args, calls and terminators, 2 will also " + "abort for argument lowering, and 3 will never fallback " + "to SelectionDAG.")); static cl::opt<bool> UseMBPI("use-mbpi", @@ -293,7 +292,8 @@ namespace llvm { const TargetLowering *TLI = IS->TLI; const TargetSubtargetInfo &ST = IS->MF->getSubtarget(); - if (OptLevel == CodeGenOpt::None || ST.useMachineScheduler() || + if (OptLevel == CodeGenOpt::None || + (ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) || TLI->getSchedulingPreference() == Sched::Source) return createSourceListDAGScheduler(IS, OptLevel); if (TLI->getSchedulingPreference() == Sched::RegPressure) @@ -416,7 +416,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) && "-fast-isel-verbose requires -fast-isel"); assert((!EnableFastISelAbort || TM.Options.EnableFastISel) && - "-fast-isel-abort requires -fast-isel"); + "-fast-isel-abort > 0 requires -fast-isel"); const Function &Fn = *mf.getFunction(); MF = &mf; @@ -595,9 +595,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, BasicBlock::const_iterator 
End, bool &HadTailCall) {
- // Lower all of the non-terminator instructions. If a call is emitted
- // as a tail call, cease emitting nodes for this block. Terminators
- // are handled below.
+ // Lower the instructions. If a call is emitted as a tail call, cease emitting
+ // nodes for this block.
for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I)
SDB->visit(*I);
@@ -1182,8 +1181,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (!FastIS->lowerArguments()) {
// Fast isel failed to lower these arguments
++NumFastIselFailLowerArguments;
- if (EnableFastISelAbortArgs)
- llvm_unreachable("FastISel didn't lower all arguments");
+ if (EnableFastISelAbort > 1)
+ report_fatal_error("FastISel didn't lower all arguments");
// Use SelectionDAG argument lowering
LowerArguments(Fn);
@@ -1252,6 +1251,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
dbgs() << "FastISel missed call: ";
Inst->dump();
}
+ if (EnableFastISelAbort > 2)
+ // FastISel selector couldn't handle something and bailed.
+ // For the purpose of debugging, just abort.
+ report_fatal_error("FastISel didn't select the entire block");
if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) {
unsigned &R = FuncInfo->ValueMap[Inst];
@@ -1279,24 +1282,24 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
continue;
}
- if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) {
- // Don't abort, and use a different message for terminator misses.
- NumFastIselFailures += NumFastIselRemaining;
- if (EnableFastISelVerbose || EnableFastISelAbort) {
+ bool ShouldAbort = EnableFastISelAbort;
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ if (isa<TerminatorInst>(Inst)) {
+ // Use a different message for terminator misses.
dbgs() << "FastISel missed terminator: ";
- Inst->dump();
- }
- } else {
- NumFastIselFailures += NumFastIselRemaining;
- if (EnableFastISelVerbose || EnableFastISelAbort) {
+ // Don't abort for terminators unless the abort level is really high
+ ShouldAbort = (EnableFastISelAbort > 2);
+ } else {
dbgs() << "FastISel miss: ";
- Inst->dump();
}
- if (EnableFastISelAbort)
- // The "fast" selector couldn't handle something and bailed.
- // For the purpose of debugging, just abort.
- llvm_unreachable("FastISel didn't select the entire block");
+ Inst->dump();
}
+ if (ShouldAbort)
+ // FastISel selector couldn't handle something and bailed.
+ // For the purpose of debugging, just abort.
+ report_fatal_error("FastISel didn't select the entire block");
+
+ NumFastIselFailures += NumFastIselRemaining;
break;
}
@@ -1775,9 +1778,23 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
} else {
assert(InlineAsm::getNumOperandRegisters(Flags) == 1 &&
"Memory operand with multiple values?");
+
+ unsigned TiedToOperand;
+ if (InlineAsm::isUseOperandTiedToDef(Flags, TiedToOperand)) {
+ // We need the constraint ID from the operand this is tied to.
+ unsigned CurOp = InlineAsm::Op_FirstOperand;
+ Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue();
+ for (; TiedToOperand; --TiedToOperand) {
+ CurOp += InlineAsm::getNumOperandRegisters(Flags)+1;
+ Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue();
+ }
+ }
+
// Otherwise, this is a memory operand. Ask the target to select it.
std::vector<SDValue> SelOps;
- if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps))
+ if (SelectInlineAsmMemoryOperand(InOps[i+1],
+ InlineAsm::getMemoryConstraintID(Flags),
+ SelOps))
report_fatal_error("Could not match memory address.
Inline asm"
" failure!");
@@ -1933,7 +1950,7 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
std::vector<SDValue> Ops(N->op_begin(), N->op_end());
SelectInlineAsmMemoryOperands(Ops);
- EVT VTs[] = { MVT::Other, MVT::Glue };
+ const EVT VTs[] = {MVT::Other, MVT::Glue};
SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), VTs, Ops);
New->setNodeId(-1);
return New.getNode();
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 1271f6b..3cc7a98 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -224,6 +224,7 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases,
/// call node. Also update NodeMap so that getValue(statepoint) will
/// reference lowered call result
static SDNode *lowerCallFromStatepoint(ImmutableStatepoint StatepointSite,
+ MachineBasicBlock *LandingPad,
SelectionDAGBuilder &Builder) {
ImmutableCallSite CS(StatepointSite.getCallSite());
@@ -245,15 +246,29 @@ static SDNode *lowerCallFromStatepoint(ImmutableStatepoint StatepointSite,
Tmp->setTailCall(CS.isTailCall());
Tmp->setCallingConv(CS.getCallingConv());
Tmp->setAttributes(CS.getAttributes());
- Builder.LowerCallTo(Tmp, Builder.getValue(ActualCallee), false);
+ Builder.LowerCallTo(Tmp, Builder.getValue(ActualCallee), false, LandingPad);
// Handle the return value of the call iff any.
const bool HasDef = !Tmp->getType()->isVoidTy();
if (HasDef) {
- // The value of the statepoint itself will be the value of call itself.
- // We'll replace the actually call node shortly. gc_result will grab
- // this value.
- Builder.setValue(CS.getInstruction(), Builder.getValue(Tmp));
+ if (CS.isInvoke()) {
+ // The result value will be used in a different basic block for invokes,
+ // so we need to export it now. But the statepoint call has a different
+ // type than the actual call. This means the standard exporting mechanism
+ // would create a register of the wrong type. So instead we need to
+ // create a register with the correct type and save the value into it
+ // manually.
+ // TODO: To eliminate this problem we can remove gc.result intrinsics
+ // completely and make the statepoint call return a tuple.
+ unsigned reg = Builder.FuncInfo.CreateRegs(Tmp->getType());
+ Builder.CopyValueToVirtualRegister(Tmp, reg);
+ Builder.FuncInfo.ValueMap[CS.getInstruction()] = reg;
+ }
+ else {
+ // The value of the statepoint itself will be the value of the call
+ // itself. We'll replace the actual call node shortly. gc_result will
+ // grab this value.
+ Builder.setValue(CS.getInstruction(), Builder.getValue(Tmp));
+ }
} else {
// The token value is never used from here on, just generate a poison value
Builder.setValue(CS.getInstruction(), Builder.DAG.getIntPtrConstant(-1));
@@ -267,6 +282,15 @@ static SDNode *lowerCallFromStatepoint(ImmutableStatepoint StatepointSite,
// Search for the call node
// The following code is essentially reverse engineering X86's
// LowerCallTo.
+ // We are expecting the DAG to have the following form:
+ // ch = eh_label (only in case of invoke statepoint)
+ // ch, glue = callseq_start ch
+ // ch, glue = X86::Call ch, glue
+ // ch, glue = callseq_end ch, glue
+ // ch = eh_label ch (only in case of invoke statepoint)
+ //
+ // DAG root will be either the last eh_label or the callseq_end.
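// Illustrative walk of that form (a sketch; the exact node order depends on
// the target's LowerCallTo): for a plain call statepoint the DAG root is
// already the callseq_end node, so the loop below exits immediately; for an
// invoke statepoint the root is the trailing eh_label, and one step through
// its chain operand reaches the callseq_end, from which the call itself is
// then recovered.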
+ SDNode *CallNode = nullptr;
// We just emitted a call, so it should be last thing generated
@@ -276,8 +300,11 @@ static SDNode *lowerCallFromStatepoint(ImmutableStatepoint StatepointSite,
SDNode *CallEnd = Chain.getNode();
int Sanity = 0;
while (CallEnd->getOpcode() != ISD::CALLSEQ_END) {
- CallEnd = CallEnd->getGluedNode();
- assert(CallEnd && "Can not find call node");
+ assert(CallEnd->getNumOperands() >= 1 &&
+ CallEnd->getOperand(0).getValueType() == MVT::Other);
+
+ CallEnd = CallEnd->getOperand(0).getNode();
+
assert(Sanity < 20 && "should have found call end already");
Sanity++;
}
@@ -506,7 +533,9 @@ void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) {
LowerStatepoint(ImmutableStatepoint(&CI));
}
-void SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP) {
+void
+SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
+ MachineBasicBlock *LandingPad/*=nullptr*/) {
// The basic scheme here is that information about both the original call and
// the safepoint is encoded in the CallInst. We create a temporary call and
// lower it, then reverse engineer the calling sequence.
@@ -542,13 +571,12 @@ void SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP) {
}
#endif
- // Lower statepoint vmstate and gcstate arguments
SmallVector<SDValue, 10> LoweredArgs;
lowerStatepointMetaArgs(LoweredArgs, ISP, *this);
// Get call node, we will replace it later with statepoint
- SDNode *CallNode = lowerCallFromStatepoint(ISP, *this);
+ SDNode *CallNode = lowerCallFromStatepoint(ISP, LandingPad, *this);
// Construct the actual STATEPOINT node with all the appropriate arguments
// and return values.
@@ -634,7 +662,24 @@ void SelectionDAGBuilder::visitGCResult(const CallInst &CI) {
assert(isStatepoint(I) &&
"first argument must be a statepoint token");
- setValue(&CI, getValue(I));
+ if (isa<InvokeInst>(I)) {
+ // For invokes we should have stored the call result in a virtual register.
+ // We cannot use the default getValue() functionality to copy the value
+ // from this register because the statepoint and the actual call return
+ // types can be different, and getValue() will use CopyFromReg of the
+ // wrong type, which is always i32 in our case.
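// For example (the callee type here is hypothetical): for a statepoint
// wrapping a callee of type i8 addrspace(1)* ()*, the statepoint token has
// type i32, but gc.result must produce an i8 addrspace(1)*; the code below
// therefore recomputes the return type from the callee's function type.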
+ PointerType *CalleeType = cast<PointerType>( + ImmutableStatepoint(I).actualCallee()->getType()); + Type *RetTy = cast<FunctionType>( + CalleeType->getElementType())->getReturnType(); + SDValue CopyFromReg = getCopyFromRegs(I, RetTy); + + assert(CopyFromReg.getNode()); + setValue(&CI, CopyFromReg); + } + else { + setValue(&CI, getValue(I)); + } } void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 0a3c926..ddbf0b2 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -96,18 +96,21 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, for (unsigned i = 0; i != NumOps; ++i) { Entry.Node = Ops[i]; Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.isSExt = isSigned; - Entry.isZExt = !isSigned; + Entry.isSExt = shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned); + Entry.isZExt = !shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned); Args.push_back(Entry); } + if (LC == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported library call operation!"); SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy()); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); + bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) .setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed) - .setSExtResult(isSigned).setZExtResult(!isSigned); + .setSExtResult(signExtend).setZExtResult(!signExtend); return LowerCallTo(CLI); } diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp index f6393a5..66a6a3c 100644 --- a/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/lib/CodeGen/ShadowStackGCLowering.cpp @@ -53,10 +53,10 @@ private: Type *GetConcreteStackEntryType(Function &F); void CollectRoots(Function &F); static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B, - Value *BasePtr, int Idx1, + Type *Ty, Value *BasePtr, int Idx1, const char *Name); static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B, - Value *BasePtr, int Idx1, int Idx2, + Type *Ty, Value *BasePtr, int Idx1, int Idx2, const char *Name); }; } @@ -343,13 +343,14 @@ void ShadowStackGCLowering::CollectRoots(Function &F) { } GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context, - IRBuilder<> &B, Value *BasePtr, - int Idx, int Idx2, - const char *Name) { + IRBuilder<> &B, Type *Ty, + Value *BasePtr, int Idx, + int Idx2, + const char *Name) { Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0), ConstantInt::get(Type::getInt32Ty(Context), Idx), ConstantInt::get(Type::getInt32Ty(Context), Idx2)}; - Value *Val = B.CreateGEP(BasePtr, Indices, Name); + Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name); assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); @@ -357,11 +358,11 @@ GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context, } GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context, - IRBuilder<> &B, Value *BasePtr, + IRBuilder<> &B, Type *Ty, Value *BasePtr, int Idx, const char *Name) { Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0), ConstantInt::get(Type::getInt32Ty(Context), Idx)}; - Value *Val = B.CreateGEP(BasePtr, Indices, Name); + Value *Val = 
B.CreateGEP(Ty, BasePtr, Indices, Name); assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); @@ -402,14 +403,15 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) { // Initialize the map pointer and load the current head of the shadow stack. Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); - Instruction *EntryMapPtr = - CreateGEP(Context, AtEntry, StackEntry, 0, 1, "gc_frame.map"); + Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, + StackEntry, 0, 1, "gc_frame.map"); AtEntry.CreateStore(FrameMap, EntryMapPtr); // After all the allocas... for (unsigned I = 0, E = Roots.size(); I != E; ++I) { // For each root, find the corresponding slot in the aggregate... - Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root"); + Value *SlotPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, + StackEntry, 1 + I, "gc_root"); // And use it in lieu of the alloca. AllocaInst *OriginalAlloca = Roots[I].second; @@ -426,10 +428,10 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) { AtEntry.SetInsertPoint(IP->getParent(), IP); // Push the entry onto the shadow stack. - Instruction *EntryNextPtr = - CreateGEP(Context, AtEntry, StackEntry, 0, 0, "gc_frame.next"); - Instruction *NewHeadVal = - CreateGEP(Context, AtEntry, StackEntry, 0, "gc_newhead"); + Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, + StackEntry, 0, 0, "gc_frame.next"); + Instruction *NewHeadVal = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, + StackEntry, 0, "gc_newhead"); AtEntry.CreateStore(CurrentHead, EntryNextPtr); AtEntry.CreateStore(NewHeadVal, Head); @@ -439,7 +441,8 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) { // Pop the entry from the shadow stack. Don't reuse CurrentHead from // AtEntry, since that would make the value live for the entire function. Instruction *EntryNextPtr2 = - CreateGEP(Context, *AtExit, StackEntry, 0, 0, "gc_frame.next"); + CreateGEP(Context, *AtExit, ConcreteStackEntryTy, StackEntry, 0, 0, + "gc_frame.next"); Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead"); AtExit->CreateStore(SavedHead, Head); } diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 35e4292..2335a88 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -128,7 +128,8 @@ void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) { Value *Zero = ConstantInt::get(Int32Ty, 0); Value *One = ConstantInt::get(Int32Ty, 1); Value *Idxs[2] = { Zero, One }; - Value *CallSite = Builder.CreateGEP(FuncCtx, Idxs, "call_site"); + Value *CallSite = + Builder.CreateGEP(FunctionContextTy, FuncCtx, Idxs, "call_site"); // Insert a store of the call-site number ConstantInt *CallSiteNoC = diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index d46621d..025ae70 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -127,7 +127,7 @@ void SlotIndexes::renumberIndexes() { void SlotIndexes::renumberIndexes(IndexList::iterator curItr) { // Number indexes with half the default spacing so we can catch up quickly. 
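// As a worked example (the concrete value of SlotIndex::InstrDist is an
// assumption here): with InstrDist == 16, Space below is 8, so renumbered
// indexes advance 8 apart and quickly catch up with the untouched indexes
// that follow the renumbered range.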
const unsigned Space = SlotIndex::InstrDist/2;
- assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM");
+ static_assert((Space & 3) == 0, "InstrDist must be a multiple of 2*NUM");
IndexList::iterator startItr = std::prev(curItr);
unsigned index = startItr->getIndex();
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index faf94b6..7572803 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -364,7 +364,7 @@ void StackColoring::calculateLocalLiveness() {
}
}
- BBSet = NextBBSet;
+ BBSet = std::move(NextBBSet);
}// while changed.
}
diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp
index 767f43a..d88be57 100644
--- a/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -14,24 +14,24 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/StackMapLivenessAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "stackmaps"
-namespace llvm {
-cl::opt<bool> EnablePatchPointLiveness("enable-patchpoint-liveness",
- cl::Hidden, cl::init(true),
- cl::desc("Enable PatchPoint Liveness Analysis Pass"));
-}
+static cl::opt<bool> EnablePatchPointLiveness(
+ "enable-patchpoint-liveness", cl::Hidden, cl::init(true),
+ cl::desc("Enable PatchPoint Liveness Analysis Pass"));
STATISTIC(NumStackMapFuncVisited, "Number of functions visited");
STATISTIC(NumStackMapFuncSkipped, "Number of functions skipped");
@@ -39,6 +39,46 @@ STATISTIC(NumBBsVisited, "Number of basic blocks visited");
STATISTIC(NumBBsHaveNoStackmap, "Number of basic blocks with no stackmap");
STATISTIC(NumStackMaps, "Number of StackMaps visited");
+namespace {
+/// \brief This pass calculates the liveness information for each basic block in
+/// a function and attaches the register live-out information to a patchpoint
+/// intrinsic if present.
+///
+/// This pass can be disabled via the -enable-patchpoint-liveness=false flag.
+/// The pass skips functions that don't have any patchpoint intrinsics. The
+/// information provided by this pass is optional and not required by the
+/// aforementioned intrinsic to function.
+class StackMapLiveness : public MachineFunctionPass {
+ MachineFunction *MF;
+ const TargetRegisterInfo *TRI;
+ LivePhysRegs LiveRegs;
+
+public:
+ static char ID;
+
+ /// \brief Default construct and initialize the pass.
+ StackMapLiveness();
+
+ /// \brief Tell the pass manager which passes we depend on and what
+ /// information we preserve.
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// \brief Calculate the liveness information for the given machine function.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ /// \brief Performs the actual liveness calculation for the function.
+ bool calculateLiveness();
+
+ /// \brief Add the current register live set to the instruction.
+ void addLiveOutSetToMI(MachineInstr &MI);
+
+ /// \brief Create a register mask and initialize it with the registers from
+ /// the register live set.
+ uint32_t *createRegisterMask() const; +}; +} // namespace + char StackMapLiveness::ID = 0; char &llvm::StackMapLivenessID = StackMapLiveness::ID; INITIALIZE_PASS(StackMapLiveness, "stackmap-liveness", @@ -60,18 +100,18 @@ void StackMapLiveness::getAnalysisUsage(AnalysisUsage &AU) const { } /// Calculate the liveness information for the given machine function. -bool StackMapLiveness::runOnMachineFunction(MachineFunction &_MF) { +bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) { if (!EnablePatchPointLiveness) return false; - DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " - << _MF.getName() << " **********\n"); - MF = &_MF; - TRI = MF->getSubtarget().getRegisterInfo(); + DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " << MF.getName() + << " **********\n"); + this->MF = &MF; + TRI = MF.getSubtarget().getRegisterInfo(); ++NumStackMapFuncVisited; // Skip this function if there are no patchpoints to process. - if (!MF->getFrameInfo()->hasPatchPoint()) { + if (!MF.getFrameInfo()->hasPatchPoint()) { ++NumStackMapFuncSkipped; return false; } diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index 5d46419..aa18dea 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -19,8 +19,6 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -76,10 +74,21 @@ StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) { llvm_unreachable("Unsupported stackmap version!"); } +/// Go up the super-register chain until we hit a valid dwarf register number. +static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) { + int RegNo = TRI->getDwarfRegNum(Reg, false); + for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNo < 0; ++SR) + RegNo = TRI->getDwarfRegNum(*SR, false); + + assert(RegNo >= 0 && "Invalid Dwarf register number."); + return (unsigned) RegNo; +} + MachineInstr::const_mop_iterator StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, MachineInstr::const_mop_iterator MOE, LocationVec &Locs, LiveOutVec &LiveOuts) const { + const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo(); if (MOI->isImm()) { switch (MOI->getImm()) { default: llvm_unreachable("Unrecognized operand type."); @@ -89,7 +98,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, Size /= 8; unsigned Reg = (++MOI)->getReg(); int64_t Imm = (++MOI)->getImm(); - Locs.push_back(Location(StackMaps::Location::Direct, Size, Reg, Imm)); + Locs.push_back(Location(StackMaps::Location::Direct, Size, + getDwarfRegNum(Reg, TRI), Imm)); break; } case StackMaps::IndirectMemRefOp: { @@ -97,7 +107,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, assert(Size > 0 && "Need a valid size for indirect memory locations."); unsigned Reg = (++MOI)->getReg(); int64_t Imm = (++MOI)->getImm(); - Locs.push_back(Location(StackMaps::Location::Indirect, Size, Reg, Imm)); + Locs.push_back(Location(StackMaps::Location::Indirect, Size, + getDwarfRegNum(Reg, TRI), Imm)); break; } case StackMaps::ConstantOp: { @@ -122,12 +133,18 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, assert(TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) && "Virtreg operands should have been rewritten before now."); - const TargetRegisterClass *RC = - 
AP.TM.getSubtargetImpl()->getRegisterInfo()->getMinimalPhysRegClass( - MOI->getReg()); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MOI->getReg()); assert(!MOI->getSubReg() && "Physical subreg still around."); + + unsigned Offset = 0; + unsigned RegNo = getDwarfRegNum(MOI->getReg(), TRI); + unsigned LLVMRegNo = TRI->getLLVMRegNum(RegNo, false); + unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNo, MOI->getReg()); + if (SubRegIdx) + Offset = TRI->getSubRegIdxOffset(SubRegIdx); + Locs.push_back( - Location(Location::Register, RC->getSize(), MOI->getReg(), 0)); + Location(Location::Register, RC->getSize(), RegNo, Offset)); return ++MOI; } @@ -137,14 +154,74 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, return ++MOI; } -/// Go up the super-register chain until we hit a valid dwarf register number. -static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) { - int RegNo = TRI->getDwarfRegNum(Reg, false); - for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNo < 0; ++SR) - RegNo = TRI->getDwarfRegNum(*SR, false); +void StackMaps::print(raw_ostream &OS) { + const TargetRegisterInfo *TRI = + AP.MF ? AP.MF->getSubtarget().getRegisterInfo() : nullptr; + OS << WSMP << "callsites:\n"; + for (const auto &CSI : CSInfos) { + const LocationVec &CSLocs = CSI.Locations; + const LiveOutVec &LiveOuts = CSI.LiveOuts; - assert(RegNo >= 0 && "Invalid Dwarf register number."); - return (unsigned) RegNo; + OS << WSMP << "callsite " << CSI.ID << "\n"; + OS << WSMP << " has " << CSLocs.size() << " locations\n"; + + unsigned OperIdx = 0; + for (const auto &Loc : CSLocs) { + OS << WSMP << " Loc " << OperIdx << ": "; + switch (Loc.LocType) { + case Location::Unprocessed: + OS << "<Unprocessed operand>"; + break; + case Location::Register: + OS << "Register "; + if (TRI) + OS << TRI->getName(Loc.Reg); + else + OS << Loc.Reg; + break; + case Location::Direct: + OS << "Direct "; + if (TRI) + OS << TRI->getName(Loc.Reg); + else + OS << Loc.Reg; + if (Loc.Offset) + OS << " + " << Loc.Offset; + break; + case Location::Indirect: + OS << "Indirect "; + if (TRI) + OS << TRI->getName(Loc.Reg); + else + OS << Loc.Reg; + OS << "+" << Loc.Offset; + break; + case Location::Constant: + OS << "Constant " << Loc.Offset; + break; + case Location::ConstantIndex: + OS << "Constant Index " << Loc.Offset; + break; + } + OS << " [encoding: .byte " << Loc.LocType << ", .byte " << Loc.Size + << ", .short " << Loc.Reg << ", .int " << Loc.Offset << "]\n"; + OperIdx++; + } + + OS << WSMP << " has " << LiveOuts.size() << " live-out registers\n"; + + OperIdx = 0; + for (const auto &LO : LiveOuts) { + OS << WSMP << " LO " << OperIdx << ": "; + if (TRI) + OS << TRI->getName(LO.Reg); + else + OS << LO.Reg; + OS << " [encoding: .short " << LO.RegNo << ", .byte 0, .byte " + << LO.Size << "]\n"; + OperIdx++; + } + } } /// Create a live-out register record for the given register Reg. @@ -160,7 +237,7 @@ StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const { StackMaps::LiveOutVec StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { assert(Mask && "No register mask specified"); - const TargetRegisterInfo *TRI = AP.TM.getSubtargetImpl()->getRegisterInfo(); + const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo(); LiveOutVec LiveOuts; // Create a LiveOutReg for each bit that is set in the register mask. 
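// Conceptually, the loop that follows (elided from this hunk) does roughly:
//   for (unsigned Reg = 0, E = TRI->getNumRegs(); Reg != E; ++Reg)
//     if (Mask[Reg / 32] & (1U << (Reg % 32)))
//       LiveOuts.push_back(createLiveOutReg(Reg, TRI));
// (A sketch only; the in-tree version also folds sub-register records.)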
@@ -383,16 +460,13 @@ void StackMaps::emitConstantPoolEntries(MCStreamer &OS) { /// 0x3, Indirect, [Reg + Offset] (spilled value) /// 0x4, Constant, Offset (small constant) /// 0x5, ConstIndex, Constants[Offset] (large constant) -void StackMaps::emitCallsiteEntries(MCStreamer &OS, - const TargetRegisterInfo *TRI) { +void StackMaps::emitCallsiteEntries(MCStreamer &OS) { + DEBUG(print(dbgs())); // Callsite entries. - DEBUG(dbgs() << WSMP << "callsites:\n"); for (const auto &CSI : CSInfos) { const LocationVec &CSLocs = CSI.Locations; const LiveOutVec &LiveOuts = CSI.LiveOuts; - DEBUG(dbgs() << WSMP << "callsite " << CSI.ID << "\n"); - // Verify stack map entry. It's better to communicate a problem to the // runtime than crash in case of in-process compilation. Currently, we do // simple overflow checks, but we may eventually communicate other @@ -413,83 +487,20 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS, // Reserved for flags. OS.EmitIntValue(0, 2); - - DEBUG(dbgs() << WSMP << " has " << CSLocs.size() << " locations\n"); - OS.EmitIntValue(CSLocs.size(), 2); - unsigned OperIdx = 0; for (const auto &Loc : CSLocs) { - unsigned RegNo = 0; - int Offset = Loc.Offset; - if(Loc.Reg) { - RegNo = getDwarfRegNum(Loc.Reg, TRI); - - // If this is a register location, put the subregister byte offset in - // the location offset. - if (Loc.LocType == Location::Register) { - assert(!Loc.Offset && "Register location should have zero offset"); - unsigned LLVMRegNo = TRI->getLLVMRegNum(RegNo, false); - unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNo, Loc.Reg); - if (SubRegIdx) - Offset = TRI->getSubRegIdxOffset(SubRegIdx); - } - } - else { - assert(Loc.LocType != Location::Register && - "Missing location register"); - } - - DEBUG(dbgs() << WSMP << " Loc " << OperIdx << ": "; - switch (Loc.LocType) { - case Location::Unprocessed: - dbgs() << "<Unprocessed operand>"; - break; - case Location::Register: - dbgs() << "Register " << TRI->getName(Loc.Reg); - break; - case Location::Direct: - dbgs() << "Direct " << TRI->getName(Loc.Reg); - if (Loc.Offset) - dbgs() << " + " << Loc.Offset; - break; - case Location::Indirect: - dbgs() << "Indirect " << TRI->getName(Loc.Reg) - << " + " << Loc.Offset; - break; - case Location::Constant: - dbgs() << "Constant " << Loc.Offset; - break; - case Location::ConstantIndex: - dbgs() << "Constant Index " << Loc.Offset; - break; - } - dbgs() << " [encoding: .byte " << Loc.LocType - << ", .byte " << Loc.Size - << ", .short " << RegNo - << ", .int " << Offset << "]\n"; - ); - OS.EmitIntValue(Loc.LocType, 1); OS.EmitIntValue(Loc.Size, 1); - OS.EmitIntValue(RegNo, 2); - OS.EmitIntValue(Offset, 4); - OperIdx++; + OS.EmitIntValue(Loc.Reg, 2); + OS.EmitIntValue(Loc.Offset, 4); } - DEBUG(dbgs() << WSMP << " has " << LiveOuts.size() - << " live-out registers\n"); - // Num live-out registers and padding to align to 4 byte. OS.EmitIntValue(0, 2); OS.EmitIntValue(LiveOuts.size(), 2); - OperIdx = 0; for (const auto &LO : LiveOuts) { - DEBUG(dbgs() << WSMP << " LO " << OperIdx << ": " - << TRI->getName(LO.Reg) - << " [encoding: .short " << LO.RegNo - << ", .byte 0, .byte " << LO.Size << "]\n"); OS.EmitIntValue(LO.RegNo, 2); OS.EmitIntValue(0, 1); OS.EmitIntValue(LO.Size, 1); @@ -512,7 +523,6 @@ void StackMaps::serializeToStackMapSection() { MCContext &OutContext = AP.OutStreamer.getContext(); MCStreamer &OS = AP.OutStreamer; - const TargetRegisterInfo *TRI = AP.TM.getSubtargetImpl()->getRegisterInfo(); // Create the section. 
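// (For reference, the stackmap section is "__LLVM_STACKMAPS,__llvm_stackmaps"
// on MachO and ".llvm_stackmaps" on ELF.)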
const MCSection *StackMapSection = @@ -527,7 +537,7 @@ void StackMaps::serializeToStackMapSection() { emitStackmapHeader(OS); emitFunctionFrameRecords(OS); emitConstantPoolEntries(OS); - emitCallsiteEntries(OS, TRI); + emitCallsiteEntries(OS); OS.AddBlankLine(); // Clean up. diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index cc72e5e..a5a175f 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -184,10 +184,18 @@ void StackSlotColoring::InitializeSlots() { UsedColors.resize(LastFI); Assignments.resize(LastFI); + typedef std::iterator_traits<LiveStacks::iterator>::value_type Pair; + SmallVector<Pair *, 16> Intervals; + Intervals.reserve(LS->getNumIntervals()); + for (auto &I : *LS) + Intervals.push_back(&I); + std::sort(Intervals.begin(), Intervals.end(), + [](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; }); + // Gather all spill slots into a list. DEBUG(dbgs() << "Spill slot intervals:\n"); - for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) { - LiveInterval &li = i->second; + for (auto *I : Intervals) { + LiveInterval &li = I->second; DEBUG(li.dump()); int FI = TargetRegisterInfo::stackSlot2Index(li.reg); if (MFI->isDeadObjectIndex(FI)) diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index 2566c1f..38725b5 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -285,21 +285,20 @@ bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI, bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, unsigned SubIdx, unsigned &Size, unsigned &Offset, - const TargetMachine *TM) const { + const MachineFunction &MF) const { if (!SubIdx) { Size = RC->getSize(); Offset = 0; return true; } - unsigned BitSize = - TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIdxSize(SubIdx); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + unsigned BitSize = TRI->getSubRegIdxSize(SubIdx); // Convert bit size to byte size to be consistent with // MCRegisterClass::getSize(). 
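// Worked example (the register class and subregister index are chosen for
// illustration): for sub_32bit of a 64-bit x86-64 GPR, BitSize is 32 and
// BitOffset is 0, so Size = 4 and Offset = 0; on a big-endian target the
// adjustment further down would flip this to Offset = 4.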
if (BitSize % 8) return false; - int BitOffset = - TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIdxOffset(SubIdx); + int BitOffset = TRI->getSubRegIdxOffset(SubIdx); if (BitOffset < 0 || BitOffset % 8) return false; @@ -308,7 +307,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, assert(RC->getSize() >= (Offset + Size) && "bad subregister range"); - if (!TM->getDataLayout()->isLittleEndian()) { + if (!MF.getTarget().getDataLayout()->isLittleEndian()) { Offset = RC->getSize() - (Offset + Size); } return true; @@ -377,16 +376,13 @@ void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { llvm_unreachable("Not a MachO target"); } -bool TargetInstrInfo:: -canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const { +bool TargetInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, + ArrayRef<unsigned> Ops) const { return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]); } -static MachineInstr* foldPatchpoint(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex, +static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, int FrameIndex, const TargetInstrInfo &TII) { unsigned StartIdx = 0; switch (MI->getOpcode()) { @@ -405,9 +401,8 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF, // Return false if any operands requested for folding are not foldable (not // part of the stackmap's live values). - for (SmallVectorImpl<unsigned>::const_iterator I = Ops.begin(), E = Ops.end(); - I != E; ++I) { - if (*I < StartIdx) + for (unsigned Op : Ops) { + if (Op < StartIdx) return nullptr; } @@ -427,8 +422,8 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF, // Compute the spill slot size and offset. const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(MO.getReg()); - bool Valid = TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize, - SpillOffset, &MF.getTarget()); + bool Valid = + TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize, SpillOffset, MF); if (!Valid) report_fatal_error("cannot spill patchpoint subregister operand"); MIB.addImm(StackMaps::IndirectMemRefOp); @@ -448,10 +443,9 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF, /// operand folded, otherwise NULL is returned. The client is responsible for /// removing the old instruction and adding the new one in the instruction /// stream. -MachineInstr* -TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, - const SmallVectorImpl<unsigned> &Ops, - int FI) const { +MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, + ArrayRef<unsigned> Ops, + int FI) const { unsigned Flags = 0; for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (MI->getOperand(Ops[i]).isDef()) @@ -517,10 +511,9 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, /// foldMemoryOperand - Same as the previous version except it allows folding /// of any load and store from / to any address, not just from a specific /// stack slot. 
-MachineInstr* -TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { +MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, + ArrayRef<unsigned> Ops, + MachineInstr *LoadMI) const { assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!"); #ifndef NDEBUG for (unsigned i = 0, e = Ops.size(); i != e; ++i) diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 9048a44..58a6d52 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -664,6 +664,44 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { return UNKNOWN_LIBCALL; } +RTLIB::Libcall RTLIB::getATOMIC(unsigned Opc, MVT VT) { +#define OP_TO_LIBCALL(Name, Enum) \ + case Name: \ + switch (VT.SimpleTy) { \ + default: \ + return UNKNOWN_LIBCALL; \ + case MVT::i8: \ + return Enum##_1; \ + case MVT::i16: \ + return Enum##_2; \ + case MVT::i32: \ + return Enum##_4; \ + case MVT::i64: \ + return Enum##_8; \ + case MVT::i128: \ + return Enum##_16; \ + } + + switch (Opc) { + OP_TO_LIBCALL(ISD::ATOMIC_SWAP, SYNC_LOCK_TEST_AND_SET) + OP_TO_LIBCALL(ISD::ATOMIC_CMP_SWAP, SYNC_VAL_COMPARE_AND_SWAP) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_ADD, SYNC_FETCH_AND_ADD) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_SUB, SYNC_FETCH_AND_SUB) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_AND, SYNC_FETCH_AND_AND) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_OR, SYNC_FETCH_AND_OR) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_XOR, SYNC_FETCH_AND_XOR) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_NAND, SYNC_FETCH_AND_NAND) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MAX, SYNC_FETCH_AND_MAX) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMAX, SYNC_FETCH_AND_UMAX) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MIN, SYNC_FETCH_AND_MIN) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMIN, SYNC_FETCH_AND_UMIN) + } + +#undef OP_TO_LIBCALL + + return UNKNOWN_LIBCALL; +} + /// InitCmpLibcallCCs - Set default comparison libcall CC. /// static void InitCmpLibcallCCs(ISD::CondCode *CCs) { @@ -695,12 +733,11 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { } /// NOTE: The TargetMachine owns TLOF. -TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) - : TM(tm), DL(TM.getDataLayout()) { +TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { initActions(); // Perform these initializations only once. - IsLittleEndian = DL->isLittleEndian(); + IsLittleEndian = getDataLayout()->isLittleEndian(); MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize = MaxStoresPerMemmoveOptSize = 4; @@ -792,58 +829,21 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::ConstantFP, MVT::f128, Expand); // These library functions default to expand. 
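// (Expand for these opcodes means they become libcalls during legalization;
// e.g. ISD::FLOG on f64 typically lowers to a call to log(), and ISD::FEXP
// on f32 to expf().)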
- setOperationAction(ISD::FLOG , MVT::f16, Expand); - setOperationAction(ISD::FLOG2, MVT::f16, Expand); - setOperationAction(ISD::FLOG10, MVT::f16, Expand); - setOperationAction(ISD::FEXP , MVT::f16, Expand); - setOperationAction(ISD::FEXP2, MVT::f16, Expand); - setOperationAction(ISD::FFLOOR, MVT::f16, Expand); - setOperationAction(ISD::FMINNUM, MVT::f16, Expand); - setOperationAction(ISD::FMAXNUM, MVT::f16, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand); - setOperationAction(ISD::FCEIL, MVT::f16, Expand); - setOperationAction(ISD::FRINT, MVT::f16, Expand); - setOperationAction(ISD::FTRUNC, MVT::f16, Expand); - setOperationAction(ISD::FROUND, MVT::f16, Expand); - setOperationAction(ISD::FLOG , MVT::f32, Expand); - setOperationAction(ISD::FLOG2, MVT::f32, Expand); - setOperationAction(ISD::FLOG10, MVT::f32, Expand); - setOperationAction(ISD::FEXP , MVT::f32, Expand); - setOperationAction(ISD::FEXP2, MVT::f32, Expand); - setOperationAction(ISD::FFLOOR, MVT::f32, Expand); - setOperationAction(ISD::FMINNUM, MVT::f32, Expand); - setOperationAction(ISD::FMAXNUM, MVT::f32, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand); - setOperationAction(ISD::FCEIL, MVT::f32, Expand); - setOperationAction(ISD::FRINT, MVT::f32, Expand); - setOperationAction(ISD::FTRUNC, MVT::f32, Expand); - setOperationAction(ISD::FROUND, MVT::f32, Expand); - setOperationAction(ISD::FLOG , MVT::f64, Expand); - setOperationAction(ISD::FLOG2, MVT::f64, Expand); - setOperationAction(ISD::FLOG10, MVT::f64, Expand); - setOperationAction(ISD::FEXP , MVT::f64, Expand); - setOperationAction(ISD::FEXP2, MVT::f64, Expand); - setOperationAction(ISD::FFLOOR, MVT::f64, Expand); - setOperationAction(ISD::FMINNUM, MVT::f64, Expand); - setOperationAction(ISD::FMAXNUM, MVT::f64, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); - setOperationAction(ISD::FCEIL, MVT::f64, Expand); - setOperationAction(ISD::FRINT, MVT::f64, Expand); - setOperationAction(ISD::FTRUNC, MVT::f64, Expand); - setOperationAction(ISD::FROUND, MVT::f64, Expand); - setOperationAction(ISD::FLOG , MVT::f128, Expand); - setOperationAction(ISD::FLOG2, MVT::f128, Expand); - setOperationAction(ISD::FLOG10, MVT::f128, Expand); - setOperationAction(ISD::FEXP , MVT::f128, Expand); - setOperationAction(ISD::FEXP2, MVT::f128, Expand); - setOperationAction(ISD::FFLOOR, MVT::f128, Expand); - setOperationAction(ISD::FMINNUM, MVT::f128, Expand); - setOperationAction(ISD::FMAXNUM, MVT::f128, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand); - setOperationAction(ISD::FCEIL, MVT::f128, Expand); - setOperationAction(ISD::FRINT, MVT::f128, Expand); - setOperationAction(ISD::FTRUNC, MVT::f128, Expand); - setOperationAction(ISD::FROUND, MVT::f128, Expand); + for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) { + setOperationAction(ISD::FLOG , VT, Expand); + setOperationAction(ISD::FLOG2, VT, Expand); + setOperationAction(ISD::FLOG10, VT, Expand); + setOperationAction(ISD::FEXP , VT, Expand); + setOperationAction(ISD::FEXP2, VT, Expand); + setOperationAction(ISD::FFLOOR, VT, Expand); + setOperationAction(ISD::FMINNUM, VT, Expand); + setOperationAction(ISD::FMAXNUM, VT, Expand); + setOperationAction(ISD::FNEARBYINT, VT, Expand); + setOperationAction(ISD::FCEIL, VT, Expand); + setOperationAction(ISD::FRINT, VT, Expand); + setOperationAction(ISD::FTRUNC, VT, Expand); + setOperationAction(ISD::FROUND, VT, Expand); + } // Default ISD::TRAP to expand (which turns it into abort). 
setOperationAction(ISD::TRAP, MVT::Other, Expand);
@@ -859,7 +859,7 @@ MVT TargetLoweringBase::getPointerTy(uint32_t AS) const {
}
unsigned TargetLoweringBase::getPointerSizeInBits(uint32_t AS) const {
- return DL->getPointerSizeInBits(AS);
+ return getDataLayout()->getPointerSizeInBits(AS);
}
unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const {
@@ -868,7 +868,7 @@ unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const {
}
MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
- return MVT::getIntegerVT(8*DL->getPointerSize(0));
+ return MVT::getIntegerVT(8 * getDataLayout()->getPointerSize(0));
}
EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const {
@@ -1144,6 +1144,10 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
/// findRepresentativeClass - Return the largest legal super-reg register class
/// of the register class for the specified type and its associated "cost".
+// This function is in TargetLowering because it uses RegClassForVT, which
+// would need to be moved to TargetRegisterInfo and would necessitate moving
+// isTypeLegal over as well - a massive change that would just require
+// TargetLowering to have a TargetRegisterInfo class member that it would use.
std::pair<const TargetRegisterClass *, uint8_t>
TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI,
MVT VT) const {
@@ -1498,7 +1502,7 @@ void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr,
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty) const {
- return DL->getABITypeAlignment(Ty);
+ return getDataLayout()->getABITypeAlignment(Ty);
}
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index c1b34f7..bcf2aa7 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -31,6 +31,7 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
@@ -244,22 +245,9 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
return ".data.rel.ro";
}
-const MCSection *TargetLoweringObjectFileELF::
-SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang, const TargetMachine &TM) const {
- unsigned Flags = getELFSectionFlags(Kind);
-
- // If we have -ffunction-section or -fdata-section then we should emit the
- // global value to a uniqued section specifically for it.
- bool EmitUniqueSection = false;
- if (!(Flags & ELF::SHF_MERGE) && !Kind.isCommon()) {
- if (Kind.isText())
- EmitUniqueSection = TM.getFunctionSections();
- else
- EmitUniqueSection = TM.getDataSections();
- }
- EmitUniqueSection |= GV->hasComdat();
-
+static const MCSectionELF *selectELFSectionForGlobal(
+ MCContext &Ctx, const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags) {
unsigned EntrySize = 0;
if (Kind.isMergeableCString()) {
if (Kind.isMergeable2ByteCString()) {
@@ -309,9 +297,29 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Name.push_back('.');
TM.getNameWithPrefix(Name, GV, Mang, true);
}
- return getContext().getELFSection(Name, getELFSectionType(Name, Kind), Flags,
- EntrySize, Group,
- EmitUniqueSection && !UniqueSectionNames);
+ return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags,
+ EntrySize, Group,
+ EmitUniqueSection && !UniqueSectionNames);
+}
+
+const MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal(
+ const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
+ unsigned Flags = getELFSectionFlags(Kind);
+
+ // If we have -ffunction-sections or -fdata-sections then we should emit the
+ // global value to a uniqued section specifically for it.
+ bool EmitUniqueSection = false;
+ if (!(Flags & ELF::SHF_MERGE) && !Kind.isCommon()) {
+ if (Kind.isText())
+ EmitUniqueSection = TM.getFunctionSections();
+ else
+ EmitUniqueSection = TM.getDataSections();
+ }
+ EmitUniqueSection |= GV->hasComdat();
+
+ return selectELFSectionForGlobal(getContext(), GV, Kind, Mang, TM,
+ EmitUniqueSection, Flags);
}
const MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
@@ -323,7 +331,8 @@ const MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
if (!EmitUniqueSection)
return ReadOnlySection;
- return SelectSectionForGlobal(&F, SectionKind::getReadOnly(), Mang, TM);
+ return selectELFSectionForGlobal(getContext(), &F, SectionKind::getReadOnly(),
+ Mang, TM, EmitUniqueSection, ELF::SHF_ALLOC);
}
bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection(
@@ -423,6 +432,11 @@ TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) {
// MachO
//===----------------------------------------------------------------------===//
+TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO()
+ : TargetLoweringObjectFile() {
+ SupportIndirectSymViaGOTPCRel = true;
+}
+
/// getDepLibFromLinkerOpt - Extract the dependent library name from a linker
/// option string. Returns StringRef() if the option does not specify a library.
StringRef TargetLoweringObjectFileMachO::
@@ -697,6 +711,66 @@ MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol(
return SSym;
}
+const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
+ const MCSymbol *Sym, const MCValue &MV, int64_t Offset,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
+ // Although MachO 32-bit targets do not explicitly have a GOTPCREL relocation
+ // as 64-bit targets do, we replace the GOT equivalent by accessing the final
+ // symbol through a non_lazy_ptr stub instead. One advantage is that it allows
+ // the computation of deltas to final external symbols. Example:
+ //
+ // _extgotequiv:
+ // .long _extfoo
+ //
+ // _delta:
+ // .long _extgotequiv-_delta
+ //
+ // is transformed to:
+ //
+ // _delta:
+ // .long L_extfoo$non_lazy_ptr-(_delta+0)
+ //
+ // .section __IMPORT,__pointers,non_lazy_symbol_pointers
+ // L_extfoo$non_lazy_ptr:
+ // .indirect_symbol _extfoo
+ // .long 0
+ //
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+ MCContext &Ctx = getContext();
+
+ // The offset must consider the original displacement from the base symbol
+ // since 32-bit targets don't have a GOTPCREL to fold the PC displacement.
+ Offset = -MV.getConstant();
+ const MCSymbol *BaseSym = &MV.getSymB()->getSymbol();
+
+ // Access the final symbol via sym$non_lazy_ptr and generate the appropriate
+ // non_lazy_ptr stubs.
+ SmallString<128> Name;
+ StringRef Suffix = "$non_lazy_ptr";
+ Name += DL->getPrivateGlobalPrefix();
+ Name += Sym->getName();
+ Name += Suffix;
+ MCSymbol *Stub = Ctx.GetOrCreateSymbol(Name);
+
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(Stub);
+ if (!StubSym.getPointer())
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(const_cast<MCSymbol *>(Sym), true /* access indirectly */);
+
+ const MCExpr *BSymExpr =
+ MCSymbolRefExpr::Create(BaseSym, MCSymbolRefExpr::VK_None, Ctx);
+ const MCExpr *LHS =
+ MCSymbolRefExpr::Create(Stub, MCSymbolRefExpr::VK_None, Ctx);
+
+ if (!Offset)
+ return MCBinaryExpr::CreateSub(LHS, BSymExpr, Ctx);
+
+ const MCExpr *RHS =
+ MCBinaryExpr::CreateAdd(BSymExpr, MCConstantExpr::Create(Offset, Ctx), Ctx);
+ return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+}
+
//===----------------------------------------------------------------------===//
// COFF
//===----------------------------------------------------------------------===//
@@ -853,6 +927,11 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
StringRef COMDATSymName = Sym->getName();
return getContext().getCOFFSection(Name, Characteristics, Kind,
COMDATSymName, Selection);
+ } else {
+ SmallString<256> TmpData;
+ getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true, Mang, TM);
+ return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData,
+ Selection);
}
}
@@ -874,6 +953,42 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
return DataSection;
}
+void TargetLoweringObjectFileCOFF::getNameWithPrefix(
+ SmallVectorImpl<char> &OutName, const GlobalValue *GV,
+ bool CannotUsePrivateLabel, Mangler &Mang, const TargetMachine &TM) const {
+ if (GV->hasPrivateLinkage() &&
+ ((isa<Function>(GV) && TM.getFunctionSections()) ||
+ (isa<GlobalVariable>(GV) && TM.getDataSections())))
+ CannotUsePrivateLabel = true;
+
+ Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
+}
+
+const MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
+ const Function &F, Mangler &Mang, const TargetMachine &TM) const {
+ // If the function can be removed, produce a unique section so that
+ // the table doesn't prevent the removal.
+ const Comdat *C = F.getComdat();
+ bool EmitUniqueSection = TM.getFunctionSections() || C;
+ if (!EmitUniqueSection)
+ return ReadOnlySection;
+
+ // FIXME: we should produce a symbol for F instead.
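// Illustration of the resulting layout (the symbol name is hypothetical): the
// jump table of a comdat function @foo is emitted into a .rdata section whose
// COMDAT is associative with foo's own, so discarding the function also
// discards its table.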
+ if (F.hasPrivateLinkage())
+ return ReadOnlySection;
+
+ MCSymbol *Sym = TM.getSymbol(&F, Mang);
+ StringRef COMDATSymName = Sym->getName();
+
+ SectionKind Kind = SectionKind::getReadOnly();
+ const char *Name = getCOFFSectionNameForUniqueGlobal(Kind);
+ unsigned Characteristics = getCOFFSectionFlags(Kind);
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+
+ return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName,
+ COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE);
+}
+
StringRef TargetLoweringObjectFileCOFF::
getDepLibFromLinkerOpt(StringRef LinkerOption) const {
const char *LibCmd = "/DEFAULTLIB:";
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 1bbe6e1..57daeab 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -45,6 +45,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -102,6 +103,8 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg,
MachineBasicBlock::iterator OldPos);
+ bool isRevCopyChain(unsigned FromReg, unsigned ToReg, int Maxlen);
+
bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef);
bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
@@ -309,6 +312,45 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
return true;
}
+/// getSingleDef -- return the MachineInstr* if it is the single def of the Reg
+/// in the current BB.
+static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB,
+ const MachineRegisterInfo *MRI) {
+ MachineInstr *Ret = nullptr;
+ for (MachineInstr &DefMI : MRI->def_instructions(Reg)) {
+ if (DefMI.getParent() != BB || DefMI.isDebugValue())
+ continue;
+ if (!Ret)
+ Ret = &DefMI;
+ else if (Ret != &DefMI)
+ return nullptr;
+ }
+ return Ret;
+}
+
+/// Check if there is a reversed copy chain from FromReg to ToReg:
+/// %Tmp1 = copy %Tmp2;
+/// %FromReg = copy %Tmp1;
+/// %ToReg = add %FromReg ...
+/// %Tmp2 = copy %ToReg;
+/// MaxLen specifies the maximum length of the copy chain the function
+/// can walk through.
+bool TwoAddressInstructionPass::isRevCopyChain(unsigned FromReg, unsigned ToReg,
+ int Maxlen) {
+ unsigned TmpReg = FromReg;
+ for (int i = 0; i < Maxlen; i++) {
+ MachineInstr *Def = getSingleDef(TmpReg, MBB, MRI);
+ if (!Def || !Def->isCopy())
+ return false;
+
+ TmpReg = Def->getOperand(1).getReg();
+
+ if (TmpReg == ToReg)
+ return true;
+ }
+ return false;
+}
+
/// noUseAfterLastDef - Return true if there are no intervening uses between the
/// last instruction in the MBB that defines the specified register and the
/// two-address instruction which is being processed. It also returns the last
@@ -574,6 +616,27 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
if (!noUseAfterLastDef(regB, Dist, LastDefB))
return true;
+ // Look for a situation like this:
+ // %reg101 = MOV %reg100
+ // %reg102 = ...
+ // %reg103 = ADD %reg102, %reg101
+ // ... = %reg103 ...
+ // %reg100 = MOV %reg103
+ // If there is a reversed copy chain from reg101 to reg103, commute the ADD
+ // to eliminate an otherwise unavoidable copy.
+ // FIXME:
+ // We can extend the logic further: If a pair of operands in an insn has
+ // been merged, the insn could be regarded as a virtual copy, and the virtual
+ // copy could also be used to construct a copy chain.
+ // To more generally minimize register copies, ideally the logic of the
+ // two-address instruction pass should be integrated with the register
+ // allocation pass, where the interference graph is available.
+ if (isRevCopyChain(regC, regA, 3))
+ return true;
+
+ if (isRevCopyChain(regB, regA, 3))
+ return false;
+
// Since there are no intervening uses for both registers, then commute
// if the def of regC is closer. Its live interval is shorter.
return LastDefB && LastDefC && LastDefC > LastDefB;
}
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 7d3b0ce..d9adfdf 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -286,7 +286,7 @@ void VirtRegRewriter::addMBBLiveIns() {
}
void VirtRegRewriter::rewrite() {
- bool NoSubRegLiveness = !MRI->tracksSubRegLiveness();
+ bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
SmallVector<unsigned, 8> SuperDeads;
SmallVector<unsigned, 8> SuperDefs;
SmallVector<unsigned, 8> SuperKills;
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index 6f712a9..ab0f96e 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -16,6 +16,8 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/IR/Function.h"
@@ -25,6 +27,10 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include <memory>
@@ -36,25 +42,31 @@ using namespace llvm::PatternMatch;
namespace {
-struct HandlerAllocas {
- TinyPtrVector<AllocaInst *> Allocas;
- int ParentFrameAllocationIndex;
-};
-
// This map is used to model frame variable usage during outlining, to
// construct a structure type to hold the frame variables in a frame
// allocation block, and to remap the frame variable allocas (including
// spill locations as needed) to GEPs that get the variable from the
// frame allocation structure.
-typedef MapVector<AllocaInst *, HandlerAllocas> FrameVarInfoMap;
+typedef MapVector<Value *, TinyPtrVector<AllocaInst *>> FrameVarInfoMap;
-class WinEHPrepare : public FunctionPass {
- std::unique_ptr<FunctionPass> DwarfPrepare;
+typedef SmallSet<BasicBlock *, 4> VisitedBlockSet;
+
+enum ActionType { Catch, Cleanup };
+
+class LandingPadActions;
+class ActionHandler;
+class CatchHandler;
+class CleanupHandler;
+class LandingPadMap;
+typedef DenseMap<const BasicBlock *, CatchHandler *> CatchHandlerMapTy;
+typedef DenseMap<const BasicBlock *, CleanupHandler *> CleanupHandlerMapTy;
+
+class WinEHPrepare : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid.
WinEHPrepare(const TargetMachine *TM = nullptr) - : FunctionPass(ID), DwarfPrepare(createDwarfEHPass(TM)) {} + : FunctionPass(ID) {} bool runOnFunction(Function &Fn) override; @@ -67,11 +79,24 @@ public: } private: - bool prepareCPPEHHandlers(Function &F, - SmallVectorImpl<LandingPadInst *> &LPads); - bool outlineCatchHandler(Function *SrcFn, Constant *SelectorType, - LandingPadInst *LPad, CallInst *&EHAlloc, - AllocaInst *&EHObjPtr, FrameVarInfoMap &VarInfo); + bool prepareExceptionHandlers(Function &F, + SmallVectorImpl<LandingPadInst *> &LPads); + bool outlineHandler(ActionHandler *Action, Function *SrcFn, + LandingPadInst *LPad, BasicBlock *StartBB, + FrameVarInfoMap &VarInfo); + + void mapLandingPadBlocks(LandingPadInst *LPad, LandingPadActions &Actions); + CatchHandler *findCatchHandler(BasicBlock *BB, BasicBlock *&NextBB, + VisitedBlockSet &VisitedBlocks); + CleanupHandler *findCleanupHandler(BasicBlock *StartBB, BasicBlock *EndBB); + + void processSEHCatchHandler(CatchHandler *Handler, BasicBlock *StartBB); + + // All fields are reset by runOnFunction. + EHPersonality Personality; + CatchHandlerMapTy CatchHandlerMap; + CleanupHandlerMapTy CleanupHandlerMap; + DenseMap<const LandingPadInst *, LandingPadMap> LPadMaps; }; class WinEHFrameVariableMaterializer : public ValueMaterializer { @@ -87,34 +112,218 @@ private: IRBuilder<> Builder; }; -class WinEHCatchDirector : public CloningDirector { +class LandingPadMap { +public: + LandingPadMap() : OriginLPad(nullptr) {} + void mapLandingPad(const LandingPadInst *LPad); + + bool isInitialized() { return OriginLPad != nullptr; } + + bool mapIfEHPtrLoad(const LoadInst *Load) { + return mapIfEHLoad(Load, EHPtrStores, EHPtrStoreAddrs); + } + bool mapIfSelectorLoad(const LoadInst *Load) { + return mapIfEHLoad(Load, SelectorStores, SelectorStoreAddrs); + } + + bool isLandingPadSpecificInst(const Instruction *Inst) const; + + void remapSelector(ValueToValueMapTy &VMap, Value *MappedValue) const; + +private: + bool mapIfEHLoad(const LoadInst *Load, + SmallVectorImpl<const StoreInst *> &Stores, + SmallVectorImpl<const Value *> &StoreAddrs); + + const LandingPadInst *OriginLPad; + // We will normally only see one of each of these instructions, but + // if more than one occurs for some reason we can handle that. + TinyPtrVector<const ExtractValueInst *> ExtractedEHPtrs; + TinyPtrVector<const ExtractValueInst *> ExtractedSelectors; + + // In optimized code, there will typically be at most one instance of + // each of the following, but in unoptimized IR it is not uncommon + // for the values to be stored, loaded and then stored again. In that + // case we will create a second entry for each store and store address. 
+ SmallVector<const StoreInst *, 2> EHPtrStores; + SmallVector<const StoreInst *, 2> SelectorStores; + SmallVector<const Value *, 2> EHPtrStoreAddrs; + SmallVector<const Value *, 2> SelectorStoreAddrs; +}; + +class WinEHCloningDirectorBase : public CloningDirector { public: - WinEHCatchDirector(LandingPadInst *LPI, Function *CatchFn, Value *Selector, - Value *EHObj, FrameVarInfoMap &VarInfo) - : LPI(LPI), CurrentSelector(Selector->stripPointerCasts()), EHObj(EHObj), - Materializer(CatchFn, VarInfo), - SelectorIDType(Type::getInt32Ty(LPI->getContext())), - Int8PtrType(Type::getInt8PtrTy(LPI->getContext())) {} + WinEHCloningDirectorBase(Function *HandlerFn, + FrameVarInfoMap &VarInfo, + LandingPadMap &LPadMap) + : Materializer(HandlerFn, VarInfo), + SelectorIDType(Type::getInt32Ty(HandlerFn->getContext())), + Int8PtrType(Type::getInt8PtrTy(HandlerFn->getContext())), + LPadMap(LPadMap) {} CloningAction handleInstruction(ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) override; + virtual CloningAction handleBeginCatch(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) = 0; + virtual CloningAction handleEndCatch(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) = 0; + virtual CloningAction handleTypeIdFor(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) = 0; + virtual CloningAction handleInvoke(ValueToValueMapTy &VMap, + const InvokeInst *Invoke, + BasicBlock *NewBB) = 0; + virtual CloningAction handleResume(ValueToValueMapTy &VMap, + const ResumeInst *Resume, + BasicBlock *NewBB) = 0; + ValueMaterializer *getValueMaterializer() override { return &Materializer; } -private: - LandingPadInst *LPI; - Value *CurrentSelector; - Value *EHObj; +protected: WinEHFrameVariableMaterializer Materializer; Type *SelectorIDType; Type *Int8PtrType; + LandingPadMap &LPadMap; +}; + +class WinEHCatchDirector : public WinEHCloningDirectorBase { +public: + WinEHCatchDirector(Function *CatchFn, Value *Selector, + FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap) + : WinEHCloningDirectorBase(CatchFn, VarInfo, LPadMap), + CurrentSelector(Selector->stripPointerCasts()), + ExceptionObjectVar(nullptr) {} + + CloningAction handleBeginCatch(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction handleTypeIdFor(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke, + BasicBlock *NewBB) override; + CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume, + BasicBlock *NewBB) override; + + const Value *getExceptionVar() { return ExceptionObjectVar; } + TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; } + +private: + Value *CurrentSelector; - const Value *ExtractedEHPtr; - const Value *ExtractedSelector; - const Value *EHPtrStoreAddr; - const Value *SelectorStoreAddr; + const Value *ExceptionObjectVar; + TinyPtrVector<BasicBlock *> ReturnTargets; }; + +class WinEHCleanupDirector : public WinEHCloningDirectorBase { +public: + WinEHCleanupDirector(Function *CleanupFn, + FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap) + : WinEHCloningDirectorBase(CleanupFn, VarInfo, LPadMap) {} + + CloningAction handleBeginCatch(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction 
handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction handleTypeIdFor(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke, + BasicBlock *NewBB) override; + CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume, + BasicBlock *NewBB) override; +}; + +class ActionHandler { +public: + ActionHandler(BasicBlock *BB, ActionType Type) + : StartBB(BB), Type(Type), HandlerBlockOrFunc(nullptr) {} + + ActionType getType() const { return Type; } + BasicBlock *getStartBlock() const { return StartBB; } + + bool hasBeenProcessed() { return HandlerBlockOrFunc != nullptr; } + + void setHandlerBlockOrFunc(Constant *F) { HandlerBlockOrFunc = F; } + Constant *getHandlerBlockOrFunc() { return HandlerBlockOrFunc; } + +private: + BasicBlock *StartBB; + ActionType Type; + + // Can be either a BlockAddress or a Function depending on the EH personality. + Constant *HandlerBlockOrFunc; +}; + +class CatchHandler : public ActionHandler { +public: + CatchHandler(BasicBlock *BB, Constant *Selector, BasicBlock *NextBB) + : ActionHandler(BB, ActionType::Catch), Selector(Selector), + NextBB(NextBB), ExceptionObjectVar(nullptr) {} + + // Method for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const ActionHandler *H) { + return H->getType() == ActionType::Catch; + } + + Constant *getSelector() const { return Selector; } + BasicBlock *getNextBB() const { return NextBB; } + + const Value *getExceptionVar() { return ExceptionObjectVar; } + TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; } + + void setExceptionVar(const Value *Val) { ExceptionObjectVar = Val; } + void setReturnTargets(TinyPtrVector<BasicBlock *> &Targets) { + ReturnTargets = Targets; + } + +private: + Constant *Selector; + BasicBlock *NextBB; + const Value *ExceptionObjectVar; + TinyPtrVector<BasicBlock *> ReturnTargets; +}; + +class CleanupHandler : public ActionHandler { +public: + CleanupHandler(BasicBlock *BB) : ActionHandler(BB, ActionType::Cleanup) {} + + // Method for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const ActionHandler *H) { + return H->getType() == ActionType::Cleanup; + } +}; + +class LandingPadActions { +public: + LandingPadActions() : HasCleanupHandlers(false) {} + + void insertCatchHandler(CatchHandler *Action) { Actions.push_back(Action); } + void insertCleanupHandler(CleanupHandler *Action) { + Actions.push_back(Action); + HasCleanupHandlers = true; + } + + bool includesCleanup() const { return HasCleanupHandlers; } + + SmallVectorImpl<ActionHandler *>::iterator begin() { return Actions.begin(); } + SmallVectorImpl<ActionHandler *>::iterator end() { return Actions.end(); } + +private: + // Note that this class does not own the ActionHandler objects in this vector. + // The ActionHandlers are owned by the CatchHandlerMap and CleanupHandlerMap + // in the WinEHPrepare class. + SmallVector<ActionHandler *, 4> Actions; + bool HasCleanupHandlers; +}; + } // end anonymous namespace char WinEHPrepare::ID = 0; @@ -125,10 +334,10 @@ FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) { return new WinEHPrepare(TM); } -static bool isMSVCPersonality(EHPersonality Pers) { - return Pers == EHPersonality::MSVC_Win64SEH || - Pers == EHPersonality::MSVC_CXX; -} +// FIXME: Remove this once the backend can handle the prepared IR. 
+static cl::opt<bool> +SEHPrepare("sehprepare", cl::Hidden, + cl::desc("Prepare functions with SEH personalities")); bool WinEHPrepare::runOnFunction(Function &Fn) { SmallVector<LandingPadInst *, 4> LPads; @@ -145,60 +354,67 @@ bool WinEHPrepare::runOnFunction(Function &Fn) { return false; // Classify the personality to see what kind of preparation we need. - EHPersonality Pers = classifyEHPersonality(LPads.back()->getPersonalityFn()); - - // Delegate through to the DWARF pass if this is unrecognized. - if (!isMSVCPersonality(Pers)) - return DwarfPrepare->runOnFunction(Fn); + Personality = classifyEHPersonality(LPads.back()->getPersonalityFn()); - // FIXME: This only returns true if the C++ EH handlers were outlined. - // When that code is complete, it should always return whatever - // prepareCPPEHHandlers returns. - if (Pers == EHPersonality::MSVC_CXX && prepareCPPEHHandlers(Fn, LPads)) - return true; - - // FIXME: SEH Cleanups are unimplemented. Replace them with unreachable. - if (Resumes.empty()) + // Do nothing if this is not an MSVC personality. + if (!isMSVCEHPersonality(Personality)) return false; - for (ResumeInst *Resume : Resumes) { - IRBuilder<>(Resume).CreateUnreachable(); - Resume->eraseFromParent(); + if (isAsynchronousEHPersonality(Personality) && !SEHPrepare) { + // Replace all resume instructions with unreachable. + // FIXME: Remove this once the backend can handle the prepared IR. + for (ResumeInst *Resume : Resumes) { + IRBuilder<>(Resume).CreateUnreachable(); + Resume->eraseFromParent(); + } + return true; } + // If there were any landing pads, prepareExceptionHandlers will make changes. + prepareExceptionHandlers(Fn, LPads); return true; } bool WinEHPrepare::doFinalization(Module &M) { - return DwarfPrepare->doFinalization(M); + return false; } -void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const { - DwarfPrepare->getAnalysisUsage(AU); -} +void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {} -bool WinEHPrepare::prepareCPPEHHandlers( +bool WinEHPrepare::prepareExceptionHandlers( Function &F, SmallVectorImpl<LandingPadInst *> &LPads) { // These containers are used to re-map frame variables that are used in // outlined catch and cleanup handlers. They will be populated as the // handlers are outlined. FrameVarInfoMap FrameVarInfo; - SmallVector<CallInst *, 4> HandlerAllocs; - SmallVector<AllocaInst *, 4> HandlerEHObjPtrs; bool HandlersOutlined = false; + Module *M = F.getParent(); + LLVMContext &Context = M->getContext(); + + // Create a new function to receive the handler contents. + PointerType *Int8PtrType = Type::getInt8PtrTy(Context); + Type *Int32Type = Type::getInt32Ty(Context); + Function *ActionIntrin = Intrinsic::getDeclaration(M, Intrinsic::eh_actions); + for (LandingPadInst *LPad : LPads) { // Look for evidence that this landingpad has already been processed. bool LPadHasActionList = false; BasicBlock *LPadBB = LPad->getParent(); - for (Instruction &Inst : LPadBB->getInstList()) { - // FIXME: Make this an intrinsic. - if (auto *Call = dyn_cast<CallInst>(&Inst)) - if (Call->getCalledFunction()->getName() == "llvm.eh.actions") { + for (Instruction &Inst : *LPadBB) { + if (auto *IntrinCall = dyn_cast<IntrinsicInst>(&Inst)) { + if (IntrinCall->getIntrinsicID() == Intrinsic::eh_actions) { LPadHasActionList = true; break; } + } + // FIXME: This is here to help with the development of nested landing pad + // outlining. It should be removed when that is finished. 
+ if (isa<UnreachableInst>(Inst)) { + LPadHasActionList = true; + break; + } } // If we've already outlined the handlers for this landingpad, @@ -206,177 +422,244 @@ bool WinEHPrepare::prepareCPPEHHandlers( if (LPadHasActionList) continue; - for (unsigned Idx = 0, NumClauses = LPad->getNumClauses(); Idx < NumClauses; - ++Idx) { - if (LPad->isCatch(Idx)) { - // Create a new instance of the handler data structure in the - // HandlerData vector. - CallInst *EHAlloc = nullptr; - AllocaInst *EHObjPtr = nullptr; - bool Outlined = outlineCatchHandler(&F, LPad->getClause(Idx), LPad, - EHAlloc, EHObjPtr, FrameVarInfo); - if (Outlined) { + LandingPadActions Actions; + mapLandingPadBlocks(LPad, Actions); + + for (ActionHandler *Action : Actions) { + if (Action->hasBeenProcessed()) + continue; + BasicBlock *StartBB = Action->getStartBlock(); + + // SEH doesn't do any outlining for catches. Instead, pass the handler + // basic block addr to llvm.eh.actions and list the block as a return + // target. + if (isAsynchronousEHPersonality(Personality)) { + if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { + processSEHCatchHandler(CatchAction, StartBB); HandlersOutlined = true; - // These values must be resolved after all handlers have been - // outlined. - if (EHAlloc) - HandlerAllocs.push_back(EHAlloc); - if (EHObjPtr) - HandlerEHObjPtrs.push_back(EHObjPtr); + continue; } - } // End if (isCatch) - } // End for each clause - } // End for each landingpad + } + + if (outlineHandler(Action, &F, LPad, StartBB, FrameVarInfo)) { + HandlersOutlined = true; + } + } // End for each Action + + // FIXME: We need a guard against partially outlined functions. + if (!HandlersOutlined) + continue; + + // Replace the landing pad with a new llvm.eh.actions-based landing pad. + BasicBlock *NewLPadBB = BasicBlock::Create(Context, "lpad", &F, LPadBB); + assert(!isa<PHINode>(LPadBB->begin())); + Instruction *NewLPad = LPad->clone(); + NewLPadBB->getInstList().push_back(NewLPad); + while (!pred_empty(LPadBB)) { + auto *pred = *pred_begin(LPadBB); + InvokeInst *Invoke = cast<InvokeInst>(pred->getTerminator()); + Invoke->setUnwindDest(NewLPadBB); + } + + // Replace uses of the old lpad in phis with this block and delete the old + // block. + LPadBB->replaceSuccessorsPhiUsesWith(NewLPadBB); + LPadBB->getTerminator()->eraseFromParent(); + new UnreachableInst(LPadBB->getContext(), LPadBB); + + // Add a call to describe the actions for this landing pad. + std::vector<Value *> ActionArgs; + for (ActionHandler *Action : Actions) { + // Action codes from docs are: 0 cleanup, 1 catch. + if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { + ActionArgs.push_back(ConstantInt::get(Int32Type, 1)); + ActionArgs.push_back(CatchAction->getSelector()); + Value *EHObj = const_cast<Value *>(CatchAction->getExceptionVar()); + if (EHObj) + ActionArgs.push_back(EHObj); + else + ActionArgs.push_back(ConstantPointerNull::get(Int8PtrType)); + } else { + ActionArgs.push_back(ConstantInt::get(Int32Type, 0)); + } + ActionArgs.push_back(Action->getHandlerBlockOrFunc()); + } + CallInst *Recover = + CallInst::Create(ActionIntrin, ActionArgs, "recover", NewLPadBB); + + // Add an indirect branch listing possible successors of the catch handlers.
+ IndirectBrInst *Branch = IndirectBrInst::Create(Recover, 0, NewLPadBB); + for (ActionHandler *Action : Actions) { + if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { + for (auto *Target : CatchAction->getReturnTargets()) { + Branch->addDestination(Target); + } + } + } + } // End for each landingpad // If nothing got outlined, there is no more processing to be done. if (!HandlersOutlined) return false; - // FIXME: We will replace the landingpad bodies with llvm.eh.actions - // calls and indirect branches here and then delete blocks - // which are no longer reachable. That will get rid of the - // handlers that we have outlined. There is code below - // that looks for allocas with no uses in the parent function. - // That will only happen after the pruning is implemented. - - // Remap the frame variables. - SmallVector<Type *, 2> StructTys; - StructTys.push_back(Type::getInt32Ty(F.getContext())); // EH state - StructTys.push_back(Type::getInt8PtrTy(F.getContext())); // EH object - - // Start the index at two since we always have the above fields at 0 and 1. - int Idx = 2; - - // FIXME: Sort the FrameVarInfo vector by the ParentAlloca size and alignment - // and add padding as necessary to provide the proper alignment. - - // Map the alloca instructions to the corresponding index in the - // frame allocation structure. If any alloca is used only in a single - // handler and is not used in the parent frame after outlining, it will - // be assigned an index of -1, meaning the handler can keep its - // "temporary" alloca and the original alloca can be erased from the - // parent function. If we later encounter this alloca in a second - // handler, we will assign it a place in the frame allocation structure - // at that time. Since the instruction replacement doesn't happen until - // all the entries in the HandlerData have been processed this isn't a - // problem. - for (auto &VarInfoEntry : FrameVarInfo) { - AllocaInst *ParentAlloca = VarInfoEntry.first; - HandlerAllocas &AllocaInfo = VarInfoEntry.second; - - // If the instruction still has uses in the parent function or if it is - // referenced by more than one handler, add it to the frame allocation - // structure. - if (ParentAlloca->getNumUses() != 0 || AllocaInfo.Allocas.size() > 1) { - Type *VarTy = ParentAlloca->getAllocatedType(); - StructTys.push_back(VarTy); - AllocaInfo.ParentFrameAllocationIndex = Idx++; - } else { - // If the variable is not used in the parent frame and it is only used - // in one handler, the alloca can be removed from the parent frame - // and the handler will keep its "temporary" alloca to define the value. - // An element index of -1 is used to indicate this condition. - AllocaInfo.ParentFrameAllocationIndex = -1; - } - } + // Delete any blocks that were only used by handlers that were outlined above. + removeUnreachableBlocks(F); - // Having filled the StructTys vector and assigned an index to each element, - // we can now create the structure. - StructType *EHDataStructTy = StructType::create( - F.getContext(), StructTys, "struct." + F.getName().str() + ".ehdata"); - IRBuilder<> Builder(F.getParent()->getContext()); - - // Create a frame allocation. 
- Module *M = F.getParent(); - LLVMContext &Context = M->getContext(); BasicBlock *Entry = &F.getEntryBlock(); + IRBuilder<> Builder(F.getParent()->getContext()); Builder.SetInsertPoint(Entry->getFirstInsertionPt()); - Function *FrameAllocFn = - Intrinsic::getDeclaration(M, Intrinsic::frameallocate); - uint64_t EHAllocSize = M->getDataLayout()->getTypeAllocSize(EHDataStructTy); - Value *FrameAllocArgs[] = { - ConstantInt::get(Type::getInt32Ty(Context), EHAllocSize)}; - CallInst *FrameAlloc = - Builder.CreateCall(FrameAllocFn, FrameAllocArgs, "frame.alloc"); - - Value *FrameEHData = Builder.CreateBitCast( - FrameAlloc, EHDataStructTy->getPointerTo(), "eh.data"); - - // Now visit each handler that is using the structure and bitcast its EHAlloc - // value to be a pointer to the frame alloc structure. - DenseMap<Function *, Value *> EHDataMap; - for (CallInst *EHAlloc : HandlerAllocs) { - // The EHAlloc has no uses at this time, so we need to just insert the - // cast before the next instruction. There is always a next instruction. - BasicBlock::iterator II = EHAlloc; - ++II; - Builder.SetInsertPoint(cast<Instruction>(II)); - Value *EHData = Builder.CreateBitCast( - EHAlloc, EHDataStructTy->getPointerTo(), "eh.data"); - EHDataMap[EHAlloc->getParent()->getParent()] = EHData; - } - // Next, replace the place-holder EHObjPtr allocas with GEP instructions - // that pull the EHObjPtr from the frame alloc structure - for (AllocaInst *EHObjPtr : HandlerEHObjPtrs) { - Value *EHData = EHDataMap[EHObjPtr->getParent()->getParent()]; - Builder.SetInsertPoint(EHObjPtr); - Value *ElementPtr = Builder.CreateConstInBoundsGEP2_32(EHData, 0, 1); - EHObjPtr->replaceAllUsesWith(ElementPtr); - EHObjPtr->removeFromParent(); - ElementPtr->takeName(EHObjPtr); - delete EHObjPtr; - } + Function *FrameEscapeFn = + Intrinsic::getDeclaration(M, Intrinsic::frameescape); + Function *RecoverFrameFn = + Intrinsic::getDeclaration(M, Intrinsic::framerecover); // Finally, replace all of the temporary allocas for frame variables used in - // the outlined handlers and the original frame allocas with GEP instructions - // that get the equivalent pointer from the frame allocation struct. + // the outlined handlers with calls to llvm.framerecover. + BasicBlock::iterator II = Entry->getFirstInsertionPt(); + Instruction *AllocaInsertPt = II; + SmallVector<Value *, 8> AllocasToEscape; for (auto &VarInfoEntry : FrameVarInfo) { - AllocaInst *ParentAlloca = VarInfoEntry.first; - HandlerAllocas &AllocaInfo = VarInfoEntry.second; - int Idx = AllocaInfo.ParentFrameAllocationIndex; - - // If we have an index of -1 for this instruction, it means it isn't used - // outside of this handler. In that case, we just keep the "temporary" - // alloca in the handler and erase the original alloca from the parent. - if (Idx == -1) { + Value *ParentVal = VarInfoEntry.first; + TinyPtrVector<AllocaInst *> &Allocas = VarInfoEntry.second; + + // If the mapped value isn't already an alloca, we need to spill it if it + // is a computed value or copy it if it is an argument. + AllocaInst *ParentAlloca = dyn_cast<AllocaInst>(ParentVal); + if (!ParentAlloca) { + if (auto *Arg = dyn_cast<Argument>(ParentVal)) { + // Lower this argument to a copy and then demote that to the stack. + // We can't just use the argument location because the handler needs + // it to be in the frame allocation block. + // Use 'select i8 true, %arg, undef' to simulate a 'no-op' instruction. 
+ Value *TrueValue = ConstantInt::getTrue(Context); + Value *UndefValue = UndefValue::get(Arg->getType()); + Instruction *SI = + SelectInst::Create(TrueValue, Arg, UndefValue, + Arg->getName() + ".tmp", AllocaInsertPt); + Arg->replaceAllUsesWith(SI); + // Reset the select operand, because it was clobbered by the RAUW above. + SI->setOperand(1, Arg); + ParentAlloca = DemoteRegToStack(*SI, true, SI); + } else if (auto *PN = dyn_cast<PHINode>(ParentVal)) { + ParentAlloca = DemotePHIToStack(PN, AllocaInsertPt); + } else { + Instruction *ParentInst = cast<Instruction>(ParentVal); + // FIXME: This is a work-around to temporarily handle the case where an + // instruction that is only used in handlers is not sunk. + // Without uses, DemoteRegToStack would just eliminate the value. + // This will fail if ParentInst is an invoke. + if (ParentInst->getNumUses() == 0) { + BasicBlock::iterator InsertPt = ParentInst; + ++InsertPt; + ParentAlloca = + new AllocaInst(ParentInst->getType(), nullptr, + ParentInst->getName() + ".reg2mem", InsertPt); + new StoreInst(ParentInst, ParentAlloca, InsertPt); + } else { + ParentAlloca = DemoteRegToStack(*ParentInst, true, ParentInst); + } + } + } + + // If the parent alloca is no longer used and only one of the handlers used + // it, erase the parent and leave the copy in the outlined handler. + if (ParentAlloca->getNumUses() == 0 && Allocas.size() == 1) { ParentAlloca->eraseFromParent(); - } else { - // Otherwise, we replace the parent alloca and all outlined allocas - // which map to it with GEP instructions. - - // First replace the original alloca. - Builder.SetInsertPoint(ParentAlloca); - Builder.SetCurrentDebugLocation(ParentAlloca->getDebugLoc()); - Value *ElementPtr = - Builder.CreateConstInBoundsGEP2_32(FrameEHData, 0, Idx); - ParentAlloca->replaceAllUsesWith(ElementPtr); - ParentAlloca->removeFromParent(); - ElementPtr->takeName(ParentAlloca); - delete ParentAlloca; - - // Next replace all outlined allocas that are mapped to it. - for (AllocaInst *TempAlloca : AllocaInfo.Allocas) { - Value *EHData = EHDataMap[TempAlloca->getParent()->getParent()]; - // FIXME: Sink this GEP into the blocks where it is used. - Builder.SetInsertPoint(TempAlloca); - Builder.SetCurrentDebugLocation(TempAlloca->getDebugLoc()); - ElementPtr = Builder.CreateConstInBoundsGEP2_32(EHData, 0, Idx); - TempAlloca->replaceAllUsesWith(ElementPtr); - TempAlloca->removeFromParent(); - ElementPtr->takeName(TempAlloca); - delete TempAlloca; + continue; + } + + // Add this alloca to the list of things to escape. + AllocasToEscape.push_back(ParentAlloca); + + // Next replace all outlined allocas that are mapped to it. + for (AllocaInst *TempAlloca : Allocas) { + Function *HandlerFn = TempAlloca->getParent()->getParent(); + // FIXME: Sink this GEP into the blocks where it is used. + Builder.SetInsertPoint(TempAlloca); + Builder.SetCurrentDebugLocation(TempAlloca->getDebugLoc()); + Value *RecoverArgs[] = { + Builder.CreateBitCast(&F, Int8PtrType, ""), + &(HandlerFn->getArgumentList().back()), + llvm::ConstantInt::get(Int32Type, AllocasToEscape.size() - 1)}; + Value *RecoveredAlloca = Builder.CreateCall(RecoverFrameFn, RecoverArgs); + // Add a pointer bitcast if the alloca wasn't an i8. + if (RecoveredAlloca->getType() != TempAlloca->getType()) { + RecoveredAlloca->setName(Twine(TempAlloca->getName()) + ".i8"); + RecoveredAlloca = + Builder.CreateBitCast(RecoveredAlloca, TempAlloca->getType()); } - } // end else of if (Idx == -1) - } // End for each FrameVarInfo entry. 
+ TempAlloca->replaceAllUsesWith(RecoveredAlloca); + TempAlloca->removeFromParent(); + RecoveredAlloca->takeName(TempAlloca); + delete TempAlloca; + } + } // End for each FrameVarInfo entry. + + // Insert 'call void (...)* @llvm.frameescape(...)' at the end of the entry + // block. + Builder.SetInsertPoint(&F.getEntryBlock().back()); + Builder.CreateCall(FrameEscapeFn, AllocasToEscape); + + // Insert an alloca for the EH state in the entry block. On x86, we will also + // insert stores to update the EH state, but on other ISAs, the runtime does + // it for us. + // FIXME: This record is different on x86. + Type *UnwindHelpTy = Type::getInt64Ty(Context); + AllocaInst *UnwindHelp = + new AllocaInst(UnwindHelpTy, "unwindhelp", &F.getEntryBlock().front()); + Builder.CreateStore(llvm::ConstantInt::get(UnwindHelpTy, -2), UnwindHelp); + Function *UnwindHelpFn = + Intrinsic::getDeclaration(M, Intrinsic::eh_unwindhelp); + Builder.CreateCall(UnwindHelpFn, + Builder.CreateBitCast(UnwindHelp, Int8PtrType)); + + // Clean up the handler action maps we created for this function. + DeleteContainerSeconds(CatchHandlerMap); + CatchHandlerMap.clear(); + DeleteContainerSeconds(CleanupHandlerMap); + CleanupHandlerMap.clear(); return HandlersOutlined; } -bool WinEHPrepare::outlineCatchHandler(Function *SrcFn, Constant *SelectorType, - LandingPadInst *LPad, CallInst *&EHAlloc, - AllocaInst *&EHObjPtr, - FrameVarInfoMap &VarInfo) { +// This function examines a block to determine whether the block ends with a +// conditional branch to a catch handler based on a selector comparison. +// This function is used by both WinEHPrepare::findSelectorComparison() and +// WinEHCleanupDirector::handleTypeIdFor(). +static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler, + Constant *&Selector, BasicBlock *&NextBB) { + ICmpInst::Predicate Pred; + BasicBlock *TBB, *FBB; + Value *LHS, *RHS; + + if (!match(BB->getTerminator(), + m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), TBB, FBB))) + return false; + + if (!match(LHS, + m_Intrinsic<Intrinsic::eh_typeid_for>(m_Constant(Selector))) && + !match(RHS, m_Intrinsic<Intrinsic::eh_typeid_for>(m_Constant(Selector)))) + return false; + + if (Pred == CmpInst::ICMP_EQ) { + CatchHandler = TBB; + NextBB = FBB; + return true; + } + + if (Pred == CmpInst::ICMP_NE) { + CatchHandler = FBB; + NextBB = TBB; + return true; + } + + return false; +} + +bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn, + LandingPadInst *LPad, BasicBlock *StartBB, + FrameVarInfoMap &VarInfo) { Module *M = SrcFn->getParent(); LLVMContext &Context = M->getContext(); @@ -385,133 +668,241 @@ bool WinEHPrepare::outlineCatchHandler(Function *SrcFn, Constant *SelectorType, std::vector<Type *> ArgTys; ArgTys.push_back(Int8PtrType); ArgTys.push_back(Int8PtrType); - FunctionType *FnType = FunctionType::get(Int8PtrType, ArgTys, false); - Function *CatchHandler = Function::Create( - FnType, GlobalVariable::ExternalLinkage, SrcFn->getName() + ".catch", M); + Function *Handler; + if (Action->getType() == Catch) { + FunctionType *FnType = FunctionType::get(Int8PtrType, ArgTys, false); + Handler = Function::Create(FnType, GlobalVariable::InternalLinkage, + SrcFn->getName() + ".catch", M); + } else { + FunctionType *FnType = + FunctionType::get(Type::getVoidTy(Context), ArgTys, false); + Handler = Function::Create(FnType, GlobalVariable::InternalLinkage, + SrcFn->getName() + ".cleanup", M); + } // Generate a standard prolog to set up the frame recovery structure.
IRBuilder<> Builder(Context); - BasicBlock *Entry = BasicBlock::Create(Context, "catch.entry"); - CatchHandler->getBasicBlockList().push_front(Entry); + BasicBlock *Entry = BasicBlock::Create(Context, "entry"); + Handler->getBasicBlockList().push_front(Entry); Builder.SetInsertPoint(Entry); Builder.SetCurrentDebugLocation(LPad->getDebugLoc()); - // The outlined handler will be called with the parent's frame pointer as - // its second argument. To enable the handler to access variables from - // the parent frame, we use that pointer to get locate a special block - // of memory that was allocated using llvm.eh.allocateframe for this - // purpose. During the outlining process we will determine which frame - // variables are used in handlers and create a structure that maps these - // variables into the frame allocation block. - // - // The frame allocation block also contains an exception state variable - // used by the runtime and a pointer to the exception object pointer - // which will be filled in by the runtime for use in the handler. - Function *RecoverFrameFn = - Intrinsic::getDeclaration(M, Intrinsic::framerecover); - Value *RecoverArgs[] = {Builder.CreateBitCast(SrcFn, Int8PtrType, ""), - &(CatchHandler->getArgumentList().back())}; - EHAlloc = Builder.CreateCall(RecoverFrameFn, RecoverArgs, "eh.alloc"); - - // This alloca is only temporary. We'll be replacing it once we know all the - // frame variables that need to go in the frame allocation structure. - EHObjPtr = Builder.CreateAlloca(Int8PtrType, 0, "eh.obj.ptr"); - - // This will give us a raw pointer to the exception object, which - // corresponds to the formal parameter of the catch statement. If the - // handler uses this object, we will generate code during the outlining - // process to cast the pointer to the appropriate type and deference it - // as necessary. The un-outlined landing pad code represents the - // exception object as the result of the llvm.eh.begincatch call. - Value *EHObj = Builder.CreateLoad(EHObjPtr, false, "eh.obj"); + std::unique_ptr<WinEHCloningDirectorBase> Director; ValueToValueMapTy VMap; - // FIXME: Map other values referenced in the filter handler. - - WinEHCatchDirector Director(LPad, CatchHandler, SelectorType, EHObj, VarInfo); + LandingPadMap &LPadMap = LPadMaps[LPad]; + if (!LPadMap.isInitialized()) + LPadMap.mapLandingPad(LPad); + if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { + Constant *Sel = CatchAction->getSelector(); + Director.reset(new WinEHCatchDirector(Handler, Sel, VarInfo, LPadMap)); + LPadMap.remapSelector(VMap, ConstantInt::get(Type::getInt32Ty(Context), 1)); + } else { + Director.reset(new WinEHCleanupDirector(Handler, VarInfo, LPadMap)); + } SmallVector<ReturnInst *, 8> Returns; - ClonedCodeInfo InlinedFunctionInfo; + ClonedCodeInfo OutlinedFunctionInfo; + + // If the start block contains PHI nodes, we need to map them. + BasicBlock::iterator II = StartBB->begin(); + while (auto *PN = dyn_cast<PHINode>(II)) { + bool Mapped = false; + // Look for PHI values that we have already mapped (such as the selector). + for (Value *Val : PN->incoming_values()) { + if (VMap.count(Val)) { + VMap[PN] = VMap[Val]; + Mapped = true; + } + } + // If we didn't find a match for this value, map it as an undef. + if (!Mapped) { + VMap[PN] = UndefValue::get(PN->getType()); + } + ++II; + } - BasicBlock::iterator II = LPad; + // Skip over PHIs and, if applicable, landingpad instructions. 
+ II = StartBB->getFirstInsertionPt(); - CloneAndPruneIntoFromInst(CatchHandler, SrcFn, ++II, VMap, + CloneAndPruneIntoFromInst(Handler, SrcFn, II, VMap, /*ModuleLevelChanges=*/false, Returns, "", - &InlinedFunctionInfo, - SrcFn->getParent()->getDataLayout(), &Director); + &OutlinedFunctionInfo, Director.get()); // Move all the instructions in the first cloned block into our entry block. BasicBlock *FirstClonedBB = std::next(Function::iterator(Entry)); Entry->getInstList().splice(Entry->end(), FirstClonedBB->getInstList()); FirstClonedBB->eraseFromParent(); + if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { + WinEHCatchDirector *CatchDirector = + reinterpret_cast<WinEHCatchDirector *>(Director.get()); + CatchAction->setExceptionVar(CatchDirector->getExceptionVar()); + CatchAction->setReturnTargets(CatchDirector->getReturnTargets()); + } + + Action->setHandlerBlockOrFunc(Handler); + return true; } -CloningDirector::CloningAction WinEHCatchDirector::handleInstruction( - ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { - // Intercept instructions which extract values from the landing pad aggregate. - if (auto *Extract = dyn_cast<ExtractValueInst>(Inst)) { - if (Extract->getAggregateOperand() == LPI) { - assert(Extract->getNumIndices() == 1 && - "Unexpected operation: extracting both landing pad values"); - assert((*(Extract->idx_begin()) == 0 || *(Extract->idx_begin()) == 1) && - "Unexpected operation: extracting an unknown landing pad element"); - - if (*(Extract->idx_begin()) == 0) { - // Element 0 doesn't directly corresponds to anything in the WinEH - // scheme. - // It will be stored to a memory location, then later loaded and finally - // the loaded value will be used as the argument to an - // llvm.eh.begincatch - // call. We're tracking it here so that we can skip the store and load. - ExtractedEHPtr = Inst; - } else { - // Element 1 corresponds to the filter selector. We'll map it to 1 for - // matching purposes, but it will also probably be stored to memory and - // reloaded, so we need to track the instuction so that we can map the - // loaded value too. - VMap[Inst] = ConstantInt::get(SelectorIDType, 1); - ExtractedSelector = Inst; - } - - // Tell the caller not to clone this instruction. - return CloningDirector::SkipInstruction; - } - // Other extract value instructions just get cloned. - return CloningDirector::CloneInstruction; +/// This BB must end in a selector dispatch. All we need to do is pass the +/// handler block to llvm.eh.actions and list it as a possible indirectbr +/// target. +void WinEHPrepare::processSEHCatchHandler(CatchHandler *CatchAction, + BasicBlock *StartBB) { + BasicBlock *HandlerBB; + BasicBlock *NextBB; + Constant *Selector; + bool Res = isSelectorDispatch(StartBB, HandlerBB, Selector, NextBB); + if (Res) { + // If this was EH dispatch, this must be a conditional branch to the handler + // block. + // FIXME: Handle instructions in the dispatch block. Currently we drop them, + // leading to crashes if some optimization hoists stuff here. + assert(CatchAction->getSelector() && HandlerBB && + "expected catch EH dispatch"); + } else { + // This must be a catch-all. Split the block after the landingpad. 
+ assert(CatchAction->getSelector()->isNullValue() && "expected catch-all"); + HandlerBB = + StartBB->splitBasicBlock(StartBB->getFirstInsertionPt(), "catch.all"); } + CatchAction->setHandlerBlockOrFunc(BlockAddress::get(HandlerBB)); + TinyPtrVector<BasicBlock *> Targets(HandlerBB); + CatchAction->setReturnTargets(Targets); +} - if (auto *Store = dyn_cast<StoreInst>(Inst)) { - // Look for and suppress stores of the extracted landingpad values. - const Value *StoredValue = Store->getValueOperand(); - if (StoredValue == ExtractedEHPtr) { - EHPtrStoreAddr = Store->getPointerOperand(); - return CloningDirector::SkipInstruction; +void LandingPadMap::mapLandingPad(const LandingPadInst *LPad) { + // Each instance of this class should only ever be used to map a single + // landing pad. + assert(OriginLPad == nullptr || OriginLPad == LPad); + + // If the landing pad has already been mapped, there's nothing more to do. + if (OriginLPad == LPad) + return; + + OriginLPad = LPad; + + // The landingpad instruction returns an aggregate value. Typically, its + // value will be passed to a pair of extract value instructions and the + // results of those extracts are often passed to store instructions. + // In unoptimized code the stored value will often be loaded and then stored + // again. + for (auto *U : LPad->users()) { + const ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U); + if (!Extract) + continue; + assert(Extract->getNumIndices() == 1 && + "Unexpected operation: extracting both landing pad values"); + unsigned int Idx = *(Extract->idx_begin()); + assert((Idx == 0 || Idx == 1) && + "Unexpected operation: extracting an unknown landing pad element"); + if (Idx == 0) { + // Element 0 doesn't directly correspond to anything in the WinEH scheme. + // It will be stored to a memory location, then later loaded, and finally + // the loaded value will be used as the argument to an llvm.eh.begincatch + // call. We're tracking it here so that we can skip the store and load. + ExtractedEHPtrs.push_back(Extract); + } else if (Idx == 1) { + // Element 1 corresponds to the filter selector. We'll map it to 1 for + // matching purposes, but it will also probably be stored to memory and + // reloaded, so we need to track the instruction so that we can map the + // loaded value too. + ExtractedSelectors.push_back(Extract); } - if (StoredValue == ExtractedSelector) { - SelectorStoreAddr = Store->getPointerOperand(); - return CloningDirector::SkipInstruction; + + // Look for stores of the extracted values. + for (auto *EU : Extract->users()) { + if (auto *Store = dyn_cast<StoreInst>(EU)) { + if (Idx == 1) { + SelectorStores.push_back(Store); + SelectorStoreAddrs.push_back(Store->getPointerOperand()); + } else { + EHPtrStores.push_back(Store); + EHPtrStoreAddrs.push_back(Store->getPointerOperand()); + } + } } + } +} - // Any other store just gets cloned.
- return CloningDirector::CloneInstruction; +bool LandingPadMap::isLandingPadSpecificInst(const Instruction *Inst) const { + if (Inst == OriginLPad) + return true; + for (auto *Extract : ExtractedEHPtrs) { + if (Inst == Extract) + return true; + } + for (auto *Extract : ExtractedSelectors) { + if (Inst == Extract) + return true; + } + for (auto *Store : EHPtrStores) { + if (Inst == Store) + return true; + } + for (auto *Store : SelectorStores) { + if (Inst == Store) + return true; + } + + return false; +} + +void LandingPadMap::remapSelector(ValueToValueMapTy &VMap, + Value *MappedValue) const { + // Remap all selector extract instructions to the specified value. + for (auto *Extract : ExtractedSelectors) + VMap[Extract] = MappedValue; +} + +bool LandingPadMap::mapIfEHLoad(const LoadInst *Load, + SmallVectorImpl<const StoreInst *> &Stores, + SmallVectorImpl<const Value *> &StoreAddrs) { + // This makes the assumption that a store we've previously seen dominates + // this load instruction. That might seem like a rather huge assumption, + // but given the way that landingpads are constructed it's fairly safe. + // FIXME: Add debug/assert code that verifies this. + const Value *LoadAddr = Load->getPointerOperand(); + for (auto *StoreAddr : StoreAddrs) { + if (LoadAddr == StoreAddr) { + // Handle the common debug scenario where this loaded value is stored + // to a different location. + for (auto *U : Load->users()) { + if (auto *Store = dyn_cast<StoreInst>(U)) { + Stores.push_back(Store); + StoreAddrs.push_back(Store->getPointerOperand()); + } + } + return true; + } } + return false; +} + +CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction( + ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { + // If this is one of the boilerplate landing pad instructions, skip it. + // The instruction will have already been remapped in VMap. + if (LPadMap.isLandingPadSpecificInst(Inst)) + return CloningDirector::SkipInstruction; if (auto *Load = dyn_cast<LoadInst>(Inst)) { // Look for loads of (previously suppressed) landingpad values. - // The EHPtr load can be ignored (it should only be used as - // an argument to llvm.eh.begincatch), but the selector value - // needs to be mapped to a constant value of 1 to be used to - // simplify the branching to always flow to the current handler. - const Value *LoadAddr = Load->getPointerOperand(); - if (LoadAddr == EHPtrStoreAddr) { - VMap[Inst] = UndefValue::get(Int8PtrType); + // The EHPtr load can be mapped to an undef value as it should only be used + // as an argument to llvm.eh.begincatch, but the selector value needs to be + // mapped to a constant value of 1. This value will be used to simplify the + // branching to always flow to the current handler. + if (LPadMap.mapIfSelectorLoad(Load)) { + VMap[Inst] = ConstantInt::get(SelectorIDType, 1); return CloningDirector::SkipInstruction; } - if (LoadAddr == SelectorStoreAddr) { - VMap[Inst] = ConstantInt::get(SelectorIDType, 1); + if (LPadMap.mapIfEHPtrLoad(Load)) { + VMap[Inst] = UndefValue::get(Int8PtrType); return CloningDirector::SkipInstruction; } @@ -519,108 +910,576 @@ CloningDirector::CloningAction WinEHCatchDirector::handleInstruction( return CloningDirector::CloneInstruction; } - if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>())) { - // The argument to the call is some form of the first element of the - // landingpad aggregate value, but that doesn't matter. It isn't used - // here.
- // The return value of this instruction, however, is used to access the - // EH object pointer. We have generated an instruction to get that value - // from the EH alloc block, so we can just map to that here. - VMap[Inst] = EHObj; - return CloningDirector::SkipInstruction; - } - if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>())) { - auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst); - // It might be interesting to track whether or not we are inside a catch - // function, but that might make the algorithm more brittle than it needs - // to be. - - // The end catch call can occur in one of two places: either in a - // landingpad - // block that is part of the catch handlers exception mechanism, or at the - // end of the catch block. If it occurs in a landing pad, we must skip it - // and continue so that the landing pad gets cloned. - // FIXME: This case isn't fully supported yet and shouldn't turn up in any - // of the test cases until it is. - if (IntrinCall->getParent()->isLandingPad()) - return CloningDirector::SkipInstruction; - - // If an end catch occurs anywhere else the next instruction should be an - // unconditional branch instruction that we want to replace with a return - // to the the address of the branch target. - const BasicBlock *EndCatchBB = IntrinCall->getParent(); - const TerminatorInst *Terminator = EndCatchBB->getTerminator(); - const BranchInst *Branch = dyn_cast<BranchInst>(Terminator); - assert(Branch && Branch->isUnconditional()); - assert(std::next(BasicBlock::const_iterator(IntrinCall)) == - BasicBlock::const_iterator(Branch)); - - ReturnInst::Create(NewBB->getContext(), - BlockAddress::get(Branch->getSuccessor(0)), NewBB); - - // We just added a terminator to the cloned block. - // Tell the caller to stop processing the current basic block so that - // the branch instruction will be skipped. + // Nested landing pads will be cloned as stubs, with just the + // landingpad instruction and an unreachable instruction. When + // all landingpads have been outlined, we'll replace this with the + // llvm.eh.actions call and indirect branch created when the + // landing pad was outlined. + if (auto *NestedLPad = dyn_cast<LandingPadInst>(Inst)) { + Instruction *NewInst = NestedLPad->clone(); + if (NestedLPad->hasName()) + NewInst->setName(NestedLPad->getName()); + // FIXME: Store this mapping somewhere else also. + VMap[NestedLPad] = NewInst; + BasicBlock::InstListType &InstList = NewBB->getInstList(); + InstList.push_back(NewInst); + InstList.push_back(new UnreachableInst(NewBB->getContext())); return CloningDirector::StopCloningBB; } - if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>())) { - auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst); - Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts(); - // This causes a replacement that will collapse the landing pad CFG based - // on the filter function we intend to match. - if (Selector == CurrentSelector) - VMap[Inst] = ConstantInt::get(SelectorIDType, 1); - else - VMap[Inst] = ConstantInt::get(SelectorIDType, 0); - // Tell the caller not to clone this instruction. 
- return CloningDirector::SkipInstruction; - } + + if (auto *Invoke = dyn_cast<InvokeInst>(Inst)) + return handleInvoke(VMap, Invoke, NewBB); + + if (auto *Resume = dyn_cast<ResumeInst>(Inst)) + return handleResume(VMap, Resume, NewBB); + + if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>())) + return handleBeginCatch(VMap, Inst, NewBB); + if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>())) + return handleEndCatch(VMap, Inst, NewBB); + if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>())) + return handleTypeIdFor(VMap, Inst, NewBB); // Continue with the default cloning behavior. return CloningDirector::CloneInstruction; } +CloningDirector::CloningAction WinEHCatchDirector::handleBeginCatch( + ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { + // The argument to the call is some form of the first element of the + // landingpad aggregate value, but that doesn't matter. It isn't used + // here. + // The second argument is an outparameter where the exception object will be + // stored. Typically the exception object is a scalar, but it can be an + // aggregate when catching by value. + // FIXME: Leave something behind to indicate where the exception object lives + // for this handler. Should it be part of llvm.eh.actions? + assert(ExceptionObjectVar == nullptr && "Multiple calls to " + "llvm.eh.begincatch found while " + "outlining catch handler."); + ExceptionObjectVar = Inst->getOperand(1)->stripPointerCasts(); + return CloningDirector::SkipInstruction; +} + +CloningDirector::CloningAction +WinEHCatchDirector::handleEndCatch(ValueToValueMapTy &VMap, + const Instruction *Inst, BasicBlock *NewBB) { + auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst); + // It might be interesting to track whether or not we are inside a catch + // function, but that might make the algorithm more brittle than it needs + // to be. + + // The end catch call can occur in one of two places: either in a + // landingpad block that is part of the catch handler's exception mechanism, + // or at the end of the catch block. If it occurs in a landing pad, we must + // skip it and continue so that the landing pad gets cloned. + // FIXME: This case isn't fully supported yet and shouldn't turn up in any + // of the test cases until it is. + if (IntrinCall->getParent()->isLandingPad()) + return CloningDirector::SkipInstruction; + + // If an end catch occurs anywhere else the next instruction should be an + // unconditional branch instruction that we want to replace with a return + // to the address of the branch target. + const BasicBlock *EndCatchBB = IntrinCall->getParent(); + const TerminatorInst *Terminator = EndCatchBB->getTerminator(); + const BranchInst *Branch = dyn_cast<BranchInst>(Terminator); + assert(Branch && Branch->isUnconditional()); + assert(std::next(BasicBlock::const_iterator(IntrinCall)) == + BasicBlock::const_iterator(Branch)); + + BasicBlock *ContinueLabel = Branch->getSuccessor(0); + ReturnInst::Create(NewBB->getContext(), BlockAddress::get(ContinueLabel), + NewBB); + ReturnTargets.push_back(ContinueLabel); + + // We just added a terminator to the cloned block. + // Tell the caller to stop processing the current basic block so that + // the branch instruction will be skipped.
+ return CloningDirector::StopCloningBB; +} + +CloningDirector::CloningAction WinEHCatchDirector::handleTypeIdFor( + ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { + auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst); + Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts(); + // This causes a replacement that will collapse the landing pad CFG based + // on the filter function we intend to match. + if (Selector == CurrentSelector) + VMap[Inst] = ConstantInt::get(SelectorIDType, 1); + else + VMap[Inst] = ConstantInt::get(SelectorIDType, 0); + // Tell the caller not to clone this instruction. + return CloningDirector::SkipInstruction; +} + +CloningDirector::CloningAction +WinEHCatchDirector::handleInvoke(ValueToValueMapTy &VMap, + const InvokeInst *Invoke, BasicBlock *NewBB) { + return CloningDirector::CloneInstruction; +} + +CloningDirector::CloningAction +WinEHCatchDirector::handleResume(ValueToValueMapTy &VMap, + const ResumeInst *Resume, BasicBlock *NewBB) { + // Resume instructions shouldn't be reachable from catch handlers. + // We still need to handle them, but they will be pruned. + BasicBlock::InstListType &InstList = NewBB->getInstList(); + InstList.push_back(new UnreachableInst(NewBB->getContext())); + return CloningDirector::StopCloningBB; +} + +CloningDirector::CloningAction WinEHCleanupDirector::handleBeginCatch( + ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { + // Catch blocks within cleanup handlers will always be unreachable. + // We'll insert an unreachable instruction now, but it will be pruned + // before the cloning process is complete. + BasicBlock::InstListType &InstList = NewBB->getInstList(); + InstList.push_back(new UnreachableInst(NewBB->getContext())); + return CloningDirector::StopCloningBB; +} + +CloningDirector::CloningAction WinEHCleanupDirector::handleEndCatch( + ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { + // Catch blocks within cleanup handlers will always be unreachable. + // We'll insert an unreachable instruction now, but it will be pruned + // before the cloning process is complete. + BasicBlock::InstListType &InstList = NewBB->getInstList(); + InstList.push_back(new UnreachableInst(NewBB->getContext())); + return CloningDirector::StopCloningBB; +} + +CloningDirector::CloningAction WinEHCleanupDirector::handleTypeIdFor( + ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { + // If we encounter a selector comparison while cloning a cleanup handler, + // we want to stop cloning immediately. Anything after the dispatch + // will be outlined into a different handler. + BasicBlock *CatchHandler; + Constant *Selector; + BasicBlock *NextBB; + if (isSelectorDispatch(const_cast<BasicBlock *>(Inst->getParent()), + CatchHandler, Selector, NextBB)) { + ReturnInst::Create(NewBB->getContext(), nullptr, NewBB); + return CloningDirector::StopCloningBB; + } + // If eh.typeid.for is called for any other reason, it can be ignored. + VMap[Inst] = ConstantInt::get(SelectorIDType, 0); + return CloningDirector::SkipInstruction; +} + +CloningDirector::CloningAction WinEHCleanupDirector::handleInvoke( + ValueToValueMapTy &VMap, const InvokeInst *Invoke, BasicBlock *NewBB) { + // All invokes in cleanup handlers can be replaced with calls. + SmallVector<Value *, 16> CallArgs(Invoke->op_begin(), Invoke->op_end() - 3); + // Insert a normal call instruction...
+ CallInst *NewCall = + CallInst::Create(const_cast<Value *>(Invoke->getCalledValue()), CallArgs, + Invoke->getName(), NewBB); + NewCall->setCallingConv(Invoke->getCallingConv()); + NewCall->setAttributes(Invoke->getAttributes()); + NewCall->setDebugLoc(Invoke->getDebugLoc()); + VMap[Invoke] = NewCall; + + // Insert an unconditional branch to the normal destination. + BranchInst::Create(Invoke->getNormalDest(), NewBB); + + // The unwind destination won't be cloned into the new function, so + // we don't need to clean up its phi nodes. + + // We just added a terminator to the cloned block. + // Tell the caller to stop processing the current basic block. + return CloningDirector::StopCloningBB; +} + +CloningDirector::CloningAction WinEHCleanupDirector::handleResume( + ValueToValueMapTy &VMap, const ResumeInst *Resume, BasicBlock *NewBB) { + ReturnInst::Create(NewBB->getContext(), nullptr, NewBB); + + // We just added a terminator to the cloned block. + // Tell the caller to stop processing the current basic block so that + // the branch instruction will be skipped. + return CloningDirector::StopCloningBB; +} + WinEHFrameVariableMaterializer::WinEHFrameVariableMaterializer( Function *OutlinedFn, FrameVarInfoMap &FrameVarInfo) : FrameVarInfo(FrameVarInfo), Builder(OutlinedFn->getContext()) { Builder.SetInsertPoint(&OutlinedFn->getEntryBlock()); - // FIXME: Do something with the FrameVarMapped so that it is shared across the - // function. } Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) { - // If we're asked to materialize an alloca variable, we temporarily - // create a matching alloca in the outlined function. When all the - // outlining is complete, we'll collect these into a structure and - // replace these temporary allocas with GEPs referencing the frame - // allocation block. + // If we're asked to materialize a value that is an instruction, we + // temporarily create an alloca in the outlined function and add this + // to the FrameVarInfo map. When all the outlining is complete, we'll + // collect these into a structure, spilling non-alloca values in the + // parent frame as necessary, and replace these temporary allocas with + // GEPs referencing the frame allocation block. + + // If the value is an alloca, the mapping is direct. if (auto *AV = dyn_cast<AllocaInst>(V)) { - AllocaInst *NewAlloca = Builder.CreateAlloca( - AV->getAllocatedType(), AV->getArraySize(), AV->getName()); - FrameVarInfo[AV].Allocas.push_back(NewAlloca); + AllocaInst *NewAlloca = dyn_cast<AllocaInst>(AV->clone()); + Builder.Insert(NewAlloca, AV->getName()); + FrameVarInfo[AV].push_back(NewAlloca); return NewAlloca; } -// FIXME: Do PHI nodes need special handling? + // For other types of instructions or arguments, we need an alloca based on + // the value's type and a load of the alloca. The alloca will be replaced + // by a GEP, but the load will stay. In the parent function, the value will + // be spilled to a location in the frame allocation block. + if (isa<Instruction>(V) || isa<Argument>(V)) { + AllocaInst *NewAlloca = + Builder.CreateAlloca(V->getType(), nullptr, "eh.temp.alloca"); + FrameVarInfo[V].push_back(NewAlloca); + LoadInst *NewLoad = Builder.CreateLoad(NewAlloca, V->getName() + ".reload"); + return NewLoad; + } -// FIXME: Are there other cases we can handle better? GEP, ExtractValue, etc. + // Don't materialize other values. 
+ return nullptr; +} -// FIXME: This doesn't work during cloning because it finds an instruction -// in the use list that isn't yet part of a basic block. -#if 0 - // If we're asked to remap some other instruction, we'll need to - // spill it to an alloca variable in the parent function and add a - // temporary alloca in the outlined function to be processed as - // described above. - Instruction *Inst = dyn_cast<Instruction>(V); - if (Inst) { - AllocaInst *Spill = DemoteRegToStack(*Inst, true); - AllocaInst *NewAlloca = Builder.CreateAlloca(Spill->getAllocatedType(), - Spill->getArraySize()); - FrameVarMap[AV] = NewAlloca; - return NewAlloca; } -#endif +

+// This function maps the catch and cleanup handlers that are reachable from the +// specified landing pad. The landing pad sequence will have this basic shape: +// +// <cleanup handler> +// <selector comparison> +// <catch handler> +// <cleanup handler> +// <selector comparison> +// <catch handler> +// <cleanup handler> +// ... +// +// Any of the cleanup slots may be absent. The cleanup slots may be occupied by +// any arbitrary control flow, but all paths through the cleanup code must +// eventually reach the next selector comparison and no path can skip to a +// different selector comparison, though some paths may terminate abnormally. +// Therefore, we will use a depth first search from the start of any given +// cleanup block and stop searching when we find the next selector comparison. +// +// If the landingpad instruction does not have a catch clause, we will assume +// that any instructions other than selector comparisons and catch handlers can +// be ignored. In practice, these will only be the boilerplate instructions. +// +// The catch handlers may also have any control structure, but we are only +// interested in the start of the catch handlers, so we don't need to actually +// follow the flow of the catch handlers. The start of the catch handlers can +// be located from the compare instructions, but they can be skipped in the +// flow by following the contrary branch. +void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad, + LandingPadActions &Actions) { + unsigned int NumClauses = LPad->getNumClauses(); + unsigned int HandlersFound = 0; + BasicBlock *BB = LPad->getParent(); + + DEBUG(dbgs() << "Mapping landing pad: " << BB->getName() << "\n"); + + if (NumClauses == 0) { + // This landing pad contains only cleanup code. + CleanupHandler *Action = new CleanupHandler(BB); + CleanupHandlerMap[BB] = Action; + Actions.insertCleanupHandler(Action); + DEBUG(dbgs() << " Assuming cleanup code in block " << BB->getName() + << "\n"); + assert(LPad->isCleanup()); + return; + } + + VisitedBlockSet VisitedBlocks; + + while (HandlersFound != NumClauses) { + BasicBlock *NextBB = nullptr; + + // See if the clause we're looking for is a catch-all. + // If so, the catch begins immediately. + if (isa<ConstantPointerNull>(LPad->getClause(HandlersFound))) { + // The catch all must occur last. + assert(HandlersFound == NumClauses - 1); + + // For C++ EH, check if there is any interesting cleanup code before we + // begin the catch. This is important because cleanups cannot rethrow + // exceptions but code called from catches can. For SEH, it isn't + // important if some finally code before a catch-all is executed out of + // line or after recovering from the exception.
+ if (Personality == EHPersonality::MSVC_CXX) { + if (auto *CleanupAction = findCleanupHandler(BB, BB)) { + // Add a cleanup entry to the list. + Actions.insertCleanupHandler(CleanupAction); + DEBUG(dbgs() << " Found cleanup code in block " + << CleanupAction->getStartBlock()->getName() << "\n"); + } + } + + // Add the catch handler to the action list. + CatchHandler *Action = + new CatchHandler(BB, LPad->getClause(HandlersFound), nullptr); + CatchHandlerMap[BB] = Action; + Actions.insertCatchHandler(Action); + DEBUG(dbgs() << " Catch-all handler at block " << BB->getName() << "\n"); + ++HandlersFound; + + // Once we reach a catch-all, don't expect to hit a resume instruction. + BB = nullptr; + break; + } + + CatchHandler *CatchAction = findCatchHandler(BB, NextBB, VisitedBlocks); + assert(CatchAction); + // See if there is any interesting code executed before the dispatch. + if (auto *CleanupAction = + findCleanupHandler(BB, CatchAction->getStartBlock())) { + // Add a cleanup entry to the list. + Actions.insertCleanupHandler(CleanupAction); + DEBUG(dbgs() << " Found cleanup code in block " + << CleanupAction->getStartBlock()->getName() << "\n"); + } + + ++HandlersFound; + + // Add the catch handler to the action list. + Actions.insertCatchHandler(CatchAction); + DEBUG(dbgs() << " Found catch dispatch in block " + << CatchAction->getStartBlock()->getName() << "\n"); + + // Move on to the block after the catch handler. + BB = NextBB; + } + + // If we didn't wind up in a catch-all, see if there is any interesting code + // executed before the resume. + if (auto *CleanupAction = findCleanupHandler(BB, BB)) { + // Add a cleanup entry to the list. + Actions.insertCleanupHandler(CleanupAction); + DEBUG(dbgs() << " Found cleanup code in block " + << CleanupAction->getStartBlock()->getName() << "\n"); + } + + // It's possible that some optimization moved code into a landingpad that + // wasn't previously being used for cleanup. If that happens, we need to + // execute that extra code from a cleanup handler. + if (Actions.includesCleanup() && !LPad->isCleanup()) + LPad->setCleanup(true); +} + +// This function searches starting with the input block for the next +// block that terminates with a branch whose condition is based on a selector +// comparison. This may be the input block. See the mapLandingPadBlocks +// comments for a discussion of control flow assumptions. +// +CatchHandler *WinEHPrepare::findCatchHandler(BasicBlock *BB, + BasicBlock *&NextBB, + VisitedBlockSet &VisitedBlocks) { + // See if we've already found a catch handler for this block; if so, use it. + // Call count() first to avoid creating a null entry for blocks + // we haven't seen before. + if (CatchHandlerMap.count(BB) && CatchHandlerMap[BB] != nullptr) { + CatchHandler *Action = cast<CatchHandler>(CatchHandlerMap[BB]); + NextBB = Action->getNextBB(); + return Action; } -#endif + // VisitedBlocks applies only to the current search. We still + // need to consider blocks that we've visited while mapping other + // landing pads. + VisitedBlocks.insert(BB); + + BasicBlock *CatchBlock = nullptr; + Constant *Selector = nullptr; + + // If this is the first time we've visited this block from any landing pad, + // look to see if it is a selector dispatch block. + if (!CatchHandlerMap.count(BB)) { + if (isSelectorDispatch(BB, CatchBlock, Selector, NextBB)) { + CatchHandler *Action = new CatchHandler(BB, Selector, NextBB); + CatchHandlerMap[BB] = Action; + return Action; + } + } + + // Visit each successor, looking for the dispatch.
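The dispatch shape that findCatchHandler searches for is easier to see in isolation. Below is a minimal recognition sketch using the same PatternMatch helpers this patch already relies on; it is an illustration only, not the actual isSelectorDispatch implementation, which is defined outside this hunk:

#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// True if BB ends in a conditional branch whose condition is an equality
// compare against the result of @llvm.eh.typeid.for, i.e. the selector
// comparison described in the mapLandingPadBlocks comments.
static bool looksLikeSelectorDispatch(BasicBlock *BB) {
  auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
  if (!Br || !Br->isConditional())
    return false;
  auto *Cmp = dyn_cast<CmpInst>(Br->getCondition());
  if (!Cmp || !Cmp->isEquality())
    return false;
  // One side of the compare should be the typeid intrinsic call.
  for (Value *Op : Cmp->operands())
    if (match(Op, m_Intrinsic<Intrinsic::eh_typeid_for>()))
      return true;
  return false;
}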
+ // FIXME: We expect to find the dispatch quickly, so this will probably + // work better as a breadth-first search. + for (BasicBlock *Succ : successors(BB)) { + if (VisitedBlocks.count(Succ)) + continue; + + CatchHandler *Action = findCatchHandler(Succ, NextBB, VisitedBlocks); + if (Action) + return Action; + } + return nullptr; +} + +// This is a helper function to combine repeated code from findCleanupHandler. +static CleanupHandler *createCleanupHandler(CleanupHandlerMapTy &CleanupHandlerMap, + BasicBlock *BB) { + CleanupHandler *Action = new CleanupHandler(BB); + CleanupHandlerMap[BB] = Action; + return Action; +} + +// This function searches starting with the input block for the next block that +// contains code that is not part of a catch handler and would not be eliminated +// during handler outlining. +// +CleanupHandler *WinEHPrepare::findCleanupHandler(BasicBlock *StartBB, + BasicBlock *EndBB) { + // Here we will skip over the following: + // + // Landing pad prologs + // + // Unconditional branches + // + // Selector dispatch + // + // Resume pattern + // + // Anything else marks the start of an interesting block. + + BasicBlock *BB = StartBB; + // Anything other than an unconditional branch will kick us out of this loop + // one way or another. + while (BB) { + // If we've already scanned this block, don't scan it again. If it is + // a cleanup block, there will be an action in the CleanupHandlerMap. + // If we've scanned it and it is not a cleanup block, there will be a + // nullptr in the CleanupHandlerMap. If we have not scanned it, there will + // be no entry in the CleanupHandlerMap. We must call count() first to + // avoid creating a null entry for blocks we haven't scanned. + if (CleanupHandlerMap.count(BB)) { + if (auto *Action = CleanupHandlerMap[BB]) { + return cast<CleanupHandler>(Action); + } else { + // Here we handle the case where the cleanup handler map contains a + // value for this block but the value is a nullptr. This means that + // we have previously analyzed the block and determined that it did + // not contain any cleanup code. Based on the earlier analysis, we + // know that the block must end in either an unconditional branch, a + // resume, or a conditional branch that is predicated on a comparison + // with a selector. Either the resume or the selector dispatch + // would terminate the search for cleanup code, so the unconditional + // branch is the only case for which we might need to continue + // searching. + if (BB == EndBB) + return nullptr; + BasicBlock *SuccBB; + if (!match(BB->getTerminator(), m_UnconditionalBr(SuccBB))) + return nullptr; + BB = SuccBB; + continue; + } + } + + // Create an entry in the cleanup handler map for this block. Initially + // we create an entry that says this isn't a cleanup block. If we find + // cleanup code, the caller will replace this entry. + CleanupHandlerMap[BB] = nullptr; + + TerminatorInst *Terminator = BB->getTerminator(); + + // Landing pad blocks have extra instructions we need to accept.
+ LandingPadMap *LPadMap = nullptr; + if (BB->isLandingPad()) { + LandingPadInst *LPad = BB->getLandingPadInst(); + LPadMap = &LPadMaps[LPad]; + if (!LPadMap->isInitialized()) + LPadMap->mapLandingPad(LPad); + } + + // Look for the bare resume pattern: + // %exn2 = load i8** %exn.slot + // %sel2 = load i32* %ehselector.slot + // %lpad.val1 = insertvalue { i8*, i32 } undef, i8* %exn2, 0 + // %lpad.val2 = insertvalue { i8*, i32 } %lpad.val1, i32 %sel2, 1 + // resume { i8*, i32 } %lpad.val2 + if (auto *Resume = dyn_cast<ResumeInst>(Terminator)) { + InsertValueInst *Insert1 = nullptr; + InsertValueInst *Insert2 = nullptr; + Value *ResumeVal = Resume->getOperand(0); + // If there is only one landingpad, we may use the lpad directly with no + // insertions. + if (isa<LandingPadInst>(ResumeVal)) + return nullptr; + if (!isa<PHINode>(ResumeVal)) { + Insert2 = dyn_cast<InsertValueInst>(ResumeVal); + if (!Insert2) + return createCleanupHandler(CleanupHandlerMap, BB); + Insert1 = dyn_cast<InsertValueInst>(Insert2->getAggregateOperand()); + if (!Insert1) + return createCleanupHandler(CleanupHandlerMap, BB); + } + for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); + II != IE; ++II) { + Instruction *Inst = II; + if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst)) + continue; + if (Inst == Insert1 || Inst == Insert2 || Inst == Resume) + continue; + if (!Inst->hasOneUse() || + (Inst->user_back() != Insert1 && Inst->user_back() != Insert2)) { + return createCleanupHandler(CleanupHandlerMap, BB); + } + } + return nullptr; + } + + BranchInst *Branch = dyn_cast<BranchInst>(Terminator); + if (Branch) { + if (Branch->isConditional()) { + // Look for the selector dispatch. + // %sel = load i32* %ehselector.slot + // %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIf to i8*)) + // %matches = icmp eq i32 %sel12, %2 + // br i1 %matches, label %catch14, label %eh.resume + CmpInst *Compare = dyn_cast<CmpInst>(Branch->getCondition()); + if (!Compare || !Compare->isEquality()) + return createCleanupHandler(CleanupHandlerMap, BB); + for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), + IE = BB->end(); + II != IE; ++II) { + Instruction *Inst = II; + if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst)) + continue; + if (Inst == Compare || Inst == Branch) + continue; + if (!Inst->hasOneUse() || (Inst->user_back() != Compare)) + return createCleanupHandler(CleanupHandlerMap, BB); + if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>())) + continue; + if (!isa<LoadInst>(Inst)) + return createCleanupHandler(CleanupHandlerMap, BB); + } + // The selector dispatch block should always terminate our search. + assert(BB == EndBB); + return nullptr; + } else { + // Look for empty blocks with unconditional branches. + for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), + IE = BB->end(); + II != IE; ++II) { + Instruction *Inst = II; + if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst)) + continue; + if (Inst == Branch) + continue; + if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>())) + continue; + // Anything else makes this interesting cleanup code. + return createCleanupHandler(CleanupHandlerMap, BB); + } + if (BB == EndBB) + return nullptr; + // The branch was unconditional. + BB = Branch->getSuccessor(0); + continue; + } // End else of if branch was conditional + } // End if Branch + + // Anything else makes this interesting cleanup code. 
+ return createCleanupHandler(CleanupHandlerMap, BB); + } return nullptr; } diff --git a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index 7d77290..9f56214 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -9,6 +9,7 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/Casting.h" #include "llvm/Support/DataTypes.h" diff --git a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp index fdb6dd2..cd6fbef 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp @@ -67,8 +67,7 @@ void DWARFDebugLoc::parse(DataExtractor data, unsigned AddressSize) { // A single location description describing the location of the object... StringRef str = data.getData().substr(Offset, Bytes); Offset += Bytes; - E.Loc.reserve(str.size()); - std::copy(str.begin(), str.end(), std::back_inserter(E.Loc)); + E.Loc.append(str.begin(), str.end()); Loc.Entries.push_back(std::move(E)); } } diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp index 45bd197..6946f83 100644 --- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -18,6 +18,7 @@ #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include <cassert> +#include <climits> using namespace llvm; using namespace dwarf; using namespace syntax; @@ -557,6 +558,24 @@ Optional<uint64_t> DWARFFormValue::getAsUnsignedConstant() const { return Value.uval; } +Optional<int64_t> DWARFFormValue::getAsSignedConstant() const { + if ((!isFormClass(FC_Constant) && !isFormClass(FC_Flag)) || + (Form == DW_FORM_udata && uint64_t(LLONG_MAX) < Value.uval)) + return None; + switch (Form) { + case DW_FORM_data4: + return int32_t(Value.uval); + case DW_FORM_data2: + return int16_t(Value.uval); + case DW_FORM_data1: + return int8_t(Value.uval); + case DW_FORM_sdata: + case DW_FORM_data8: + default: + return Value.sval; + } +} + Optional<ArrayRef<uint8_t>> DWARFFormValue::getAsBlock() const { if (!isFormClass(FC_Block) && !isFormClass(FC_Exprloc)) return None; diff --git a/lib/DebugInfo/PDB/CMakeLists.txt b/lib/DebugInfo/PDB/CMakeLists.txt index 87e357e..ed8c674 100644 --- a/lib/DebugInfo/PDB/CMakeLists.txt +++ b/lib/DebugInfo/PDB/CMakeLists.txt @@ -73,4 +73,4 @@ add_llvm_library(LLVMDebugInfoPDB ${LIBPDB_ADDITIONAL_HEADER_DIRS} ) -target_link_libraries(LLVMDebugInfoPDB ${cmake_2_8_12_INTERFACE} "${LIBPDB_ADDITIONAL_LIBRARIES}") +target_link_libraries(LLVMDebugInfoPDB INTERFACE "${LIBPDB_ADDITIONAL_LIBRARIES}") diff --git a/lib/DebugInfo/PDB/DIA/DIASession.cpp b/lib/DebugInfo/PDB/DIA/DIASession.cpp index 24791f2..4966bea 100644 --- a/lib/DebugInfo/PDB/DIA/DIASession.cpp +++ b/lib/DebugInfo/PDB/DIA/DIASession.cpp @@ -23,28 +23,45 @@ namespace {} DIASession::DIASession(CComPtr<IDiaSession> DiaSession) : Session(DiaSession) {} -DIASession *DIASession::createFromPdb(StringRef Path) { - CComPtr<IDiaDataSource> DataSource; - CComPtr<IDiaSession> Session; +PDB_ErrorCode DIASession::createFromPdb(StringRef Path, + std::unique_ptr<IPDBSession> &Session) { + CComPtr<IDiaDataSource> DiaDataSource; + CComPtr<IDiaSession> DiaSession; // We assume that CoInitializeEx has already been called by the executable. 
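The sign behavior of the new DWARFFormValue::getAsSignedConstant is worth pinning down with a concrete value. A small illustrative use, assuming FormValue was previously extracted from a DW_FORM_data1 attribute whose raw byte is 0xFF (the surrounding setup is hypothetical):

#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void dumpConstant(const DWARFFormValue &FormValue) {
  // getAsSignedConstant reinterprets the byte as int8_t, so 0xFF comes back
  // as -1; getAsUnsignedConstant on the same value still yields 255.
  if (Optional<int64_t> Signed = FormValue.getAsSignedConstant())
    outs() << "signed: " << *Signed << "\n";      // prints "signed: -1"
  if (Optional<uint64_t> Unsigned = FormValue.getAsUnsignedConstant())
    outs() << "unsigned: " << *Unsigned << "\n";  // prints "unsigned: 255"
}

Note also that oversized DW_FORM_udata values are rejected: when the stored value exceeds LLONG_MAX, the accessor returns None rather than a wrapped negative number.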
- HRESULT Result = ::CoCreateInstance(CLSID_DiaSource, nullptr, - CLSCTX_INPROC_SERVER, IID_IDiaDataSource, - reinterpret_cast<LPVOID *>(&DataSource)); + HRESULT Result = ::CoCreateInstance( + CLSID_DiaSource, nullptr, CLSCTX_INPROC_SERVER, IID_IDiaDataSource, + reinterpret_cast<LPVOID *>(&DiaDataSource)); if (FAILED(Result)) - return nullptr; + return PDB_ErrorCode::NoPdbImpl; llvm::SmallVector<UTF16, 128> Path16; if (!llvm::convertUTF8ToUTF16String(Path, Path16)) - return nullptr; + return PDB_ErrorCode::InvalidPath; const wchar_t *Path16Str = reinterpret_cast<const wchar_t*>(Path16.data()); - if (FAILED(DataSource->loadDataFromPdb(Path16Str))) - return nullptr; - - if (FAILED(DataSource->openSession(&Session))) - return nullptr; - return new DIASession(Session); + if (FAILED(Result = DiaDataSource->loadDataFromPdb(Path16Str))) { + if (Result == E_PDB_NOT_FOUND) + return PDB_ErrorCode::InvalidPath; + else if (Result == E_PDB_FORMAT) + return PDB_ErrorCode::InvalidFileFormat; + else if (Result == E_INVALIDARG) + return PDB_ErrorCode::InvalidParameter; + else if (Result == E_UNEXPECTED) + return PDB_ErrorCode::AlreadyLoaded; + else + return PDB_ErrorCode::UnknownError; + } + + if (FAILED(Result = DiaDataSource->openSession(&DiaSession))) { + if (Result == E_OUTOFMEMORY) + return PDB_ErrorCode::NoMemory; + else + return PDB_ErrorCode::UnknownError; + } + + Session.reset(new DIASession(DiaSession)); + return PDB_ErrorCode::Success; } uint64_t DIASession::getLoadAddress() const { diff --git a/lib/DebugInfo/PDB/PDB.cpp b/lib/DebugInfo/PDB/PDB.cpp index aa84c28..a07396d 100644 --- a/lib/DebugInfo/PDB/PDB.cpp +++ b/lib/DebugInfo/PDB/PDB.cpp @@ -20,11 +20,11 @@ using namespace llvm; -std::unique_ptr<IPDBSession> llvm::createPDBReader(PDB_ReaderType Type, - StringRef Path) { +PDB_ErrorCode llvm::createPDBReader(PDB_ReaderType Type, StringRef Path, + std::unique_ptr<IPDBSession> &Session) { // Create the correct concrete instance type based on the value of Type. 
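Callers of the PDB factory now receive the session through an out-parameter and get an explicit error code instead of a bare null pointer. A sketch of the updated calling convention (openExamplePdb and the diagnostic output are hypothetical):

#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/DebugInfo/PDB/PDB.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void openExamplePdb(StringRef Path) {
  std::unique_ptr<IPDBSession> Session;
  PDB_ErrorCode Error = createPDBReader(PDB_ReaderType::DIA, Path, Session);
  if (Error != PDB_ErrorCode::Success) {
    // Failures are now distinguishable: InvalidPath, InvalidFileFormat,
    // AlreadyLoaded, NoMemory, or NoPdbImpl when no DIA SDK is available.
    errs() << "could not open " << Path << "\n";
    return;
  }
  // Session is valid here and owns the underlying DIA session.
}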
#if HAVE_DIA_SDK - return std::unique_ptr<DIASession>(DIASession::createFromPdb(Path)); + return DIASession::createFromPdb(Path, Session); #endif - return nullptr; + return PDB_ErrorCode::NoPdbImpl; } diff --git a/lib/DebugInfo/PDB/PDBExtras.cpp b/lib/DebugInfo/PDB/PDBExtras.cpp index 1002b2e..4b9437c 100644 --- a/lib/DebugInfo/PDB/PDBExtras.cpp +++ b/lib/DebugInfo/PDB/PDBExtras.cpp @@ -247,27 +247,21 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const PDB_SymType &Tag) { return OS; } -raw_ostream &llvm::operator<<(raw_ostream &OS, const PDB_BuiltinType &Type) { +raw_ostream &llvm::operator<<(raw_ostream &OS, const PDB_MemberAccess &Access) { + switch (Access) { + CASE_OUTPUT_ENUM_CLASS_STR(PDB_MemberAccess, Public, "public", OS) + CASE_OUTPUT_ENUM_CLASS_STR(PDB_MemberAccess, Protected, "protected", OS) + CASE_OUTPUT_ENUM_CLASS_STR(PDB_MemberAccess, Private, "private", OS) + } + return OS; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const PDB_UdtType &Type) { switch (Type) { - CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, Void, "void", OS) - CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, Char, "char", OS) - CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, WCharT, "wchar_t", OS) - CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, Int, "int", OS) - CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, UInt, "uint", OS) - CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, Float, "float", OS) - CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, BCD, "BCD", OS) - CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, Bool, "bool", OS) - CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, Long, "long", OS) - CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, ULong, "ulong", OS) - CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, Currency, "CURRENCY", OS) - CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, Date, "DATE", OS) - CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, Variant, "VARIANT", OS) - CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, Complex, "complex", OS) - CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, Bitfield, "bitfield", OS) - CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, BSTR, "BSTR", OS) - CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, HResult, "HRESULT", OS) - default: - break; + CASE_OUTPUT_ENUM_CLASS_STR(PDB_UdtType, Class, "class", OS) + CASE_OUTPUT_ENUM_CLASS_STR(PDB_UdtType, Struct, "struct", OS) + CASE_OUTPUT_ENUM_CLASS_STR(PDB_UdtType, Interface, "interface", OS) + CASE_OUTPUT_ENUM_CLASS_STR(PDB_UdtType, Union, "union", OS) } return OS; } @@ -309,7 +303,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const Variant &Value) { OS << Value.Int64; break; case PDB_VariantType::Int8: - OS << Value.Int8; + OS << static_cast<int>(Value.Int8); break; case PDB_VariantType::Single: OS << Value.Single; @@ -324,12 +318,11 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const Variant &Value) { OS << Value.UInt64; break; case PDB_VariantType::UInt8: - OS << Value.UInt8; + OS << static_cast<unsigned>(Value.UInt8); break; default: OS << Value.Type; } - OS << " {" << Value.Type << "}"; return OS; } diff --git a/lib/DebugInfo/PDB/PDBSymDumper.cpp b/lib/DebugInfo/PDB/PDBSymDumper.cpp index 0f29c74..121e2d1 100644 --- a/lib/DebugInfo/PDB/PDBSymDumper.cpp +++ b/lib/DebugInfo/PDB/PDBSymDumper.cpp @@ -21,157 +21,126 @@ PDBSymDumper::PDBSymDumper(bool ShouldRequireImpl) PDBSymDumper::~PDBSymDumper() {} -void PDBSymDumper::dump(const PDBSymbolAnnotation &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolAnnotation &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolAnnotation) } -void PDBSymDumper::dump(const PDBSymbolBlock &Symbol, raw_ostream 
&OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolBlock &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolBlock) } -void PDBSymDumper::dump(const PDBSymbolCompiland &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolCompiland &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolCompiland) } -void PDBSymDumper::dump(const PDBSymbolCompilandDetails &Symbol, - raw_ostream &OS, int Indent) { +void PDBSymDumper::dump(const PDBSymbolCompilandDetails &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolCompilandDetails) } -void PDBSymDumper::dump(const PDBSymbolCompilandEnv &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolCompilandEnv &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolCompilandEnv) } -void PDBSymDumper::dump(const PDBSymbolCustom &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolCustom &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolCustom) } -void PDBSymDumper::dump(const PDBSymbolData &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolData &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolData) } -void PDBSymDumper::dump(const PDBSymbolExe &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolExe &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolExe) } -void PDBSymDumper::dump(const PDBSymbolFunc &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolFunc &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolFunc) } -void PDBSymDumper::dump(const PDBSymbolFuncDebugEnd &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolFuncDebugEnd &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolFuncDebugEnd) } -void PDBSymDumper::dump(const PDBSymbolFuncDebugStart &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolFuncDebugStart &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolFuncDebugStart) } -void PDBSymDumper::dump(const PDBSymbolLabel &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolLabel &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolLabel) } -void PDBSymDumper::dump(const PDBSymbolPublicSymbol &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolPublicSymbol &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolPublicSymbol) } -void PDBSymDumper::dump(const PDBSymbolThunk &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolThunk &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolThunk) } -void PDBSymDumper::dump(const PDBSymbolTypeArray &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolTypeArray &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeArray) } -void PDBSymDumper::dump(const PDBSymbolTypeBaseClass &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolTypeBaseClass &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeBaseClass) } -void PDBSymDumper::dump(const PDBSymbolTypeBuiltin &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolTypeBuiltin &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeBuiltin) } -void PDBSymDumper::dump(const PDBSymbolTypeCustom &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolTypeCustom &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeCustom) } -void PDBSymDumper::dump(const PDBSymbolTypeDimension &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolTypeDimension &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeDimension) } -void PDBSymDumper::dump(const 
PDBSymbolTypeEnum &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolTypeEnum &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeEnum) } -void PDBSymDumper::dump(const PDBSymbolTypeFriend &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolTypeFriend &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeFriend) } -void PDBSymDumper::dump(const PDBSymbolTypeFunctionArg &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolTypeFunctionArg &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeFunctionArg) } -void PDBSymDumper::dump(const PDBSymbolTypeFunctionSig &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolTypeFunctionSig &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeFunctionSig) } -void PDBSymDumper::dump(const PDBSymbolTypeManaged &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolTypeManaged &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeManaged) } -void PDBSymDumper::dump(const PDBSymbolTypePointer &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolTypePointer &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypePointer) } -void PDBSymDumper::dump(const PDBSymbolTypeTypedef &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolTypeTypedef &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeTypedef) } -void PDBSymDumper::dump(const PDBSymbolTypeUDT &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolTypeUDT &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeUDT) } -void PDBSymDumper::dump(const PDBSymbolTypeVTable &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolTypeVTable &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeVTable) } -void PDBSymDumper::dump(const PDBSymbolTypeVTableShape &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolTypeVTableShape &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeVTableShape) } -void PDBSymDumper::dump(const PDBSymbolUnknown &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolUnknown &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolUnknown) } -void PDBSymDumper::dump(const PDBSymbolUsingNamespace &Symbol, raw_ostream &OS, - int Indent) { +void PDBSymDumper::dump(const PDBSymbolUsingNamespace &Symbol) { PDB_SYMDUMP_UNREACHABLE(PDBSymbolUsingNamespace) } diff --git a/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp b/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp index 4c76e3b..a782cad 100644 --- a/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp @@ -19,7 +19,6 @@ PDBSymbolAnnotation::PDBSymbolAnnotation(const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolAnnotation::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); +void PDBSymbolAnnotation::dump(PDBSymDumper &Dumper) const { + Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolBlock.cpp b/lib/DebugInfo/PDB/PDBSymbolBlock.cpp index bb159d5..46b0ea5 100644 --- a/lib/DebugInfo/PDB/PDBSymbolBlock.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolBlock.cpp @@ -20,7 +20,4 @@ PDBSymbolBlock::PDBSymbolBlock(const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolBlock::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); -} +void 
PDBSymbolBlock::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp b/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp index 0c9b190..7436914 100644 --- a/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp @@ -19,7 +19,6 @@ PDBSymbolCompiland::PDBSymbolCompiland(const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolCompiland::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); +void PDBSymbolCompiland::dump(PDBSymDumper &Dumper) const { + Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp b/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp index 208d68f..7b351a0 100644 --- a/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp @@ -20,7 +20,6 @@ PDBSymbolCompilandDetails::PDBSymbolCompilandDetails( const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolCompilandDetails::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); +void PDBSymbolCompilandDetails::dump(PDBSymDumper &Dumper) const { + Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp b/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp index c54b8fb..e863ccf 100644 --- a/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp @@ -26,7 +26,6 @@ std::string PDBSymbolCompilandEnv::getValue() const { return std::string(); } -void PDBSymbolCompilandEnv::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); +void PDBSymbolCompilandEnv::dump(PDBSymDumper &Dumper) const { + Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolCustom.cpp b/lib/DebugInfo/PDB/PDBSymbolCustom.cpp index 1b6b50b..45faa0b 100644 --- a/lib/DebugInfo/PDB/PDBSymbolCustom.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolCustom.cpp @@ -25,7 +25,4 @@ void PDBSymbolCustom::getDataBytes(llvm::SmallVector<uint8_t, 32> &bytes) { RawSymbol->getDataBytes(bytes); } -void PDBSymbolCustom::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); -}
\ No newline at end of file +void PDBSymbolCustom::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
\ No newline at end of file diff --git a/lib/DebugInfo/PDB/PDBSymbolData.cpp b/lib/DebugInfo/PDB/PDBSymbolData.cpp index 6bf7e0f..60dcbc1 100644 --- a/lib/DebugInfo/PDB/PDBSymbolData.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolData.cpp @@ -24,7 +24,4 @@ std::unique_ptr<PDBSymbol> PDBSymbolData::getType() const { return Session.getSymbolById(getTypeId()); } -void PDBSymbolData::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); -}
\ No newline at end of file +void PDBSymbolData::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
\ No newline at end of file diff --git a/lib/DebugInfo/PDB/PDBSymbolExe.cpp b/lib/DebugInfo/PDB/PDBSymbolExe.cpp index ef09193..c9e34ea 100644 --- a/lib/DebugInfo/PDB/PDBSymbolExe.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolExe.cpp @@ -19,7 +19,4 @@ PDBSymbolExe::PDBSymbolExe(const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolExe::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); -} +void PDBSymbolExe::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp index e2d859f..b14af07 100644 --- a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp @@ -98,7 +98,4 @@ std::unique_ptr<PDBSymbolTypeUDT> PDBSymbolFunc::getClassParent() const { return Session.getConcreteSymbolById<PDBSymbolTypeUDT>(getClassParentId()); } -void PDBSymbolFunc::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); -} +void PDBSymbolFunc::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp b/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp index c207488..8e559b3 100644 --- a/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp @@ -20,7 +20,6 @@ PDBSymbolFuncDebugEnd::PDBSymbolFuncDebugEnd( const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolFuncDebugEnd::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); +void PDBSymbolFuncDebugEnd::dump(PDBSymDumper &Dumper) const { + Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp b/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp index 83df22e..ff4254f 100644 --- a/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp @@ -20,7 +20,6 @@ PDBSymbolFuncDebugStart::PDBSymbolFuncDebugStart( const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolFuncDebugStart::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); +void PDBSymbolFuncDebugStart::dump(PDBSymDumper &Dumper) const { + Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolLabel.cpp b/lib/DebugInfo/PDB/PDBSymbolLabel.cpp index ce569e2..f39dee8 100644 --- a/lib/DebugInfo/PDB/PDBSymbolLabel.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolLabel.cpp @@ -19,7 +19,4 @@ PDBSymbolLabel::PDBSymbolLabel(const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolLabel::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); -} +void PDBSymbolLabel::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp b/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp index a7f156c..bd6fe89 100644 --- a/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp @@ -20,7 +20,6 @@ PDBSymbolPublicSymbol::PDBSymbolPublicSymbol( const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolPublicSymbol::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); 
+void PDBSymbolPublicSymbol::dump(PDBSymDumper &Dumper) const { + Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolThunk.cpp b/lib/DebugInfo/PDB/PDBSymbolThunk.cpp index edade83..733eb5f 100644 --- a/lib/DebugInfo/PDB/PDBSymbolThunk.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolThunk.cpp @@ -19,7 +19,4 @@ PDBSymbolThunk::PDBSymbolThunk(const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolThunk::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); -} +void PDBSymbolThunk::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp index ffe6c80..1980965 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp @@ -24,7 +24,6 @@ std::unique_ptr<PDBSymbol> PDBSymbolTypeArray::getElementType() const { return Session.getSymbolById(getTypeId()); } -void PDBSymbolTypeArray::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); +void PDBSymbolTypeArray::dump(PDBSymDumper &Dumper) const { + Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp index c44cc52..c1f0d2f 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp @@ -20,7 +20,6 @@ PDBSymbolTypeBaseClass::PDBSymbolTypeBaseClass( const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolTypeBaseClass::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); +void PDBSymbolTypeBaseClass::dump(PDBSymDumper &Dumper) const { + Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp index f0c94c7..b302b66 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp @@ -19,7 +19,6 @@ PDBSymbolTypeBuiltin::PDBSymbolTypeBuiltin( const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolTypeBuiltin::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); +void PDBSymbolTypeBuiltin::dump(PDBSymDumper &Dumper) const { + Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp index 0fa8f45..cc391f1 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp @@ -20,7 +20,6 @@ PDBSymbolTypeCustom::PDBSymbolTypeCustom(const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolTypeCustom::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); +void PDBSymbolTypeCustom::dump(PDBSymDumper &Dumper) const { + Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp index 47fb08d..1e19d0b 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp @@ -21,7 +21,6 @@ PDBSymbolTypeDimension::PDBSymbolTypeDimension( const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void 
PDBSymbolTypeDimension::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); +void PDBSymbolTypeDimension::dump(PDBSymDumper &Dumper) const { + Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp index 121d41e..8dd26a3 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp @@ -9,7 +9,10 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h" +#include "llvm/DebugInfo/PDB/IPDBSession.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h" #include <utility> @@ -19,7 +22,13 @@ PDBSymbolTypeEnum::PDBSymbolTypeEnum(const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolTypeEnum::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); +std::unique_ptr<PDBSymbolTypeUDT> PDBSymbolTypeEnum::getClassParent() const { + return Session.getConcreteSymbolById<PDBSymbolTypeUDT>(getClassParentId()); } + +std::unique_ptr<PDBSymbolTypeBuiltin> +PDBSymbolTypeEnum::getUnderlyingType() const { + return Session.getConcreteSymbolById<PDBSymbolTypeBuiltin>(getTypeId()); +} + +void PDBSymbolTypeEnum::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp index b2bf72e..d332660 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp @@ -20,7 +20,6 @@ PDBSymbolTypeFriend::PDBSymbolTypeFriend(const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolTypeFriend::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); +void PDBSymbolTypeFriend::dump(PDBSymDumper &Dumper) const { + Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp index f394c04..f8f71ea 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp @@ -19,7 +19,6 @@ PDBSymbolTypeFunctionArg::PDBSymbolTypeFunctionArg( const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolTypeFunctionArg::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); +void PDBSymbolTypeFunctionArg::dump(PDBSymDumper &Dumper) const { + Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp index 1ba397b..8018206 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp @@ -83,7 +83,6 @@ std::unique_ptr<PDBSymbol> PDBSymbolTypeFunctionSig::getClassParent() const { return Session.getSymbolById(ClassId); } -void PDBSymbolTypeFunctionSig::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); +void PDBSymbolTypeFunctionSig::dump(PDBSymDumper &Dumper) const { + Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp index e04fb66..a7fac30 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp @@ -20,7 +20,6 @@ 
PDBSymbolTypeManaged::PDBSymbolTypeManaged( const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolTypeManaged::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); +void PDBSymbolTypeManaged::dump(PDBSymDumper &Dumper) const { + Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp b/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp index d274bf5..082ed83 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp @@ -24,7 +24,6 @@ std::unique_ptr<PDBSymbol> PDBSymbolTypePointer::getPointeeType() const { return Session.getSymbolById(getTypeId()); } -void PDBSymbolTypePointer::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); +void PDBSymbolTypePointer::dump(PDBSymDumper &Dumper) const { + Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp index 12e3ead..5a42699 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp @@ -19,7 +19,6 @@ PDBSymbolTypeTypedef::PDBSymbolTypeTypedef( const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolTypeTypedef::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); +void PDBSymbolTypeTypedef::dump(PDBSymDumper &Dumper) const { + Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp index 8a72368..2b5da29 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp @@ -19,7 +19,4 @@ PDBSymbolTypeUDT::PDBSymbolTypeUDT(const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolTypeUDT::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); -} +void PDBSymbolTypeUDT::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp index a100526..b465d02 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp @@ -19,7 +19,6 @@ PDBSymbolTypeVTable::PDBSymbolTypeVTable(const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolTypeVTable::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); +void PDBSymbolTypeVTable::dump(PDBSymDumper &Dumper) const { + Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp index 6aaa668..16052f1 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp @@ -20,7 +20,6 @@ PDBSymbolTypeVTableShape::PDBSymbolTypeVTableShape( const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolTypeVTableShape::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); +void PDBSymbolTypeVTableShape::dump(PDBSymDumper &Dumper) const { + Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp b/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp index 9cfb88a..48dc115 
100644 --- a/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp @@ -20,7 +20,4 @@ PDBSymbolUnknown::PDBSymbolUnknown(const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolUnknown::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); -} +void PDBSymbolUnknown::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp b/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp index 9176dfb..6cf13de 100644 --- a/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp @@ -20,7 +20,6 @@ PDBSymbolUsingNamespace::PDBSymbolUsingNamespace( const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol) : PDBSymbol(PDBSession, std::move(Symbol)) {} -void PDBSymbolUsingNamespace::dump(raw_ostream &OS, int Indent, - PDBSymDumper &Dumper) const { - Dumper.dump(*this, OS, Indent); +void PDBSymbolUsingNamespace::dump(PDBSymDumper &Dumper) const { + Dumper.dump(*this); } diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 12e0e6a..c586ba7 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/ExecutionEngine/GenericValue.h" @@ -399,33 +400,12 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn, return runFunction(Fn, GVArgs).IntVal.getZExtValue(); } -EngineBuilder::EngineBuilder() { - InitEngine(); -} +EngineBuilder::EngineBuilder() : EngineBuilder(nullptr) {} EngineBuilder::EngineBuilder(std::unique_ptr<Module> M) - : M(std::move(M)), MCJMM(nullptr) { - InitEngine(); -} - -EngineBuilder::~EngineBuilder() {} - -EngineBuilder &EngineBuilder::setMCJITMemoryManager( - std::unique_ptr<RTDyldMemoryManager> mcjmm) { - MCJMM = std::move(mcjmm); - return *this; -} - -void EngineBuilder::InitEngine() { - WhichEngine = EngineKind::Either; - ErrorStr = nullptr; - OptLevel = CodeGenOpt::Default; - MCJMM = nullptr; - Options = TargetOptions(); - RelocModel = Reloc::Default; - CMModel = CodeModel::JITDefault; - UseOrcMCJITReplacement = false; - + : M(std::move(M)), WhichEngine(EngineKind::Either), ErrorStr(nullptr), + OptLevel(CodeGenOpt::Default), MCJMM(nullptr), RelocModel(Reloc::Default), + CMModel(CodeModel::JITDefault), UseOrcMCJITReplacement(false) { // IR module verification is enabled by default in debug builds, and disabled // by default in release builds. #ifndef NDEBUG @@ -435,6 +415,14 @@ void EngineBuilder::InitEngine() { #endif } +EngineBuilder::~EngineBuilder() = default; + +EngineBuilder &EngineBuilder::setMCJITMemoryManager( + std::unique_ptr<RTDyldMemoryManager> mcjmm) { + MCJMM = std::move(mcjmm); + return *this; +} + ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { std::unique_ptr<TargetMachine> TheTM(TM); // Take ownership. 
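The EngineBuilder change above replaces the InitEngine() helper with C++11 delegating constructors, a member-initializer list, and a defaulted destructor. The same pattern in miniature (Widget and its members are made up for illustration):

struct Widget {
  Widget() : Widget(nullptr) {}  // default ctor delegates to the main one
  explicit Widget(void *Context)
      : Context(Context), OptLevel(2), Verify(false) {}
  ~Widget() = default;           // mirrors EngineBuilder::~EngineBuilder
private:
  void *Context;
  int OptLevel;
  bool Verify;
};

Delegation keeps the defaults in exactly one place, so a newly added member cannot be initialized in the constructor but forgotten in the init helper, which is the failure mode the old InitEngine() arrangement invited.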
diff --git a/lib/ExecutionEngine/Interpreter/CMakeLists.txt b/lib/ExecutionEngine/Interpreter/CMakeLists.txt index 1aac3ac..4dbc2df 100644 --- a/lib/ExecutionEngine/Interpreter/CMakeLists.txt +++ b/lib/ExecutionEngine/Interpreter/CMakeLists.txt @@ -13,7 +13,7 @@ add_llvm_library(LLVMInterpreter ) if( LLVM_ENABLE_FFI ) - target_link_libraries( LLVMInterpreter ${cmake_2_8_12_PRIVATE} ${FFI_LIBRARY_PATH} ) + target_link_libraries( LLVMInterpreter PRIVATE ${FFI_LIBRARY_PATH} ) endif() add_dependencies(LLVMInterpreter intrinsics_gen) diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index 93bb2d1..2e8eb16 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cmath> using namespace llvm; @@ -464,14 +465,14 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2, return Dest; \ } -#define IMPLEMENT_VECTOR_UNORDERED(TY, X,Y, _FUNC) \ - if (TY->isVectorTy()) { \ - GenericValue DestMask = Dest; \ - Dest = _FUNC(Src1, Src2, Ty); \ - for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++) \ - if (DestMask.AggregateVal[_i].IntVal == true) \ - Dest.AggregateVal[_i].IntVal = APInt(1,true); \ - return Dest; \ +#define IMPLEMENT_VECTOR_UNORDERED(TY, X, Y, FUNC) \ + if (TY->isVectorTy()) { \ + GenericValue DestMask = Dest; \ + Dest = FUNC(Src1, Src2, Ty); \ + for (size_t _i = 0; _i < Src1.AggregateVal.size(); _i++) \ + if (DestMask.AggregateVal[_i].IntVal == true) \ + Dest.AggregateVal[_i].IntVal = APInt(1, true); \ + return Dest; \ } static GenericValue executeFCMP_UEQ(GenericValue Src1, GenericValue Src2, diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index e500d3d..20b8553 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -137,8 +137,7 @@ std::unique_ptr<MemoryBuffer> MCJIT::emitObject(Module *M) { legacy::PassManager PM; - M->setDataLayout(TM->getDataLayout()); - PM.add(new DataLayoutPass()); + M->setDataLayout(*TM->getDataLayout()); // The RuntimeDyld will take ownership of this shortly SmallVector<char, 4096> ObjBufferSV; @@ -258,7 +257,7 @@ uint64_t MCJIT::getExistingSymbolAddress(const std::string &Name) { Mangler Mang(TM->getDataLayout()); SmallString<128> FullName; Mang.getNameWithPrefix(FullName, Name); - return Dyld.getSymbolLoadAddress(FullName); + return Dyld.getSymbol(FullName).getAddress(); } Module *MCJIT::findModuleForSymbol(const std::string &Name, @@ -384,7 +383,7 @@ void *MCJIT::getPointerToFunction(Function *F) { // // This is the accessor for the target address, so make sure to check the // load address of the symbol, not the local address. 
- return (void*)Dyld.getSymbolLoadAddress(Name); + return (void*)Dyld.getSymbol(Name).getAddress(); } void MCJIT::runStaticConstructorsDestructorsInModulePtrSet( diff --git a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index 61c947f..8cf490f 100644 --- a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/ExecutionEngine/Orc/CloneSubModule.h" #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h index 1b7b161..00e39bb 100644 --- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h +++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h @@ -133,8 +133,8 @@ public: // If this module doesn't have a DataLayout attached then attach the // default. - if (!M->getDataLayout()) - M->setDataLayout(getDataLayout()); + if (M->getDataLayout().isDefault()) + M->setDataLayout(*getDataLayout()); Modules.push_back(std::move(M)); std::vector<Module *> Ms; diff --git a/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp b/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp index b5dda8e..6fe5301 100644 --- a/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp +++ b/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp @@ -39,7 +39,7 @@ template <typename OStream> void restoreX86Regs(OStream &OS) { } template <typename TargetT> -uint64_t executeCompileCallback(JITCompileCallbackManagerBase<TargetT> *JCBM, +uint64_t executeCompileCallback(JITCompileCallbackManagerBase *JCBM, TargetAddress CallbackID) { return JCBM->executeCompileCallback(CallbackID); } @@ -52,7 +52,8 @@ namespace orc { const char* OrcX86_64::ResolverBlockName = "orc_resolver_block"; void OrcX86_64::insertResolverBlock( - Module &M, JITCompileCallbackManagerBase<OrcX86_64> &JCBM) { + Module &M, JITCompileCallbackManagerBase &JCBM) { + const unsigned X86_64_TrampolineLength = 6; auto CallbackPtr = executeCompileCallback<OrcX86_64>; uint64_t CallbackAddr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(CallbackPtr)); @@ -77,6 +78,7 @@ void OrcX86_64::insertResolverBlock( AsmStream << " leaq jit_callback_manager_addr(%rip), %rdi\n" << " movq (%rdi), %rdi\n" << " movq " << ReturnAddrOffset << "(%rsp), %rsi\n" + << " subq $" << X86_64_TrampolineLength << ", %rsi\n" << " movabsq $" << CallbackAddr << ", %rax\n" << " callq *%rax\n" << " movq %rax, " << ReturnAddrOffset << "(%rsp)\n"; diff --git a/lib/ExecutionEngine/RuntimeDyld/Android.mk b/lib/ExecutionEngine/RuntimeDyld/Android.mk index 76aae67..40fdd7c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Android.mk +++ b/lib/ExecutionEngine/RuntimeDyld/Android.mk @@ -8,6 +8,7 @@ LOCAL_SRC_FILES := \ RTDyldMemoryManager.cpp \ RuntimeDyldChecker.cpp \ RuntimeDyld.cpp \ + RuntimeDyldCOFF.cpp \ RuntimeDyldELF.cpp \ RuntimeDyldMachO.cpp diff --git a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt index 12bbcc6..e78408a 100644 --- a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt +++ b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt @@ -2,6 +2,7 @@ add_llvm_library(LLVMRuntimeDyld RTDyldMemoryManager.cpp RuntimeDyld.cpp RuntimeDyldChecker.cpp + RuntimeDyldCOFF.cpp RuntimeDyldELF.cpp RuntimeDyldMachO.cpp ) diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 
54f1a1c..a0ed7cf 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -13,10 +13,12 @@ #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "RuntimeDyldCheckerImpl.h" +#include "RuntimeDyldCOFF.h" #include "RuntimeDyldELF.h" #include "RuntimeDyldImpl.h" #include "RuntimeDyldMachO.h" #include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/COFF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MutexGuard.h" @@ -195,10 +197,13 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { << " SID: " << SectionID << " Offset: " << format("%p", (uintptr_t)SectOffset) << " flags: " << Flags << "\n"); - SymbolInfo::Visibility Vis = - (Flags & SymbolRef::SF_Exported) ? - SymbolInfo::Default : SymbolInfo::Hidden; - GlobalSymbolTable[Name] = SymbolInfo(SectionID, SectOffset, Vis); + JITSymbolFlags RTDyldSymFlags = JITSymbolFlags::None; + if (Flags & SymbolRef::SF_Weak) + RTDyldSymFlags |= JITSymbolFlags::Weak; + if (Flags & SymbolRef::SF_Exported) + RTDyldSymFlags |= JITSymbolFlags::Exported; + GlobalSymbolTable[Name] = + SymbolTableEntry(SectionID, SectOffset, RTDyldSymFlags); } } } @@ -264,6 +269,20 @@ static bool isRequiredForExecution(const SectionRef &Section) { const ObjectFile *Obj = Section.getObject(); if (auto *ELFObj = dyn_cast<object::ELFObjectFileBase>(Obj)) return ELFObj->getSectionFlags(Section) & ELF::SHF_ALLOC; + if (auto *COFFObj = dyn_cast<object::COFFObjectFile>(Obj)) { + const coff_section *CoffSection = COFFObj->getCOFFSection(Section); + // Avoid loading zero-sized COFF sections. + // In PE files, VirtualSize gives the section size, and SizeOfRawData + // may be zero for sections with content. In Obj files, SizeOfRawData + // gives the section size, and VirtualSize is always zero. Hence + // the need to check for both cases below. + bool HasContent = (CoffSection->VirtualSize > 0) + || (CoffSection->SizeOfRawData > 0); + bool IsDiscardable = CoffSection->Characteristics & + (COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_LNK_INFO); + return HasContent && !IsDiscardable; + } + assert(isa<MachOObjectFile>(Obj)); return true; } @@ -273,6 +292,15 @@ static bool isReadOnlyData(const SectionRef &Section) { if (auto *ELFObj = dyn_cast<object::ELFObjectFileBase>(Obj)) return !(ELFObj->getSectionFlags(Section) & (ELF::SHF_WRITE | ELF::SHF_EXECINSTR)); + if (auto *COFFObj = dyn_cast<object::COFFObjectFile>(Obj)) + return ((COFFObj->getCOFFSection(Section)->Characteristics & + (COFF::IMAGE_SCN_CNT_INITIALIZED_DATA + | COFF::IMAGE_SCN_MEM_READ + | COFF::IMAGE_SCN_MEM_WRITE)) + == + (COFF::IMAGE_SCN_CNT_INITIALIZED_DATA + | COFF::IMAGE_SCN_MEM_READ)); + assert(isa<MachOObjectFile>(Obj)); return false; } @@ -281,6 +309,9 @@ static bool isZeroInit(const SectionRef &Section) { const ObjectFile *Obj = Section.getObject(); if (auto *ELFObj = dyn_cast<object::ELFObjectFileBase>(Obj)) return ELFObj->getSectionType(Section) == ELF::SHT_NOBITS; + if (auto *COFFObj = dyn_cast<object::COFFObjectFile>(Obj)) + return COFFObj->getCOFFSection(Section)->Characteristics & + COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA; auto *MachO = cast<MachOObjectFile>(Obj); unsigned SectionType = MachO->getSectionType(Section); @@ -497,12 +528,15 @@ void RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj, Offset += AlignOffset; } uint32_t Flags = Sym.getFlags(); - SymbolInfo::Visibility Vis = - (Flags & SymbolRef::SF_Exported) ? 
- SymbolInfo::Default : SymbolInfo::Hidden; + JITSymbolFlags RTDyldSymFlags = JITSymbolFlags::None; + if (Flags & SymbolRef::SF_Weak) + RTDyldSymFlags |= JITSymbolFlags::Weak; + if (Flags & SymbolRef::SF_Exported) + RTDyldSymFlags |= JITSymbolFlags::Exported; DEBUG(dbgs() << "Allocating common symbol " << Name << " address " << format("%p", Addr) << "\n"); - GlobalSymbolTable[Name] = SymbolInfo(SectionID, Offset, Vis); + GlobalSymbolTable[Name] = + SymbolTableEntry(SectionID, Offset, RTDyldSymFlags); Offset += Size; Addr += Size; } @@ -512,7 +546,6 @@ unsigned RuntimeDyldImpl::emitSection(const ObjectFile &Obj, const SectionRef &Section, bool IsCode) { StringRef data; - Check(Section.getContents(data)); uint64_t Alignment64 = Section.getAlignment(); unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL; @@ -542,6 +575,7 @@ unsigned RuntimeDyldImpl::emitSection(const ObjectFile &Obj, // Some sections, such as debug info, don't need to be loaded for execution. // Leave those where they are. if (IsRequired) { + Check(Section.getContents(data)); Allocate = DataSize + PaddingSize + StubBufSize; Addr = IsCode ? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID, Name) @@ -816,6 +850,15 @@ RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) { RuntimeDyld::~RuntimeDyld() {} +static std::unique_ptr<RuntimeDyldCOFF> +createRuntimeDyldCOFF(Triple::ArchType Arch, RTDyldMemoryManager *MM, + bool ProcessAllSections, RuntimeDyldCheckerImpl *Checker) { + std::unique_ptr<RuntimeDyldCOFF> Dyld(RuntimeDyldCOFF::create(Arch, MM)); + Dyld->setProcessAllSections(ProcessAllSections); + Dyld->setRuntimeDyldChecker(Checker); + return Dyld; +} + static std::unique_ptr<RuntimeDyldELF> createRuntimeDyldELF(RTDyldMemoryManager *MM, bool ProcessAllSections, RuntimeDyldCheckerImpl *Checker) { @@ -843,6 +886,10 @@ RuntimeDyld::loadObject(const ObjectFile &Obj) { Dyld = createRuntimeDyldMachO( static_cast<Triple::ArchType>(Obj.getArch()), MM, ProcessAllSections, Checker); + else if (Obj.isCOFF()) + Dyld = createRuntimeDyldCOFF( + static_cast<Triple::ArchType>(Obj.getArch()), MM, + ProcessAllSections, Checker); else report_fatal_error("Incompatible object format!"); } @@ -853,22 +900,16 @@ RuntimeDyld::loadObject(const ObjectFile &Obj) { return Dyld->loadObject(Obj); } -void *RuntimeDyld::getSymbolAddress(StringRef Name) const { +void *RuntimeDyld::getSymbolLocalAddress(StringRef Name) const { if (!Dyld) return nullptr; - return Dyld->getSymbolAddress(Name); + return Dyld->getSymbolLocalAddress(Name); } -uint64_t RuntimeDyld::getSymbolLoadAddress(StringRef Name) const { +RuntimeDyld::SymbolInfo RuntimeDyld::getSymbol(StringRef Name) const { if (!Dyld) - return 0; - return Dyld->getSymbolLoadAddress(Name); -} - -uint64_t RuntimeDyld::getExportedSymbolLoadAddress(StringRef Name) const { - if (!Dyld) - return 0; - return Dyld->getExportedSymbolLoadAddress(Name); + return nullptr; + return Dyld->getSymbol(Name); } void RuntimeDyld::resolveRelocations() { Dyld->resolveRelocations(); } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp new file mode 100644 index 0000000..56bcb8e --- /dev/null +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp @@ -0,0 +1,85 @@ +//===-- RuntimeDyldCOFF.cpp - Run-time dynamic linker for MC-JIT -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Implementation of COFF support for the MC-JIT runtime dynamic linker. +// +//===----------------------------------------------------------------------===// + +#include "RuntimeDyldCOFF.h" +#include "Targets/RuntimeDyldCOFFX86_64.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Object/ObjectFile.h" + +using namespace llvm; +using namespace llvm::object; + +#define DEBUG_TYPE "dyld" + +namespace { + +class LoadedCOFFObjectInfo : public RuntimeDyld::LoadedObjectInfo { +public: + LoadedCOFFObjectInfo(RuntimeDyldImpl &RTDyld, unsigned BeginIdx, + unsigned EndIdx) + : RuntimeDyld::LoadedObjectInfo(RTDyld, BeginIdx, EndIdx) {} + + OwningBinary<ObjectFile> + getObjectForDebug(const ObjectFile &Obj) const override { + return OwningBinary<ObjectFile>(); + } +}; +} + +namespace llvm { + +std::unique_ptr<RuntimeDyldCOFF> +llvm::RuntimeDyldCOFF::create(Triple::ArchType Arch, RTDyldMemoryManager *MM) { + switch (Arch) { + default: + llvm_unreachable("Unsupported target for RuntimeDyldCOFF."); + break; + case Triple::x86_64: + return make_unique<RuntimeDyldCOFFX86_64>(MM); + } +} + +std::unique_ptr<RuntimeDyld::LoadedObjectInfo> +RuntimeDyldCOFF::loadObject(const object::ObjectFile &O) { + unsigned SectionStartIdx, SectionEndIdx; + std::tie(SectionStartIdx, SectionEndIdx) = loadObjectImpl(O); + return llvm::make_unique<LoadedCOFFObjectInfo>(*this, SectionStartIdx, + SectionEndIdx); +} + +uint64_t RuntimeDyldCOFF::getSymbolOffset(const SymbolRef &Sym) { + uint64_t Address; + if (Sym.getAddress(Address)) + return UnknownAddressOrSize; + + if (Address == UnknownAddressOrSize) + return UnknownAddressOrSize; + + const ObjectFile *Obj = Sym.getObject(); + section_iterator SecI(Obj->section_end()); + if (Sym.getSection(SecI)) + return UnknownAddressOrSize; + + if (SecI == Obj->section_end()) + return UnknownAddressOrSize; + + uint64_t SectionAddress = SecI->getAddress(); + return Address - SectionAddress; +} + +bool RuntimeDyldCOFF::isCompatibleFile(const object::ObjectFile &Obj) const { + return Obj.isCOFF(); +} + +} // namespace llvm diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h new file mode 100644 index 0000000..681a3e5 --- /dev/null +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h @@ -0,0 +1,46 @@ +//===-- RuntimeDyldCOFF.h - Run-time dynamic linker for MC-JIT ---*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// COFF support for MC-JIT runtime dynamic linker. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_RUNTIME_DYLD_COFF_H +#define LLVM_RUNTIME_DYLD_COFF_H + +#include "RuntimeDyldImpl.h" +#include "llvm/ADT/DenseMap.h" + +#define DEBUG_TYPE "dyld" + +using namespace llvm; + +namespace llvm { + +// Common base class for COFF dynamic linker support. +// Concrete subclasses for each target can be found in ./Targets. 
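The three predicates added to RuntimeDyld.cpp above give the loader its COFF section policy: load a section only if it has content in either the PE sense (VirtualSize) or the relocatable-object sense (SizeOfRawData) and is not discardable; treat it as read-only data only when it is initialized data that is readable but not writable; and treat it as zero-init when it is marked uninitialized. A standalone restatement of the load-worthiness rule (coffSectionIsRequired is an illustrative name, not from the patch):

    #include "llvm/Object/COFF.h"  // object::coff_section
    #include "llvm/Support/COFF.h" // COFF::IMAGE_SCN_* section characteristics

    // Hypothetical helper mirroring the COFF branch of isRequiredForExecution.
    static bool coffSectionIsRequired(const llvm::object::coff_section *Sec) {
      // PE images record the size in VirtualSize; relocatable objects record
      // it in SizeOfRawData and leave VirtualSize zero, so test both fields.
      bool HasContent = Sec->VirtualSize > 0 || Sec->SizeOfRawData > 0;
      bool IsDiscardable =
          Sec->Characteristics & (llvm::COFF::IMAGE_SCN_MEM_DISCARDABLE |
                                  llvm::COFF::IMAGE_SCN_LNK_INFO);
      return HasContent && !IsDiscardable;
    }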
+class RuntimeDyldCOFF : public RuntimeDyldImpl { + +public: + std::unique_ptr<RuntimeDyld::LoadedObjectInfo> + loadObject(const object::ObjectFile &Obj) override; + bool isCompatibleFile(const object::ObjectFile &Obj) const override; + static std::unique_ptr<RuntimeDyldCOFF> create(Triple::ArchType Arch, + RTDyldMemoryManager *MM); + +protected: + RuntimeDyldCOFF(RTDyldMemoryManager *MM) : RuntimeDyldImpl(MM) {} + uint64_t getSymbolOffset(const SymbolRef &Sym); +}; + +} // end namespace llvm + +#undef DEBUG_TYPE + +#endif diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp index 976a434..c991408 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp @@ -310,7 +310,7 @@ private: ""); uint64_t SymbolAddr = PCtx.IsInsideLoad - ? Checker.getSymbolLinkerAddr(Symbol) + ? Checker.getSymbolLocalAddr(Symbol) : Checker.getSymbolRemoteAddr(Symbol); uint64_t NextPC = SymbolAddr + InstSize; @@ -437,7 +437,7 @@ private: // The value for the symbol depends on the context we're evaluating in: // Inside a load this is the address in the linker's memory, outside a // load it's the address in the target process's memory. - uint64_t Value = PCtx.IsInsideLoad ? Checker.getSymbolLinkerAddr(Symbol) + uint64_t Value = PCtx.IsInsideLoad ? Checker.getSymbolLocalAddr(Symbol) : Checker.getSymbolRemoteAddr(Symbol); // Looks like a plain symbol reference. @@ -727,17 +727,17 @@ bool RuntimeDyldCheckerImpl::checkAllRulesInBuffer(StringRef RulePrefix, } bool RuntimeDyldCheckerImpl::isSymbolValid(StringRef Symbol) const { - return getRTDyld().getSymbolAddress(Symbol) != nullptr; + return getRTDyld().getSymbolLocalAddress(Symbol) != nullptr; } -uint64_t RuntimeDyldCheckerImpl::getSymbolLinkerAddr(StringRef Symbol) const { +uint64_t RuntimeDyldCheckerImpl::getSymbolLocalAddr(StringRef Symbol) const { return static_cast<uint64_t>( - reinterpret_cast<uintptr_t>(getRTDyld().getSymbolAddress(Symbol))); + reinterpret_cast<uintptr_t>(getRTDyld().getSymbolLocalAddress(Symbol))); } uint64_t RuntimeDyldCheckerImpl::getSymbolRemoteAddr(StringRef Symbol) const { - if (uint64_t InternalSymbolAddr = getRTDyld().getSymbolLoadAddress(Symbol)) - return InternalSymbolAddr; + if (auto InternalSymbol = getRTDyld().getSymbol(Symbol)) + return InternalSymbol.getAddress(); return getRTDyld().MemMgr->getSymbolAddress(Symbol); } @@ -929,6 +929,6 @@ bool RuntimeDyldChecker::checkAllRulesInBuffer(StringRef RulePrefix, std::pair<uint64_t, std::string> RuntimeDyldChecker::getSectionAddr(StringRef FileName, StringRef SectionName, - bool LinkerAddress) { - return Impl->getSectionAddr(FileName, SectionName, LinkerAddress); + bool LocalAddress) { + return Impl->getSectionAddr(FileName, SectionName, LocalAddress); } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h index de20c1e..e8d299a 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h @@ -42,7 +42,7 @@ private: RuntimeDyldImpl &getRTDyld() const { return *RTDyld.Dyld; } bool isSymbolValid(StringRef Symbol) const; - uint64_t getSymbolLinkerAddr(StringRef Symbol) const; + uint64_t getSymbolLocalAddr(StringRef Symbol) const; uint64_t getSymbolRemoteAddr(StringRef Symbol) const; uint64_t readMemoryAtAddr(uint64_t Addr, unsigned Size) const; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 0f3ca0f..6278170 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -1128,7 +1128,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( RangeOverflow = true; } } - if (SymType == SymbolRef::ST_Unknown || RangeOverflow == true) { + if (SymType == SymbolRef::ST_Unknown || RangeOverflow) { // It is an external symbol (SymbolRef::ST_Unknown) or within a range // larger than 24-bits. StubMap::const_iterator i = Stubs.find(Value); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index b4414b0..71260d0 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -20,16 +20,6 @@ using namespace llvm; namespace llvm { -namespace { -// Helper for extensive error checking in debug builds. -std::error_code Check(std::error_code Err) { - if (Err) { - report_fatal_error(Err.message()); - } - return Err; -} - -} // end anonymous namespace class RuntimeDyldELF : public RuntimeDyldImpl { diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index f37a9a7..05060dd 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -36,6 +36,14 @@ using namespace llvm::object; namespace llvm { + // Helper for extensive error checking in debug builds. +inline std::error_code Check(std::error_code Err) { + if (Err) { + report_fatal_error(Err.message()); + } + return Err; +} + class Twine; /// SectionEntry - represents a section emitted into memory by the dynamic @@ -156,27 +164,24 @@ public: } }; -/// @brief Symbol info for RuntimeDyld. -class SymbolInfo { +/// @brief Symbol info for RuntimeDyld. +class SymbolTableEntry : public JITSymbolBase { public: - typedef enum { Hidden = 0, Default = 1 } Visibility; - - SymbolInfo() : Offset(0), SectionID(0), Vis(Hidden) {} + SymbolTableEntry() + : JITSymbolBase(JITSymbolFlags::None), Offset(0), SectionID(0) {} - SymbolInfo(unsigned SectionID, uint64_t Offset, Visibility Vis) - : Offset(Offset), SectionID(SectionID), Vis(Vis) {} + SymbolTableEntry(unsigned SectionID, uint64_t Offset, JITSymbolFlags Flags) + : JITSymbolBase(Flags), Offset(Offset), SectionID(SectionID) {} unsigned getSectionID() const { return SectionID; } uint64_t getOffset() const { return Offset; } - Visibility getVisibility() const { return Vis; } private: uint64_t Offset; - unsigned SectionID : 31; - Visibility Vis : 1; + unsigned SectionID; }; -typedef StringMap<SymbolInfo> RTDyldSymbolTable; +typedef StringMap<SymbolTableEntry> RTDyldSymbolTable; class RuntimeDyldImpl { friend class RuntimeDyld::LoadedObjectInfo; @@ -386,7 +391,7 @@ public: virtual std::unique_ptr<RuntimeDyld::LoadedObjectInfo> loadObject(const object::ObjectFile &Obj) = 0; - uint8_t* getSymbolAddress(StringRef Name) const { + uint8_t* getSymbolLocalAddress(StringRef Name) const { // FIXME: Just look up as a function for now. Overly simple of course. // Work in progress. RTDyldSymbolTable::const_iterator pos = GlobalSymbolTable.find(Name); @@ -396,24 +401,16 @@ public: return getSectionAddress(SymInfo.getSectionID()) + SymInfo.getOffset(); } - uint64_t getSymbolLoadAddress(StringRef Name) const { + RuntimeDyld::SymbolInfo getSymbol(StringRef Name) const { // FIXME: Just look up as a function for now. Overly simple of course. // Work in progress. 
RTDyldSymbolTable::const_iterator pos = GlobalSymbolTable.find(Name); if (pos == GlobalSymbolTable.end()) - return 0; - const auto &SymInfo = pos->second; - return getSectionLoadAddress(SymInfo.getSectionID()) + SymInfo.getOffset(); - } - - uint64_t getExportedSymbolLoadAddress(StringRef Name) const { - RTDyldSymbolTable::const_iterator pos = GlobalSymbolTable.find(Name); - if (pos == GlobalSymbolTable.end()) - return 0; - const auto &SymInfo = pos->second; - if (SymInfo.getVisibility() == SymbolInfo::Hidden) - return 0; - return getSectionLoadAddress(SymInfo.getSectionID()) + SymInfo.getOffset(); + return nullptr; + const auto &SymEntry = pos->second; + uint64_t TargetAddr = + getSectionLoadAddress(SymEntry.getSectionID()) + SymEntry.getOffset(); + return RuntimeDyld::SymbolInfo(TargetAddr, SymEntry.getFlags()); } void resolveRelocations(); diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h new file mode 100644 index 0000000..ce2f4a2 --- /dev/null +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h @@ -0,0 +1,214 @@ +//===-- RuntimeDyldCOFFX86_64.h --- COFF/X86_64 specific code ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// COFF x86_64 support for MC-JIT runtime dynamic linker. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFF86_64_H +#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFF86_64_H + +#include "llvm/Object/COFF.h" +#include "llvm/Support/COFF.h" +#include "../RuntimeDyldCOFF.h" + +#define DEBUG_TYPE "dyld" + +namespace llvm { + +class RuntimeDyldCOFFX86_64 : public RuntimeDyldCOFF { + +private: + // When a module is loaded we save the SectionID of the unwind + // sections in a table until we receive a request to register all + // unregistered EH frame sections with the memory manager. + SmallVector<SID, 2> UnregisteredEHFrameSections; + SmallVector<SID, 2> RegisteredEHFrameSections; + +public: + RuntimeDyldCOFFX86_64(RTDyldMemoryManager *MM) : RuntimeDyldCOFF(MM) {} + + unsigned getMaxStubSize() override { + return 6; // 2-byte jmp instruction + 32-bit relative address + } + + // The target location for the relocation is described by RE.SectionID and + // RE.Offset. RE.SectionID can be used to find the SectionEntry. Each + // SectionEntry has three members describing its location. + // SectionEntry::Address is the address at which the section has been loaded + // into memory in the current (host) process. SectionEntry::LoadAddress is + // the address that the section will have in the target process. + // SectionEntry::ObjAddress is the address of the bits for this section in the + // original emitted object image (also in the current address space). + // + // Relocations will be applied as if the section were loaded at + // SectionEntry::LoadAddress, but they will be applied at an address based + // on SectionEntry::Address. SectionEntry::ObjAddress will be used to refer + // to target memory contents if they are required for value calculations. + // + // The Value parameter here is the load address of the symbol for the + // relocation to be applied.
For relocations which refer to symbols in the + // current object Value will be the LoadAddress of the section in which + // the symbol resides (RE.Addend provides additional information about the + // symbol location). For external symbols, Value will be the address of the + // symbol in the target address space. + void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override { + const SectionEntry &Section = Sections[RE.SectionID]; + uint8_t *Target = Section.Address + RE.Offset; + + switch (RE.RelType) { + + case COFF::IMAGE_REL_AMD64_REL32: + case COFF::IMAGE_REL_AMD64_REL32_1: + case COFF::IMAGE_REL_AMD64_REL32_2: + case COFF::IMAGE_REL_AMD64_REL32_3: + case COFF::IMAGE_REL_AMD64_REL32_4: + case COFF::IMAGE_REL_AMD64_REL32_5: { + uint32_t *TargetAddress = (uint32_t *)Target; + uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + // Delta is the distance from the start of the reloc to the end of the + // instruction with the reloc. + uint64_t Delta = 4 + (RE.RelType - COFF::IMAGE_REL_AMD64_REL32); + Value -= FinalAddress + Delta; + uint64_t Result = Value + RE.Addend; + assert(((int64_t)Result <= INT32_MAX) && "Relocation overflow"); + assert(((int64_t)Result >= INT32_MIN) && "Relocation underflow"); + *TargetAddress = Result; + break; + } + + case COFF::IMAGE_REL_AMD64_ADDR32NB: { + // Note ADDR32NB requires a well-established notion of + // image base. This address must be less than or equal + // to every section's load address, and all sections must be + // within a 32 bit offset from the base. + // + // For now we just set these to zero. + uint32_t *TargetAddress = (uint32_t *)Target; + *TargetAddress = 0; + break; + } + + case COFF::IMAGE_REL_AMD64_ADDR64: { + uint64_t *TargetAddress = (uint64_t *)Target; + *TargetAddress = Value + RE.Addend; + break; + } + + default: + llvm_unreachable("Relocation type not implemented yet!"); + break; + } + } + + relocation_iterator processRelocationRef(unsigned SectionID, + relocation_iterator RelI, + const ObjectFile &Obj, + ObjSectionToIDMap &ObjSectionToID, + StubMap &Stubs) override { + // Find the symbol referred to in the relocation, and + // get its section and offset. + // + // Insist for now that all symbols be resolvable within + // the scope of this object file. + symbol_iterator Symbol = RelI->getSymbol(); + if (Symbol == Obj.symbol_end()) + report_fatal_error("Unknown symbol in relocation"); + unsigned TargetSectionID = 0; + uint64_t TargetOffset = UnknownAddressOrSize; + section_iterator SecI(Obj.section_end()); + Symbol->getSection(SecI); + if (SecI == Obj.section_end()) + report_fatal_error("Unknown section in relocation"); + bool IsCode = SecI->isText(); + TargetSectionID = findOrEmitSection(Obj, *SecI, IsCode, ObjSectionToID); + TargetOffset = getSymbolOffset(*Symbol); + + // Determine the Addend used to adjust the relocation value. 
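In the REL32 cases above, the relocation type itself encodes the gap between the 32-bit displacement field and the point the processor measures from: IMAGE_REL_AMD64_REL32_N means N bytes of instruction follow the field, so Delta = 4 + N. A worked check of the arithmetic with made-up addresses (standalone, not from the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Made-up numbers: an IMAGE_REL_AMD64_REL32_4 fixup whose 32-bit field
      // is loaded at 0x1000 and whose target symbol is loaded at 0x2000.
      uint64_t FinalAddress = 0x1000; // Section.LoadAddress + RE.Offset
      uint64_t Value = 0x2000;        // load address of the target symbol
      uint64_t Addend = 0;            // displacement read from the object

      // REL32_4: four trailing instruction bytes after the 4-byte field.
      uint64_t Delta = 4 + 4;
      uint64_t Result = Value - (FinalAddress + Delta) + Addend;
      assert(Result == 0xFF8); // 0x2000 - 0x1008
      return 0;
    }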
+ uint64_t RelType; + Check(RelI->getType(RelType)); + uint64_t Offset; + Check(RelI->getOffset(Offset)); + uint64_t Addend = 0; + SectionEntry &Section = Sections[SectionID]; + uintptr_t ObjTarget = Section.ObjAddress + Offset; + + switch (RelType) { + + case COFF::IMAGE_REL_AMD64_REL32: + case COFF::IMAGE_REL_AMD64_REL32_1: + case COFF::IMAGE_REL_AMD64_REL32_2: + case COFF::IMAGE_REL_AMD64_REL32_3: + case COFF::IMAGE_REL_AMD64_REL32_4: + case COFF::IMAGE_REL_AMD64_REL32_5: + case COFF::IMAGE_REL_AMD64_ADDR32NB: { + uint32_t *Displacement = (uint32_t *)ObjTarget; + Addend = *Displacement; + break; + } + + case COFF::IMAGE_REL_AMD64_ADDR64: { + uint64_t *Displacement = (uint64_t *)ObjTarget; + Addend = *Displacement; + break; + } + + default: + break; + } + + StringRef TargetName; + Symbol->getName(TargetName); + DEBUG(dbgs() << "\t\tIn Section " << SectionID << " Offset " << Offset + << " RelType: " << RelType << " TargetName: " << TargetName + << " Addend " << Addend << "\n"); + + RelocationEntry RE(SectionID, Offset, RelType, TargetOffset + Addend); + addRelocationForSection(RE, TargetSectionID); + + return ++RelI; + } + + unsigned getStubAlignment() override { return 1; } + void registerEHFrames() override { + if (!MemMgr) + return; + for (auto const &EHFrameSID : UnregisteredEHFrameSections) { + uint8_t *EHFrameAddr = Sections[EHFrameSID].Address; + uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress; + size_t EHFrameSize = Sections[EHFrameSID].Size; + MemMgr->registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); + RegisteredEHFrameSections.push_back(EHFrameSID); + } + UnregisteredEHFrameSections.clear(); + } + void deregisterEHFrames() override { + // Stub + } + void finalizeLoad(const ObjectFile &Obj, + ObjSectionToIDMap &SectionMap) override { + // Look for and record the EH frame section IDs. + for (const auto &SectionPair : SectionMap) { + const SectionRef &Section = SectionPair.first; + StringRef Name; + Check(Section.getName(Name)); + // Note unwind info is split across .pdata and .xdata, so this + // may not be sufficiently general for all users. + if (Name == ".xdata") { + UnregisteredEHFrameSections.push_back(SectionPair.second); + } + } + } +}; + +} // end namespace llvm + +#undef DEBUG_TYPE + +#endif diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp index 1746afd..9ccd744 100644 --- a/lib/Fuzzer/FuzzerDriver.cpp +++ b/lib/Fuzzer/FuzzerDriver.cpp @@ -158,6 +158,7 @@ int FuzzerDriver(int argc, char **argv, UserCallback Callback) { Options.DoCrossOver = Flags.cross_over; Options.MutateDepth = Flags.mutate_depth; Options.ExitOnFirst = Flags.exit_on_first; + Options.UseCounters = Flags.use_counters; Options.UseFullCoverageSet = Flags.use_full_coverage_set; Options.UseCoveragePairs = Flags.use_coverage_pairs; Options.PreferSmallDuringInitialShuffle = diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def index 068f245..08176af 100644 --- a/lib/Fuzzer/FuzzerFlags.def +++ b/lib/Fuzzer/FuzzerFlags.def @@ -32,6 +32,7 @@ FUZZER_FLAG(int, help, 0, "Print help.") FUZZER_FLAG( int, save_minimized_corpus, 0, "If 1, the minimized corpus is saved into the first input directory") +FUZZER_FLAG(int, use_counters, 0, "Use coverage counters") FUZZER_FLAG(int, use_full_coverage_set, 0, "Experimental: Maximize the number of different full" " coverage sets as opposed to maximizing the total coverage." 
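The new -use_counters mode threaded through above keeps a persistent bitmap over the sanitizer's 8-bit coverage counters and treats an input as progress whenever it sets a previously unseen bit, even if the set of covered PCs is unchanged; the CounterTest added below exists precisely because plain coverage cannot tell one loop iteration from four. A simplified model of that bookkeeping (CounterModel is a made-up name; the real __sanitizer_update_counter_bitset_and_clear_counters additionally buckets raw counter values into range bits before merging, and __builtin_popcount assumes GCC/Clang):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Hypothetical stand-in for the fuzzer's CounterBitmap bookkeeping.
    struct CounterModel {
      std::vector<uint8_t> Bitmap;

      // Merge one run's counter bits; return how many bits are new.
      size_t mergeAndCountNewBits(const std::vector<uint8_t> &Counters) {
        if (Bitmap.size() < Counters.size())
          Bitmap.resize(Counters.size());
        size_t NewBits = 0;
        for (size_t I = 0; I < Counters.size(); ++I) {
          uint8_t Fresh = Counters[I] & ~Bitmap[I];
          NewBits += __builtin_popcount(Fresh);
          Bitmap[I] |= Counters[I];
        }
        return NewBits;
      }

      // The same popcount walk as Fuzzer::TotalBits(); slow, stats only.
      size_t totalBits() const {
        size_t Res = 0;
        for (uint8_t X : Bitmap)
          Res += __builtin_popcount(X);
        return Res;
      }
    };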
diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h index 980b00e..e4e5eb7 100644 --- a/lib/Fuzzer/FuzzerInternal.h +++ b/lib/Fuzzer/FuzzerInternal.h @@ -48,6 +48,7 @@ class Fuzzer { bool DoCrossOver = true; int MutateDepth = 5; bool ExitOnFirst = false; + bool UseCounters = false; bool UseFullCoverageSet = false; bool UseCoveragePairs = false; int PreferSmallDuringInitialShuffle = -1; @@ -95,6 +96,15 @@ class Fuzzer { std::vector<Unit> Corpus; std::unordered_set<uintptr_t> FullCoverageSets; std::unordered_set<uint64_t> CoveragePairs; + + // For UseCounters + std::vector<uint8_t> CounterBitmap; + size_t TotalBits() { // Slow. Call it only for printing stats. + size_t Res = 0; + for (auto x : CounterBitmap) Res += __builtin_popcount(x); + return Res; + } + UserCallback Callback; FuzzingOptions Options; system_clock::time_point ProcessStartTime = system_clock::now(); diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp index 70b63eb..563fbf4 100644 --- a/lib/Fuzzer/FuzzerLoop.cpp +++ b/lib/Fuzzer/FuzzerLoop.cpp @@ -138,17 +138,28 @@ size_t Fuzzer::RunOneMaximizeFullCoverageSet(const Unit &U) { } size_t Fuzzer::RunOneMaximizeTotalCoverage(const Unit &U) { + size_t NumCounters = __sanitizer_get_number_of_counters(); + if (Options.UseCounters) { + CounterBitmap.resize(NumCounters); + __sanitizer_update_counter_bitset_and_clear_counters(0); + } size_t OldCoverage = __sanitizer_get_total_unique_coverage(); Callback(U.data(), U.size()); size_t NewCoverage = __sanitizer_get_total_unique_coverage(); + size_t NumNewBits = 0; + if (Options.UseCounters) + NumNewBits = __sanitizer_update_counter_bitset_and_clear_counters( + CounterBitmap.data()); + if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) && Options.Verbosity) { size_t Seconds = secondsSinceProcessStartUp(); std::cerr << "#" << TotalNumberOfRuns << "\tcov: " << NewCoverage + << "\tbits: " << TotalBits() << "\texec/s: " << (Seconds ? TotalNumberOfRuns / Seconds : 0) << "\n"; } - if (NewCoverage > OldCoverage) + if (NewCoverage > OldCoverage || NumNewBits) return NewCoverage; return 0; } @@ -189,6 +200,7 @@ size_t Fuzzer::MutateAndTestOne(Unit *U) { if (Options.Verbosity) { std::cerr << "#" << TotalNumberOfRuns << "\tNEW: " << NewCoverage + << " B: " << TotalBits() << " L: " << U->size() << " S: " << Corpus.size() << " I: " << i diff --git a/lib/Fuzzer/test/CMakeLists.txt b/lib/Fuzzer/test/CMakeLists.txt index bed9cd8..08130c6 100644 --- a/lib/Fuzzer/test/CMakeLists.txt +++ b/lib/Fuzzer/test/CMakeLists.txt @@ -5,6 +5,7 @@ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O0 -fsanitize-coverage=4") set(Tests + CounterTest FourIndependentBranchesTest FullCoverageSetTest InfiniteTest diff --git a/lib/Fuzzer/test/CounterTest.cpp b/lib/Fuzzer/test/CounterTest.cpp new file mode 100644 index 0000000..332ccfe --- /dev/null +++ b/lib/Fuzzer/test/CounterTest.cpp @@ -0,0 +1,16 @@ +// Test for a fuzzer: must find the case where a particular basic block is +// executed many times. +#include <cstdint> +#include <cstdlib>
+#include <iostream> + +extern "C" void TestOneInput(const uint8_t *Data, size_t Size) { + int Num = 0; + for (size_t i = 0; i < Size; i++) + if (Data[i] == 'A' + i) + Num++; + if (Num >= 4) { + std::cerr << "BINGO!\n"; + exit(1); + } +} diff --git a/lib/Fuzzer/test/fuzzer.test b/lib/Fuzzer/test/fuzzer.test index 1e42e72..45691f5 100644 --- a/lib/Fuzzer/test/fuzzer.test +++ b/lib/Fuzzer/test/fuzzer.test @@ -17,3 +17,6 @@ FullCoverageSetTest: BINGO RUN: not ./LLVMFuzzer-FourIndependentBranchesTest -timeout=15 -seed=1 -use_coverage_pairs=1 2>&1 | FileCheck %s --check-prefix=FourIndependentBranchesTest FourIndependentBranchesTest: BINGO + +RUN: not ./LLVMFuzzer-CounterTest -use_counters=1 -max_len=6 -seed=1 -timeout=15 2>&1 | FileCheck %s --check-prefix=CounterTest +CounterTest: BINGO diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index de0e614..ae0beba 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -14,9 +14,9 @@ // //===----------------------------------------------------------------------===// -#include "AsmWriter.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/AssemblyAnnotationWriter.h" @@ -32,12 +32,14 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/TypeFinder.h" +#include "llvm/IR/UseListOrder.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cctype> using namespace llvm; @@ -275,6 +277,15 @@ static const Module *getModuleFromVal(const Value *V) { if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) return GV->getParent(); + + if (const auto *MAV = dyn_cast<MetadataAsValue>(V)) { + for (const User *U : MAV->users()) + if (isa<Instruction>(U)) + if (const Module *M = getModuleFromVal(U)) + return M; + return nullptr; + } + return nullptr; } @@ -378,7 +389,29 @@ static void PrintLLVMName(raw_ostream &OS, const Value *V) { } -namespace llvm { +namespace { +class TypePrinting { + TypePrinting(const TypePrinting &) = delete; + void operator=(const TypePrinting&) = delete; +public: + + /// NamedTypes - The named types that are used by the current module. + TypeFinder NamedTypes; + + /// NumberedTypes - The numbered types, along with their value. + DenseMap<StructType*, unsigned> NumberedTypes; + + + TypePrinting() {} + ~TypePrinting() {} + + void incorporateTypes(const Module &M); + + void print(Type *Ty, raw_ostream &OS); + + void printStructBody(StructType *Ty, raw_ostream &OS); +}; +} // namespace void TypePrinting::incorporateTypes(const Module &M) { NamedTypes.run(M, false); @@ -508,6 +541,7 @@ void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) { OS << '>'; } +namespace { //===----------------------------------------------------------------------===// // SlotTracker Class: Enumerate slot numbers for unnamed values //===----------------------------------------------------------------------===// @@ -525,6 +559,7 @@ private: /// TheFunction - The function for which we are holding slot numbers. const Function* TheFunction; bool FunctionProcessed; + bool ShouldInitializeAllMetadata; /// mMap - The slot map for the module level data. 
ValueMap mMap; @@ -542,10 +577,20 @@ private: DenseMap<AttributeSet, unsigned> asMap; unsigned asNext; public: - /// Construct from a module - explicit SlotTracker(const Module *M); + /// Construct from a module. + /// + /// If \c ShouldInitializeAllMetadata, initializes all metadata in all + /// functions, giving correct numbering for metadata referenced only from + /// within a function (even if no functions have been initialized). + explicit SlotTracker(const Module *M, + bool ShouldInitializeAllMetadata = false); /// Construct from a function, starting out in incorp state. - explicit SlotTracker(const Function *F); + /// + /// If \c ShouldInitializeAllMetadata, initializes all metadata in all + /// functions, giving correct numbering for metadata referenced only from + /// within a function (even if no functions have been initialized). + explicit SlotTracker(const Function *F, + bool ShouldInitializeAllMetadata = false); /// Return the slot number of the specified value in its type /// plane. If something is not in the SlotTracker, return -1. @@ -606,11 +651,18 @@ private: /// Add all of the function's arguments, basic blocks, and instructions. void processFunction(); + /// Add all of the metadata from a function. + void processFunctionMetadata(const Function &F); + + /// Add all of the metadata from an instruction. + void processInstructionMetadata(const Instruction &I); + SlotTracker(const SlotTracker &) = delete; void operator=(const SlotTracker &) = delete; }; +} // namespace -SlotTracker *createSlotTracker(const Module *M) { +static SlotTracker *createSlotTracker(const Module *M) { return new SlotTracker(M); } @@ -645,15 +697,18 @@ static SlotTracker *createSlotTracker(const Value *V) { // Module level constructor. Causes the contents of the Module (sans functions) // to be added to the slot table. -SlotTracker::SlotTracker(const Module *M) - : TheModule(M), TheFunction(nullptr), FunctionProcessed(false), mNext(0), +SlotTracker::SlotTracker(const Module *M, bool ShouldInitializeAllMetadata) + : TheModule(M), TheFunction(nullptr), FunctionProcessed(false), + ShouldInitializeAllMetadata(ShouldInitializeAllMetadata), mNext(0), fNext(0), mdnNext(0), asNext(0) {} // Function level constructor. Causes the contents of the Module and the one // function provided to be added to the slot table. -SlotTracker::SlotTracker(const Function *F) +SlotTracker::SlotTracker(const Function *F, bool ShouldInitializeAllMetadata) : TheModule(F ? F->getParent() : nullptr), TheFunction(F), - FunctionProcessed(false), mNext(0), fNext(0), mdnNext(0), asNext(0) {} + FunctionProcessed(false), + ShouldInitializeAllMetadata(ShouldInitializeAllMetadata), mNext(0), + fNext(0), mdnNext(0), asNext(0) {} inline void SlotTracker::initialize() { if (TheModule) { @@ -692,6 +747,9 @@ void SlotTracker::processModule() { // Add all the unnamed functions to the table. CreateModuleSlot(I); + if (ShouldInitializeAllMetadata) + processFunctionMetadata(*I); + // Add all the function attributes to the table. // FIXME: Add attributes of other objects? AttributeSet FnAttrs = I->getAttributes().getFnAttributes(); @@ -715,46 +773,30 @@ void SlotTracker::processFunction() { ST_DEBUG("Inserting Instructions:\n"); - SmallVector<std::pair<unsigned, MDNode *>, 4> MDForInst; - // Add all of the basic blocks and instructions with no names.
- for (Function::const_iterator BB = TheFunction->begin(), - E = TheFunction->end(); BB != E; ++BB) { - if (!BB->hasName()) - CreateFunctionSlot(BB); - - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; - ++I) { - if (!I->getType()->isVoidTy() && !I->hasName()) - CreateFunctionSlot(I); - - // Intrinsics can directly use metadata. We allow direct calls to any - // llvm.foo function here, because the target may not be linked into the - // optimizer. - if (const CallInst *CI = dyn_cast<CallInst>(I)) { - if (Function *F = CI->getCalledFunction()) - if (F->isIntrinsic()) - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) - if (auto *V = dyn_cast_or_null<MetadataAsValue>(I->getOperand(i))) - if (MDNode *N = dyn_cast<MDNode>(V->getMetadata())) - CreateMetadataSlot(N); + for (auto &BB : *TheFunction) { + if (!BB.hasName()) + CreateFunctionSlot(&BB); + + for (auto &I : BB) { + if (!I.getType()->isVoidTy() && !I.hasName()) + CreateFunctionSlot(&I); + + processInstructionMetadata(I); + // We allow direct calls to any llvm.foo function here, because the + // target may not be linked into the optimizer. + if (const CallInst *CI = dyn_cast<CallInst>(&I)) { // Add all the call attributes to the table. AttributeSet Attrs = CI->getAttributes().getFnAttributes(); if (Attrs.hasAttributes(AttributeSet::FunctionIndex)) CreateAttributeSetSlot(Attrs); - } else if (const InvokeInst *II = dyn_cast<InvokeInst>(I)) { + } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) { // Add all the call attributes to the table. AttributeSet Attrs = II->getAttributes().getFnAttributes(); if (Attrs.hasAttributes(AttributeSet::FunctionIndex)) CreateAttributeSetSlot(Attrs); } - - // Process metadata attached with this instruction. - I->getAllMetadata(MDForInst); - for (unsigned i = 0, e = MDForInst.size(); i != e; ++i) - CreateMetadataSlot(MDForInst[i].second); - MDForInst.clear(); } } @@ -763,6 +805,29 @@ void SlotTracker::processFunction() { ST_DEBUG("end processFunction!\n"); } +void SlotTracker::processFunctionMetadata(const Function &F) { + for (auto &BB : F) + for (auto &I : BB) + processInstructionMetadata(I); +} + +void SlotTracker::processInstructionMetadata(const Instruction &I) { + // Process metadata used directly by intrinsics. + if (const CallInst *CI = dyn_cast<CallInst>(&I)) + if (Function *F = CI->getCalledFunction()) + if (F->isIntrinsic()) + for (auto &Op : I.operands()) + if (auto *V = dyn_cast_or_null<MetadataAsValue>(Op)) + if (MDNode *N = dyn_cast<MDNode>(V->getMetadata())) + CreateMetadataSlot(N); + + // Process metadata attached to this instruction. + SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; + I.getAllMetadata(MDs); + for (auto &MD : MDs) + CreateMetadataSlot(MD.second); +} + /// Clean up after incorporating a function. This is the only way to get out of /// the function incorporation state that affects get*Slot/Create*Slot. Function /// incorporation state is indicated by TheFunction != 0. @@ -1010,7 +1075,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, (StrVal[1] >= '0' && StrVal[1] <= '9'))) { // Reparse stringized version! 
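processFunctionMetadata and processInstructionMetadata above replace the old hand-rolled iterator loop with one reusable walk, so module-level numbering (via ShouldInitializeAllMetadata) and function-level numbering share the same code. The equivalent traversal against the public IR API (collectInstructionMDNodes is an illustrative name, not an LLVM function):

    #include "llvm/ADT/DenseSet.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/Metadata.h"

    // Collect every MDNode attached to instructions in F, mirroring
    // SlotTracker::processFunctionMetadata minus the slot table itself.
    static void collectInstructionMDNodes(
        const llvm::Function &F, llvm::DenseSet<const llvm::MDNode *> &Seen) {
      for (const llvm::BasicBlock &BB : F)
        for (const llvm::Instruction &I : BB) {
          llvm::SmallVector<std::pair<unsigned, llvm::MDNode *>, 4> MDs;
          I.getAllMetadata(MDs);
          for (const auto &MD : MDs)
            Seen.insert(MD.second);
        }
    }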
if (APFloat(APFloat::IEEEdouble, StrVal).convertToDouble() == Val) { - Out << StrVal.str(); + Out << StrVal; return; } } @@ -1223,6 +1288,14 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, Out << ' ' << getPredicateText(CE->getPredicate()); Out << " ("; + if (const GEPOperator *GEP = dyn_cast<GEPOperator>(CE)) { + TypePrinter.print( + cast<PointerType>(GEP->getPointerOperandType()->getScalarType()) + ->getElementType(), + Out); + Out << ", "; + } + for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) { TypePrinter.print((*OI)->getType(), Out); Out << ' '; @@ -1285,8 +1358,52 @@ raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) { } return OS << FS.Sep; } +struct MDFieldPrinter { + raw_ostream &Out; + FieldSeparator FS; + TypePrinting *TypePrinter; + SlotTracker *Machine; + const Module *Context; + + explicit MDFieldPrinter(raw_ostream &Out) + : Out(Out), TypePrinter(nullptr), Machine(nullptr), Context(nullptr) {} + MDFieldPrinter(raw_ostream &Out, TypePrinting *TypePrinter, + SlotTracker *Machine, const Module *Context) + : Out(Out), TypePrinter(TypePrinter), Machine(Machine), Context(Context) { + } + void printTag(const DebugNode *N); + void printString(StringRef Name, StringRef Value, + bool ShouldSkipEmpty = true); + void printMetadata(StringRef Name, const Metadata *MD, + bool ShouldSkipNull = true); + template <class IntTy> + void printInt(StringRef Name, IntTy Int, bool ShouldSkipZero = true); + void printBool(StringRef Name, bool Value); + void printDIFlags(StringRef Name, unsigned Flags); + template <class IntTy, class Stringifier> + void printDwarfEnum(StringRef Name, IntTy Value, Stringifier toString, + bool ShouldSkipZero = true); +}; } // end namespace +void MDFieldPrinter::printTag(const DebugNode *N) { + Out << FS << "tag: "; + if (const char *Tag = dwarf::TagString(N->getTag())) + Out << Tag; + else + Out << N->getTag(); +} + +void MDFieldPrinter::printString(StringRef Name, StringRef Value, + bool ShouldSkipEmpty) { + if (ShouldSkipEmpty && Value.empty()) + return; + + Out << FS << Name << ": \""; + PrintEscapedString(Value, Out); + Out << "\""; +} + static void writeMetadataAsOperand(raw_ostream &Out, const Metadata *MD, TypePrinting *TypePrinter, SlotTracker *Machine, @@ -1298,27 +1415,68 @@ static void writeMetadataAsOperand(raw_ostream &Out, const Metadata *MD, WriteAsOperandInternal(Out, MD, TypePrinter, Machine, Context); } -static void writeTag(raw_ostream &Out, FieldSeparator &FS, const DebugNode *N) { - Out << FS << "tag: "; - if (const char *Tag = dwarf::TagString(N->getTag())) - Out << Tag; +void MDFieldPrinter::printMetadata(StringRef Name, const Metadata *MD, + bool ShouldSkipNull) { + if (ShouldSkipNull && !MD) + return; + + Out << FS << Name << ": "; + writeMetadataAsOperand(Out, MD, TypePrinter, Machine, Context); +} + +template <class IntTy> +void MDFieldPrinter::printInt(StringRef Name, IntTy Int, bool ShouldSkipZero) { + if (ShouldSkipZero && !Int) + return; + + Out << FS << Name << ": " << Int; +} + +void MDFieldPrinter::printBool(StringRef Name, bool Value) { + Out << FS << Name << ": " << (Value ? 
"true" : "false"); +} + +void MDFieldPrinter::printDIFlags(StringRef Name, unsigned Flags) { + if (!Flags) + return; + + Out << FS << Name << ": "; + + SmallVector<unsigned, 8> SplitFlags; + unsigned Extra = DIDescriptor::splitFlags(Flags, SplitFlags); + + FieldSeparator FlagsFS(" | "); + for (unsigned F : SplitFlags) { + const char *StringF = DIDescriptor::getFlagString(F); + assert(StringF && "Expected valid flag"); + Out << FlagsFS << StringF; + } + if (Extra || SplitFlags.empty()) + Out << FlagsFS << Extra; +} + +template <class IntTy, class Stringifier> +void MDFieldPrinter::printDwarfEnum(StringRef Name, IntTy Value, + Stringifier toString, bool ShouldSkipZero) { + if (!Value) + return; + + Out << FS << Name << ": "; + if (const char *S = toString(Value)) + Out << S; else - Out << N->getTag(); + Out << Value; } static void writeGenericDebugNode(raw_ostream &Out, const GenericDebugNode *N, TypePrinting *TypePrinter, SlotTracker *Machine, const Module *Context) { Out << "!GenericDebugNode("; - FieldSeparator FS; - writeTag(Out, FS, N); - if (!N->getHeader().empty()) { - Out << FS << "header: \""; - PrintEscapedString(N->getHeader(), Out); - Out << "\""; - } + MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); + Printer.printTag(N); + Printer.printString("header", N->getHeader()); if (N->getNumDwarfOperands()) { - Out << FS << "operands: {"; + Out << Printer.FS << "operands: {"; FieldSeparator IFS; for (auto &I : N->dwarf_operands()) { Out << IFS; @@ -1333,111 +1491,64 @@ static void writeMDLocation(raw_ostream &Out, const MDLocation *DL, TypePrinting *TypePrinter, SlotTracker *Machine, const Module *Context) { Out << "!MDLocation("; - FieldSeparator FS; + MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); // Always output the line, since 0 is a relevant and important value for it. 
- Out << FS << "line: " << DL->getLine(); - if (DL->getColumn()) - Out << FS << "column: " << DL->getColumn(); - Out << FS << "scope: "; - WriteAsOperandInternal(Out, DL->getScope(), TypePrinter, Machine, Context); - if (DL->getInlinedAt()) { - Out << FS << "inlinedAt: "; - WriteAsOperandInternal(Out, DL->getInlinedAt(), TypePrinter, Machine, - Context); - } + Printer.printInt("line", DL->getLine(), /* ShouldSkipZero */ false); + Printer.printInt("column", DL->getColumn()); + Printer.printMetadata("scope", DL->getRawScope(), /* ShouldSkipNull */ false); + Printer.printMetadata("inlinedAt", DL->getRawInlinedAt()); Out << ")"; } static void writeMDSubrange(raw_ostream &Out, const MDSubrange *N, TypePrinting *, SlotTracker *, const Module *) { Out << "!MDSubrange("; - FieldSeparator FS; - Out << FS << "count: " << N->getCount(); - if (N->getLo()) - Out << FS << "lowerBound: " << N->getLo(); + MDFieldPrinter Printer(Out); + Printer.printInt("count", N->getCount(), /* ShouldSkipZero */ false); + Printer.printInt("lowerBound", N->getLo()); Out << ")"; } static void writeMDEnumerator(raw_ostream &Out, const MDEnumerator *N, TypePrinting *, SlotTracker *, const Module *) { Out << "!MDEnumerator("; - FieldSeparator FS; - Out << FS << "name: \"" << N->getName() << "\""; - Out << FS << "value: " << N->getValue(); + MDFieldPrinter Printer(Out); + Printer.printString("name", N->getName(), /* ShouldSkipEmpty */ false); + Printer.printInt("value", N->getValue(), /* ShouldSkipZero */ false); Out << ")"; } static void writeMDBasicType(raw_ostream &Out, const MDBasicType *N, TypePrinting *, SlotTracker *, const Module *) { Out << "!MDBasicType("; - FieldSeparator FS; - writeTag(Out, FS, N); - if (!N->getName().empty()) - Out << FS << "name: \"" << N->getName() << "\""; - if (N->getSizeInBits()) - Out << FS << "size: " << N->getSizeInBits(); - if (N->getAlignInBits()) - Out << FS << "align: " << N->getAlignInBits(); - if (unsigned Encoding = N->getEncoding()) { - Out << FS << "encoding: "; - if (const char *S = dwarf::AttributeEncodingString(Encoding)) - Out << S; - else - Out << Encoding; - } + MDFieldPrinter Printer(Out); + if (N->getTag() != dwarf::DW_TAG_base_type) + Printer.printTag(N); + Printer.printString("name", N->getName()); + Printer.printInt("size", N->getSizeInBits()); + Printer.printInt("align", N->getAlignInBits()); + Printer.printDwarfEnum("encoding", N->getEncoding(), + dwarf::AttributeEncodingString); Out << ")"; } -static void writeDIFlags(raw_ostream &Out, unsigned Flags) { - SmallVector<unsigned, 8> SplitFlags; - unsigned Extra = DIDescriptor::splitFlags(Flags, SplitFlags); - - FieldSeparator FS(" | "); - for (unsigned F : SplitFlags) { - const char *StringF = DIDescriptor::getFlagString(F); - assert(StringF && "Expected valid flag"); - Out << FS << StringF; - } - if (Extra || SplitFlags.empty()) - Out << FS << Extra; -} - static void writeMDDerivedType(raw_ostream &Out, const MDDerivedType *N, TypePrinting *TypePrinter, SlotTracker *Machine, const Module *Context) { Out << "!MDDerivedType("; - FieldSeparator FS; - writeTag(Out, FS, N); - if (!N->getName().empty()) - Out << FS << "name: \"" << N->getName() << "\""; - if (N->getFile()) { - Out << FS << "file: "; - writeMetadataAsOperand(Out, N->getFile(), TypePrinter, Machine, - Context); - } - if (N->getLine()) - Out << FS << "line: " << N->getLine(); - if (N->getScope()) { - Out << FS << "scope: "; - writeMetadataAsOperand(Out, N->getScope(), TypePrinter, Machine, Context); - } - Out << FS << "baseType: "; - writeMetadataAsOperand(Out, 
N->getBaseType(), TypePrinter, Machine, Context); - if (N->getSizeInBits()) - Out << FS << "size: " << N->getSizeInBits(); - if (N->getAlignInBits()) - Out << FS << "align: " << N->getAlignInBits(); - if (N->getOffsetInBits()) - Out << FS << "offset: " << N->getOffsetInBits(); - if (auto Flags = N->getFlags()) { - Out << FS << "flags: "; - writeDIFlags(Out, Flags); - } - if (N->getExtraData()) { - Out << FS << "extraData: "; - writeMetadataAsOperand(Out, N->getExtraData(), TypePrinter, Machine, - Context); - } + MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); + Printer.printTag(N); + Printer.printString("name", N->getName()); + Printer.printMetadata("scope", N->getScope()); + Printer.printMetadata("file", N->getFile()); + Printer.printInt("line", N->getLine()); + Printer.printMetadata("baseType", N->getBaseType(), + /* ShouldSkipNull */ false); + Printer.printInt("size", N->getSizeInBits()); + Printer.printInt("align", N->getAlignInBits()); + Printer.printInt("offset", N->getOffsetInBits()); + Printer.printDIFlags("flags", N->getFlags()); + Printer.printMetadata("extraData", N->getExtraData()); Out << ")"; } @@ -1445,61 +1556,23 @@ static void writeMDCompositeType(raw_ostream &Out, const MDCompositeType *N, TypePrinting *TypePrinter, SlotTracker *Machine, const Module *Context) { Out << "!MDCompositeType("; - FieldSeparator FS; - writeTag(Out, FS, N); - if (!N->getName().empty()) - Out << FS << "name: \"" << N->getName() << "\""; - if (N->getFile()) { - Out << FS << "file: "; - writeMetadataAsOperand(Out, N->getFile(), TypePrinter, Machine, - Context); - } - if (N->getLine()) - Out << FS << "line: " << N->getLine(); - if (N->getScope()) { - Out << FS << "scope: "; - writeMetadataAsOperand(Out, N->getScope(), TypePrinter, Machine, Context); - } - if (N->getBaseType()) { - Out << FS << "baseType: "; - writeMetadataAsOperand(Out, N->getBaseType(), TypePrinter, Machine, - Context); - } - if (N->getSizeInBits()) - Out << FS << "size: " << N->getSizeInBits(); - if (N->getAlignInBits()) - Out << FS << "align: " << N->getAlignInBits(); - if (N->getOffsetInBits()) - Out << FS << "offset: " << N->getOffsetInBits(); - if (auto Flags = N->getFlags()) { - Out << FS << "flags: "; - writeDIFlags(Out, Flags); - } - if (N->getElements()) { - Out << FS << "elements: "; - writeMetadataAsOperand(Out, N->getElements(), TypePrinter, Machine, - Context); - } - if (unsigned Lang = N->getRuntimeLang()) { - Out << FS << "runtimeLang: "; - if (const char *S = dwarf::LanguageString(Lang)) - Out << S; - else - Out << Lang; - } - - if (N->getVTableHolder()) { - Out << FS << "vtableHolder: "; - writeMetadataAsOperand(Out, N->getVTableHolder(), TypePrinter, Machine, - Context); - } - if (N->getTemplateParams()) { - Out << FS << "templateParams: "; - writeMetadataAsOperand(Out, N->getTemplateParams(), TypePrinter, Machine, - Context); - } - if (!N->getIdentifier().empty()) - Out << FS << "identifier: \"" << N->getIdentifier() << "\""; + MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); + Printer.printTag(N); + Printer.printString("name", N->getName()); + Printer.printMetadata("scope", N->getScope()); + Printer.printMetadata("file", N->getFile()); + Printer.printInt("line", N->getLine()); + Printer.printMetadata("baseType", N->getBaseType()); + Printer.printInt("size", N->getSizeInBits()); + Printer.printInt("align", N->getAlignInBits()); + Printer.printInt("offset", N->getOffsetInBits()); + Printer.printDIFlags("flags", N->getFlags()); + Printer.printMetadata("elements", N->getElements()); + 
Printer.printDwarfEnum("runtimeLang", N->getRuntimeLang(), + dwarf::LanguageString); + Printer.printMetadata("vtableHolder", N->getVTableHolder()); + Printer.printMetadata("templateParams", N->getTemplateParams()); + Printer.printString("identifier", N->getIdentifier()); Out << ")"; } @@ -1507,22 +1580,20 @@ static void writeMDSubroutineType(raw_ostream &Out, const MDSubroutineType *N, TypePrinting *TypePrinter, SlotTracker *Machine, const Module *Context) { Out << "!MDSubroutineType("; - FieldSeparator FS; - if (auto Flags = N->getFlags()) { - Out << FS << "flags: "; - writeDIFlags(Out, Flags); - } - Out << FS << "types: "; - writeMetadataAsOperand(Out, N->getTypeArray(), TypePrinter, Machine, Context); + MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); + Printer.printDIFlags("flags", N->getFlags()); + Printer.printMetadata("types", N->getTypeArray(), /* ShouldSkipNull */ false); Out << ")"; } static void writeMDFile(raw_ostream &Out, const MDFile *N, TypePrinting *, SlotTracker *, const Module *) { Out << "!MDFile("; - FieldSeparator FS; - Out << FS << "filename: \"" << N->getFilename() << "\""; - Out << FS << "directory: \"" << N->getDirectory() << "\""; + MDFieldPrinter Printer(Out); + Printer.printString("filename", N->getFilename(), + /* ShouldSkipEmpty */ false); + Printer.printString("directory", N->getDirectory(), + /* ShouldSkipEmpty */ false); Out << ")"; } @@ -1530,48 +1601,23 @@ static void writeMDCompileUnit(raw_ostream &Out, const MDCompileUnit *N, TypePrinting *TypePrinter, SlotTracker *Machine, const Module *Context) { Out << "!MDCompileUnit("; - FieldSeparator FS; - Out << FS << "language: "; - if (const char *Lang = dwarf::LanguageString(N->getSourceLanguage())) - Out << Lang; - else - Out << N->getSourceLanguage(); - Out << FS << "file: "; - writeMetadataAsOperand(Out, N->getFile(), TypePrinter, Machine, Context); - if (!N->getProducer().empty()) - Out << FS << "producer: \"" << N->getProducer() << "\""; - Out << FS << "isOptimized: " << (N->isOptimized() ? 
"true" : "false"); - if (!N->getFlags().empty()) - Out << FS << "flags: \"" << N->getFlags() << "\""; - Out << FS << "runtimeVersion: " << N->getRuntimeVersion(); - if (!N->getSplitDebugFilename().empty()) - Out << FS << "splitDebugFilename: \"" << N->getSplitDebugFilename() << "\""; - Out << FS << "emissionKind: " << N->getEmissionKind(); - if (N->getEnumTypes()) { - Out << FS << "enums: "; - writeMetadataAsOperand(Out, N->getEnumTypes(), TypePrinter, Machine, - Context); - } - if (N->getRetainedTypes()) { - Out << FS << "retainedTypes: "; - writeMetadataAsOperand(Out, N->getRetainedTypes(), TypePrinter, Machine, - Context); - } - if (N->getSubprograms()) { - Out << FS << "subprograms: "; - writeMetadataAsOperand(Out, N->getSubprograms(), TypePrinter, Machine, - Context); - } - if (N->getGlobalVariables()) { - Out << FS << "globals: "; - writeMetadataAsOperand(Out, N->getGlobalVariables(), TypePrinter, Machine, - Context); - } - if (N->getImportedEntities()) { - Out << FS << "imports: "; - writeMetadataAsOperand(Out, N->getImportedEntities(), TypePrinter, Machine, - Context); - } + MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); + Printer.printDwarfEnum("language", N->getSourceLanguage(), + dwarf::LanguageString, /* ShouldSkipZero */ false); + Printer.printMetadata("file", N->getFile(), /* ShouldSkipNull */ false); + Printer.printString("producer", N->getProducer()); + Printer.printBool("isOptimized", N->isOptimized()); + Printer.printString("flags", N->getFlags()); + Printer.printInt("runtimeVersion", N->getRuntimeVersion(), + /* ShouldSkipZero */ false); + Printer.printString("splitDebugFilename", N->getSplitDebugFilename()); + Printer.printInt("emissionKind", N->getEmissionKind(), + /* ShouldSkipZero */ false); + Printer.printMetadata("enums", N->getEnumTypes()); + Printer.printMetadata("retainedTypes", N->getRetainedTypes()); + Printer.printMetadata("subprograms", N->getSubprograms()); + Printer.printMetadata("globals", N->getGlobalVariables()); + Printer.printMetadata("imports", N->getImportedEntities()); Out << ")"; } @@ -1579,67 +1625,26 @@ static void writeMDSubprogram(raw_ostream &Out, const MDSubprogram *N, TypePrinting *TypePrinter, SlotTracker *Machine, const Module *Context) { Out << "!MDSubprogram("; - FieldSeparator FS; - Out << FS << "scope: "; - writeMetadataAsOperand(Out, N->getScope(), TypePrinter, Machine, Context); - Out << FS << "name: \"" << N->getName() << "\""; - if (!N->getLinkageName().empty()) - Out << FS << "linkageName: \"" << N->getLinkageName() << "\""; - if (N->getFile()) { - Out << FS << "file: "; - writeMetadataAsOperand(Out, N->getFile(), TypePrinter, Machine, - Context); - } - if (N->getLine()) - Out << FS << "line: " << N->getLine(); - if (N->getType()) { - Out << FS << "type: "; - writeMetadataAsOperand(Out, N->getType(), TypePrinter, Machine, - Context); - } - Out << FS << "isLocal: " << (N->isLocalToUnit() ? "true" : "false"); - Out << FS << "isDefinition: " << (N->isDefinition() ? 
"true" : "false"); - if (N->getScopeLine()) - Out << FS << "scopeLine: " << N->getScopeLine(); - if (N->getContainingType()) { - Out << FS << "containingType: "; - writeMetadataAsOperand(Out, N->getContainingType(), TypePrinter, Machine, - Context); - } - if (unsigned V = N->getVirtuality()) { - Out << FS << "virtuality: "; - if (const char *S = dwarf::VirtualityString(V)) - Out << S; - else - Out << V; - } - if (N->getVirtualIndex()) - Out << FS << "virtualIndex: " << N->getVirtualIndex(); - if (auto Flags = N->getFlags()) { - Out << FS << "flags: "; - writeDIFlags(Out, Flags); - } - Out << FS << "isOptimized: " << (N->isOptimized() ? "true" : "false"); - if (N->getFunction()) { - Out << FS << "function: "; - writeMetadataAsOperand(Out, N->getFunction(), TypePrinter, Machine, - Context); - } - if (N->getTemplateParams()) { - Out << FS << "templateParams: "; - writeMetadataAsOperand(Out, N->getTemplateParams(), TypePrinter, Machine, - Context); - } - if (N->getDeclaration()) { - Out << FS << "declaration: "; - writeMetadataAsOperand(Out, N->getDeclaration(), TypePrinter, Machine, - Context); - } - if (N->getVariables()) { - Out << FS << "variables: "; - writeMetadataAsOperand(Out, N->getVariables(), TypePrinter, Machine, - Context); - } + MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); + Printer.printString("name", N->getName()); + Printer.printString("linkageName", N->getLinkageName()); + Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false); + Printer.printMetadata("file", N->getFile()); + Printer.printInt("line", N->getLine()); + Printer.printMetadata("type", N->getType()); + Printer.printBool("isLocal", N->isLocalToUnit()); + Printer.printBool("isDefinition", N->isDefinition()); + Printer.printInt("scopeLine", N->getScopeLine()); + Printer.printMetadata("containingType", N->getContainingType()); + Printer.printDwarfEnum("virtuality", N->getVirtuality(), + dwarf::VirtualityString); + Printer.printInt("virtualIndex", N->getVirtualIndex()); + Printer.printDIFlags("flags", N->getFlags()); + Printer.printBool("isOptimized", N->isOptimized()); + Printer.printMetadata("function", N->getFunction()); + Printer.printMetadata("templateParams", N->getTemplateParams()); + Printer.printMetadata("declaration", N->getDeclaration()); + Printer.printMetadata("variables", N->getVariables()); Out << ")"; } @@ -1647,18 +1652,11 @@ static void writeMDLexicalBlock(raw_ostream &Out, const MDLexicalBlock *N, TypePrinting *TypePrinter, SlotTracker *Machine, const Module *Context) { Out << "!MDLexicalBlock("; - FieldSeparator FS; - Out << FS << "scope: "; - writeMetadataAsOperand(Out, N->getScope(), TypePrinter, Machine, Context); - if (N->getFile()) { - Out << FS << "file: "; - writeMetadataAsOperand(Out, N->getFile(), TypePrinter, Machine, - Context); - } - if (N->getLine()) - Out << FS << "line: " << N->getLine(); - if (N->getColumn()) - Out << FS << "column: " << N->getColumn(); + MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); + Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false); + Printer.printMetadata("file", N->getFile()); + Printer.printInt("line", N->getLine()); + Printer.printInt("column", N->getColumn()); Out << ")"; } @@ -1668,15 +1666,11 @@ static void writeMDLexicalBlockFile(raw_ostream &Out, SlotTracker *Machine, const Module *Context) { Out << "!MDLexicalBlockFile("; - FieldSeparator FS; - Out << FS << "scope: "; - writeMetadataAsOperand(Out, N->getScope(), TypePrinter, Machine, Context); - if (N->getFile()) { - Out << FS << 
"file: "; - writeMetadataAsOperand(Out, N->getFile(), TypePrinter, Machine, - Context); - } - Out << FS << "discriminator: " << N->getDiscriminator(); + MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); + Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false); + Printer.printMetadata("file", N->getFile()); + Printer.printInt("discriminator", N->getDiscriminator(), + /* ShouldSkipZero */ false); Out << ")"; } @@ -1684,17 +1678,11 @@ static void writeMDNamespace(raw_ostream &Out, const MDNamespace *N, TypePrinting *TypePrinter, SlotTracker *Machine, const Module *Context) { Out << "!MDNamespace("; - FieldSeparator FS; - Out << FS << "scope: "; - writeMetadataAsOperand(Out, N->getScope(), TypePrinter, Machine, Context); - if (N->getFile()) { - Out << FS << "file: "; - writeMetadataAsOperand(Out, N->getFile(), TypePrinter, Machine, Context); - } - if (!N->getName().empty()) - Out << FS << "name: \"" << N->getName() << "\""; - if (N->getLine()) - Out << FS << "line: " << N->getLine(); + MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); + Printer.printString("name", N->getName()); + Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false); + Printer.printMetadata("file", N->getFile()); + Printer.printInt("line", N->getLine()); Out << ")"; } @@ -1704,10 +1692,9 @@ static void writeMDTemplateTypeParameter(raw_ostream &Out, SlotTracker *Machine, const Module *Context) { Out << "!MDTemplateTypeParameter("; - FieldSeparator FS; - Out << FS << "name: \"" << N->getName() << "\""; - Out << FS << "type: "; - writeMetadataAsOperand(Out, N->getType(), TypePrinter, Machine, Context); + MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); + Printer.printString("name", N->getName()); + Printer.printMetadata("type", N->getType(), /* ShouldSkipNull */ false); Out << ")"; } @@ -1717,13 +1704,12 @@ static void writeMDTemplateValueParameter(raw_ostream &Out, SlotTracker *Machine, const Module *Context) { Out << "!MDTemplateValueParameter("; - FieldSeparator FS; - writeTag(Out, FS, N); - Out << FS << "name: \"" << N->getName() << "\""; - Out << FS << "type: "; - writeMetadataAsOperand(Out, N->getType(), TypePrinter, Machine, Context); - Out << FS << "value: "; - writeMetadataAsOperand(Out, N->getValue(), TypePrinter, Machine, Context); + MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); + if (N->getTag() != dwarf::DW_TAG_template_value_parameter) + Printer.printTag(N); + Printer.printString("name", N->getName()); + Printer.printMetadata("type", N->getType()); + Printer.printMetadata("value", N->getValue(), /* ShouldSkipNull */ false); Out << ")"; } @@ -1731,36 +1717,17 @@ static void writeMDGlobalVariable(raw_ostream &Out, const MDGlobalVariable *N, TypePrinting *TypePrinter, SlotTracker *Machine, const Module *Context) { Out << "!MDGlobalVariable("; - FieldSeparator FS; - Out << FS << "scope: "; - writeMetadataAsOperand(Out, N->getScope(), TypePrinter, Machine, Context); - Out << FS << "name: \"" << N->getName() << "\""; - if (!N->getLinkageName().empty()) - Out << FS << "linkageName: \"" << N->getLinkageName() << "\""; - if (N->getFile()) { - Out << FS << "file: "; - writeMetadataAsOperand(Out, N->getFile(), TypePrinter, Machine, - Context); - } - if (N->getLine()) - Out << FS << "line: " << N->getLine(); - if (N->getType()) { - Out << FS << "type: "; - writeMetadataAsOperand(Out, N->getType(), TypePrinter, Machine, - Context); - } - Out << FS << "isLocal: " << (N->isLocalToUnit() ? 
"true" : "false"); - Out << FS << "isDefinition: " << (N->isDefinition() ? "true" : "false"); - if (N->getVariable()) { - Out << FS << "variable: "; - writeMetadataAsOperand(Out, N->getVariable(), TypePrinter, Machine, - Context); - } - if (N->getStaticDataMemberDeclaration()) { - Out << FS << "declaration: "; - writeMetadataAsOperand(Out, N->getStaticDataMemberDeclaration(), - TypePrinter, Machine, Context); - } + MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); + Printer.printString("name", N->getName()); + Printer.printString("linkageName", N->getLinkageName()); + Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false); + Printer.printMetadata("file", N->getFile()); + Printer.printInt("line", N->getLine()); + Printer.printMetadata("type", N->getType()); + Printer.printBool("isLocal", N->isLocalToUnit()); + Printer.printBool("isDefinition", N->isDefinition()); + Printer.printMetadata("variable", N->getVariable()); + Printer.printMetadata("declaration", N->getStaticDataMemberDeclaration()); Out << ")"; } @@ -1768,34 +1735,18 @@ static void writeMDLocalVariable(raw_ostream &Out, const MDLocalVariable *N, TypePrinting *TypePrinter, SlotTracker *Machine, const Module *Context) { Out << "!MDLocalVariable("; - FieldSeparator FS; - writeTag(Out, FS, N); - Out << FS << "scope: "; - writeMetadataAsOperand(Out, N->getScope(), TypePrinter, Machine, Context); - Out << FS << "name: \"" << N->getName() << "\""; - if (N->getFile()) { - Out << FS << "file: "; - writeMetadataAsOperand(Out, N->getFile(), TypePrinter, Machine, - Context); - } - if (N->getLine()) - Out << FS << "line: " << N->getLine(); - if (N->getType()) { - Out << FS << "type: "; - writeMetadataAsOperand(Out, N->getType(), TypePrinter, Machine, - Context); - } - if (N->getTag() == dwarf::DW_TAG_arg_variable || N->getArg()) - Out << FS << "arg: " << N->getArg(); - if (auto Flags = N->getFlags()) { - Out << FS << "flags: "; - writeDIFlags(Out, Flags); - } - if (N->getInlinedAt()) { - Out << FS << "inlinedAt: "; - writeMetadataAsOperand(Out, N->getInlinedAt(), TypePrinter, Machine, - Context); - } + MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); + Printer.printTag(N); + Printer.printString("name", N->getName()); + Printer.printInt("arg", N->getArg(), + /* ShouldSkipZero */ + N->getTag() == dwarf::DW_TAG_auto_variable); + Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false); + Printer.printMetadata("file", N->getFile()); + Printer.printInt("line", N->getLine()); + Printer.printMetadata("type", N->getType()); + Printer.printDIFlags("flags", N->getFlags()); + Printer.printMetadata("inlinedAt", N->getInlinedAt()); Out << ")"; } @@ -1824,24 +1775,14 @@ static void writeMDObjCProperty(raw_ostream &Out, const MDObjCProperty *N, TypePrinting *TypePrinter, SlotTracker *Machine, const Module *Context) { Out << "!MDObjCProperty("; - FieldSeparator FS; - Out << FS << "name: \"" << N->getName() << "\""; - if (N->getFile()) { - Out << FS << "file: "; - writeMetadataAsOperand(Out, N->getFile(), TypePrinter, Machine, Context); - } - if (N->getLine()) - Out << FS << "line: " << N->getLine(); - if (!N->getSetterName().empty()) - Out << FS << "setter: \"" << N->getSetterName() << "\""; - if (!N->getGetterName().empty()) - Out << FS << "getter: \"" << N->getGetterName() << "\""; - if (N->getAttributes()) - Out << FS << "attributes: " << N->getAttributes(); - if (N->getType()) { - Out << FS << "type: "; - writeMetadataAsOperand(Out, N->getType(), TypePrinter, Machine, Context); - } + MDFieldPrinter 
Printer(Out, TypePrinter, Machine, Context); + Printer.printString("name", N->getName()); + Printer.printMetadata("file", N->getFile()); + Printer.printInt("line", N->getLine()); + Printer.printString("setter", N->getSetterName()); + Printer.printString("getter", N->getGetterName()); + Printer.printInt("attributes", N->getAttributes()); + Printer.printMetadata("type", N->getType()); Out << ")"; } @@ -1849,17 +1790,12 @@ static void writeMDImportedEntity(raw_ostream &Out, const MDImportedEntity *N, TypePrinting *TypePrinter, SlotTracker *Machine, const Module *Context) { Out << "!MDImportedEntity("; - FieldSeparator FS; - writeTag(Out, FS, N); - Out << FS << "scope: "; - writeMetadataAsOperand(Out, N->getScope(), TypePrinter, Machine, Context); - if (N->getEntity()) { - Out << FS << "entity: "; - writeMetadataAsOperand(Out, N->getEntity(), TypePrinter, Machine, Context); - } - if (N->getLine()) - Out << FS << "line: " << N->getLine(); - Out << FS << "name: \"" << N->getName() << "\""; + MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); + Printer.printTag(N); + Printer.printString("name", N->getName()); + Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false); + Printer.printMetadata("entity", N->getEntity()); + Printer.printInt("line", N->getLine()); Out << ")"; } @@ -1868,10 +1804,10 @@ static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node, TypePrinting *TypePrinter, SlotTracker *Machine, const Module *Context) { - assert(!Node->isTemporary() && "Unexpected forward declaration"); - if (Node->isDistinct()) Out << "distinct "; + else if (Node->isTemporary()) + Out << "<temporary!> "; // Handle broken code. switch (Node->getMetadataID()) { default: @@ -1998,6 +1934,64 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD, WriteAsOperandInternal(Out, V->getValue(), TypePrinter, Machine, Context); } +namespace { +class AssemblyWriter { + formatted_raw_ostream &Out; + const Module *TheModule; + std::unique_ptr<SlotTracker> ModuleSlotTracker; + SlotTracker &Machine; + TypePrinting TypePrinter; + AssemblyAnnotationWriter *AnnotationWriter; + SetVector<const Comdat *> Comdats; + UseListOrderStack UseListOrders; + +public: + /// Construct an AssemblyWriter with an external SlotTracker + AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, + const Module *M, AssemblyAnnotationWriter *AAW); + + /// Construct an AssemblyWriter with an internally allocated SlotTracker + AssemblyWriter(formatted_raw_ostream &o, const Module *M, + AssemblyAnnotationWriter *AAW); + + void printMDNodeBody(const MDNode *MD); + void printNamedMDNode(const NamedMDNode *NMD); + + void printModule(const Module *M); + + void writeOperand(const Value *Op, bool PrintType); + void writeParamOperand(const Value *Operand, AttributeSet Attrs,unsigned Idx); + void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope); + void writeAtomicCmpXchg(AtomicOrdering SuccessOrdering, + AtomicOrdering FailureOrdering, + SynchronizationScope SynchScope); + + void writeAllMDNodes(); + void writeMDNode(unsigned Slot, const MDNode *Node); + void writeAllAttributeGroups(); + + void printTypeIdentities(); + void printGlobal(const GlobalVariable *GV); + void printAlias(const GlobalAlias *GV); + void printComdat(const Comdat *C); + void printFunction(const Function *F); + void printArgument(const Argument *FA, AttributeSet Attrs, unsigned Idx); + void printBasicBlock(const BasicBlock *BB); + void printInstructionLine(const Instruction &I); + void 
printInstruction(const Instruction &I); + + void printUseListOrder(const UseListOrder &Order); + void printUseLists(const Function *F); + +private: + void init(); + + // printInfoComment - Print a little comment after the instruction indicating + // which slot it occupies. + void printInfoComment(const Value &V); +}; +} // namespace + void AssemblyWriter::init() { if (!TheModule) return; @@ -2025,8 +2019,6 @@ AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, const Module *M, init(); } -AssemblyWriter::~AssemblyWriter() { } - void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) { if (!Operand) { Out << "<null operand!>"; @@ -2876,7 +2868,13 @@ void AssemblyWriter::printInstruction(const Instruction &I) { if (AI->isUsedWithInAlloca()) Out << "inalloca "; TypePrinter.print(AI->getAllocatedType(), Out); - if (!AI->getArraySize() || AI->isArrayAllocation()) { + + // Explicitly write the array size if the code is broken, if it's an array + // allocation, or if the type is not canonical for scalar allocations. The + // latter case prevents the type from mutating when round-tripping through + // assembly. + if (!AI->getArraySize() || AI->isArrayAllocation() || + !AI->getArraySize()->getType()->isIntegerTy(32)) { Out << ", "; writeOperand(AI->getArraySize(), true); } @@ -2898,6 +2896,15 @@ void AssemblyWriter::printInstruction(const Instruction &I) { Out << ", "; TypePrinter.print(I.getType(), Out); } else if (Operand) { // Print the normal way. + if (const auto *GEP = dyn_cast<GetElementPtrInst>(&I)) { + Out << ' '; + TypePrinter.print(GEP->getSourceElementType(), Out); + Out << ','; + } else if (const auto *LI = dyn_cast<LoadInst>(&I)) { + Out << ' '; + TypePrinter.print(LI->getType(), Out); + Out << ','; + } // PrintAllTypes - Instructions who have operands of all the same type // omit the type from all but the first operand. If the instruction has @@ -2974,29 +2981,6 @@ void AssemblyWriter::printInstruction(const Instruction &I) { printInfoComment(I); } -static void WriteMDNodeComment(const MDNode *Node, - formatted_raw_ostream &Out) { - if (Node->getNumOperands() < 1) - return; - - Metadata *Op = Node->getOperand(0); - if (!Op || !isa<MDString>(Op)) - return; - - DIDescriptor Desc(Node); - if (!Desc.Verify()) - return; - - unsigned Tag = Desc.getTag(); - Out.PadToColumn(50); - if (dwarf::TagString(Tag)) { - Out << "; "; - Desc.print(Out); - } else if (Tag == dwarf::DW_TAG_user_base) { - Out << "; [ DW_TAG_user_base ]"; - } -} - void AssemblyWriter::writeMDNode(unsigned Slot, const MDNode *Node) { Out << '!' 
<< Slot << " = "; printMDNodeBody(Node); @@ -3017,7 +3001,6 @@ void AssemblyWriter::writeAllMDNodes() { void AssemblyWriter::printMDNodeBody(const MDNode *Node) { WriteMDNodeBodyInternal(Out, Node, &TypePrinter, &Machine, TheModule); - WriteMDNodeComment(Node, Out); } void AssemblyWriter::writeAllAttributeGroups() { @@ -3034,8 +3017,6 @@ void AssemblyWriter::writeAllAttributeGroups() { << I->first.getAsString(AttributeSet::FunctionIndex, true) << " }\n"; } -} // namespace llvm - void AssemblyWriter::printUseListOrder(const UseListOrder &Order) { bool IsInFunction = Machine.getFunction(); if (IsInFunction) @@ -3130,11 +3111,24 @@ void Type::print(raw_ostream &OS) const { } } +static bool isReferencingMDNode(const Instruction &I) { + if (const auto *CI = dyn_cast<CallInst>(&I)) + if (Function *F = CI->getCalledFunction()) + if (F->isIntrinsic()) + for (auto &Op : I.operands()) + if (auto *V = dyn_cast_or_null<MetadataAsValue>(Op)) + if (isa<MDNode>(V->getMetadata())) + return true; + return false; +} + void Value::print(raw_ostream &ROS) const { formatted_raw_ostream OS(ROS); if (const Instruction *I = dyn_cast<Instruction>(this)) { const Function *F = I->getParent() ? I->getParent()->getParent() : nullptr; - SlotTracker SlotTable(F); + SlotTracker SlotTable( + F, + /* ShouldInitializeAllMetadata */ isReferencingMDNode(*I)); AssemblyWriter W(OS, SlotTable, getModuleFromVal(I), nullptr); W.printInstruction(*I); } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(this)) { @@ -3142,7 +3136,8 @@ void Value::print(raw_ostream &ROS) const { AssemblyWriter W(OS, SlotTable, getModuleFromVal(BB), nullptr); W.printBasicBlock(BB); } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(this)) { - SlotTracker SlotTable(GV->getParent()); + SlotTracker SlotTable(GV->getParent(), + /* ShouldInitializeAllMetadata */ isa<Function>(GV)); AssemblyWriter W(OS, SlotTable, GV->getParent(), nullptr); if (const GlobalVariable *V = dyn_cast<GlobalVariable>(GV)) W.printGlobal(V); @@ -3151,7 +3146,7 @@ void Value::print(raw_ostream &ROS) const { else W.printAlias(cast<GlobalAlias>(GV)); } else if (const MetadataAsValue *V = dyn_cast<MetadataAsValue>(this)) { - V->getMetadata()->print(ROS); + V->getMetadata()->print(ROS, getModuleFromVal(V)); } else if (const Constant *C = dyn_cast<Constant>(this)) { TypePrinting TypePrinter; TypePrinter.print(C->getType(), OS); @@ -3167,8 +3162,9 @@ void Value::print(raw_ostream &ROS) const { void Value::printAsOperand(raw_ostream &O, bool PrintType, const Module *M) const { // Fast path: Don't construct and populate a TypePrinting object if we // won't be needing any types printed. 
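Annotation: the Value::print hunk above threads the owning Module into metadata printing (via getModuleFromVal), which lets the writer build a metadata slot table so nodes come out as numbered !N references with full bodies instead of opaque placeholders. A minimal illustration of the Module-aware overload, as an editor's sketch and not part of the patch (the llvm.dbg.cu walk is just an arbitrary source of MDNodes, and the helper name is hypothetical):

  #include "llvm/IR/Metadata.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  void dumpCompileUnits(Module &M) {
    if (NamedMDNode *CUs = M.getNamedMetadata("llvm.dbg.cu"))
      for (unsigned I = 0, E = CUs->getNumOperands(); I != E; ++I)
        CUs->getOperand(I)->print(errs(), &M); // &M enables !N slot numbering
  }

Without the Module argument there is no slot tracker, so operand references cannot be numbered, which is exactly the gap the changes in this hunk close.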
- if (!PrintType && ((!isa<Constant>(this) && !isa<MetadataAsValue>(this)) || - hasName() || isa<GlobalValue>(this))) { + bool IsMetadata = isa<MetadataAsValue>(this); + if (!PrintType && ((!isa<Constant>(this) && !IsMetadata) || hasName() || + isa<GlobalValue>(this))) { WriteAsOperandInternal(O, this, nullptr, nullptr, M); return; } @@ -3184,33 +3180,35 @@ void Value::printAsOperand(raw_ostream &O, bool PrintType, const Module *M) cons O << ' '; } - WriteAsOperandInternal(O, this, &TypePrinter, nullptr, M); + SlotTracker Machine(M, /* ShouldInitializeAllMetadata */ IsMetadata); + WriteAsOperandInternal(O, this, &TypePrinter, &Machine, M); } -void Metadata::print(raw_ostream &ROS) const { +static void printMetadataImpl(raw_ostream &ROS, const Metadata &MD, + const Module *M, bool OnlyAsOperand) { formatted_raw_ostream OS(ROS); - if (auto *N = dyn_cast<MDNode>(this)) { - SlotTracker SlotTable(static_cast<Function *>(nullptr)); - AssemblyWriter W(OS, SlotTable, nullptr, nullptr); - W.printMDNodeBody(N); + auto *N = dyn_cast<MDNode>(&MD); + TypePrinting TypePrinter; + SlotTracker Machine(M, /* ShouldInitializeAllMetadata */ N); + if (M) + TypePrinter.incorporateTypes(*M); + + WriteAsOperandInternal(OS, &MD, &TypePrinter, &Machine, M, + /* FromValue */ true); + if (OnlyAsOperand || !N) return; - } - printAsOperand(OS); + + OS << " = "; + WriteMDNodeBodyInternal(OS, N, &TypePrinter, &Machine, M); } -void Metadata::printAsOperand(raw_ostream &ROS, bool PrintType, - const Module *M) const { - formatted_raw_ostream OS(ROS); +void Metadata::printAsOperand(raw_ostream &OS, const Module *M) const { + printMetadataImpl(OS, *this, M, /* OnlyAsOperand */ true); +} - std::unique_ptr<TypePrinting> TypePrinter; - if (PrintType) { - TypePrinter.reset(new TypePrinting); - if (M) - TypePrinter->incorporateTypes(*M); - } - WriteAsOperandInternal(OS, this, TypePrinter.get(), nullptr, M, - /* FromValue */ true); +void Metadata::print(raw_ostream &OS, const Module *M) const { + printMetadataImpl(OS, *this, M, /* OnlyAsOperand */ false); } // Value::dump - allow easy printing of Values from the debugger. @@ -3234,7 +3232,10 @@ LLVM_DUMP_METHOD void NamedMDNode::dump() const { print(dbgs()); } LLVM_DUMP_METHOD -void Metadata::dump() const { - print(dbgs()); +void Metadata::dump() const { dump(nullptr); } + +LLVM_DUMP_METHOD +void Metadata::dump(const Module *M) const { + print(dbgs(), M); dbgs() << '\n'; } diff --git a/lib/IR/AsmWriter.h b/lib/IR/AsmWriter.h deleted file mode 100644 index 7716fa6..0000000 --- a/lib/IR/AsmWriter.h +++ /dev/null @@ -1,129 +0,0 @@ -//===-- llvm/IR/AsmWriter.h - Printing LLVM IR as an assembly file - C++ --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This files defines the interface for the AssemblyWriter class used to print -// LLVM IR and various helper classes that are used in printing. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_IR_ASMWRITER_H -#define LLVM_LIB_IR_ASMWRITER_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/TypeFinder.h" -#include "llvm/IR/UseListOrder.h" -#include "llvm/Support/FormattedStream.h" - -namespace llvm { - -class BasicBlock; -class Function; -class GlobalValue; -class Comdat; -class Module; -class NamedMDNode; -class Value; -class SlotTracker; - -/// Create a new SlotTracker for a Module -SlotTracker *createSlotTracker(const Module *M); - -//===----------------------------------------------------------------------===// -// TypePrinting Class: Type printing machinery -//===----------------------------------------------------------------------===// - -class TypePrinting { - TypePrinting(const TypePrinting &) = delete; - void operator=(const TypePrinting&) = delete; -public: - - /// NamedTypes - The named types that are used by the current module. - TypeFinder NamedTypes; - - /// NumberedTypes - The numbered types, along with their value. - DenseMap<StructType*, unsigned> NumberedTypes; - - - TypePrinting() {} - ~TypePrinting() {} - - void incorporateTypes(const Module &M); - - void print(Type *Ty, raw_ostream &OS); - - void printStructBody(StructType *Ty, raw_ostream &OS); -}; - -class AssemblyWriter { -protected: - formatted_raw_ostream &Out; - const Module *TheModule; - -private: - std::unique_ptr<SlotTracker> ModuleSlotTracker; - SlotTracker &Machine; - TypePrinting TypePrinter; - AssemblyAnnotationWriter *AnnotationWriter; - SetVector<const Comdat *> Comdats; - UseListOrderStack UseListOrders; - -public: - /// Construct an AssemblyWriter with an external SlotTracker - AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, - const Module *M, AssemblyAnnotationWriter *AAW); - - /// Construct an AssemblyWriter with an internally allocated SlotTracker - AssemblyWriter(formatted_raw_ostream &o, const Module *M, - AssemblyAnnotationWriter *AAW); - - virtual ~AssemblyWriter(); - - void printMDNodeBody(const MDNode *MD); - void printNamedMDNode(const NamedMDNode *NMD); - - void printModule(const Module *M); - - void writeOperand(const Value *Op, bool PrintType); - void writeParamOperand(const Value *Operand, AttributeSet Attrs,unsigned Idx); - void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope); - void writeAtomicCmpXchg(AtomicOrdering SuccessOrdering, - AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope); - - void writeAllMDNodes(); - void writeMDNode(unsigned Slot, const MDNode *Node); - void writeAllAttributeGroups(); - - void printTypeIdentities(); - void printGlobal(const GlobalVariable *GV); - void printAlias(const GlobalAlias *GV); - void printComdat(const Comdat *C); - void printFunction(const Function *F); - void printArgument(const Argument *FA, AttributeSet Attrs, unsigned Idx); - void printBasicBlock(const BasicBlock *BB); - void printInstructionLine(const Instruction &I); - void printInstruction(const Instruction &I); - - void printUseListOrder(const UseListOrder &Order); - void printUseLists(const Function *F); - -private: - void init(); - - // printInfoComment - Print a little comment after the instruction indicating - // which slot it occupies. 
- void printInfoComment(const Value &V); -}; - -} // namespace llvm - -#endif diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index 0da7784..d2dfeaa 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -7,7 +7,9 @@ // //===----------------------------------------------------------------------===// // -// This file implements the auto-upgrade helper functions +// This file implements the auto-upgrade helper functions. +// This is where deprecated IR intrinsics and other IR features are updated to +// current specifications. // //===----------------------------------------------------------------------===// @@ -156,6 +158,14 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name.startswith("x86.avx2.pcmpeq.") || Name.startswith("x86.avx2.pcmpgt.") || Name.startswith("x86.avx.vpermil.") || + Name == "x86.avx.vinsertf128.pd.256" || + Name == "x86.avx.vinsertf128.ps.256" || + Name == "x86.avx.vinsertf128.si.256" || + Name == "x86.avx2.vinserti128" || + Name == "x86.avx.vextractf128.pd.256" || + Name == "x86.avx.vextractf128.ps.256" || + Name == "x86.avx.vextractf128.si.256" || + Name == "x86.avx2.vextracti128" || Name == "x86.avx.movnt.dq.256" || Name == "x86.avx.movnt.pd.256" || Name == "x86.avx.movnt.ps.256" || @@ -171,6 +181,15 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name == "x86.sse2.psrl.dq.bs" || Name == "x86.avx2.psll.dq.bs" || Name == "x86.avx2.psrl.dq.bs" || + Name == "x86.sse41.pblendw" || + Name == "x86.sse41.blendpd" || + Name == "x86.sse41.blendps" || + Name == "x86.avx.blend.pd.256" || + Name == "x86.avx.blend.ps.256" || + Name == "x86.avx2.pblendw" || + Name == "x86.avx2.pblendd.128" || + Name == "x86.avx2.pblendd.256" || + Name == "x86.avx2.vbroadcasti128" || (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) { NewFn = nullptr; return true; @@ -184,17 +203,8 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { if (Name == "x86.sse41.ptestnzc") return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn); } - // Several blend and other instructions with maskes used the wrong number of + // Several blend and other instructions with masks used the wrong number of // bits. 
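Annotation: the registrations removed below move these blend intrinsics from the "truncate the immediate and remap" path to the NewFn == nullptr path, where UpgradeIntrinsicCall rewrites each call into a plain shufflevector. Working through the mask formula used there, ((Imm >> (i%8)) & 1) ? i + NumElts : i, a pblendw call with Imm = 0x0F selects elements 8, 9, 10, 11 from the second operand and 4, 5, 6, 7 from the first; the vinsertf128/vextractf128 family gets the same treatment further down, with masks that move whole 128-bit halves. A hedged sketch of the usual driver loop that exercises this machinery (the wrapper name is made up; UpgradeCallsToIntrinsic is the existing public hook):

  #include "llvm/IR/AutoUpgrade.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/Module.h"
  using namespace llvm;

  void upgradeModule(Module &M) {
    for (auto I = M.begin(), E = M.end(); I != E;) {
      Function *F = &*I++;        // advance first: upgrading may erase F
      UpgradeCallsToIntrinsic(F); // no-op for functions needing no upgrade
    }
  }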
- if (Name == "x86.sse41.pblendw") - return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_pblendw, - NewFn); - if (Name == "x86.sse41.blendpd") - return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_blendpd, - NewFn); - if (Name == "x86.sse41.blendps") - return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_blendps, - NewFn); if (Name == "x86.sse41.insertps") return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps, NewFn); @@ -207,24 +217,9 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { if (Name == "x86.sse41.mpsadbw") return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw, NewFn); - if (Name == "x86.avx.blend.pd.256") - return UpgradeX86IntrinsicsWith8BitMask( - F, Intrinsic::x86_avx_blend_pd_256, NewFn); - if (Name == "x86.avx.blend.ps.256") - return UpgradeX86IntrinsicsWith8BitMask( - F, Intrinsic::x86_avx_blend_ps_256, NewFn); if (Name == "x86.avx.dp.ps.256") return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256, NewFn); - if (Name == "x86.avx2.pblendw") - return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_pblendw, - NewFn); - if (Name == "x86.avx2.pblendd.128") - return UpgradeX86IntrinsicsWith8BitMask( - F, Intrinsic::x86_avx2_pblendd_128, NewFn); - if (Name == "x86.avx2.pblendd.256") - return UpgradeX86IntrinsicsWith8BitMask( - F, Intrinsic::x86_avx2_pblendd_256, NewFn); if (Name == "x86.avx2.mpsadbw") return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw, NewFn); @@ -569,6 +564,15 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { for (unsigned I = 0; I < EltNum; ++I) Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I)); + } else if (Name == "llvm.x86.avx2.vbroadcasti128") { + // Replace vbroadcasts with a vector shuffle. + Value *Op = Builder.CreatePointerCast( + CI->getArgOperand(0), + PointerType::getUnqual(VectorType::get(Type::getInt64Ty(C), 2))); + Value *Load = Builder.CreateLoad(Op); + const int Idxs[4] = { 0, 1, 0, 1 }; + Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), + Idxs); } else if (Name == "llvm.x86.sse2.psll.dq") { // 128-bit shift left specified in bits. unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); @@ -609,6 +613,94 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2, Shift); + } else if (Name == "llvm.x86.sse41.pblendw" || + Name == "llvm.x86.sse41.blendpd" || + Name == "llvm.x86.sse41.blendps" || + Name == "llvm.x86.avx.blend.pd.256" || + Name == "llvm.x86.avx.blend.ps.256" || + Name == "llvm.x86.avx2.pblendw" || + Name == "llvm.x86.avx2.pblendd.128" || + Name == "llvm.x86.avx2.pblendd.256") { + Value *Op0 = CI->getArgOperand(0); + Value *Op1 = CI->getArgOperand(1); + unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue(); + VectorType *VecTy = cast<VectorType>(CI->getType()); + unsigned NumElts = VecTy->getNumElements(); + + SmallVector<Constant*, 16> Idxs; + for (unsigned i = 0; i != NumElts; ++i) { + unsigned Idx = ((Imm >> (i%8)) & 1) ? 
i + NumElts : i; + Idxs.push_back(Builder.getInt32(Idx)); + } + + Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs)); + } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" || + Name == "llvm.x86.avx.vinsertf128.ps.256" || + Name == "llvm.x86.avx.vinsertf128.si.256" || + Name == "llvm.x86.avx2.vinserti128") { + Value *Op0 = CI->getArgOperand(0); + Value *Op1 = CI->getArgOperand(1); + unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); + VectorType *VecTy = cast<VectorType>(CI->getType()); + unsigned NumElts = VecTy->getNumElements(); + + // Mask off the high bits of the immediate value; hardware ignores those. + Imm = Imm & 1; + + // Extend the second operand into a vector that is twice as big. + Value *UndefV = UndefValue::get(Op1->getType()); + SmallVector<Constant*, 8> Idxs; + for (unsigned i = 0; i != NumElts; ++i) { + Idxs.push_back(Builder.getInt32(i)); + } + Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs)); + + // Insert the second operand into the first operand. + + // Note that there is no guarantee that instruction lowering will actually + // produce a vinsertf128 instruction for the created shuffles. In + // particular, the 0 immediate case involves no lane changes, so it can + // be handled as a blend. + + // Example of shuffle mask for 32-bit elements: + // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> + // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 > + + SmallVector<Constant*, 8> Idxs2; + // The low half of the result is either the low half of the 1st operand + // or the low half of the 2nd operand (the inserted vector). + for (unsigned i = 0; i != NumElts / 2; ++i) { + unsigned Idx = Imm ? i : (i + NumElts); + Idxs2.push_back(Builder.getInt32(Idx)); + } + // The high half of the result is either the low half of the 2nd operand + // (the inserted vector) or the high half of the 1st operand. + for (unsigned i = NumElts / 2; i != NumElts; ++i) { + unsigned Idx = Imm ? (i + NumElts / 2) : i; + Idxs2.push_back(Builder.getInt32(Idx)); + } + Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2)); + } else if (Name == "llvm.x86.avx.vextractf128.pd.256" || + Name == "llvm.x86.avx.vextractf128.ps.256" || + Name == "llvm.x86.avx.vextractf128.si.256" || + Name == "llvm.x86.avx2.vextracti128") { + Value *Op0 = CI->getArgOperand(0); + unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); + VectorType *VecTy = cast<VectorType>(CI->getType()); + unsigned NumElts = VecTy->getNumElements(); + + // Mask off the high bits of the immediate value; hardware ignores those. + Imm = Imm & 1; + + // Get indexes for either the high half or low half of the input vector. + SmallVector<Constant*, 4> Idxs(NumElts); + for (unsigned i = 0; i != NumElts; ++i) { + unsigned Idx = Imm ? 
(i + NumElts) : i; + Idxs[i] = Builder.getInt32(Idx); + } + + Value *UndefV = UndefValue::get(Op0->getType()); + Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs)); } else { bool PD128 = false, PD256 = false, PS128 = false, PS256 = false; if (Name == "llvm.x86.avx.vpermil.pd.256") @@ -739,19 +831,11 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { return; } - case Intrinsic::x86_sse41_pblendw: - case Intrinsic::x86_sse41_blendpd: - case Intrinsic::x86_sse41_blendps: case Intrinsic::x86_sse41_insertps: case Intrinsic::x86_sse41_dppd: case Intrinsic::x86_sse41_dpps: case Intrinsic::x86_sse41_mpsadbw: - case Intrinsic::x86_avx_blend_pd_256: - case Intrinsic::x86_avx_blend_ps_256: case Intrinsic::x86_avx_dp_ps_256: - case Intrinsic::x86_avx2_pblendw: - case Intrinsic::x86_avx2_pblendd_128: - case Intrinsic::x86_avx2_pblendd_256: case Intrinsic::x86_avx2_mpsadbw: { // Need to truncate the last argument from i32 to i8 -- this argument models // an inherently 8-bit immediate operand to these x86 instructions. diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp index b3b3cbf..fe38385 100644 --- a/lib/IR/BasicBlock.cpp +++ b/lib/IR/BasicBlock.cpp @@ -29,10 +29,6 @@ ValueSymbolTable *BasicBlock::getValueSymbolTable() { return nullptr; } -const DataLayout *BasicBlock::getDataLayout() const { - return getParent()->getDataLayout(); -} - LLVMContext &BasicBlock::getContext() const { return getType()->getContext(); } @@ -102,14 +98,14 @@ void BasicBlock::eraseFromParent() { getParent()->getBasicBlockList().erase(this); } -/// moveBefore - Unlink this basic block from its current function and +/// Unlink this basic block from its current function and /// insert it into the function that MovePos lives in, right before MovePos. void BasicBlock::moveBefore(BasicBlock *MovePos) { MovePos->getParent()->getBasicBlockList().splice(MovePos, getParent()->getBasicBlockList(), this); } -/// moveAfter - Unlink this basic block from its current function and +/// Unlink this basic block from its current function and /// insert it into the function that MovePos lives in, right after MovePos. void BasicBlock::moveAfter(BasicBlock *MovePos) { Function::iterator I = MovePos; @@ -117,6 +113,9 @@ void BasicBlock::moveAfter(BasicBlock *MovePos) { getParent()->getBasicBlockList(), this); } +const Module *BasicBlock::getModule() const { + return getParent()->getParent(); +} TerminatorInst *BasicBlock::getTerminator() { if (InstList.empty()) return nullptr; @@ -210,7 +209,7 @@ void BasicBlock::dropAllReferences() { I->dropAllReferences(); } -/// getSinglePredecessor - If this basic block has a single predecessor block, +/// If this basic block has a single predecessor block, /// return the block, otherwise return a null pointer. BasicBlock *BasicBlock::getSinglePredecessor() { pred_iterator PI = pred_begin(this), E = pred_end(this); @@ -220,7 +219,7 @@ BasicBlock *BasicBlock::getSinglePredecessor() { return (PI == E) ? ThePred : nullptr /*multiple preds*/; } -/// getUniquePredecessor - If this basic block has a unique predecessor block, +/// If this basic block has a unique predecessor block, /// return the block, otherwise return a null pointer. 
/// Note that unique predecessor doesn't mean single edge, there can be /// multiple edges from the unique predecessor to this block (for example @@ -253,7 +252,7 @@ BasicBlock *BasicBlock::getUniqueSuccessor() { return SuccBB; } -/// removePredecessor - This method is used to notify a BasicBlock that the +/// This method is used to notify a BasicBlock that the /// specified Predecessor of the block is no longer able to reach it. This is /// actually not used to update the Predecessor list, but is actually used to /// update the PHI nodes that reside in the block. Note that this should be @@ -330,7 +329,7 @@ void BasicBlock::removePredecessor(BasicBlock *Pred, } -/// splitBasicBlock - This splits a basic block into two at the specified +/// This splits a basic block into two at the specified /// instruction. Note that all instructions BEFORE the specified iterator stay /// as part of the original basic block, an unconditional branch is added to /// the new BB, and the rest of the instructions in the BB are moved to the new @@ -401,14 +400,13 @@ void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) { } } -/// isLandingPad - Return true if this basic block is a landing pad. I.e., it's +/// Return true if this basic block is a landing pad. I.e., it's /// the destination of the 'unwind' edge of an invoke instruction. bool BasicBlock::isLandingPad() const { return isa<LandingPadInst>(getFirstNonPHI()); } -/// getLandingPadInst() - Return the landingpad instruction associated with -/// the landing pad. +/// Return the landingpad instruction associated with the landing pad. LandingPadInst *BasicBlock::getLandingPadInst() { return dyn_cast<LandingPadInst>(getFirstNonPHI()); } diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index a915d28..d97d2c4 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -1120,27 +1120,18 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, return ConstantInt::get(CI1->getContext(), C1V | C2V); case Instruction::Xor: return ConstantInt::get(CI1->getContext(), C1V ^ C2V); - case Instruction::Shl: { - uint32_t shiftAmt = C2V.getZExtValue(); - if (shiftAmt < C1V.getBitWidth()) - return ConstantInt::get(CI1->getContext(), C1V.shl(shiftAmt)); - else - return UndefValue::get(C1->getType()); // too big shift is undef - } - case Instruction::LShr: { - uint32_t shiftAmt = C2V.getZExtValue(); - if (shiftAmt < C1V.getBitWidth()) - return ConstantInt::get(CI1->getContext(), C1V.lshr(shiftAmt)); - else - return UndefValue::get(C1->getType()); // too big shift is undef - } - case Instruction::AShr: { - uint32_t shiftAmt = C2V.getZExtValue(); - if (shiftAmt < C1V.getBitWidth()) - return ConstantInt::get(CI1->getContext(), C1V.ashr(shiftAmt)); - else - return UndefValue::get(C1->getType()); // too big shift is undef - } + case Instruction::Shl: + if (C2V.ult(C1V.getBitWidth())) + return ConstantInt::get(CI1->getContext(), C1V.shl(C2V)); + return UndefValue::get(C1->getType()); // too big shift is undef + case Instruction::LShr: + if (C2V.ult(C1V.getBitWidth())) + return ConstantInt::get(CI1->getContext(), C1V.lshr(C2V)); + return UndefValue::get(C1->getType()); // too big shift is undef + case Instruction::AShr: + if (C2V.ult(C1V.getBitWidth())) + return ConstantInt::get(CI1->getContext(), C1V.ashr(C2V)); + return UndefValue::get(C1->getType()); // too big shift is undef } } @@ -1327,7 +1318,7 @@ static FCmpInst::Predicate evaluateFCmpRelation(Constant *V1, Constant *V2) { if (!isa<ConstantExpr>(V1)) { if (!isa<ConstantExpr>(V2)) { - 
// We distilled thisUse the standard constant folder for a few cases + // Simple case, use the standard constant folder. ConstantInt *R = nullptr; R = dyn_cast<ConstantInt>( ConstantExpr::getFCmp(FCmpInst::FCMP_OEQ, V1, V2)); @@ -1665,15 +1656,22 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, // Handle some degenerate cases first if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) { + CmpInst::Predicate Predicate = CmpInst::Predicate(pred); + bool isIntegerPredicate = ICmpInst::isIntPredicate(Predicate); // For EQ and NE, we can always pick a value for the undef to make the // predicate pass or fail, so we can return undef. - // Also, if both operands are undef, we can return undef. - if (ICmpInst::isEquality(ICmpInst::Predicate(pred)) || - (isa<UndefValue>(C1) && isa<UndefValue>(C2))) + // Also, if both operands are undef, we can return undef for int comparison. + if (ICmpInst::isEquality(Predicate) || (isIntegerPredicate && C1 == C2)) return UndefValue::get(ResultTy); - // Otherwise, pick the same value as the non-undef operand, and fold - // it to true or false. - return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred)); + + // Otherwise, for integer compare, pick the same value as the non-undef + // operand, and fold it to true or false. + if (isIntegerPredicate) + return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred)); + + // Choosing NaN for the undef will always make unordered comparison succeed + // and ordered comparison fails. + return ConstantInt::get(ResultTy, CmpInst::isUnordered(Predicate)); } // icmp eq/ne(null,GV) -> false/true @@ -1789,7 +1787,10 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, return ConstantVector::get(ResElts); } - if (C1->getType()->isFloatingPointTy()) { + if (C1->getType()->isFloatingPointTy() && + // Only call evaluateFCmpRelation if we have a constant expr to avoid + // infinite recursive loop + (isa<ConstantExpr>(C1) || isa<ConstantExpr>(C2))) { int Result = -1; // -1 = unknown, 0 = known false, 1 = known true. switch (evaluateFCmpRelation(C1, C2)) { default: llvm_unreachable("Unknown relation!"); diff --git a/lib/IR/ConstantRange.cpp b/lib/IR/ConstantRange.cpp index f8e9ba4..91095cf 100644 --- a/lib/IR/ConstantRange.cpp +++ b/lib/IR/ConstantRange.cpp @@ -49,14 +49,15 @@ ConstantRange::ConstantRange(APIntMoveTy L, APIntMoveTy U) "Lower == Upper, but they aren't min or max value!"); } -ConstantRange ConstantRange::makeICmpRegion(unsigned Pred, - const ConstantRange &CR) { +ConstantRange ConstantRange::makeAllowedICmpRegion(CmpInst::Predicate Pred, + const ConstantRange &CR) { if (CR.isEmptySet()) return CR; uint32_t W = CR.getBitWidth(); switch (Pred) { - default: llvm_unreachable("Invalid ICmp predicate to makeICmpRegion()"); + default: + llvm_unreachable("Invalid ICmp predicate to makeAllowedICmpRegion()"); case CmpInst::ICMP_EQ: return CR; case CmpInst::ICMP_NE: @@ -114,6 +115,16 @@ ConstantRange ConstantRange::makeICmpRegion(unsigned Pred, } } +ConstantRange ConstantRange::makeSatisfyingICmpRegion(CmpInst::Predicate Pred, + const ConstantRange &CR) { + // Follows from De-Morgan's laws: + // + // ~(~A union ~B) == A intersect B. 
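Annotation, a worked example to make the allowed/satisfying distinction concrete (editor's illustration, not patch content): take i8 values with y constrained to CR = [0, 10). Then x <s y holds for some y whenever x <= 8, but holds for every y only when x < 0:

  #include "llvm/ADT/APInt.h"
  #include "llvm/IR/ConstantRange.h"
  #include "llvm/IR/InstrTypes.h"
  using namespace llvm;

  void demo() {
    ConstantRange CR(APInt(8, 0), APInt(8, 10)); // y in [0, 10)
    // Allowed: the predicate can be true for SOME y in CR -> x in [-128, 9)
    ConstantRange Allowed =
        ConstantRange::makeAllowedICmpRegion(CmpInst::ICMP_SLT, CR);
    // Satisfying: true for EVERY y in CR -> x in [-128, 0)
    ConstantRange Satisfying =
        ConstantRange::makeSatisfyingICmpRegion(CmpInst::ICMP_SLT, CR);
    (void)Allowed; (void)Satisfying;
  }

The De Morgan identity in the comment is what connects the two: inverting the predicate, computing the allowed region, and inverting the result yields the set on which the original predicate always holds.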
+ // + return makeAllowedICmpRegion(CmpInst::getInversePredicate(Pred), CR) + .inverse(); +} + /// isFullSet - Return true if this set contains all of the elements possible /// for this data-type bool ConstantRange::isFullSet() const { @@ -587,6 +598,13 @@ ConstantRange::multiply(const ConstantRange &Other) const { if (isEmptySet() || Other.isEmptySet()) return ConstantRange(getBitWidth(), /*isFullSet=*/false); + // Multiplication is signedness-independent. However different ranges can be + // obtained depending on how the input ranges are treated. These different + // ranges are all conservatively correct, but one might be better than the + // other. We calculate two ranges; one treating the inputs as unsigned + // and the other signed, then return the smallest of these ranges. + + // Unsigned range first. APInt this_min = getUnsignedMin().zext(getBitWidth() * 2); APInt this_max = getUnsignedMax().zext(getBitWidth() * 2); APInt Other_min = Other.getUnsignedMin().zext(getBitWidth() * 2); @@ -594,7 +612,26 @@ ConstantRange::multiply(const ConstantRange &Other) const { ConstantRange Result_zext = ConstantRange(this_min * Other_min, this_max * Other_max + 1); - return Result_zext.truncate(getBitWidth()); + ConstantRange UR = Result_zext.truncate(getBitWidth()); + + // Now the signed range. Because we could be dealing with negative numbers + // here, the lower bound is the smallest of the cartesian product of the + // lower and upper ranges; for example: + // [-1,4) * [-2,3) = min(-1*-2, -1*2, 3*-2, 3*2) = -6. + // Similarly for the upper bound, swapping min for max. + + this_min = getSignedMin().sext(getBitWidth() * 2); + this_max = getSignedMax().sext(getBitWidth() * 2); + Other_min = Other.getSignedMin().sext(getBitWidth() * 2); + Other_max = Other.getSignedMax().sext(getBitWidth() * 2); + + auto L = {this_min * Other_min, this_min * Other_max, + this_max * Other_min, this_max * Other_max}; + auto Compare = [](const APInt &A, const APInt &B) { return A.slt(B); }; + ConstantRange Result_sext(std::min(L, Compare), std::max(L, Compare) + 1); + ConstantRange SR = Result_sext.truncate(getBitWidth()); + + return UR.getSetSize().ult(SR.getSetSize()) ? UR : SR; } ConstantRange diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index 0bf61a7..e51a396 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -1215,11 +1215,9 @@ ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const { Constant *ConstantExpr::getWithOperands(ArrayRef<Constant *> Ops, Type *Ty, bool OnlyIfReduced) const { assert(Ops.size() == getNumOperands() && "Operand count mismatch!"); - bool AnyChange = Ty != getType(); - for (unsigned i = 0; i != Ops.size(); ++i) - AnyChange |= Ops[i] != getOperand(i); - if (!AnyChange) // No operands changed, return self. + // If no operands changed return self. + if (Ty == getType() && std::equal(Ops.begin(), Ops.end(), op_begin())) return const_cast<ConstantExpr*>(this); Type *OnlyIfReducedTy = OnlyIfReduced ? 
Ty : nullptr; @@ -2971,10 +2969,7 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV, } Instruction *ConstantExpr::getAsInstruction() { - SmallVector<Value*,4> ValueOperands; - for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) - ValueOperands.push_back(cast<Value>(I)); - + SmallVector<Value *, 4> ValueOperands(op_begin(), op_end()); ArrayRef<Value*> Ops(ValueOperands); switch (getOpcode()) { @@ -3006,12 +3001,14 @@ Instruction *ConstantExpr::getAsInstruction() { case Instruction::ShuffleVector: return new ShuffleVectorInst(Ops[0], Ops[1], Ops[2]); - case Instruction::GetElementPtr: - if (cast<GEPOperator>(this)->isInBounds()) - return GetElementPtrInst::CreateInBounds(Ops[0], Ops.slice(1)); - else - return GetElementPtrInst::Create(Ops[0], Ops.slice(1)); - + case Instruction::GetElementPtr: { + const auto *GO = cast<GEPOperator>(this); + if (GO->isInBounds()) + return GetElementPtrInst::CreateInBounds(GO->getSourceElementType(), + Ops[0], Ops.slice(1)); + return GetElementPtrInst::Create(GO->getSourceElementType(), Ops[0], + Ops.slice(1)); + } case Instruction::ICmp: case Instruction::FCmp: return CmpInst::Create((Instruction::OtherOps)getOpcode(), diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index f007616..613147e 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -2506,7 +2506,7 @@ LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer, LLVMValueRef *Indices, unsigned NumIndices, const char *Name) { ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices); - return wrap(unwrap(B)->CreateGEP(unwrap(Pointer), IdxList, Name)); + return wrap(unwrap(B)->CreateGEP(nullptr, unwrap(Pointer), IdxList, Name)); } LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer, diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 2cb27ca..9677de4 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -121,18 +121,10 @@ void DIBuilder::finalize() { } /// If N is compile unit return NULL otherwise return N. -static MDNode *getNonCompileUnitScope(MDNode *N) { - if (DIDescriptor(N).isCompileUnit()) +static MDScope *getNonCompileUnitScope(MDNode *N) { + if (!N || isa<MDCompileUnit>(N)) return nullptr; - return N; -} - -static MDNode *createFilePathPair(LLVMContext &VMContext, StringRef Filename, - StringRef Directory) { - assert(!Filename.empty() && "Unable to create file without name"); - Metadata *Pair[] = {MDString::get(VMContext, Filename), - MDString::get(VMContext, Directory)}; - return MDNode::get(VMContext, Pair); + return cast<MDScope>(N); } DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, @@ -157,22 +149,12 @@ DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, TempGVs = MDTuple::getTemporary(VMContext, None).release(); TempImportedModules = MDTuple::getTemporary(VMContext, None).release(); - Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_compile_unit) - .concat(Lang) - .concat(Producer) - .concat(isOptimized) - .concat(Flags) - .concat(RunTimeVer) - .concat(SplitName) - .concat(Kind) - .get(VMContext), - createFilePathPair(VMContext, Filename, Directory), - TempEnumTypes, TempRetainTypes, TempSubprograms, TempGVs, - TempImportedModules}; - // TODO: Switch to getDistinct(). We never want to merge compile units based // on contents. 
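Annotation: the compile unit is now built directly as an MDCompileUnit below, but client code going through DIBuilder is unaffected. A sketch of typical usage with hypothetical file and producer names (later DIBuilder sketches in this section reuse DIB, CU, File, and IntTy from here):

  #include "llvm/IR/DIBuilder.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Support/Dwarf.h"
  using namespace llvm;

  void buildDebugInfo(Module &M) {
    DIBuilder DIB(M);
    DICompileUnit CU = DIB.createCompileUnit(
        dwarf::DW_LANG_C99, "t.c", "/tmp", "some-compiler",
        /*isOptimized=*/false, /*Flags=*/"", /*RunTimeVer=*/0);
    DIFile File = DIB.createFile("t.c", "/tmp");
    DIBasicType IntTy =
        DIB.createBasicType("int", 32, 32, dwarf::DW_ATE_signed);
    (void)CU; (void)File; (void)IntTy;
    DIB.finalize();
  }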
- MDNode *CUNode = MDNode::get(VMContext, Elts); + MDNode *CUNode = MDCompileUnit::get( + VMContext, Lang, MDFile::get(VMContext, Filename, Directory), Producer, + isOptimized, Flags, RunTimeVer, SplitName, Kind, TempEnumTypes, + TempRetainTypes, TempSubprograms, TempGVs, TempImportedModules); // Create a named metadata so that it is easier to find cu in a module. // Note that we only generate this when the caller wants to actually @@ -192,11 +174,7 @@ static DIImportedEntity createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope Context, Metadata *NS, unsigned Line, StringRef Name, SmallVectorImpl<TrackingMDNodeRef> &AllImportedModules) { - const MDNode *R; - Metadata *Elts[] = {HeaderBuilder::get(Tag).concat(Line).concat(Name).get(C), - Context, NS}; - R = MDNode::get(C, Elts); - DIImportedEntity M(R); + DIImportedEntity M = MDImportedEntity::get(C, Tag, Context, NS, Line, Name); assert(M.Verify() && "Imported module should be valid"); AllImportedModules.emplace_back(M.get()); return M; @@ -236,39 +214,17 @@ DIImportedEntity DIBuilder::createImportedDeclaration(DIScope Context, } DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) { - Metadata *Elts[] = { - HeaderBuilder::get(dwarf::DW_TAG_file_type).get(VMContext), - createFilePathPair(VMContext, Filename, Directory)}; - return DIFile(MDNode::get(VMContext, Elts)); + return MDFile::get(VMContext, Filename, Directory); } DIEnumerator DIBuilder::createEnumerator(StringRef Name, int64_t Val) { assert(!Name.empty() && "Unable to create enumerator without name"); - Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_enumerator) - .concat(Name) - .concat(Val) - .get(VMContext)}; - return DIEnumerator(MDNode::get(VMContext, Elts)); + return MDEnumerator::get(VMContext, Val, Name); } DIBasicType DIBuilder::createUnspecifiedType(StringRef Name) { assert(!Name.empty() && "Unable to create type without name"); - // Unspecified types are encoded in DIBasicType format. Line number, filename, - // size, alignment, offset and flags are always empty here. - Metadata *Elts[] = { - HeaderBuilder::get(dwarf::DW_TAG_unspecified_type) - .concat(Name) - .concat(0) - .concat(0) - .concat(0) - .concat(0) - .concat(0) - .concat(0) - .get(VMContext), - nullptr, // Filename - nullptr // Unused - }; - return DIBasicType(MDNode::get(VMContext, Elts)); + return MDBasicType::get(VMContext, dwarf::DW_TAG_unspecified_type, Name); } DIBasicType DIBuilder::createNullPtrType() { @@ -279,142 +235,61 @@ DIBasicType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, uint64_t AlignInBits, unsigned Encoding) { assert(!Name.empty() && "Unable to create type without name"); - // Basic types are encoded in DIBasicType format. Line number, filename, - // offset and flags are always empty here. - Metadata *Elts[] = { - HeaderBuilder::get(dwarf::DW_TAG_base_type) - .concat(Name) - .concat(0) // Line - .concat(SizeInBits) - .concat(AlignInBits) - .concat(0) // Offset - .concat(0) // Flags - .concat(Encoding) - .get(VMContext), - nullptr, // Filename - nullptr // Unused - }; - return DIBasicType(MDNode::get(VMContext, Elts)); + return MDBasicType::get(VMContext, dwarf::DW_TAG_base_type, Name, SizeInBits, + AlignInBits, Encoding); } DIDerivedType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { - // Qualified types are encoded in DIDerivedType format. 
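Annotation: judging by the call sites in this hunk, the positional arguments of MDDerivedType::get read as (Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits, OffsetInBits, Flags), with an optional extra-data operand at the end. Through the public API the qualified-type case stays a one-liner; a sketch reusing DIB and IntTy from the compile-unit example above:

  // "const int": wraps an existing type in a DW_TAG_const_type node.
  DIDerivedType ConstIntTy =
      DIB.createQualifiedType(dwarf::DW_TAG_const_type, IntTy);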
- Metadata *Elts[] = {HeaderBuilder::get(Tag) - .concat(StringRef()) // Name - .concat(0) // Line - .concat(0) // Size - .concat(0) // Align - .concat(0) // Offset - .concat(0) // Flags - .get(VMContext), - nullptr, // Filename - nullptr, // Unused - FromTy.getRef()}; - return DIDerivedType(MDNode::get(VMContext, Elts)); + return MDDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr, + FromTy.getRef(), 0, 0, 0, 0); } DIDerivedType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, uint64_t AlignInBits, StringRef Name) { - // Pointer types are encoded in DIDerivedType format. - Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_pointer_type) - .concat(Name) - .concat(0) // Line - .concat(SizeInBits) - .concat(AlignInBits) - .concat(0) // Offset - .concat(0) // Flags - .get(VMContext), - nullptr, // Filename - nullptr, // Unused - PointeeTy.getRef()}; - return DIDerivedType(MDNode::get(VMContext, Elts)); + // FIXME: Why is there a name here? + return MDDerivedType::get(VMContext, dwarf::DW_TAG_pointer_type, Name, + nullptr, 0, nullptr, PointeeTy.getRef(), SizeInBits, + AlignInBits, 0, 0); } DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy, DIType Base, uint64_t SizeInBits, uint64_t AlignInBits) { - // Pointer types are encoded in DIDerivedType format. - Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_ptr_to_member_type) - .concat(StringRef()) - .concat(0) // Line - .concat(SizeInBits) // Size - .concat(AlignInBits) // Align - .concat(0) // Offset - .concat(0) // Flags - .get(VMContext), - nullptr, // Filename - nullptr, // Unused - PointeeTy.getRef(), Base.getRef()}; - return DIDerivedType(MDNode::get(VMContext, Elts)); + return MDDerivedType::get(VMContext, dwarf::DW_TAG_ptr_to_member_type, "", + nullptr, 0, nullptr, PointeeTy.getRef(), SizeInBits, + AlignInBits, 0, 0, Base.getRef()); } DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) { assert(RTy.isType() && "Unable to create reference type"); - // References are encoded in DIDerivedType format. - Metadata *Elts[] = {HeaderBuilder::get(Tag) - .concat(StringRef()) // Name - .concat(0) // Line - .concat(0) // Size - .concat(0) // Align - .concat(0) // Offset - .concat(0) // Flags - .get(VMContext), - nullptr, // Filename - nullptr, // TheCU, - RTy.getRef()}; - return DIDerivedType(MDNode::get(VMContext, Elts)); + return MDDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr, + RTy.getRef(), 0, 0, 0, 0); } DIDerivedType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, unsigned LineNo, DIDescriptor Context) { - // typedefs are encoded in DIDerivedType format. - Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_typedef) - .concat(Name) - .concat(LineNo) - .concat(0) // Size - .concat(0) // Align - .concat(0) // Offset - .concat(0) // Flags - .get(VMContext), - File.getFileNode(), - DIScope(getNonCompileUnitScope(Context)).getRef(), - Ty.getRef()}; - return DIDerivedType(MDNode::get(VMContext, Elts)); + return MDDerivedType::get(VMContext, dwarf::DW_TAG_typedef, Name, + File.getFileNode(), LineNo, + DIScope(getNonCompileUnitScope(Context)).getRef(), + Ty.getRef(), 0, 0, 0, 0); } DIDerivedType DIBuilder::createFriend(DIType Ty, DIType FriendTy) { // typedefs are encoded in DIDerivedType format. 
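Annotation: the "// typedefs are encoded in DIDerivedType format." line retained just above sits inside createFriend and is a pre-existing copy-paste leftover, not a description of this function. For the actual typedef constructor a few lines up, a sketch with the names from the earlier DIBuilder example:

  // typedef int MyInt;  (recorded at line 12 of File, scoped to CU)
  DIDerivedType MyIntTy = DIB.createTypedef(IntTy, "MyInt", File, 12, CU);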
assert(Ty.isType() && "Invalid type!"); assert(FriendTy.isType() && "Invalid friend type!"); - Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_friend) - .concat(StringRef()) // Name - .concat(0) // Line - .concat(0) // Size - .concat(0) // Align - .concat(0) // Offset - .concat(0) // Flags - .get(VMContext), - nullptr, Ty.getRef(), FriendTy.getRef()}; - return DIDerivedType(MDNode::get(VMContext, Elts)); + return MDDerivedType::get(VMContext, dwarf::DW_TAG_friend, "", nullptr, 0, + Ty.getRef(), FriendTy.getRef(), 0, 0, 0, 0); } DIDerivedType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, uint64_t BaseOffset, unsigned Flags) { assert(Ty.isType() && "Unable to create inheritance"); - // TAG_inheritance is encoded in DIDerivedType format. - Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_inheritance) - .concat(StringRef()) // Name - .concat(0) // Line - .concat(0) // Size - .concat(0) // Align - .concat(BaseOffset) - .concat(Flags) - .get(VMContext), - nullptr, Ty.getRef(), BaseTy.getRef()}; - auto R = DIDerivedType(MDNode::get(VMContext, Elts)); - return R; + return MDDerivedType::get(VMContext, dwarf::DW_TAG_inheritance, "", nullptr, + 0, Ty.getRef(), BaseTy.getRef(), 0, 0, BaseOffset, + Flags); } DIDerivedType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name, @@ -423,22 +298,13 @@ DIDerivedType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name, uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, DIType Ty) { - // TAG_member is encoded in DIDerivedType format. - Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_member) - .concat(Name) - .concat(LineNumber) - .concat(SizeInBits) - .concat(AlignInBits) - .concat(OffsetInBits) - .concat(Flags) - .get(VMContext), - File.getFileNode(), - DIScope(getNonCompileUnitScope(Scope)).getRef(), - Ty.getRef()}; - return DIDerivedType(MDNode::get(VMContext, Elts)); -} - -static Metadata *getConstantOrNull(Constant *C) { + return MDDerivedType::get( + VMContext, dwarf::DW_TAG_member, Name, File, LineNumber, + DIScope(getNonCompileUnitScope(Scope)).getRef(), Ty.getRef(), SizeInBits, + AlignInBits, OffsetInBits, Flags); +} + +static ConstantAsMetadata *getConstantOrNull(Constant *C) { if (C) return ConstantAsMetadata::get(C); return nullptr; @@ -451,18 +317,10 @@ DIDerivedType DIBuilder::createStaticMemberType(DIDescriptor Scope, llvm::Constant *Val) { // TAG_member is encoded in DIDerivedType format. Flags |= DIDescriptor::FlagStaticMember; - Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_member) - .concat(Name) - .concat(LineNumber) - .concat(0) // Size - .concat(0) // Align - .concat(0) // Offset - .concat(Flags) - .get(VMContext), - File.getFileNode(), - DIScope(getNonCompileUnitScope(Scope)).getRef(), - Ty.getRef(), getConstantOrNull(Val)}; - return DIDerivedType(MDNode::get(VMContext, Elts)); + return MDDerivedType::get( + VMContext, dwarf::DW_TAG_member, Name, File, LineNumber, + DIScope(getNonCompileUnitScope(Scope)).getRef(), Ty.getRef(), 0, 0, 0, + Flags, getConstantOrNull(Val)); } DIDerivedType DIBuilder::createObjCIVar(StringRef Name, DIFile File, @@ -471,33 +329,18 @@ DIDerivedType DIBuilder::createObjCIVar(StringRef Name, DIFile File, uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, DIType Ty, MDNode *PropertyNode) { - // TAG_member is encoded in DIDerivedType format. 
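Annotation, a sketch for the member-type constructors in this hunk (same assumed names as before; StructScope stands in for a previously created composite type):

  // struct field "x": a 32-bit int at bit offset 0, declared on line 7.
  DIDerivedType FieldX = DIB.createMemberType(
      StructScope, "x", File, /*LineNumber=*/7, /*SizeInBits=*/32,
      /*AlignInBits=*/32, /*OffsetInBits=*/0, /*Flags=*/0, IntTy);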
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_member) - .concat(Name) - .concat(LineNumber) - .concat(SizeInBits) - .concat(AlignInBits) - .concat(OffsetInBits) - .concat(Flags) - .get(VMContext), - File.getFileNode(), getNonCompileUnitScope(File), Ty, - PropertyNode}; - return DIDerivedType(MDNode::get(VMContext, Elts)); + return MDDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File, + LineNumber, getNonCompileUnitScope(File), + Ty.getRef(), SizeInBits, AlignInBits, OffsetInBits, + Flags, PropertyNode); } DIObjCProperty DIBuilder::createObjCProperty(StringRef Name, DIFile File, unsigned LineNumber, StringRef GetterName, StringRef SetterName, unsigned PropertyAttributes, DIType Ty) { - Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_APPLE_property) - .concat(Name) - .concat(LineNumber) - .concat(GetterName) - .concat(SetterName) - .concat(PropertyAttributes) - .get(VMContext), - File, Ty}; - return DIObjCProperty(MDNode::get(VMContext, Elts)); + return MDObjCProperty::get(VMContext, Name, File, LineNumber, GetterName, + SetterName, PropertyAttributes, Ty); } DITemplateTypeParameter @@ -505,13 +348,7 @@ DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name, DIType Ty) { assert(!DIScope(getNonCompileUnitScope(Context)).getRef() && "Expected compile unit"); - Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_template_type_parameter) - .concat(Name) - .concat(0) - .concat(0) - .get(VMContext), - nullptr, Ty.getRef(), nullptr}; - return DITemplateTypeParameter(MDNode::get(VMContext, Elts)); + return MDTemplateTypeParameter::get(VMContext, Name, Ty.getRef()); } static DITemplateValueParameter @@ -520,10 +357,7 @@ createTemplateValueParameterHelper(LLVMContext &VMContext, unsigned Tag, DIType Ty, Metadata *MD) { assert(!DIScope(getNonCompileUnitScope(Context)).getRef() && "Expected compile unit"); - Metadata *Elts[] = { - HeaderBuilder::get(Tag).concat(Name).concat(0).concat(0).get(VMContext), - nullptr, Ty.getRef(), MD, nullptr}; - return DITemplateValueParameter(MDNode::get(VMContext, Elts)); + return MDTemplateValueParameter::get(VMContext, Tag, Name, Ty.getRef(), MD); } DITemplateValueParameter @@ -563,23 +397,11 @@ DICompositeType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, assert((!Context || Context.isScope() || Context.isType()) && "createClassType should be called with a valid Context"); // TAG_class_type is encoded in DICompositeType format. - Metadata *Elts[] = { - HeaderBuilder::get(dwarf::DW_TAG_class_type) - .concat(Name) - .concat(LineNumber) - .concat(SizeInBits) - .concat(AlignInBits) - .concat(OffsetInBits) - .concat(Flags) - .concat(0) - .get(VMContext), - File.getFileNode(), DIScope(getNonCompileUnitScope(Context)).getRef(), - DerivedFrom.getRef(), Elements, VTableHolder.getRef(), TemplateParams, - UniqueIdentifier.empty() ? 
nullptr - : MDString::get(VMContext, UniqueIdentifier)}; - DICompositeType R(MDNode::get(VMContext, Elts)); - assert(R.isCompositeType() && - "createClassType should return a DICompositeType"); + DICompositeType R = MDCompositeType::get( + VMContext, dwarf::DW_TAG_structure_type, Name, File, LineNumber, + DIScope(getNonCompileUnitScope(Context)).getRef(), DerivedFrom.getRef(), + SizeInBits, AlignInBits, OffsetInBits, Flags, Elements, 0, + VTableHolder.getRef(), TemplateParams, UniqueIdentifier); if (!UniqueIdentifier.empty()) retainType(R); trackIfUnresolved(R); @@ -596,24 +418,11 @@ DICompositeType DIBuilder::createStructType(DIDescriptor Context, unsigned RunTimeLang, DIType VTableHolder, StringRef UniqueIdentifier) { - // TAG_structure_type is encoded in DICompositeType format. - Metadata *Elts[] = { - HeaderBuilder::get(dwarf::DW_TAG_structure_type) - .concat(Name) - .concat(LineNumber) - .concat(SizeInBits) - .concat(AlignInBits) - .concat(0) - .concat(Flags) - .concat(RunTimeLang) - .get(VMContext), - File.getFileNode(), DIScope(getNonCompileUnitScope(Context)).getRef(), - DerivedFrom.getRef(), Elements, VTableHolder.getRef(), nullptr, - UniqueIdentifier.empty() ? nullptr - : MDString::get(VMContext, UniqueIdentifier)}; - DICompositeType R(MDNode::get(VMContext, Elts)); - assert(R.isCompositeType() && - "createStructType should return a DICompositeType"); + DICompositeType R = MDCompositeType::get( + VMContext, dwarf::DW_TAG_structure_type, Name, File, LineNumber, + DIScope(getNonCompileUnitScope(Context)).getRef(), DerivedFrom.getRef(), + SizeInBits, AlignInBits, 0, Flags, Elements, RunTimeLang, + VTableHolder.getRef(), nullptr, UniqueIdentifier); if (!UniqueIdentifier.empty()) retainType(R); trackIfUnresolved(R); @@ -627,22 +436,11 @@ DICompositeType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, DIArray Elements, unsigned RunTimeLang, StringRef UniqueIdentifier) { - // TAG_union_type is encoded in DICompositeType format. - Metadata *Elts[] = { - HeaderBuilder::get(dwarf::DW_TAG_union_type) - .concat(Name) - .concat(LineNumber) - .concat(SizeInBits) - .concat(AlignInBits) - .concat(0) // Offset - .concat(Flags) - .concat(RunTimeLang) - .get(VMContext), - File.getFileNode(), DIScope(getNonCompileUnitScope(Scope)).getRef(), - nullptr, Elements, nullptr, nullptr, - UniqueIdentifier.empty() ? nullptr - : MDString::get(VMContext, UniqueIdentifier)}; - DICompositeType R(MDNode::get(VMContext, Elts)); + DICompositeType R = MDCompositeType::get( + VMContext, dwarf::DW_TAG_union_type, Name, File, LineNumber, + DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr, SizeInBits, + AlignInBits, 0, Flags, Elements, RunTimeLang, nullptr, nullptr, + UniqueIdentifier); if (!UniqueIdentifier.empty()) retainType(R); trackIfUnresolved(R); @@ -652,43 +450,18 @@ DICompositeType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, DISubroutineType DIBuilder::createSubroutineType(DIFile File, DITypeArray ParameterTypes, unsigned Flags) { - // TAG_subroutine_type is encoded in DICompositeType format. 
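Annotation: note that the replacement below, MDSubroutineType::get(VMContext, Flags, ParameterTypes), no longer stores the File operand at all. By convention the type array lists the return type first, then the parameter types. A sketch with the assumed names from earlier:

  // int (int, int)
  Metadata *SigTys[] = {IntTy, IntTy, IntTy};
  DISubroutineType FnTy =
      DIB.createSubroutineType(File, DIB.getOrCreateTypeArray(SigTys));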
- Metadata *Elts[] = { - HeaderBuilder::get(dwarf::DW_TAG_subroutine_type) - .concat(StringRef()) - .concat(0) // Line - .concat(0) // Size - .concat(0) // Align - .concat(0) // Offset - .concat(Flags) // Flags - .concat(0) - .get(VMContext), - nullptr, nullptr, nullptr, ParameterTypes, nullptr, nullptr, - nullptr // Type Identifer - }; - return DISubroutineType(MDNode::get(VMContext, Elts)); + return MDSubroutineType::get(VMContext, Flags, ParameterTypes); } DICompositeType DIBuilder::createEnumerationType( DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements, DIType UnderlyingType, StringRef UniqueIdentifier) { - // TAG_enumeration_type is encoded in DICompositeType format. - Metadata *Elts[] = { - HeaderBuilder::get(dwarf::DW_TAG_enumeration_type) - .concat(Name) - .concat(LineNumber) - .concat(SizeInBits) - .concat(AlignInBits) - .concat(0) // Offset - .concat(0) // Flags - .concat(0) - .get(VMContext), - File.getFileNode(), DIScope(getNonCompileUnitScope(Scope)).getRef(), - UnderlyingType.getRef(), Elements, nullptr, nullptr, - UniqueIdentifier.empty() ? nullptr - : MDString::get(VMContext, UniqueIdentifier)}; - DICompositeType CTy(MDNode::get(VMContext, Elts)); + DICompositeType CTy = MDCompositeType::get( + VMContext, dwarf::DW_TAG_enumeration_type, Name, File, LineNumber, + DIScope(getNonCompileUnitScope(Scope)).getRef(), UnderlyingType.getRef(), + SizeInBits, AlignInBits, 0, 0, Elements, 0, nullptr, nullptr, + UniqueIdentifier); AllEnumTypes.push_back(CTy); if (!UniqueIdentifier.empty()) retainType(CTy); @@ -698,85 +471,38 @@ DICompositeType DIBuilder::createEnumerationType( DICompositeType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, DIType Ty, DIArray Subscripts) { - // TAG_array_type is encoded in DICompositeType format. - Metadata *Elts[] = { - HeaderBuilder::get(dwarf::DW_TAG_array_type) - .concat(StringRef()) - .concat(0) // Line - .concat(Size) - .concat(AlignInBits) - .concat(0) // Offset - .concat(0) // Flags - .concat(0) - .get(VMContext), - nullptr, // Filename/Directory, - nullptr, // Unused - Ty.getRef(), Subscripts, nullptr, nullptr, - nullptr // Type Identifer - }; - DICompositeType R(MDNode::get(VMContext, Elts)); + auto *R = MDCompositeType::get(VMContext, dwarf::DW_TAG_array_type, "", + nullptr, 0, nullptr, Ty.getRef(), Size, + AlignInBits, 0, 0, Subscripts, 0, nullptr); trackIfUnresolved(R); return R; } DICompositeType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits, DIType Ty, DIArray Subscripts) { - // A vector is an array type with the FlagVector flag applied. 
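// Aside: a hedged usage sketch of the createArrayType() signature above,
// building debug info for a hypothetical "int a[8]". It assumes an existing
// llvm::Module M (and that a compile unit is created elsewhere), and that
// createBasicType() keeps its usual (Name, SizeInBits, AlignInBits, Encoding)
// signature in this revision; the other calls are the ones visible here.
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Dwarf.h"
using namespace llvm;

static DICompositeType buildArrayDebugType(Module &M) {
  DIBuilder DIB(M);
  DIType Int = DIB.createBasicType("int", 32, 32, dwarf::DW_ATE_signed);
  // One subscript covering indices [0, 8), i.e. Lo = 0, Count = 8.
  Metadata *Subs[] = {DIB.getOrCreateSubrange(0, 8)};
  // Total size 8 x 32 = 256 bits, element alignment 32 bits.
  return DIB.createArrayType(256, 32, Int, DIB.getOrCreateArray(Subs));
}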
- Metadata *Elts[] = {
- HeaderBuilder::get(dwarf::DW_TAG_array_type)
- .concat("")
- .concat(0) // Line
- .concat(Size)
- .concat(AlignInBits)
- .concat(0) // Offset
- .concat(DIType::FlagVector)
- .concat(0)
- .get(VMContext),
- nullptr, // Filename/Directory,
- nullptr, // Unused
- Ty.getRef(), Subscripts, nullptr, nullptr,
- nullptr // Type Identifer
- };
- DICompositeType R(MDNode::get(VMContext, Elts));
+ auto *R = MDCompositeType::get(
+ VMContext, dwarf::DW_TAG_array_type, "", nullptr, 0, nullptr, Ty.getRef(),
+ Size, AlignInBits, 0, DIType::FlagVector, Subscripts, 0, nullptr);
trackIfUnresolved(R);
return R;
}
-static HeaderBuilder setTypeFlagsInHeader(StringRef Header,
- unsigned FlagsToSet) {
- DIHeaderFieldIterator I(Header);
- std::advance(I, 6);
-
- unsigned Flags;
- if (I->getAsInteger(0, Flags))
- Flags = 0;
- Flags |= FlagsToSet;
-
- return HeaderBuilder()
- .concat(I.getPrefix())
- .concat(Flags)
- .concat(I.getSuffix());
-}
-
static DIType createTypeWithFlags(LLVMContext &Context, DIType Ty,
unsigned FlagsToSet) {
- SmallVector<Metadata *, 9> Elts;
- MDNode *N = Ty;
- assert(N && "Unexpected input DIType!");
- // Update header field.
- Elts.push_back(setTypeFlagsInHeader(Ty.getHeader(), FlagsToSet).get(Context));
- Elts.append(N->op_begin() + 1, N->op_end());
-
- return DIType(MDNode::get(Context, Elts));
+ TempMDType NewTy = cast<MDType>(static_cast<MDNode *>(Ty))->clone();
+ NewTy->setFlags(NewTy->getFlags() | FlagsToSet);
+ return MDNode::replaceWithUniqued(std::move(NewTy));
}
DIType DIBuilder::createArtificialType(DIType Ty) {
+ // FIXME: Restrict this to the nodes where it's valid.
if (Ty.isArtificial())
return Ty;
return createTypeWithFlags(VMContext, Ty, DIType::FlagArtificial);
}
DIType DIBuilder::createObjectPointerType(DIType Ty) {
+ // FIXME: Restrict this to the nodes where it's valid.
if (Ty.isObjectPointer())
return Ty;
unsigned Flags = DIType::FlagObjectPointer | DIType::FlagArtificial;
@@ -794,26 +520,13 @@ DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIDescriptor Scope,
DIFile F, unsigned Line, unsigned RuntimeLang,
uint64_t SizeInBits, uint64_t AlignInBits,
StringRef UniqueIdentifier) {
- // Create a temporary MDNode.
- Metadata *Elts[] = {
- HeaderBuilder::get(Tag)
- .concat(Name)
- .concat(Line)
- .concat(SizeInBits)
- .concat(AlignInBits)
- .concat(0) // Offset
- .concat(DIDescriptor::FlagFwdDecl)
- .concat(RuntimeLang)
- .get(VMContext),
- F.getFileNode(), DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr,
- DIArray(), nullptr,
- nullptr, // TemplateParams
- UniqueIdentifier.empty() ? nullptr
- : MDString::get(VMContext, UniqueIdentifier)};
- MDNode *Node = MDNode::get(VMContext, Elts);
- DICompositeType RetTy(Node);
- assert(RetTy.isCompositeType() &&
- "createForwardDecl result should be a DIType");
+ // FIXME: Define in terms of createReplaceableCompositeType() by calling
+ // replaceWithUniqued().
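// Aside: the clone -> set flags -> replaceWithUniqued() sequence in
// createTypeWithFlags() above exists because a uniqued node's identity *is*
// its contents, so it must never be mutated in place. A toy stand-in for that
// lifecycle, with hypothetical names (LLVM's real replaceWithUniqued() also
// RAUWs the temporary and re-inserts it into the context's uniquing tables):
#include <memory>

struct ToyType {
  unsigned Flags = 0;
  bool Temporary = false;
};

// Detach a mutable copy; it stays "temporary" until re-uniqued.
static std::unique_ptr<ToyType> cloneDetached(const ToyType &Ty) {
  std::unique_ptr<ToyType> C(new ToyType(Ty));
  C->Temporary = true;
  return C;
}

static ToyType *replaceWithUniquedToy(std::unique_ptr<ToyType> N) {
  N->Temporary = false; // back into circulation as an immutable node
  return N.release();   // in LLVM the context owns the node; leaked here
}

static ToyType *withFlags(const ToyType &Ty, unsigned FlagsToSet) {
  std::unique_ptr<ToyType> NewTy = cloneDetached(Ty);
  NewTy->Flags |= FlagsToSet; // safe: no one else can observe the temporary
  return replaceWithUniquedToy(std::move(NewTy));
}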
+ DICompositeType RetTy = MDCompositeType::get( + VMContext, Tag, Name, F.getFileNode(), Line, + DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr, SizeInBits, + AlignInBits, 0, DIDescriptor::FlagFwdDecl, nullptr, RuntimeLang, nullptr, + nullptr, UniqueIdentifier); if (!UniqueIdentifier.empty()) retainType(RetTy); trackIfUnresolved(RetTy); @@ -824,25 +537,12 @@ DICompositeType DIBuilder::createReplaceableCompositeType( unsigned Tag, StringRef Name, DIDescriptor Scope, DIFile F, unsigned Line, unsigned RuntimeLang, uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags, StringRef UniqueIdentifier) { - // Create a temporary MDNode. - Metadata *Elts[] = { - HeaderBuilder::get(Tag) - .concat(Name) - .concat(Line) - .concat(SizeInBits) - .concat(AlignInBits) - .concat(0) // Offset - .concat(Flags) - .concat(RuntimeLang) - .get(VMContext), - F.getFileNode(), DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr, - DIArray(), nullptr, - nullptr, // TemplateParams - UniqueIdentifier.empty() ? nullptr - : MDString::get(VMContext, UniqueIdentifier)}; - DICompositeType RetTy(MDNode::getTemporary(VMContext, Elts).release()); - assert(RetTy.isCompositeType() && - "createReplaceableForwardDecl result should be a DIType"); + DICompositeType RetTy = + MDCompositeType::getTemporary( + VMContext, Tag, Name, F.getFileNode(), Line, + DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr, SizeInBits, + AlignInBits, 0, Flags, nullptr, RuntimeLang, + nullptr, nullptr, UniqueIdentifier).release(); if (!UniqueIdentifier.empty()) retainType(RetTy); trackIfUnresolved(RetTy); @@ -865,62 +565,39 @@ DITypeArray DIBuilder::getOrCreateTypeArray(ArrayRef<Metadata *> Elements) { } DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Count) { - Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_subrange_type) - .concat(Lo) - .concat(Count) - .get(VMContext)}; - - return DISubrange(MDNode::get(VMContext, Elts)); + return MDSubrange::get(VMContext, Count, Lo); } -static DIGlobalVariable createGlobalVariableHelper( - LLVMContext &VMContext, DIDescriptor Context, StringRef Name, - StringRef LinkageName, DIFile F, unsigned LineNumber, DITypeRef Ty, - bool isLocalToUnit, Constant *Val, MDNode *Decl, bool isDefinition, - std::function<MDNode *(ArrayRef<Metadata *>)> CreateFunc) { - +static void checkGlobalVariableScope(DIDescriptor Context) { MDNode *TheCtx = getNonCompileUnitScope(Context); if (DIScope(TheCtx).isCompositeType()) { assert(!DICompositeType(TheCtx).getIdentifier() && "Context of a global variable should not be a type with identifier"); } - - Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_variable) - .concat(Name) - .concat(Name) - .concat(LinkageName) - .concat(LineNumber) - .concat(isLocalToUnit) - .concat(isDefinition) - .get(VMContext), - TheCtx, F, Ty, getConstantOrNull(Val), - DIDescriptor(Decl)}; - - return DIGlobalVariable(CreateFunc(Elts)); } DIGlobalVariable DIBuilder::createGlobalVariable( DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile F, unsigned LineNumber, DITypeRef Ty, bool isLocalToUnit, Constant *Val, MDNode *Decl) { - return createGlobalVariableHelper( - VMContext, Context, Name, LinkageName, F, LineNumber, Ty, isLocalToUnit, - Val, Decl, true, [&](ArrayRef<Metadata *> Elts) -> MDNode *{ - MDNode *Node = MDNode::get(VMContext, Elts); - AllGVs.push_back(Node); - return Node; - }); + checkGlobalVariableScope(Context); + + auto *N = MDGlobalVariable::get(VMContext, Context, Name, LinkageName, F, + LineNumber, Ty, isLocalToUnit, true, + 
getConstantOrNull(Val), Decl);
+ AllGVs.push_back(N);
+ return N;
}
DIGlobalVariable DIBuilder::createTempGlobalVariableFwdDecl(
DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile F,
unsigned LineNumber, DITypeRef Ty, bool isLocalToUnit, Constant *Val,
MDNode *Decl) {
- return createGlobalVariableHelper(VMContext, Context, Name, LinkageName, F,
- LineNumber, Ty, isLocalToUnit, Val, Decl,
- false, [&](ArrayRef<Metadata *> Elts) {
- return MDNode::getTemporary(VMContext, Elts).release();
- });
+ checkGlobalVariableScope(Context);
+
+ return MDGlobalVariable::getTemporary(VMContext, Context, Name, LinkageName,
+ F, LineNumber, Ty, isLocalToUnit, false,
+ getConstantOrNull(Val), Decl).release();
}
DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
@@ -928,16 +605,17 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
unsigned LineNo, DITypeRef Ty,
bool AlwaysPreserve, unsigned Flags,
unsigned ArgNo) {
+ // FIXME: Why getNonCompileUnitScope()?
+ // FIXME: Why is "!Context" okay here?
+ // FIXME: Why doesn't this check for a subprogram or lexical block (AFAICT
+ // the only valid scopes)?
DIDescriptor Context(getNonCompileUnitScope(Scope));
assert((!Context || Context.isScope()) &&
"createLocalVariable should be called with a valid Context");
- Metadata *Elts[] = {HeaderBuilder::get(Tag)
- .concat(Name)
- .concat(LineNo | (ArgNo << 24))
- .concat(Flags)
- .get(VMContext),
- getNonCompileUnitScope(Scope), File, Ty};
- MDNode *Node = MDNode::get(VMContext, Elts);
+
+ auto *Node =
+ MDLocalVariable::get(VMContext, Tag, getNonCompileUnitScope(Scope), Name,
+ File, LineNo, Ty, ArgNo, Flags);
if (AlwaysPreserve) {
// The optimizer may remove local variable. If there is an interest
// to preserve variable info in such situation then stash it in a
@@ -946,18 +624,11 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
assert(Fn && "Missing subprogram for local variable");
PreservedVariables[Fn].emplace_back(Node);
}
- DIVariable RetVar(Node);
- assert(RetVar.isVariable() &&
- "createLocalVariable should return a valid DIVariable");
- return RetVar;
+ return Node;
}
DIExpression DIBuilder::createExpression(ArrayRef<uint64_t> Addr) {
- auto Header = HeaderBuilder::get(DW_TAG_expression);
- for (uint64_t I : Addr)
- Header.concat(I);
- Metadata *Elts[] = {Header.get(VMContext)};
- return DIExpression(MDNode::get(VMContext, Elts));
+ return MDExpression::get(VMContext, Addr);
}
DIExpression DIBuilder::createExpression(ArrayRef<int64_t> Signed) {
@@ -966,10 +637,10 @@ DIExpression DIBuilder::createExpression(ArrayRef<int64_t> Signed) {
return createExpression(Addr);
}
-DIExpression DIBuilder::createBitPieceExpression(unsigned OffsetInBits,
- unsigned SizeInBits) {
- int64_t Addr[] = {dwarf::DW_OP_bit_piece, OffsetInBits, SizeInBits};
- return createExpression(Addr);
+DIExpression DIBuilder::createBitPieceExpression(unsigned OffsetInBytes,
+ unsigned SizeInBytes) {
+ uint64_t Addr[] = {dwarf::DW_OP_bit_piece, OffsetInBytes, SizeInBytes};
+ return MDExpression::get(VMContext, Addr);
}
DISubprogram DIBuilder::createFunction(DIScopeRef Context, StringRef Name,
@@ -987,38 +658,6 @@ DISubprogram DIBuilder::createFunction(DIScopeRef Context, StringRef Name,
Flags, isOptimized, Fn, TParams, Decl);
}
-static DISubprogram createFunctionHelper(
- LLVMContext &VMContext, DIDescriptor Context, StringRef Name,
- StringRef LinkageName, DIFile File, unsigned LineNo, DICompositeType Ty,
- bool isLocalToUnit, bool isDefinition,
unsigned ScopeLine, unsigned Flags, - bool isOptimized, Function *Fn, MDNode *TParams, MDNode *Decl, MDNode *Vars, - std::function<MDNode *(ArrayRef<Metadata *>)> CreateFunc) { - assert(Ty.getTag() == dwarf::DW_TAG_subroutine_type && - "function types should be subroutines"); - Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_subprogram) - .concat(Name) - .concat(Name) - .concat(LinkageName) - .concat(LineNo) - .concat(isLocalToUnit) - .concat(isDefinition) - .concat(0) - .concat(0) - .concat(Flags) - .concat(isOptimized) - .concat(ScopeLine) - .get(VMContext), - File.getFileNode(), - DIScope(getNonCompileUnitScope(Context)).getRef(), Ty, - nullptr, getConstantOrNull(Fn), TParams, Decl, Vars}; - - DISubprogram S(CreateFunc(Elts)); - assert(S.isSubprogram() && - "createFunction should return a valid DISubprogram"); - return S; -} - - DISubprogram DIBuilder::createFunction(DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile File, unsigned LineNo, DICompositeType Ty, @@ -1026,19 +665,18 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, StringRef Name, unsigned ScopeLine, unsigned Flags, bool isOptimized, Function *Fn, MDNode *TParams, MDNode *Decl) { - return createFunctionHelper(VMContext, Context, Name, LinkageName, File, - LineNo, Ty, isLocalToUnit, isDefinition, - ScopeLine, Flags, isOptimized, Fn, TParams, Decl, - MDNode::getTemporary(VMContext, None).release(), - [&](ArrayRef<Metadata *> Elts) -> MDNode *{ - MDNode *Node = MDNode::get(VMContext, Elts); - // Create a named metadata so that we - // do not lose this mdnode. - if (isDefinition) - AllSubprograms.push_back(Node); - trackIfUnresolved(Node); - return Node; - }); + assert(Ty.getTag() == dwarf::DW_TAG_subroutine_type && + "function types should be subroutines"); + auto *Node = MDSubprogram::get( + VMContext, DIScope(getNonCompileUnitScope(Context)).getRef(), Name, + LinkageName, File.getFileNode(), LineNo, Ty, isLocalToUnit, isDefinition, + ScopeLine, nullptr, 0, 0, Flags, isOptimized, getConstantOrNull(Fn), + TParams, Decl, MDNode::getTemporary(VMContext, None).release()); + + if (isDefinition) + AllSubprograms.push_back(Node); + trackIfUnresolved(Node); + return Node; } DISubprogram @@ -1049,12 +687,11 @@ DIBuilder::createTempFunctionFwdDecl(DIDescriptor Context, StringRef Name, unsigned ScopeLine, unsigned Flags, bool isOptimized, Function *Fn, MDNode *TParams, MDNode *Decl) { - return createFunctionHelper(VMContext, Context, Name, LinkageName, File, - LineNo, Ty, isLocalToUnit, isDefinition, - ScopeLine, Flags, isOptimized, Fn, TParams, Decl, - nullptr, [&](ArrayRef<Metadata *> Elts) { - return MDNode::getTemporary(VMContext, Elts).release(); - }); + return MDSubprogram::getTemporary( + VMContext, DIScope(getNonCompileUnitScope(Context)).getRef(), Name, + LinkageName, File.getFileNode(), LineNo, Ty, isLocalToUnit, + isDefinition, ScopeLine, nullptr, 0, 0, Flags, isOptimized, + getConstantOrNull(Fn), TParams, Decl, nullptr).release(); } DISubprogram DIBuilder::createMethod(DIDescriptor Context, StringRef Name, @@ -1070,24 +707,13 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, StringRef Name, assert(getNonCompileUnitScope(Context) && "Methods should have both a Context and a context that isn't " "the compile unit."); - Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_subprogram) - .concat(Name) - .concat(Name) - .concat(LinkageName) - .concat(LineNo) - .concat(isLocalToUnit) - .concat(isDefinition) - .concat(VK) - .concat(VIndex) - .concat(Flags) - .concat(isOptimized) - 
.concat(LineNo) - // FIXME: Do we want to use different scope/lines? - .get(VMContext), - F.getFileNode(), DIScope(Context).getRef(), Ty, - VTableHolder.getRef(), getConstantOrNull(Fn), TParam, - nullptr, nullptr}; - MDNode *Node = MDNode::get(VMContext, Elts); + // FIXME: Do we want to use different scope/lines? + auto *Node = MDSubprogram::get( + VMContext, DIScope(Context).getRef(), Name, LinkageName, F.getFileNode(), + LineNo, Ty, isLocalToUnit, isDefinition, LineNo, VTableHolder.getRef(), + VK, VIndex, Flags, isOptimized, getConstantOrNull(Fn), TParam, nullptr, + nullptr); + if (isDefinition) AllSubprograms.push_back(Node); DISubprogram S(Node); @@ -1098,12 +724,8 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, StringRef Name, DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNo) { - Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_namespace) - .concat(Name) - .concat(LineNo) - .get(VMContext), - File.getFileNode(), getNonCompileUnitScope(Scope)}; - DINameSpace R(MDNode::get(VMContext, Elts)); + DINameSpace R = MDNamespace::get(VMContext, getNonCompileUnitScope(Scope), + File.getFileNode(), Name, LineNo); assert(R.Verify() && "createNameSpace should return a verifiable DINameSpace"); return R; @@ -1112,11 +734,8 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name, DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope, DIFile File, unsigned Discriminator) { - Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_lexical_block) - .concat(Discriminator) - .get(VMContext), - File.getFileNode(), Scope}; - DILexicalBlockFile R(MDNode::get(VMContext, Elts)); + DILexicalBlockFile R = MDLexicalBlockFile::get( + VMContext, Scope, File.getFileNode(), Discriminator); assert( R.Verify() && "createLexicalBlockFile should return a verifiable DILexicalBlockFile"); @@ -1125,22 +744,10 @@ DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope, DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, unsigned Line, unsigned Col) { - // FIXME: This isn't thread safe nor the right way to defeat MDNode uniquing. - // I believe the right way is to have a self-referential element in the node. - // Also: why do we bother with line/column - they're not used and the - // documentation (SourceLevelDebugging.rst) claims the line/col are necessary - // for uniquing, yet then we have this other solution (because line/col were - // inadequate) anyway. Remove all 3 and replace them with a self-reference. - - // Defeat MDNode uniquing for lexical blocks by using unique id. - static unsigned int unique_id = 0; - Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_lexical_block) - .concat(Line) - .concat(Col) - .concat(unique_id++) - .get(VMContext), - File.getFileNode(), getNonCompileUnitScope(Scope)}; - DILexicalBlock R(MDNode::get(VMContext, Elts)); + // Make these distinct, to avoid merging two lexical blocks on the same + // file/line/column. + DILexicalBlock R = MDLexicalBlock::getDistinct( + VMContext, getNonCompileUnitScope(Scope), File.getFileNode(), Line, Col); assert(R.Verify() && "createLexicalBlock should return a verifiable DILexicalBlock"); return R; diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp index 9c1dee0..4d867ef 100644 --- a/lib/IR/DataLayout.cpp +++ b/lib/IR/DataLayout.cpp @@ -33,11 +33,6 @@ #include <cstdlib> using namespace llvm; -// Handle the Pass registration stuff necessary to use DataLayout's. 
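// Aside: a toy illustration, with hypothetical names, of the uniqued-vs-
// distinct split that createLexicalBlock() above now leans on via
// MDLexicalBlock::getDistinct(): uniqued nodes with equal fields collapse to
// one object, while distinct nodes get a fresh identity on every call, so
// two lexical blocks at the same file/line/column no longer merge.
#include <map>
#include <memory>
#include <tuple>
#include <vector>

struct ToyBlock {
  unsigned Line, Col;
};

class ToyBlockContext {
  std::map<std::tuple<unsigned, unsigned>, std::unique_ptr<ToyBlock>> Uniqued;
  std::vector<std::unique_ptr<ToyBlock>> Distinct;

public:
  ToyBlock *get(unsigned Line, unsigned Col) {
    auto &Slot = Uniqued[std::make_tuple(Line, Col)];
    if (!Slot)
      Slot.reset(new ToyBlock{Line, Col});
    return Slot.get(); // get(1, 1) == get(1, 1)
  }
  ToyBlock *getDistinct(unsigned Line, unsigned Col) {
    Distinct.emplace_back(new ToyBlock{Line, Col});
    return Distinct.back().get(); // getDistinct(1, 1) != getDistinct(1, 1)
  }
};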
- -INITIALIZE_PASS(DataLayoutPass, "datalayout", "Data Layout", false, true) -char DataLayoutPass::ID = 0; - //===----------------------------------------------------------------------===// // Support for StructLayout //===----------------------------------------------------------------------===// @@ -155,8 +150,8 @@ DataLayout::InvalidPointerElem = { 0U, 0U, 0U, ~0U }; const char *DataLayout::getManglingComponent(const Triple &T) { if (T.isOSBinFormatMachO()) return "-m:o"; - if (T.isOSWindows() && T.getArch() == Triple::x86 && T.isOSBinFormatCOFF()) - return "-m:w"; + if (T.isOSWindows() && T.isOSBinFormatCOFF()) + return T.getArch() == Triple::x86 ? "-m:x" : "-m:w"; return "-m:e"; } @@ -221,6 +216,7 @@ static unsigned inBytes(unsigned Bits) { } void DataLayout::parseSpecifier(StringRef Desc) { + StringRepresentation = Desc; while (!Desc.empty()) { // Split at '-'. std::pair<StringRef, StringRef> Split = split(Desc, '-'); @@ -259,6 +255,8 @@ void DataLayout::parseSpecifier(StringRef Desc) { "Missing size specification for pointer in datalayout string"); Split = split(Rest, ':'); unsigned PointerMemSize = inBytes(getInt(Tok)); + if (!PointerMemSize) + report_fatal_error("Invalid pointer size of 0 bytes"); // ABI alignment. if (Rest.empty()) @@ -266,12 +264,18 @@ void DataLayout::parseSpecifier(StringRef Desc) { "Missing alignment specification for pointer in datalayout string"); Split = split(Rest, ':'); unsigned PointerABIAlign = inBytes(getInt(Tok)); + if (!isPowerOf2_64(PointerABIAlign)) + report_fatal_error( + "Pointer ABI alignment must be a power of 2"); // Preferred alignment. unsigned PointerPrefAlign = PointerABIAlign; if (!Rest.empty()) { Split = split(Rest, ':'); PointerPrefAlign = inBytes(getInt(Tok)); + if (!isPowerOf2_64(PointerPrefAlign)) + report_fatal_error( + "Pointer preferred alignment must be a power of 2"); } setPointerAlignment(AddrSpace, PointerABIAlign, PointerPrefAlign, @@ -304,6 +308,9 @@ void DataLayout::parseSpecifier(StringRef Desc) { "Missing alignment specification in datalayout string"); Split = split(Rest, ':'); unsigned ABIAlign = inBytes(getInt(Tok)); + if (AlignType != AGGREGATE_ALIGN && !ABIAlign) + report_fatal_error( + "ABI alignment specification must be >0 for non-aggregate types"); // Preferred alignment. 
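// Aside: every report_fatal_error() alignment check added above reduces to
// one predicate. A power of two has exactly one bit set, which is what
// llvm::isPowerOf2_64() tests; a stand-alone sketch of the same arithmetic,
// under a hypothetical name:
#include <cstdint>

static bool isPow2(uint64_t V) {
  // V & (V - 1) clears the lowest set bit; only powers of two become 0.
  return V != 0 && (V & (V - 1)) == 0;
}
// isPow2(8) -> true, isPow2(12) -> false, isPow2(0) -> false.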
unsigned PrefAlign = ABIAlign;
@@ -352,7 +359,10 @@ void DataLayout::parseSpecifier(StringRef Desc) {
ManglingMode = MM_Mips;
break;
case 'w':
- ManglingMode = MM_WINCOFF;
+ ManglingMode = MM_WinCOFF;
+ break;
+ case 'x':
+ ManglingMode = MM_WinCOFFX86;
break;
}
break;
@@ -367,13 +377,7 @@ DataLayout::DataLayout(const Module *M) : LayoutMap(nullptr) {
init(M);
}
-void DataLayout::init(const Module *M) {
- const DataLayout *Other = M->getDataLayout();
- if (Other)
- *this = *Other;
- else
- reset("");
-}
+void DataLayout::init(const Module *M) { *this = M->getDataLayout(); }
bool DataLayout::operator==(const DataLayout &Other) const {
bool Ret = BigEndian == Other.BigEndian &&
@@ -381,7 +385,7 @@ bool DataLayout::operator==(const DataLayout &Other) const {
ManglingMode == Other.ManglingMode &&
LegalIntWidths == Other.LegalIntWidths &&
Alignments == Other.Alignments && Pointers == Other.Pointers;
- assert(Ret == (getStringRepresentation() == Other.getStringRepresentation()));
+ // Note: getStringRepresentation() might differ; it is not canonicalized
return Ret;
}
@@ -394,6 +398,10 @@
DataLayout::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
report_fatal_error("Invalid ABI alignment, must be a 16bit integer");
if (!isUInt<16>(pref_align))
report_fatal_error("Invalid preferred alignment, must be a 16bit integer");
+ if (abi_align != 0 && !isPowerOf2_64(abi_align))
+ report_fatal_error("Invalid ABI alignment, must be a power of 2");
+ if (pref_align != 0 && !isPowerOf2_64(pref_align))
+ report_fatal_error("Invalid preferred alignment, must be a power of 2");
if (pref_align < abi_align)
report_fatal_error(
@@ -474,9 +482,7 @@ unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType,
// If we didn't find an integer alignment, fall back on most conservative.
if (AlignType == INTEGER_ALIGN) {
BestMatchIdx = LargestInt;
- } else {
- assert(AlignType == VECTOR_ALIGN && "Unknown alignment type!");
-
+ } else if (AlignType == VECTOR_ALIGN) {
// By default, use natural alignment for vector types. This is consistent
// with what clang and llvm-gcc do.
unsigned Align = getTypeAllocSize(cast<VectorType>(Ty)->getElementType());
@@ -489,6 +495,19 @@ unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType,
}
}
+ // If we still couldn't find a reasonable default alignment, fall back
+ // to a simple heuristic that the alignment is the first power of two
+ // greater-or-equal to the store size of the type. This is a reasonable
+ // approximation of reality, and if the user wanted something less
+ // conservative, they should have specified it explicitly in the data
+ // layout.
+ if (BestMatchIdx == -1) {
+ unsigned Align = getTypeStoreSize(Ty);
+ if (Align & (Align-1))
+ Align = NextPowerOf2(Align);
+ return Align;
+ }
+
// Since we got a "best match" index, just return it.
return ABIInfo ? Alignments[BestMatchIdx].ABIAlign
: Alignments[BestMatchIdx].PrefAlign;
@@ -552,68 +571,6 @@ const StructLayout *DataLayout::getStructLayout(StructType *Ty) const {
return L;
}
-std::string DataLayout::getStringRepresentation() const {
- std::string Result;
- raw_string_ostream OS(Result);
-
- OS << (BigEndian ? "E" : "e");
-
- switch (ManglingMode) {
- case MM_None:
- break;
- case MM_ELF:
- OS << "-m:e";
- break;
- case MM_MachO:
- OS << "-m:o";
- break;
- case MM_WINCOFF:
- OS << "-m:w";
- break;
- case MM_Mips:
- OS << "-m:m";
- break;
- }
-
- for (const PointerAlignElem &PI : Pointers) {
- // Skip default.
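// Aside: the "first power of two greater-or-equal to the store size"
// fallback added above, as stand-alone arithmetic (hypothetical helper
// name). For a 12-byte type: 12 & 11 != 0, so it is not a power of two,
// and the loop rounds it up to 16.
#include <cstdint>

static uint64_t roundUpToPow2(uint64_t Size) {
  if ((Size & (Size - 1)) == 0)
    return Size; // already a power of two (or zero): keep as-is
  uint64_t P = 1;
  while (P < Size)
    P <<= 1;
  return P;
}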
- if (PI.AddressSpace == 0 && PI.ABIAlign == 8 && PI.PrefAlign == 8 && - PI.TypeByteWidth == 8) - continue; - - OS << "-p"; - if (PI.AddressSpace) { - OS << PI.AddressSpace; - } - OS << ":" << PI.TypeByteWidth*8 << ':' << PI.ABIAlign*8; - if (PI.PrefAlign != PI.ABIAlign) - OS << ':' << PI.PrefAlign*8; - } - - for (const LayoutAlignElem &AI : Alignments) { - if (std::find(std::begin(DefaultAlignments), std::end(DefaultAlignments), - AI) != std::end(DefaultAlignments)) - continue; - OS << '-' << (char)AI.AlignType; - if (AI.TypeBitWidth) - OS << AI.TypeBitWidth; - OS << ':' << AI.ABIAlign*8; - if (AI.ABIAlign != AI.PrefAlign) - OS << ':' << AI.PrefAlign*8; - } - - if (!LegalIntWidths.empty()) { - OS << "-n" << (unsigned)LegalIntWidths[0]; - - for (unsigned i = 1, e = LegalIntWidths.size(); i != e; ++i) - OS << ':' << (unsigned)LegalIntWidths[i]; - } - - if (StackNaturalAlign) - OS << "-S" << StackNaturalAlign*8; - - return OS.str(); -} unsigned DataLayout::getPointerABIAlignment(unsigned AS) const { PointersTy::const_iterator I = findPointerLowerBound(AS); @@ -829,18 +786,3 @@ unsigned DataLayout::getPreferredAlignmentLog(const GlobalVariable *GV) const { return Log2_32(getPreferredAlignment(GV)); } -DataLayoutPass::DataLayoutPass() : ImmutablePass(ID), DL("") { - initializeDataLayoutPassPass(*PassRegistry::getPassRegistry()); -} - -DataLayoutPass::~DataLayoutPass() {} - -bool DataLayoutPass::doInitialization(Module &M) { - DL.init(&M); - return false; -} - -bool DataLayoutPass::doFinalization(Module &M) { - DL.reset(""); - return false; -} diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index 6590661..9a6b953 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -92,7 +92,7 @@ bool DIDescriptor::Verify() const { DIObjCProperty(DbgNode).Verify() || DITemplateTypeParameter(DbgNode).Verify() || DITemplateValueParameter(DbgNode).Verify() || - DIImportedEntity(DbgNode).Verify() || DIExpression(DbgNode).Verify()); + DIImportedEntity(DbgNode).Verify()); } static Metadata *getField(const MDNode *DbgNode, unsigned Elt) { @@ -155,21 +155,6 @@ Function *DIDescriptor::getFunctionField(unsigned Elt) const { return dyn_cast_or_null<Function>(getConstantField(Elt)); } -void DIDescriptor::replaceFunctionField(unsigned Elt, Function *F) { - if (!DbgNode) - return; - - if (Elt < DbgNode->getNumOperands()) { - MDNode *Node = const_cast<MDNode *>(DbgNode); - Node->replaceOperandWith(Elt, F ? ConstantAsMetadata::get(F) : nullptr); - } -} - -static unsigned DIVariableInlinedAtIndex = 4; -MDNode *DIVariable::getInlinedAt() const { - return getNodeField(DbgNode, DIVariableInlinedAtIndex); -} - /// \brief Return the size reported by the variable's type. 
unsigned DIVariable::getSizeInBits(const DITypeIdentifierMap &Map) { DIType Ty = getType().resolve(Map); @@ -183,13 +168,6 @@ unsigned DIVariable::getSizeInBits(const DITypeIdentifierMap &Map) { return Ty.getSizeInBits(); } -uint64_t DIExpression::getElement(unsigned Idx) const { - unsigned I = Idx + 1; - assert(I < getNumHeaderFields() && - "non-existing complex address element requested"); - return getHeaderFieldAs<int64_t>(I); -} - bool DIExpression::isBitPiece() const { unsigned N = getNumElements(); return N >=3 && getElement(N-3) == dwarf::DW_OP_bit_piece; @@ -205,206 +183,40 @@ uint64_t DIExpression::getBitPieceSize() const { return getElement(getNumElements()-1); } -DIExpression::iterator DIExpression::begin() const { - return DIExpression::iterator(*this); -} - -DIExpression::iterator DIExpression::end() const { - return DIExpression::iterator(); -} - -DIExpression::Operand DIExpression::Operand::getNext() const { +DIExpression::iterator DIExpression::Operand::getNext() const { iterator it(I); - return *(++it); -} - -//===----------------------------------------------------------------------===// -// Predicates -//===----------------------------------------------------------------------===// - -bool DIDescriptor::isSubroutineType() const { - return DbgNode && getTag() == dwarf::DW_TAG_subroutine_type; -} - -bool DIDescriptor::isBasicType() const { - if (!DbgNode) - return false; - switch (getTag()) { - case dwarf::DW_TAG_base_type: - case dwarf::DW_TAG_unspecified_type: - return true; - default: - return false; - } -} - -bool DIDescriptor::isDerivedType() const { - if (!DbgNode) - return false; - switch (getTag()) { - case dwarf::DW_TAG_typedef: - case dwarf::DW_TAG_pointer_type: - case dwarf::DW_TAG_ptr_to_member_type: - case dwarf::DW_TAG_reference_type: - case dwarf::DW_TAG_rvalue_reference_type: - case dwarf::DW_TAG_const_type: - case dwarf::DW_TAG_volatile_type: - case dwarf::DW_TAG_restrict_type: - case dwarf::DW_TAG_member: - case dwarf::DW_TAG_inheritance: - case dwarf::DW_TAG_friend: - return true; - default: - // CompositeTypes are currently modelled as DerivedTypes. 
- return isCompositeType(); - } -} - -bool DIDescriptor::isCompositeType() const { - if (!DbgNode) - return false; - switch (getTag()) { - case dwarf::DW_TAG_array_type: - case dwarf::DW_TAG_structure_type: - case dwarf::DW_TAG_union_type: - case dwarf::DW_TAG_enumeration_type: - case dwarf::DW_TAG_subroutine_type: - case dwarf::DW_TAG_class_type: - return true; - default: - return false; - } -} - -bool DIDescriptor::isVariable() const { - if (!DbgNode) - return false; - switch (getTag()) { - case dwarf::DW_TAG_auto_variable: - case dwarf::DW_TAG_arg_variable: - return true; - default: - return false; - } -} - -bool DIDescriptor::isType() const { - return isBasicType() || isCompositeType() || isDerivedType(); -} - -bool DIDescriptor::isSubprogram() const { - return DbgNode && getTag() == dwarf::DW_TAG_subprogram; -} - -bool DIDescriptor::isGlobalVariable() const { - return DbgNode && getTag() == dwarf::DW_TAG_variable; -} - -bool DIDescriptor::isScope() const { - if (!DbgNode) - return false; - switch (getTag()) { - case dwarf::DW_TAG_compile_unit: - case dwarf::DW_TAG_lexical_block: - case dwarf::DW_TAG_subprogram: - case dwarf::DW_TAG_namespace: - case dwarf::DW_TAG_file_type: - return true; - default: - break; - } - return isType(); -} - -bool DIDescriptor::isTemplateTypeParameter() const { - return DbgNode && getTag() == dwarf::DW_TAG_template_type_parameter; -} - -bool DIDescriptor::isTemplateValueParameter() const { - return DbgNode && (getTag() == dwarf::DW_TAG_template_value_parameter || - getTag() == dwarf::DW_TAG_GNU_template_template_param || - getTag() == dwarf::DW_TAG_GNU_template_parameter_pack); -} - -bool DIDescriptor::isCompileUnit() const { - return DbgNode && getTag() == dwarf::DW_TAG_compile_unit; -} - -bool DIDescriptor::isFile() const { - return DbgNode && getTag() == dwarf::DW_TAG_file_type; -} - -bool DIDescriptor::isNameSpace() const { - return DbgNode && getTag() == dwarf::DW_TAG_namespace; -} - -bool DIDescriptor::isLexicalBlockFile() const { - return DbgNode && getTag() == dwarf::DW_TAG_lexical_block && - DbgNode->getNumOperands() == 3 && getNumHeaderFields() == 2; -} - -bool DIDescriptor::isLexicalBlock() const { - // FIXME: There are always exactly 4 header fields in DILexicalBlock, but - // something relies on this returning true for DILexicalBlockFile. 
- return DbgNode && getTag() == dwarf::DW_TAG_lexical_block && - DbgNode->getNumOperands() == 3 && - (getNumHeaderFields() == 2 || getNumHeaderFields() == 4); -} - -bool DIDescriptor::isSubrange() const { - return DbgNode && getTag() == dwarf::DW_TAG_subrange_type; -} - -bool DIDescriptor::isEnumerator() const { - return DbgNode && getTag() == dwarf::DW_TAG_enumerator; -} - -bool DIDescriptor::isObjCProperty() const { - return DbgNode && getTag() == dwarf::DW_TAG_APPLE_property; -} - -bool DIDescriptor::isImportedEntity() const { - return DbgNode && (getTag() == dwarf::DW_TAG_imported_module || - getTag() == dwarf::DW_TAG_imported_declaration); -} - -bool DIDescriptor::isExpression() const { - return DbgNode && (getTag() == dwarf::DW_TAG_expression); + return ++it; } //===----------------------------------------------------------------------===// // Simple Descriptor Constructors and other Methods //===----------------------------------------------------------------------===// -void DIDescriptor::replaceAllUsesWith(LLVMContext &VMContext, DIDescriptor D) { - +void DIDescriptor::replaceAllUsesWith(LLVMContext &, DIDescriptor D) { assert(DbgNode && "Trying to replace an unverified type!"); + assert(DbgNode->isTemporary() && "Expected temporary node"); + TempMDNode Temp(get()); // Since we use a TrackingVH for the node, its easy for clients to manufacture // legitimate situations where they want to replaceAllUsesWith() on something // which, due to uniquing, has merged with the source. We shield clients from // this detail by allowing a value to be replaced with replaceAllUsesWith() // itself. - const MDNode *DN = D; - if (DbgNode == DN) { - SmallVector<Metadata *, 10> Ops(DbgNode->op_begin(), DbgNode->op_end()); - DN = MDNode::get(VMContext, Ops); + if (Temp.get() == D.get()) { + DbgNode = MDNode::replaceWithUniqued(std::move(Temp)); + return; } - assert(DbgNode->isTemporary() && "Expected temporary node"); - auto *Node = const_cast<MDNode *>(DbgNode); - Node->replaceAllUsesWith(const_cast<MDNode *>(DN)); - MDNode::deleteTemporary(Node); - DbgNode = DN; + Temp->replaceAllUsesWith(D.get()); + DbgNode = D.get(); } void DIDescriptor::replaceAllUsesWith(MDNode *D) { assert(DbgNode && "Trying to replace an unverified type!"); assert(DbgNode != D && "This replacement should always happen"); assert(DbgNode->isTemporary() && "Expected temporary node"); - auto *Node = const_cast<MDNode *>(DbgNode); + TempMDNode Node(get()); Node->replaceAllUsesWith(D); - MDNode::deleteTemporary(Node); } bool DICompileUnit::Verify() const { @@ -413,31 +225,10 @@ bool DICompileUnit::Verify() const { // Don't bother verifying the compilation directory or producer string // as those could be empty. - if (getFilename().empty()) - return false; - - return DbgNode->getNumOperands() == 7 && getNumHeaderFields() == 8; -} - -bool DIObjCProperty::Verify() const { - if (!isObjCProperty()) - return false; - - // Don't worry about the rest of the strings for now. - return DbgNode->getNumOperands() == 3 && getNumHeaderFields() == 6; -} - -/// \brief Check if a field at position Elt of a MDNode is a MDNode. -static bool fieldIsMDNode(const MDNode *DbgNode, unsigned Elt) { - Metadata *Fld = getField(DbgNode, Elt); - return !Fld || isa<MDNode>(Fld); + return !getFilename().empty(); } -/// \brief Check if a field at position Elt of a MDNode is a MDString. 
-static bool fieldIsMDString(const MDNode *DbgNode, unsigned Elt) { - Metadata *Fld = getField(DbgNode, Elt); - return !Fld || isa<MDString>(Fld); -} +bool DIObjCProperty::Verify() const { return isObjCProperty(); } /// \brief Check if a value can be a reference to a type. static bool isTypeRef(const Metadata *MD) { @@ -445,14 +236,7 @@ static bool isTypeRef(const Metadata *MD) { return true; if (auto *S = dyn_cast<MDString>(MD)) return !S->getString().empty(); - if (auto *N = dyn_cast<MDNode>(MD)) - return DIType(N).isType(); - return false; -} - -/// \brief Check if referenced field might be a type. -static bool fieldIsTypeRef(const MDNode *DbgNode, unsigned Elt) { - return isTypeRef(dyn_cast_or_null<Metadata>(getField(DbgNode, Elt))); + return isa<MDType>(MD); } /// \brief Check if a value can be a ScopeRef. @@ -461,14 +245,7 @@ static bool isScopeRef(const Metadata *MD) { return true; if (auto *S = dyn_cast<MDString>(MD)) return !S->getString().empty(); - if (auto *N = dyn_cast<MDNode>(MD)) - return DIScope(N).isScope(); - return false; -} - -/// \brief Check if a field at position Elt of a MDNode can be a ScopeRef. -static bool fieldIsScopeRef(const MDNode *DbgNode, unsigned Elt) { - return isScopeRef(dyn_cast_or_null<Metadata>(getField(DbgNode, Elt))); + return isa<MDScope>(MD); } #ifndef NDEBUG @@ -483,92 +260,81 @@ static bool isDescriptorRef(const Metadata *MD) { #endif bool DIType::Verify() const { - if (!isType()) + auto *N = dyn_cast_or_null<MDType>(DbgNode); + if (!N) return false; - // Make sure Context @ field 2 is MDNode. - if (!fieldIsScopeRef(DbgNode, 2)) - return false; - - // FIXME: Sink this into the various subclass verifies. - uint16_t Tag = getTag(); - if (!isBasicType() && Tag != dwarf::DW_TAG_const_type && - Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_pointer_type && - Tag != dwarf::DW_TAG_ptr_to_member_type && - Tag != dwarf::DW_TAG_reference_type && - Tag != dwarf::DW_TAG_rvalue_reference_type && - Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_array_type && - Tag != dwarf::DW_TAG_enumeration_type && - Tag != dwarf::DW_TAG_subroutine_type && - Tag != dwarf::DW_TAG_inheritance && Tag != dwarf::DW_TAG_friend && - getFilename().empty()) + if (!isScopeRef(N->getScope())) return false; // DIType is abstract, it should be a BasicType, a DerivedType or // a CompositeType. if (isBasicType()) return DIBasicType(DbgNode).Verify(); - else if (isCompositeType()) + + // FIXME: Sink this into the various subclass verifies. + if (getFilename().empty()) { + // Check whether the filename is allowed to be empty. 
+ uint16_t Tag = getTag(); + if (Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && + Tag != dwarf::DW_TAG_pointer_type && + Tag != dwarf::DW_TAG_ptr_to_member_type && + Tag != dwarf::DW_TAG_reference_type && + Tag != dwarf::DW_TAG_rvalue_reference_type && + Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_array_type && + Tag != dwarf::DW_TAG_enumeration_type && + Tag != dwarf::DW_TAG_subroutine_type && + Tag != dwarf::DW_TAG_inheritance && Tag != dwarf::DW_TAG_friend && + Tag != dwarf::DW_TAG_structure_type && Tag != dwarf::DW_TAG_member && + Tag != dwarf::DW_TAG_typedef) + return false; + } + + if (isCompositeType()) return DICompositeType(DbgNode).Verify(); - else if (isDerivedType()) + if (isDerivedType()) return DIDerivedType(DbgNode).Verify(); - else - return false; + return false; } bool DIBasicType::Verify() const { - return isBasicType() && DbgNode->getNumOperands() == 3 && - getNumHeaderFields() == 8; + return dyn_cast_or_null<MDBasicType>(DbgNode); } bool DIDerivedType::Verify() const { - // Make sure DerivedFrom @ field 3 is TypeRef. - if (!fieldIsTypeRef(DbgNode, 3)) + auto *N = dyn_cast_or_null<MDDerivedTypeBase>(DbgNode); + if (!N) return false; - if (getTag() == dwarf::DW_TAG_ptr_to_member_type) - // Make sure ClassType @ field 4 is a TypeRef. - if (!fieldIsTypeRef(DbgNode, 4)) + if (getTag() == dwarf::DW_TAG_ptr_to_member_type) { + auto *D = dyn_cast<MDDerivedType>(N); + if (!D) return false; - - return isDerivedType() && DbgNode->getNumOperands() >= 4 && - DbgNode->getNumOperands() <= 8 && getNumHeaderFields() >= 7 && - getNumHeaderFields() <= 8; + if (!isTypeRef(D->getExtraData())) + return false; + } + return isTypeRef(N->getBaseType()); } bool DICompositeType::Verify() const { - if (!isCompositeType()) - return false; - - // Make sure DerivedFrom @ field 3 and ContainingType @ field 5 are TypeRef. - if (!fieldIsTypeRef(DbgNode, 3)) - return false; - if (!fieldIsTypeRef(DbgNode, 5)) - return false; - - // Make sure the type identifier at field 7 is MDString, it can be null. - if (!fieldIsMDString(DbgNode, 7)) - return false; - - // A subroutine type can't be both & and &&. - if (isLValueReference() && isRValueReference()) - return false; - - return DbgNode->getNumOperands() == 8 && getNumHeaderFields() == 8; + auto *N = dyn_cast_or_null<MDCompositeTypeBase>(DbgNode); + return N && isTypeRef(N->getBaseType()) && isTypeRef(N->getVTableHolder()) && + !(isLValueReference() && isRValueReference()); } bool DISubprogram::Verify() const { - if (!isSubprogram()) + auto *N = dyn_cast_or_null<MDSubprogram>(DbgNode); + if (!N) return false; - // Make sure context @ field 2 is a ScopeRef and type @ field 3 is a MDNode. - if (!fieldIsScopeRef(DbgNode, 2)) + if (!isScopeRef(N->getScope())) return false; - if (!fieldIsMDNode(DbgNode, 3)) - return false; - // Containing type @ field 4. - if (!fieldIsTypeRef(DbgNode, 4)) + + if (auto *Op = N->getType()) + if (!isa<MDNode>(Op)) + return false; + + if (!isTypeRef(getContainingType())) return false; - // A subprogram can't be both & and &&. if (isLValueReference() && isRValueReference()) return false; @@ -603,164 +369,78 @@ bool DISubprogram::Verify() const { } } } - return DbgNode->getNumOperands() == 9 && getNumHeaderFields() == 12; + + return true; } bool DIGlobalVariable::Verify() const { - if (!isGlobalVariable()) - return false; + auto *N = dyn_cast_or_null<MDGlobalVariable>(DbgNode); - if (getDisplayName().empty()) - return false; - // Make sure context @ field 1 is an MDNode. 
- if (!fieldIsMDNode(DbgNode, 1)) - return false; - // Make sure that type @ field 3 is a DITypeRef. - if (!fieldIsTypeRef(DbgNode, 3)) - return false; - // Make sure StaticDataMemberDeclaration @ field 5 is MDNode. - if (!fieldIsMDNode(DbgNode, 5)) - return false; - - return DbgNode->getNumOperands() == 6 && getNumHeaderFields() == 7; -} - -bool DIVariable::Verify() const { - if (!isVariable()) + if (!N) return false; - // Make sure context @ field 1 is an MDNode. - if (!fieldIsMDNode(DbgNode, 1)) - return false; - // Make sure that type @ field 3 is a DITypeRef. - if (!fieldIsTypeRef(DbgNode, 3)) + if (N->getDisplayName().empty()) return false; - // Check the number of header fields, which is common between complex and - // simple variables. - if (getNumHeaderFields() != 4) - return false; + if (auto *Op = N->getScope()) + if (!isa<MDNode>(Op)) + return false; - // Variable without an inline location. - if (DbgNode->getNumOperands() == 4) - return true; + if (auto *Op = N->getStaticDataMemberDeclaration()) + if (!isa<MDNode>(Op)) + return false; - // Variable with an inline location. - return getInlinedAt() != nullptr && DbgNode->getNumOperands() == 5; + return isTypeRef(N->getType()); } -bool DIExpression::Verify() const { - // Empty DIExpressions may be represented as a nullptr. - if (!DbgNode) - return true; +bool DIVariable::Verify() const { + auto *N = dyn_cast_or_null<MDLocalVariable>(DbgNode); - if (!(isExpression() && DbgNode->getNumOperands() == 1)) + if (!N) return false; - for (auto Op : *this) - switch (Op) { - case DW_OP_bit_piece: - // Must be the last element of the expression. - return std::distance(Op.getBase(), DIHeaderFieldIterator()) == 3; - case DW_OP_plus: - if (std::distance(Op.getBase(), DIHeaderFieldIterator()) < 2) - return false; - break; - case DW_OP_deref: - break; - default: - // Other operators are not yet supported by the backend. 
+ if (auto *Op = N->getScope()) + if (!isa<MDNode>(Op)) return false; - } - return true; + + return isTypeRef(N->getType()); } bool DILocation::Verify() const { - return DbgNode && isa<MDLocation>(DbgNode); + return dyn_cast_or_null<MDLocation>(DbgNode); } - bool DINameSpace::Verify() const { - if (!isNameSpace()) - return false; - return DbgNode->getNumOperands() == 3 && getNumHeaderFields() == 3; -} - -MDNode *DIFile::getFileNode() const { return getNodeField(DbgNode, 1); } - -bool DIFile::Verify() const { - return isFile() && DbgNode->getNumOperands() == 2; + return dyn_cast_or_null<MDNamespace>(DbgNode); } - +bool DIFile::Verify() const { return dyn_cast_or_null<MDFile>(DbgNode); } bool DIEnumerator::Verify() const { - return isEnumerator() && DbgNode->getNumOperands() == 1 && - getNumHeaderFields() == 3; + return dyn_cast_or_null<MDEnumerator>(DbgNode); } - bool DISubrange::Verify() const { - return isSubrange() && DbgNode->getNumOperands() == 1 && - getNumHeaderFields() == 3; + return dyn_cast_or_null<MDSubrange>(DbgNode); } - bool DILexicalBlock::Verify() const { - return isLexicalBlock() && DbgNode->getNumOperands() == 3 && - getNumHeaderFields() == 4; + return dyn_cast_or_null<MDLexicalBlock>(DbgNode); } - bool DILexicalBlockFile::Verify() const { - return isLexicalBlockFile() && DbgNode->getNumOperands() == 3 && - getNumHeaderFields() == 2; + return dyn_cast_or_null<MDLexicalBlockFile>(DbgNode); } - bool DITemplateTypeParameter::Verify() const { - return isTemplateTypeParameter() && DbgNode->getNumOperands() == 4 && - getNumHeaderFields() == 4; + return dyn_cast_or_null<MDTemplateTypeParameter>(DbgNode); } - bool DITemplateValueParameter::Verify() const { - return isTemplateValueParameter() && DbgNode->getNumOperands() == 5 && - getNumHeaderFields() == 4; + return dyn_cast_or_null<MDTemplateValueParameter>(DbgNode); } - bool DIImportedEntity::Verify() const { - return isImportedEntity() && DbgNode->getNumOperands() == 3 && - getNumHeaderFields() == 3; -} - -MDNode *DIDerivedType::getObjCProperty() const { - return getNodeField(DbgNode, 4); + return dyn_cast_or_null<MDImportedEntity>(DbgNode); } -MDString *DICompositeType::getIdentifier() const { - return cast_or_null<MDString>(getField(DbgNode, 7)); -} - -#ifndef NDEBUG -static void VerifySubsetOf(const MDNode *LHS, const MDNode *RHS) { - for (unsigned i = 0; i != LHS->getNumOperands(); ++i) { - // Skip the 'empty' list (that's a single i32 0, rather than truly empty). - if (i == 0 && mdconst::hasa<ConstantInt>(LHS->getOperand(i))) - continue; - const MDNode *E = cast<MDNode>(LHS->getOperand(i)); - bool found = false; - for (unsigned j = 0; !found && j != RHS->getNumOperands(); ++j) - found = (E == cast<MDNode>(RHS->getOperand(j))); - assert(found && "Losing a member during member list replacement"); - } -} -#endif - void DICompositeType::setArraysHelper(MDNode *Elements, MDNode *TParams) { - TrackingMDNodeRef N(*this); - if (Elements) { -#ifndef NDEBUG - // Check that the new list of members contains all the old members as well. 
- if (const MDNode *El = cast_or_null<MDNode>(N->getOperand(4))) - VerifySubsetOf(El, Elements); -#endif - N->replaceOperandWith(4, Elements); - } + TypedTrackingMDRef<MDCompositeTypeBase> N(get()); + if (Elements) + N->replaceElements(cast<MDTuple>(Elements)); if (TParams) - N->replaceOperandWith(6, TParams); + N->replaceTemplateParams(cast<MDTuple>(TParams)); DbgNode = N; } @@ -774,8 +454,8 @@ DIScopeRef DIScope::getRef() const { } void DICompositeType::setContainingType(DICompositeType ContainingType) { - TrackingMDNodeRef N(*this); - N->replaceOperandWith(5, ContainingType.getRef()); + TypedTrackingMDRef<MDCompositeTypeBase> N(get()); + N->replaceVTableHolder(ContainingType.getRef()); DbgNode = N; } @@ -788,6 +468,13 @@ bool DIVariable::isInlinedFnArgument(const Function *CurFn) { return !DISubprogram(getContext()).describes(CurFn); } +Function *DISubprogram::getFunction() const { + if (auto *N = get()) + if (auto *C = dyn_cast_or_null<ConstantAsMetadata>(N->getFunction())) + return dyn_cast<Function>(C->getValue()); + return nullptr; +} + bool DISubprogram::describes(const Function *F) { assert(F && "Invalid function"); if (F == getFunction()) @@ -800,16 +487,8 @@ bool DISubprogram::describes(const Function *F) { return false; } -MDNode *DISubprogram::getVariablesNodes() const { - return getNodeField(DbgNode, 8); -} - -DIArray DISubprogram::getVariables() const { - return DIArray(getNodeField(DbgNode, 8)); -} - -Metadata *DITemplateValueParameter::getValue() const { - return DbgNode->getOperand(3); +GlobalVariable *DIGlobalVariable::getGlobal() const { + return dyn_cast_or_null<GlobalVariable>(getConstant()); } DIScopeRef DIScope::getContext() const { @@ -847,66 +526,25 @@ StringRef DIScope::getName() const { } StringRef DIScope::getFilename() const { - if (!DbgNode) - return StringRef(); - return ::getStringField(getNodeField(DbgNode, 1), 0); + if (auto *N = get()) + return ::getStringField(dyn_cast_or_null<MDNode>(N->getFile()), 0); + return ""; } StringRef DIScope::getDirectory() const { - if (!DbgNode) - return StringRef(); - return ::getStringField(getNodeField(DbgNode, 1), 1); -} - -DIArray DICompileUnit::getEnumTypes() const { - if (!DbgNode || DbgNode->getNumOperands() < 7) - return DIArray(); - - return DIArray(getNodeField(DbgNode, 2)); -} - -DIArray DICompileUnit::getRetainedTypes() const { - if (!DbgNode || DbgNode->getNumOperands() < 7) - return DIArray(); - - return DIArray(getNodeField(DbgNode, 3)); -} - -DIArray DICompileUnit::getSubprograms() const { - if (!DbgNode || DbgNode->getNumOperands() < 7) - return DIArray(); - - return DIArray(getNodeField(DbgNode, 4)); -} - -DIArray DICompileUnit::getGlobalVariables() const { - if (!DbgNode || DbgNode->getNumOperands() < 7) - return DIArray(); - - return DIArray(getNodeField(DbgNode, 5)); -} - -DIArray DICompileUnit::getImportedEntities() const { - if (!DbgNode || DbgNode->getNumOperands() < 7) - return DIArray(); - - return DIArray(getNodeField(DbgNode, 6)); + if (auto *N = get()) + return ::getStringField(dyn_cast_or_null<MDNode>(N->getFile()), 1); + return ""; } void DICompileUnit::replaceSubprograms(DIArray Subprograms) { assert(Verify() && "Expected compile unit"); - if (Subprograms == getSubprograms()) - return; - - const_cast<MDNode *>(DbgNode)->replaceOperandWith(4, Subprograms); + get()->replaceSubprograms(cast_or_null<MDTuple>(Subprograms.get())); } void DICompileUnit::replaceGlobalVariables(DIArray GlobalVariables) { assert(Verify() && "Expected compile unit"); - if (GlobalVariables == getGlobalVariables()) - 
return; - - const_cast<MDNode *>(DbgNode)->replaceOperandWith(5, GlobalVariables); + get()->replaceGlobalVariables(cast_or_null<MDTuple>(GlobalVariables.get())); } DILocation DILocation::copyWithNewScope(LLVMContext &Ctx, @@ -927,31 +565,13 @@ unsigned DILocation::computeNewDiscriminator(LLVMContext &Ctx) { DIVariable llvm::createInlinedVariable(MDNode *DV, MDNode *InlinedScope, LLVMContext &VMContext) { assert(DIVariable(DV).Verify() && "Expected a DIVariable"); - if (!InlinedScope) - return cleanseInlinedVariable(DV, VMContext); - - // Insert inlined scope. - SmallVector<Metadata *, 8> Elts(DV->op_begin(), - DV->op_begin() + DIVariableInlinedAtIndex); - Elts.push_back(InlinedScope); - - DIVariable Inlined(MDNode::get(VMContext, Elts)); - assert(Inlined.Verify() && "Expected to create a DIVariable"); - return Inlined; + return cast<MDLocalVariable>(DV) + ->withInline(cast_or_null<MDLocation>(InlinedScope)); } DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) { assert(DIVariable(DV).Verify() && "Expected a DIVariable"); - if (!DIVariable(DV).getInlinedAt()) - return DIVariable(DV); - - // Remove inlined scope. - SmallVector<Metadata *, 8> Elts(DV->op_begin(), - DV->op_begin() + DIVariableInlinedAtIndex); - - DIVariable Cleansed(MDNode::get(VMContext, Elts)); - assert(Cleansed.Verify() && "Expected to create a DIVariable"); - return Cleansed; + return cast<MDLocalVariable>(DV)->withoutInline(); } DISubprogram llvm::getDISubprogram(const MDNode *Scope) { @@ -1075,6 +695,8 @@ void DebugInfoFinder::processModule(const Module &M) { DIArray Imports = CU.getImportedEntities(); for (unsigned i = 0, e = Imports.getNumElements(); i != e; ++i) { DIImportedEntity Import = DIImportedEntity(Imports.getElement(i)); + if (!Import) + continue; DIDescriptor Entity = Import.getEntity().resolve(TypeIdentifierMap); if (Entity.isType()) processType(DIType(Entity)); @@ -1267,220 +889,9 @@ void DIDescriptor::dump() const { } void DIDescriptor::print(raw_ostream &OS) const { - if (!DbgNode) - return; - - if (const char *Tag = dwarf::TagString(getTag())) - OS << "[ " << Tag << " ]"; - - if (this->isSubrange()) { - DISubrange(DbgNode).printInternal(OS); - } else if (this->isCompileUnit()) { - DICompileUnit(DbgNode).printInternal(OS); - } else if (this->isFile()) { - DIFile(DbgNode).printInternal(OS); - } else if (this->isEnumerator()) { - DIEnumerator(DbgNode).printInternal(OS); - } else if (this->isBasicType()) { - DIType(DbgNode).printInternal(OS); - } else if (this->isDerivedType()) { - DIDerivedType(DbgNode).printInternal(OS); - } else if (this->isCompositeType()) { - DICompositeType(DbgNode).printInternal(OS); - } else if (this->isSubprogram()) { - DISubprogram(DbgNode).printInternal(OS); - } else if (this->isGlobalVariable()) { - DIGlobalVariable(DbgNode).printInternal(OS); - } else if (this->isVariable()) { - DIVariable(DbgNode).printInternal(OS); - } else if (this->isObjCProperty()) { - DIObjCProperty(DbgNode).printInternal(OS); - } else if (this->isNameSpace()) { - DINameSpace(DbgNode).printInternal(OS); - } else if (this->isScope()) { - DIScope(DbgNode).printInternal(OS); - } else if (this->isExpression()) { - DIExpression(DbgNode).printInternal(OS); - } -} - -void DISubrange::printInternal(raw_ostream &OS) const { - int64_t Count = getCount(); - if (Count != -1) - OS << " [" << getLo() << ", " << Count - 1 << ']'; - else - OS << " [unbounded]"; -} - -void DIScope::printInternal(raw_ostream &OS) const { - OS << " [" << getDirectory() << "/" << getFilename() << ']'; -} - -void 
DICompileUnit::printInternal(raw_ostream &OS) const { - DIScope::printInternal(OS); - OS << " ["; - unsigned Lang = getLanguage(); - if (const char *LangStr = dwarf::LanguageString(Lang)) - OS << LangStr; - else - (OS << "lang 0x").write_hex(Lang); - OS << ']'; -} - -void DIEnumerator::printInternal(raw_ostream &OS) const { - OS << " [" << getName() << " :: " << getEnumValue() << ']'; -} - -void DIType::printInternal(raw_ostream &OS) const { - if (!DbgNode) + if (!get()) return; - - StringRef Res = getName(); - if (!Res.empty()) - OS << " [" << Res << "]"; - - // TODO: Print context? - - OS << " [line " << getLineNumber() << ", size " << getSizeInBits() - << ", align " << getAlignInBits() << ", offset " << getOffsetInBits(); - if (isBasicType()) - if (const char *Enc = - dwarf::AttributeEncodingString(DIBasicType(DbgNode).getEncoding())) - OS << ", enc " << Enc; - OS << "]"; - - if (isPrivate()) - OS << " [private]"; - else if (isProtected()) - OS << " [protected]"; - else if (isPublic()) - OS << " [public]"; - - if (isArtificial()) - OS << " [artificial]"; - - if (isForwardDecl()) - OS << " [decl]"; - else if (getTag() == dwarf::DW_TAG_structure_type || - getTag() == dwarf::DW_TAG_union_type || - getTag() == dwarf::DW_TAG_enumeration_type || - getTag() == dwarf::DW_TAG_class_type) - OS << " [def]"; - if (isVector()) - OS << " [vector]"; - if (isStaticMember()) - OS << " [static]"; - - if (isLValueReference()) - OS << " [reference]"; - - if (isRValueReference()) - OS << " [rvalue reference]"; -} - -void DIDerivedType::printInternal(raw_ostream &OS) const { - DIType::printInternal(OS); - OS << " [from " << getTypeDerivedFrom().getName() << ']'; -} - -void DICompositeType::printInternal(raw_ostream &OS) const { - DIType::printInternal(OS); - DIArray A = getElements(); - OS << " [" << A.getNumElements() << " elements]"; -} - -void DINameSpace::printInternal(raw_ostream &OS) const { - StringRef Name = getName(); - if (!Name.empty()) - OS << " [" << Name << ']'; - - OS << " [line " << getLineNumber() << ']'; -} - -void DISubprogram::printInternal(raw_ostream &OS) const { - // TODO : Print context - OS << " [line " << getLineNumber() << ']'; - - if (isLocalToUnit()) - OS << " [local]"; - - if (isDefinition()) - OS << " [def]"; - - if (getScopeLineNumber() != getLineNumber()) - OS << " [scope " << getScopeLineNumber() << "]"; - - if (isPrivate()) - OS << " [private]"; - else if (isProtected()) - OS << " [protected]"; - else if (isPublic()) - OS << " [public]"; - - if (isLValueReference()) - OS << " [reference]"; - - if (isRValueReference()) - OS << " [rvalue reference]"; - - StringRef Res = getName(); - if (!Res.empty()) - OS << " [" << Res << ']'; -} - -void DIGlobalVariable::printInternal(raw_ostream &OS) const { - StringRef Res = getName(); - if (!Res.empty()) - OS << " [" << Res << ']'; - - OS << " [line " << getLineNumber() << ']'; - - // TODO : Print context - - if (isLocalToUnit()) - OS << " [local]"; - - if (isDefinition()) - OS << " [def]"; -} - -void DIVariable::printInternal(raw_ostream &OS) const { - StringRef Res = getName(); - if (!Res.empty()) - OS << " [" << Res << ']'; - - OS << " [line " << getLineNumber() << ']'; -} - -void DIExpression::printInternal(raw_ostream &OS) const { - for (auto Op : *this) { - OS << " [" << OperationEncodingString(Op); - switch (Op) { - case DW_OP_plus: { - OS << " " << Op.getArg(1); - break; - } - case DW_OP_bit_piece: { - OS << " offset=" << Op.getArg(1) << ", size=" << Op.getArg(2); - break; - } - case DW_OP_deref: - // No arguments. 
- break; - default: - llvm_unreachable("unhandled operation"); - } - OS << "]"; - } -} - -void DIObjCProperty::printInternal(raw_ostream &OS) const { - StringRef Name = getObjCPropertyName(); - if (!Name.empty()) - OS << " [" << Name << ']'; - - OS << " [line " << getLineNumber() << ", properties " << getUnsignedField(6) - << ']'; + get()->print(OS); } static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS, diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp index cfb699a..5608589 100644 --- a/lib/IR/DiagnosticInfo.cpp +++ b/lib/IR/DiagnosticInfo.cpp @@ -129,7 +129,7 @@ void DiagnosticInfoSampleProfile::print(DiagnosticPrinter &DP) const { } bool DiagnosticInfoOptimizationBase::isLocationAvailable() const { - return getDebugLoc().isUnknown() == false; + return !getDebugLoc().isUnknown(); } void DiagnosticInfoOptimizationBase::getLocation(StringRef *Filename, diff --git a/lib/IR/GCOV.cpp b/lib/IR/GCOV.cpp index 08f44e0..7010ceb 100644 --- a/lib/IR/GCOV.cpp +++ b/lib/IR/GCOV.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/Format.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> #include <system_error> using namespace llvm; @@ -302,10 +303,12 @@ bool GCOVFunction::readGCDA(GCOVBuffer &Buff, GCOV::GCOVVersion Version) { // required to combine the edge counts that are contained in the GCDA file. for (uint32_t BlockNo = 0; Count > 0; ++BlockNo) { // The last block is always reserved for exit block - if (BlockNo >= Blocks.size() - 1) { + if (BlockNo >= Blocks.size()) { errs() << "Unexpected number of edges (in " << Name << ").\n"; return false; } + if (BlockNo == Blocks.size() - 1) + errs() << "(" << Name << ") has arcs from exit block.\n"; GCOVBlock &Block = *Blocks[BlockNo]; for (size_t EdgeNo = 0, End = Block.getNumDstEdges(); EdgeNo < End; ++EdgeNo) { @@ -443,6 +446,7 @@ static uint32_t branchDiv(uint64_t Numerator, uint64_t Divisor) { return Res; } +namespace { struct formatBranchInfo { formatBranchInfo(const GCOVOptions &Options, uint64_t Count, uint64_t Total) : Options(Options), Count(Count), Total(Total) {} @@ -466,7 +470,6 @@ static raw_ostream &operator<<(raw_ostream &OS, const formatBranchInfo &FBI) { return OS; } -namespace { class LineConsumer { std::unique_ptr<MemoryBuffer> Buffer; StringRef Remaining; diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp index 54197d9..5a6adb3 100644 --- a/lib/IR/Globals.cpp +++ b/lib/IR/Globals.cpp @@ -42,10 +42,6 @@ void GlobalValue::Dematerialize() { getParent()->Dematerialize(this); } -const DataLayout *GlobalValue::getDataLayout() const { - return getParent()->getDataLayout(); -} - /// Override destroyConstant to make sure it doesn't get called on /// GlobalValue's because they shouldn't be treated like other constants. void GlobalValue::destroyConstant() { diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp index 5b73561..b456d9f 100644 --- a/lib/IR/InlineAsm.cpp +++ b/lib/IR/InlineAsm.cpp @@ -75,7 +75,7 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str, ConstraintCodeVector *pCodes = &Codes; // Initialize - isMultipleAlternative = (multipleAlternativeCount > 1 ? 
true : false); + isMultipleAlternative = multipleAlternativeCount > 1; if (isMultipleAlternative) { multipleAlternatives.resize(multipleAlternativeCount); pCodes = &multipleAlternatives[0].Codes; diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp index 92c6e9f..7d9bd7e 100644 --- a/lib/IR/Instruction.cpp +++ b/lib/IR/Instruction.cpp @@ -32,10 +32,6 @@ Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps, } } -const DataLayout *Instruction::getDataLayout() const { - return getParent()->getDataLayout(); -} - Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps, BasicBlock *InsertAtEnd) : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(nullptr) { @@ -58,6 +54,10 @@ void Instruction::setParent(BasicBlock *P) { Parent = P; } +const Module *Instruction::getModule() const { + return getParent()->getModule(); +} + void Instruction::removeFromParent() { getParent()->getInstList().remove(this); } diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index 7136923..af2aeb9 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -841,41 +841,19 @@ static Value *getAISize(LLVMContext &Context, Value *Amt) { return Amt; } -AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, - const Twine &Name, Instruction *InsertBefore) - : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, - getAISize(Ty->getContext(), ArraySize), InsertBefore) { - setAlignment(0); - assert(!Ty->isVoidTy() && "Cannot allocate void!"); - setName(Name); -} +AllocaInst::AllocaInst(Type *Ty, const Twine &Name, Instruction *InsertBefore) + : AllocaInst(Ty, /*ArraySize=*/nullptr, Name, InsertBefore) {} -AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, - const Twine &Name, BasicBlock *InsertAtEnd) - : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, - getAISize(Ty->getContext(), ArraySize), InsertAtEnd) { - setAlignment(0); - assert(!Ty->isVoidTy() && "Cannot allocate void!"); - setName(Name); -} +AllocaInst::AllocaInst(Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) + : AllocaInst(Ty, /*ArraySize=*/nullptr, Name, InsertAtEnd) {} -AllocaInst::AllocaInst(Type *Ty, const Twine &Name, +AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, const Twine &Name, Instruction *InsertBefore) - : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, - getAISize(Ty->getContext(), nullptr), InsertBefore) { - setAlignment(0); - assert(!Ty->isVoidTy() && "Cannot allocate void!"); - setName(Name); -} + : AllocaInst(Ty, ArraySize, /*Align=*/0, Name, InsertBefore) {} -AllocaInst::AllocaInst(Type *Ty, const Twine &Name, +AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, const Twine &Name, BasicBlock *InsertAtEnd) - : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, - getAISize(Ty->getContext(), nullptr), InsertAtEnd) { - setAlignment(0); - assert(!Ty->isVoidTy() && "Cannot allocate void!"); - setName(Name); -} + : AllocaInst(Ty, ArraySize, /*Align=*/0, Name, InsertAtEnd) {} AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, unsigned Align, const Twine &Name, Instruction *InsertBefore) @@ -942,67 +920,27 @@ void LoadInst::AssertOK() { } LoadInst::LoadInst(Value *Ptr, const Twine &Name, Instruction *InsertBef) - : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(), - Load, Ptr, InsertBef) { - setVolatile(false); - setAlignment(0); - setAtomic(NotAtomic); - AssertOK(); - setName(Name); -} + : LoadInst(Ptr, Name, /*isVolatile=*/false, InsertBef) {} LoadInst::LoadInst(Value *Ptr, const Twine &Name, BasicBlock *InsertAE) - : 
UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(), - Load, Ptr, InsertAE) { - setVolatile(false); - setAlignment(0); - setAtomic(NotAtomic); - AssertOK(); - setName(Name); -} + : LoadInst(Ptr, Name, /*isVolatile=*/false, InsertAE) {} LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, Instruction *InsertBef) - : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(), - Load, Ptr, InsertBef) { - setVolatile(isVolatile); - setAlignment(0); - setAtomic(NotAtomic); - AssertOK(); - setName(Name); -} + : LoadInst(Ptr, Name, isVolatile, /*Align=*/0, InsertBef) {} LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, BasicBlock *InsertAE) - : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(), - Load, Ptr, InsertAE) { - setVolatile(isVolatile); - setAlignment(0); - setAtomic(NotAtomic); - AssertOK(); - setName(Name); -} + : LoadInst(Ptr, Name, isVolatile, /*Align=*/0, InsertAE) {} -LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, +LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, unsigned Align, Instruction *InsertBef) - : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(), - Load, Ptr, InsertBef) { - setVolatile(isVolatile); - setAlignment(Align); - setAtomic(NotAtomic); - AssertOK(); - setName(Name); -} + : LoadInst(Ptr, Name, isVolatile, Align, NotAtomic, CrossThread, + InsertBef) {} -LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, +LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, unsigned Align, BasicBlock *InsertAE) - : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(), - Load, Ptr, InsertAE) { - setVolatile(isVolatile); - setAlignment(Align); - setAtomic(NotAtomic); - AssertOK(); - setName(Name); + : LoadInst(Ptr, Name, isVolatile, Align, NotAtomic, CrossThread, InsertAE) { } LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, @@ -1097,60 +1035,29 @@ void StoreInst::AssertOK() { "Alignment required for atomic store"); } - StoreInst::StoreInst(Value *val, Value *addr, Instruction *InsertBefore) - : Instruction(Type::getVoidTy(val->getContext()), Store, - OperandTraits<StoreInst>::op_begin(this), - OperandTraits<StoreInst>::operands(this), - InsertBefore) { - Op<0>() = val; - Op<1>() = addr; - setVolatile(false); - setAlignment(0); - setAtomic(NotAtomic); - AssertOK(); -} + : StoreInst(val, addr, /*isVolatile=*/false, InsertBefore) {} StoreInst::StoreInst(Value *val, Value *addr, BasicBlock *InsertAtEnd) - : Instruction(Type::getVoidTy(val->getContext()), Store, - OperandTraits<StoreInst>::op_begin(this), - OperandTraits<StoreInst>::operands(this), - InsertAtEnd) { - Op<0>() = val; - Op<1>() = addr; - setVolatile(false); - setAlignment(0); - setAtomic(NotAtomic); - AssertOK(); -} + : StoreInst(val, addr, /*isVolatile=*/false, InsertAtEnd) {} StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, Instruction *InsertBefore) - : Instruction(Type::getVoidTy(val->getContext()), Store, - OperandTraits<StoreInst>::op_begin(this), - OperandTraits<StoreInst>::operands(this), - InsertBefore) { - Op<0>() = val; - Op<1>() = addr; - setVolatile(isVolatile); - setAlignment(0); - setAtomic(NotAtomic); - AssertOK(); -} + : StoreInst(val, addr, isVolatile, /*Align=*/0, InsertBefore) {} StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, - unsigned Align, Instruction *InsertBefore) - : Instruction(Type::getVoidTy(val->getContext()), Store, - OperandTraits<StoreInst>::op_begin(this), - 
OperandTraits<StoreInst>::operands(this), - InsertBefore) { - Op<0>() = val; - Op<1>() = addr; - setVolatile(isVolatile); - setAlignment(Align); - setAtomic(NotAtomic); - AssertOK(); -} + BasicBlock *InsertAtEnd) + : StoreInst(val, addr, isVolatile, /*Align=*/0, InsertAtEnd) {} + +StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align, + Instruction *InsertBefore) + : StoreInst(val, addr, isVolatile, Align, NotAtomic, CrossThread, + InsertBefore) {} + +StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align, + BasicBlock *InsertAtEnd) + : StoreInst(val, addr, isVolatile, Align, NotAtomic, CrossThread, + InsertAtEnd) {} StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align, AtomicOrdering Order, @@ -1169,34 +1076,6 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, } StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, - BasicBlock *InsertAtEnd) - : Instruction(Type::getVoidTy(val->getContext()), Store, - OperandTraits<StoreInst>::op_begin(this), - OperandTraits<StoreInst>::operands(this), - InsertAtEnd) { - Op<0>() = val; - Op<1>() = addr; - setVolatile(isVolatile); - setAlignment(0); - setAtomic(NotAtomic); - AssertOK(); -} - -StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, - unsigned Align, BasicBlock *InsertAtEnd) - : Instruction(Type::getVoidTy(val->getContext()), Store, - OperandTraits<StoreInst>::op_begin(this), - OperandTraits<StoreInst>::operands(this), - InsertAtEnd) { - Op<0>() = val; - Op<1>() = addr; - setVolatile(isVolatile); - setAlignment(Align); - setAtomic(NotAtomic); - AssertOK(); -} - -StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align, AtomicOrdering Order, SynchronizationScope SynchScope, BasicBlock *InsertAtEnd) @@ -2169,21 +2048,15 @@ bool CastInst::isNoopCast(Type *IntPtrTy) const { return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy); } -bool CastInst::isNoopCast(const DataLayout *DL) const { - if (!DL) { - // Assume maximum pointer size. - return isNoopCast(Type::getInt64Ty(getContext())); - } - +bool CastInst::isNoopCast(const DataLayout &DL) const { Type *PtrOpTy = nullptr; if (getOpcode() == Instruction::PtrToInt) PtrOpTy = getOperand(0)->getType(); else if (getOpcode() == Instruction::IntToPtr) PtrOpTy = getType(); - Type *IntPtrTy = PtrOpTy - ? DL->getIntPtrType(PtrOpTy) - : DL->getIntPtrType(getContext(), 0); + Type *IntPtrTy = + PtrOpTy ? DL.getIntPtrType(PtrOpTy) : DL.getIntPtrType(getContext(), 0); return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy); } @@ -2656,44 +2529,38 @@ bool CastInst::isCastable(Type *SrcTy, Type *DestTy) { // Run through the possibilities ... 
if (DestTy->isIntegerTy()) { // Casting to integral - if (SrcTy->isIntegerTy()) { // Casting from integral + if (SrcTy->isIntegerTy()) // Casting from integral return true; - } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt + if (SrcTy->isFloatingPointTy()) // Casting from floating pt return true; - } else if (SrcTy->isVectorTy()) { // Casting from vector + if (SrcTy->isVectorTy()) // Casting from vector return DestBits == SrcBits; - } else { // Casting from something else - return SrcTy->isPointerTy(); - } - } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt - if (SrcTy->isIntegerTy()) { // Casting from integral + // Casting from something else + return SrcTy->isPointerTy(); + } + if (DestTy->isFloatingPointTy()) { // Casting to floating pt + if (SrcTy->isIntegerTy()) // Casting from integral return true; - } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt + if (SrcTy->isFloatingPointTy()) // Casting from floating pt return true; - } else if (SrcTy->isVectorTy()) { // Casting from vector + if (SrcTy->isVectorTy()) // Casting from vector return DestBits == SrcBits; - } else { // Casting from something else - return false; - } - } else if (DestTy->isVectorTy()) { // Casting to vector + // Casting from something else + return false; + } + if (DestTy->isVectorTy()) // Casting to vector return DestBits == SrcBits; - } else if (DestTy->isPointerTy()) { // Casting to pointer - if (SrcTy->isPointerTy()) { // Casting from pointer + if (DestTy->isPointerTy()) { // Casting to pointer + if (SrcTy->isPointerTy()) // Casting from pointer return true; - } else if (SrcTy->isIntegerTy()) { // Casting from integral - return true; - } else { // Casting from something else - return false; - } - } else if (DestTy->isX86_MMXTy()) { - if (SrcTy->isVectorTy()) { + return SrcTy->isIntegerTy(); // Casting from integral + } + if (DestTy->isX86_MMXTy()) { + if (SrcTy->isVectorTy()) return DestBits == SrcBits; // 64-bit vector to MMX - } else { - return false; - } - } else { // Casting to something else return false; - } + } // Casting to something else + return false; } bool CastInst::isBitCastable(Type *SrcTy, Type *DestTy) { @@ -2737,13 +2604,13 @@ bool CastInst::isBitCastable(Type *SrcTy, Type *DestTy) { } bool CastInst::isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, - const DataLayout *DL) { + const DataLayout &DL) { if (auto *PtrTy = dyn_cast<PointerType>(SrcTy)) if (auto *IntTy = dyn_cast<IntegerType>(DestTy)) - return DL && IntTy->getBitWidth() == DL->getPointerTypeSizeInBits(PtrTy); + return IntTy->getBitWidth() == DL.getPointerTypeSizeInBits(PtrTy); if (auto *PtrTy = dyn_cast<PointerType>(DestTy)) if (auto *IntTy = dyn_cast<IntegerType>(SrcTy)) - return DL && IntTy->getBitWidth() == DL->getPointerTypeSizeInBits(PtrTy); + return IntTy->getBitWidth() == DL.getPointerTypeSizeInBits(PtrTy); return isBitCastable(SrcTy, DestTy); } diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h index 4631246..e380665 100644 --- a/lib/IR/LLVMContextImpl.h +++ b/lib/IR/LLVMContextImpl.h @@ -240,12 +240,12 @@ template <> struct MDNodeKeyImpl<MDLocation> { : Line(Line), Column(Column), Scope(Scope), InlinedAt(InlinedAt) {} MDNodeKeyImpl(const MDLocation *L) - : Line(L->getLine()), Column(L->getColumn()), Scope(L->getScope()), - InlinedAt(L->getInlinedAt()) {} + : Line(L->getLine()), Column(L->getColumn()), Scope(L->getRawScope()), + InlinedAt(L->getRawInlinedAt()) {} bool isKeyOf(const MDLocation *RHS) const { return Line == RHS->getLine() && Column == 
RHS->getColumn() && - Scope == RHS->getScope() && InlinedAt == RHS->getInlinedAt(); + Scope == RHS->getRawScope() && InlinedAt == RHS->getRawInlinedAt(); } unsigned getHashValue() const { return hash_combine(Line, Column, Scope, InlinedAt); diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp index fa8d50e..9a365d1 100644 --- a/lib/IR/LegacyPassManager.cpp +++ b/lib/IR/LegacyPassManager.cpp @@ -652,7 +652,7 @@ void PMTopLevelManager::schedulePass(Pass *P) { // are already checked are still available. checkAnalysis = true; } else - // Do not schedule this analysis. Lower level analsyis + // Do not schedule this analysis. Lower level analysis // passes are run on the fly. delete AnalysisPass; } diff --git a/lib/IR/Mangler.cpp b/lib/IR/Mangler.cpp index 5eeb797..a0e1b25 100644 --- a/lib/IR/Mangler.cpp +++ b/lib/IR/Mangler.cpp @@ -73,7 +73,7 @@ static bool hasByteCountSuffix(CallingConv::ID CC) { /// Microsoft fastcall and stdcall functions require a suffix on their name /// indicating the number of words of arguments they take. static void addByteCountSuffix(raw_ostream &OS, const Function *F, - const DataLayout &TD) { + const DataLayout &DL) { // Calculate arguments size total. unsigned ArgWords = 0; for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); @@ -83,8 +83,8 @@ static void addByteCountSuffix(raw_ostream &OS, const Function *F, if (AI->hasByValOrInAllocaAttr()) Ty = cast<PointerType>(Ty)->getElementType(); // Size should be aligned to pointer size. - unsigned PtrSize = TD.getPointerSize(); - ArgWords += RoundUpToAlignment(TD.getTypeAllocSize(Ty), PtrSize); + unsigned PtrSize = DL.getPointerSize(); + ArgWords += RoundUpToAlignment(DL.getTypeAllocSize(Ty), PtrSize); } OS << '@' << ArgWords; diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp index b0abe8c..3e8f91f 100644 --- a/lib/IR/Module.cpp +++ b/lib/IR/Module.cpp @@ -365,31 +365,11 @@ void Module::addModuleFlag(MDNode *Node) { void Module::setDataLayout(StringRef Desc) { DL.reset(Desc); - - if (Desc.empty()) { - DataLayoutStr = ""; - } else { - DataLayoutStr = DL.getStringRepresentation(); - // DataLayoutStr is now equivalent to Desc, but since the representation - // is not unique, they may not be identical. - } } -void Module::setDataLayout(const DataLayout *Other) { - if (!Other) { - DataLayoutStr = ""; - DL.reset(""); - } else { - DL = *Other; - DataLayoutStr = DL.getStringRepresentation(); - } -} +void Module::setDataLayout(const DataLayout &Other) { DL = Other; } -const DataLayout *Module::getDataLayout() const { - if (DataLayoutStr.empty()) - return nullptr; - return &DL; -} +const DataLayout &Module::getDataLayout() const { return DL; } //===----------------------------------------------------------------------===// // Methods to control the materialization of GlobalValues in the Module. @@ -433,6 +413,12 @@ std::error_code Module::materializeAllPermanently() { return std::error_code(); } +std::error_code Module::materializeMetadata() { + if (!Materializer) + return std::error_code(); + return Materializer->materializeMetadata(); +} + //===----------------------------------------------------------------------===// // Other module related stuff. // diff --git a/lib/IR/TypeFinder.cpp b/lib/IR/TypeFinder.cpp index e2fb8f8..1d2b808 100644 --- a/lib/IR/TypeFinder.cpp +++ b/lib/IR/TypeFinder.cpp @@ -68,7 +68,7 @@ void TypeFinder::run(const Module &M, bool onlyNamed) { // instructions with this loop.) 
for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI) - if (!isa<Instruction>(OI)) + if (*OI && !isa<Instruction>(OI)) incorporateValue(*OI); // Incorporate types hiding in metadata. diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index 7d205f9..78bfca4 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -32,6 +32,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> using namespace llvm; @@ -69,15 +70,13 @@ Value::~Value() { #ifndef NDEBUG // Only in -g mode... // Check to make sure that there are no uses of this value that are still // around when the value is destroyed. If there are, then we have a dangling - // reference and something is wrong. This code is here to print out what is - // still being referenced. The value in question should be printed as - // a <badref> + // reference and something is wrong. This code is here to print out where + // the value is still being referenced. // if (!use_empty()) { dbgs() << "While deleting: " << *VTy << " %" << getName() << "\n"; - for (use_iterator I = use_begin(), E = use_end(); I != E; ++I) - dbgs() << "Use still stuck around after Def is destroyed:" - << **I << "\n"; + for (auto *U : users()) + dbgs() << "Use still stuck around after Def is destroyed:" << *U << "\n"; } #endif assert(use_empty() && "Uses remain when a value is destroyed!"); @@ -482,7 +481,7 @@ Value *Value::stripInBoundsOffsets() { /// /// Test if V is always a pointer to allocated and suitably aligned memory for /// a simple load or store. -static bool isDereferenceablePointer(const Value *V, const DataLayout *DL, +static bool isDereferenceablePointer(const Value *V, const DataLayout &DL, SmallPtrSetImpl<const Value *> &Visited) { // Note that it is not safe to speculate into a malloc'd region because // malloc may return null. @@ -497,17 +496,14 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout *DL, // to a type of smaller size (or the same size), and the alignment // is at least as large as for the resulting pointer type, then // we can look through the bitcast. - if (DL) - if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(V)) { - Type *STy = BC->getSrcTy()->getPointerElementType(), - *DTy = BC->getDestTy()->getPointerElementType(); - if (STy->isSized() && DTy->isSized() && - (DL->getTypeStoreSize(STy) >= - DL->getTypeStoreSize(DTy)) && - (DL->getABITypeAlignment(STy) >= - DL->getABITypeAlignment(DTy))) - return isDereferenceablePointer(BC->getOperand(0), DL, Visited); - } + if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(V)) { + Type *STy = BC->getSrcTy()->getPointerElementType(), + *DTy = BC->getDestTy()->getPointerElementType(); + if (STy->isSized() && DTy->isSized() && + (DL.getTypeStoreSize(STy) >= DL.getTypeStoreSize(DTy)) && + (DL.getABITypeAlignment(STy) >= DL.getABITypeAlignment(DTy))) + return isDereferenceablePointer(BC->getOperand(0), DL, Visited); + } // Global variables which can't collapse to null are ok. 
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) @@ -520,7 +516,7 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout *DL, return true; else if (uint64_t Bytes = A->getDereferenceableBytes()) { Type *Ty = V->getType()->getPointerElementType(); - if (Ty->isSized() && DL && DL->getTypeStoreSize(Ty) <= Bytes) + if (Ty->isSized() && DL.getTypeStoreSize(Ty) <= Bytes) return true; } @@ -532,7 +528,7 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout *DL, if (ImmutableCallSite CS = V) { if (uint64_t Bytes = CS.getDereferenceableBytes(0)) { Type *Ty = V->getType()->getPointerElementType(); - if (Ty->isSized() && DL && DL->getTypeStoreSize(Ty) <= Bytes) + if (Ty->isSized() && DL.getTypeStoreSize(Ty) <= Bytes) return true; } } @@ -586,15 +582,15 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout *DL, return false; } -bool Value::isDereferenceablePointer(const DataLayout *DL) const { +bool Value::isDereferenceablePointer(const DataLayout &DL) const { // When dereferenceability information is provided by a dereferenceable // attribute, we know exactly how many bytes are dereferenceable. If we can // determine the exact offset to the attributed variable, we can use that // information here. Type *Ty = getType()->getPointerElementType(); - if (Ty->isSized() && DL) { - APInt Offset(DL->getTypeStoreSizeInBits(getType()), 0); - const Value *BV = stripAndAccumulateInBoundsConstantOffsets(*DL, Offset); + if (Ty->isSized()) { + APInt Offset(DL.getTypeStoreSizeInBits(getType()), 0); + const Value *BV = stripAndAccumulateInBoundsConstantOffsets(DL, Offset); APInt DerefBytes(Offset.getBitWidth(), 0); if (const Argument *A = dyn_cast<Argument>(BV)) @@ -603,7 +599,7 @@ bool Value::isDereferenceablePointer(const DataLayout *DL) const { DerefBytes = CS.getDereferenceableBytes(0); if (DerefBytes.getBoolValue() && Offset.isNonNegative()) { - if (DerefBytes.uge(Offset + DL->getTypeStoreSize(Ty))) + if (DerefBytes.uge(Offset + DL.getTypeStoreSize(Ty))) return true; } } diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index d01e138..fcf48c4 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -78,7 +78,7 @@ #include <cstdarg> using namespace llvm; -static cl::opt<bool> VerifyDebugInfo("verify-debug-info", cl::init(false)); +static cl::opt<bool> VerifyDebugInfo("verify-debug-info", cl::init(true)); namespace { struct VerifierSupport { @@ -87,11 +87,13 @@ struct VerifierSupport { /// \brief Track the brokenness of the module while recursively visiting. bool Broken; + bool EverBroken; explicit VerifierSupport(raw_ostream &OS) - : OS(OS), M(nullptr), Broken(false) {} + : OS(OS), M(nullptr), Broken(false), EverBroken(false) {} - void WriteValue(const Value *V) { +private: + void Write(const Value *V) { if (!V) return; if (isa<Instruction>(V)) { @@ -102,81 +104,61 @@ struct VerifierSupport { } } - void WriteMetadata(const Metadata *MD) { + void Write(const Metadata *MD) { if (!MD) return; - MD->printAsOperand(OS, true, M); + MD->print(OS, M); + OS << '\n'; + } + + void Write(const NamedMDNode *NMD) { + if (!NMD) + return; + NMD->print(OS); OS << '\n'; } - void WriteType(Type *T) { + void Write(Type *T) { if (!T) return; OS << ' ' << *T; } - void WriteComdat(const Comdat *C) { + void Write(const Comdat *C) { if (!C) return; OS << *C; } - // CheckFailed - A check failed, so print out the condition and the message - // that failed. This provides a nice place to put a breakpoint if you want - // to see why something is not correct. 
- void CheckFailed(const Twine &Message, const Value *V1 = nullptr,
- const Value *V2 = nullptr, const Value *V3 = nullptr,
- const Value *V4 = nullptr) {
- OS << Message.str() << "\n";
- WriteValue(V1);
- WriteValue(V2);
- WriteValue(V3);
- WriteValue(V4);
- Broken = true;
- }
-
- void CheckFailed(const Twine &Message, const Metadata *V1, const Metadata *V2,
- const Metadata *V3 = nullptr, const Metadata *V4 = nullptr) {
- OS << Message.str() << "\n";
- WriteMetadata(V1);
- WriteMetadata(V2);
- WriteMetadata(V3);
- WriteMetadata(V4);
- Broken = true;
- }
-
- void CheckFailed(const Twine &Message, const Metadata *V1,
- const Value *V2 = nullptr) {
- OS << Message.str() << "\n";
- WriteMetadata(V1);
- WriteValue(V2);
- Broken = true;
- }
-
- void CheckFailed(const Twine &Message, const Value *V1, Type *T2,
- const Value *V3 = nullptr) {
- OS << Message.str() << "\n";
- WriteValue(V1);
- WriteType(T2);
- WriteValue(V3);
- Broken = true;
- }
-
- void CheckFailed(const Twine &Message, Type *T1, Type *T2 = nullptr,
- Type *T3 = nullptr) {
- OS << Message.str() << "\n";
- WriteType(T1);
- WriteType(T2);
- WriteType(T3);
- Broken = true;
- }
-
- void CheckFailed(const Twine &Message, const Comdat *C) {
- OS << Message.str() << "\n";
- WriteComdat(C);
- Broken = true;
+ template <typename T1, typename... Ts>
+ void WriteTs(const T1 &V1, const Ts &... Vs) {
+ Write(V1);
+ WriteTs(Vs...);
+ }
+
+ template <typename... Ts> void WriteTs() {}
+
+public:
+ /// \brief A check failed, so print out the condition and the message.
+ ///
+ /// This provides a nice place to put a breakpoint if you want to see why
+ /// something is not correct.
+ void CheckFailed(const Twine &Message) {
+ OS << Message << '\n';
+ EverBroken = Broken = true;
+ }
+
+ /// \brief A check failed (with values to print).
+ ///
+ /// This calls the Message-only version so that the above is easier to set a
+ /// breakpoint on.
+ template <typename T1, typename... Ts>
+ void CheckFailed(const Twine &Message, const T1 &V1, const Ts &... Vs) {
+ CheckFailed(Message);
+ WriteTs(V1, Vs...);
+ }
 };
+
 class Verifier : public InstVisitor<Verifier>, VerifierSupport {
 friend class InstVisitor<Verifier>;
@@ -198,14 +180,18 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
 /// personality function.
 const Value *PersonalityFn;
- /// \brief Whether we've seen a call to @llvm.frameallocate in this function
+ /// \brief Whether we've seen a call to @llvm.frameescape in this function
 /// already.
- bool SawFrameAllocate;
+ bool SawFrameEscape;
+
+ /// Stores the count of how many objects were passed to llvm.frameescape for a
+ /// given function and the largest index passed to llvm.framerecover.
+ DenseMap<Function *, std::pair<unsigned, unsigned>> FrameEscapeInfo;
 public:
- explicit Verifier(raw_ostream &OS = dbgs())
+ explicit Verifier(raw_ostream &OS)
 : VerifierSupport(OS), Context(nullptr), PersonalityFn(nullptr),
- SawFrameAllocate(false) {}
+ SawFrameEscape(false) {}
 bool verify(const Function &F) {
 M = F.getParent();
@@ -240,7 +226,7 @@ public:
 visit(const_cast<Function &>(F));
 InstsInThisBlock.clear();
 PersonalityFn = nullptr;
- SawFrameAllocate = false;
+ SawFrameEscape = false;
 return !Broken;
 }
@@ -259,6 +245,10 @@ public:
 visitFunction(*I);
 }
+ // Now that we've visited every function, verify that we never asked to
+ // recover a frame index that wasn't escaped.
+ verifyFrameRecoverIndices(); + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) visitGlobalVariable(*I); @@ -278,6 +268,9 @@ public: visitModuleFlags(M); visitModuleIdents(M); + // Verify debug info last. + verifyDebugInfo(); + return !Broken; } @@ -347,6 +340,8 @@ private: void visitUserOp1(Instruction &I); void visitUserOp2(Instruction &I) { visitUserOp1(I); } void visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI); + template <class DbgIntrinsicTy> + void visitDbgIntrinsic(StringRef Kind, DbgIntrinsicTy &DII); void visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI); void visitAtomicRMWInst(AtomicRMWInst &RMWI); void visitFenceInst(FenceInst &FI); @@ -373,18 +368,9 @@ private: void VerifyConstantExprBitcastType(const ConstantExpr *CE); void VerifyStatepoint(ImmutableCallSite CS); -}; -class DebugInfoVerifier : public VerifierSupport { -public: - explicit DebugInfoVerifier(raw_ostream &OS = dbgs()) : VerifierSupport(OS) {} + void verifyFrameRecoverIndices(); - bool verify(const Module &M) { - this->M = &M; - verifyDebugInfo(); - return !Broken; - } - -private: + // Module-level debug info verification... void verifyDebugInfo(); void processInstructions(DebugInfoFinder &Finder); void processCallInst(DebugInfoFinder &Finder, const CallInst &CI); @@ -392,66 +378,58 @@ private: } // End anonymous namespace // Assert - We know that cond should be true, if not print an error message. -#define Assert(C, M) \ - do { if (!(C)) { CheckFailed(M); return; } } while (0) -#define Assert1(C, M, V1) \ - do { if (!(C)) { CheckFailed(M, V1); return; } } while (0) -#define Assert2(C, M, V1, V2) \ - do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0) -#define Assert3(C, M, V1, V2, V3) \ - do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0) -#define Assert4(C, M, V1, V2, V3, V4) \ - do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0) +#define Assert(C, ...) 
\ + do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (0) void Verifier::visit(Instruction &I) { for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) - Assert1(I.getOperand(i) != nullptr, "Operand is null", &I); + Assert(I.getOperand(i) != nullptr, "Operand is null", &I); InstVisitor<Verifier>::visit(I); } void Verifier::visitGlobalValue(const GlobalValue &GV) { - Assert1(!GV.isDeclaration() || GV.hasExternalLinkage() || - GV.hasExternalWeakLinkage(), - "Global is external, but doesn't have external or weak linkage!", - &GV); + Assert(!GV.isDeclaration() || GV.hasExternalLinkage() || + GV.hasExternalWeakLinkage(), + "Global is external, but doesn't have external or weak linkage!", &GV); - Assert1(GV.getAlignment() <= Value::MaximumAlignment, - "huge alignment values are unsupported", &GV); - Assert1(!GV.hasAppendingLinkage() || isa<GlobalVariable>(GV), - "Only global variables can have appending linkage!", &GV); + Assert(GV.getAlignment() <= Value::MaximumAlignment, + "huge alignment values are unsupported", &GV); + Assert(!GV.hasAppendingLinkage() || isa<GlobalVariable>(GV), + "Only global variables can have appending linkage!", &GV); if (GV.hasAppendingLinkage()) { const GlobalVariable *GVar = dyn_cast<GlobalVariable>(&GV); - Assert1(GVar && GVar->getType()->getElementType()->isArrayTy(), - "Only global arrays can have appending linkage!", GVar); + Assert(GVar && GVar->getType()->getElementType()->isArrayTy(), + "Only global arrays can have appending linkage!", GVar); } } void Verifier::visitGlobalVariable(const GlobalVariable &GV) { if (GV.hasInitializer()) { - Assert1(GV.getInitializer()->getType() == GV.getType()->getElementType(), - "Global variable initializer type does not match global " - "variable type!", &GV); + Assert(GV.getInitializer()->getType() == GV.getType()->getElementType(), + "Global variable initializer type does not match global " + "variable type!", + &GV); // If the global has common linkage, it must have a zero initializer and // cannot be constant. if (GV.hasCommonLinkage()) { - Assert1(GV.getInitializer()->isNullValue(), - "'common' global must have a zero initializer!", &GV); - Assert1(!GV.isConstant(), "'common' global may not be marked constant!", - &GV); - Assert1(!GV.hasComdat(), "'common' global may not be in a Comdat!", &GV); + Assert(GV.getInitializer()->isNullValue(), + "'common' global must have a zero initializer!", &GV); + Assert(!GV.isConstant(), "'common' global may not be marked constant!", + &GV); + Assert(!GV.hasComdat(), "'common' global may not be in a Comdat!", &GV); } } else { - Assert1(GV.hasExternalLinkage() || GV.hasExternalWeakLinkage(), - "invalid linkage type for global declaration", &GV); + Assert(GV.hasExternalLinkage() || GV.hasExternalWeakLinkage(), + "invalid linkage type for global declaration", &GV); } if (GV.hasName() && (GV.getName() == "llvm.global_ctors" || GV.getName() == "llvm.global_dtors")) { - Assert1(!GV.hasInitializer() || GV.hasAppendingLinkage(), - "invalid linkage for intrinsic global variable", &GV); + Assert(!GV.hasInitializer() || GV.hasAppendingLinkage(), + "invalid linkage for intrinsic global variable", &GV); // Don't worry about emitting an error for it not being an array, // visitGlobalValue will complain on appending non-array. 
 if (ArrayType *ATy = dyn_cast<ArrayType>(GV.getType()->getElementType())) {
@@ -459,48 +437,48 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
 PointerType *FuncPtrTy = FunctionType::get(Type::getVoidTy(*Context), false)->getPointerTo();
 // FIXME: Reject the 2-field form in LLVM 4.0.
- Assert1(STy && (STy->getNumElements() == 2 ||
- STy->getNumElements() == 3) &&
- STy->getTypeAtIndex(0u)->isIntegerTy(32) &&
- STy->getTypeAtIndex(1) == FuncPtrTy,
- "wrong type for intrinsic global variable", &GV);
+ Assert(STy &&
+ (STy->getNumElements() == 2 || STy->getNumElements() == 3) &&
+ STy->getTypeAtIndex(0u)->isIntegerTy(32) &&
+ STy->getTypeAtIndex(1) == FuncPtrTy,
+ "wrong type for intrinsic global variable", &GV);
 if (STy->getNumElements() == 3) {
 Type *ETy = STy->getTypeAtIndex(2);
- Assert1(ETy->isPointerTy() &&
- cast<PointerType>(ETy)->getElementType()->isIntegerTy(8),
- "wrong type for intrinsic global variable", &GV);
+ Assert(ETy->isPointerTy() &&
+ cast<PointerType>(ETy)->getElementType()->isIntegerTy(8),
+ "wrong type for intrinsic global variable", &GV);
 }
 }
 }
 if (GV.hasName() && (GV.getName() == "llvm.used" ||
 GV.getName() == "llvm.compiler.used")) {
- Assert1(!GV.hasInitializer() || GV.hasAppendingLinkage(),
- "invalid linkage for intrinsic global variable", &GV);
+ Assert(!GV.hasInitializer() || GV.hasAppendingLinkage(),
+ "invalid linkage for intrinsic global variable", &GV);
 Type *GVType = GV.getType()->getElementType();
 if (ArrayType *ATy = dyn_cast<ArrayType>(GVType)) {
 PointerType *PTy = dyn_cast<PointerType>(ATy->getElementType());
- Assert1(PTy, "wrong type for intrinsic global variable", &GV);
+ Assert(PTy, "wrong type for intrinsic global variable", &GV);
 if (GV.hasInitializer()) {
 const Constant *Init = GV.getInitializer();
 const ConstantArray *InitArray = dyn_cast<ConstantArray>(Init);
- Assert1(InitArray, "wrong initalizer for intrinsic global variable",
- Init);
+ Assert(InitArray, "wrong initializer for intrinsic global variable",
+ Init);
 for (unsigned i = 0, e = InitArray->getNumOperands(); i != e; ++i) {
 Value *V = Init->getOperand(i)->stripPointerCastsNoFollowAliases();
- Assert1(
- isa<GlobalVariable>(V) || isa<Function>(V) || isa<GlobalAlias>(V),
- "invalid llvm.used member", V);
- Assert1(V->hasName(), "members of llvm.used must be named", V);
+ Assert(isa<GlobalVariable>(V) || isa<Function>(V) ||
+ isa<GlobalAlias>(V),
+ "invalid llvm.used member", V);
+ Assert(V->hasName(), "members of llvm.used must be named", V);
 }
 }
 }
 }
- Assert1(!GV.hasDLLImportStorageClass() ||
- (GV.isDeclaration() && GV.hasExternalLinkage()) ||
- GV.hasAvailableExternallyLinkage(),
- "Global is marked as dllimport, but not external", &GV);
+ Assert(!GV.hasDLLImportStorageClass() ||
+ (GV.isDeclaration() && GV.hasExternalLinkage()) ||
+ GV.hasAvailableExternallyLinkage(),
+ "Global is marked as dllimport, but not external", &GV);
 if (!GV.hasInitializer()) {
 visitGlobalValue(GV);
@@ -540,13 +518,13 @@ void Verifier::visitAliaseeSubExpr(const GlobalAlias &GA, const Constant &C) {
 void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl<const GlobalAlias*> &Visited,
 const GlobalAlias &GA, const Constant &C) {
 if (const auto *GV = dyn_cast<GlobalValue>(&C)) {
- Assert1(!GV->isDeclaration(), "Alias must point to a definition", &GA);
+ Assert(!GV->isDeclaration(), "Alias must point to a definition", &GA);
 if (const auto *GA2 = dyn_cast<GlobalAlias>(GV)) {
- Assert1(Visited.insert(GA2).second, "Aliases cannot form a cycle", &GA);
+ Assert(Visited.insert(GA2).second, "Aliases cannot
form a cycle", &GA); - Assert1(!GA2->mayBeOverridden(), "Alias cannot point to a weak alias", - &GA); + Assert(!GA2->mayBeOverridden(), "Alias cannot point to a weak alias", + &GA); } else { // Only continue verifying subexpressions of GlobalAliases. // Do not recurse into global initializers. @@ -567,19 +545,18 @@ void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl<const GlobalAlias*> &Visited, } void Verifier::visitGlobalAlias(const GlobalAlias &GA) { - Assert1(!GA.getName().empty(), - "Alias name cannot be empty!", &GA); - Assert1(GlobalAlias::isValidLinkage(GA.getLinkage()), - "Alias should have private, internal, linkonce, weak, linkonce_odr, " - "weak_odr, or external linkage!", - &GA); + Assert(!GA.getName().empty(), "Alias name cannot be empty!", &GA); + Assert(GlobalAlias::isValidLinkage(GA.getLinkage()), + "Alias should have private, internal, linkonce, weak, linkonce_odr, " + "weak_odr, or external linkage!", + &GA); const Constant *Aliasee = GA.getAliasee(); - Assert1(Aliasee, "Aliasee cannot be NULL!", &GA); - Assert1(GA.getType() == Aliasee->getType(), - "Alias and aliasee types should match!", &GA); + Assert(Aliasee, "Aliasee cannot be NULL!", &GA); + Assert(GA.getType() == Aliasee->getType(), + "Alias and aliasee types should match!", &GA); - Assert1(isa<GlobalValue>(Aliasee) || isa<ConstantExpr>(Aliasee), - "Aliasee should be either GlobalValue or ConstantExpr", &GA); + Assert(isa<GlobalValue>(Aliasee) || isa<ConstantExpr>(Aliasee), + "Aliasee should be either GlobalValue or ConstantExpr", &GA); visitAliaseeSubExpr(GA, *Aliasee); @@ -592,6 +569,10 @@ void Verifier::visitNamedMDNode(const NamedMDNode &NMD) { if (!MD) continue; + if (NMD.getName() == "llvm.dbg.cu") { + Assert(isa<MDCompileUnit>(MD), "invalid compile unit", &NMD, MD); + } + visitMDNode(*MD); } } @@ -618,8 +599,8 @@ void Verifier::visitMDNode(const MDNode &MD) { Metadata *Op = MD.getOperand(i); if (!Op) continue; - Assert2(!isa<LocalAsMetadata>(Op), "Invalid operand for global metadata!", - &MD, Op); + Assert(!isa<LocalAsMetadata>(Op), "Invalid operand for global metadata!", + &MD, Op); if (auto *N = dyn_cast<MDNode>(Op)) { visitMDNode(*N); continue; @@ -631,26 +612,26 @@ void Verifier::visitMDNode(const MDNode &MD) { } // Check these last, so we diagnose problems in operands first. - Assert1(!MD.isTemporary(), "Expected no forward declarations!", &MD); - Assert1(MD.isResolved(), "All nodes should be resolved!", &MD); + Assert(!MD.isTemporary(), "Expected no forward declarations!", &MD); + Assert(MD.isResolved(), "All nodes should be resolved!", &MD); } void Verifier::visitValueAsMetadata(const ValueAsMetadata &MD, Function *F) { - Assert1(MD.getValue(), "Expected valid value", &MD); - Assert2(!MD.getValue()->getType()->isMetadataTy(), - "Unexpected metadata round-trip through values", &MD, MD.getValue()); + Assert(MD.getValue(), "Expected valid value", &MD); + Assert(!MD.getValue()->getType()->isMetadataTy(), + "Unexpected metadata round-trip through values", &MD, MD.getValue()); auto *L = dyn_cast<LocalAsMetadata>(&MD); if (!L) return; - Assert1(F, "function-local metadata used outside a function", L); + Assert(F, "function-local metadata used outside a function", L); // If this was an instruction, bb, or argument, verify that it is in the // function that we expect. 
Function *ActualF = nullptr; if (Instruction *I = dyn_cast<Instruction>(L->getValue())) { - Assert2(I->getParent(), "function-local metadata not in basic block", L, I); + Assert(I->getParent(), "function-local metadata not in basic block", L, I); ActualF = I->getParent()->getParent(); } else if (BasicBlock *BB = dyn_cast<BasicBlock>(L->getValue())) ActualF = BB->getParent(); @@ -658,7 +639,7 @@ void Verifier::visitValueAsMetadata(const ValueAsMetadata &MD, Function *F) { ActualF = A->getParent(); assert(ActualF && "Unimplemented function local metadata case!"); - Assert1(ActualF == F, "function-local metadata used in wrong function", L); + Assert(ActualF == F, "function-local metadata used in wrong function", L); } void Verifier::visitMetadataAsValue(const MetadataAsValue &MDV, Function *F) { @@ -678,126 +659,126 @@ void Verifier::visitMetadataAsValue(const MetadataAsValue &MDV, Function *F) { } void Verifier::visitMDLocation(const MDLocation &N) { - Assert1(N.getScope(), "location requires a valid scope", &N); - if (auto *IA = N.getInlinedAt()) - Assert2(isa<MDLocation>(IA), "inlined-at should be a location", &N, IA); + Assert(N.getRawScope() && isa<MDLocalScope>(N.getRawScope()), + "location requires a valid scope", &N, N.getRawScope()); + if (auto *IA = N.getRawInlinedAt()) + Assert(isa<MDLocation>(IA), "inlined-at should be a location", &N, IA); } void Verifier::visitGenericDebugNode(const GenericDebugNode &N) { - Assert1(N.getTag(), "invalid tag", &N); + Assert(N.getTag(), "invalid tag", &N); } void Verifier::visitMDSubrange(const MDSubrange &N) { - Assert1(N.getTag() == dwarf::DW_TAG_subrange_type, "invalid tag", &N); + Assert(N.getTag() == dwarf::DW_TAG_subrange_type, "invalid tag", &N); } void Verifier::visitMDEnumerator(const MDEnumerator &N) { - Assert1(N.getTag() == dwarf::DW_TAG_enumerator, "invalid tag", &N); + Assert(N.getTag() == dwarf::DW_TAG_enumerator, "invalid tag", &N); } void Verifier::visitMDBasicType(const MDBasicType &N) { - Assert1(N.getTag() == dwarf::DW_TAG_base_type || - N.getTag() == dwarf::DW_TAG_unspecified_type, - "invalid tag", &N); + Assert(N.getTag() == dwarf::DW_TAG_base_type || + N.getTag() == dwarf::DW_TAG_unspecified_type, + "invalid tag", &N); } void Verifier::visitMDDerivedType(const MDDerivedType &N) { - Assert1(N.getTag() == dwarf::DW_TAG_typedef || - N.getTag() == dwarf::DW_TAG_pointer_type || - N.getTag() == dwarf::DW_TAG_ptr_to_member_type || - N.getTag() == dwarf::DW_TAG_reference_type || - N.getTag() == dwarf::DW_TAG_rvalue_reference_type || - N.getTag() == dwarf::DW_TAG_const_type || - N.getTag() == dwarf::DW_TAG_volatile_type || - N.getTag() == dwarf::DW_TAG_restrict_type || - N.getTag() == dwarf::DW_TAG_member || - N.getTag() == dwarf::DW_TAG_inheritance || - N.getTag() == dwarf::DW_TAG_friend, - "invalid tag", &N); + Assert(N.getTag() == dwarf::DW_TAG_typedef || + N.getTag() == dwarf::DW_TAG_pointer_type || + N.getTag() == dwarf::DW_TAG_ptr_to_member_type || + N.getTag() == dwarf::DW_TAG_reference_type || + N.getTag() == dwarf::DW_TAG_rvalue_reference_type || + N.getTag() == dwarf::DW_TAG_const_type || + N.getTag() == dwarf::DW_TAG_volatile_type || + N.getTag() == dwarf::DW_TAG_restrict_type || + N.getTag() == dwarf::DW_TAG_member || + N.getTag() == dwarf::DW_TAG_inheritance || + N.getTag() == dwarf::DW_TAG_friend, + "invalid tag", &N); } void Verifier::visitMDCompositeType(const MDCompositeType &N) { - Assert1(N.getTag() == dwarf::DW_TAG_array_type || - N.getTag() == dwarf::DW_TAG_structure_type || - N.getTag() == 
dwarf::DW_TAG_union_type || - N.getTag() == dwarf::DW_TAG_enumeration_type || - N.getTag() == dwarf::DW_TAG_subroutine_type || - N.getTag() == dwarf::DW_TAG_class_type, - "invalid tag", &N); + Assert(N.getTag() == dwarf::DW_TAG_array_type || + N.getTag() == dwarf::DW_TAG_structure_type || + N.getTag() == dwarf::DW_TAG_union_type || + N.getTag() == dwarf::DW_TAG_enumeration_type || + N.getTag() == dwarf::DW_TAG_subroutine_type || + N.getTag() == dwarf::DW_TAG_class_type, + "invalid tag", &N); } void Verifier::visitMDSubroutineType(const MDSubroutineType &N) { - Assert1(N.getTag() == dwarf::DW_TAG_subroutine_type, "invalid tag", &N); + Assert(N.getTag() == dwarf::DW_TAG_subroutine_type, "invalid tag", &N); } void Verifier::visitMDFile(const MDFile &N) { - Assert1(N.getTag() == dwarf::DW_TAG_file_type, "invalid tag", &N); + Assert(N.getTag() == dwarf::DW_TAG_file_type, "invalid tag", &N); } void Verifier::visitMDCompileUnit(const MDCompileUnit &N) { - Assert1(N.getTag() == dwarf::DW_TAG_compile_unit, "invalid tag", &N); + Assert(N.getTag() == dwarf::DW_TAG_compile_unit, "invalid tag", &N); } void Verifier::visitMDSubprogram(const MDSubprogram &N) { - Assert1(N.getTag() == dwarf::DW_TAG_subprogram, "invalid tag", &N); + Assert(N.getTag() == dwarf::DW_TAG_subprogram, "invalid tag", &N); } void Verifier::visitMDLexicalBlock(const MDLexicalBlock &N) { - Assert1(N.getTag() == dwarf::DW_TAG_lexical_block, "invalid tag", &N); + Assert(N.getTag() == dwarf::DW_TAG_lexical_block, "invalid tag", &N); } void Verifier::visitMDLexicalBlockFile(const MDLexicalBlockFile &N) { - Assert1(N.getTag() == dwarf::DW_TAG_lexical_block, "invalid tag", &N); + Assert(N.getTag() == dwarf::DW_TAG_lexical_block, "invalid tag", &N); } void Verifier::visitMDNamespace(const MDNamespace &N) { - Assert1(N.getTag() == dwarf::DW_TAG_namespace, "invalid tag", &N); + Assert(N.getTag() == dwarf::DW_TAG_namespace, "invalid tag", &N); } void Verifier::visitMDTemplateTypeParameter(const MDTemplateTypeParameter &N) { - Assert1(N.getTag() == dwarf::DW_TAG_template_type_parameter, "invalid tag", - &N); + Assert(N.getTag() == dwarf::DW_TAG_template_type_parameter, "invalid tag", + &N); } void Verifier::visitMDTemplateValueParameter( const MDTemplateValueParameter &N) { - Assert1(N.getTag() == dwarf::DW_TAG_template_value_parameter || - N.getTag() == dwarf::DW_TAG_GNU_template_template_param || - N.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack, - "invalid tag", &N); + Assert(N.getTag() == dwarf::DW_TAG_template_value_parameter || + N.getTag() == dwarf::DW_TAG_GNU_template_template_param || + N.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack, + "invalid tag", &N); } void Verifier::visitMDGlobalVariable(const MDGlobalVariable &N) { - Assert1(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N); + Assert(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N); } void Verifier::visitMDLocalVariable(const MDLocalVariable &N) { - Assert1(N.getTag() == dwarf::DW_TAG_auto_variable || - N.getTag() == dwarf::DW_TAG_arg_variable, - "invalid tag", &N); + Assert(N.getTag() == dwarf::DW_TAG_auto_variable || + N.getTag() == dwarf::DW_TAG_arg_variable, + "invalid tag", &N); } void Verifier::visitMDExpression(const MDExpression &N) { - Assert1(N.getTag() == dwarf::DW_TAG_expression, "invalid tag", &N); - Assert1(N.isValid(), "invalid expression", &N); + Assert(N.isValid(), "invalid expression", &N); } void Verifier::visitMDObjCProperty(const MDObjCProperty &N) { - Assert1(N.getTag() == dwarf::DW_TAG_APPLE_property, "invalid tag", &N); + 
Assert(N.getTag() == dwarf::DW_TAG_APPLE_property, "invalid tag", &N); } void Verifier::visitMDImportedEntity(const MDImportedEntity &N) { - Assert1(N.getTag() == dwarf::DW_TAG_imported_module || - N.getTag() == dwarf::DW_TAG_imported_declaration, - "invalid tag", &N); + Assert(N.getTag() == dwarf::DW_TAG_imported_module || + N.getTag() == dwarf::DW_TAG_imported_declaration, + "invalid tag", &N); } void Verifier::visitComdat(const Comdat &C) { // The Module is invalid if the GlobalValue has private linkage. Entities // with private linkage don't have entries in the symbol table. if (const GlobalValue *GV = M->getNamedValue(C.getName())) - Assert1(!GV->hasPrivateLinkage(), "comdat global value has private linkage", - GV); + Assert(!GV->hasPrivateLinkage(), "comdat global value has private linkage", + GV); } void Verifier::visitModuleIdents(const Module &M) { @@ -809,12 +790,12 @@ void Verifier::visitModuleIdents(const Module &M) { // Scan each llvm.ident entry and make sure that this requirement is met. for (unsigned i = 0, e = Idents->getNumOperands(); i != e; ++i) { const MDNode *N = Idents->getOperand(i); - Assert1(N->getNumOperands() == 1, - "incorrect number of operands in llvm.ident metadata", N); - Assert1(dyn_cast_or_null<MDString>(N->getOperand(0)), - ("invalid value for llvm.ident metadata entry operand" - "(the operand should be a string)"), - N->getOperand(0)); + Assert(N->getNumOperands() == 1, + "incorrect number of operands in llvm.ident metadata", N); + Assert(dyn_cast_or_null<MDString>(N->getOperand(0)), + ("invalid value for llvm.ident metadata entry operand" + "(the operand should be a string)"), + N->getOperand(0)); } } @@ -857,22 +838,21 @@ Verifier::visitModuleFlag(const MDNode *Op, SmallVectorImpl<const MDNode *> &Requirements) { // Each module flag should have three arguments, the merge behavior (a // constant int), the flag ID (an MDString), and the value. - Assert1(Op->getNumOperands() == 3, - "incorrect number of operands in module flag", Op); + Assert(Op->getNumOperands() == 3, + "incorrect number of operands in module flag", Op); Module::ModFlagBehavior MFB; if (!Module::isValidModFlagBehavior(Op->getOperand(0), MFB)) { - Assert1( + Assert( mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)), "invalid behavior operand in module flag (expected constant integer)", Op->getOperand(0)); - Assert1(false, - "invalid behavior operand in module flag (unexpected constant)", - Op->getOperand(0)); + Assert(false, + "invalid behavior operand in module flag (unexpected constant)", + Op->getOperand(0)); } MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1)); - Assert1(ID, - "invalid ID operand in module flag (expected metadata string)", - Op->getOperand(1)); + Assert(ID, "invalid ID operand in module flag (expected metadata string)", + Op->getOperand(1)); // Sanity check the values for behaviors with additional requirements. switch (MFB) { @@ -886,13 +866,13 @@ Verifier::visitModuleFlag(const MDNode *Op, // The value should itself be an MDNode with two operands, a flag ID (an // MDString), and a value. 
MDNode *Value = dyn_cast<MDNode>(Op->getOperand(2)); - Assert1(Value && Value->getNumOperands() == 2, - "invalid value for 'require' module flag (expected metadata pair)", - Op->getOperand(2)); - Assert1(isa<MDString>(Value->getOperand(0)), - ("invalid value for 'require' module flag " - "(first value operand should be a string)"), - Value->getOperand(0)); + Assert(Value && Value->getNumOperands() == 2, + "invalid value for 'require' module flag (expected metadata pair)", + Op->getOperand(2)); + Assert(isa<MDString>(Value->getOperand(0)), + ("invalid value for 'require' module flag " + "(first value operand should be a string)"), + Value->getOperand(0)); // Append it to the list of requirements, to check once all module flags are // scanned. @@ -903,9 +883,10 @@ Verifier::visitModuleFlag(const MDNode *Op, case Module::Append: case Module::AppendUnique: { // These behavior types require the operand be an MDNode. - Assert1(isa<MDNode>(Op->getOperand(2)), - "invalid value for 'append'-type module flag " - "(expected a metadata node)", Op->getOperand(2)); + Assert(isa<MDNode>(Op->getOperand(2)), + "invalid value for 'append'-type module flag " + "(expected a metadata node)", + Op->getOperand(2)); break; } } @@ -913,9 +894,8 @@ Verifier::visitModuleFlag(const MDNode *Op, // Unless this is a "requires" flag, check the ID is unique. if (MFB != Module::Require) { bool Inserted = SeenIDs.insert(std::make_pair(ID, Op)).second; - Assert1(Inserted, - "module flag identifiers must be unique (or of 'require' type)", - ID); + Assert(Inserted, + "module flag identifiers must be unique (or of 'require' type)", ID); } } @@ -991,14 +971,15 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty, VerifyAttributeTypes(Attrs, Idx, false, V); if (isReturnValue) - Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal) && - !Attrs.hasAttribute(Idx, Attribute::Nest) && - !Attrs.hasAttribute(Idx, Attribute::StructRet) && - !Attrs.hasAttribute(Idx, Attribute::NoCapture) && - !Attrs.hasAttribute(Idx, Attribute::Returned) && - !Attrs.hasAttribute(Idx, Attribute::InAlloca), - "Attributes 'byval', 'inalloca', 'nest', 'sret', 'nocapture', and " - "'returned' do not apply to return values!", V); + Assert(!Attrs.hasAttribute(Idx, Attribute::ByVal) && + !Attrs.hasAttribute(Idx, Attribute::Nest) && + !Attrs.hasAttribute(Idx, Attribute::StructRet) && + !Attrs.hasAttribute(Idx, Attribute::NoCapture) && + !Attrs.hasAttribute(Idx, Attribute::Returned) && + !Attrs.hasAttribute(Idx, Attribute::InAlloca), + "Attributes 'byval', 'inalloca', 'nest', 'sret', 'nocapture', and " + "'returned' do not apply to return values!", + V); // Check for mutually incompatible attributes. Only inreg is compatible with // sret. 
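The Verifier.cpp hunks above replace the five fixed-arity CheckFailed overloads (and the Assert1..Assert4 macro family) with a single variadic template plus a recursive WriteTs helper that dispatches to per-type Write overloads. The following is a minimal standalone sketch of that pattern, not the real LLVM code: Diag, std::string, and int are simplified stand-ins for VerifierSupport, Twine, Value, and the other printable kinds.

#include <iostream>
#include <string>

struct Diag {
  bool Broken = false;

  // One Write overload per printable kind, standing in for the patch's
  // Write(const Value *), Write(const Metadata *), Write(Type *), etc.
  void Write(const std::string &S) { std::cerr << "  operand: " << S << '\n'; }
  void Write(int I) { std::cerr << "  operand: " << I << '\n'; }

  // Recursively expand the parameter pack: print the head, recurse on the rest.
  template <typename T1, typename... Ts>
  void WriteTs(const T1 &V1, const Ts &... Vs) {
    Write(V1);
    WriteTs(Vs...);
  }
  void WriteTs() {} // base case: nothing left to print

  // Message-only version; a convenient breakpoint target, as the patch notes.
  void CheckFailed(const std::string &Message) {
    std::cerr << Message << '\n';
    Broken = true;
  }

  // The values-to-print version forwards to the message-only one, so a single
  // variadic entry point replaces every fixed-arity overload.
  template <typename T1, typename... Ts>
  void CheckFailed(const std::string &Message, const T1 &V1, const Ts &... Vs) {
    CheckFailed(Message);
    WriteTs(V1, Vs...);
  }
};

int main() {
  Diag D;
  D.CheckFailed("Attributes 'zeroext and signext' are incompatible!",
                std::string("%arg0"), 42);
  return D.Broken ? 1 : 0;
}

A side effect visible throughout the remaining hunks: the single Assert(C, ...) macro passes any number and mix of operands straight through __VA_ARGS__, so call sites no longer have to pick whichever numbered macro happens to match their argument types.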
@@ -1008,45 +989,58 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty, AttrCount += Attrs.hasAttribute(Idx, Attribute::StructRet) || Attrs.hasAttribute(Idx, Attribute::InReg); AttrCount += Attrs.hasAttribute(Idx, Attribute::Nest); - Assert1(AttrCount <= 1, "Attributes 'byval', 'inalloca', 'inreg', 'nest', " - "and 'sret' are incompatible!", V); - - Assert1(!(Attrs.hasAttribute(Idx, Attribute::InAlloca) && - Attrs.hasAttribute(Idx, Attribute::ReadOnly)), "Attributes " - "'inalloca and readonly' are incompatible!", V); - - Assert1(!(Attrs.hasAttribute(Idx, Attribute::StructRet) && - Attrs.hasAttribute(Idx, Attribute::Returned)), "Attributes " - "'sret and returned' are incompatible!", V); - - Assert1(!(Attrs.hasAttribute(Idx, Attribute::ZExt) && - Attrs.hasAttribute(Idx, Attribute::SExt)), "Attributes " - "'zeroext and signext' are incompatible!", V); - - Assert1(!(Attrs.hasAttribute(Idx, Attribute::ReadNone) && - Attrs.hasAttribute(Idx, Attribute::ReadOnly)), "Attributes " - "'readnone and readonly' are incompatible!", V); - - Assert1(!(Attrs.hasAttribute(Idx, Attribute::NoInline) && - Attrs.hasAttribute(Idx, Attribute::AlwaysInline)), "Attributes " - "'noinline and alwaysinline' are incompatible!", V); - - Assert1(!AttrBuilder(Attrs, Idx). - hasAttributes(AttributeFuncs::typeIncompatible(Ty, Idx), Idx), - "Wrong types for attribute: " + - AttributeFuncs::typeIncompatible(Ty, Idx).getAsString(Idx), V); + Assert(AttrCount <= 1, "Attributes 'byval', 'inalloca', 'inreg', 'nest', " + "and 'sret' are incompatible!", + V); + + Assert(!(Attrs.hasAttribute(Idx, Attribute::InAlloca) && + Attrs.hasAttribute(Idx, Attribute::ReadOnly)), + "Attributes " + "'inalloca and readonly' are incompatible!", + V); + + Assert(!(Attrs.hasAttribute(Idx, Attribute::StructRet) && + Attrs.hasAttribute(Idx, Attribute::Returned)), + "Attributes " + "'sret and returned' are incompatible!", + V); + + Assert(!(Attrs.hasAttribute(Idx, Attribute::ZExt) && + Attrs.hasAttribute(Idx, Attribute::SExt)), + "Attributes " + "'zeroext and signext' are incompatible!", + V); + + Assert(!(Attrs.hasAttribute(Idx, Attribute::ReadNone) && + Attrs.hasAttribute(Idx, Attribute::ReadOnly)), + "Attributes " + "'readnone and readonly' are incompatible!", + V); + + Assert(!(Attrs.hasAttribute(Idx, Attribute::NoInline) && + Attrs.hasAttribute(Idx, Attribute::AlwaysInline)), + "Attributes " + "'noinline and alwaysinline' are incompatible!", + V); + + Assert(!AttrBuilder(Attrs, Idx) + .hasAttributes(AttributeFuncs::typeIncompatible(Ty, Idx), Idx), + "Wrong types for attribute: " + + AttributeFuncs::typeIncompatible(Ty, Idx).getAsString(Idx), + V); if (PointerType *PTy = dyn_cast<PointerType>(Ty)) { - if (!PTy->getElementType()->isSized()) { - Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal) && - !Attrs.hasAttribute(Idx, Attribute::InAlloca), - "Attributes 'byval' and 'inalloca' do not support unsized types!", - V); + SmallPtrSet<const Type*, 4> Visited; + if (!PTy->getElementType()->isSized(&Visited)) { + Assert(!Attrs.hasAttribute(Idx, Attribute::ByVal) && + !Attrs.hasAttribute(Idx, Attribute::InAlloca), + "Attributes 'byval' and 'inalloca' do not support unsized types!", + V); } } else { - Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal), - "Attribute 'byval' only applies to parameters with pointer type!", - V); + Assert(!Attrs.hasAttribute(Idx, Attribute::ByVal), + "Attribute 'byval' only applies to parameters with pointer type!", + V); } } @@ -1078,28 +1072,30 @@ void 
Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs, continue; if (Attrs.hasAttribute(Idx, Attribute::Nest)) { - Assert1(!SawNest, "More than one parameter has attribute nest!", V); + Assert(!SawNest, "More than one parameter has attribute nest!", V); SawNest = true; } if (Attrs.hasAttribute(Idx, Attribute::Returned)) { - Assert1(!SawReturned, "More than one parameter has attribute returned!", - V); - Assert1(Ty->canLosslesslyBitCastTo(FT->getReturnType()), "Incompatible " - "argument and return types for 'returned' attribute", V); + Assert(!SawReturned, "More than one parameter has attribute returned!", + V); + Assert(Ty->canLosslesslyBitCastTo(FT->getReturnType()), + "Incompatible " + "argument and return types for 'returned' attribute", + V); SawReturned = true; } if (Attrs.hasAttribute(Idx, Attribute::StructRet)) { - Assert1(!SawSRet, "Cannot have multiple 'sret' parameters!", V); - Assert1(Idx == 1 || Idx == 2, - "Attribute 'sret' is not on first or second parameter!", V); + Assert(!SawSRet, "Cannot have multiple 'sret' parameters!", V); + Assert(Idx == 1 || Idx == 2, + "Attribute 'sret' is not on first or second parameter!", V); SawSRet = true; } if (Attrs.hasAttribute(Idx, Attribute::InAlloca)) { - Assert1(Idx == FT->getNumParams(), - "inalloca isn't on the last parameter!", V); + Assert(Idx == FT->getNumParams(), "inalloca isn't on the last parameter!", + V); } } @@ -1108,39 +1104,35 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs, VerifyAttributeTypes(Attrs, AttributeSet::FunctionIndex, true, V); - Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::ReadNone) && - Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::ReadOnly)), - "Attributes 'readnone and readonly' are incompatible!", V); + Assert( + !(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone) && + Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::ReadOnly)), + "Attributes 'readnone and readonly' are incompatible!", V); - Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::NoInline) && - Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::AlwaysInline)), - "Attributes 'noinline and alwaysinline' are incompatible!", V); + Assert( + !(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::NoInline) && + Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::AlwaysInline)), + "Attributes 'noinline and alwaysinline' are incompatible!", V); if (Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeNone)) { - Assert1(Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::NoInline), - "Attribute 'optnone' requires 'noinline'!", V); + Assert(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::NoInline), + "Attribute 'optnone' requires 'noinline'!", V); - Assert1(!Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize), - "Attributes 'optsize and optnone' are incompatible!", V); + Assert(!Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize), + "Attributes 'optsize and optnone' are incompatible!", V); - Assert1(!Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::MinSize), - "Attributes 'minsize and optnone' are incompatible!", V); + Assert(!Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize), + "Attributes 'minsize and optnone' are incompatible!", V); } if (Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::JumpTable)) { const GlobalValue *GV = cast<GlobalValue>(V); - 
Assert1(GV->hasUnnamedAddr(), - "Attribute 'jumptable' requires 'unnamed_addr'", V); - + Assert(GV->hasUnnamedAddr(), + "Attribute 'jumptable' requires 'unnamed_addr'", V); } } @@ -1148,9 +1140,9 @@ void Verifier::VerifyConstantExprBitcastType(const ConstantExpr *CE) { if (CE->getOpcode() != Instruction::BitCast) return; - Assert1(CastInst::castIsValid(Instruction::BitCast, CE->getOperand(0), - CE->getType()), - "Invalid bitcast", CE); + Assert(CastInst::castIsValid(Instruction::BitCast, CE->getOperand(0), + CE->getType()), + "Invalid bitcast", CE); } bool Verifier::VerifyAttributeCount(AttributeSet Attrs, unsigned Params) { @@ -1175,84 +1167,86 @@ void Verifier::VerifyStatepoint(ImmutableCallSite CS) { const Instruction &CI = *CS.getInstruction(); - Assert1(!CS.doesNotAccessMemory() && - !CS.onlyReadsMemory(), - "gc.statepoint must read and write memory to preserve " - "reordering restrictions required by safepoint semantics", &CI); - + Assert(!CS.doesNotAccessMemory() && !CS.onlyReadsMemory(), + "gc.statepoint must read and write memory to preserve " + "reordering restrictions required by safepoint semantics", + &CI); + const Value *Target = CS.getArgument(0); const PointerType *PT = dyn_cast<PointerType>(Target->getType()); - Assert2(PT && PT->getElementType()->isFunctionTy(), - "gc.statepoint callee must be of function pointer type", - &CI, Target); + Assert(PT && PT->getElementType()->isFunctionTy(), + "gc.statepoint callee must be of function pointer type", &CI, Target); FunctionType *TargetFuncType = cast<FunctionType>(PT->getElementType()); const Value *NumCallArgsV = CS.getArgument(1); - Assert1(isa<ConstantInt>(NumCallArgsV), - "gc.statepoint number of arguments to underlying call " - "must be constant integer", &CI); + Assert(isa<ConstantInt>(NumCallArgsV), + "gc.statepoint number of arguments to underlying call " + "must be constant integer", + &CI); const int NumCallArgs = cast<ConstantInt>(NumCallArgsV)->getZExtValue(); - Assert1(NumCallArgs >= 0, - "gc.statepoint number of arguments to underlying call " - "must be positive", &CI); + Assert(NumCallArgs >= 0, + "gc.statepoint number of arguments to underlying call " + "must be positive", + &CI); const int NumParams = (int)TargetFuncType->getNumParams(); if (TargetFuncType->isVarArg()) { - Assert1(NumCallArgs >= NumParams, - "gc.statepoint mismatch in number of vararg call args", &CI); + Assert(NumCallArgs >= NumParams, + "gc.statepoint mismatch in number of vararg call args", &CI); // TODO: Remove this limitation - Assert1(TargetFuncType->getReturnType()->isVoidTy(), - "gc.statepoint doesn't support wrapping non-void " - "vararg functions yet", &CI); + Assert(TargetFuncType->getReturnType()->isVoidTy(), + "gc.statepoint doesn't support wrapping non-void " + "vararg functions yet", + &CI); } else - Assert1(NumCallArgs == NumParams, - "gc.statepoint mismatch in number of call args", &CI); + Assert(NumCallArgs == NumParams, + "gc.statepoint mismatch in number of call args", &CI); const Value *Unused = CS.getArgument(2); - Assert1(isa<ConstantInt>(Unused) && - cast<ConstantInt>(Unused)->isNullValue(), - "gc.statepoint parameter #3 must be zero", &CI); + Assert(isa<ConstantInt>(Unused) && cast<ConstantInt>(Unused)->isNullValue(), + "gc.statepoint parameter #3 must be zero", &CI); // Verify that the types of the call parameter arguments match // the type of the wrapped callee. 
for (int i = 0; i < NumParams; i++) { Type *ParamType = TargetFuncType->getParamType(i); Type *ArgType = CS.getArgument(3+i)->getType(); - Assert1(ArgType == ParamType, - "gc.statepoint call argument does not match wrapped " - "function type", &CI); + Assert(ArgType == ParamType, + "gc.statepoint call argument does not match wrapped " + "function type", + &CI); } const int EndCallArgsInx = 2+NumCallArgs; const Value *NumDeoptArgsV = CS.getArgument(EndCallArgsInx+1); - Assert1(isa<ConstantInt>(NumDeoptArgsV), - "gc.statepoint number of deoptimization arguments " - "must be constant integer", &CI); + Assert(isa<ConstantInt>(NumDeoptArgsV), + "gc.statepoint number of deoptimization arguments " + "must be constant integer", + &CI); const int NumDeoptArgs = cast<ConstantInt>(NumDeoptArgsV)->getZExtValue(); - Assert1(NumDeoptArgs >= 0, - "gc.statepoint number of deoptimization arguments " - "must be positive", &CI); + Assert(NumDeoptArgs >= 0, "gc.statepoint number of deoptimization arguments " + "must be positive", + &CI); + + Assert(4 + NumCallArgs + NumDeoptArgs <= (int)CS.arg_size(), + "gc.statepoint too few arguments according to length fields", &CI); - Assert1(4 + NumCallArgs + NumDeoptArgs <= (int)CS.arg_size(), - "gc.statepoint too few arguments according to length fields", &CI); - // Check that the only uses of this gc.statepoint are gc.result or // gc.relocate calls which are tied to this statepoint and thus part // of the same statepoint sequence for (const User *U : CI.users()) { const CallInst *Call = dyn_cast<const CallInst>(U); - Assert2(Call, "illegal use of statepoint token", &CI, U); + Assert(Call, "illegal use of statepoint token", &CI, U); if (!Call) continue; - Assert2(isGCRelocate(Call) || isGCResult(Call), - "gc.result or gc.relocate are the only value uses" - "of a gc.statepoint", &CI, U); + Assert(isGCRelocate(Call) || isGCResult(Call), + "gc.result or gc.relocate are the only value uses" + "of a gc.statepoint", + &CI, U); if (isGCResult(Call)) { - Assert2(Call->getArgOperand(0) == &CI, - "gc.result connected to wrong gc.statepoint", - &CI, Call); + Assert(Call->getArgOperand(0) == &CI, + "gc.result connected to wrong gc.statepoint", &CI, Call); } else if (isGCRelocate(Call)) { - Assert2(Call->getArgOperand(0) == &CI, - "gc.relocate connected to wrong gc.statepoint", - &CI, Call); + Assert(Call->getArgOperand(0) == &CI, + "gc.relocate connected to wrong gc.statepoint", &CI, Call); } } @@ -1266,6 +1260,19 @@ void Verifier::VerifyStatepoint(ImmutableCallSite CS) { // about. See example statepoint.ll in the verifier subdirectory } +void Verifier::verifyFrameRecoverIndices() { + for (auto &Counts : FrameEscapeInfo) { + Function *F = Counts.first; + unsigned EscapedObjectCount = Counts.second.first; + unsigned MaxRecoveredIndex = Counts.second.second; + Assert(MaxRecoveredIndex <= EscapedObjectCount, + "all indices passed to llvm.framerecover must be less than the " + "number of arguments passed to llvm.frameescape in the parent " + "function", + F); + } +} + // visitFunction - Verify that a function is ok.
// void Verifier::visitFunction(const Function &F) { @@ -1273,25 +1280,24 @@ void Verifier::visitFunction(const Function &F) { FunctionType *FT = F.getFunctionType(); unsigned NumArgs = F.arg_size(); - Assert1(Context == &F.getContext(), - "Function context does not match Module context!", &F); + Assert(Context == &F.getContext(), + "Function context does not match Module context!", &F); - Assert1(!F.hasCommonLinkage(), "Functions may not have common linkage", &F); - Assert2(FT->getNumParams() == NumArgs, - "# formal arguments must match # of arguments for function type!", - &F, FT); - Assert1(F.getReturnType()->isFirstClassType() || - F.getReturnType()->isVoidTy() || - F.getReturnType()->isStructTy(), - "Functions cannot return aggregate values!", &F); + Assert(!F.hasCommonLinkage(), "Functions may not have common linkage", &F); + Assert(FT->getNumParams() == NumArgs, + "# formal arguments must match # of arguments for function type!", &F, + FT); + Assert(F.getReturnType()->isFirstClassType() || + F.getReturnType()->isVoidTy() || F.getReturnType()->isStructTy(), + "Functions cannot return aggregate values!", &F); - Assert1(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(), - "Invalid struct return type!", &F); + Assert(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(), + "Invalid struct return type!", &F); AttributeSet Attrs = F.getAttributes(); - Assert1(VerifyAttributeCount(Attrs, FT->getNumParams()), - "Attribute after last parameter!", &F); + Assert(VerifyAttributeCount(Attrs, FT->getNumParams()), + "Attribute after last parameter!", &F); // Check function attributes. VerifyFunctionAttrs(FT, Attrs, &F); @@ -1299,9 +1305,8 @@ void Verifier::visitFunction(const Function &F) { // On function declarations/definitions, we do not support the builtin // attribute. We do not check this in VerifyFunctionAttrs since that is // checking for Attributes that can/can not ever be on functions. - Assert1(!Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::Builtin), - "Attribute 'builtin' can only be applied to a callsite.", &F); + Assert(!Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::Builtin), + "Attribute 'builtin' can only be applied to a callsite.", &F); // Check that this function meets the restrictions on this calling convention. 
// Sometimes varargs is used for perfectly forwarding thunks, so some of these @@ -1315,8 +1320,9 @@ void Verifier::visitFunction(const Function &F) { case CallingConv::Intel_OCL_BI: case CallingConv::PTX_Kernel: case CallingConv::PTX_Device: - Assert1(!F.isVarArg(), "Calling convention does not support varargs or " - "perfect forwarding!", &F); + Assert(!F.isVarArg(), "Calling convention does not support varargs or " + "perfect forwarding!", + &F); break; } @@ -1327,35 +1333,35 @@ void Verifier::visitFunction(const Function &F) { unsigned i = 0; for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++i) { - Assert2(I->getType() == FT->getParamType(i), - "Argument value does not match function argument type!", - I, FT->getParamType(i)); - Assert1(I->getType()->isFirstClassType(), - "Function arguments must have first-class types!", I); + Assert(I->getType() == FT->getParamType(i), + "Argument value does not match function argument type!", I, + FT->getParamType(i)); + Assert(I->getType()->isFirstClassType(), + "Function arguments must have first-class types!", I); if (!isLLVMdotName) - Assert2(!I->getType()->isMetadataTy(), - "Function takes metadata but isn't an intrinsic", I, &F); + Assert(!I->getType()->isMetadataTy(), + "Function takes metadata but isn't an intrinsic", I, &F); } if (F.isMaterializable()) { // Function has a body somewhere we can't see. } else if (F.isDeclaration()) { - Assert1(F.hasExternalLinkage() || F.hasExternalWeakLinkage(), - "invalid linkage type for function declaration", &F); + Assert(F.hasExternalLinkage() || F.hasExternalWeakLinkage(), + "invalid linkage type for function declaration", &F); } else { // Verify that this function (which has a body) is not named "llvm.*". It // is not legal to define intrinsics. - Assert1(!isLLVMdotName, "llvm intrinsics cannot be defined!", &F); + Assert(!isLLVMdotName, "llvm intrinsics cannot be defined!", &F); // Check the entry node const BasicBlock *Entry = &F.getEntryBlock(); - Assert1(pred_empty(Entry), - "Entry block to function must not have predecessors!", Entry); + Assert(pred_empty(Entry), + "Entry block to function must not have predecessors!", Entry); // The address of the entry block cannot be taken, unless it is dead. if (Entry->hasAddressTaken()) { - Assert1(!BlockAddress::lookup(Entry)->isConstantUsed(), - "blockaddress may not be used with the entry block!", Entry); + Assert(!BlockAddress::lookup(Entry)->isConstantUsed(), + "blockaddress may not be used with the entry block!", Entry); } } @@ -1364,13 +1370,13 @@ void Verifier::visitFunction(const Function &F) { if (F.getIntrinsicID()) { const User *U; if (F.hasAddressTaken(&U)) - Assert1(0, "Invalid user of intrinsic instruction!", U); + Assert(0, "Invalid user of intrinsic instruction!", U); } - Assert1(!F.hasDLLImportStorageClass() || - (F.isDeclaration() && F.hasExternalLinkage()) || - F.hasAvailableExternallyLinkage(), - "Function is marked as dllimport, but not external.", &F); + Assert(!F.hasDLLImportStorageClass() || + (F.isDeclaration() && F.hasExternalLinkage()) || + F.hasAvailableExternallyLinkage(), + "Function is marked as dllimport, but not external.", &F); } // verifyBasicBlock - Verify that a basic block is well formed... @@ -1379,7 +1385,7 @@ void Verifier::visitBasicBlock(BasicBlock &BB) { InstsInThisBlock.clear(); // Ensure that basic blocks have terminators! 
- Assert1(BB.getTerminator(), "Basic Block does not have terminator!", &BB); + Assert(BB.getTerminator(), "Basic Block does not have terminator!", &BB); // Check constraints that this basic block imposes on all of the PHI nodes in // it. @@ -1390,12 +1396,14 @@ void Verifier::visitBasicBlock(BasicBlock &BB) { PHINode *PN; for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I));++I) { // Ensure that PHI nodes have at least one entry! - Assert1(PN->getNumIncomingValues() != 0, - "PHI nodes must have at least one entry. If the block is dead, " - "the PHI should be removed!", PN); - Assert1(PN->getNumIncomingValues() == Preds.size(), - "PHINode should have one entry for each predecessor of its " - "parent basic block!", PN); + Assert(PN->getNumIncomingValues() != 0, + "PHI nodes must have at least one entry. If the block is dead, " + "the PHI should be removed!", + PN); + Assert(PN->getNumIncomingValues() == Preds.size(), + "PHINode should have one entry for each predecessor of its " + "parent basic block!", + PN); // Get and sort all incoming values in the PHI node... Values.clear(); @@ -1410,17 +1418,17 @@ void Verifier::visitBasicBlock(BasicBlock &BB) { // particular basic block in this PHI node, that the incoming values are // all identical. // - Assert4(i == 0 || Values[i].first != Values[i-1].first || - Values[i].second == Values[i-1].second, - "PHI node has multiple entries for the same basic block with " - "different incoming values!", PN, Values[i].first, - Values[i].second, Values[i-1].second); + Assert(i == 0 || Values[i].first != Values[i - 1].first || + Values[i].second == Values[i - 1].second, + "PHI node has multiple entries for the same basic block with " + "different incoming values!", + PN, Values[i].first, Values[i].second, Values[i - 1].second); // Check to make sure that the predecessors and PHI node entries are // matched up. - Assert3(Values[i].first == Preds[i], - "PHI node entries do not match predecessors!", PN, - Values[i].first, Preds[i]); + Assert(Values[i].first == Preds[i], + "PHI node entries do not match predecessors!", PN, + Values[i].first, Preds[i]); } } } @@ -1434,15 +1442,15 @@ void Verifier::visitBasicBlock(BasicBlock &BB) { void Verifier::visitTerminatorInst(TerminatorInst &I) { // Ensure that terminators only exist at the end of the basic block. 
- Assert1(&I == I.getParent()->getTerminator(), - "Terminator found in the middle of a basic block!", I.getParent()); + Assert(&I == I.getParent()->getTerminator(), + "Terminator found in the middle of a basic block!", I.getParent()); visitInstruction(I); } void Verifier::visitBranchInst(BranchInst &BI) { if (BI.isConditional()) { - Assert2(BI.getCondition()->getType()->isIntegerTy(1), - "Branch condition is not 'i1' type!", &BI, BI.getCondition()); + Assert(BI.getCondition()->getType()->isIntegerTy(1), + "Branch condition is not 'i1' type!", &BI, BI.getCondition()); } visitTerminatorInst(BI); } @@ -1451,13 +1459,15 @@ void Verifier::visitReturnInst(ReturnInst &RI) { Function *F = RI.getParent()->getParent(); unsigned N = RI.getNumOperands(); if (F->getReturnType()->isVoidTy()) - Assert2(N == 0, - "Found return instr that returns non-void in Function of void " - "return type!", &RI, F->getReturnType()); + Assert(N == 0, + "Found return instr that returns non-void in Function of void " + "return type!", + &RI, F->getReturnType()); else - Assert2(N == 1 && F->getReturnType() == RI.getOperand(0)->getType(), - "Function return type does not match operand " - "type of return inst!", &RI, F->getReturnType()); + Assert(N == 1 && F->getReturnType() == RI.getOperand(0)->getType(), + "Function return type does not match operand " + "type of return inst!", + &RI, F->getReturnType()); // Check to make sure that the return value has necessary properties for // terminators... @@ -1470,32 +1480,32 @@ void Verifier::visitSwitchInst(SwitchInst &SI) { Type *SwitchTy = SI.getCondition()->getType(); SmallPtrSet<ConstantInt*, 32> Constants; for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) { - Assert1(i.getCaseValue()->getType() == SwitchTy, - "Switch constants must all be same type as switch value!", &SI); - Assert2(Constants.insert(i.getCaseValue()).second, - "Duplicate integer as switch case", &SI, i.getCaseValue()); + Assert(i.getCaseValue()->getType() == SwitchTy, + "Switch constants must all be same type as switch value!", &SI); + Assert(Constants.insert(i.getCaseValue()).second, + "Duplicate integer as switch case", &SI, i.getCaseValue()); } visitTerminatorInst(SI); } void Verifier::visitIndirectBrInst(IndirectBrInst &BI) { - Assert1(BI.getAddress()->getType()->isPointerTy(), - "Indirectbr operand must have pointer type!", &BI); + Assert(BI.getAddress()->getType()->isPointerTy(), + "Indirectbr operand must have pointer type!", &BI); for (unsigned i = 0, e = BI.getNumDestinations(); i != e; ++i) - Assert1(BI.getDestination(i)->getType()->isLabelTy(), - "Indirectbr destinations must all have pointer type!", &BI); + Assert(BI.getDestination(i)->getType()->isLabelTy(), + "Indirectbr destinations must all have pointer type!", &BI); visitTerminatorInst(BI); } void Verifier::visitSelectInst(SelectInst &SI) { - Assert1(!SelectInst::areInvalidOperands(SI.getOperand(0), SI.getOperand(1), - SI.getOperand(2)), - "Invalid operands for select instruction!", &SI); + Assert(!SelectInst::areInvalidOperands(SI.getOperand(0), SI.getOperand(1), + SI.getOperand(2)), + "Invalid operands for select instruction!", &SI); - Assert1(SI.getTrueValue()->getType() == SI.getType(), - "Select values must have same type as select instruction!", &SI); + Assert(SI.getTrueValue()->getType() == SI.getType(), + "Select values must have same type as select instruction!", &SI); visitInstruction(SI); } @@ -1503,7 +1513,7 @@ void Verifier::visitSelectInst(SelectInst &SI) { /// a pass, if any exist, it's an error. 
/// void Verifier::visitUserOp1(Instruction &I) { - Assert1(0, "User-defined operators should not live outside of a pass!", &I); + Assert(0, "User-defined operators should not live outside of a pass!", &I); } void Verifier::visitTruncInst(TruncInst &I) { @@ -1515,11 +1525,11 @@ void Verifier::visitTruncInst(TruncInst &I) { unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); - Assert1(SrcTy->isIntOrIntVectorTy(), "Trunc only operates on integer", &I); - Assert1(DestTy->isIntOrIntVectorTy(), "Trunc only produces integer", &I); - Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(), - "trunc source and destination must both be a vector or neither", &I); - Assert1(SrcBitSize > DestBitSize,"DestTy too big for Trunc", &I); + Assert(SrcTy->isIntOrIntVectorTy(), "Trunc only operates on integer", &I); + Assert(DestTy->isIntOrIntVectorTy(), "Trunc only produces integer", &I); + Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), + "trunc source and destination must both be a vector or neither", &I); + Assert(SrcBitSize > DestBitSize, "DestTy too big for Trunc", &I); visitInstruction(I); } @@ -1530,14 +1540,14 @@ void Verifier::visitZExtInst(ZExtInst &I) { Type *DestTy = I.getType(); // Get the size of the types in bits, we'll need this later - Assert1(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I); - Assert1(DestTy->isIntOrIntVectorTy(), "ZExt only produces an integer", &I); - Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(), - "zext source and destination must both be a vector or neither", &I); + Assert(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I); + Assert(DestTy->isIntOrIntVectorTy(), "ZExt only produces an integer", &I); + Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), + "zext source and destination must both be a vector or neither", &I); unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); - Assert1(SrcBitSize < DestBitSize,"Type too small for ZExt", &I); + Assert(SrcBitSize < DestBitSize, "Type too small for ZExt", &I); visitInstruction(I); } @@ -1551,11 +1561,11 @@ void Verifier::visitSExtInst(SExtInst &I) { unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); - Assert1(SrcTy->isIntOrIntVectorTy(), "SExt only operates on integer", &I); - Assert1(DestTy->isIntOrIntVectorTy(), "SExt only produces an integer", &I); - Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(), - "sext source and destination must both be a vector or neither", &I); - Assert1(SrcBitSize < DestBitSize,"Type too small for SExt", &I); + Assert(SrcTy->isIntOrIntVectorTy(), "SExt only operates on integer", &I); + Assert(DestTy->isIntOrIntVectorTy(), "SExt only produces an integer", &I); + Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), + "sext source and destination must both be a vector or neither", &I); + Assert(SrcBitSize < DestBitSize, "Type too small for SExt", &I); visitInstruction(I); } @@ -1568,11 +1578,11 @@ void Verifier::visitFPTruncInst(FPTruncInst &I) { unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); - Assert1(SrcTy->isFPOrFPVectorTy(),"FPTrunc only operates on FP", &I); - Assert1(DestTy->isFPOrFPVectorTy(),"FPTrunc only produces an FP", &I); - Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(), - "fptrunc source and destination must both be a vector or neither",&I); - Assert1(SrcBitSize > DestBitSize,"DestTy too big for FPTrunc", &I); + 
Assert(SrcTy->isFPOrFPVectorTy(), "FPTrunc only operates on FP", &I); + Assert(DestTy->isFPOrFPVectorTy(), "FPTrunc only produces an FP", &I); + Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), + "fptrunc source and destination must both be a vector or neither", &I); + Assert(SrcBitSize > DestBitSize, "DestTy too big for FPTrunc", &I); visitInstruction(I); } @@ -1586,11 +1596,11 @@ void Verifier::visitFPExtInst(FPExtInst &I) { unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); - Assert1(SrcTy->isFPOrFPVectorTy(),"FPExt only operates on FP", &I); - Assert1(DestTy->isFPOrFPVectorTy(),"FPExt only produces an FP", &I); - Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(), - "fpext source and destination must both be a vector or neither", &I); - Assert1(SrcBitSize < DestBitSize,"DestTy too small for FPExt", &I); + Assert(SrcTy->isFPOrFPVectorTy(), "FPExt only operates on FP", &I); + Assert(DestTy->isFPOrFPVectorTy(), "FPExt only produces an FP", &I); + Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), + "fpext source and destination must both be a vector or neither", &I); + Assert(SrcBitSize < DestBitSize, "DestTy too small for FPExt", &I); visitInstruction(I); } @@ -1603,17 +1613,17 @@ void Verifier::visitUIToFPInst(UIToFPInst &I) { bool SrcVec = SrcTy->isVectorTy(); bool DstVec = DestTy->isVectorTy(); - Assert1(SrcVec == DstVec, - "UIToFP source and dest must both be vector or scalar", &I); - Assert1(SrcTy->isIntOrIntVectorTy(), - "UIToFP source must be integer or integer vector", &I); - Assert1(DestTy->isFPOrFPVectorTy(), - "UIToFP result must be FP or FP vector", &I); + Assert(SrcVec == DstVec, + "UIToFP source and dest must both be vector or scalar", &I); + Assert(SrcTy->isIntOrIntVectorTy(), + "UIToFP source must be integer or integer vector", &I); + Assert(DestTy->isFPOrFPVectorTy(), "UIToFP result must be FP or FP vector", + &I); if (SrcVec && DstVec) - Assert1(cast<VectorType>(SrcTy)->getNumElements() == - cast<VectorType>(DestTy)->getNumElements(), - "UIToFP source and dest vector length mismatch", &I); + Assert(cast<VectorType>(SrcTy)->getNumElements() == + cast<VectorType>(DestTy)->getNumElements(), + "UIToFP source and dest vector length mismatch", &I); visitInstruction(I); } @@ -1626,17 +1636,17 @@ void Verifier::visitSIToFPInst(SIToFPInst &I) { bool SrcVec = SrcTy->isVectorTy(); bool DstVec = DestTy->isVectorTy(); - Assert1(SrcVec == DstVec, - "SIToFP source and dest must both be vector or scalar", &I); - Assert1(SrcTy->isIntOrIntVectorTy(), - "SIToFP source must be integer or integer vector", &I); - Assert1(DestTy->isFPOrFPVectorTy(), - "SIToFP result must be FP or FP vector", &I); + Assert(SrcVec == DstVec, + "SIToFP source and dest must both be vector or scalar", &I); + Assert(SrcTy->isIntOrIntVectorTy(), + "SIToFP source must be integer or integer vector", &I); + Assert(DestTy->isFPOrFPVectorTy(), "SIToFP result must be FP or FP vector", + &I); if (SrcVec && DstVec) - Assert1(cast<VectorType>(SrcTy)->getNumElements() == - cast<VectorType>(DestTy)->getNumElements(), - "SIToFP source and dest vector length mismatch", &I); + Assert(cast<VectorType>(SrcTy)->getNumElements() == + cast<VectorType>(DestTy)->getNumElements(), + "SIToFP source and dest vector length mismatch", &I); visitInstruction(I); } @@ -1649,17 +1659,17 @@ void Verifier::visitFPToUIInst(FPToUIInst &I) { bool SrcVec = SrcTy->isVectorTy(); bool DstVec = DestTy->isVectorTy(); - Assert1(SrcVec == DstVec, - "FPToUI source and dest must both be vector or 
scalar", &I); - Assert1(SrcTy->isFPOrFPVectorTy(), "FPToUI source must be FP or FP vector", - &I); - Assert1(DestTy->isIntOrIntVectorTy(), - "FPToUI result must be integer or integer vector", &I); + Assert(SrcVec == DstVec, + "FPToUI source and dest must both be vector or scalar", &I); + Assert(SrcTy->isFPOrFPVectorTy(), "FPToUI source must be FP or FP vector", + &I); + Assert(DestTy->isIntOrIntVectorTy(), + "FPToUI result must be integer or integer vector", &I); if (SrcVec && DstVec) - Assert1(cast<VectorType>(SrcTy)->getNumElements() == - cast<VectorType>(DestTy)->getNumElements(), - "FPToUI source and dest vector length mismatch", &I); + Assert(cast<VectorType>(SrcTy)->getNumElements() == + cast<VectorType>(DestTy)->getNumElements(), + "FPToUI source and dest vector length mismatch", &I); visitInstruction(I); } @@ -1672,17 +1682,17 @@ void Verifier::visitFPToSIInst(FPToSIInst &I) { bool SrcVec = SrcTy->isVectorTy(); bool DstVec = DestTy->isVectorTy(); - Assert1(SrcVec == DstVec, - "FPToSI source and dest must both be vector or scalar", &I); - Assert1(SrcTy->isFPOrFPVectorTy(), - "FPToSI source must be FP or FP vector", &I); - Assert1(DestTy->isIntOrIntVectorTy(), - "FPToSI result must be integer or integer vector", &I); + Assert(SrcVec == DstVec, + "FPToSI source and dest must both be vector or scalar", &I); + Assert(SrcTy->isFPOrFPVectorTy(), "FPToSI source must be FP or FP vector", + &I); + Assert(DestTy->isIntOrIntVectorTy(), + "FPToSI result must be integer or integer vector", &I); if (SrcVec && DstVec) - Assert1(cast<VectorType>(SrcTy)->getNumElements() == - cast<VectorType>(DestTy)->getNumElements(), - "FPToSI source and dest vector length mismatch", &I); + Assert(cast<VectorType>(SrcTy)->getNumElements() == + cast<VectorType>(DestTy)->getNumElements(), + "FPToSI source and dest vector length mismatch", &I); visitInstruction(I); } @@ -1692,18 +1702,18 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) { Type *SrcTy = I.getOperand(0)->getType(); Type *DestTy = I.getType(); - Assert1(SrcTy->getScalarType()->isPointerTy(), - "PtrToInt source must be pointer", &I); - Assert1(DestTy->getScalarType()->isIntegerTy(), - "PtrToInt result must be integral", &I); - Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(), - "PtrToInt type mismatch", &I); + Assert(SrcTy->getScalarType()->isPointerTy(), + "PtrToInt source must be pointer", &I); + Assert(DestTy->getScalarType()->isIntegerTy(), + "PtrToInt result must be integral", &I); + Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), "PtrToInt type mismatch", + &I); if (SrcTy->isVectorTy()) { VectorType *VSrc = dyn_cast<VectorType>(SrcTy); VectorType *VDest = dyn_cast<VectorType>(DestTy); - Assert1(VSrc->getNumElements() == VDest->getNumElements(), - "PtrToInt Vector width mismatch", &I); + Assert(VSrc->getNumElements() == VDest->getNumElements(), + "PtrToInt Vector width mismatch", &I); } visitInstruction(I); @@ -1714,23 +1724,23 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) { Type *SrcTy = I.getOperand(0)->getType(); Type *DestTy = I.getType(); - Assert1(SrcTy->getScalarType()->isIntegerTy(), - "IntToPtr source must be an integral", &I); - Assert1(DestTy->getScalarType()->isPointerTy(), - "IntToPtr result must be a pointer",&I); - Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(), - "IntToPtr type mismatch", &I); + Assert(SrcTy->getScalarType()->isIntegerTy(), + "IntToPtr source must be an integral", &I); + Assert(DestTy->getScalarType()->isPointerTy(), + "IntToPtr result must be a pointer", &I); + Assert(SrcTy->isVectorTy() == 
DestTy->isVectorTy(), "IntToPtr type mismatch", + &I); if (SrcTy->isVectorTy()) { VectorType *VSrc = dyn_cast<VectorType>(SrcTy); VectorType *VDest = dyn_cast<VectorType>(DestTy); - Assert1(VSrc->getNumElements() == VDest->getNumElements(), - "IntToPtr Vector width mismatch", &I); + Assert(VSrc->getNumElements() == VDest->getNumElements(), + "IntToPtr Vector width mismatch", &I); } visitInstruction(I); } void Verifier::visitBitCastInst(BitCastInst &I) { - Assert1( + Assert( CastInst::castIsValid(Instruction::BitCast, I.getOperand(0), I.getType()), "Invalid bitcast", &I); visitInstruction(I); @@ -1740,15 +1750,15 @@ void Verifier::visitAddrSpaceCastInst(AddrSpaceCastInst &I) { Type *SrcTy = I.getOperand(0)->getType(); Type *DestTy = I.getType(); - Assert1(SrcTy->isPtrOrPtrVectorTy(), - "AddrSpaceCast source must be a pointer", &I); - Assert1(DestTy->isPtrOrPtrVectorTy(), - "AddrSpaceCast result must be a pointer", &I); - Assert1(SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace(), - "AddrSpaceCast must be between different address spaces", &I); + Assert(SrcTy->isPtrOrPtrVectorTy(), "AddrSpaceCast source must be a pointer", + &I); + Assert(DestTy->isPtrOrPtrVectorTy(), "AddrSpaceCast result must be a pointer", + &I); + Assert(SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace(), + "AddrSpaceCast must be between different address spaces", &I); if (SrcTy->isVectorTy()) - Assert1(SrcTy->getVectorNumElements() == DestTy->getVectorNumElements(), - "AddrSpaceCast vector pointer number of elements mismatch", &I); + Assert(SrcTy->getVectorNumElements() == DestTy->getVectorNumElements(), + "AddrSpaceCast vector pointer number of elements mismatch", &I); visitInstruction(I); } @@ -1759,16 +1769,15 @@ void Verifier::visitPHINode(PHINode &PN) { // This can be tested by checking whether the instruction before this is // either nonexistent (because this is begin()) or is a PHI node. If not, // then there is some other instruction before a PHI. - Assert2(&PN == &PN.getParent()->front() || - isa<PHINode>(--BasicBlock::iterator(&PN)), - "PHI nodes not grouped at top of basic block!", - &PN, PN.getParent()); + Assert(&PN == &PN.getParent()->front() || + isa<PHINode>(--BasicBlock::iterator(&PN)), + "PHI nodes not grouped at top of basic block!", &PN, PN.getParent()); // Check that all of the values of the PHI node have the same type as the // result, and that the incoming blocks are really basic blocks. for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { - Assert1(PN.getType() == PN.getIncomingValue(i)->getType(), - "PHI node operands are not the same type as the result!", &PN); + Assert(PN.getType() == PN.getIncomingValue(i)->getType(), + "PHI node operands are not the same type as the result!", &PN); } // All other PHI node constraints are checked in the visitBasicBlock method. 
@@ -1779,32 +1788,32 @@ void Verifier::visitPHINode(PHINode &PN) { void Verifier::VerifyCallSite(CallSite CS) { Instruction *I = CS.getInstruction(); - Assert1(CS.getCalledValue()->getType()->isPointerTy(), - "Called function must be a pointer!", I); + Assert(CS.getCalledValue()->getType()->isPointerTy(), + "Called function must be a pointer!", I); PointerType *FPTy = cast<PointerType>(CS.getCalledValue()->getType()); - Assert1(FPTy->getElementType()->isFunctionTy(), - "Called function is not pointer to function type!", I); + Assert(FPTy->getElementType()->isFunctionTy(), + "Called function is not pointer to function type!", I); FunctionType *FTy = cast<FunctionType>(FPTy->getElementType()); // Verify that the correct number of arguments are being passed if (FTy->isVarArg()) - Assert1(CS.arg_size() >= FTy->getNumParams(), - "Called function requires more parameters than were provided!",I); + Assert(CS.arg_size() >= FTy->getNumParams(), + "Called function requires more parameters than were provided!", I); else - Assert1(CS.arg_size() == FTy->getNumParams(), - "Incorrect number of arguments passed to called function!", I); + Assert(CS.arg_size() == FTy->getNumParams(), + "Incorrect number of arguments passed to called function!", I); // Verify that all arguments to the call match the function type. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) - Assert3(CS.getArgument(i)->getType() == FTy->getParamType(i), - "Call parameter type does not match function signature!", - CS.getArgument(i), FTy->getParamType(i), I); + Assert(CS.getArgument(i)->getType() == FTy->getParamType(i), + "Call parameter type does not match function signature!", + CS.getArgument(i), FTy->getParamType(i), I); AttributeSet Attrs = CS.getAttributes(); - Assert1(VerifyAttributeCount(Attrs, CS.arg_size()), - "Attribute after last parameter!", I); + Assert(VerifyAttributeCount(Attrs, CS.arg_size()), + "Attribute after last parameter!", I); // Verify call attributes. 
VerifyFunctionAttrs(FTy, Attrs, I); @@ -1815,8 +1824,8 @@ void Verifier::VerifyCallSite(CallSite CS) { if (CS.hasInAllocaArgument()) { Value *InAllocaArg = CS.getArgument(FTy->getNumParams() - 1); if (auto AI = dyn_cast<AllocaInst>(InAllocaArg->stripInBoundsOffsets())) - Assert2(AI->isUsedWithInAlloca(), - "inalloca argument for call has mismatched alloca", AI, I); + Assert(AI->isUsedWithInAlloca(), + "inalloca argument for call has mismatched alloca", AI, I); } if (FTy->isVarArg()) { @@ -1837,25 +1846,25 @@ void Verifier::VerifyCallSite(CallSite CS) { VerifyParameterAttrs(Attrs, Idx, Ty, false, I); if (Attrs.hasAttribute(Idx, Attribute::Nest)) { - Assert1(!SawNest, "More than one parameter has attribute nest!", I); + Assert(!SawNest, "More than one parameter has attribute nest!", I); SawNest = true; } if (Attrs.hasAttribute(Idx, Attribute::Returned)) { - Assert1(!SawReturned, "More than one parameter has attribute returned!", - I); - Assert1(Ty->canLosslesslyBitCastTo(FTy->getReturnType()), - "Incompatible argument and return types for 'returned' " - "attribute", I); + Assert(!SawReturned, "More than one parameter has attribute returned!", + I); + Assert(Ty->canLosslesslyBitCastTo(FTy->getReturnType()), + "Incompatible argument and return types for 'returned' " + "attribute", + I); SawReturned = true; } - Assert1(!Attrs.hasAttribute(Idx, Attribute::StructRet), - "Attribute 'sret' cannot be used for vararg call arguments!", I); + Assert(!Attrs.hasAttribute(Idx, Attribute::StructRet), + "Attribute 'sret' cannot be used for vararg call arguments!", I); if (Attrs.hasAttribute(Idx, Attribute::InAlloca)) - Assert1(Idx == CS.arg_size(), "inalloca isn't on the last argument!", - I); + Assert(Idx == CS.arg_size(), "inalloca isn't on the last argument!", I); } } @@ -1864,8 +1873,8 @@ void Verifier::VerifyCallSite(CallSite CS) { !CS.getCalledFunction()->getName().startswith("llvm.")) { for (FunctionType::param_iterator PI = FTy->param_begin(), PE = FTy->param_end(); PI != PE; ++PI) - Assert1(!(*PI)->isMetadataTy(), - "Function has metadata parameter but isn't an intrinsic", I); + Assert(!(*PI)->isMetadataTy(), + "Function has metadata parameter but isn't an intrinsic", I); } visitInstruction(*I); @@ -1898,7 +1907,7 @@ static AttrBuilder getParameterABIAttributes(int I, AttributeSet Attrs) { } void Verifier::verifyMustTailCall(CallInst &CI) { - Assert1(!CI.isInlineAsm(), "cannot use musttail call with inline asm", &CI); + Assert(!CI.isInlineAsm(), "cannot use musttail call with inline asm", &CI); // - The caller and callee prototypes must match. 
Pointer types of // parameters or return types may differ in pointee type, but not @@ -1910,21 +1919,21 @@ void Verifier::verifyMustTailCall(CallInst &CI) { }; FunctionType *CallerTy = GetFnTy(F); FunctionType *CalleeTy = GetFnTy(CI.getCalledValue()); - Assert1(CallerTy->getNumParams() == CalleeTy->getNumParams(), - "cannot guarantee tail call due to mismatched parameter counts", &CI); - Assert1(CallerTy->isVarArg() == CalleeTy->isVarArg(), - "cannot guarantee tail call due to mismatched varargs", &CI); - Assert1(isTypeCongruent(CallerTy->getReturnType(), CalleeTy->getReturnType()), - "cannot guarantee tail call due to mismatched return types", &CI); + Assert(CallerTy->getNumParams() == CalleeTy->getNumParams(), + "cannot guarantee tail call due to mismatched parameter counts", &CI); + Assert(CallerTy->isVarArg() == CalleeTy->isVarArg(), + "cannot guarantee tail call due to mismatched varargs", &CI); + Assert(isTypeCongruent(CallerTy->getReturnType(), CalleeTy->getReturnType()), + "cannot guarantee tail call due to mismatched return types", &CI); for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) { - Assert1( + Assert( isTypeCongruent(CallerTy->getParamType(I), CalleeTy->getParamType(I)), "cannot guarantee tail call due to mismatched parameter types", &CI); } // - The calling conventions of the caller and callee must match. - Assert1(F->getCallingConv() == CI.getCallingConv(), - "cannot guarantee tail call due to mismatched calling conv", &CI); + Assert(F->getCallingConv() == CI.getCallingConv(), + "cannot guarantee tail call due to mismatched calling conv", &CI); // - All ABI-impacting function attributes, such as sret, byval, inreg, // returned, and inalloca, must match. @@ -1933,9 +1942,10 @@ void Verifier::verifyMustTailCall(CallInst &CI) { for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) { AttrBuilder CallerABIAttrs = getParameterABIAttributes(I, CallerAttrs); AttrBuilder CalleeABIAttrs = getParameterABIAttributes(I, CalleeAttrs); - Assert2(CallerABIAttrs == CalleeABIAttrs, - "cannot guarantee tail call due to mismatched ABI impacting " - "function attributes", &CI, CI.getOperand(I)); + Assert(CallerABIAttrs == CalleeABIAttrs, + "cannot guarantee tail call due to mismatched ABI impacting " + "function attributes", + &CI, CI.getOperand(I)); } // - The call must immediately precede a :ref:`ret <i_ret>` instruction, @@ -1947,18 +1957,18 @@ void Verifier::verifyMustTailCall(CallInst &CI) { // Handle the optional bitcast. if (BitCastInst *BI = dyn_cast_or_null<BitCastInst>(Next)) { - Assert1(BI->getOperand(0) == RetVal, - "bitcast following musttail call must use the call", BI); + Assert(BI->getOperand(0) == RetVal, + "bitcast following musttail call must use the call", BI); RetVal = BI; Next = BI->getNextNode(); } // Check the return. ReturnInst *Ret = dyn_cast_or_null<ReturnInst>(Next); - Assert1(Ret, "musttail call must precede a ret with an optional bitcast", - &CI); + Assert(Ret, "musttail call must precede a ret with an optional bitcast", + &CI); - Assert1(!Ret->getReturnValue() || Ret->getReturnValue() == RetVal, - "musttail call result must be returned", Ret); + Assert(!Ret->getReturnValue() || Ret->getReturnValue() == RetVal, + "musttail call result must be returned", Ret); } void Verifier::visitCallInst(CallInst &CI) { @@ -1977,8 +1987,8 @@ void Verifier::visitInvokeInst(InvokeInst &II) { // Verify that there is a landingpad instruction as the first non-PHI // instruction of the 'unwind' destination.
- Assert1(II.getUnwindDest()->isLandingPad(), - "The unwind destination does not have a landingpad instruction!",&II); + Assert(II.getUnwindDest()->isLandingPad(), + "The unwind destination does not have a landingpad instruction!", &II); if (Function *F = II.getCalledFunction()) // TODO: Ideally we should use visitIntrinsicFunction here. But it uses @@ -1994,8 +2004,8 @@ void Verifier::visitInvokeInst(InvokeInst &II) { /// of the same type! /// void Verifier::visitBinaryOperator(BinaryOperator &B) { - Assert1(B.getOperand(0)->getType() == B.getOperand(1)->getType(), - "Both operands to a binary operator are not of the same type!", &B); + Assert(B.getOperand(0)->getType() == B.getOperand(1)->getType(), + "Both operands to a binary operator are not of the same type!", &B); switch (B.getOpcode()) { // Check that integer arithmetic operators are only used with @@ -2007,11 +2017,12 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) { case Instruction::UDiv: case Instruction::SRem: case Instruction::URem: - Assert1(B.getType()->isIntOrIntVectorTy(), - "Integer arithmetic operators only work with integral types!", &B); - Assert1(B.getType() == B.getOperand(0)->getType(), - "Integer arithmetic operators must have same type " - "for operands and result!", &B); + Assert(B.getType()->isIntOrIntVectorTy(), + "Integer arithmetic operators only work with integral types!", &B); + Assert(B.getType() == B.getOperand(0)->getType(), + "Integer arithmetic operators must have same type " + "for operands and result!", + &B); break; // Check that floating-point arithmetic operators are only used with // floating-point operands. @@ -2020,30 +2031,32 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) { case Instruction::FMul: case Instruction::FDiv: case Instruction::FRem: - Assert1(B.getType()->isFPOrFPVectorTy(), - "Floating-point arithmetic operators only work with " - "floating-point types!", &B); - Assert1(B.getType() == B.getOperand(0)->getType(), - "Floating-point arithmetic operators must have same type " - "for operands and result!", &B); + Assert(B.getType()->isFPOrFPVectorTy(), + "Floating-point arithmetic operators only work with " + "floating-point types!", + &B); + Assert(B.getType() == B.getOperand(0)->getType(), + "Floating-point arithmetic operators must have same type " + "for operands and result!", + &B); break; // Check that logical operators are only used with integral operands. 
case Instruction::And: case Instruction::Or: case Instruction::Xor: - Assert1(B.getType()->isIntOrIntVectorTy(), - "Logical operators only work with integral types!", &B); - Assert1(B.getType() == B.getOperand(0)->getType(), - "Logical operators must have same type for operands and result!", - &B); + Assert(B.getType()->isIntOrIntVectorTy(), + "Logical operators only work with integral types!", &B); + Assert(B.getType() == B.getOperand(0)->getType(), + "Logical operators must have same type for operands and result!", + &B); break; case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: - Assert1(B.getType()->isIntOrIntVectorTy(), - "Shifts only work with integral types!", &B); - Assert1(B.getType() == B.getOperand(0)->getType(), - "Shift return type must be same as operands!", &B); + Assert(B.getType()->isIntOrIntVectorTy(), + "Shifts only work with integral types!", &B); + Assert(B.getType() == B.getOperand(0)->getType(), + "Shift return type must be same as operands!", &B); break; default: llvm_unreachable("Unknown BinaryOperator opcode!"); @@ -2056,15 +2069,15 @@ void Verifier::visitICmpInst(ICmpInst &IC) { // Check that the operands are the same type Type *Op0Ty = IC.getOperand(0)->getType(); Type *Op1Ty = IC.getOperand(1)->getType(); - Assert1(Op0Ty == Op1Ty, - "Both operands to ICmp instruction are not of the same type!", &IC); + Assert(Op0Ty == Op1Ty, + "Both operands to ICmp instruction are not of the same type!", &IC); // Check that the operands are the right type - Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->getScalarType()->isPointerTy(), - "Invalid operand types for ICmp instruction", &IC); + Assert(Op0Ty->isIntOrIntVectorTy() || Op0Ty->getScalarType()->isPointerTy(), + "Invalid operand types for ICmp instruction", &IC); // Check that the predicate is valid. - Assert1(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE && - IC.getPredicate() <= CmpInst::LAST_ICMP_PREDICATE, - "Invalid predicate in ICmp instruction!", &IC); + Assert(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE && + IC.getPredicate() <= CmpInst::LAST_ICMP_PREDICATE, + "Invalid predicate in ICmp instruction!", &IC); visitInstruction(IC); } @@ -2073,72 +2086,72 @@ void Verifier::visitFCmpInst(FCmpInst &FC) { // Check that the operands are the same type Type *Op0Ty = FC.getOperand(0)->getType(); Type *Op1Ty = FC.getOperand(1)->getType(); - Assert1(Op0Ty == Op1Ty, - "Both operands to FCmp instruction are not of the same type!", &FC); + Assert(Op0Ty == Op1Ty, + "Both operands to FCmp instruction are not of the same type!", &FC); // Check that the operands are the right type - Assert1(Op0Ty->isFPOrFPVectorTy(), - "Invalid operand types for FCmp instruction", &FC); + Assert(Op0Ty->isFPOrFPVectorTy(), + "Invalid operand types for FCmp instruction", &FC); // Check that the predicate is valid. 
- Assert1(FC.getPredicate() >= CmpInst::FIRST_FCMP_PREDICATE && - FC.getPredicate() <= CmpInst::LAST_FCMP_PREDICATE, - "Invalid predicate in FCmp instruction!", &FC); + Assert(FC.getPredicate() >= CmpInst::FIRST_FCMP_PREDICATE && + FC.getPredicate() <= CmpInst::LAST_FCMP_PREDICATE, + "Invalid predicate in FCmp instruction!", &FC); visitInstruction(FC); } void Verifier::visitExtractElementInst(ExtractElementInst &EI) { - Assert1(ExtractElementInst::isValidOperands(EI.getOperand(0), - EI.getOperand(1)), - "Invalid extractelement operands!", &EI); + Assert( + ExtractElementInst::isValidOperands(EI.getOperand(0), EI.getOperand(1)), + "Invalid extractelement operands!", &EI); visitInstruction(EI); } void Verifier::visitInsertElementInst(InsertElementInst &IE) { - Assert1(InsertElementInst::isValidOperands(IE.getOperand(0), - IE.getOperand(1), - IE.getOperand(2)), - "Invalid insertelement operands!", &IE); + Assert(InsertElementInst::isValidOperands(IE.getOperand(0), IE.getOperand(1), + IE.getOperand(2)), + "Invalid insertelement operands!", &IE); visitInstruction(IE); } void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) { - Assert1(ShuffleVectorInst::isValidOperands(SV.getOperand(0), SV.getOperand(1), - SV.getOperand(2)), - "Invalid shufflevector operands!", &SV); + Assert(ShuffleVectorInst::isValidOperands(SV.getOperand(0), SV.getOperand(1), + SV.getOperand(2)), + "Invalid shufflevector operands!", &SV); visitInstruction(SV); } void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { Type *TargetTy = GEP.getPointerOperandType()->getScalarType(); - Assert1(isa<PointerType>(TargetTy), - "GEP base pointer is not a vector or a vector of pointers", &GEP); - Assert1(cast<PointerType>(TargetTy)->getElementType()->isSized(), - "GEP into unsized type!", &GEP); - Assert1(GEP.getPointerOperandType()->isVectorTy() == - GEP.getType()->isVectorTy(), "Vector GEP must return a vector value", - &GEP); + Assert(isa<PointerType>(TargetTy), + "GEP base pointer is not a vector or a vector of pointers", &GEP); + Assert(cast<PointerType>(TargetTy)->getElementType()->isSized(), + "GEP into unsized type!", &GEP); + Assert(GEP.getPointerOperandType()->isVectorTy() == + GEP.getType()->isVectorTy(), + "Vector GEP must return a vector value", &GEP); SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end()); Type *ElTy = GetElementPtrInst::getIndexedType(GEP.getPointerOperandType(), Idxs); - Assert1(ElTy, "Invalid indices for GEP pointer type!", &GEP); + Assert(ElTy, "Invalid indices for GEP pointer type!", &GEP); - Assert2(GEP.getType()->getScalarType()->isPointerTy() && - cast<PointerType>(GEP.getType()->getScalarType())->getElementType() - == ElTy, "GEP is not of right type for indices!", &GEP, ElTy); + Assert(GEP.getType()->getScalarType()->isPointerTy() && + cast<PointerType>(GEP.getType()->getScalarType()) + ->getElementType() == ElTy, + "GEP is not of right type for indices!", &GEP, ElTy); if (GEP.getPointerOperandType()->isVectorTy()) { // Additional checks for vector GEPs. 
unsigned GepWidth = GEP.getPointerOperandType()->getVectorNumElements(); - Assert1(GepWidth == GEP.getType()->getVectorNumElements(), - "Vector GEP result width doesn't match operand's", &GEP); + Assert(GepWidth == GEP.getType()->getVectorNumElements(), + "Vector GEP result width doesn't match operand's", &GEP); for (unsigned i = 0, e = Idxs.size(); i != e; ++i) { Type *IndexTy = Idxs[i]->getType(); - Assert1(IndexTy->isVectorTy(), - "Vector GEP must have vector indices!", &GEP); + Assert(IndexTy->isVectorTy(), "Vector GEP must have vector indices!", + &GEP); unsigned IndexWidth = IndexTy->getVectorNumElements(); - Assert1(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP); + Assert(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP); } } visitInstruction(GEP); @@ -2155,34 +2168,33 @@ void Verifier::visitRangeMetadata(Instruction& I, "precondition violation"); unsigned NumOperands = Range->getNumOperands(); - Assert1(NumOperands % 2 == 0, "Unfinished range!", Range); + Assert(NumOperands % 2 == 0, "Unfinished range!", Range); unsigned NumRanges = NumOperands / 2; - Assert1(NumRanges >= 1, "It should have at least one range!", Range); - + Assert(NumRanges >= 1, "It should have at least one range!", Range); + ConstantRange LastRange(1); // Dummy initial value for (unsigned i = 0; i < NumRanges; ++i) { ConstantInt *Low = mdconst::dyn_extract<ConstantInt>(Range->getOperand(2 * i)); - Assert1(Low, "The lower limit must be an integer!", Low); + Assert(Low, "The lower limit must be an integer!", Low); ConstantInt *High = mdconst::dyn_extract<ConstantInt>(Range->getOperand(2 * i + 1)); - Assert1(High, "The upper limit must be an integer!", High); - Assert1(High->getType() == Low->getType() && - High->getType() == Ty, "Range types must match instruction type!", - &I); - + Assert(High, "The upper limit must be an integer!", High); + Assert(High->getType() == Low->getType() && High->getType() == Ty, + "Range types must match instruction type!", &I); + APInt HighV = High->getValue(); APInt LowV = Low->getValue(); ConstantRange CurRange(LowV, HighV); - Assert1(!CurRange.isEmptySet() && !CurRange.isFullSet(), - "Range must not be empty!", Range); + Assert(!CurRange.isEmptySet() && !CurRange.isFullSet(), + "Range must not be empty!", Range); if (i != 0) { - Assert1(CurRange.intersectWith(LastRange).isEmptySet(), - "Intervals are overlapping", Range); - Assert1(LowV.sgt(LastRange.getLower()), "Intervals are not in order", - Range); - Assert1(!isContiguous(CurRange, LastRange), "Intervals are contiguous", - Range); + Assert(CurRange.intersectWith(LastRange).isEmptySet(), + "Intervals are overlapping", Range); + Assert(LowV.sgt(LastRange.getLower()), "Intervals are not in order", + Range); + Assert(!isContiguous(CurRange, LastRange), "Intervals are contiguous", + Range); } LastRange = ConstantRange(LowV, HighV); } @@ -2192,38 +2204,37 @@ void Verifier::visitRangeMetadata(Instruction& I, APInt FirstHigh = mdconst::dyn_extract<ConstantInt>(Range->getOperand(1))->getValue(); ConstantRange FirstRange(FirstLow, FirstHigh); - Assert1(FirstRange.intersectWith(LastRange).isEmptySet(), - "Intervals are overlapping", Range); - Assert1(!isContiguous(FirstRange, LastRange), "Intervals are contiguous", - Range); + Assert(FirstRange.intersectWith(LastRange).isEmptySet(), + "Intervals are overlapping", Range); + Assert(!isContiguous(FirstRange, LastRange), "Intervals are contiguous", + Range); } } void Verifier::visitLoadInst(LoadInst &LI) { PointerType *PTy = 
dyn_cast<PointerType>(LI.getOperand(0)->getType()); - Assert1(PTy, "Load operand must be a pointer.", &LI); + Assert(PTy, "Load operand must be a pointer.", &LI); Type *ElTy = PTy->getElementType(); - Assert2(ElTy == LI.getType(), - "Load result type does not match pointer operand type!", &LI, ElTy); - Assert1(LI.getAlignment() <= Value::MaximumAlignment, - "huge alignment values are unsupported", &LI); + Assert(ElTy == LI.getType(), + "Load result type does not match pointer operand type!", &LI, ElTy); + Assert(LI.getAlignment() <= Value::MaximumAlignment, + "huge alignment values are unsupported", &LI); if (LI.isAtomic()) { - Assert1(LI.getOrdering() != Release && LI.getOrdering() != AcquireRelease, - "Load cannot have Release ordering", &LI); - Assert1(LI.getAlignment() != 0, - "Atomic load must specify explicit alignment", &LI); + Assert(LI.getOrdering() != Release && LI.getOrdering() != AcquireRelease, + "Load cannot have Release ordering", &LI); + Assert(LI.getAlignment() != 0, + "Atomic load must specify explicit alignment", &LI); if (!ElTy->isPointerTy()) { - Assert2(ElTy->isIntegerTy(), - "atomic load operand must have integer type!", - &LI, ElTy); + Assert(ElTy->isIntegerTy(), "atomic load operand must have integer type!", + &LI, ElTy); unsigned Size = ElTy->getPrimitiveSizeInBits(); - Assert2(Size >= 8 && !(Size & (Size - 1)), - "atomic load operand must be power-of-two byte-sized integer", - &LI, ElTy); + Assert(Size >= 8 && !(Size & (Size - 1)), + "atomic load operand must be power-of-two byte-sized integer", &LI, + ElTy); } } else { - Assert1(LI.getSynchScope() == CrossThread, - "Non-atomic load cannot have SynchronizationScope specified", &LI); + Assert(LI.getSynchScope() == CrossThread, + "Non-atomic load cannot have SynchronizationScope specified", &LI); } visitInstruction(LI); @@ -2231,30 +2242,28 @@ void Verifier::visitLoadInst(LoadInst &LI) { void Verifier::visitStoreInst(StoreInst &SI) { PointerType *PTy = dyn_cast<PointerType>(SI.getOperand(1)->getType()); - Assert1(PTy, "Store operand must be a pointer.", &SI); + Assert(PTy, "Store operand must be a pointer.", &SI); Type *ElTy = PTy->getElementType(); - Assert2(ElTy == SI.getOperand(0)->getType(), - "Stored value type does not match pointer operand type!", - &SI, ElTy); - Assert1(SI.getAlignment() <= Value::MaximumAlignment, - "huge alignment values are unsupported", &SI); + Assert(ElTy == SI.getOperand(0)->getType(), + "Stored value type does not match pointer operand type!", &SI, ElTy); + Assert(SI.getAlignment() <= Value::MaximumAlignment, + "huge alignment values are unsupported", &SI); if (SI.isAtomic()) { - Assert1(SI.getOrdering() != Acquire && SI.getOrdering() != AcquireRelease, - "Store cannot have Acquire ordering", &SI); - Assert1(SI.getAlignment() != 0, - "Atomic store must specify explicit alignment", &SI); + Assert(SI.getOrdering() != Acquire && SI.getOrdering() != AcquireRelease, + "Store cannot have Acquire ordering", &SI); + Assert(SI.getAlignment() != 0, + "Atomic store must specify explicit alignment", &SI); if (!ElTy->isPointerTy()) { - Assert2(ElTy->isIntegerTy(), - "atomic store operand must have integer type!", - &SI, ElTy); + Assert(ElTy->isIntegerTy(), + "atomic store operand must have integer type!", &SI, ElTy); unsigned Size = ElTy->getPrimitiveSizeInBits(); - Assert2(Size >= 8 && !(Size & (Size - 1)), - "atomic store operand must be power-of-two byte-sized integer", - &SI, ElTy); + Assert(Size >= 8 && !(Size & (Size - 1)), + "atomic store operand must be power-of-two byte-sized integer", + 
&SI, ElTy); } } else {
- Assert1(SI.getSynchScope() == CrossThread,
- "Non-atomic store cannot have SynchronizationScope specified", &SI);
+ Assert(SI.getSynchScope() == CrossThread,
+ "Non-atomic store cannot have SynchronizationScope specified", &SI);
} visitInstruction(SI); }
@@ -2262,15 +2271,15 @@ void Verifier::visitStoreInst(StoreInst &SI) {
void Verifier::visitAllocaInst(AllocaInst &AI) {
SmallPtrSet<const Type*, 4> Visited;
PointerType *PTy = AI.getType();
- Assert1(PTy->getAddressSpace() == 0,
- "Allocation instruction pointer not in the generic address space!",
- &AI);
- Assert1(PTy->getElementType()->isSized(&Visited), "Cannot allocate unsized type",
- &AI);
- Assert1(AI.getArraySize()->getType()->isIntegerTy(),
- "Alloca array size must have integer type", &AI);
- Assert1(AI.getAlignment() <= Value::MaximumAlignment,
- "huge alignment values are unsupported", &AI);
+ Assert(PTy->getAddressSpace() == 0,
+ "Allocation instruction pointer not in the generic address space!",
+ &AI);
+ Assert(PTy->getElementType()->isSized(&Visited),
+ "Cannot allocate unsized type", &AI);
+ Assert(AI.getArraySize()->getType()->isIntegerTy(),
+ "Alloca array size must have integer type", &AI);
+ Assert(AI.getAlignment() <= Value::MaximumAlignment,
+ "huge alignment values are unsupported", &AI);
visitInstruction(AI); }
@@ -2278,87 +2287,83 @@ void Verifier::visitAllocaInst(AllocaInst &AI) {
void Verifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI) {
// FIXME: more conditions???
- Assert1(CXI.getSuccessOrdering() != NotAtomic,
- "cmpxchg instructions must be atomic.", &CXI);
- Assert1(CXI.getFailureOrdering() != NotAtomic,
- "cmpxchg instructions must be atomic.", &CXI);
- Assert1(CXI.getSuccessOrdering() != Unordered,
- "cmpxchg instructions cannot be unordered.", &CXI);
- Assert1(CXI.getFailureOrdering() != Unordered,
- "cmpxchg instructions cannot be unordered.", &CXI);
- Assert1(CXI.getSuccessOrdering() >= CXI.getFailureOrdering(),
- "cmpxchg instructions must be at least as constrained on success as fail",
- &CXI);
- Assert1(CXI.getFailureOrdering() != Release &&
- CXI.getFailureOrdering() != AcquireRelease,
- "cmpxchg failure ordering cannot include release semantics", &CXI);
+ Assert(CXI.getSuccessOrdering() != NotAtomic,
+ "cmpxchg instructions must be atomic.", &CXI);
+ Assert(CXI.getFailureOrdering() != NotAtomic,
+ "cmpxchg instructions must be atomic.", &CXI);
+ Assert(CXI.getSuccessOrdering() != Unordered,
+ "cmpxchg instructions cannot be unordered.", &CXI);
+ Assert(CXI.getFailureOrdering() != Unordered,
+ "cmpxchg instructions cannot be unordered.", &CXI);
+ Assert(CXI.getSuccessOrdering() >= CXI.getFailureOrdering(),
+ "cmpxchg instructions must be at least as constrained on success as fail",
+ &CXI);
+ Assert(CXI.getFailureOrdering() != Release &&
+ CXI.getFailureOrdering() != AcquireRelease,
+ "cmpxchg failure ordering cannot include release semantics", &CXI);
PointerType *PTy = dyn_cast<PointerType>(CXI.getOperand(0)->getType());
- Assert1(PTy, "First cmpxchg operand must be a pointer.", &CXI);
+ Assert(PTy, "First cmpxchg operand must be a pointer.", &CXI);
Type *ElTy = PTy->getElementType();
- Assert2(ElTy->isIntegerTy(),
- "cmpxchg operand must have integer type!",
- &CXI, ElTy);
+ Assert(ElTy->isIntegerTy(), "cmpxchg operand must have integer type!", &CXI,
+ ElTy);
unsigned Size = ElTy->getPrimitiveSizeInBits();
- Assert2(Size >= 8 && !(Size & (Size - 1)),
- "cmpxchg operand must be power-of-two byte-sized integer",
- &CXI, ElTy);
- Assert2(ElTy == CXI.getOperand(1)->getType(),
-
"Expected value type does not match pointer operand type!", - &CXI, ElTy); - Assert2(ElTy == CXI.getOperand(2)->getType(), - "Stored value type does not match pointer operand type!", - &CXI, ElTy); + Assert(Size >= 8 && !(Size & (Size - 1)), + "cmpxchg operand must be power-of-two byte-sized integer", &CXI, ElTy); + Assert(ElTy == CXI.getOperand(1)->getType(), + "Expected value type does not match pointer operand type!", &CXI, + ElTy); + Assert(ElTy == CXI.getOperand(2)->getType(), + "Stored value type does not match pointer operand type!", &CXI, ElTy); visitInstruction(CXI); } void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) { - Assert1(RMWI.getOrdering() != NotAtomic, - "atomicrmw instructions must be atomic.", &RMWI); - Assert1(RMWI.getOrdering() != Unordered, - "atomicrmw instructions cannot be unordered.", &RMWI); + Assert(RMWI.getOrdering() != NotAtomic, + "atomicrmw instructions must be atomic.", &RMWI); + Assert(RMWI.getOrdering() != Unordered, + "atomicrmw instructions cannot be unordered.", &RMWI); PointerType *PTy = dyn_cast<PointerType>(RMWI.getOperand(0)->getType()); - Assert1(PTy, "First atomicrmw operand must be a pointer.", &RMWI); + Assert(PTy, "First atomicrmw operand must be a pointer.", &RMWI); Type *ElTy = PTy->getElementType(); - Assert2(ElTy->isIntegerTy(), - "atomicrmw operand must have integer type!", - &RMWI, ElTy); + Assert(ElTy->isIntegerTy(), "atomicrmw operand must have integer type!", + &RMWI, ElTy); unsigned Size = ElTy->getPrimitiveSizeInBits(); - Assert2(Size >= 8 && !(Size & (Size - 1)), - "atomicrmw operand must be power-of-two byte-sized integer", - &RMWI, ElTy); - Assert2(ElTy == RMWI.getOperand(1)->getType(), - "Argument value type does not match pointer operand type!", - &RMWI, ElTy); - Assert1(AtomicRMWInst::FIRST_BINOP <= RMWI.getOperation() && - RMWI.getOperation() <= AtomicRMWInst::LAST_BINOP, - "Invalid binary operation!", &RMWI); + Assert(Size >= 8 && !(Size & (Size - 1)), + "atomicrmw operand must be power-of-two byte-sized integer", &RMWI, + ElTy); + Assert(ElTy == RMWI.getOperand(1)->getType(), + "Argument value type does not match pointer operand type!", &RMWI, + ElTy); + Assert(AtomicRMWInst::FIRST_BINOP <= RMWI.getOperation() && + RMWI.getOperation() <= AtomicRMWInst::LAST_BINOP, + "Invalid binary operation!", &RMWI); visitInstruction(RMWI); } void Verifier::visitFenceInst(FenceInst &FI) { const AtomicOrdering Ordering = FI.getOrdering(); - Assert1(Ordering == Acquire || Ordering == Release || - Ordering == AcquireRelease || Ordering == SequentiallyConsistent, - "fence instructions may only have " - "acquire, release, acq_rel, or seq_cst ordering.", &FI); + Assert(Ordering == Acquire || Ordering == Release || + Ordering == AcquireRelease || Ordering == SequentiallyConsistent, + "fence instructions may only have " + "acquire, release, acq_rel, or seq_cst ordering.", + &FI); visitInstruction(FI); } void Verifier::visitExtractValueInst(ExtractValueInst &EVI) { - Assert1(ExtractValueInst::getIndexedType(EVI.getAggregateOperand()->getType(), - EVI.getIndices()) == - EVI.getType(), - "Invalid ExtractValueInst operands!", &EVI); + Assert(ExtractValueInst::getIndexedType(EVI.getAggregateOperand()->getType(), + EVI.getIndices()) == EVI.getType(), + "Invalid ExtractValueInst operands!", &EVI); visitInstruction(EVI); } void Verifier::visitInsertValueInst(InsertValueInst &IVI) { - Assert1(ExtractValueInst::getIndexedType(IVI.getAggregateOperand()->getType(), - IVI.getIndices()) == - IVI.getOperand(1)->getType(), - "Invalid InsertValueInst 
operands!", &IVI); + Assert(ExtractValueInst::getIndexedType(IVI.getAggregateOperand()->getType(), + IVI.getIndices()) == + IVI.getOperand(1)->getType(), + "Invalid InsertValueInst operands!", &IVI); visitInstruction(IVI); } @@ -2368,43 +2373,44 @@ void Verifier::visitLandingPadInst(LandingPadInst &LPI) { // The landingpad instruction is ill-formed if it doesn't have any clauses and // isn't a cleanup. - Assert1(LPI.getNumClauses() > 0 || LPI.isCleanup(), - "LandingPadInst needs at least one clause or to be a cleanup.", &LPI); + Assert(LPI.getNumClauses() > 0 || LPI.isCleanup(), + "LandingPadInst needs at least one clause or to be a cleanup.", &LPI); // The landingpad instruction defines its parent as a landing pad block. The // landing pad block may be branched to only by the unwind edge of an invoke. for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator()); - Assert1(II && II->getUnwindDest() == BB && II->getNormalDest() != BB, - "Block containing LandingPadInst must be jumped to " - "only by the unwind edge of an invoke.", &LPI); + Assert(II && II->getUnwindDest() == BB && II->getNormalDest() != BB, + "Block containing LandingPadInst must be jumped to " + "only by the unwind edge of an invoke.", + &LPI); } // The landingpad instruction must be the first non-PHI instruction in the // block. - Assert1(LPI.getParent()->getLandingPadInst() == &LPI, - "LandingPadInst not the first non-PHI instruction in the block.", - &LPI); + Assert(LPI.getParent()->getLandingPadInst() == &LPI, + "LandingPadInst not the first non-PHI instruction in the block.", + &LPI); // The personality functions for all landingpad instructions within the same // function should match. if (PersonalityFn) - Assert1(LPI.getPersonalityFn() == PersonalityFn, - "Personality function doesn't match others in function", &LPI); + Assert(LPI.getPersonalityFn() == PersonalityFn, + "Personality function doesn't match others in function", &LPI); PersonalityFn = LPI.getPersonalityFn(); // All operands must be constants. - Assert1(isa<Constant>(PersonalityFn), "Personality function is not constant!", - &LPI); + Assert(isa<Constant>(PersonalityFn), "Personality function is not constant!", + &LPI); for (unsigned i = 0, e = LPI.getNumClauses(); i < e; ++i) { Constant *Clause = LPI.getClause(i); if (LPI.isCatch(i)) { - Assert1(isa<PointerType>(Clause->getType()), - "Catch operand does not have pointer type!", &LPI); + Assert(isa<PointerType>(Clause->getType()), + "Catch operand does not have pointer type!", &LPI); } else { - Assert1(LPI.isFilter(i), "Clause is neither catch nor filter!", &LPI); - Assert1(isa<ConstantArray>(Clause) || isa<ConstantAggregateZero>(Clause), - "Filter operand is not an array of constants!", &LPI); + Assert(LPI.isFilter(i), "Clause is neither catch nor filter!", &LPI); + Assert(isa<ConstantArray>(Clause) || isa<ConstantAggregateZero>(Clause), + "Filter operand is not an array of constants!", &LPI); } } @@ -2422,46 +2428,46 @@ void Verifier::verifyDominatesUse(Instruction &I, unsigned i) { } const Use &U = I.getOperandUse(i); - Assert2(InstsInThisBlock.count(Op) || DT.dominates(Op, U), - "Instruction does not dominate all uses!", Op, &I); + Assert(InstsInThisBlock.count(Op) || DT.dominates(Op, U), + "Instruction does not dominate all uses!", Op, &I); } /// verifyInstruction - Verify that an instruction is well formed. 
/// void Verifier::visitInstruction(Instruction &I) { BasicBlock *BB = I.getParent(); - Assert1(BB, "Instruction not embedded in basic block!", &I); + Assert(BB, "Instruction not embedded in basic block!", &I); if (!isa<PHINode>(I)) { // Check that non-phi nodes are not self referential for (User *U : I.users()) { - Assert1(U != (User*)&I || !DT.isReachableFromEntry(BB), - "Only PHI nodes may reference their own value!", &I); + Assert(U != (User *)&I || !DT.isReachableFromEntry(BB), + "Only PHI nodes may reference their own value!", &I); } } // Check that void typed values don't have names - Assert1(!I.getType()->isVoidTy() || !I.hasName(), - "Instruction has a name, but provides a void value!", &I); + Assert(!I.getType()->isVoidTy() || !I.hasName(), + "Instruction has a name, but provides a void value!", &I); // Check that the return value of the instruction is either void or a legal // value type. - Assert1(I.getType()->isVoidTy() || - I.getType()->isFirstClassType(), - "Instruction returns a non-scalar type!", &I); + Assert(I.getType()->isVoidTy() || I.getType()->isFirstClassType(), + "Instruction returns a non-scalar type!", &I); // Check that the instruction doesn't produce metadata. Calls are already // checked against the callee type. - Assert1(!I.getType()->isMetadataTy() || - isa<CallInst>(I) || isa<InvokeInst>(I), - "Invalid use of metadata!", &I); + Assert(!I.getType()->isMetadataTy() || isa<CallInst>(I) || isa<InvokeInst>(I), + "Invalid use of metadata!", &I); // Check that all uses of the instruction, if they are instructions // themselves, actually have parent basic blocks. If the use is not an // instruction, it is an error! for (Use &U : I.uses()) { if (Instruction *Used = dyn_cast<Instruction>(U.getUser())) - Assert2(Used->getParent() != nullptr, "Instruction referencing" - " instruction not embedded in a basic block!", &I, Used); + Assert(Used->getParent() != nullptr, + "Instruction referencing" + " instruction not embedded in a basic block!", + &I, Used); else { CheckFailed("Use of instruction is not an instruction!", U); return; @@ -2469,44 +2475,46 @@ void Verifier::visitInstruction(Instruction &I) { } for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { - Assert1(I.getOperand(i) != nullptr, "Instruction has null operand!", &I); + Assert(I.getOperand(i) != nullptr, "Instruction has null operand!", &I); // Check to make sure that only first-class-values are operands to // instructions. if (!I.getOperand(i)->getType()->isFirstClassType()) { - Assert1(0, "Instruction operands must be first-class values!", &I); + Assert(0, "Instruction operands must be first-class values!", &I); } if (Function *F = dyn_cast<Function>(I.getOperand(i))) { // Check to make sure that the "address of" an intrinsic function is never // taken. - Assert1(!F->isIntrinsic() || i == (isa<CallInst>(I) ? e-1 : - isa<InvokeInst>(I) ? e-3 : 0), - "Cannot take the address of an intrinsic!", &I); - Assert1(!F->isIntrinsic() || isa<CallInst>(I) || + Assert( + !F->isIntrinsic() || + i == (isa<CallInst>(I) ? e - 1 : isa<InvokeInst>(I) ? 
e - 3 : 0),
+ "Cannot take the address of an intrinsic!", &I);
+ Assert(
+ !F->isIntrinsic() || isa<CallInst>(I) ||
F->getIntrinsicID() == Intrinsic::donothing ||
F->getIntrinsicID() == Intrinsic::experimental_patchpoint_void ||
F->getIntrinsicID() == Intrinsic::experimental_patchpoint_i64 ||
F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint,
- "Cannot invoke an intrinsic other than"
- " donothing or patchpoint", &I);
- Assert1(F->getParent() == M, "Referencing function in another module!",
- &I);
+ "Cannot invoke an intrinsic other than"
+ " donothing or patchpoint",
+ &I);
+ Assert(F->getParent() == M, "Referencing function in another module!",
+ &I);
} else if (BasicBlock *OpBB = dyn_cast<BasicBlock>(I.getOperand(i))) {
- Assert1(OpBB->getParent() == BB->getParent(),
- "Referring to a basic block in another function!", &I);
+ Assert(OpBB->getParent() == BB->getParent(),
+ "Referring to a basic block in another function!", &I);
} else if (Argument *OpArg = dyn_cast<Argument>(I.getOperand(i))) {
- Assert1(OpArg->getParent() == BB->getParent(),
- "Referring to an argument in another function!", &I);
+ Assert(OpArg->getParent() == BB->getParent(),
+ "Referring to an argument in another function!", &I);
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(I.getOperand(i))) {
- Assert1(GV->getParent() == M, "Referencing global in another module!",
- &I);
+ Assert(GV->getParent() == M, "Referencing global in another module!", &I);
} else if (isa<Instruction>(I.getOperand(i))) {
verifyDominatesUse(I, i);
} else if (isa<InlineAsm>(I.getOperand(i))) {
- Assert1((i + 1 == e && isa<CallInst>(I)) ||
- (i + 3 == e && isa<InvokeInst>(I)),
- "Cannot take the address of an inline asm!", &I);
+ Assert((i + 1 == e && isa<CallInst>(I)) ||
+ (i + 3 == e && isa<InvokeInst>(I)),
+ "Cannot take the address of an inline asm!", &I);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(I.getOperand(i))) {
if (CE->getType()->isPtrOrPtrVectorTy()) {
// If we have a ConstantExpr pointer, we need to see if it came from an
@@ -2532,31 +2540,37 @@ void Verifier::visitInstruction(Instruction &I) {
}
if (MDNode *MD = I.getMetadata(LLVMContext::MD_fpmath)) {
- Assert1(I.getType()->isFPOrFPVectorTy(),
- "fpmath requires a floating point result!", &I);
- Assert1(MD->getNumOperands() == 1, "fpmath takes one operand!", &I);
+ Assert(I.getType()->isFPOrFPVectorTy(),
+ "fpmath requires a floating point result!", &I);
+ Assert(MD->getNumOperands() == 1, "fpmath takes one operand!", &I);
if (ConstantFP *CFP0 =
mdconst::dyn_extract_or_null<ConstantFP>(MD->getOperand(0))) {
APFloat Accuracy = CFP0->getValueAPF();
- Assert1(Accuracy.isFiniteNonZero() && !Accuracy.isNegative(),
- "fpmath accuracy not a positive number!", &I);
+ Assert(Accuracy.isFiniteNonZero() && !Accuracy.isNegative(),
+ "fpmath accuracy not a positive number!", &I);
} else {
- Assert1(false, "invalid fpmath accuracy!", &I);
+ Assert(false, "invalid fpmath accuracy!", &I);
}
}
if (MDNode *Range = I.getMetadata(LLVMContext::MD_range)) {
- Assert1(isa<LoadInst>(I) || isa<CallInst>(I) || isa<InvokeInst>(I),
- "Ranges are only for loads, calls and invokes!", &I);
+ Assert(isa<LoadInst>(I) || isa<CallInst>(I) || isa<InvokeInst>(I),
+ "Ranges are only for loads, calls and invokes!", &I);
visitRangeMetadata(I, Range, I.getType());
}
if (I.getMetadata(LLVMContext::MD_nonnull)) {
- Assert1(I.getType()->isPointerTy(),
- "nonnull applies only to pointer types", &I);
- Assert1(isa<LoadInst>(I),
- "nonnull applies only to load instructions, use attributes"
- " for
calls or invokes", &I); + Assert(I.getType()->isPointerTy(), "nonnull applies only to pointer types", + &I); + Assert(isa<LoadInst>(I), + "nonnull applies only to load instructions, use attributes" + " for calls or invokes", + &I); + } + + if (MDNode *N = I.getDebugLoc().getAsMDNode()) { + Assert(isa<MDLocation>(N), "invalid !dbg metadata attachment", &I, N); + visitMDNode(*N); } InstsInThisBlock.insert(&I); @@ -2717,7 +2731,7 @@ Verifier::VerifyIntrinsicIsVarArg(bool isVarArg, // If there are no descriptors left, then it can't be a vararg. if (Infos.empty()) - return isVarArg ? true : false; + return isVarArg; // There should be only one descriptor remaining at this point. if (Infos.size() != 1) @@ -2727,7 +2741,7 @@ Verifier::VerifyIntrinsicIsVarArg(bool isVarArg, IITDescriptor D = Infos.front(); Infos = Infos.slice(1); if (D.Kind == IITDescriptor::VarArg) - return isVarArg ? false : true; + return !isVarArg; return true; } @@ -2736,8 +2750,8 @@ Verifier::VerifyIntrinsicIsVarArg(bool isVarArg, /// void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { Function *IF = CI.getCalledFunction(); - Assert1(IF->isDeclaration(), "Intrinsic functions should never be defined!", - IF); + Assert(IF->isDeclaration(), "Intrinsic functions should never be defined!", + IF); // Verify that the intrinsic prototype lines up with what the .td files // describe. @@ -2749,31 +2763,33 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { ArrayRef<Intrinsic::IITDescriptor> TableRef = Table; SmallVector<Type *, 4> ArgTys; - Assert1(!VerifyIntrinsicType(IFTy->getReturnType(), TableRef, ArgTys), - "Intrinsic has incorrect return type!", IF); + Assert(!VerifyIntrinsicType(IFTy->getReturnType(), TableRef, ArgTys), + "Intrinsic has incorrect return type!", IF); for (unsigned i = 0, e = IFTy->getNumParams(); i != e; ++i) - Assert1(!VerifyIntrinsicType(IFTy->getParamType(i), TableRef, ArgTys), - "Intrinsic has incorrect argument type!", IF); + Assert(!VerifyIntrinsicType(IFTy->getParamType(i), TableRef, ArgTys), + "Intrinsic has incorrect argument type!", IF); // Verify if the intrinsic call matches the vararg property. if (IsVarArg) - Assert1(!VerifyIntrinsicIsVarArg(IsVarArg, TableRef), - "Intrinsic was not defined with variable arguments!", IF); + Assert(!VerifyIntrinsicIsVarArg(IsVarArg, TableRef), + "Intrinsic was not defined with variable arguments!", IF); else - Assert1(!VerifyIntrinsicIsVarArg(IsVarArg, TableRef), - "Callsite was not defined with variable arguments!", IF); + Assert(!VerifyIntrinsicIsVarArg(IsVarArg, TableRef), + "Callsite was not defined with variable arguments!", IF); // All descriptors should be absorbed by now. - Assert1(TableRef.empty(), "Intrinsic has too few arguments!", IF); + Assert(TableRef.empty(), "Intrinsic has too few arguments!", IF); // Now that we have the intrinsic ID and the actual argument types (and we // know they are legal for the intrinsic!) get the intrinsic name through the // usual means. This allows us to verify the mangling of argument types into // the name. const std::string ExpectedName = Intrinsic::getName(ID, ArgTys); - Assert1(ExpectedName == IF->getName(), - "Intrinsic name not mangled correctly for type arguments! " - "Should be: " + ExpectedName, IF); + Assert(ExpectedName == IF->getName(), + "Intrinsic name not mangled correctly for type arguments! " + "Should be: " + + ExpectedName, + IF); // If the intrinsic takes MDNode arguments, verify that they are either global // or are local to *this* function. 
@@ -2786,95 +2802,123 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { break; case Intrinsic::ctlz: // llvm.ctlz case Intrinsic::cttz: // llvm.cttz - Assert1(isa<ConstantInt>(CI.getArgOperand(1)), - "is_zero_undef argument of bit counting intrinsics must be a " - "constant int", &CI); + Assert(isa<ConstantInt>(CI.getArgOperand(1)), + "is_zero_undef argument of bit counting intrinsics must be a " + "constant int", + &CI); + break; + case Intrinsic::dbg_declare: // llvm.dbg.declare + Assert(isa<MetadataAsValue>(CI.getArgOperand(0)), + "invalid llvm.dbg.declare intrinsic call 1", &CI); + visitDbgIntrinsic("declare", cast<DbgDeclareInst>(CI)); + break; + case Intrinsic::dbg_value: // llvm.dbg.value + visitDbgIntrinsic("value", cast<DbgValueInst>(CI)); break; - case Intrinsic::dbg_declare: { // llvm.dbg.declare - Assert1(CI.getArgOperand(0) && isa<MetadataAsValue>(CI.getArgOperand(0)), - "invalid llvm.dbg.declare intrinsic call 1", &CI); - } break; case Intrinsic::memcpy: case Intrinsic::memmove: - case Intrinsic::memset: - Assert1(isa<ConstantInt>(CI.getArgOperand(3)), - "alignment argument of memory intrinsics must be a constant int", - &CI); - Assert1(isa<ConstantInt>(CI.getArgOperand(4)), - "isvolatile argument of memory intrinsics must be a constant int", - &CI); + case Intrinsic::memset: { + ConstantInt *AlignCI = dyn_cast<ConstantInt>(CI.getArgOperand(3)); + Assert(AlignCI, + "alignment argument of memory intrinsics must be a constant int", + &CI); + const APInt &AlignVal = AlignCI->getValue(); + Assert(AlignCI->isZero() || AlignVal.isPowerOf2(), + "alignment argument of memory intrinsics must be a power of 2", &CI); + Assert(isa<ConstantInt>(CI.getArgOperand(4)), + "isvolatile argument of memory intrinsics must be a constant int", + &CI); break; + } case Intrinsic::gcroot: case Intrinsic::gcwrite: case Intrinsic::gcread: if (ID == Intrinsic::gcroot) { AllocaInst *AI = dyn_cast<AllocaInst>(CI.getArgOperand(0)->stripPointerCasts()); - Assert1(AI, "llvm.gcroot parameter #1 must be an alloca.", &CI); - Assert1(isa<Constant>(CI.getArgOperand(1)), - "llvm.gcroot parameter #2 must be a constant.", &CI); + Assert(AI, "llvm.gcroot parameter #1 must be an alloca.", &CI); + Assert(isa<Constant>(CI.getArgOperand(1)), + "llvm.gcroot parameter #2 must be a constant.", &CI); if (!AI->getType()->getElementType()->isPointerTy()) { - Assert1(!isa<ConstantPointerNull>(CI.getArgOperand(1)), - "llvm.gcroot parameter #1 must either be a pointer alloca, " - "or argument #2 must be a non-null constant.", &CI); + Assert(!isa<ConstantPointerNull>(CI.getArgOperand(1)), + "llvm.gcroot parameter #1 must either be a pointer alloca, " + "or argument #2 must be a non-null constant.", + &CI); } } - Assert1(CI.getParent()->getParent()->hasGC(), - "Enclosing function does not use GC.", &CI); + Assert(CI.getParent()->getParent()->hasGC(), + "Enclosing function does not use GC.", &CI); break; case Intrinsic::init_trampoline: - Assert1(isa<Function>(CI.getArgOperand(1)->stripPointerCasts()), - "llvm.init_trampoline parameter #2 must resolve to a function.", - &CI); + Assert(isa<Function>(CI.getArgOperand(1)->stripPointerCasts()), + "llvm.init_trampoline parameter #2 must resolve to a function.", + &CI); break; case Intrinsic::prefetch: - Assert1(isa<ConstantInt>(CI.getArgOperand(1)) && - isa<ConstantInt>(CI.getArgOperand(2)) && - cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue() < 2 && - cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue() < 4, - "invalid arguments to llvm.prefetch", - 
&CI); + Assert(isa<ConstantInt>(CI.getArgOperand(1)) && + isa<ConstantInt>(CI.getArgOperand(2)) && + cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue() < 2 && + cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue() < 4, + "invalid arguments to llvm.prefetch", &CI); break; case Intrinsic::stackprotector: - Assert1(isa<AllocaInst>(CI.getArgOperand(1)->stripPointerCasts()), - "llvm.stackprotector parameter #2 must resolve to an alloca.", - &CI); + Assert(isa<AllocaInst>(CI.getArgOperand(1)->stripPointerCasts()), + "llvm.stackprotector parameter #2 must resolve to an alloca.", &CI); break; case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::invariant_start: - Assert1(isa<ConstantInt>(CI.getArgOperand(0)), - "size argument of memory use markers must be a constant integer", - &CI); + Assert(isa<ConstantInt>(CI.getArgOperand(0)), + "size argument of memory use markers must be a constant integer", + &CI); break; case Intrinsic::invariant_end: - Assert1(isa<ConstantInt>(CI.getArgOperand(1)), - "llvm.invariant.end parameter #2 must be a constant integer", &CI); + Assert(isa<ConstantInt>(CI.getArgOperand(1)), + "llvm.invariant.end parameter #2 must be a constant integer", &CI); break; - case Intrinsic::frameallocate: { + case Intrinsic::frameescape: { BasicBlock *BB = CI.getParent(); - Assert1(BB == &BB->getParent()->front(), - "llvm.frameallocate used outside of entry block", &CI); - Assert1(!SawFrameAllocate, - "multiple calls to llvm.frameallocate in one function", &CI); - SawFrameAllocate = true; - Assert1(isa<ConstantInt>(CI.getArgOperand(0)), - "llvm.frameallocate argument must be constant integer size", &CI); + Assert(BB == &BB->getParent()->front(), + "llvm.frameescape used outside of entry block", &CI); + Assert(!SawFrameEscape, + "multiple calls to llvm.frameescape in one function", &CI); + for (Value *Arg : CI.arg_operands()) { + auto *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts()); + Assert(AI && AI->isStaticAlloca(), + "llvm.frameescape only accepts static allocas", &CI); + } + FrameEscapeInfo[BB->getParent()].first = CI.getNumArgOperands(); + SawFrameEscape = true; break; } case Intrinsic::framerecover: { Value *FnArg = CI.getArgOperand(0)->stripPointerCasts(); Function *Fn = dyn_cast<Function>(FnArg); - Assert1(Fn && !Fn->isDeclaration(), "llvm.framerecover first " - "argument must be function defined in this module", &CI); + Assert(Fn && !Fn->isDeclaration(), + "llvm.framerecover first " + "argument must be function defined in this module", + &CI); + auto *IdxArg = dyn_cast<ConstantInt>(CI.getArgOperand(2)); + Assert(IdxArg, "idx argument of llvm.framerecover must be a constant int", + &CI); + auto &Entry = FrameEscapeInfo[Fn]; + Entry.second = unsigned( + std::max(uint64_t(Entry.second), IdxArg->getLimitedValue(~0U) + 1)); + break; + } + + case Intrinsic::eh_unwindhelp: { + auto *AI = dyn_cast<AllocaInst>(CI.getArgOperand(0)->stripPointerCasts()); + Assert(AI && AI->isStaticAlloca(), + "llvm.eh.unwindhelp requires a static alloca", &CI); break; } case Intrinsic::experimental_gc_statepoint: - Assert1(!CI.isInlineAsm(), - "gc.statepoint support for inline assembly unimplemented", &CI); + Assert(!CI.isInlineAsm(), + "gc.statepoint support for inline assembly unimplemented", &CI); VerifyStatepoint(ImmutableCallSite(&CI)); break; @@ -2886,56 +2930,52 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { CallSite StatepointCS(CI.getArgOperand(0)); const Function *StatepointFn = StatepointCS.getInstruction() ? 
StatepointCS.getCalledFunction() : nullptr; - Assert2(StatepointFn && StatepointFn->isDeclaration() && - StatepointFn->getIntrinsicID() == Intrinsic::experimental_gc_statepoint, - "gc.result operand #1 must be from a statepoint", - &CI, CI.getArgOperand(0)); + Assert(StatepointFn && StatepointFn->isDeclaration() && + StatepointFn->getIntrinsicID() == + Intrinsic::experimental_gc_statepoint, + "gc.result operand #1 must be from a statepoint", &CI, + CI.getArgOperand(0)); // Assert that result type matches wrapped callee. const Value *Target = StatepointCS.getArgument(0); const PointerType *PT = cast<PointerType>(Target->getType()); const FunctionType *TargetFuncType = cast<FunctionType>(PT->getElementType()); - Assert1(CI.getType() == TargetFuncType->getReturnType(), - "gc.result result type does not match wrapped callee", - &CI); + Assert(CI.getType() == TargetFuncType->getReturnType(), + "gc.result result type does not match wrapped callee", &CI); break; } case Intrinsic::experimental_gc_relocate: { - Assert1(CI.getNumArgOperands() == 3, "wrong number of arguments", &CI); + Assert(CI.getNumArgOperands() == 3, "wrong number of arguments", &CI); // Check that this relocate is correctly tied to the statepoint // This is case for relocate on the unwinding path of an invoke statepoint if (ExtractValueInst *ExtractValue = dyn_cast<ExtractValueInst>(CI.getArgOperand(0))) { - Assert1(isa<LandingPadInst>(ExtractValue->getAggregateOperand()), - "gc relocate on unwind path incorrectly linked to the statepoint", - &CI); + Assert(isa<LandingPadInst>(ExtractValue->getAggregateOperand()), + "gc relocate on unwind path incorrectly linked to the statepoint", + &CI); const BasicBlock *invokeBB = ExtractValue->getParent()->getUniquePredecessor(); // Landingpad relocates should have only one predecessor with invoke // statepoint terminator - Assert1(invokeBB, - "safepoints should have unique landingpads", - ExtractValue->getParent()); - Assert1(invokeBB->getTerminator(), - "safepoint block should be well formed", - invokeBB); - Assert1(isStatepoint(invokeBB->getTerminator()), - "gc relocate should be linked to a statepoint", - invokeBB); + Assert(invokeBB, "safepoints should have unique landingpads", + ExtractValue->getParent()); + Assert(invokeBB->getTerminator(), "safepoint block should be well formed", + invokeBB); + Assert(isStatepoint(invokeBB->getTerminator()), + "gc relocate should be linked to a statepoint", invokeBB); } else { // In all other cases relocate should be tied to the statepoint directly. 
// This covers relocates on a normal return path of invoke statepoint and
// relocates of a call statepoint
auto Token = CI.getArgOperand(0);
- Assert2(isa<Instruction>(Token) && isStatepoint(cast<Instruction>(Token)),
- "gc relocate is incorrectly tied to the statepoint",
- &CI, Token);
+ Assert(isa<Instruction>(Token) && isStatepoint(cast<Instruction>(Token)),
+ "gc relocate is incorrectly tied to the statepoint", &CI, Token);
}
// Verify rest of the relocate arguments
@@ -2945,53 +2985,74 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
// Both the base and derived must be piped through the safepoint
Value* Base = CI.getArgOperand(1);
- Assert1(isa<ConstantInt>(Base),
- "gc.relocate operand #2 must be integer offset", &CI);
-
+ Assert(isa<ConstantInt>(Base),
+ "gc.relocate operand #2 must be integer offset", &CI);
+
Value* Derived = CI.getArgOperand(2);
- Assert1(isa<ConstantInt>(Derived),
- "gc.relocate operand #3 must be integer offset", &CI);
+ Assert(isa<ConstantInt>(Derived),
+ "gc.relocate operand #3 must be integer offset", &CI);
const int BaseIndex = cast<ConstantInt>(Base)->getZExtValue();
const int DerivedIndex = cast<ConstantInt>(Derived)->getZExtValue();
// Check the bounds
- Assert1(0 <= BaseIndex &&
- BaseIndex < (int)StatepointCS.arg_size(),
- "gc.relocate: statepoint base index out of bounds", &CI);
- Assert1(0 <= DerivedIndex &&
- DerivedIndex < (int)StatepointCS.arg_size(),
- "gc.relocate: statepoint derived index out of bounds", &CI);
+ Assert(0 <= BaseIndex && BaseIndex < (int)StatepointCS.arg_size(),
+ "gc.relocate: statepoint base index out of bounds", &CI);
+ Assert(0 <= DerivedIndex && DerivedIndex < (int)StatepointCS.arg_size(),
+ "gc.relocate: statepoint derived index out of bounds", &CI);
// Check that BaseIndex and DerivedIndex fall within the 'gc parameters'
// section of the statepoint's argument
- const int NumCallArgs =
+ Assert(StatepointCS.arg_size() > 0,
+ "gc.statepoint: insufficient arguments");
+ Assert(isa<ConstantInt>(StatepointCS.getArgument(1)),
+ "gc.statepoint: number of call arguments must be constant integer");
const unsigned NumCallArgs =
cast<ConstantInt>(StatepointCS.getArgument(1))->getZExtValue();
+ Assert(StatepointCS.arg_size() > NumCallArgs+3,
+ "gc.statepoint: mismatch in number of call arguments");
+ Assert(isa<ConstantInt>(StatepointCS.getArgument(NumCallArgs+3)),
+ "gc.statepoint: number of deoptimization arguments must be "
+ "a constant integer");
const int NumDeoptArgs =
cast<ConstantInt>(StatepointCS.getArgument(NumCallArgs + 3))->getZExtValue();
const int GCParamArgsStart = NumCallArgs + NumDeoptArgs + 4;
const int GCParamArgsEnd = StatepointCS.arg_size();
- Assert1(GCParamArgsStart <= BaseIndex &&
- BaseIndex < GCParamArgsEnd,
- "gc.relocate: statepoint base index doesn't fall within the "
- "'gc parameters' section of the statepoint call", &CI);
- Assert1(GCParamArgsStart <= DerivedIndex &&
- DerivedIndex < GCParamArgsEnd,
- "gc.relocate: statepoint derived index doesn't fall within the "
- "'gc parameters' section of the statepoint call", &CI);
-
+ Assert(GCParamArgsStart <= BaseIndex && BaseIndex < GCParamArgsEnd,
+ "gc.relocate: statepoint base index doesn't fall within the "
+ "'gc parameters' section of the statepoint call",
+ &CI);
+ Assert(GCParamArgsStart <= DerivedIndex && DerivedIndex < GCParamArgsEnd,
+ "gc.relocate: statepoint derived index doesn't fall within the "
+ "'gc parameters' section of the statepoint call",
+ &CI);
// Assert that the result type matches the type of the
relocated pointer
GCRelocateOperands Operands(&CI);
- Assert1(Operands.derivedPtr()->getType() == CI.getType(),
- "gc.relocate: relocating a pointer shouldn't change its type",
- &CI);
+ Assert(Operands.derivedPtr()->getType() == CI.getType(),
+ "gc.relocate: relocating a pointer shouldn't change its type", &CI);
break;
}
};
}
-void DebugInfoVerifier::verifyDebugInfo() {
- if (!VerifyDebugInfo)
+template <class DbgIntrinsicTy>
+void Verifier::visitDbgIntrinsic(StringRef Kind, DbgIntrinsicTy &DII) {
+ auto *MD = cast<MetadataAsValue>(DII.getArgOperand(0))->getMetadata();
+ Assert(isa<ValueAsMetadata>(MD) ||
+ (isa<MDNode>(MD) && !cast<MDNode>(MD)->getNumOperands()),
+ "invalid llvm.dbg." + Kind + " intrinsic address/value", &DII, MD);
+ Assert(isa<MDLocalVariable>(DII.getRawVariable()),
+ "invalid llvm.dbg." + Kind + " intrinsic variable", &DII,
+ DII.getRawVariable());
+ Assert(isa<MDExpression>(DII.getRawExpression()),
+ "invalid llvm.dbg." + Kind + " intrinsic expression", &DII,
+ DII.getRawExpression());
+}
+
+void Verifier::verifyDebugInfo() {
+ // Run the debug info verifier only if the regular verifier succeeds, since
+ // sometimes checks that have already failed will cause crashes here.
+ if (EverBroken || !VerifyDebugInfo)
return;
DebugInfoFinder Finder;
@@ -3002,23 +3063,23 @@ void DebugInfoVerifier::verifyDebugInfo() {
//
// NOTE: The loud braces are necessary for MSVC compatibility.
for (DICompileUnit CU : Finder.compile_units()) {
- Assert1(CU.Verify(), "DICompileUnit does not Verify!", CU);
+ Assert(CU.Verify(), "DICompileUnit does not Verify!", CU);
}
for (DISubprogram S : Finder.subprograms()) {
- Assert1(S.Verify(), "DISubprogram does not Verify!", S);
+ Assert(S.Verify(), "DISubprogram does not Verify!", S);
}
for (DIGlobalVariable GV : Finder.global_variables()) {
- Assert1(GV.Verify(), "DIGlobalVariable does not Verify!", GV);
+ Assert(GV.Verify(), "DIGlobalVariable does not Verify!", GV);
}
for (DIType T : Finder.types()) {
- Assert1(T.Verify(), "DIType does not Verify!", T);
+ Assert(T.Verify(), "DIType does not Verify!", T);
}
for (DIScope S : Finder.scopes()) {
- Assert1(S.Verify(), "DIScope does not Verify!", S);
+ Assert(S.Verify(), "DIScope does not Verify!", S);
}
}
-void DebugInfoVerifier::processInstructions(DebugInfoFinder &Finder) {
+void Verifier::processInstructions(DebugInfoFinder &Finder) {
for (const Function &F : *M)
for (auto I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
if (MDNode *MD = I->getMetadata(LLVMContext::MD_dbg))
@@ -3028,25 +3089,16 @@ void DebugInfoVerifier::processInstructions(DebugInfoFinder &Finder) {
}
}
-void DebugInfoVerifier::processCallInst(DebugInfoFinder &Finder,
- const CallInst &CI) {
+void Verifier::processCallInst(DebugInfoFinder &Finder, const CallInst &CI) {
if (Function *F = CI.getCalledFunction())
if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
switch (ID) {
- case Intrinsic::dbg_declare: {
- auto *DDI = cast<DbgDeclareInst>(&CI);
- Finder.processDeclare(*M, DDI);
- if (auto E = DDI->getExpression())
- Assert1(DIExpression(E).Verify(), "DIExpression does not Verify!", E);
+ case Intrinsic::dbg_declare:
+ Finder.processDeclare(*M, cast<DbgDeclareInst>(&CI));
break;
- }
- case Intrinsic::dbg_value: {
- auto *DVI = cast<DbgValueInst>(&CI);
- Finder.processValue(*M, DVI);
- if (auto E = DVI->getExpression())
- Assert1(DIExpression(E).Verify(), "DIExpression does not Verify!", E);
+ case Intrinsic::dbg_value:
+ Finder.processValue(*M, cast<DbgValueInst>(&CI));
break;
- }
default:
break;
}
@@ -3079,8 +3131,7
@@ bool llvm::verifyModule(const Module &M, raw_ostream *OS) { // Note that this function's return value is inverted from what you would // expect of a function called "verify". - DebugInfoVerifier DIV(OS ? *OS : NullStr); - return !V.verify(M) || !DIV.verify(M) || Broken; + return !V.verify(M) || Broken; } namespace { @@ -3090,7 +3141,7 @@ struct VerifierLegacyPass : public FunctionPass { Verifier V; bool FatalErrors; - VerifierLegacyPass() : FunctionPass(ID), FatalErrors(true) { + VerifierLegacyPass() : FunctionPass(ID), V(dbgs()), FatalErrors(true) { initializeVerifierLegacyPassPass(*PassRegistry::getPassRegistry()); } explicit VerifierLegacyPass(bool FatalErrors) @@ -3116,48 +3167,15 @@ struct VerifierLegacyPass : public FunctionPass { AU.setPreservesAll(); } }; -struct DebugInfoVerifierLegacyPass : public ModulePass { - static char ID; - - DebugInfoVerifier V; - bool FatalErrors; - - DebugInfoVerifierLegacyPass() : ModulePass(ID), FatalErrors(true) { - initializeDebugInfoVerifierLegacyPassPass(*PassRegistry::getPassRegistry()); - } - explicit DebugInfoVerifierLegacyPass(bool FatalErrors) - : ModulePass(ID), V(dbgs()), FatalErrors(FatalErrors) { - initializeDebugInfoVerifierLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override { - if (!V.verify(M) && FatalErrors) - report_fatal_error("Broken debug info found, compilation aborted!"); - - return false; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - } -}; } char VerifierLegacyPass::ID = 0; INITIALIZE_PASS(VerifierLegacyPass, "verify", "Module Verifier", false, false) -char DebugInfoVerifierLegacyPass::ID = 0; -INITIALIZE_PASS(DebugInfoVerifierLegacyPass, "verify-di", "Debug Info Verifier", - false, false) - FunctionPass *llvm::createVerifierPass(bool FatalErrors) { return new VerifierLegacyPass(FatalErrors); } -ModulePass *llvm::createDebugInfoVerifierPass(bool FatalErrors) { - return new DebugInfoVerifierLegacyPass(FatalErrors); -} - PreservedAnalyses VerifierPass::run(Module &M) { if (verifyModule(M, &dbgs()) && FatalErrors) report_fatal_error("Broken module found, compilation aborted!"); diff --git a/lib/LLVMBuild.txt b/lib/LLVMBuild.txt index ad5b22b..bc2448d 100644 --- a/lib/LLVMBuild.txt +++ b/lib/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = Analysis AsmParser Bitcode CodeGen DebugInfo ExecutionEngine LineEditor Linker IR IRReader LTO MC Object Option ProfileData Support TableGen Target Transforms +subdirectories = Analysis AsmParser Bitcode CodeGen DebugInfo ExecutionEngine LineEditor Linker IR IRReader LTO MC Object Option Passes ProfileData Support TableGen Target Transforms [component_0] type = Group diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 61c2749..a6f980b 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -71,7 +71,7 @@ LTOCodeGenerator::LTOCodeGenerator() LTOCodeGenerator::LTOCodeGenerator(std::unique_ptr<LLVMContext> Context) : OwnedContext(std::move(Context)), Context(*OwnedContext), - IRLinker(new Module("ld-temp.o", *OwnedContext)) { + IRLinker(new Module("ld-temp.o", *OwnedContext)), OptLevel(2) { initialize(); } @@ -291,12 +291,11 @@ const void *LTOCodeGenerator::compileOptimized(size_t *length, bool LTOCodeGenerator::compile_to_file(const char **name, - bool disableOpt, bool disableInline, bool disableGVNLoadPRE, bool disableVectorization, std::string &errMsg) { - if 
(!optimize(disableOpt, disableInline, disableGVNLoadPRE, + if (!optimize(disableInline, disableGVNLoadPRE, disableVectorization, errMsg)) return false; @@ -304,12 +303,11 @@ bool LTOCodeGenerator::compile_to_file(const char **name, } const void* LTOCodeGenerator::compile(size_t *length, - bool disableOpt, bool disableInline, bool disableGVNLoadPRE, bool disableVectorization, std::string &errMsg) { - if (!optimize(disableOpt, disableInline, disableGVNLoadPRE, + if (!optimize(disableInline, disableGVNLoadPRE, disableVectorization, errMsg)) return nullptr; @@ -363,9 +361,25 @@ bool LTOCodeGenerator::determineTarget(std::string &errMsg) { MCpu = "cyclone"; } + CodeGenOpt::Level CGOptLevel; + switch (OptLevel) { + case 0: + CGOptLevel = CodeGenOpt::None; + break; + case 1: + CGOptLevel = CodeGenOpt::Less; + break; + case 2: + CGOptLevel = CodeGenOpt::Default; + break; + case 3: + CGOptLevel = CodeGenOpt::Aggressive; + break; + } + TargetMach = march->createTargetMachine(TripleStr, MCpu, FeatureStr, Options, RelocModel, CodeModel::Default, - CodeGenOpt::Aggressive); + CGOptLevel); return true; } @@ -457,7 +471,6 @@ void LTOCodeGenerator::applyScopeRestrictions() { // Start off with a verification pass. legacy::PassManager passes; passes.add(createVerifierPass()); - passes.add(createDebugInfoVerifierPass()); // mark which symbols can not be internalized Mangler Mangler(TargetMach->getDataLayout()); @@ -512,8 +525,7 @@ void LTOCodeGenerator::applyScopeRestrictions() { } /// Optimize merged modules using various IPO passes -bool LTOCodeGenerator::optimize(bool DisableOpt, - bool DisableInline, +bool LTOCodeGenerator::optimize(bool DisableInline, bool DisableGVNLoadPRE, bool DisableVectorization, std::string &errMsg) { @@ -529,9 +541,8 @@ bool LTOCodeGenerator::optimize(bool DisableOpt, legacy::PassManager passes; // Add an appropriate DataLayout instance for this module... 
- mergedModule->setDataLayout(TargetMach->getDataLayout()); + mergedModule->setDataLayout(*TargetMach->getDataLayout()); - passes.add(new DataLayoutPass()); passes.add( createTargetTransformInfoWrapperPass(TargetMach->getTargetIRAnalysis())); @@ -543,8 +554,7 @@ bool LTOCodeGenerator::optimize(bool DisableOpt, if (!DisableInline) PMB.Inliner = createFunctionInliningPass(); PMB.LibraryInfo = new TargetLibraryInfoImpl(TargetTriple); - if (DisableOpt) - PMB.OptLevel = 0; + PMB.OptLevel = OptLevel; PMB.VerifyInput = true; PMB.VerifyOutput = true; @@ -567,8 +577,6 @@ bool LTOCodeGenerator::compileOptimized(raw_ostream &out, std::string &errMsg) { legacy::PassManager codeGenPasses; - codeGenPasses.add(new DataLayoutPass()); - formatted_raw_ostream Out(out); // If the bitcode files contain ARC code and were compiled with optimization, diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index 0d07791..49aa97d 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -179,7 +179,8 @@ static Module *parseBitcodeFileImpl(MemoryBufferRef Buffer, std::unique_ptr<MemoryBuffer> LightweightBuf = MemoryBuffer::getMemBuffer(*MBOrErr, false); ErrorOr<Module *> M = getLazyBitcodeModule(std::move(LightweightBuf), Context, - DiagnosticHandler); + DiagnosticHandler, + true/*ShouldLazyLoadMetadata*/); if (!M) return nullptr; return *M; @@ -229,7 +230,7 @@ LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer, TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr, options); - M->setDataLayout(target->getDataLayout()); + M->setDataLayout(*target->getDataLayout()); std::unique_ptr<object::IRObjectFile> IRObj( new object::IRObjectFile(Buffer, std::move(M))); diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index e6d9acc..21edc50 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -226,6 +226,7 @@ void TypeMapTy::linkDefinedTypeBodies() { Elements[I] = get(SrcSTy->getElementType(I)); DstSTy->setBody(Elements, SrcSTy->isPacked()); + DstStructTypesSet.switchToNonOpaque(DstSTy); } SrcDefinitionsToResolve.clear(); DstResolvedOpaqueTypes.clear(); @@ -672,17 +673,12 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName, getComdatLeader(SrcM, ComdatName, SrcGV)) return true; - const DataLayout *DstDL = DstM->getDataLayout(); - const DataLayout *SrcDL = SrcM->getDataLayout(); - if (!DstDL || !SrcDL) { - return emitError( - "Linking COMDATs named '" + ComdatName + - "': can't do size dependent selection without DataLayout!"); - } + const DataLayout &DstDL = DstM->getDataLayout(); + const DataLayout &SrcDL = SrcM->getDataLayout(); uint64_t DstSize = - DstDL->getTypeAllocSize(DstGV->getType()->getPointerElementType()); + DstDL.getTypeAllocSize(DstGV->getType()->getPointerElementType()); uint64_t SrcSize = - SrcDL->getTypeAllocSize(SrcGV->getType()->getPointerElementType()); + SrcDL.getTypeAllocSize(SrcGV->getType()->getPointerElementType()); if (Result == Comdat::SelectionKind::ExactMatch) { if (SrcGV->getInitializer() != DstGV->getInitializer()) return emitError("Linking COMDATs named '" + ComdatName + @@ -768,9 +764,7 @@ bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc, return false; } - // FIXME: Make datalayout mandatory and just use getDataLayout(). 
- DataLayout DL(Dest.getParent()); - + const DataLayout &DL = Dest.getParent()->getDataLayout(); uint64_t DestSize = DL.getTypeAllocSize(Dest.getType()->getElementType()); uint64_t SrcSize = DL.getTypeAllocSize(Src.getType()->getElementType()); LinkFromSrc = SrcSize > DestSize; @@ -1256,9 +1250,10 @@ void ModuleLinker::linkNamedMDNodes() { /// Drop DISubprograms that have been superseded. /// -/// FIXME: this creates an asymmetric result: we strip losing subprograms from -/// DstM, but leave losing subprograms in SrcM. Instead we should also strip -/// losers from SrcM, but this requires extra plumbing in MapMetadata. +/// FIXME: this creates an asymmetric result: we strip functions from losing +/// subprograms in DstM, but leave losing subprograms in SrcM. +/// TODO: Remove this logic once the backend can correctly determine canonical +/// subprograms. void ModuleLinker::stripReplacedSubprograms() { // Avoid quadratic runtime by returning early when there's nothing to do. if (OverridingFunctions.empty()) @@ -1268,8 +1263,8 @@ void ModuleLinker::stripReplacedSubprograms() { auto Functions = std::move(OverridingFunctions); OverridingFunctions.clear(); - // Drop subprograms whose functions have been overridden by the new compile - // unit. + // Drop functions from subprograms if they've been overridden by the new + // compile unit. NamedMDNode *CompileUnits = DstM->getNamedMetadata("llvm.dbg.cu"); if (!CompileUnits) return; @@ -1280,19 +1275,15 @@ void ModuleLinker::stripReplacedSubprograms() { DITypedArray<DISubprogram> SPs(CU.getSubprograms()); assert(SPs && "Expected valid subprogram array"); - SmallVector<Metadata *, 16> NewSPs; - NewSPs.reserve(SPs.getNumElements()); for (unsigned S = 0, SE = SPs.getNumElements(); S != SE; ++S) { DISubprogram SP = SPs.getElement(S); - if (SP && SP.getFunction() && Functions.count(SP.getFunction())) + if (!SP || !SP.getFunction() || !Functions.count(SP.getFunction())) continue; - NewSPs.push_back(SP); + // Prevent DebugInfoFinder from tagging this as the canonical subprogram, + // since the canonical one is in the incoming module. + SP->replaceFunction(nullptr); } - - // Redirect operand to the overriding subprogram. - if (NewSPs.size() != SPs.getNumElements()) - CU.replaceSubprograms(DIArray(MDNode::get(DstM->getContext(), NewSPs))); } } @@ -1482,11 +1473,10 @@ bool ModuleLinker::run() { // Inherit the target data from the source module if the destination module // doesn't have one already. - if (!DstM->getDataLayout() && SrcM->getDataLayout()) + if (DstM->getDataLayout().isDefault()) DstM->setDataLayout(SrcM->getDataLayout()); - if (SrcM->getDataLayout() && DstM->getDataLayout() && - *SrcM->getDataLayout() != *DstM->getDataLayout()) { + if (SrcM->getDataLayout() != DstM->getDataLayout()) { emitWarning("Linking two modules of different data layouts: '" + SrcM->getModuleIdentifier() + "' is '" + SrcM->getDataLayoutStr() + "' whereas '" + @@ -1570,6 +1560,13 @@ bool ModuleLinker::run() { MapValue(GV, ValueMap, RF_None, &TypeMap, &ValMaterializer); } + // Strip replaced subprograms before mapping any metadata -- so that we're + // not changing metadata from the source module (note that + // linkGlobalValueBody() eventually calls RemapInstruction() and therefore + // MapMetadata()) -- but after linking global value protocols -- so that + // OverridingFunctions has been built. + stripReplacedSubprograms(); + // Link in the function bodies that are defined in the source module into // DstM. 
for (Function &SF : *SrcM) { @@ -1592,9 +1589,6 @@ bool ModuleLinker::run() { linkGlobalValueBody(Src); } - // Strip replaced subprograms before linking together compile units. - stripReplacedSubprograms(); - // Remap all of the named MDNodes in Src into the DstM module. We do this // after linking GlobalValues so that MDNodes that reference GlobalValues // are properly remapped. @@ -1684,6 +1678,14 @@ void Linker::IdentifiedStructTypeSet::addNonOpaque(StructType *Ty) { NonOpaqueStructTypes.insert(Ty); } +void Linker::IdentifiedStructTypeSet::switchToNonOpaque(StructType *Ty) { + assert(!Ty->isOpaque()); + NonOpaqueStructTypes.insert(Ty); + bool Removed = OpaqueStructTypes.erase(Ty); + (void)Removed; + assert(Removed); +} + void Linker::IdentifiedStructTypeSet::addOpaque(StructType *Ty) { assert(Ty->isOpaque()); OpaqueStructTypes.insert(Ty); @@ -1777,7 +1779,7 @@ bool Linker::LinkModules(Module *Dest, Module *Src) { //===----------------------------------------------------------------------===// LLVMBool LLVMLinkModules(LLVMModuleRef Dest, LLVMModuleRef Src, - unsigned Unused, char **OutMessages) { + LLVMLinkerMode Unused, char **OutMessages) { Module *D = unwrap(Dest); std::string Message; raw_string_ostream Stream(Message); diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 4819905..c99a3ee 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -185,11 +185,25 @@ class ELFObjectWriter : public MCObjectWriter { } public: - ELFObjectWriter(MCELFObjectTargetWriter *MOTW, raw_ostream &_OS, + ELFObjectWriter(MCELFObjectTargetWriter *MOTW, raw_ostream &OS, bool IsLittleEndian) - : MCObjectWriter(_OS, IsLittleEndian), FWriter(IsLittleEndian), + : MCObjectWriter(OS, IsLittleEndian), FWriter(IsLittleEndian), TargetObjectWriter(MOTW), NeedsGOT(false) {} + void reset() override { + UsedInReloc.clear(); + WeakrefUsedInReloc.clear(); + Renames.clear(); + Relocations.clear(); + ShStrTabBuilder.clear(); + StrTabBuilder.clear(); + FileSymbolData.clear(); + LocalSymbolData.clear(); + ExternalSymbolData.clear(); + UndefinedSymbolData.clear(); + MCObjectWriter::reset(); + } + virtual ~ELFObjectWriter(); void WriteWord(uint64_t W) { @@ -298,6 +312,8 @@ class ELFObjectWriter : public MCObjectWriter { bool InSet, bool IsPCRel) const override; + bool isWeak(const MCSymbolData &SD) const override; + void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; void writeSection(MCAssembler &Asm, const SectionIndexMapTy &SectionIndexMap, @@ -789,6 +805,10 @@ static const MCSymbol *getWeakRef(const MCSymbolRefExpr &Ref) { return nullptr; } +static bool isWeak(const MCSymbolData &D) { + return D.getFlags() & ELF_STB_Weak || MCELF::GetType(D) == ELF::STT_GNU_IFUNC; +} + void ELFObjectWriter::RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -829,6 +849,10 @@ void ELFObjectWriter::RecordRelocation(MCAssembler &Asm, Fixup.getLoc(), "Cannot represent a difference across sections"); const MCSymbolData &SymBD = Asm.getSymbolData(SymB); + if (::isWeak(SymBD)) + Asm.getContext().FatalError( + Fixup.getLoc(), "Cannot represent a subtraction with a weak symbol"); + uint64_t SymBOffset = Layout.getSymbolOffset(&SymBD); uint64_t K = SymBOffset - FixupOffset; IsPCRel = true; @@ -1186,7 +1210,7 @@ getUncompressedData(MCAsmLayout &Layout, static bool prependCompressionHeader(uint64_t Size, SmallVectorImpl<char> &CompressedContents) { - static const StringRef Magic = "ZLIB"; + const StringRef Magic = "ZLIB"; if (Size <= Magic.size() 
+ sizeof(Size) + CompressedContents.size()) return false; if (sys::IsLittleEndianHost) @@ -1348,7 +1372,8 @@ static int cmpRel(const ELFRelocationEntry *AP, const ELFRelocationEntry *BP) { return B.Offset - A.Offset; if (B.Type != A.Type) return A.Type - B.Type; - llvm_unreachable("ELFRelocs might be unstable!"); + //llvm_unreachable("ELFRelocs might be unstable!"); + return 0; } static void sortRelocs(const MCAssembler &Asm, @@ -1794,12 +1819,16 @@ ELFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, const MCFragment &FB, bool InSet, bool IsPCRel) const { - if (DataA.getFlags() & ELF_STB_Weak || MCELF::GetType(DataA) == ELF::STT_GNU_IFUNC) + if (::isWeak(DataA)) return false; return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( Asm, DataA, FB,InSet, IsPCRel); } +bool ELFObjectWriter::isWeak(const MCSymbolData &SD) const { + return ::isWeak(SD); +} + MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW, raw_ostream &OS, bool IsLittleEndian) { diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index 04b8042..bad257a 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -39,6 +39,7 @@ MCAsmInfo::MCAsmInfo() { CommentString = "#"; LabelSuffix = ":"; UseAssignmentForEHBegin = false; + NeedsLocalForSize = false; PrivateGlobalPrefix = "L"; PrivateLabelPrefix = PrivateGlobalPrefix; LinkerPrivateGlobalPrefix = ""; @@ -68,6 +69,7 @@ MCAsmInfo::MCAsmInfo() { HasAggressiveSymbolFolding = true; COMMDirectiveAlignmentIsInBytes = true; LCOMMDirectiveAlignmentType = LCOMM::NoAlignment; + HasFunctionAlignment = true; HasDotTypeDotSizeDirective = true; HasSingleParameterDotFile = true; HasIdentDirective = false; diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp index a2a2504..ae9486d 100644 --- a/lib/MC/MCAsmInfoDarwin.cpp +++ b/lib/MC/MCAsmInfoDarwin.cpp @@ -16,7 +16,6 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCStreamer.h" using namespace llvm; bool MCAsmInfoDarwin::isSectionAtomizableBySymbols( diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 2312cd5..62f5279 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -267,7 +267,7 @@ void MCAsmStreamer::EmitCommentsAndEOL() { } CommentStream.flush(); - StringRef Comments = CommentToEmit.str(); + StringRef Comments = CommentToEmit; assert(Comments.back() == '\n' && "Comment array not newline terminated"); diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 50ce845..857eafc 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -142,7 +142,7 @@ static bool getSymbolOffsetImpl(const MCAsmLayout &Layout, // If SD is a variable, evaluate it. 
MCValue Target; - if (!S.getVariableValue()->EvaluateAsValue(Target, &Layout, nullptr)) + if (!S.getVariableValue()->EvaluateAsRelocatable(Target, &Layout, nullptr)) report_fatal_error("unable to evaluate offset for variable '" + S.getName() + "'"); @@ -188,7 +188,7 @@ const MCSymbol *MCAsmLayout::getBaseSymbol(const MCSymbol &Symbol) const { const MCExpr *Expr = Symbol.getVariableValue(); MCValue Value; - if (!Expr->EvaluateAsValue(Value, this, nullptr)) + if (!Expr->evaluateAsValue(Value, *this)) llvm_unreachable("Invalid Expression"); const MCSymbolRefExpr *RefB = Value.getSymB(); @@ -277,9 +277,8 @@ MCFragment::MCFragment() : Kind(FragmentType(~0)) { MCFragment::~MCFragment() { } -MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent) - : Kind(_Kind), Parent(_Parent), Atom(nullptr), Offset(~UINT64_C(0)) -{ +MCFragment::MCFragment(FragmentType Kind, MCSectionData *Parent) + : Kind(Kind), Parent(Parent), Atom(nullptr), Offset(~UINT64_C(0)) { if (Parent) Parent->getFragmentList().push_back(this); } @@ -298,15 +297,10 @@ MCEncodedFragmentWithFixups::~MCEncodedFragmentWithFixups() { MCSectionData::MCSectionData() : Section(nullptr) {} -MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A) - : Section(&_Section), - Ordinal(~UINT32_C(0)), - Alignment(1), - BundleLockState(NotBundleLocked), - BundleLockNestingDepth(0), - BundleGroupBeforeFirstInst(false), - HasInstructions(false) -{ +MCSectionData::MCSectionData(const MCSection &Section, MCAssembler *A) + : Section(&Section), Ordinal(~UINT32_C(0)), Alignment(1), + BundleLockState(NotBundleLocked), BundleLockNestingDepth(0), + BundleGroupBeforeFirstInst(false), HasInstructions(false) { if (A) A->getSectionList().push_back(this); } @@ -364,10 +358,10 @@ void MCSectionData::setBundleLockState(BundleLockStateType NewState) { MCSymbolData::MCSymbolData() : Symbol(nullptr) {} -MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment, - uint64_t _Offset, MCAssembler *A) - : Symbol(&_Symbol), Fragment(_Fragment), Offset(_Offset), - SymbolSize(nullptr), CommonAlign(-1U), Flags(0), Index(0) { +MCSymbolData::MCSymbolData(const MCSymbol &Symbol, MCFragment *Fragment, + uint64_t Offset, MCAssembler *A) + : Symbol(&Symbol), Fragment(Fragment), Offset(Offset), SymbolSize(nullptr), + CommonAlign(-1U), Flags(0), Index(0) { if (A) A->getSymbolList().push_back(this); } @@ -479,18 +473,6 @@ const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const { return SD->getFragment()->getAtom(); } -// Try to fully compute Expr to an absolute value and if that fails produce -// a relocatable expr. -// FIXME: Should this be the behavior of EvaluateAsRelocatable itself? -static bool evaluate(const MCExpr &Expr, const MCAsmLayout &Layout, - const MCFixup &Fixup, MCValue &Target) { - if (Expr.EvaluateAsValue(Target, &Layout, &Fixup)) { - if (Target.isAbsolute()) - return true; - } - return Expr.EvaluateAsRelocatable(Target, &Layout, &Fixup); -} - bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, const MCFixup &Fixup, const MCFragment *DF, MCValue &Target, uint64_t &Value) const { @@ -500,7 +482,7 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, // probably merge the two into a single callback that tries to evaluate a // fixup and records a relocation if one is needed. 
const MCExpr *Expr = Fixup.getValue(); - if (!evaluate(*Expr, Layout, Fixup, Target)) + if (!Expr->EvaluateAsRelocatable(Target, &Layout, &Fixup)) getContext().FatalError(Fixup.getLoc(), "expected relocatable expression"); bool IsPCRel = Backend.getFixupKindInfo( @@ -795,7 +777,7 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, case MCFragment::FT_LEB: { const MCLEBFragment &LF = cast<MCLEBFragment>(F); - OW->WriteBytes(LF.getContents().str()); + OW->WriteBytes(LF.getContents()); break; } @@ -811,12 +793,12 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, case MCFragment::FT_Dwarf: { const MCDwarfLineAddrFragment &OF = cast<MCDwarfLineAddrFragment>(F); - OW->WriteBytes(OF.getContents().str()); + OW->WriteBytes(OF.getContents()); break; } case MCFragment::FT_DwarfFrame: { const MCDwarfCallFrameFragment &CF = cast<MCDwarfCallFrameFragment>(F); - OW->WriteBytes(CF.getContents().str()); + OW->WriteBytes(CF.getContents()); break; } } @@ -1040,7 +1022,10 @@ bool MCAssembler::relaxInstruction(MCAsmLayout &Layout, bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) { uint64_t OldSize = LF.getContents().size(); - int64_t Value = LF.getValue().evaluateKnownAbsolute(Layout); + int64_t Value; + bool Abs = LF.getValue().evaluateKnownAbsolute(Value, Layout); + if (!Abs) + report_fatal_error("sleb128 and uleb128 expressions must be absolute"); SmallString<8> &Data = LF.getContents(); Data.clear(); raw_svector_ostream OSE(Data); @@ -1056,7 +1041,10 @@ bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout, MCDwarfLineAddrFragment &DF) { MCContext &Context = Layout.getAssembler().getContext(); uint64_t OldSize = DF.getContents().size(); - int64_t AddrDelta = DF.getAddrDelta().evaluateKnownAbsolute(Layout); + int64_t AddrDelta; + bool Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout); + assert(Abs && "We created a line delta with an invalid expression"); + (void) Abs; int64_t LineDelta; LineDelta = DF.getLineDelta(); SmallString<8> &Data = DF.getContents(); @@ -1071,7 +1059,10 @@ bool MCAssembler::relaxDwarfCallFrameFragment(MCAsmLayout &Layout, MCDwarfCallFrameFragment &DF) { MCContext &Context = Layout.getAssembler().getContext(); uint64_t OldSize = DF.getContents().size(); - int64_t AddrDelta = DF.getAddrDelta().evaluateKnownAbsolute(Layout); + int64_t AddrDelta; + bool Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout); + assert(Abs && "We created call frame with an invalid expression"); + (void) Abs; SmallString<8> &Data = DF.getContents(); Data.clear(); raw_svector_ostream OSE(Data); diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 721edd4..3cb3ea1 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -18,6 +18,7 @@ #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" @@ -33,7 +34,7 @@ MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri, const MCObjectFileInfo *mofi, const SourceMgr *mgr, bool DoAutoReset) : SrcMgr(mgr), MAI(mai), MRI(mri), MOFI(mofi), Allocator(), - Symbols(Allocator), UsedNames(Allocator), NextUniqueID(0), + Symbols(Allocator), UsedNames(Allocator), CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0), DwarfLocSeen(false), GenDwarfForAssembly(false), GenDwarfFileNumber(0), DwarfVersion(4), AllowTemporaryLabels(true), DwarfCompileUnitID(0), @@ -86,7 +87,7 
@@ void MCContext::reset() { ELFUniquingMap.clear(); COFFUniquingMap.clear(); - NextUniqueID = 0; + NextID.clear(); AllowTemporaryLabels = true; DwarfLocSeen = false; GenDwarfForAssembly = false; @@ -97,13 +98,15 @@ void MCContext::reset() { // Symbol Manipulation //===----------------------------------------------------------------------===// -MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) { - assert(!Name.empty() && "Normal symbols cannot be unnamed!"); +MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) { + SmallString<128> NameSV; + StringRef NameRef = Name.toStringRef(NameSV); - MCSymbol *&Sym = Symbols[Name]; + assert(!NameRef.empty() && "Normal symbols cannot be unnamed!"); + MCSymbol *&Sym = Symbols[NameRef]; if (!Sym) - Sym = CreateSymbol(Name); + Sym = CreateSymbol(NameRef, false); return Sym; } @@ -130,53 +133,54 @@ MCSymbol *MCContext::getOrCreateSectionSymbol(const MCSectionELF &Section) { return Sym; } -MCSymbol *MCContext::getOrCreateFrameAllocSymbol(StringRef FuncName) { - return GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) + - "frameallocation_" + FuncName); +MCSymbol *MCContext::getOrCreateFrameAllocSymbol(StringRef FuncName, + unsigned Idx) { + return GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) + FuncName + + "$frame_escape_" + Twine(Idx)); } -MCSymbol *MCContext::CreateSymbol(StringRef Name) { +MCSymbol *MCContext::CreateSymbol(StringRef Name, bool AlwaysAddSuffix) { // Determine whether this is an assembler temporary or normal label, if used. - bool isTemporary = false; + bool IsTemporary = false; if (AllowTemporaryLabels) - isTemporary = Name.startswith(MAI->getPrivateGlobalPrefix()); + IsTemporary = Name.startswith(MAI->getPrivateGlobalPrefix()); - auto NameEntry = UsedNames.insert(std::make_pair(Name, true)); - if (!NameEntry.second) { - assert(isTemporary && "Cannot rename non-temporary symbols"); - SmallString<128> NewName = Name; - do { + SmallString<128> NewName = Name; + bool AddSuffix = AlwaysAddSuffix; + unsigned &NextUniqueID = NextID[Name]; + for (;;) { + if (AddSuffix) { NewName.resize(Name.size()); raw_svector_ostream(NewName) << NextUniqueID++; - NameEntry = UsedNames.insert(std::make_pair(NewName, true)); - } while (!NameEntry.second); + } + auto NameEntry = UsedNames.insert(std::make_pair(NewName, true)); + if (NameEntry.second) { + // Ok, we found a name. Have the MCSymbol object itself refer to the copy + // of the string that is embedded in the UsedNames entry. + MCSymbol *Result = + new (*this) MCSymbol(NameEntry.first->getKey(), IsTemporary); + return Result; + } + assert(IsTemporary && "Cannot rename non-temporary symbols"); + AddSuffix = true; } - - // Ok, the entry doesn't already exist. Have the MCSymbol object itself refer - // to the copy of the string that is embedded in the UsedNames entry. 
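The CreateSymbol rewrite above replaces the single NextUniqueID counter with a per-base-name map, so "tmp" and "foo" each get their own 0, 1, 2, ... suffix sequence and a suffix is only forced on a real collision (the real code additionally asserts that only temporary symbols may be renamed). The same uniquing loop as a self-contained sketch, using standard containers in place of the patch's map keyed by symbol name:

    #include <string>
    #include <unordered_map>
    #include <unordered_set>

    std::unordered_map<std::string, unsigned> NextID; // per-base-name counters
    std::unordered_set<std::string> UsedNames;

    std::string uniqueName(const std::string &Name, bool AlwaysAddSuffix) {
      unsigned &Counter = NextID[Name];
      bool AddSuffix = AlwaysAddSuffix;
      for (;;) {
        std::string Candidate =
            AddSuffix ? Name + std::to_string(Counter++) : Name;
        if (UsedNames.insert(Candidate).second)
          return Candidate; // first unused spelling wins
        AddSuffix = true;   // collision: append the next suffix and retry
      }
    }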
- MCSymbol *Result = - new (*this) MCSymbol(NameEntry.first->getKey(), isTemporary); - - return Result; + llvm_unreachable("Infinite loop"); } -MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) { +MCSymbol *MCContext::createTempSymbol(const Twine &Name, bool AlwaysAddSuffix) { SmallString<128> NameSV; - return GetOrCreateSymbol(Name.toStringRef(NameSV)); + raw_svector_ostream(NameSV) << MAI->getPrivateGlobalPrefix() << Name; + return CreateSymbol(NameSV, AlwaysAddSuffix); } MCSymbol *MCContext::CreateLinkerPrivateTempSymbol() { SmallString<128> NameSV; - raw_svector_ostream(NameSV) - << MAI->getLinkerPrivateGlobalPrefix() << "tmp" << NextUniqueID++; - return CreateSymbol(NameSV); + raw_svector_ostream(NameSV) << MAI->getLinkerPrivateGlobalPrefix() << "tmp"; + return CreateSymbol(NameSV, true); } MCSymbol *MCContext::CreateTempSymbol() { - SmallString<128> NameSV; - raw_svector_ostream(NameSV) - << MAI->getPrivateGlobalPrefix() << "tmp" << NextUniqueID++; - return CreateSymbol(NameSV); + return createTempSymbol("tmp", true); } unsigned MCContext::NextInstance(unsigned LocalLabelVal) { @@ -214,24 +218,20 @@ MCSymbol *MCContext::GetDirectionalLocalSymbol(unsigned LocalLabelVal, return getOrCreateDirectionalLocalSymbol(LocalLabelVal, Instance); } -MCSymbol *MCContext::LookupSymbol(StringRef Name) const { - return Symbols.lookup(Name); -} - MCSymbol *MCContext::LookupSymbol(const Twine &Name) const { SmallString<128> NameSV; - Name.toVector(NameSV); - return LookupSymbol(NameSV.str()); + StringRef NameRef = Name.toStringRef(NameSV); + return Symbols.lookup(NameRef); } //===----------------------------------------------------------------------===// // Section Management //===----------------------------------------------------------------------===// -const MCSectionMachO *MCContext:: -getMachOSection(StringRef Segment, StringRef Section, - unsigned TypeAndAttributes, - unsigned Reserved2, SectionKind Kind) { +const MCSectionMachO * +MCContext::getMachOSection(StringRef Segment, StringRef Section, + unsigned TypeAndAttributes, unsigned Reserved2, + SectionKind Kind, const char *BeginSymName) { // We unique sections by their segment/section pair. The returned section // may not have the same flags as the requested section, if so this should be @@ -244,17 +244,23 @@ getMachOSection(StringRef Segment, StringRef Section, Name += Section; // Do the lookup, if we have a hit, return it. - const MCSectionMachO *&Entry = MachOUniquingMap[Name.str()]; - if (Entry) return Entry; + const MCSectionMachO *&Entry = MachOUniquingMap[Name]; + if (Entry) + return Entry; + + MCSymbol *Begin = nullptr; + if (BeginSymName) + Begin = createTempSymbol(BeginSymName, false); // Otherwise, return a new section. 
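The BeginSymName parameter being threaded through getMachOSection here (and through the ELF and COFF getters below) lets a section carry a label marking its start; the label is created with AlwaysAddSuffix=false, so a well-known name is used verbatim when it is still free. A hedged usage sketch, assuming a Mach-O context where the private prefix is "L":

    const MCSectionMachO *Sec = Ctx.getMachOSection(
        "__DWARF", "__debug_line", MachO::S_ATTR_DEBUG,
        SectionKind::getMetadata(), "section_line");
    MCSymbol *Begin = Sec->getBeginSymbol(); // e.g. "Lsection_line"; emitted
                                             // by the streamer on first switch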
return Entry = new (*this) MCSectionMachO(Segment, Section, TypeAndAttributes, - Reserved2, Kind); + Reserved2, Kind, Begin); } const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type, - unsigned Flags) { - return getELFSection(Section, Type, Flags, 0, ""); + unsigned Flags, + const char *BeginSymName) { + return getELFSection(Section, Type, Flags, 0, "", BeginSymName); } void MCContext::renameELFSection(const MCSectionELF *Section, StringRef Name) { @@ -272,7 +278,8 @@ void MCContext::renameELFSection(const MCSectionELF *Section, StringRef Name) { const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type, unsigned Flags, unsigned EntrySize, - StringRef Group, bool Unique) { + StringRef Group, bool Unique, + const char *BeginSymName) { // Do the lookup, if we have a hit, return it. auto IterBool = ELFUniquingMap.insert( std::make_pair(SectionGroupPair(Section, Group), nullptr)); @@ -292,8 +299,12 @@ const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type, else Kind = SectionKind::getReadOnly(); - MCSectionELF *Result = new (*this) - MCSectionELF(CachedName, Type, Flags, Kind, EntrySize, GroupSym, Unique); + MCSymbol *Begin = nullptr; + if (BeginSymName) + Begin = createTempSymbol(BeginSymName, false); + + MCSectionELF *Result = new (*this) MCSectionELF( + CachedName, Type, Flags, Kind, EntrySize, GroupSym, Unique, Begin); if (!Unique) Entry.second = Result; return Result; @@ -301,22 +312,23 @@ const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type, const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type, unsigned Flags, unsigned EntrySize, - StringRef Group) { - return getELFSection(Section, Type, Flags, EntrySize, Group, false); + StringRef Group, + const char *BeginSymName) { + return getELFSection(Section, Type, Flags, EntrySize, Group, false, + BeginSymName); } const MCSectionELF *MCContext::CreateELFGroupSection() { - MCSectionELF *Result = - new (*this) MCSectionELF(".group", ELF::SHT_GROUP, 0, - SectionKind::getReadOnly(), 4, nullptr, false); + MCSectionELF *Result = new (*this) + MCSectionELF(".group", ELF::SHT_GROUP, 0, SectionKind::getReadOnly(), 4, + nullptr, false, nullptr); return Result; } -const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section, - unsigned Characteristics, - SectionKind Kind, - StringRef COMDATSymName, - int Selection) { +const MCSectionCOFF * +MCContext::getCOFFSection(StringRef Section, unsigned Characteristics, + SectionKind Kind, StringRef COMDATSymName, + int Selection, const char *BeginSymName) { // Do the lookup, if we have a hit, return it. 
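COFF sections are uniqued by a (name, COMDAT symbol, selection) triple rather than by name alone, since one section name may recur across COMDAT groups. The lookup that follows, sketched with approximated types:

    using SectionGroupTriple = std::tuple<std::string, std::string, int>;
    std::map<SectionGroupTriple, const MCSectionCOFF *> COFFUniquingMap;

    SectionGroupTriple T(SectionName, COMDATSymName, Selection);
    auto It = COFFUniquingMap.find(T);
    if (It != COFFUniquingMap.end())
      return It->second; // already created: reuse the cached section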
SectionGroupTriple T(Section, COMDATSymName, Selection); @@ -329,18 +341,23 @@ const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section, if (!COMDATSymName.empty()) COMDATSymbol = GetOrCreateSymbol(COMDATSymName); + MCSymbol *Begin = nullptr; + if (BeginSymName) + Begin = createTempSymbol(BeginSymName, false); + StringRef CachedName = std::get<0>(Iter->first); - MCSectionCOFF *Result = new (*this) - MCSectionCOFF(CachedName, Characteristics, COMDATSymbol, Selection, Kind); + MCSectionCOFF *Result = new (*this) MCSectionCOFF( + CachedName, Characteristics, COMDATSymbol, Selection, Kind, Begin); Iter->second = Result; return Result; } -const MCSectionCOFF * -MCContext::getCOFFSection(StringRef Section, unsigned Characteristics, - SectionKind Kind) { - return getCOFFSection(Section, Characteristics, Kind, "", 0); +const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section, + unsigned Characteristics, + SectionKind Kind, + const char *BeginSymName) { + return getCOFFSection(Section, Characteristics, Kind, "", 0, BeginSymName); } const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section) { diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 5d96914..87e7ed1 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -179,28 +179,19 @@ EmitDwarfLineTable(MCObjectStreamer *MCOS, const MCSection *Section, } // Emit a DW_LNE_end_sequence for the end of the section. - // Using the pointer Section create a temporary label at the end of the - // section and use that and the LastLabel to compute the address delta - // and use INT64_MAX as the line delta which is the signal that this is - // actually a DW_LNE_end_sequence. + // Use the section end label to compute the address delta and use INT64_MAX + // as the line delta which is the signal that this is actually a + // DW_LNE_end_sequence. + MCSymbol *SectionEnd = MCOS->endSection(Section); - // Switch to the section to be able to create a symbol at its end. - // TODO: keep track of the last subsection so that this symbol appears in the - // correct place. - MCOS->SwitchSection(Section); + // Switch back the dwarf line section, in case endSection had to switch the + // section. + MCContext &Ctx = MCOS->getContext(); + MCOS->SwitchSection(Ctx.getObjectFileInfo()->getDwarfLineSection()); - MCContext &context = MCOS->getContext(); - // Create a symbol at the end of the section. - MCSymbol *SectionEnd = context.CreateTempSymbol(); - // Set the value of the symbol, as we are at the end of the section. - MCOS->EmitLabel(SectionEnd); - - // Switch back the dwarf line section. 
- MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfLineSection()); - - const MCAsmInfo *asmInfo = MCOS->getContext().getAsmInfo(); + const MCAsmInfo *AsmInfo = Ctx.getAsmInfo(); MCOS->EmitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd, - asmInfo->getPointerSize()); + AsmInfo->getPointerSize()); } // @@ -243,7 +234,8 @@ std::pair<MCSymbol *, MCSymbol *> MCDwarfLineTableHeader::Emit(MCStreamer *MCOS) 0, // length of DW_LNS_set_epilogue_begin 1 // DW_LNS_set_isa }; - assert(array_lengthof(StandardOpcodeLengths) == (DWARF2_LINE_OPCODE_BASE - 1)); + assert(array_lengthof(StandardOpcodeLengths) == + (DWARF2_LINE_OPCODE_BASE - 1)); return Emit(MCOS, StandardOpcodeLengths); } @@ -446,7 +438,7 @@ void MCDwarfLineAddr::Encode(MCContext &Context, int64_t LineDelta, if (LineDelta == INT64_MAX) { if (AddrDelta == MAX_SPECIAL_ADDR_DELTA) OS << char(dwarf::DW_LNS_const_add_pc); - else { + else if (AddrDelta) { OS << char(dwarf::DW_LNS_advance_pc); encodeULEB128(AddrDelta, OS); } @@ -1007,11 +999,13 @@ static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol, namespace { class FrameEmitterImpl { int CFAOffset; + int InitialCFAOffset; bool IsEH; const MCSymbol *SectionStart; public: FrameEmitterImpl(bool isEH) - : CFAOffset(0), IsEH(isEH), SectionStart(nullptr) {} + : CFAOffset(0), InitialCFAOffset(0), IsEH(isEH), SectionStart(nullptr) { + } void setSectionStart(const MCSymbol *Label) { SectionStart = Label; } @@ -1292,7 +1286,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer, Augmentation += "R"; if (IsSignalFrame) Augmentation += "S"; - streamer.EmitBytes(Augmentation.str()); + streamer.EmitBytes(Augmentation); } streamer.EmitIntValue(0, 1); @@ -1353,6 +1347,8 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer, EmitCFIInstructions(streamer, Instructions, nullptr); } + InitialCFAOffset = CFAOffset; + // Padding streamer.EmitValueToAlignment(IsEH ? 4 : MAI->getPointerSize()); @@ -1368,6 +1364,8 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCObjectStreamer &streamer, MCSymbol *fdeEnd = context.CreateTempSymbol(); const MCObjectFileInfo *MOFI = context.getObjectFileInfo(); + CFAOffset = InitialCFAOffset; + // Length const MCExpr *Length = MakeStartMinusEndExpr(streamer, *fdeStart, *fdeEnd, 0); emitAbsValue(streamer, Length, 4); diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index 199825e..cdf5033 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -32,6 +32,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -122,12 +123,11 @@ void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) { // If neither T1 < T2 nor T2 < T1 according to this ordering, use T2 (the user // provided type). 
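Concretely, the ordering is NOTYPE < OBJECT < FUNC < GNU_IFUNC < TLS, and whichever of the two types sits later in that list wins. Worked calls (ELF values: STT_NOTYPE=0, STT_OBJECT=1, STT_FUNC=2, STT_TLS=6, STT_GNU_IFUNC=10):

    CombineSymbolTypes(ELF::STT_NOTYPE, ELF::STT_FUNC); // returns STT_FUNC
    CombineSymbolTypes(ELF::STT_TLS, ELF::STT_FUNC);    // returns STT_TLS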
static unsigned CombineSymbolTypes(unsigned T1, unsigned T2) { - unsigned TypeOrdering[] = {ELF::STT_NOTYPE, ELF::STT_OBJECT, ELF::STT_FUNC, - ELF::STT_GNU_IFUNC, ELF::STT_TLS}; - for (unsigned i = 0; i != array_lengthof(TypeOrdering); ++i) { - if (T1 == TypeOrdering[i]) + for (unsigned Type : {ELF::STT_NOTYPE, ELF::STT_OBJECT, ELF::STT_FUNC, + ELF::STT_GNU_IFUNC, ELF::STT_TLS}) { + if (T1 == Type) return T2; - if (T2 == TypeOrdering[i]) + if (T2 == Type) return T1; } diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index 709dc6b..8a64403 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -192,6 +192,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_GOTPAGE: return "GOTPAGE"; case VK_GOTPAGEOFF: return "GOTPAGEOFF"; case VK_SECREL: return "SECREL32"; + case VK_SIZE: return "SIZE"; case VK_WEAKREF: return "WEAKREF"; case VK_ARM_NONE: return "none"; case VK_ARM_TARGET1: return "target1"; @@ -311,6 +312,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) { .Case("gotpageoff", VK_GOTPAGEOFF) .Case("imgrel", VK_COFF_IMGREL32) .Case("secrel32", VK_SECREL) + .Case("size", VK_SIZE) .Case("l", VK_PPC_LO) .Case("h", VK_PPC_HI) .Case("ha", VK_PPC_HA) @@ -404,13 +406,10 @@ bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const { return EvaluateAsAbsolute(Res, &Asm, nullptr, nullptr); } -int64_t MCExpr::evaluateKnownAbsolute(const MCAsmLayout &Layout) const { - int64_t Res; - bool Abs = - evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr, true); - (void)Abs; - assert(Abs && "Not actually absolute"); - return Res; +bool MCExpr::evaluateKnownAbsolute(int64_t &Res, + const MCAsmLayout &Layout) const { + return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr, + true); } bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm, @@ -433,8 +432,8 @@ bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm, return true; } - bool IsRelocatable = EvaluateAsRelocatableImpl( - Value, Asm, Layout, nullptr, Addrs, InSet, /*ForceVarExpansion*/ false); + bool IsRelocatable = + EvaluateAsRelocatableImpl(Value, Asm, Layout, nullptr, Addrs, InSet); // Record the current value. Res = Value.getConstant(); @@ -443,13 +442,10 @@ bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm, } /// \brief Helper method for \see EvaluateSymbolAdd(). -static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm, - const MCAsmLayout *Layout, - const SectionAddrMap *Addrs, - bool InSet, - const MCSymbolRefExpr *&A, - const MCSymbolRefExpr *&B, - int64_t &Addend) { +static void AttemptToFoldSymbolOffsetDifference( + const MCAssembler *Asm, const MCAsmLayout *Layout, + const SectionAddrMap *Addrs, bool InSet, const MCSymbolRefExpr *&A, + const MCSymbolRefExpr *&B, int64_t &Addend) { if (!A || !B) return; @@ -523,13 +519,11 @@ static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm, /// They might look redundant, but this function can be used before layout /// is done (see the object streamer for example) and having the Asm argument /// lets us avoid relaxations early. 
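The folding implemented below reduces (A + C1) - (B + C2) to a plain constant once A and B have known positions in the same section; for instance, with A resolved to offset 16 and B to offset 4, the value of A - B + 2 folds to 14 and no relocation is needed. The arithmetic only, offsets hypothetical:

    uint64_t AddrA = 16, AddrB = 4; // resolved layout offsets
    int64_t Addend = 2;
    int64_t Folded = (int64_t)(AddrA - AddrB) + Addend; // == 14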
-static bool EvaluateSymbolicAdd(const MCAssembler *Asm, - const MCAsmLayout *Layout, - const SectionAddrMap *Addrs, - bool InSet, - const MCValue &LHS,const MCSymbolRefExpr *RHS_A, - const MCSymbolRefExpr *RHS_B, int64_t RHS_Cst, - MCValue &Res) { +static bool +EvaluateSymbolicAdd(const MCAssembler *Asm, const MCAsmLayout *Layout, + const SectionAddrMap *Addrs, bool InSet, const MCValue &LHS, + const MCSymbolRefExpr *RHS_A, const MCSymbolRefExpr *RHS_B, + int64_t RHS_Cst, MCValue &Res) { // FIXME: This routine (and other evaluation parts) are *incredibly* sloppy // about dealing with modifiers. This will ultimately bite us, one day. const MCSymbolRefExpr *LHS_A = LHS.getSymA(); @@ -587,21 +581,29 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res, const MCFixup *Fixup) const { MCAssembler *Assembler = Layout ? &Layout->getAssembler() : nullptr; return EvaluateAsRelocatableImpl(Res, Assembler, Layout, Fixup, nullptr, - false, /*ForceVarExpansion*/ false); + false); } -bool MCExpr::EvaluateAsValue(MCValue &Res, const MCAsmLayout *Layout, - const MCFixup *Fixup) const { - MCAssembler *Assembler = Layout ? &Layout->getAssembler() : nullptr; - return EvaluateAsRelocatableImpl(Res, Assembler, Layout, Fixup, nullptr, - false, /*ForceVarExpansion*/ true); +bool MCExpr::evaluateAsValue(MCValue &Res, const MCAsmLayout &Layout) const { + MCAssembler *Assembler = &Layout.getAssembler(); + return EvaluateAsRelocatableImpl(Res, Assembler, &Layout, nullptr, nullptr, + true); +} + +static bool canExpand(const MCSymbol &Sym, const MCAssembler *Asm, bool InSet) { + if (InSet) + return true; + if (!Asm) + return false; + const MCSymbolData &SD = Asm->getSymbolData(Sym); + return !Asm->getWriter().isWeak(SD); } bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm, const MCAsmLayout *Layout, const MCFixup *Fixup, - const SectionAddrMap *Addrs, bool InSet, - bool ForceVarExpansion) const { + const SectionAddrMap *Addrs, + bool InSet) const { ++stats::MCExprEvaluate; switch (getKind()) { @@ -618,28 +620,24 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm, const MCSymbol &Sym = SRE->getSymbol(); // Evaluate recursively if this is a variable. - if (Sym.isVariable() && SRE->getKind() == MCSymbolRefExpr::VK_None) { + if (Sym.isVariable() && SRE->getKind() == MCSymbolRefExpr::VK_None && + canExpand(Sym, Asm, InSet)) { + bool IsMachO = SRE->hasSubsectionsViaSymbols(); if (Sym.getVariableValue()->EvaluateAsRelocatableImpl( - Res, Asm, Layout, Fixup, Addrs, true, ForceVarExpansion)) { + Res, Asm, Layout, Fixup, Addrs, InSet || IsMachO)) { + if (!IsMachO) + return true; + const MCSymbolRefExpr *A = Res.getSymA(); const MCSymbolRefExpr *B = Res.getSymB(); - - if (SRE->hasSubsectionsViaSymbols()) { - // FIXME: This is small hack. Given - // a = b + 4 - // .long a - // the OS X assembler will completely drop the 4. We should probably - // include it in the relocation or produce an error if that is not - // possible. - if (!A && !B) - return true; - } else { - if (ForceVarExpansion) - return true; - bool IsSymbol = A && A->getSymbol().isDefined(); - if (!IsSymbol) - return true; - } + // FIXME: This is small hack. Given + // a = b + 4 + // .long a + // the OS X assembler will completely drop the 4. We should probably + // include it in the relocation or produce an error if that is not + // possible. 
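Spelled out as assembler input, the case this FIXME describes (hypothetical .s file):

    b:  .long 0
    a = b + 4
    .long a    # per the comment above, the OS X assembler emits the
               # relocation against b and silently drops the +4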
+ if (!A && !B) + return true; } } @@ -651,9 +649,8 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm, const MCUnaryExpr *AUE = cast<MCUnaryExpr>(this); MCValue Value; - if (!AUE->getSubExpr()->EvaluateAsRelocatableImpl(Value, Asm, Layout, - Fixup, Addrs, InSet, - ForceVarExpansion)) + if (!AUE->getSubExpr()->EvaluateAsRelocatableImpl(Value, Asm, Layout, Fixup, + Addrs, InSet)) return false; switch (AUE->getOpcode()) { @@ -686,12 +683,10 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm, const MCBinaryExpr *ABE = cast<MCBinaryExpr>(this); MCValue LHSValue, RHSValue; - if (!ABE->getLHS()->EvaluateAsRelocatableImpl(LHSValue, Asm, Layout, - Fixup, Addrs, InSet, - ForceVarExpansion) || - !ABE->getRHS()->EvaluateAsRelocatableImpl(RHSValue, Asm, Layout, - Fixup, Addrs, InSet, - ForceVarExpansion)) + if (!ABE->getLHS()->EvaluateAsRelocatableImpl(LHSValue, Asm, Layout, Fixup, + Addrs, InSet) || + !ABE->getRHS()->EvaluateAsRelocatableImpl(RHSValue, Asm, Layout, Fixup, + Addrs, InSet)) return false; // We only support a few operations on non-constant expressions, handle @@ -704,14 +699,12 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm, // Negate RHS and add. return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue, RHSValue.getSymB(), RHSValue.getSymA(), - -RHSValue.getConstant(), - Res); + -RHSValue.getConstant(), Res); case MCBinaryExpr::Add: return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue, RHSValue.getSymA(), RHSValue.getSymB(), - RHSValue.getConstant(), - Res); + RHSValue.getConstant(), Res); } } diff --git a/lib/MC/MCLinkerOptimizationHint.cpp b/lib/MC/MCLinkerOptimizationHint.cpp index 7739878..8db22dc 100644 --- a/lib/MC/MCLinkerOptimizationHint.cpp +++ b/lib/MC/MCLinkerOptimizationHint.cpp @@ -9,7 +9,7 @@ #include "llvm/MC/MCLinkerOptimizationHint.h" #include "llvm/MC/MCAsmLayout.h" -#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/Support/LEB128.h" using namespace llvm; diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 79eab49..d5c7101 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -26,6 +26,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -39,6 +40,9 @@ private: /// need for local relocations. False by default. bool LabelSections; + bool DWARFMustBeAtTheEnd; + bool CreatedADWARFSection; + /// HasSectionLabel - map of which sections have already had a non-local /// label emitted to them. Used so we don't emit extraneous linker local /// labels in the middle of the section. @@ -51,9 +55,9 @@ private: public: MCMachOStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS, - MCCodeEmitter *Emitter, bool label) - : MCObjectStreamer(Context, MAB, OS, Emitter), - LabelSections(label) {} + MCCodeEmitter *Emitter, bool DWARFMustBeAtTheEnd, bool label) + : MCObjectStreamer(Context, MAB, OS, Emitter), LabelSections(label), + DWARFMustBeAtTheEnd(DWARFMustBeAtTheEnd), CreatedADWARFSection(false) {} /// state management void reset() override { @@ -119,10 +123,43 @@ public: } // end anonymous namespace. +static bool canGoAfterDWARF(const MCSectionMachO &MSec) { + // These sections are created by the assembler itself after the end of + // the .s file. 
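When DWARFMustBeAtTheEnd is set, the streamer asserts that no ordinary section is created after a __DWARF section; only sections the assembler itself synthesizes once the .s input is exhausted are exempt. The whitelist checked below, summarized:

    // segment    section            (assembler-synthesized late)
    // __LD       __compact_unwind
    // __IMPORT   __jump_table
    // __IMPORT   __pointers
    // __TEXT     __eh_frame
    // __DATA     __nl_symbol_ptr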
+ StringRef SegName = MSec.getSegmentName(); + StringRef SecName = MSec.getSectionName(); + + if (SegName == "__LD" && SecName == "__compact_unwind") + return true; + + if (SegName == "__IMPORT") { + if (SecName == "__jump_table") + return true; + + if (SecName == "__pointers") + return true; + } + + if (SegName == "__TEXT" && SecName == "__eh_frame") + return true; + + if (SegName == "__DATA" && SecName == "__nl_symbol_ptr") + return true; + + return false; +} + void MCMachOStreamer::ChangeSection(const MCSection *Section, const MCExpr *Subsection) { // Change the section normally. - MCObjectStreamer::ChangeSection(Section, Subsection); + bool Created = MCObjectStreamer::changeSectionImpl(Section, Subsection); + const MCSectionMachO &MSec = *cast<MCSectionMachO>(Section); + StringRef SegName = MSec.getSegmentName(); + if (SegName == "__DWARF") + CreatedADWARFSection = true; + else if (Created && DWARFMustBeAtTheEnd && !canGoAfterDWARF(MSec)) + assert(!CreatedADWARFSection && "Creating regular section after DWARF"); + // Output a linker-local symbol so we don't need section-relative local // relocations. The linker hates us when we do that. if (LabelSections && !HasSectionLabel[Section]) { @@ -455,9 +492,10 @@ void MCMachOStreamer::FinishImpl() { MCStreamer *llvm::createMachOStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS, MCCodeEmitter *CE, - bool RelaxAll, + bool RelaxAll, bool DWARFMustBeAtTheEnd, bool LabelSections) { - MCMachOStreamer *S = new MCMachOStreamer(Context, MAB, OS, CE, LabelSections); + MCMachOStreamer *S = new MCMachOStreamer(Context, MAB, OS, CE, + DWARFMustBeAtTheEnd, LabelSections); if (RelaxAll) S->getAssembler().setRelaxAll(true); return S; diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 11c9cc2..cd70362 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -183,82 +183,60 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { // Debug Information. DwarfAccelNamesSection = - Ctx->getMachOSection("__DWARF", "__apple_names", - MachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + Ctx->getMachOSection("__DWARF", "__apple_names", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata(), "names_begin"); DwarfAccelObjCSection = - Ctx->getMachOSection("__DWARF", "__apple_objc", - MachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + Ctx->getMachOSection("__DWARF", "__apple_objc", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata(), "objc_begin"); // 16 character section limit... 
DwarfAccelNamespaceSection = - Ctx->getMachOSection("__DWARF", "__apple_namespac", - MachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + Ctx->getMachOSection("__DWARF", "__apple_namespac", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata(), "namespac_begin"); DwarfAccelTypesSection = - Ctx->getMachOSection("__DWARF", "__apple_types", - MachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + Ctx->getMachOSection("__DWARF", "__apple_types", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata(), "types_begin"); DwarfAbbrevSection = - Ctx->getMachOSection("__DWARF", "__debug_abbrev", - MachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + Ctx->getMachOSection("__DWARF", "__debug_abbrev", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata(), "section_abbrev"); DwarfInfoSection = - Ctx->getMachOSection("__DWARF", "__debug_info", - MachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + Ctx->getMachOSection("__DWARF", "__debug_info", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata(), "section_info"); DwarfLineSection = - Ctx->getMachOSection("__DWARF", "__debug_line", - MachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + Ctx->getMachOSection("__DWARF", "__debug_line", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata(), "section_line"); DwarfFrameSection = - Ctx->getMachOSection("__DWARF", "__debug_frame", - MachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + Ctx->getMachOSection("__DWARF", "__debug_frame", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfPubNamesSection = - Ctx->getMachOSection("__DWARF", "__debug_pubnames", - MachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + Ctx->getMachOSection("__DWARF", "__debug_pubnames", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfPubTypesSection = - Ctx->getMachOSection("__DWARF", "__debug_pubtypes", - MachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + Ctx->getMachOSection("__DWARF", "__debug_pubtypes", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfGnuPubNamesSection = - Ctx->getMachOSection("__DWARF", "__debug_gnu_pubn", - MachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + Ctx->getMachOSection("__DWARF", "__debug_gnu_pubn", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfGnuPubTypesSection = - Ctx->getMachOSection("__DWARF", "__debug_gnu_pubt", - MachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + Ctx->getMachOSection("__DWARF", "__debug_gnu_pubt", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfStrSection = - Ctx->getMachOSection("__DWARF", "__debug_str", - MachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + Ctx->getMachOSection("__DWARF", "__debug_str", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata(), "info_string"); DwarfLocSection = - Ctx->getMachOSection("__DWARF", "__debug_loc", - MachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + Ctx->getMachOSection("__DWARF", "__debug_loc", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata(), "section_debug_loc"); DwarfARangesSection = - Ctx->getMachOSection("__DWARF", "__debug_aranges", - MachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + Ctx->getMachOSection("__DWARF", "__debug_aranges", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfRangesSection = - Ctx->getMachOSection("__DWARF", "__debug_ranges", - MachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfMacroInfoSection = - Ctx->getMachOSection("__DWARF", "__debug_macinfo", - MachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + Ctx->getMachOSection("__DWARF", "__debug_ranges", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata(), "debug_range"); DwarfDebugInlineSection = - 
Ctx->getMachOSection("__DWARF", "__debug_inlined", - MachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - StackMapSection = - Ctx->getMachOSection("__LLVM_STACKMAPS", "__llvm_stackmaps", 0, - SectionKind::getMetadata()); + Ctx->getMachOSection("__DWARF", "__debug_inlined", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + StackMapSection = Ctx->getMachOSection("__LLVM_STACKMAPS", "__llvm_stackmaps", + 0, SectionKind::getMetadata()); TLSExtraDataSection = TLSTLVSection; } @@ -477,9 +455,10 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { COFFDebugSymbolsSection = nullptr; // Debug Info Sections. - DwarfAbbrevSection = - Ctx->getELFSection(".debug_abbrev", ELF::SHT_PROGBITS, 0); - DwarfInfoSection = Ctx->getELFSection(".debug_info", ELF::SHT_PROGBITS, 0); + DwarfAbbrevSection = Ctx->getELFSection(".debug_abbrev", ELF::SHT_PROGBITS, 0, + "section_abbrev"); + DwarfInfoSection = + Ctx->getELFSection(".debug_info", ELF::SHT_PROGBITS, 0, "section_info"); DwarfLineSection = Ctx->getELFSection(".debug_line", ELF::SHT_PROGBITS, 0); DwarfFrameSection = Ctx->getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0); DwarfPubNamesSection = @@ -497,21 +476,19 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { DwarfARangesSection = Ctx->getELFSection(".debug_aranges", ELF::SHT_PROGBITS, 0); DwarfRangesSection = - Ctx->getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0); - DwarfMacroInfoSection = - Ctx->getELFSection(".debug_macinfo", ELF::SHT_PROGBITS, 0); + Ctx->getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0, "debug_range"); // DWARF5 Experimental Debug Info // Accelerator Tables DwarfAccelNamesSection = - Ctx->getELFSection(".apple_names", ELF::SHT_PROGBITS, 0); + Ctx->getELFSection(".apple_names", ELF::SHT_PROGBITS, 0, "names_begin"); DwarfAccelObjCSection = - Ctx->getELFSection(".apple_objc", ELF::SHT_PROGBITS, 0); - DwarfAccelNamespaceSection = - Ctx->getELFSection(".apple_namespaces", ELF::SHT_PROGBITS, 0); + Ctx->getELFSection(".apple_objc", ELF::SHT_PROGBITS, 0, "objc_begin"); + DwarfAccelNamespaceSection = Ctx->getELFSection( + ".apple_namespaces", ELF::SHT_PROGBITS, 0, "namespac_begin"); DwarfAccelTypesSection = - Ctx->getELFSection(".apple_types", ELF::SHT_PROGBITS, 0); + Ctx->getELFSection(".apple_types", ELF::SHT_PROGBITS, 0, "types_begin"); // Fission Sections DwarfInfoDWOSection = @@ -526,72 +503,58 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { DwarfLineDWOSection = Ctx->getELFSection(".debug_line.dwo", ELF::SHT_PROGBITS, 0); DwarfLocDWOSection = - Ctx->getELFSection(".debug_loc.dwo", ELF::SHT_PROGBITS, 0); + Ctx->getELFSection(".debug_loc.dwo", ELF::SHT_PROGBITS, 0, "skel_loc"); DwarfStrOffDWOSection = Ctx->getELFSection(".debug_str_offsets.dwo", ELF::SHT_PROGBITS, 0); - DwarfAddrSection = Ctx->getELFSection(".debug_addr", ELF::SHT_PROGBITS, 0); + DwarfAddrSection = + Ctx->getELFSection(".debug_addr", ELF::SHT_PROGBITS, 0, "addr_sec"); StackMapSection = Ctx->getELFSection(".llvm_stackmaps", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); } - void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { bool IsWoA = T.getArch() == Triple::arm || T.getArch() == Triple::thumb; CommDirectiveSupportsAlignment = true; // COFF - BSSSection = - Ctx->getCOFFSection(".bss", - COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, - SectionKind::getBSS()); - TextSection = - Ctx->getCOFFSection(".text", - (IsWoA ? 
COFF::IMAGE_SCN_MEM_16BIT - : (COFF::SectionCharacteristics)0) | - COFF::IMAGE_SCN_CNT_CODE | - COFF::IMAGE_SCN_MEM_EXECUTE | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getText()); - DataSection = - Ctx->getCOFFSection(".data", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, - SectionKind::getDataRel()); - ReadOnlySection = - Ctx->getCOFFSection(".rdata", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getReadOnly()); + BSSSection = Ctx->getCOFFSection( + ".bss", COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getBSS()); + TextSection = Ctx->getCOFFSection( + ".text", + (IsWoA ? COFF::IMAGE_SCN_MEM_16BIT : (COFF::SectionCharacteristics)0) | + COFF::IMAGE_SCN_CNT_CODE | COFF::IMAGE_SCN_MEM_EXECUTE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getText()); + DataSection = Ctx->getCOFFSection( + ".data", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); + ReadOnlySection = Ctx->getCOFFSection( + ".rdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) { StaticCtorSection = - Ctx->getCOFFSection(".CRT$XCU", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getReadOnly()); + Ctx->getCOFFSection(".CRT$XCU", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); StaticDtorSection = - Ctx->getCOFFSection(".CRT$XTX", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getReadOnly()); + Ctx->getCOFFSection(".CRT$XTX", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); } else { - StaticCtorSection = - Ctx->getCOFFSection(".ctors", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, - SectionKind::getDataRel()); - StaticDtorSection = - Ctx->getCOFFSection(".dtors", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, - SectionKind::getDataRel()); + StaticCtorSection = Ctx->getCOFFSection( + ".ctors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); + StaticDtorSection = Ctx->getCOFFSection( + ".dtors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); } // FIXME: We're emitting LSDA info into a readonly section on COFF, even @@ -611,187 +574,149 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { // Debug info. 
COFFDebugSymbolsSection = - Ctx->getCOFFSection(".debug$S", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - - DwarfAbbrevSection = - Ctx->getCOFFSection(".debug_abbrev", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfInfoSection = - Ctx->getCOFFSection(".debug_info", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfLineSection = - Ctx->getCOFFSection(".debug_line", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfFrameSection = - Ctx->getCOFFSection(".debug_frame", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfPubNamesSection = - Ctx->getCOFFSection(".debug_pubnames", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfPubTypesSection = - Ctx->getCOFFSection(".debug_pubtypes", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfGnuPubNamesSection = - Ctx->getCOFFSection(".debug_gnu_pubnames", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfGnuPubTypesSection = - Ctx->getCOFFSection(".debug_gnu_pubtypes", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfStrSection = - Ctx->getCOFFSection(".debug_str", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfLocSection = - Ctx->getCOFFSection(".debug_loc", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfARangesSection = - Ctx->getCOFFSection(".debug_aranges", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfRangesSection = - Ctx->getCOFFSection(".debug_ranges", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfMacroInfoSection = - Ctx->getCOFFSection(".debug_macinfo", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfInfoDWOSection = - Ctx->getCOFFSection(".debug_info.dwo", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfTypesDWOSection = - Ctx->getCOFFSection(".debug_types.dwo", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfAbbrevDWOSection = - Ctx->getCOFFSection(".debug_abbrev.dwo", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfStrDWOSection = - Ctx->getCOFFSection(".debug_str.dwo", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - 
COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfLineDWOSection = - Ctx->getCOFFSection(".debug_line.dwo", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfLocDWOSection = - Ctx->getCOFFSection(".debug_loc.dwo", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfStrOffDWOSection = - Ctx->getCOFFSection(".debug_str_offsets.dwo", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfAddrSection = - Ctx->getCOFFSection(".debug_addr", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfAccelNamesSection = - Ctx->getCOFFSection(".apple_names", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfAccelNamespaceSection = - Ctx->getCOFFSection(".apple_namespaces", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfAccelTypesSection = - Ctx->getCOFFSection(".apple_types", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - DwarfAccelObjCSection = - Ctx->getCOFFSection(".apple_objc", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - - DrectveSection = - Ctx->getCOFFSection(".drectve", - COFF::IMAGE_SCN_LNK_INFO | - COFF::IMAGE_SCN_LNK_REMOVE, - SectionKind::getMetadata()); - - PDataSection = - Ctx->getCOFFSection(".pdata", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getDataRel()); - - XDataSection = - Ctx->getCOFFSection(".xdata", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getDataRel()); - - TLSDataSection = - Ctx->getCOFFSection(".tls$", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, - SectionKind::getDataRel()); + Ctx->getCOFFSection(".debug$S", COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + + DwarfAbbrevSection = Ctx->getCOFFSection( + ".debug_abbrev", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata(), "section_abbrev"); + DwarfInfoSection = Ctx->getCOFFSection( + ".debug_info", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata(), "section_info"); + DwarfLineSection = Ctx->getCOFFSection( + ".debug_line", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata(), "section_line"); + + DwarfFrameSection = Ctx->getCOFFSection( + ".debug_frame", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfPubNamesSection = Ctx->getCOFFSection( + ".debug_pubnames", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfPubTypesSection = Ctx->getCOFFSection( + 
".debug_pubtypes", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfGnuPubNamesSection = Ctx->getCOFFSection( + ".debug_gnu_pubnames", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfGnuPubTypesSection = Ctx->getCOFFSection( + ".debug_gnu_pubtypes", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfStrSection = Ctx->getCOFFSection( + ".debug_str", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata(), "info_string"); + DwarfLocSection = Ctx->getCOFFSection( + ".debug_loc", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata(), "section_debug_loc"); + DwarfARangesSection = Ctx->getCOFFSection( + ".debug_aranges", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfRangesSection = Ctx->getCOFFSection( + ".debug_ranges", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata(), "debug_range"); + DwarfInfoDWOSection = Ctx->getCOFFSection( + ".debug_info.dwo", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata(), "section_info_dwo"); + DwarfTypesDWOSection = Ctx->getCOFFSection( + ".debug_types.dwo", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata(), "section_types_dwo"); + DwarfAbbrevDWOSection = Ctx->getCOFFSection( + ".debug_abbrev.dwo", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata(), "section_abbrev_dwo"); + DwarfStrDWOSection = Ctx->getCOFFSection( + ".debug_str.dwo", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata(), "skel_string"); + DwarfLineDWOSection = Ctx->getCOFFSection( + ".debug_line.dwo", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfLocDWOSection = Ctx->getCOFFSection( + ".debug_loc.dwo", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata(), "skel_loc"); + DwarfStrOffDWOSection = Ctx->getCOFFSection( + ".debug_str_offsets.dwo", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfAddrSection = Ctx->getCOFFSection( + ".debug_addr", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata(), "addr_sec"); + DwarfAccelNamesSection = Ctx->getCOFFSection( + ".apple_names", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata(), "names_begin"); + DwarfAccelNamespaceSection = Ctx->getCOFFSection( + ".apple_namespaces", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata(), "namespac_begin"); + 
DwarfAccelTypesSection = Ctx->getCOFFSection( + ".apple_types", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata(), "types_begin"); + DwarfAccelObjCSection = Ctx->getCOFFSection( + ".apple_objc", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata(), "objc_begin"); + + DrectveSection = Ctx->getCOFFSection( + ".drectve", COFF::IMAGE_SCN_LNK_INFO | COFF::IMAGE_SCN_LNK_REMOVE, + SectionKind::getMetadata()); + + PDataSection = Ctx->getCOFFSection( + ".pdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, + SectionKind::getDataRel()); + + XDataSection = Ctx->getCOFFSection( + ".xdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, + SectionKind::getDataRel()); + + TLSDataSection = Ctx->getCOFFSection( + ".tls$", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); } void MCObjectFileInfo::InitMCObjectFileInfo(StringRef T, Reloc::Model relocm, diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index 08fe501..6aa2de3 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -20,6 +20,7 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, @@ -31,8 +32,8 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter_, - MCAssembler *_Assembler) - : MCStreamer(Context), Assembler(_Assembler), CurSectionData(nullptr), + MCAssembler *Assembler) + : MCStreamer(Context), Assembler(Assembler), CurSectionData(nullptr), EmitEHFrame(true), EmitDebugFrame(false) {} MCObjectStreamer::~MCObjectStreamer() { @@ -181,10 +182,16 @@ void MCObjectStreamer::EmitWeakReference(MCSymbol *Alias, void MCObjectStreamer::ChangeSection(const MCSection *Section, const MCExpr *Subsection) { + changeSectionImpl(Section, Subsection); +} + +bool MCObjectStreamer::changeSectionImpl(const MCSection *Section, + const MCExpr *Subsection) { assert(Section && "Cannot switch to a null section!"); flushPendingLabels(nullptr); - CurSectionData = &getAssembler().getOrCreateSectionData(*Section); + bool Created; + CurSectionData = &getAssembler().getOrCreateSectionData(*Section, &Created); int64_t IntSubsection = 0; if (Subsection && @@ -194,6 +201,7 @@ void MCObjectStreamer::ChangeSection(const MCSection *Section, report_fatal_error("Subsection number out of range"); CurInsertionPoint = CurSectionData->getSubsectionInsertionPoint(unsigned(IntSubsection)); + return Created; } void MCObjectStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp index 94d7cd6..3c536ec 100644 --- a/lib/MC/MCObjectWriter.cpp +++ b/lib/MC/MCObjectWriter.cpp @@ -17,11 +17,9 @@ using namespace llvm; MCObjectWriter::~MCObjectWriter() { } -bool -MCObjectWriter::IsSymbolRefDifferenceFullyResolved(const MCAssembler &Asm, - const MCSymbolRefExpr *A, - const MCSymbolRefExpr *B, - bool InSet) const { +bool MCObjectWriter::IsSymbolRefDifferenceFullyResolved( + const MCAssembler &Asm, const MCSymbolRefExpr *A, const MCSymbolRefExpr *B, + bool InSet) const { // Modified symbol 
references cannot be resolved. if (A->getKind() != MCSymbolRefExpr::VK_None || B->getKind() != MCSymbolRefExpr::VK_None) @@ -54,3 +52,5 @@ MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, // On ELF and COFF A - B is absolute if A and B are in the same section. return &SecA == &SecB; } + +bool MCObjectWriter::isWeak(const MCSymbolData &SD) const { return false; } diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index 5c8ec66..b983d99 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -21,7 +21,7 @@ #include <cstdlib> using namespace llvm; -AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) { +AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) { CurPtr = nullptr; isAtStartOfLine = true; AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index ef6a540..2bf980b 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -339,8 +339,8 @@ private: DK_WEAK_DEF_CAN_BE_HIDDEN, DK_COMM, DK_COMMON, DK_LCOMM, DK_ABORT, DK_INCLUDE, DK_INCBIN, DK_CODE16, DK_CODE16GCC, DK_REPT, DK_IRP, DK_IRPC, DK_IF, DK_IFEQ, DK_IFGE, DK_IFGT, DK_IFLE, DK_IFLT, DK_IFNE, DK_IFB, - DK_IFNB, DK_IFC, DK_IFEQS, DK_IFNC, DK_IFDEF, DK_IFNDEF, DK_IFNOTDEF, - DK_ELSEIF, DK_ELSE, DK_ENDIF, + DK_IFNB, DK_IFC, DK_IFEQS, DK_IFNC, DK_IFNES, DK_IFDEF, DK_IFNDEF, + DK_IFNOTDEF, DK_ELSEIF, DK_ELSE, DK_ENDIF, DK_SPACE, DK_SKIP, DK_FILE, DK_LINE, DK_LOC, DK_STABS, DK_CFI_SECTIONS, DK_CFI_STARTPROC, DK_CFI_ENDPROC, DK_CFI_DEF_CFA, DK_CFI_DEF_CFA_OFFSET, DK_CFI_ADJUST_CFA_OFFSET, DK_CFI_DEF_CFA_REGISTER, @@ -435,8 +435,8 @@ private: bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank); // ".ifc" or ".ifnc", depending on ExpectEqual. bool parseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual); - // ".ifeqs" - bool parseDirectiveIfeqs(SMLoc DirectiveLoc); + // ".ifeqs" or ".ifnes", depending on ExpectEqual. + bool parseDirectiveIfeqs(SMLoc DirectiveLoc, bool ExpectEqual); // ".ifdef" or ".ifndef", depending on expect_defined bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined); bool parseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif" @@ -486,10 +486,10 @@ extern MCAsmParserExtension *createCOFFAsmParser(); enum { DEFAULT_ADDRSPACE = 0 }; -AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, - const MCAsmInfo &_MAI) - : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM), - PlatformParser(nullptr), CurBuffer(_SM.getMainFileID()), +AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, + const MCAsmInfo &MAI) + : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM), + PlatformParser(nullptr), CurBuffer(SM.getMainFileID()), MacrosEnabledFlag(true), HadError(false), CppHashLineNumber(0), AssemblerDialect(~0U), IsDarwin(false), ParsingInlineAsm(false) { // Save the old handler. @@ -500,7 +500,7 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer()); // Initialize the platform / file format parser. 
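The .ifnes directive declared above is the string-inequality dual of .ifeqs; both share one parser, keyed by ExpectEqual, as the hunks further down show. Illustrative assembler input it accepts:

    .ifnes "foo", "bar"  # the strings differ, so the body is assembled
    .byte 1
    .endif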
- switch (_Ctx.getObjectFileInfo()->getObjectFileType()) { + switch (Ctx.getObjectFileInfo()->getObjectFileType()) { case MCObjectFileInfo::IsCOFF: PlatformParser.reset(createCOFFAsmParser()); break; @@ -1244,9 +1244,11 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, case DK_IFC: return parseDirectiveIfc(IDLoc, true); case DK_IFEQS: - return parseDirectiveIfeqs(IDLoc); + return parseDirectiveIfeqs(IDLoc, true); case DK_IFNC: return parseDirectiveIfc(IDLoc, false); + case DK_IFNES: + return parseDirectiveIfeqs(IDLoc, false); case DK_IFDEF: return parseDirectiveIfdef(IDLoc, true); case DK_IFNDEF: @@ -2791,7 +2793,7 @@ bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) { if (FileNumber == -1) getStreamer().EmitFileDirective(Filename); else { - if (getContext().getGenDwarfForAssembly() == true) + if (getContext().getGenDwarfForAssembly()) Error(DirectiveLoc, "input can't have .file dwarf directives when -g is " "used to generate dwarf debug info for assembly code"); @@ -3943,9 +3945,12 @@ bool AsmParser::parseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual) { /// parseDirectiveIfeqs /// ::= .ifeqs string1, string2 -bool AsmParser::parseDirectiveIfeqs(SMLoc DirectiveLoc) { +bool AsmParser::parseDirectiveIfeqs(SMLoc DirectiveLoc, bool ExpectEqual) { if (Lexer.isNot(AsmToken::String)) { - TokError("expected string parameter for '.ifeqs' directive"); + if (ExpectEqual) + TokError("expected string parameter for '.ifeqs' directive"); + else + TokError("expected string parameter for '.ifnes' directive"); eatToEndOfStatement(); return true; } @@ -3954,7 +3959,10 @@ bool AsmParser::parseDirectiveIfeqs(SMLoc DirectiveLoc) { Lex(); if (Lexer.isNot(AsmToken::Comma)) { - TokError("expected comma after first string for '.ifeqs' directive"); + if (ExpectEqual) + TokError("expected comma after first string for '.ifeqs' directive"); + else + TokError("expected comma after first string for '.ifnes' directive"); eatToEndOfStatement(); return true; } @@ -3962,7 +3970,10 @@ bool AsmParser::parseDirectiveIfeqs(SMLoc DirectiveLoc) { Lex(); if (Lexer.isNot(AsmToken::String)) { - TokError("expected string parameter for '.ifeqs' directive"); + if (ExpectEqual) + TokError("expected string parameter for '.ifeqs' directive"); + else + TokError("expected string parameter for '.ifnes' directive"); eatToEndOfStatement(); return true; } @@ -3972,7 +3983,7 @@ bool AsmParser::parseDirectiveIfeqs(SMLoc DirectiveLoc) { TheCondStack.push_back(TheCondState); TheCondState.TheCond = AsmCond::IfCond; - TheCondState.CondMet = String1 == String2; + TheCondState.CondMet = ExpectEqual == (String1 == String2); TheCondState.Ignore = !TheCondState.CondMet; return false; @@ -4219,6 +4230,7 @@ void AsmParser::initializeDirectiveKindMap() { DirectiveKindMap[".ifc"] = DK_IFC; DirectiveKindMap[".ifeqs"] = DK_IFEQS; DirectiveKindMap[".ifnc"] = DK_IFNC; + DirectiveKindMap[".ifnes"] = DK_IFNES; DirectiveKindMap[".ifdef"] = DK_IFDEF; DirectiveKindMap[".ifndef"] = DK_IFNDEF; DirectiveKindMap[".ifnotdef"] = DK_IFNOTDEF; diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp index 3ea745e..9102dc3 100644 --- a/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/lib/MC/MCParser/DarwinAsmParser.cpp @@ -626,7 +626,7 @@ bool DarwinAsmParser::parseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) { if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.secure_log_unique' directive"); - if (getContext().getSecureLogUsed() != false) + if (getContext().getSecureLogUsed()) return Error(IDLoc, 
".secure_log_unique specified multiple times"); // Get the secure log path. diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp index ccf4a7d..7889f83 100644 --- a/lib/MC/MCSection.cpp +++ b/lib/MC/MCSection.cpp @@ -10,6 +10,7 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -17,6 +18,14 @@ using namespace llvm; // MCSection //===----------------------------------------------------------------------===// +MCSymbol *MCSection::getEndSymbol(MCContext &Ctx) const { + if (!End) + End = Ctx.createTempSymbol("sec_end", true); + return End; +} + +bool MCSection::hasEnded() const { return End && End->isInSection(); } + MCSection::~MCSection() { } diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp index 46beda4..c9f1591 100644 --- a/lib/MC/MCSectionMachO.cpp +++ b/lib/MC/MCSectionMachO.cpp @@ -70,8 +70,10 @@ ENTRY(nullptr /*FIXME*/, S_ATTR_LOC_RELOC) }; MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section, - unsigned TAA, unsigned reserved2, SectionKind K) - : MCSection(SV_MachO, K), TypeAndAttributes(TAA), Reserved2(reserved2) { + unsigned TAA, unsigned reserved2, SectionKind K, + MCSymbol *Begin) + : MCSection(SV_MachO, K, Begin), TypeAndAttributes(TAA), + Reserved2(reserved2) { assert(Segment.size() <= 16 && Section.size() <= 16 && "Segment or section string too long"); for (unsigned i = 0; i != 16; ++i) { diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index f11ee66..27d0355 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCWin64EH.h" #include "llvm/Support/ErrorHandling.h" @@ -661,3 +662,30 @@ void MCStreamer::EmitBundleAlignMode(unsigned AlignPow2) {} void MCStreamer::EmitBundleLock(bool AlignToEnd) {} void MCStreamer::FinishImpl() {} void MCStreamer::EmitBundleUnlock() {} + +void MCStreamer::SwitchSection(const MCSection *Section, + const MCExpr *Subsection) { + assert(Section && "Cannot switch to a null section!"); + MCSectionSubPair curSection = SectionStack.back().first; + SectionStack.back().second = curSection; + if (MCSectionSubPair(Section, Subsection) != curSection) { + SectionStack.back().first = MCSectionSubPair(Section, Subsection); + assert(!Section->hasEnded() && "Section already ended"); + ChangeSection(Section, Subsection); + MCSymbol *Sym = Section->getBeginSymbol(); + if (Sym && !Sym->isInSection()) + EmitLabel(Sym); + } +} + +MCSymbol *MCStreamer::endSection(const MCSection *Section) { + // TODO: keep track of the last subsection so that this symbol appears in the + // correct place. 
+ MCSymbol *Sym = Section->getEndSymbol(Context); + if (Sym->isInSection()) + return Sym; + + SwitchSection(Section); + EmitLabel(Sym); + return Sym; +} diff --git a/lib/MC/MCWinEH.cpp b/lib/MC/MCWinEH.cpp index 47eaf0f..b1c95f8 100644 --- a/lib/MC/MCWinEH.cpp +++ b/lib/MC/MCWinEH.cpp @@ -11,6 +11,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCWinEH.h" #include "llvm/Support/COFF.h" diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index 588d424..5e9e86f 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachO.h" +#include "llvm/Support/raw_ostream.h" #include <vector> using namespace llvm; diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp index 587be54..ec6c9cb 100644 --- a/lib/MC/SubtargetFeature.cpp +++ b/lib/MC/SubtargetFeature.cpp @@ -201,9 +201,13 @@ SubtargetFeatures::ToggleFeature(uint64_t Bits, StringRef Feature, SetImpliedBits(Bits, FeatureEntry, FeatureTable); } } else { - errs() << "'" << Feature - << "' is not a recognized feature for this target" - << " (ignoring feature)\n"; + // Bug: 20140355 + // Silence this warning for now + if (false) { + errs() << "'" << Feature + << "' is not a recognized feature for this target" + << " (ignoring feature)\n"; + } } return Bits; @@ -281,9 +285,13 @@ SubtargetFeatures::getFeatureBits(StringRef CPU, ClearImpliedBits(Bits, FeatureEntry, FeatureTable); } } else { - errs() << "'" << Feature - << "' is not a recognized feature for this target" - << " (ignoring feature)\n"; + // Bug: 20140355 + // Silence this warning for now + if (false) { + errs() << "'" << Feature + << "' is not a recognized feature for this target" + << " (ignoring feature)\n"; + } } } diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index c519a9d..c6bc81d 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -175,6 +175,8 @@ public: const MCFragment &FB, bool InSet, bool IsPCRel) const override; + bool isWeak(const MCSymbolData &SD) const override; + void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, bool &IsPCRel, @@ -661,6 +663,12 @@ bool WinCOFFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( InSet, IsPCRel); } +bool WinCOFFObjectWriter::isWeak(const MCSymbolData &SD) const { + // FIXME: this is for PR23025. Write a good description on + // why this is needed. 
+ return SD.isExternal(); +} + void WinCOFFObjectWriter::RecordRelocation( MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) { diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp index 41a3da7..f902d2b 100644 --- a/lib/MC/WinCOFFStreamer.cpp +++ b/lib/MC/WinCOFFStreamer.cpp @@ -230,11 +230,11 @@ void MCWinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, AssignSection(Symbol, Section); if (ByteAlignment != 1) - new MCAlignFragment(ByteAlignment, /*_Value=*/0, /*_ValueSize=*/0, + new MCAlignFragment(ByteAlignment, /*Value=*/0, /*ValueSize=*/0, ByteAlignment, &SectionData); MCFillFragment *Fragment = - new MCFillFragment(/*_Value=*/0, /*_ValueSize=*/0, Size, &SectionData); + new MCFillFragment(/*Value=*/0, /*ValueSize=*/0, Size, &SectionData); SD.setFragment(Fragment); } diff --git a/lib/Makefile b/lib/Makefile index 52fdaaf..f75ca58 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -12,6 +12,6 @@ include $(LEVEL)/Makefile.config PARALLEL_DIRS := IR AsmParser Bitcode Analysis Transforms CodeGen Target \ ExecutionEngine Linker LTO MC Object Option DebugInfo \ - IRReader LineEditor ProfileData + IRReader LineEditor ProfileData Passes include $(LEVEL)/Makefile.common diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index 43b0771..7d43daa 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -20,6 +20,7 @@ using namespace llvm; using namespace object; +using namespace llvm::support::endian; static const char *const Magic = "!<arch>\n"; static const char *const ThinMagic = "!<thin>\n"; @@ -363,11 +364,9 @@ ErrorOr<Archive::child_iterator> Archive::Symbol::getMember() const { Offsets += sizeof(uint32_t); uint32_t Offset = 0; if (Parent->kind() == K_GNU) { - Offset = - *(reinterpret_cast<const support::ubig32_t *>(Offsets) + SymbolIndex); + Offset = read32be(Offsets + SymbolIndex * 4); } else if (Parent->kind() == K_MIPS64) { - Offset = - *(reinterpret_cast<const support::ubig64_t *>(Offsets) + SymbolIndex); + Offset = read64be(Offsets + SymbolIndex * 8); } else if (Parent->kind() == K_BSD) { // The SymbolIndex is an index into the ranlib structs that start at // Offsets (the first uint32_t is the number of bytes of the ranlib @@ -375,36 +374,29 @@ ErrorOr<Archive::child_iterator> Archive::Symbol::getMember() const { // being a string table offset and the second being the offset into // the archive of the member that defines the symbol. Which is what // is needed here. - Offset = *(reinterpret_cast<const support::ulittle32_t *>(Offsets) + - (SymbolIndex * 2) + 1); + Offset = read32le(Offsets + SymbolIndex * 8 + 4); } else { - uint32_t MemberCount = *reinterpret_cast<const support::ulittle32_t*>(Buf); - // Skip offsets. - Buf += sizeof(support::ulittle32_t) + - (MemberCount * sizeof(support::ulittle32_t)); - - uint32_t SymbolCount = *reinterpret_cast<const support::ulittle32_t*>(Buf); + uint32_t MemberCount = read32le(Buf); + Buf += MemberCount * 4 + 4; + uint32_t SymbolCount = read32le(Buf); if (SymbolIndex >= SymbolCount) return object_error::parse_failed; // Skip SymbolCount to get to the indices table. - const char *Indices = Buf + sizeof(support::ulittle32_t); + const char *Indices = Buf + 4; // Get the index of the offset in the file member offset table for this // symbol. 
- uint16_t OffsetIndex = - *(reinterpret_cast<const support::ulittle16_t*>(Indices) - + SymbolIndex); + uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2); // Subtract 1 since OffsetIndex is 1 based. --OffsetIndex; if (OffsetIndex >= MemberCount) return object_error::parse_failed; - Offset = *(reinterpret_cast<const support::ulittle32_t*>(Offsets) - + OffsetIndex); + Offset = read32le(Offsets + OffsetIndex * 4); } const char *Loc = Parent->getData().begin() + Offset; @@ -430,8 +422,7 @@ Archive::Symbol Archive::Symbol::getNext() const { // the string table followed by the string table. const char *Buf = Parent->SymbolTable->getBuffer().begin(); uint32_t RanlibCount = 0; - RanlibCount = (*reinterpret_cast<const support::ulittle32_t *>(Buf)) / - (sizeof(uint32_t) * 2); + RanlibCount = read32le(Buf) / 8; // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount) // don't change the t.StringIndex as we don't want to reference a ranlib // past RanlibCount. @@ -439,10 +430,8 @@ Archive::Symbol Archive::Symbol::getNext() const { const char *Ranlibs = Buf + 4; uint32_t CurRanStrx = 0; uint32_t NextRanStrx = 0; - CurRanStrx = *(reinterpret_cast<const support::ulittle32_t *>(Ranlibs) + - (t.SymbolIndex * 2)); - NextRanStrx = *(reinterpret_cast<const support::ulittle32_t *>(Ranlibs) + - ((t.SymbolIndex + 1) * 2)); + CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8); + NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8); t.StringIndex -= CurRanStrx; t.StringIndex += NextRanStrx; } @@ -462,10 +451,10 @@ Archive::symbol_iterator Archive::symbol_begin() const { const char *buf = SymbolTable->getBuffer().begin(); if (kind() == K_GNU) { uint32_t symbol_count = 0; - symbol_count = *reinterpret_cast<const support::ubig32_t*>(buf); + symbol_count = read32be(buf); buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t))); } else if (kind() == K_MIPS64) { - uint64_t symbol_count = *reinterpret_cast<const support::ubig64_t *>(buf); + uint64_t symbol_count = read64be(buf); buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t))); } else if (kind() == K_BSD) { // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t @@ -475,11 +464,10 @@ Archive::symbol_iterator Archive::symbol_begin() const { // define the symbol. After that the next uint32_t is the byte count of // the string table followed by the string table. uint32_t ranlib_count = 0; - ranlib_count = (*reinterpret_cast<const support::ulittle32_t *>(buf)) / - (sizeof(uint32_t) * 2); + ranlib_count = read32le(buf) / 8; const char *ranlibs = buf + 4; uint32_t ran_strx = 0; - ran_strx = *(reinterpret_cast<const support::ulittle32_t *>(ranlibs)); + ran_strx = read32le(ranlibs); buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t)))); // Skip the byte count of the string table. buf += sizeof(uint32_t); @@ -487,9 +475,9 @@ Archive::symbol_iterator Archive::symbol_begin() const { } else { uint32_t member_count = 0; uint32_t symbol_count = 0; - member_count = *reinterpret_cast<const support::ulittle32_t*>(buf); + member_count = read32le(buf); buf += 4 + (member_count * 4); // Skip offsets. - symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf); + symbol_count = read32le(buf); buf += 4 + (symbol_count * 2); // Skip indices. 
} uint32_t string_start_offset = buf - SymbolTable->getBuffer().begin(); @@ -503,17 +491,16 @@ Archive::symbol_iterator Archive::symbol_end() const { const char *buf = SymbolTable->getBuffer().begin(); uint32_t symbol_count = 0; if (kind() == K_GNU) { - symbol_count = *reinterpret_cast<const support::ubig32_t*>(buf); + symbol_count = read32be(buf); } else if (kind() == K_MIPS64) { - symbol_count = *reinterpret_cast<const support::ubig64_t*>(buf); + symbol_count = read64be(buf); } else if (kind() == K_BSD) { - symbol_count = (*reinterpret_cast<const support::ulittle32_t *>(buf)) / - (sizeof(uint32_t) * 2); + symbol_count = read32le(buf) / 8; } else { uint32_t member_count = 0; - member_count = *reinterpret_cast<const support::ulittle32_t*>(buf); + member_count = read32le(buf); buf += 4 + (member_count * 4); // Skip offsets. - symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf); + symbol_count = read32le(buf); } return symbol_iterator(Symbol(this, symbol_count, 0)); } diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index cde6fdc..ad278a4 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -190,7 +190,9 @@ std::error_code COFFObjectFile::getSymbolType(DataRefImpl Ref, Result = SymbolRef::ST_Data; } else if (Symb.isFileRecord()) { Result = SymbolRef::ST_File; - } else if (SectionNumber == COFF::IMAGE_SYM_DEBUG) { + } else if (SectionNumber == COFF::IMAGE_SYM_DEBUG || + Symb.isSectionDefinition()) { + // TODO: perhaps we need a new symbol type ST_Section. Result = SymbolRef::ST_Debug; } else if (!COFF::isReservedSectionNumber(SectionNumber)) { const coff_section *Section = nullptr; @@ -359,12 +361,17 @@ bool COFFObjectFile::isSectionData(DataRefImpl Ref) const { bool COFFObjectFile::isSectionBSS(DataRefImpl Ref) const { const coff_section *Sec = toSec(Ref); - return Sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA; + const uint32_t BssFlags = COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; + return (Sec->Characteristics & BssFlags) == BssFlags; } bool COFFObjectFile::isSectionVirtual(DataRefImpl Ref) const { const coff_section *Sec = toSec(Ref); - return Sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA; + // In COFF, a virtual section won't have any in-file + // content, so the file pointer to the content will be zero. + return Sec->PointerToRawData == 0; } bool COFFObjectFile::sectionContainsSymbol(DataRefImpl SecRef, diff --git a/lib/Object/ELFYAML.cpp b/lib/Object/ELFYAML.cpp index cce05cf..19527e2 100644 --- a/lib/Object/ELFYAML.cpp +++ b/lib/Object/ELFYAML.cpp @@ -235,6 +235,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_ELFOSABI>::enumeration( ECase(ELFOSABI_NSK) ECase(ELFOSABI_AROS) ECase(ELFOSABI_FENIXOS) + ECase(ELFOSABI_CLOUDABI) ECase(ELFOSABI_C6000_ELFABI) ECase(ELFOSABI_C6000_LINUX) ECase(ELFOSABI_ARM) diff --git a/lib/Object/IRObjectFile.cpp b/lib/Object/IRObjectFile.cpp index a2cbdcd..58c4ca3 100644 --- a/lib/Object/IRObjectFile.cpp +++ b/lib/Object/IRObjectFile.cpp @@ -13,6 +13,7 @@ #include "llvm/Object/IRObjectFile.h" #include "RecordStreamer.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/GVMaterializer.h" #include "llvm/IR/LLVMContext.h" @@ -35,12 +36,9 @@ using namespace object; IRObjectFile::IRObjectFile(MemoryBufferRef Object, std::unique_ptr<Module> Mod) : SymbolicFile(Binary::ID_IR, Object), M(std::move(Mod)) { - // If we have a DataLayout, setup a mangler. 
- const DataLayout *DL = M->getDataLayout(); - if (!DL) - return; - - Mang.reset(new Mangler(DL)); + // Setup a mangler with the DataLayout. + const DataLayout &DL = M->getDataLayout(); + Mang.reset(new Mangler(&DL)); const std::string &InlineAsm = M->getModuleInlineAsm(); if (InlineAsm.empty()) @@ -302,7 +300,9 @@ llvm::object::IRObjectFile::create(MemoryBufferRef Object, std::unique_ptr<MemoryBuffer> Buff( MemoryBuffer::getMemBuffer(BCOrErr.get(), false)); - ErrorOr<Module *> MOrErr = getLazyBitcodeModule(std::move(Buff), Context); + ErrorOr<Module *> MOrErr = + getLazyBitcodeModule(std::move(Buff), Context, nullptr, + /*ShouldLazyLoadMetadata*/ true); if (std::error_code EC = MOrErr.getError()) return EC; diff --git a/lib/Option/Arg.cpp b/lib/Option/Arg.cpp index af632d6..ac00073 100644 --- a/lib/Option/Arg.cpp +++ b/lib/Option/Arg.cpp @@ -17,22 +17,21 @@ using namespace llvm; using namespace llvm::opt; -Arg::Arg(const Option _Opt, StringRef S, unsigned _Index, const Arg *_BaseArg) - : Opt(_Opt), BaseArg(_BaseArg), Spelling(S), Index(_Index), - Claimed(false), OwnsValues(false) { -} - -Arg::Arg(const Option _Opt, StringRef S, unsigned _Index, - const char *Value0, const Arg *_BaseArg) - : Opt(_Opt), BaseArg(_BaseArg), Spelling(S), Index(_Index), - Claimed(false), OwnsValues(false) { +Arg::Arg(const Option Opt, StringRef S, unsigned Index, const Arg *BaseArg) + : Opt(Opt), BaseArg(BaseArg), Spelling(S), Index(Index), Claimed(false), + OwnsValues(false) {} + +Arg::Arg(const Option Opt, StringRef S, unsigned Index, const char *Value0, + const Arg *BaseArg) + : Opt(Opt), BaseArg(BaseArg), Spelling(S), Index(Index), Claimed(false), + OwnsValues(false) { Values.push_back(Value0); } -Arg::Arg(const Option _Opt, StringRef S, unsigned _Index, - const char *Value0, const char *Value1, const Arg *_BaseArg) - : Opt(_Opt), BaseArg(_BaseArg), Spelling(S), Index(_Index), - Claimed(false), OwnsValues(false) { +Arg::Arg(const Option Opt, StringRef S, unsigned Index, const char *Value0, + const char *Value1, const Arg *BaseArg) + : Opt(Opt), BaseArg(BaseArg), Spelling(S), Index(Index), Claimed(false), + OwnsValues(false) { Values.push_back(Value0); Values.push_back(Value1); } diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp index 85e956f..4bc8f92 100644 --- a/lib/Option/ArgList.cpp +++ b/lib/Option/ArgList.cpp @@ -63,6 +63,26 @@ Arg *ArgList::getLastArgNoClaim(OptSpecifier Id0, OptSpecifier Id1) const { return nullptr; } +Arg *ArgList::getLastArgNoClaim(OptSpecifier Id0, OptSpecifier Id1, + OptSpecifier Id2) const { + // FIXME: Make search efficient? + for (const_reverse_iterator it = rbegin(), ie = rend(); it != ie; ++it) + if ((*it)->getOption().matches(Id0) || (*it)->getOption().matches(Id1) || + (*it)->getOption().matches(Id2)) + return *it; + return nullptr; +} + +Arg *ArgList::getLastArgNoClaim(OptSpecifier Id0, OptSpecifier Id1, + OptSpecifier Id2, OptSpecifier Id3) const { + // FIXME: Make search efficient? 
+ for (const_reverse_iterator it = rbegin(), ie = rend(); it != ie; ++it) + if ((*it)->getOption().matches(Id0) || (*it)->getOption().matches(Id1) || + (*it)->getOption().matches(Id2) || (*it)->getOption().matches(Id3)) + return *it; + return nullptr; +} + Arg *ArgList::getLastArg(OptSpecifier Id) const { Arg *Res = nullptr; for (const_iterator it = begin(), ie = end(); it != ie; ++it) { @@ -285,11 +305,6 @@ void ArgList::ClaimAllArgs() const { (*it)->claim(); } -const char *ArgList::MakeArgString(const Twine &T) const { - SmallString<256> Str; - return MakeArgString(T.toStringRef(Str)); -} - const char *ArgList::GetOrMakeJoinedArgString(unsigned Index, StringRef LHS, StringRef RHS) const { @@ -334,19 +349,18 @@ unsigned InputArgList::MakeIndex(StringRef String0, return Index0; } -const char *InputArgList::MakeArgString(StringRef Str) const { +const char *InputArgList::MakeArgStringRef(StringRef Str) const { return getArgString(MakeIndex(Str)); } // -DerivedArgList::DerivedArgList(const InputArgList &_BaseArgs) - : BaseArgs(_BaseArgs) { -} +DerivedArgList::DerivedArgList(const InputArgList &BaseArgs) + : BaseArgs(BaseArgs) {} DerivedArgList::~DerivedArgList() {} -const char *DerivedArgList::MakeArgString(StringRef Str) const { +const char *DerivedArgList::MakeArgStringRef(StringRef Str) const { return BaseArgs.MakeArgString(Str); } diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp index dca02c1..96ba183 100644 --- a/lib/Option/OptTable.cpp +++ b/lib/Option/OptTable.cpp @@ -84,15 +84,11 @@ static inline bool operator<(const OptTable::Info &I, const char *Name) { OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {} -OptTable::OptTable(const Info *_OptionInfos, unsigned _NumOptionInfos, - bool _IgnoreCase) - : OptionInfos(_OptionInfos), - NumOptionInfos(_NumOptionInfos), - IgnoreCase(_IgnoreCase), - TheInputOptionID(0), - TheUnknownOptionID(0), - FirstSearchableIndex(0) -{ +OptTable::OptTable(const Info *OptionInfos, unsigned NumOptionInfos, + bool IgnoreCase) + : OptionInfos(OptionInfos), NumOptionInfos(NumOptionInfos), + IgnoreCase(IgnoreCase), TheInputOptionID(0), TheUnknownOptionID(0), + FirstSearchableIndex(0) { // Explicitly zero initialize the error to work around a bug in array // value-initialization on MinGW with gcc 4.3.5. 
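An aside on the ArgList hunks above: the new three- and four-id getLastArgNoClaim overloads simply extend the existing reverse linear scan, and the virtual MakeArgString becomes MakeArgStringRef with a non-virtual Twine convenience wrapper kept in the header. A minimal sketch of the call pattern follows; the OPT_O0/OPT_O1/OPT_O2 ids are hypothetical stand-ins for a tool's TableGen-generated option ids and are not defined in this patch.

#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"

using namespace llvm::opt;

// Return whichever of three mutually overriding flags appears last on the
// command line, without claiming it, so "unused argument" style diagnostics
// elsewhere still see it. OPT_O0/OPT_O1/OPT_O2 are assumed ids.
static Arg *getLastOptLevel(const InputArgList &Args) {
  return Args.getLastArgNoClaim(OPT_O0, OPT_O1, OPT_O2);
}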
diff --git a/lib/Option/Option.cpp b/lib/Option/Option.cpp index cdc63c3..e29d649 100644 --- a/lib/Option/Option.cpp +++ b/lib/Option/Option.cpp @@ -35,9 +35,6 @@ Option::Option(const OptTable::Info *info, const OptTable *owner) } } -Option::~Option() { -} - void Option::dump() const { llvm::errs() << "<"; switch (getKind()) { diff --git a/lib/Passes/Android.mk b/lib/Passes/Android.mk new file mode 100644 index 0000000..6e441d7 --- /dev/null +++ b/lib/Passes/Android.mk @@ -0,0 +1,30 @@ +LOCAL_PATH:= $(call my-dir) + +passes_SRC_FILES := \ + PassBuilder.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_MODULE:= libLLVMPasses +LOCAL_MODULE_TAGS := optional +LOCAL_SRC_FILES := $(passes_SRC_FILES) + +include $(LLVM_HOST_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS)) +include $(CLEAR_VARS) + +LOCAL_MODULE:= libLLVMPasses +LOCAL_MODULE_TAGS := optional +LOCAL_SRC_FILES := $(passes_SRC_FILES) + +include $(LLVM_DEVICE_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_STATIC_LIBRARY) +endif diff --git a/lib/Passes/CMakeLists.txt b/lib/Passes/CMakeLists.txt new file mode 100644 index 0000000..6ceac7b --- /dev/null +++ b/lib/Passes/CMakeLists.txt @@ -0,0 +1,8 @@ +add_llvm_library(LLVMPasses + PassBuilder.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Passes + ) + +add_dependencies(LLVMPasses intrinsics_gen) diff --git a/lib/Passes/LLVMBuild.txt b/lib/Passes/LLVMBuild.txt new file mode 100644 index 0000000..3063fe3 --- /dev/null +++ b/lib/Passes/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/Passes/LLVMBuild.txt -------------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = Passes +parent = Libraries +required_libraries = Analysis Core IPA IPO InstCombine Scalar Support TransformUtils Vectorize diff --git a/lib/Passes/Makefile b/lib/Passes/Makefile new file mode 100644 index 0000000..413dc5c --- /dev/null +++ b/lib/Passes/Makefile @@ -0,0 +1,14 @@ +##===- lib/Passes/Makefile ---------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +LIBRARYNAME = LLVMPasses +BUILD_ARCHIVE := 1 + +include $(LEVEL)/Makefile.common diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp new file mode 100644 index 0000000..ba71320 --- /dev/null +++ b/lib/Passes/PassBuilder.cpp @@ -0,0 +1,412 @@ +//===- Parsing, selection, and construction of pass pipelines -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file provides the implementation of the PassBuilder based on our +/// static pass registry as well as related functionality. It also provides +/// helpers to aid in analyzing, debugging, and testing passes and pass +/// pipelines. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/InstCombine/InstCombine.h" +#include "llvm/Transforms/Scalar/EarlyCSE.h" +#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" +#include "llvm/Transforms/Scalar/SimplifyCFG.h" + +using namespace llvm; + +namespace { + +/// \brief No-op module pass which does nothing. +struct NoOpModulePass { + PreservedAnalyses run(Module &M) { return PreservedAnalyses::all(); } + static StringRef name() { return "NoOpModulePass"; } +}; + +/// \brief No-op module analysis. +struct NoOpModuleAnalysis { + struct Result {}; + Result run(Module &) { return Result(); } + static StringRef name() { return "NoOpModuleAnalysis"; } + static void *ID() { return (void *)&PassID; } +private: + static char PassID; +}; + +char NoOpModuleAnalysis::PassID; + +/// \brief No-op CGSCC pass which does nothing. +struct NoOpCGSCCPass { + PreservedAnalyses run(LazyCallGraph::SCC &C) { + return PreservedAnalyses::all(); + } + static StringRef name() { return "NoOpCGSCCPass"; } +}; + +/// \brief No-op CGSCC analysis. +struct NoOpCGSCCAnalysis { + struct Result {}; + Result run(LazyCallGraph::SCC &) { return Result(); } + static StringRef name() { return "NoOpCGSCCAnalysis"; } + static void *ID() { return (void *)&PassID; } +private: + static char PassID; +}; + +char NoOpCGSCCAnalysis::PassID; + +/// \brief No-op function pass which does nothing. +struct NoOpFunctionPass { + PreservedAnalyses run(Function &F) { return PreservedAnalyses::all(); } + static StringRef name() { return "NoOpFunctionPass"; } +}; + +/// \brief No-op function analysis. +struct NoOpFunctionAnalysis { + struct Result {}; + Result run(Function &) { return Result(); } + static StringRef name() { return "NoOpFunctionAnalysis"; } + static void *ID() { return (void *)&PassID; } +private: + static char PassID; +}; + +char NoOpFunctionAnalysis::PassID; + +} // End anonymous namespace. 
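The anonymous namespace above illustrates the new pass manager's concept-based design: a pass is anything with a run() method returning PreservedAnalyses and a static name(), with no base class to inherit. As an illustrative sketch under that same interface (not code from the patch), a trivial pass that actually does something would look like:

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

/// Illustrative module pass: reports the number of functions and preserves
/// everything, exactly like the no-op passes above but with a side effect.
struct FunctionCountPass {
  PreservedAnalyses run(Module &M) {
    errs() << M.getModuleIdentifier() << ": " << M.size() << " functions\n";
    return PreservedAnalyses::all();
  }
  static StringRef name() { return "FunctionCountPass"; }
};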
+
+void PassBuilder::registerModuleAnalyses(ModuleAnalysisManager &MAM) {
+#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
+  MAM.registerPass(CREATE_PASS);
+#include "PassRegistry.def"
+}
+
+void PassBuilder::registerCGSCCAnalyses(CGSCCAnalysisManager &CGAM) {
+#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
+  CGAM.registerPass(CREATE_PASS);
+#include "PassRegistry.def"
+}
+
+void PassBuilder::registerFunctionAnalyses(FunctionAnalysisManager &FAM) {
+#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
+  FAM.registerPass(CREATE_PASS);
+#include "PassRegistry.def"
+}
+
+#ifndef NDEBUG
+static bool isModulePassName(StringRef Name) {
+#define MODULE_PASS(NAME, CREATE_PASS) if (Name == NAME) return true;
+#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
+  if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
+    return true;
+#include "PassRegistry.def"
+
+  return false;
+}
+#endif
+
+static bool isCGSCCPassName(StringRef Name) {
+#define CGSCC_PASS(NAME, CREATE_PASS) if (Name == NAME) return true;
+#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
+  if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
+    return true;
+#include "PassRegistry.def"
+
+  return false;
+}
+
+static bool isFunctionPassName(StringRef Name) {
+#define FUNCTION_PASS(NAME, CREATE_PASS) if (Name == NAME) return true;
+#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
+  if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
+    return true;
+#include "PassRegistry.def"
+
+  return false;
+}
+
+bool PassBuilder::parseModulePassName(ModulePassManager &MPM, StringRef Name) {
+#define MODULE_PASS(NAME, CREATE_PASS) \
+  if (Name == NAME) { \
+    MPM.addPass(CREATE_PASS); \
+    return true; \
+  }
+#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
+  if (Name == "require<" NAME ">") { \
+    MPM.addPass(RequireAnalysisPass<decltype(CREATE_PASS)>()); \
+    return true; \
+  } \
+  if (Name == "invalidate<" NAME ">") { \
+    MPM.addPass(InvalidateAnalysisPass<decltype(CREATE_PASS)>()); \
+    return true; \
+  }
+#include "PassRegistry.def"
+
+  return false;
+}
+
+bool PassBuilder::parseCGSCCPassName(CGSCCPassManager &CGPM, StringRef Name) {
+#define CGSCC_PASS(NAME, CREATE_PASS) \
+  if (Name == NAME) { \
+    CGPM.addPass(CREATE_PASS); \
+    return true; \
+  }
+#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
+  if (Name == "require<" NAME ">") { \
+    CGPM.addPass(RequireAnalysisPass<decltype(CREATE_PASS)>()); \
+    return true; \
+  } \
+  if (Name == "invalidate<" NAME ">") { \
+    CGPM.addPass(InvalidateAnalysisPass<decltype(CREATE_PASS)>()); \
+    return true; \
+  }
+#include "PassRegistry.def"
+
+  return false;
+}
+
+bool PassBuilder::parseFunctionPassName(FunctionPassManager &FPM,
+                                        StringRef Name) {
+#define FUNCTION_PASS(NAME, CREATE_PASS) \
+  if (Name == NAME) { \
+    FPM.addPass(CREATE_PASS); \
+    return true; \
+  }
+#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
+  if (Name == "require<" NAME ">") { \
+    FPM.addPass(RequireAnalysisPass<decltype(CREATE_PASS)>()); \
+    return true; \
+  } \
+  if (Name == "invalidate<" NAME ">") { \
+    FPM.addPass(InvalidateAnalysisPass<decltype(CREATE_PASS)>()); \
+    return true; \
+  }
+#include "PassRegistry.def"
+
+  return false;
+}
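To see what the X-macro pattern in the parse*PassName functions above amounts to, here is the approximate preprocessed form of one FUNCTION_PASS and one FUNCTION_ANALYSIS entry from PassRegistry.def inside parseFunctionPassName. This expansion is a reading aid only; it is not code that exists anywhere in the tree.

// FUNCTION_PASS("simplify-cfg", SimplifyCFGPass()) expands to:
if (Name == "simplify-cfg") {
  FPM.addPass(SimplifyCFGPass());
  return true;
}
// FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis()) expands to:
if (Name == "require<domtree>") {
  FPM.addPass(RequireAnalysisPass<decltype(DominatorTreeAnalysis())>());
  return true;
}
if (Name == "invalidate<domtree>") {
  FPM.addPass(InvalidateAnalysisPass<decltype(DominatorTreeAnalysis())>());
  return true;
}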
+
+bool PassBuilder::parseFunctionPassPipeline(FunctionPassManager &FPM,
+                                            StringRef &PipelineText,
+                                            bool VerifyEachPass,
+                                            bool DebugLogging) {
+  for (;;) {
+    // Parse nested pass managers by recursing.
+    if (PipelineText.startswith("function(")) {
+      FunctionPassManager NestedFPM(DebugLogging);
+
+      // Parse the inner pipeline into the nested manager.
+      PipelineText = PipelineText.substr(strlen("function("));
+      if (!parseFunctionPassPipeline(NestedFPM, PipelineText, VerifyEachPass,
+                                     DebugLogging) ||
+          PipelineText.empty())
+        return false;
+      assert(PipelineText[0] == ')');
+      PipelineText = PipelineText.substr(1);
+
+      // Add the nested pass manager with the appropriate adaptor.
+      FPM.addPass(std::move(NestedFPM));
+    } else {
+      // Otherwise try to parse a pass name.
+      size_t End = PipelineText.find_first_of(",)");
+      if (!parseFunctionPassName(FPM, PipelineText.substr(0, End)))
+        return false;
+      if (VerifyEachPass)
+        FPM.addPass(VerifierPass());
+
+      PipelineText = PipelineText.substr(End);
+    }
+
+    if (PipelineText.empty() || PipelineText[0] == ')')
+      return true;
+
+    assert(PipelineText[0] == ',');
+    PipelineText = PipelineText.substr(1);
+  }
+}
+
+bool PassBuilder::parseCGSCCPassPipeline(CGSCCPassManager &CGPM,
+                                         StringRef &PipelineText,
+                                         bool VerifyEachPass,
+                                         bool DebugLogging) {
+  for (;;) {
+    // Parse nested pass managers by recursing.
+    if (PipelineText.startswith("cgscc(")) {
+      CGSCCPassManager NestedCGPM(DebugLogging);
+
+      // Parse the inner pipeline into the nested manager.
+      PipelineText = PipelineText.substr(strlen("cgscc("));
+      if (!parseCGSCCPassPipeline(NestedCGPM, PipelineText, VerifyEachPass,
+                                  DebugLogging) ||
+          PipelineText.empty())
+        return false;
+      assert(PipelineText[0] == ')');
+      PipelineText = PipelineText.substr(1);
+
+      // Add the nested pass manager with the appropriate adaptor.
+      CGPM.addPass(std::move(NestedCGPM));
+    } else if (PipelineText.startswith("function(")) {
+      FunctionPassManager NestedFPM(DebugLogging);
+
+      // Parse the inner pipeline into the nested manager.
+      PipelineText = PipelineText.substr(strlen("function("));
+      if (!parseFunctionPassPipeline(NestedFPM, PipelineText, VerifyEachPass,
+                                     DebugLogging) ||
+          PipelineText.empty())
+        return false;
+      assert(PipelineText[0] == ')');
+      PipelineText = PipelineText.substr(1);
+
+      // Add the nested pass manager with the appropriate adaptor.
+      CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(NestedFPM)));
+    } else {
+      // Otherwise try to parse a pass name.
+      size_t End = PipelineText.find_first_of(",)");
+      if (!parseCGSCCPassName(CGPM, PipelineText.substr(0, End)))
+        return false;
+      // FIXME: No verifier support for CGSCC passes!
+
+      PipelineText = PipelineText.substr(End);
+    }
+
+    if (PipelineText.empty() || PipelineText[0] == ')')
+      return true;
+
+    assert(PipelineText[0] == ',');
+    PipelineText = PipelineText.substr(1);
+  }
+}
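Read together, the mutually recursive parsers accept a small nesting grammar over comma-separated pass names. Roughly, as an editorial reconstruction from the code above and below rather than a grammar stated anywhere in the patch:

  pipeline := unit ("," unit)*
  unit     := "module(" pipeline ")"      ; module level only
            | "cgscc(" pipeline ")"       ; module and cgscc levels
            | "function(" pipeline ")"    ; any level
            | NAME
            | "require<" NAME ">"
            | "invalidate<" NAME ">"

So a string like module(function(instcombine),cgscc(no-op-cgscc)) nests managers, with each inner manager wired in through the matching adaptor pass.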
+
+bool PassBuilder::parseModulePassPipeline(ModulePassManager &MPM,
+                                          StringRef &PipelineText,
+                                          bool VerifyEachPass,
+                                          bool DebugLogging) {
+  for (;;) {
+    // Parse nested pass managers by recursing.
+    if (PipelineText.startswith("module(")) {
+      ModulePassManager NestedMPM(DebugLogging);
+
+      // Parse the inner pipeline into the nested manager.
+      PipelineText = PipelineText.substr(strlen("module("));
+      if (!parseModulePassPipeline(NestedMPM, PipelineText, VerifyEachPass,
+                                   DebugLogging) ||
+          PipelineText.empty())
+        return false;
+      assert(PipelineText[0] == ')');
+      PipelineText = PipelineText.substr(1);
+
+      // Now add the nested manager as a module pass.
+      MPM.addPass(std::move(NestedMPM));
+    } else if (PipelineText.startswith("cgscc(")) {
+      CGSCCPassManager NestedCGPM(DebugLogging);
+
+      // Parse the inner pipeline into the nested manager.
+      PipelineText = PipelineText.substr(strlen("cgscc("));
+      if (!parseCGSCCPassPipeline(NestedCGPM, PipelineText, VerifyEachPass,
+                                  DebugLogging) ||
+          PipelineText.empty())
+        return false;
+      assert(PipelineText[0] == ')');
+      PipelineText = PipelineText.substr(1);
+
+      // Add the nested pass manager with the appropriate adaptor.
+      MPM.addPass(
+          createModuleToPostOrderCGSCCPassAdaptor(std::move(NestedCGPM)));
+    } else if (PipelineText.startswith("function(")) {
+      FunctionPassManager NestedFPM(DebugLogging);
+
+      // Parse the inner pipeline into the nested manager.
+      PipelineText = PipelineText.substr(strlen("function("));
+      if (!parseFunctionPassPipeline(NestedFPM, PipelineText, VerifyEachPass,
+                                     DebugLogging) ||
+          PipelineText.empty())
+        return false;
+      assert(PipelineText[0] == ')');
+      PipelineText = PipelineText.substr(1);
+
+      // Add the nested pass manager with the appropriate adaptor.
+      MPM.addPass(createModuleToFunctionPassAdaptor(std::move(NestedFPM)));
+    } else {
+      // Otherwise try to parse a pass name.
+      size_t End = PipelineText.find_first_of(",)");
+      if (!parseModulePassName(MPM, PipelineText.substr(0, End)))
+        return false;
+      if (VerifyEachPass)
+        MPM.addPass(VerifierPass());
+
+      PipelineText = PipelineText.substr(End);
+    }
+
+    if (PipelineText.empty() || PipelineText[0] == ')')
+      return true;
+
+    assert(PipelineText[0] == ',');
+    PipelineText = PipelineText.substr(1);
+  }
+}
+
+// Primary pass pipeline description parsing routine.
+// FIXME: Should this routine accept a TargetMachine or require the caller to
+// pre-populate the analysis managers with target-specific stuff?
+bool PassBuilder::parsePassPipeline(ModulePassManager &MPM,
+                                    StringRef PipelineText, bool VerifyEachPass,
+                                    bool DebugLogging) {
+  // By default, try to parse the pipeline as-if it were within an implicit
+  // 'module(...)' pass pipeline. If this will parse at all, it needs to
+  // consume the entire string.
+  if (parseModulePassPipeline(MPM, PipelineText, VerifyEachPass, DebugLogging))
+    return PipelineText.empty();
+
+  // This isn't parsable as a module pipeline, look for the end of a pass name
+  // and directly drop down to that layer.
+  StringRef FirstName =
+      PipelineText.substr(0, PipelineText.find_first_of(",)"));
+  assert(!isModulePassName(FirstName) &&
+         "Already handled all module pipeline options.");
+
+  // If this looks like a CGSCC pass, parse the whole thing as a CGSCC
+  // pipeline.
+  if (isCGSCCPassName(FirstName)) {
+    CGSCCPassManager CGPM(DebugLogging);
+    if (!parseCGSCCPassPipeline(CGPM, PipelineText, VerifyEachPass,
+                                DebugLogging) ||
+        !PipelineText.empty())
+      return false;
+    MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
+    return true;
+  }
+
+  // Similarly, if this looks like a Function pass, parse the whole thing as
+  // a Function pipeline.
+  if (isFunctionPassName(FirstName)) {
+    FunctionPassManager FPM(DebugLogging);
+    if (!parseFunctionPassPipeline(FPM, PipelineText, VerifyEachPass,
+                                   DebugLogging) ||
+        !PipelineText.empty())
+      return false;
+    MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+    return true;
+  }
+
+  return false;
+}
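As a usage sketch, the intended entry point is parsePassPipeline with the analysis managers pre-populated. The driver below is illustrative only, written in the spirit of opt's new-pass-manager path; it assumes the usual analysis-manager proxy passes of this revision are available and is not code from the patch.

#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"

using namespace llvm;

static bool runPipelineSketch(Module &M, StringRef Text) {
  PassBuilder PB;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;

  // Register everything named in PassRegistry.def with each manager...
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);

  // ...and cross-wire the managers so nested pipelines can reach them.
  MAM.registerPass(FunctionAnalysisManagerModuleProxy(FAM));
  MAM.registerPass(CGSCCAnalysisManagerModuleProxy(CGAM));
  CGAM.registerPass(ModuleAnalysisManagerCGSCCProxy(MAM));
  CGAM.registerPass(FunctionAnalysisManagerCGSCCProxy(FAM));
  FAM.registerPass(ModuleAnalysisManagerFunctionProxy(MAM));
  FAM.registerPass(CGSCCAnalysisManagerFunctionProxy(CGAM));

  ModulePassManager MPM;
  if (!PB.parsePassPipeline(MPM, Text, /*VerifyEachPass=*/true,
                            /*DebugLogging=*/false))
    return false; // Malformed pipeline text, e.g. an unknown pass name.

  MPM.run(M, &MAM);
  return true;
}

A call such as runPipelineSketch(M, "function(instcombine,simplify-cfg)") then exercises the implicit-module parse path described above.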
diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def
new file mode 100644
index 0000000..d768a3a
--- /dev/null
+++ b/lib/Passes/PassRegistry.def
@@ -0,0 +1,77 @@
+//===- PassRegistry.def - Registry of passes --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is used as the registry of passes that are part of the core LLVM
+// libraries. This file describes both transformation passes and analyses.
+// Analyses are registered while transformation passes have names registered
+// that can be used when providing a textual pass pipeline.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+#ifndef MODULE_ANALYSIS
+#define MODULE_ANALYSIS(NAME, CREATE_PASS)
+#endif
+MODULE_ANALYSIS("lcg", LazyCallGraphAnalysis())
+MODULE_ANALYSIS("no-op-module", NoOpModuleAnalysis())
+MODULE_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
+#undef MODULE_ANALYSIS
+
+#ifndef MODULE_PASS
+#define MODULE_PASS(NAME, CREATE_PASS)
+#endif
+MODULE_PASS("invalidate<all>", InvalidateAllAnalysesPass())
+MODULE_PASS("no-op-module", NoOpModulePass())
+MODULE_PASS("print", PrintModulePass(dbgs()))
+MODULE_PASS("print-cg", LazyCallGraphPrinterPass(dbgs()))
+MODULE_PASS("verify", VerifierPass())
+#undef MODULE_PASS
+
+#ifndef CGSCC_ANALYSIS
+#define CGSCC_ANALYSIS(NAME, CREATE_PASS)
+#endif
+CGSCC_ANALYSIS("no-op-cgscc", NoOpCGSCCAnalysis())
+#undef CGSCC_ANALYSIS
+
+#ifndef CGSCC_PASS
+#define CGSCC_PASS(NAME, CREATE_PASS)
+#endif
+CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass())
+CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass())
+#undef CGSCC_PASS
+
+#ifndef FUNCTION_ANALYSIS
+#define FUNCTION_ANALYSIS(NAME, CREATE_PASS)
+#endif
+FUNCTION_ANALYSIS("assumptions", AssumptionAnalysis())
+FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis())
+FUNCTION_ANALYSIS("loops", LoopAnalysis())
+FUNCTION_ANALYSIS("no-op-function", NoOpFunctionAnalysis())
+FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
+FUNCTION_ANALYSIS("targetir",
+                  TM ?
TM->getTargetIRAnalysis() : TargetIRAnalysis()) +#undef FUNCTION_ANALYSIS + +#ifndef FUNCTION_PASS +#define FUNCTION_PASS(NAME, CREATE_PASS) +#endif +FUNCTION_PASS("early-cse", EarlyCSEPass()) +FUNCTION_PASS("instcombine", InstCombinePass()) +FUNCTION_PASS("invalidate<all>", InvalidateAllAnalysesPass()) +FUNCTION_PASS("no-op-function", NoOpFunctionPass()) +FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass()) +FUNCTION_PASS("print", PrintFunctionPass(dbgs())) +FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs())) +FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(dbgs())) +FUNCTION_PASS("print<loops>", LoopPrinterPass(dbgs())) +FUNCTION_PASS("simplify-cfg", SimplifyCFGPass()) +FUNCTION_PASS("verify", VerifierPass()) +FUNCTION_PASS("verify<domtree>", DominatorTreeVerifierPass()) +#undef FUNCTION_PASS diff --git a/lib/ProfileData/CoverageMapping.cpp b/lib/ProfileData/CoverageMapping.cpp index 31213d7..46d494b 100644 --- a/lib/ProfileData/CoverageMapping.cpp +++ b/lib/ProfileData/CoverageMapping.cpp @@ -20,6 +20,7 @@ #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace coverage; @@ -217,12 +218,13 @@ CoverageMapping::load(CoverageMappingReader &CoverageReader, } ErrorOr<std::unique_ptr<CoverageMapping>> -CoverageMapping::load(StringRef ObjectFilename, StringRef ProfileFilename) { +CoverageMapping::load(StringRef ObjectFilename, StringRef ProfileFilename, + Triple::ArchType Arch) { auto CounterMappingBuff = MemoryBuffer::getFileOrSTDIN(ObjectFilename); if (std::error_code EC = CounterMappingBuff.getError()) return EC; auto CoverageReaderOrErr = - BinaryCoverageReader::create(CounterMappingBuff.get()); + BinaryCoverageReader::create(CounterMappingBuff.get(), Arch); if (std::error_code EC = CoverageReaderOrErr.getError()) return EC; auto CoverageReader = std::move(CoverageReaderOrErr.get()); diff --git a/lib/ProfileData/CoverageMappingReader.cpp b/lib/ProfileData/CoverageMappingReader.cpp index d32f1da..12e9e88 100644 --- a/lib/ProfileData/CoverageMappingReader.cpp +++ b/lib/ProfileData/CoverageMappingReader.cpp @@ -14,9 +14,12 @@ #include "llvm/ProfileData/CoverageMappingReader.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/Object/MachOUniversal.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/LEB128.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace coverage; @@ -287,24 +290,6 @@ std::error_code RawCoverageMappingReader::read() { } namespace { -/// \brief The coverage mapping data for a single function. -/// It points to the function's name. -template <typename IntPtrT> struct CoverageMappingFunctionRecord { - IntPtrT FunctionNamePtr; - uint32_t FunctionNameSize; - uint32_t CoverageMappingSize; - uint64_t FunctionHash; -}; - -/// \brief The coverage mapping data for a single translation unit. -/// It points to the array of function coverage mapping records and the encoded -/// filenames array. -template <typename IntPtrT> struct CoverageMappingTURecord { - uint32_t FunctionRecordsSize; - uint32_t FilenamesSize; - uint32_t CoverageMappingsSize; - uint32_t Version; -}; /// \brief A helper structure to access the data from a section /// in an object file. 
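The rewrite in the next hunk drops these overlay structs in favor of explicit, endianness-parameterized reads. The key primitive is llvm::support::endian::readNext, which reads a possibly unaligned value of the requested endianness and advances the cursor. A minimal illustration follows; it is editorial, instantiated for little-endian data rather than the Endian template parameter used in the hunk itself.

#include "llvm/Support/Endian.h"
#include <cstdint>

using namespace llvm::support;

// Read the four little-endian uint32_t header fields that precede each
// coverage translation unit; Buf is advanced past what was read.
static void readTUHeader(const char *&Buf, uint32_t &NRecords,
                         uint32_t &FilenamesSize, uint32_t &CoverageSize,
                         uint32_t &Version) {
  NRecords = endian::readNext<uint32_t, little, unaligned>(Buf);
  FilenamesSize = endian::readNext<uint32_t, little, unaligned>(Buf);
  CoverageSize = endian::readNext<uint32_t, little, unaligned>(Buf);
  Version = endian::readNext<uint32_t, little, unaligned>(Buf);
}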
@@ -331,77 +316,76 @@ struct SectionData { }; } -template <typename T> +template <typename T, support::endianness Endian> std::error_code readCoverageMappingData( SectionData &ProfileNames, StringRef Data, std::vector<BinaryCoverageReader::ProfileMappingRecord> &Records, std::vector<StringRef> &Filenames) { + using namespace support; llvm::DenseSet<T> UniqueFunctionMappingData; // Read the records in the coverage data section. - while (!Data.empty()) { - if (Data.size() < sizeof(CoverageMappingTURecord<T>)) + for (const char *Buf = Data.data(), *End = Buf + Data.size(); Buf < End;) { + if (Buf + 4 * sizeof(uint32_t) > End) return instrprof_error::malformed; - auto TU = reinterpret_cast<const CoverageMappingTURecord<T> *>(Data.data()); - Data = Data.substr(sizeof(CoverageMappingTURecord<T>)); - switch (TU->Version) { + uint32_t NRecords = endian::readNext<uint32_t, Endian, unaligned>(Buf); + uint32_t FilenamesSize = endian::readNext<uint32_t, Endian, unaligned>(Buf); + uint32_t CoverageSize = endian::readNext<uint32_t, Endian, unaligned>(Buf); + uint32_t Version = endian::readNext<uint32_t, Endian, unaligned>(Buf); + + switch (Version) { case CoverageMappingVersion1: break; default: return instrprof_error::unsupported_version; } - auto Version = CoverageMappingVersion(TU->Version); - // Get the function records. - auto FunctionRecords = - reinterpret_cast<const CoverageMappingFunctionRecord<T> *>(Data.data()); - if (Data.size() < - sizeof(CoverageMappingFunctionRecord<T>) * TU->FunctionRecordsSize) - return instrprof_error::malformed; - Data = Data.substr(sizeof(CoverageMappingFunctionRecord<T>) * - TU->FunctionRecordsSize); + // Skip past the function records, saving the start and end for later. + const char *FunBuf = Buf; + Buf += NRecords * (sizeof(T) + 2 * sizeof(uint32_t) + sizeof(uint64_t)); + const char *FunEnd = Buf; // Get the filenames. - if (Data.size() < TU->FilenamesSize) + if (Buf + FilenamesSize > End) return instrprof_error::malformed; - auto RawFilenames = Data.substr(0, TU->FilenamesSize); - Data = Data.substr(TU->FilenamesSize); size_t FilenamesBegin = Filenames.size(); - RawCoverageFilenamesReader Reader(RawFilenames, Filenames); + RawCoverageFilenamesReader Reader(StringRef(Buf, FilenamesSize), Filenames); if (auto Err = Reader.read()) return Err; + Buf += FilenamesSize; - // Get the coverage mappings. - if (Data.size() < TU->CoverageMappingsSize) + // We'll read the coverage mapping records in the loop below. + const char *CovBuf = Buf; + Buf += CoverageSize; + const char *CovEnd = Buf; + if (Buf > End) return instrprof_error::malformed; - auto CoverageMappings = Data.substr(0, TU->CoverageMappingsSize); - Data = Data.substr(TU->CoverageMappingsSize); - for (unsigned I = 0; I < TU->FunctionRecordsSize; ++I) { - auto &MappingRecord = FunctionRecords[I]; + while (FunBuf < FunEnd) { + // Read the function information + T NamePtr = endian::readNext<T, Endian, unaligned>(FunBuf); + uint32_t NameSize = endian::readNext<uint32_t, Endian, unaligned>(FunBuf); + uint32_t DataSize = endian::readNext<uint32_t, Endian, unaligned>(FunBuf); + uint64_t FuncHash = endian::readNext<uint64_t, Endian, unaligned>(FunBuf); - // Get the coverage mapping. - if (CoverageMappings.size() < MappingRecord.CoverageMappingSize) + // Now use that to read the coverage data. 
+ if (CovBuf + DataSize > CovEnd) return instrprof_error::malformed; - auto Mapping = - CoverageMappings.substr(0, MappingRecord.CoverageMappingSize); - CoverageMappings = - CoverageMappings.substr(MappingRecord.CoverageMappingSize); + auto Mapping = StringRef(CovBuf, DataSize); + CovBuf += DataSize; // Ignore this record if we already have a record that points to the same - // function name. - // This is useful to ignore the redundant records for the functions - // with ODR linkage. - if (!UniqueFunctionMappingData.insert(MappingRecord.FunctionNamePtr) - .second) + // function name. This is useful to ignore the redundant records for the + // functions with ODR linkage. + if (!UniqueFunctionMappingData.insert(NamePtr).second) continue; - StringRef FunctionName; - if (auto Err = - ProfileNames.get(MappingRecord.FunctionNamePtr, - MappingRecord.FunctionNameSize, FunctionName)) - return Err; + + // Finally, grab the name and create a record. + StringRef FuncName; + if (std::error_code EC = ProfileNames.get(NamePtr, NameSize, FuncName)) + return EC; Records.push_back(BinaryCoverageReader::ProfileMappingRecord( - Version, FunctionName, MappingRecord.FunctionHash, Mapping, + CoverageMappingVersion(Version), FuncName, FuncHash, Mapping, FilenamesBegin, Filenames.size() - FilenamesBegin)); } } @@ -414,8 +398,10 @@ static const char *TestingFormatMagic = "llvmcovmtestdata"; static std::error_code loadTestingFormat(StringRef Data, SectionData &ProfileNames, StringRef &CoverageMapping, - uint8_t &BytesInAddress) { + uint8_t &BytesInAddress, + support::endianness &Endian) { BytesInAddress = 8; + Endian = support::endianness::little; Data = Data.substr(StringRef(TestingFormatMagic).size()); if (Data.size() < 1) @@ -444,12 +430,35 @@ static std::error_code loadTestingFormat(StringRef Data, static std::error_code loadBinaryFormat(MemoryBufferRef ObjectBuffer, SectionData &ProfileNames, StringRef &CoverageMapping, - uint8_t &BytesInAddress) { - auto ObjectFileOrErr = object::ObjectFile::createObjectFile(ObjectBuffer); - if (std::error_code EC = ObjectFileOrErr.getError()) + uint8_t &BytesInAddress, + support::endianness &Endian, + Triple::ArchType Arch) { + auto BinOrErr = object::createBinary(ObjectBuffer); + if (std::error_code EC = BinOrErr.getError()) return EC; - auto OF = std::move(ObjectFileOrErr.get()); + auto Bin = std::move(BinOrErr.get()); + std::unique_ptr<ObjectFile> OF; + if (auto *Universal = dyn_cast<object::MachOUniversalBinary>(Bin.get())) { + // If we have a universal binary, try to look up the object for the + // appropriate architecture. + auto ObjectFileOrErr = Universal->getObjectForArch(Arch); + if (std::error_code EC = ObjectFileOrErr.getError()) + return EC; + OF = std::move(ObjectFileOrErr.get()); + } else if (isa<object::ObjectFile>(Bin.get())) { + // For any other object file, upcast and take ownership. + OF.reset(cast<object::ObjectFile>(Bin.release())); + // If we've asked for a particular arch, make sure they match. + if (Arch != Triple::ArchType::UnknownArch && OF->getArch() != Arch) + return object_error::arch_not_found; + } else + // We can only handle object files. + return instrprof_error::malformed; + + // The coverage uses native pointer sizes for the object it's written in. BytesInAddress = OF->getBytesInAddress(); + Endian = OF->isLittleEndian() ? support::endianness::little + : support::endianness::big; // Look for the sections that we are interested in. 
int FoundSectionCount = 0; @@ -479,28 +488,36 @@ static std::error_code loadBinaryFormat(MemoryBufferRef ObjectBuffer, } ErrorOr<std::unique_ptr<BinaryCoverageReader>> -BinaryCoverageReader::create(std::unique_ptr<MemoryBuffer> &ObjectBuffer) { +BinaryCoverageReader::create(std::unique_ptr<MemoryBuffer> &ObjectBuffer, + Triple::ArchType Arch) { std::unique_ptr<BinaryCoverageReader> Reader(new BinaryCoverageReader()); SectionData Profile; StringRef Coverage; uint8_t BytesInAddress; + support::endianness Endian; std::error_code EC; if (ObjectBuffer->getBuffer().startswith(TestingFormatMagic)) // This is a special format used for testing. EC = loadTestingFormat(ObjectBuffer->getBuffer(), Profile, Coverage, - BytesInAddress); + BytesInAddress, Endian); else EC = loadBinaryFormat(ObjectBuffer->getMemBufferRef(), Profile, Coverage, - BytesInAddress); + BytesInAddress, Endian, Arch); if (EC) return EC; - if (BytesInAddress == 4) - EC = readCoverageMappingData<uint32_t>( + if (BytesInAddress == 4 && Endian == support::endianness::little) + EC = readCoverageMappingData<uint32_t, support::endianness::little>( + Profile, Coverage, Reader->MappingRecords, Reader->Filenames); + else if (BytesInAddress == 4 && Endian == support::endianness::big) + EC = readCoverageMappingData<uint32_t, support::endianness::big>( + Profile, Coverage, Reader->MappingRecords, Reader->Filenames); + else if (BytesInAddress == 8 && Endian == support::endianness::little) + EC = readCoverageMappingData<uint64_t, support::endianness::little>( Profile, Coverage, Reader->MappingRecords, Reader->Filenames); - else if (BytesInAddress == 8) - EC = readCoverageMappingData<uint64_t>( + else if (BytesInAddress == 8 && Endian == support::endianness::big) + EC = readCoverageMappingData<uint64_t, support::endianness::big>( Profile, Coverage, Reader->MappingRecords, Reader->Filenames); else return instrprof_error::malformed; diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp index 01e199d..3a5b266 100644 --- a/lib/ProfileData/InstrProfReader.cpp +++ b/lib/ProfileData/InstrProfReader.cpp @@ -14,6 +14,7 @@ #include "llvm/ProfileData/InstrProfReader.h" #include "InstrProfIndexed.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ProfileData/InstrProf.h" #include <cassert> @@ -112,7 +113,7 @@ std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { // Read the function hash. if (Line.is_at_end()) return error(instrprof_error::truncated); - if ((Line++)->getAsInteger(10, Record.Hash)) + if ((Line++)->getAsInteger(0, Record.Hash)) return error(instrprof_error::malformed); // Read the number of counters. diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 393ecf4..5a402bb 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -1248,10 +1248,10 @@ APFloat::roundAwayFromZero(roundingMode rounding_mode, return false; case rmTowardPositive: - return sign == false; + return !sign; case rmTowardNegative: - return sign == true; + return sign; } llvm_unreachable("Invalid rounding mode found"); } @@ -1430,7 +1430,7 @@ APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) /* Determine if the operation on the absolute values is effectively an addition or subtraction. */ - subtract ^= (sign ^ rhs.sign) ? true : false; + subtract ^= static_cast<bool>(sign ^ rhs.sign); /* Are we bigger exponent-wise than the RHS? 
*/ bits = exponent - rhs.exponent; diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 50a639c..2533fa0 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -672,6 +672,14 @@ hash_code llvm::hash_value(const APInt &Arg) { return hash_combine_range(Arg.pVal, Arg.pVal + Arg.getNumWords()); } +bool APInt::isSplat(unsigned SplatSizeInBits) const { + assert(getBitWidth() % SplatSizeInBits == 0 && + "SplatSizeInBits must divide width!"); + // We can check that all parts of an integer are equal by making use of a + // little trick: rotate and check if it's still the same value. + return *this == rotl(SplatSizeInBits); +} + /// HiBits - This function returns the high "numBits" bits of this APInt. APInt APInt::getHiBits(unsigned numBits) const { return APIntOps::lshr(*this, BitWidth - numBits); @@ -1310,13 +1318,8 @@ APInt APInt::sqrt() const { // libc sqrt function which will probably use a hardware sqrt computation. // This should be faster than the algorithm below. if (magnitude < 52) { -#if HAVE_ROUND return APInt(BitWidth, uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0]))))); -#else - return APInt(BitWidth, - uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0])) + 0.5)); -#endif } // Okay, all the short cuts are exhausted. We must compute it. The following @@ -1508,21 +1511,18 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, assert(u && "Must provide dividend"); assert(v && "Must provide divisor"); assert(q && "Must provide quotient"); - assert(u != v && u != q && v != q && "Must us different memory"); + assert(u != v && u != q && v != q && "Must use different memory"); assert(n>1 && "n must be > 1"); - // Knuth uses the value b as the base of the number system. In our case b - // is 2^31 so we just set it to -1u. - uint64_t b = uint64_t(1) << 32; + // b denotes the base of the number system. In our case b is 2^32. + LLVM_CONSTEXPR uint64_t b = uint64_t(1) << 32; -#if 0 DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n'); DEBUG(dbgs() << "KnuthDiv: original:"); DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); DEBUG(dbgs() << " by"); DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]); DEBUG(dbgs() << '\n'); -#endif // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of // u and v by d. Note that we have taken Knuth's advice here to use a power // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of @@ -1547,13 +1547,12 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, } } u[m+n] = u_carry; -#if 0 + DEBUG(dbgs() << "KnuthDiv: normal:"); DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); DEBUG(dbgs() << " by"); DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]); DEBUG(dbgs() << '\n'); -#endif // D2. [Initialize j.] Set j to m. This is the loop counter over the places. int j = m; @@ -1583,46 +1582,35 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation // consists of a simple multiplication by a one-place number, combined with // a subtraction. + // The digits (u[j+n]...u[j]) should be kept positive; if the result of + // this step is actually negative, (u[j+n]...u[j]) should be left as the + // true value plus b**(n+1), namely as the b's complement of + // the true value, and a "borrow" to the left should be remembered. 
bool isNeg = false; for (unsigned i = 0; i < n; ++i) { - uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32); + uint64_t u_tmp = (uint64_t(u[j+i+1]) << 32) | uint64_t(u[j+i]); uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]); bool borrow = subtrahend > u_tmp; - DEBUG(dbgs() << "KnuthDiv: u_tmp == " << u_tmp - << ", subtrahend == " << subtrahend + DEBUG(dbgs() << "KnuthDiv: u_tmp = " << u_tmp + << ", subtrahend = " << subtrahend << ", borrow = " << borrow << '\n'); uint64_t result = u_tmp - subtrahend; unsigned k = j + i; - u[k++] = (unsigned)(result & (b-1)); // subtract low word - u[k++] = (unsigned)(result >> 32); // subtract high word - while (borrow && k <= m+n) { // deal with borrow to the left + u[k++] = (unsigned)result; // subtraction low word + u[k++] = (unsigned)(result >> 32); // subtraction high word + while (borrow && k <= m+n) { // deal with borrow to the left borrow = u[k] == 0; u[k]--; k++; } isNeg |= borrow; - DEBUG(dbgs() << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " << - u[j+i+1] << '\n'); + DEBUG(dbgs() << "KnuthDiv: u[j+i] = " << u[j+i] + << ", u[j+i+1] = " << u[j+i+1] << '\n'); } DEBUG(dbgs() << "KnuthDiv: after subtraction:"); DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); DEBUG(dbgs() << '\n'); - // The digits (u[j+n]...u[j]) should be kept positive; if the result of - // this step is actually negative, (u[j+n]...u[j]) should be left as the - // true value plus b**(n+1), namely as the b's complement of - // the true value, and a "borrow" to the left should be remembered. - // - if (isNeg) { - bool carry = true; // true because b's complement is "complement + 1" - for (unsigned i = 0; i <= m+n; ++i) { - u[i] = ~u[i] + carry; // b's complement - carry = carry && u[i] == 0; - } - } - DEBUG(dbgs() << "KnuthDiv: after complement:"); - DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); - DEBUG(dbgs() << '\n'); // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was // negative, go to step D6; otherwise go on to step D7. @@ -1644,7 +1632,7 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, u[j+n] += carry; } DEBUG(dbgs() << "KnuthDiv: after correction:"); - DEBUG(for (int i = m+n; i >=0; i--) dbgs() <<" " << u[i]); + DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n'); // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. @@ -1677,9 +1665,7 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, } DEBUG(dbgs() << '\n'); } -#if 0 DEBUG(dbgs() << '\n'); -#endif } void APInt::divide(const APInt LHS, unsigned lhsWords, @@ -1803,6 +1789,8 @@ void APInt::divide(const APInt LHS, unsigned lhsWords, // The quotient is in Q. Reconstitute the quotient into Quotient's low // order words. + // This case is currently dead as all users of divide() handle trivial cases + // earlier. 
if (lhsWords == 1) { uint64_t tmp = uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2)); @@ -2296,13 +2284,13 @@ void APInt::dump() const { this->toStringUnsigned(U); this->toStringSigned(S); dbgs() << "APInt(" << BitWidth << "b, " - << U.str() << "u " << S.str() << "s)"; + << U << "u " << S << "s)"; } void APInt::print(raw_ostream &OS, bool isSigned) const { SmallString<40> S; this->toString(S, 10, isSigned, /* formatAsCLiteral = */false); - OS << S.str(); + OS << S; } // This implements a variety of operations on a representation of diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp index 7c306b2..f48edac 100644 --- a/lib/Support/Allocator.cpp +++ b/lib/Support/Allocator.cpp @@ -12,12 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Allocator.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/DataTypes.h" -#include "llvm/Support/Memory.h" -#include "llvm/Support/Recycler.h" #include "llvm/Support/raw_ostream.h" -#include <cstring> namespace llvm { diff --git a/lib/Support/Android.mk b/lib/Support/Android.mk index 34448a7..4d1f526 100644 --- a/lib/Support/Android.mk +++ b/lib/Support/Android.mk @@ -35,8 +35,6 @@ support_SRC_FILES := \ IntervalMap.cpp \ IntEqClasses.cpp \ IntrusiveRefCntPtr.cpp \ - IsInf.cpp \ - IsNAN.cpp \ LEB128.cpp \ LineIterator.cpp \ Locale.cpp \ diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index a44c1a3..684afa9 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -58,8 +58,6 @@ add_llvm_library(LLVMSupport IntEqClasses.cpp IntervalMap.cpp IntrusiveRefCntPtr.cpp - IsInf.cpp - IsNAN.cpp LEB128.cpp LineIterator.cpp Locale.cpp diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index b49ec36..af6c605 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm-c/Support.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" @@ -32,10 +33,8 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" -#include <cerrno> #include <cstdlib> #include <map> -#include <system_error> using namespace llvm; using namespace cl; @@ -1463,10 +1462,9 @@ void basic_parser_impl::printOptionNoValue(const Option &O, // -help and -help-hidden option implementation // -static int OptNameCompare(const void *LHS, const void *RHS) { - typedef std::pair<const char *, Option *> pair_ty; - - return strcmp(((const pair_ty *)LHS)->first, ((const pair_ty *)RHS)->first); +static int OptNameCompare(const std::pair<const char *, Option *> *LHS, + const std::pair<const char *, Option *> *RHS) { + return strcmp(LHS->first, RHS->first); } // Copy Options into a vector so we can sort them as we like. @@ -1494,7 +1492,7 @@ static void sortOpts(StringMap<Option *> &OptMap, } // Sort the options list alphabetically. - qsort(Opts.data(), Opts.size(), sizeof(Opts[0]), OptNameCompare); + array_pod_sort(Opts.begin(), Opts.end(), OptNameCompare); } namespace { @@ -1516,7 +1514,7 @@ public: // Invoke the printer. void operator=(bool Value) { - if (Value == false) + if (!Value) return; StrOptionPairVector Opts; @@ -1562,10 +1560,11 @@ public: explicit CategorizedHelpPrinter(bool showHidden) : HelpPrinter(showHidden) {} // Helper function for printOptions(). 
- // It shall return true if A's name should be lexographically - // ordered before B's name. It returns false otherwise. - static bool OptionCategoryCompare(OptionCategory *A, OptionCategory *B) { - return strcmp(A->getName(), B->getName()) < 0; + // It shall return a negative value if A's name should be lexicographically + // ordered before B's name. It returns a value greater than or equal to zero otherwise. + static int OptionCategoryCompare(OptionCategory *const *A, + OptionCategory *const *B) { + return strcmp((*A)->getName(), (*B)->getName()); } // Make sure we inherit our base class's operator=() @@ -1586,8 +1585,8 @@ protected: // Sort the different option categories alphabetically. assert(SortedCategories.size() > 0 && "No option categories registered!"); - std::sort(SortedCategories.begin(), SortedCategories.end(), - OptionCategoryCompare); + array_pod_sort(SortedCategories.begin(), SortedCategories.end(), + OptionCategoryCompare); // Create map to empty vectors. for (std::vector<OptionCategory *>::const_iterator @@ -1716,7 +1715,7 @@ static cl::opt<bool> PrintAllOptions( cl::init(false), cl::cat(GenericCategory)); void HelpPrinterWrapper::operator=(bool Value) { - if (Value == false) + if (!Value) return; // Decide which printer to invoke. If more than one option category is diff --git a/lib/Support/Compression.cpp b/lib/Support/Compression.cpp index 17ae295..b54613e 100644 --- a/lib/Support/Compression.cpp +++ b/lib/Support/Compression.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Compression.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" #include "llvm/Support/Compiler.h" diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp index 9b0e443..aba0f1d 100644 --- a/lib/Support/CrashRecoveryContext.cpp +++ b/lib/Support/CrashRecoveryContext.cpp @@ -8,13 +8,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/CrashRecoveryContext.h" -#include "llvm/ADT/SmallString.h" #include "llvm/Config/config.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/ThreadLocal.h" -#include <cstdio> #include <setjmp.h> using namespace llvm; diff --git a/lib/Support/DAGDeltaAlgorithm.cpp b/lib/Support/DAGDeltaAlgorithm.cpp index 0d504ee..f1a334b 100644 --- a/lib/Support/DAGDeltaAlgorithm.cpp +++ b/lib/Support/DAGDeltaAlgorithm.cpp @@ -63,9 +63,6 @@ private: DAGDeltaAlgorithm &DDA; - const changeset_ty &Changes; - const std::vector<edge_ty> &Dependencies; - std::vector<change_ty> Roots; /// Cache of failed test results.
Successful test results are never cached @@ -139,9 +136,8 @@ private: } public: - DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &_DDA, - const changeset_ty &_Changes, - const std::vector<edge_ty> &_Dependencies); + DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &DDA, const changeset_ty &Changes, + const std::vector<edge_ty> &Dependencies); changeset_ty Run(); @@ -174,21 +170,17 @@ protected: } public: - DeltaActiveSetHelper(DAGDeltaAlgorithmImpl &_DDAI, - const changeset_ty &_Required) - : DDAI(_DDAI), Required(_Required) {} + DeltaActiveSetHelper(DAGDeltaAlgorithmImpl &DDAI, + const changeset_ty &Required) + : DDAI(DDAI), Required(Required) {} }; } -DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &_DDA, - const changeset_ty &_Changes, - const std::vector<edge_ty> - &_Dependencies) - : DDA(_DDA), - Changes(_Changes), - Dependencies(_Dependencies) -{ +DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl( + DAGDeltaAlgorithm &DDA, const changeset_ty &Changes, + const std::vector<edge_ty> &Dependencies) + : DDA(DDA) { for (changeset_ty::const_iterator it = Changes.begin(), ie = Changes.end(); it != ie; ++it) { Predecessors.insert(std::make_pair(*it, std::vector<change_ty>())); diff --git a/lib/Support/DataStream.cpp b/lib/Support/DataStream.cpp index dbf6465..a44b958 100644 --- a/lib/Support/DataStream.cpp +++ b/lib/Support/DataStream.cpp @@ -18,8 +18,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Program.h" -#include <cerrno> -#include <cstdio> #include <string> #include <system_error> #if !defined(_MSC_VER) && !defined(__MINGW32__) diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp index 9c58ae8..a88b18e 100644 --- a/lib/Support/Debug.cpp +++ b/lib/Support/Debug.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Signals.h" #include "llvm/Support/circular_raw_ostream.h" +#include "llvm/Support/raw_ostream.h" #undef isCurrentDebugType #undef setCurrentDebugType diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp index b176a8b..307ff09 100644 --- a/lib/Support/FileOutputBuffer.cpp +++ b/lib/Support/FileOutputBuffer.cpp @@ -11,11 +11,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/Errc.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/Support/FileOutputBuffer.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/Errc.h" #include <system_error> #if !defined(_MSC_VER) && !defined(__MINGW32__) @@ -77,9 +76,16 @@ FileOutputBuffer::create(StringRef FilePath, size_t Size, if (EC) return EC; +#ifndef LLVM_ON_WIN32 + // On Windows, CreateFileMapping (the mmap function on Windows) + // automatically extends the underlying file. We don't need to + // extend the file beforehand. _chsize (ftruncate on Windows) is + // pretty slow just like it writes specified amount of bytes, + // so we should avoid calling that. EC = sys::fs::resize_file(FD, Size); if (EC) return EC; +#endif auto MappedFile = llvm::make_unique<mapped_file_region>( FD, mapped_file_region::readwrite, Size, 0, EC); diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp index 4635114..80d2aef 100644 --- a/lib/Support/FoldingSet.cpp +++ b/lib/Support/FoldingSet.cpp @@ -101,6 +101,8 @@ void FoldingSetNodeID::AddString(StringRef String) { // Otherwise do it the hard way. 
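The FileOutputBuffer hunk above skips sys::fs::resize_file() on Windows because CreateFileMapping grows the backing file itself; on POSIX the pre-extension is still required, since stores through a shared mapping past end-of-file fault with SIGBUS. A POSIX-only sketch of that side of the contract (illustrative helper, error paths condensed):

#include <cstddef>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

// Map a file for output. The ftruncate() is the step the Windows path
// gets to skip: without it, writes into pages beyond the current file
// size would raise SIGBUS.
static void *mapForOutput(const char *path, std::size_t size) {
  int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0644);
  if (fd < 0)
    return nullptr;
  if (ftruncate(fd, (off_t)size) != 0) {
    close(fd);
    return nullptr;
  }
  void *p = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  close(fd); // the mapping stays valid after the descriptor is closed
  return p == MAP_FAILED ? nullptr : p;
}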
// To be compatible with above bulk transfer, we need to take endianness // into account. + static_assert(sys::IsBigEndianHost || sys::IsLittleEndianHost, + "Unexpected host endianness"); if (sys::IsBigEndianHost) { for (Pos += 4; Pos <= Size; Pos += 4) { unsigned V = ((unsigned char)String[Pos - 4] << 24) | @@ -109,8 +111,7 @@ void FoldingSetNodeID::AddString(StringRef String) { (unsigned char)String[Pos - 1]; Bits.push_back(V); } - } else { - assert(sys::IsLittleEndianHost && "Unexpected host endianness"); + } else { // Little-endian host for (Pos += 4; Pos <= Size; Pos += 4) { unsigned V = ((unsigned char)String[Pos - 1] << 24) | ((unsigned char)String[Pos - 2] << 16) | @@ -222,6 +223,8 @@ static void **AllocateBuckets(unsigned NumBuckets) { //===----------------------------------------------------------------------===// // FoldingSetImpl Implementation +void FoldingSetImpl::anchor() {} + FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) { assert(5 < Log2InitSize && Log2InitSize < 32 && "Initial hash table size out of range"); diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp index 618ec26..2ed71c7 100644 --- a/lib/Support/FormattedStream.cpp +++ b/lib/Support/FormattedStream.cpp @@ -13,6 +13,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> using namespace llvm; diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp index 054df52..fd4ce54 100644 --- a/lib/Support/GraphWriter.cpp +++ b/lib/Support/GraphWriter.cpp @@ -15,7 +15,6 @@ #include "llvm/Config/config.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/Path.h" #include "llvm/Support/Program.h" using namespace llvm; @@ -98,6 +97,7 @@ static bool ExecGraphViewer(StringRef ExecPath, std::vector<const char *> &args, return false; } +namespace { struct GraphSession { std::string LogBuffer; bool TryFindProgram(StringRef Names, std::string &ProgramPath) { @@ -114,6 +114,7 @@ struct GraphSession { return false; } }; +} // namespace static const char *getProgramName(GraphProgram::Name program) { switch (program) { diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 42bc342..0e9a62e 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -357,10 +357,16 @@ StringRef sys::getHostCPUName() { case 63: case 69: case 70: - // Not all Haswell processors support AVX too (such as the Pentium + // Not all Haswell processors support AVX2 (such as the Pentium // versions instead of the i7 versions). return HasAVX2 ? "core-avx2" : "corei7"; + // Broadwell: + case 61: + // Not all Broadwell processors support AVX2 (such as the Pentium + // versions instead of the i7 versions). + return HasAVX2 ? "broadwell" : "corei7"; + case 28: // Most 45 nm Intel Atom processors case 38: // 45 nm Atom Lincroft case 39: // 32 nm Atom Medfield diff --git a/lib/Support/IsInf.cpp b/lib/Support/IsInf.cpp deleted file mode 100644 index d6da0c9..0000000 --- a/lib/Support/IsInf.cpp +++ /dev/null @@ -1,49 +0,0 @@ -//===-- IsInf.cpp - Platform-independent wrapper around C99 isinf() -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// Platform-independent wrapper around C99 isinf() -// -//===----------------------------------------------------------------------===// - -#include "llvm/Config/config.h" - -#if HAVE_ISINF_IN_MATH_H -# include <math.h> -#elif HAVE_ISINF_IN_CMATH -# include <cmath> -#elif HAVE_STD_ISINF_IN_CMATH -# include <cmath> -using std::isinf; -#elif HAVE_FINITE_IN_IEEEFP_H -// A handy workaround I found at http://www.unixguide.net/sun/faq ... -// apparently this has been a problem with Solaris for years. -# include <ieeefp.h> -static int isinf(double x) { return !finite(x) && x==x; } -#elif defined(_MSC_VER) -#include <float.h> -#define isinf(X) (!_finite(X)) -#elif defined(_AIX) && defined(__GNUC__) -// GCC's fixincludes seems to be removing the isinf() declaration from the -// system header /usr/include/math.h -# include <math.h> -static int isinf(double x) { return !finite(x) && x==x; } -#elif defined(__hpux) -// HP-UX is "special" -#include <math.h> -static int isinf(double x) { return ((x) == INFINITY) || ((x) == -INFINITY); } -#else -# error "Don't know how to get isinf()" -#endif - -namespace llvm { - -int IsInf(float f) { return isinf(f); } -int IsInf(double d) { return isinf(d); } - -} // end namespace llvm; diff --git a/lib/Support/IsNAN.cpp b/lib/Support/IsNAN.cpp deleted file mode 100644 index bdfdfbf..0000000 --- a/lib/Support/IsNAN.cpp +++ /dev/null @@ -1,33 +0,0 @@ -//===-- IsNAN.cpp ---------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Platform-independent wrapper around C99 isnan(). -// -//===----------------------------------------------------------------------===// - -#include "llvm/Config/config.h" - -#if HAVE_ISNAN_IN_MATH_H -# include <math.h> -#elif HAVE_ISNAN_IN_CMATH -# include <cmath> -#elif HAVE_STD_ISNAN_IN_CMATH -# include <cmath> -using std::isnan; -#elif defined(_MSC_VER) -#include <float.h> -#define isnan _isnan -#else -# error "Don't know how to get isnan()" -#endif - -namespace llvm { - int IsNAN(float f) { return isnan(f); } - int IsNAN(double d) { return isnan(d); } -} // end namespace llvm; diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp index ec3158c..d07c5f0 100644 --- a/lib/Support/LockFileManager.cpp +++ b/lib/Support/LockFileManager.cpp @@ -7,12 +7,10 @@ // //===----------------------------------------------------------------------===// #include "llvm/Support/LockFileManager.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Errc.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include <sys/stat.h> #include <sys/types.h> @@ -91,7 +89,7 @@ LockFileManager::LockFileManager(StringRef FileName) UniqueLockFileName += "-%%%%%%%%"; int UniqueLockFileID; if (std::error_code EC = sys::fs::createUniqueFile( - UniqueLockFileName.str(), UniqueLockFileID, UniqueLockFileName)) { + UniqueLockFileName, UniqueLockFileID, UniqueLockFileName)) { Error = EC; return; } @@ -116,7 +114,7 @@ LockFileManager::LockFileManager(StringRef FileName) // We failed to write out PID, so make up an excuse, remove the // unique lock file, and fail. 
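The IsInf.cpp and IsNAN.cpp deletions above retire pre-C++11 portability shims; with a C++11 toolchain, std::isinf and std::isnan from <cmath> cover every platform those wrappers special-cased. A minimal sketch of the replacement spelling (the helper itself is illustrative, not from the patch):

#include <cmath>
#include <cstddef>

// Returns true if any element is infinite or NaN.
static bool anyNonFinite(const double *x, std::size_t n) {
  for (std::size_t i = 0; i != n; ++i)
    if (std::isinf(x[i]) || std::isnan(x[i]))
      return true;
  return false;
}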
Error = make_error_code(errc::no_space_on_device); - sys::fs::remove(UniqueLockFileName.c_str()); + sys::fs::remove(UniqueLockFileName); return; } } @@ -124,7 +122,7 @@ LockFileManager::LockFileManager(StringRef FileName) while (1) { // Create a link from the lock file name. If this succeeds, we're done. std::error_code EC = - sys::fs::create_link(UniqueLockFileName.str(), LockFileName.str()); + sys::fs::create_link(UniqueLockFileName, LockFileName); if (!EC) return; @@ -137,11 +135,11 @@ // from the lock file. if ((Owner = readLockFile(LockFileName))) { // Wipe out our unique lock file (it's useless now) - sys::fs::remove(UniqueLockFileName.str()); + sys::fs::remove(UniqueLockFileName); return; } - if (!sys::fs::exists(LockFileName.str())) { + if (!sys::fs::exists(LockFileName)) { // The previous owner released the lock file before we could read it. // Try to get ownership again. continue; } @@ -149,7 +147,7 @@ // There is a lock file that nobody owns; try to clean it up and get // ownership. - if ((EC = sys::fs::remove(LockFileName.str()))) { + if ((EC = sys::fs::remove(LockFileName))) { Error = EC; return; } @@ -171,8 +169,8 @@ LockFileManager::~LockFileManager() { return; // Since we own the lock, remove the lock file and our own unique lock file. - sys::fs::remove(LockFileName.str()); - sys::fs::remove(UniqueLockFileName.str()); + sys::fs::remove(LockFileName); + sys::fs::remove(UniqueLockFileName); } LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() { @@ -186,8 +184,9 @@ Interval.tv_sec = 0; Interval.tv_nsec = 1000000; #endif - // Don't wait more than one minute for the file to appear. - const unsigned MaxSeconds = 60; + // Don't wait more than five minutes per iteration. Total timeout for the file + // to appear is ~8.5 mins. + const unsigned MaxSeconds = 5*60; do { // Sleep for the designated interval, to allow the owning process time to // finish up and remove the lock file. @@ -202,7 +201,7 @@ if (sys::fs::access(LockFileName.c_str(), sys::fs::AccessMode::Exist) == errc::no_such_file_or_directory) { // If the original file wasn't created, someone thought the lock was dead.
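For readers following the LockFileManager changes above: the protocol writes the owner's PID into a uniquely named file, then publishes the lock with a hard link, which is atomic even over NFS. A POSIX-only sketch of one acquisition attempt (illustrative names, error handling condensed):

#include <cstdio>
#include <fcntl.h>
#include <unistd.h>

// Returns true if we now own the lock. On failure the caller polls the
// lock file, reads the owner's PID, or waits, as the manager above does.
static bool tryAcquireLock(const char *uniquePath, const char *lockPath) {
  int fd = open(uniquePath, O_CREAT | O_EXCL | O_WRONLY, 0644);
  if (fd < 0)
    return false;
  char buf[32];
  int len = snprintf(buf, sizeof(buf), "%ld\n", (long)getpid());
  (void)write(fd, buf, (size_t)len); // record the owner's PID
  close(fd);
  // link() either atomically creates lockPath or fails with EEXIST.
  return link(uniquePath, lockPath) == 0;
}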
- if (!sys::fs::exists(FileName.str())) + if (!sys::fs::exists(FileName)) return Res_OwnerDied; return Res_Success; } @@ -235,5 +234,5 @@ LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() { } std::error_code LockFileManager::unsafeRemoveLockFile() { - return sys::fs::remove(LockFileName.str()); + return sys::fs::remove(LockFileName); } diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index 379db88..98862e9 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -23,7 +23,6 @@ #include "llvm/Support/Program.h" #include <cassert> #include <cerrno> -#include <cstdio> #include <cstring> #include <new> #include <sys/types.h> diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index a11bb7f..cf467381 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -19,9 +19,7 @@ #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include <cctype> -#include <cstdio> #include <cstring> -#include <fcntl.h> #if !defined(_MSC_VER) && !defined(__MINGW32__) #include <unistd.h> @@ -30,6 +28,7 @@ #endif using namespace llvm; +using namespace llvm::support::endian; namespace { using llvm::StringRef; @@ -48,7 +47,6 @@ namespace { // * empty (in this case we return an empty string) // * either C: or {//,\\}net. // * {/,\} - // * {.,..} // * {file,directory}name if (path.empty()) @@ -75,12 +73,6 @@ namespace { if (is_separator(path[0])) return path.substr(0, 1); - if (path.startswith("..")) - return path.substr(0, 2); - - if (path[0] == '.') - return path.substr(0, 1); - // * {file,directory}name size_t end = path.find_first_of(separators); return path.substr(0, end); @@ -917,7 +909,7 @@ file_magic identify_magic(StringRef Magic) { if (Magic.size() < MinSize) return file_magic::coff_import_library; - int BigObjVersion = *reinterpret_cast<const support::ulittle16_t*>( + int BigObjVersion = read16le( Magic.data() + offsetof(COFF::BigObjHeader, Version)); if (BigObjVersion < COFF::BigObjHeader::MinBigObjectVersion) return file_magic::coff_import_library; @@ -1034,8 +1026,7 @@ file_magic identify_magic(StringRef Magic) { case 'M': // Possible MS-DOS stub on Windows PE file if (Magic[1] == 'Z') { - uint32_t off = - *reinterpret_cast<const support::ulittle32_t*>(Magic.data() + 0x3c); + uint32_t off = read32le(Magic.data() + 0x3c); // PE/COFF file, either EXE or DLL. 
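The identify_magic hunk above swaps a reinterpret_cast through support::ulittle32_t for the read32le helper. A portable equivalent, shown here to make the byte order explicit (the standalone function below is illustrative):

#include <cstdint>

// Little-endian load, equivalent in spirit to the
// llvm::support::endian::read32le call used above: assemble the value
// byte by byte so the host's own endianness never matters.
static uint32_t read32le(const unsigned char *p) {
  return uint32_t(p[0]) | (uint32_t(p[1]) << 8) |
         (uint32_t(p[2]) << 16) | (uint32_t(p[3]) << 24);
}

// Example: the PE header offset lives at 0x3c of the MS-DOS stub:
//   uint32_t off = read32le(data + 0x3c);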
if (off < Magic.size() && memcmp(Magic.data()+off, COFF::PEMagic, sizeof(COFF::PEMagic)) == 0) diff --git a/lib/Support/Process.cpp b/lib/Support/Process.cpp index ad67e1b..d0c1748 100644 --- a/lib/Support/Process.cpp +++ b/lib/Support/Process.cpp @@ -13,8 +13,8 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Config/config.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/Program.h" diff --git a/lib/Support/Program.cpp b/lib/Support/Program.cpp index b84b82b..34e336b 100644 --- a/lib/Support/Program.cpp +++ b/lib/Support/Program.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Program.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" #include <system_error> using namespace llvm; diff --git a/lib/Support/RandomNumberGenerator.cpp b/lib/Support/RandomNumberGenerator.cpp index 2943137..81d0411 100644 --- a/lib/Support/RandomNumberGenerator.cpp +++ b/lib/Support/RandomNumberGenerator.cpp @@ -13,13 +13,15 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "rng" +#include "llvm/Support/RandomNumberGenerator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/RandomNumberGenerator.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "rng" + // Tracking BUG: 19665 // http://llvm.org/bugs/show_bug.cgi?id=19665 // diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp index f7fe1e4..d3e29ac 100644 --- a/lib/Support/Regex.cpp +++ b/lib/Support/Regex.cpp @@ -14,8 +14,7 @@ #include "llvm/Support/Regex.h" #include "regex_impl.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringRef.h" #include <string> using namespace llvm; diff --git a/lib/Support/ScaledNumber.cpp b/lib/Support/ScaledNumber.cpp index 6f6699c..987c2d8 100644 --- a/lib/Support/ScaledNumber.cpp +++ b/lib/Support/ScaledNumber.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/ScaledNumber.h" #include "llvm/ADT/APFloat.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::ScaledNumbers; diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index b50a66b..d5e3157 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -14,13 +14,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/SourceMgr.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Locale.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" -#include <system_error> using namespace llvm; static const size_t TabStop = 8; diff --git a/lib/Support/SpecialCaseList.cpp b/lib/Support/SpecialCaseList.cpp index c312cc1..ea417c4 100644 --- a/lib/Support/SpecialCaseList.cpp +++ b/lib/Support/SpecialCaseList.cpp @@ -15,13 +15,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/SpecialCaseList.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Regex.h" -#include "llvm/Support/raw_ostream.h" #include <string> #include 
<system_error> #include <utility> diff --git a/lib/Support/StreamingMemoryObject.cpp b/lib/Support/StreamingMemoryObject.cpp index f39bc56..90f3ed8 100644 --- a/lib/Support/StreamingMemoryObject.cpp +++ b/lib/Support/StreamingMemoryObject.cpp @@ -8,12 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/StreamingMemoryObject.h" -#include "llvm/Support/Compiler.h" #include <cassert> #include <cstddef> #include <cstring> - - using namespace llvm; namespace { diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp index d77ad7f..3e2420f 100644 --- a/lib/Support/StringExtras.cpp +++ b/lib/Support/StringExtras.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" using namespace llvm; diff --git a/lib/Support/SystemUtils.cpp b/lib/Support/SystemUtils.cpp index 2036364..7fa6ae3 100644 --- a/lib/Support/SystemUtils.cpp +++ b/lib/Support/SystemUtils.cpp @@ -13,8 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/SystemUtils.h" -#include "llvm/Support/Process.h" -#include "llvm/Support/Program.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp index f691883..3ca8572 100644 --- a/lib/Support/TargetRegistry.cpp +++ b/lib/Support/TargetRegistry.cpp @@ -10,7 +10,6 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/Host.h" #include "llvm/Support/raw_ostream.h" #include <cassert> #include <vector> diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index e1a531a..d7b6515 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -14,12 +14,10 @@ #include "llvm/Support/Timer.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Format.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" -#include "llvm/Support/MutexGuard.h" #include "llvm/Support/Process.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index e74b23c..d4b150a 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -141,6 +141,7 @@ const char *Triple::getOSTypeName(OSType Kind) { switch (Kind) { case UnknownOS: return "unknown"; + case CloudABI: return "cloudabi"; case Darwin: return "darwin"; case DragonFly: return "dragonfly"; case FreeBSD: return "freebsd"; @@ -280,6 +281,7 @@ static Triple::ArchType parseARMArch(StringRef ArchName) { .Cases("v7", "v7a", "v7em", "v7l", arch) .Cases("v7m", "v7r", "v7s", arch) .Cases("v8", "v8a", arch) + .Cases("v8.1", "v8.1a", arch) .Default(Triple::UnknownArch); } @@ -345,6 +347,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) { static Triple::OSType parseOS(StringRef OSName) { return StringSwitch<Triple::OSType>(OSName) + .StartsWith("cloudabi", Triple::CloudABI) .StartsWith("darwin", Triple::Darwin) .StartsWith("dragonfly", Triple::DragonFly) .StartsWith("freebsd", Triple::FreeBSD) @@ -401,6 +404,7 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) { SubArchName = SubArchName.substr(0, SubArchName.size() - 2); return StringSwitch<Triple::SubArchType>(SubArchName) + .EndsWith("v8.1a", 
Triple::ARMSubArch_v8_1a) .EndsWith("v8", Triple::ARMSubArch_v8) .EndsWith("v8a", Triple::ARMSubArch_v8) .EndsWith("v7", Triple::ARMSubArch_v7) @@ -413,6 +417,7 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) { .EndsWith("v6", Triple::ARMSubArch_v6) .EndsWith("v6m", Triple::ARMSubArch_v6m) .EndsWith("v6sm", Triple::ARMSubArch_v6m) + .EndsWith("v6k", Triple::ARMSubArch_v6k) .EndsWith("v6t2", Triple::ARMSubArch_v6t2) .EndsWith("v5", Triple::ARMSubArch_v5) .EndsWith("v5e", Triple::ARMSubArch_v5) @@ -436,6 +441,30 @@ static const char *getObjectFormatTypeName(Triple::ObjectFormatType Kind) { } static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { + switch (T.getArch()) { + default: + break; + case Triple::hexagon: + case Triple::mips: + case Triple::mipsel: + case Triple::mips64: + case Triple::mips64el: + case Triple::r600: + case Triple::amdgcn: + case Triple::sparc: + case Triple::sparcv9: + case Triple::systemz: + case Triple::xcore: + case Triple::ppc64le: + return Triple::ELF; + + case Triple::ppc: + case Triple::ppc64: + if (T.isOSDarwin()) + return Triple::MachO; + return Triple::ELF; + } + if (T.isOSDarwin()) return Triple::MachO; else if (T.isOSWindows()) @@ -714,6 +743,14 @@ void Triple::getOSVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const { StringRef OSName = getOSName(); + // For Android, we care about the Android version rather than the Linux + // version. + if (getEnvironment() == Android) { + OSName = getEnvironmentName().substr(strlen("android")); + if (OSName.startswith("eabi")) + OSName = OSName.substr(strlen("eabi")); + } + // Assume that the OS portion of the triple starts with the canonical name. StringRef OSTypeName = getOSTypeName(getOS()); if (OSName.startswith(OSTypeName)) @@ -839,7 +876,7 @@ void Triple::setArchName(StringRef Str) { Triple += getVendorName(); Triple += "-"; Triple += getOSAndEnvironmentName(); - setTriple(Triple.str()); + setTriple(Triple); } void Triple::setVendorName(StringRef Str) { @@ -1063,9 +1100,9 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const { .Cases("v5", "v5t", "arm10tdmi") .Cases("v5e", "v5te", "arm1022e") .Case("v5tej", "arm926ej-s") - .Cases("v6", "v6k", "arm1136jf-s") + .Case("v6", "arm1136jf-s") .Case("v6j", "arm1136j-s") - .Cases("v6z", "v6zk", "arm1176jzf-s") + .Cases("v6k", "v6z", "v6zk", "arm1176jzf-s") .Case("v6t2", "arm1156t2-s") .Cases("v6m", "v6-m", "v6sm", "v6s-m", "cortex-m0") .Cases("v7", "v7a", "v7-a", "v7l", "v7-l", "cortex-a8") @@ -1074,6 +1111,7 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const { .Cases("v7m", "v7-m", "cortex-m3") .Cases("v7em", "v7e-m", "cortex-m4") .Cases("v8", "v8a", "v8-a", "cortex-a53") + .Cases("v8.1a", "v8.1-a", "generic-armv8.1-a") .Default(nullptr); else result = llvm::StringSwitch<const char *>(MArch) @@ -1099,6 +1137,8 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const { default: return "strongarm"; } + case llvm::Triple::NaCl: + return "cortex-a8"; default: switch (getEnvironment()) { case llvm::Triple::EABIHF: diff --git a/lib/Support/Twine.cpp b/lib/Support/Twine.cpp index 56ed964..d2cc75b 100644 --- a/lib/Support/Twine.cpp +++ b/lib/Support/Twine.cpp @@ -28,13 +28,6 @@ void Twine::toVector(SmallVectorImpl<char> &Out) const { print(OS); } -StringRef Twine::toStringRef(SmallVectorImpl<char> &Out) const { - if (isSingleStringRef()) - return getSingleStringRef(); - toVector(Out); - return StringRef(Out.data(), Out.size()); -} - StringRef Twine::toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const 
{ if (isUnary()) { switch (getLHSKind()) { @@ -72,6 +65,9 @@ void Twine::printOneChild(raw_ostream &OS, Child Ptr, case Twine::StringRefKind: OS << *Ptr.stringRef; break; + case Twine::SmallStringKind: + OS << *Ptr.smallString; + break; case Twine::CharKind: OS << Ptr.character; break; @@ -122,6 +118,10 @@ void Twine::printOneChildRepr(raw_ostream &OS, Child Ptr, OS << "stringref:\"" << Ptr.stringRef << "\""; break; + case Twine::SmallStringKind:
+ OS << "smallstring:\""
+ << *Ptr.smallString << "\"";
+ break;
case Twine::CharKind: OS << "char:\"" << Ptr.character << "\""; break; diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc index baf2767..5816fb8 100644 --- a/lib/Support/Unix/Program.inc +++ b/lib/Support/Unix/Program.inc @@ -18,10 +18,11 @@ #include "Unix.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Config/config.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" -#include <llvm/Config/config.h> #if HAVE_SYS_STAT_H #include <sys/stat.h> #endif diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index 665c7de..a9b48e0 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -14,6 +14,7 @@ #include "Unix.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Format.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FileUtilities.h" #include "llvm/Support/ManagedStatic.h" @@ -324,7 +325,8 @@ static bool findModulesAndOffsets(void **StackTrace, int Depth, } #endif -static bool printSymbolizedStackTrace(void **StackTrace, int Depth, FILE *FD) { +static bool printSymbolizedStackTrace(void **StackTrace, int Depth, + llvm::raw_ostream &OS) { // FIXME: Subtract necessary number from StackTrace entries to turn return addresses // into actual instruction addresses. // Use llvm-symbolizer tool to symbolize the stack traces. @@ -382,7 +384,7 @@ static bool printSymbolizedStackTrace(void **StackTrace, int Depth, FILE *FD) { int frame_no = 0; for (int i = 0; i < Depth; i++) { if (!Modules[i]) { - fprintf(FD, "#%d %p\n", frame_no++, StackTrace[i]); + OS << format("#%d %p\n", frame_no++, StackTrace[i]); continue; } // Read pairs of lines (function name and file/line info) until we @@ -393,17 +395,17 @@ static bool printSymbolizedStackTrace(void **StackTrace, int Depth, FILE *FD) { StringRef FunctionName = *CurLine++; if (FunctionName.empty()) break; - fprintf(FD, "#%d %p ", frame_no++, StackTrace[i]); + OS << format("#%d %p ", frame_no++, StackTrace[i]); if (!FunctionName.startswith("??")) - fprintf(FD, "%s ", FunctionName.str().c_str()); + OS << format("%s ", FunctionName.str().c_str()); if (CurLine == Lines.end()) return false; StringRef FileLineInfo = *CurLine++; if (!FileLineInfo.startswith("??")) - fprintf(FD, "%s", FileLineInfo.str().c_str()); + OS << format("%s", FileLineInfo.str().c_str()); else - fprintf(FD, "(%s+%p)", Modules[i], (void *)Offsets[i]); - fprintf(FD, "\n"); + OS << format("(%s+%p)", Modules[i], (void *)Offsets[i]); + OS << "\n"; } } return true; @@ -415,13 +417,13 @@ static bool printSymbolizedStackTrace(void **StackTrace, int Depth, FILE *FD) { // // On glibc systems we have the 'backtrace' function, which works nicely, but // doesn't demangle symbols. -void llvm::sys::PrintStackTrace(FILE *FD) { +void llvm::sys::PrintStackTrace(raw_ostream &OS) { #if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES) static void* StackTrace[256]; // Use backtrace() to output a backtrace on Linux systems with glibc. 
int depth = backtrace(StackTrace, static_cast<int>(array_lengthof(StackTrace))); - if (printSymbolizedStackTrace(StackTrace, depth, FD)) + if (printSymbolizedStackTrace(StackTrace, depth, OS)) return; #if HAVE_DLFCN_H && __GNUG__ int width = 0; @@ -441,34 +443,34 @@ void llvm::sys::PrintStackTrace(FILE *FD) { Dl_info dlinfo; dladdr(StackTrace[i], &dlinfo); - fprintf(FD, "%-2d", i); + OS << format("%-2d", i); const char* name = strrchr(dlinfo.dli_fname, '/'); - if (!name) fprintf(FD, " %-*s", width, dlinfo.dli_fname); - else fprintf(FD, " %-*s", width, name+1); + if (!name) OS << format(" %-*s", width, dlinfo.dli_fname); + else OS << format(" %-*s", width, name+1); - fprintf(FD, " %#0*lx", - (int)(sizeof(void*) * 2) + 2, (unsigned long)StackTrace[i]); + OS << format(" %#0*lx", (int)(sizeof(void*) * 2) + 2, + (unsigned long)StackTrace[i]); if (dlinfo.dli_sname != nullptr) { - fputc(' ', FD); + OS << ' '; # if HAVE_CXXABI_H int res; char* d = abi::__cxa_demangle(dlinfo.dli_sname, nullptr, nullptr, &res); # else char* d = NULL; # endif - if (!d) fputs(dlinfo.dli_sname, FD); - else fputs(d, FD); + if (!d) OS << dlinfo.dli_sname; + else OS << d; free(d); // FIXME: When we move to C++11, use %t length modifier. It's not in // C++03 and causes gcc to issue warnings. Losing the upper 32 bits of // the stack offset for a stack dump isn't likely to cause any problems. - fprintf(FD, " + %u",(unsigned)((char*)StackTrace[i]- - (char*)dlinfo.dli_saddr)); + OS << format(" + %u",(unsigned)((char*)StackTrace[i]- + (char*)dlinfo.dli_saddr)); } - fputc('\n', FD); + OS << '\n'; } #else backtrace_symbols_fd(StackTrace, depth, STDERR_FILENO); @@ -477,7 +479,7 @@ void llvm::sys::PrintStackTrace(FILE *FD) { } static void PrintStackTraceSignalHandler(void *) { - PrintStackTrace(stderr); + PrintStackTrace(llvm::errs()); } void llvm::sys::DisableSystemDialogsOnCrash() {} diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index d8b5702..d558ff5 100644 --- a/lib/Support/Windows/Path.inc +++ b/lib/Support/Windows/Path.inc @@ -599,8 +599,8 @@ std::error_code detail::directory_iterator_construct(detail::DirIterState &it, it.IterationHandle = intptr_t(FindHandle.take()); SmallString<128> directory_entry_path(path); - path::append(directory_entry_path, directory_entry_name_utf8.str()); - it.CurrentEntry = directory_entry(directory_entry_path.str()); + path::append(directory_entry_path, directory_entry_name_utf8); + it.CurrentEntry = directory_entry(directory_entry_path); return std::error_code(); } diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc index 854eac7..5f9ce7f 100644 --- a/lib/Support/Windows/Process.inc +++ b/lib/Support/Windows/Process.inc @@ -329,6 +329,16 @@ class DefaultColors }; DefaultColors defaultColors; + +WORD fg_color(WORD color) { + return color & (FOREGROUND_BLUE | FOREGROUND_GREEN | + FOREGROUND_INTENSITY | FOREGROUND_RED); +} + +WORD bg_color(WORD color) { + return color & (BACKGROUND_BLUE | BACKGROUND_GREEN | + BACKGROUND_INTENSITY | BACKGROUND_RED); +} } bool Process::ColorNeedsFlush() { @@ -350,6 +360,7 @@ const char *Process::OutputBold(bool bg) { const char *Process::OutputColor(char code, bool bold, bool bg) { if (UseANSI) return colorcodes[bg?1:0][bold?1:0][code&7]; + WORD current = DefaultColors::GetCurrentColor(); WORD colors; if (bg) { colors = ((code&1) ? BACKGROUND_RED : 0) | @@ -357,12 +368,14 @@ const char *Process::OutputColor(char code, bool bold, bool bg) { ((code&4) ? 
BACKGROUND_BLUE : 0); if (bold) colors |= BACKGROUND_INTENSITY; + colors |= fg_color(current); } else { colors = ((code&1) ? FOREGROUND_RED : 0) | ((code&2) ? FOREGROUND_GREEN : 0 ) | ((code&4) ? FOREGROUND_BLUE : 0); if (bold) colors |= FOREGROUND_INTENSITY; + colors |= bg_color(current); } SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors); return 0; diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc index aa1aa72..de6bf1c 100644 --- a/lib/Support/Windows/Signals.inc +++ b/lib/Support/Windows/Signals.inc @@ -10,13 +10,15 @@ // This file provides the Win32 specific implementation of the Signals class. // //===----------------------------------------------------------------------===// - #include "llvm/Support/FileSystem.h" #include <algorithm> #include <signal.h> #include <stdio.h> #include <vector> +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + // The Windows.h header must be after LLVM and standard headers. #include "WindowsSupport.h" @@ -172,6 +174,92 @@ static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL; // (such as CTRL/C) occurs. This causes concurrency issues with the above // globals which this critical section addresses. static CRITICAL_SECTION CriticalSection; +static bool CriticalSectionInitialized = false; + +static void PrintStackTraceForThread(llvm::raw_ostream &OS, HANDLE hProcess, + HANDLE hThread, STACKFRAME64 &StackFrame, + CONTEXT *Context) { + DWORD machineType; +#if defined(_M_X64) + machineType = IMAGE_FILE_MACHINE_AMD64; +#else + machineType = IMAGE_FILE_MACHINE_I386; +#endif + + // Initialize the symbol handler. + SymSetOptions(SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES); + SymInitialize(hProcess, NULL, TRUE); + + while (true) { + if (!StackWalk64(machineType, hProcess, hThread, &StackFrame, Context, NULL, + SymFunctionTableAccess64, SymGetModuleBase64, NULL)) { + break; + } + + if (StackFrame.AddrFrame.Offset == 0) + break; + + using namespace llvm; + // Print the PC in hexadecimal. + DWORD64 PC = StackFrame.AddrPC.Offset; +#if defined(_M_X64) + OS << format("0x%016llX", PC); +#elif defined(_M_IX86) + OS << format("0x%08lX", static_cast<DWORD>(PC)); +#endif + +// Print the parameters. Assume there are four. +#if defined(_M_X64) + OS << format(" (0x%016llX 0x%016llX 0x%016llX 0x%016llX)", + StackFrame.Params[0], StackFrame.Params[1], StackFrame.Params[2], + StackFrame.Params[3]); +#elif defined(_M_IX86) + OS << format(" (0x%08lX 0x%08lX 0x%08lX 0x%08lX)", + static_cast<DWORD>(StackFrame.Params[0]), + static_cast<DWORD>(StackFrame.Params[1]), + static_cast<DWORD>(StackFrame.Params[2]), + static_cast<DWORD>(StackFrame.Params[3])); +#endif + // Verify the PC belongs to a module in this process. + if (!SymGetModuleBase64(hProcess, PC)) { + OS << " <unknown module>\n"; + continue; + } + + // Print the symbol name. + char buffer[512]; + IMAGEHLP_SYMBOL64 *symbol = reinterpret_cast<IMAGEHLP_SYMBOL64 *>(buffer); + memset(symbol, 0, sizeof(IMAGEHLP_SYMBOL64)); + symbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL64); + symbol->MaxNameLength = 512 - sizeof(IMAGEHLP_SYMBOL64); + + DWORD64 dwDisp; + if (!SymGetSymFromAddr64(hProcess, PC, &dwDisp, symbol)) { + OS << '\n'; + continue; + } + + buffer[511] = 0; + if (dwDisp > 0) + OS << format(", %s() + 0x%llX bytes(s)", (const char*)symbol->Name, + dwDisp); + else + OS << format(", %s", (const char*)symbol->Name); + + // Print the source file and line number information. 
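The Windows stack walker above, like its Unix counterpart earlier in this patch, now formats through a raw_ostream instead of a FILE*: llvm::format() wraps a printf-style format string and its arguments, and the stream renders them. A minimal usage sketch (the helper function is illustrative):

#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

// Prints one stack frame, printf-style, into any raw_ostream
// (llvm::errs(), a raw_string_ostream, ...).
static void printFrame(llvm::raw_ostream &OS, int frameNo, void *pc) {
  OS << llvm::format("#%d %p\n", frameNo, pc);
}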
+ IMAGEHLP_LINE64 line; + DWORD dwLineDisp; + memset(&line, 0, sizeof(line)); + line.SizeOfStruct = sizeof(line); + if (SymGetLineFromAddr64(hProcess, PC, &dwLineDisp, &line)) { + OS << format(", %s, line %lu", line.FileName, line.LineNumber); + if (dwLineDisp > 0) + OS << format(" + 0x%lX byte(s)", dwLineDisp); + } + + OS << '\n'; + } +} namespace llvm { @@ -203,6 +291,16 @@ extern "C" void HandleAbort(int Sig) { } } +static void InitializeThreading() { + if (CriticalSectionInitialized) + return; + + // Now's the time to create the critical section. This is the first time + // through here, and there's only one thread. + InitializeCriticalSection(&CriticalSection); + CriticalSectionInitialized = true; +} + static void RegisterHandler() { #if __MINGW32__ && !defined(__MINGW64_VERSION_MAJOR) // On MinGW.org, we need to load up the symbols explicitly, because the @@ -221,9 +319,7 @@ static void RegisterHandler() { return; } - // Now's the time to create the critical section. This is the first time - // through here, and there's only one thread. - InitializeCriticalSection(&CriticalSection); + InitializeThreading(); // Enter it immediately. Now if someone hits CTRL/C, the console handler // can't proceed until the globals are updated. @@ -298,13 +394,37 @@ void sys::PrintStackTraceOnErrorSignal() { RegisterHandler(); LeaveCriticalSection(&CriticalSection); } +} + +#if defined(__MINGW32__) && !defined(__MINGW64_VERSION_MAJOR) +// Provide a prototype for RtlCaptureContext, mingw32 from mingw.org is +// missing it but mingw-w64 has it. +extern "C" VOID WINAPI RtlCaptureContext(PCONTEXT ContextRecord); +#endif -void llvm::sys::PrintStackTrace(FILE *) { - // FIXME: Implement. +void llvm::sys::PrintStackTrace(raw_ostream &OS) { + + STACKFRAME64 StackFrame = {}; + CONTEXT Context = {0}; + ::RtlCaptureContext(&Context); +#if defined(_M_X64) + StackFrame.AddrPC.Offset = Context.Rip; + StackFrame.AddrStack.Offset = Context.Rsp; + StackFrame.AddrFrame.Offset = Context.Rbp; +#else + StackFrame.AddrPC.Offset = Context.Eip; + StackFrame.AddrStack.Offset = Context.Esp; + StackFrame.AddrFrame.Offset = Context.Ebp; +#endif + StackFrame.AddrPC.Mode = AddrModeFlat; + StackFrame.AddrStack.Mode = AddrModeFlat; + StackFrame.AddrFrame.Mode = AddrModeFlat; + PrintStackTraceForThread(OS, GetCurrentProcess(), GetCurrentThread(), + StackFrame, &Context); } -void sys::SetInterruptFunction(void (*IF)()) { +void llvm::sys::SetInterruptFunction(void (*IF)()) { RegisterHandler(); InterruptFunction = IF; LeaveCriticalSection(&CriticalSection); @@ -314,14 +434,13 @@ void sys::SetInterruptFunction(void (*IF)()) { /// AddSignalHandler - Add a function to be called when a signal is delivered /// to the process. The handler can have a cookie passed to it to identify /// what instance of the handler it is. -void sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) { +void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) { if (CallBacksToRun == 0) CallBacksToRun = new std::vector<std::pair<void(*)(void*), void*> >(); CallBacksToRun->push_back(std::make_pair(FnPtr, Cookie)); RegisterHandler(); LeaveCriticalSection(&CriticalSection); } -} static void Cleanup() { EnterCriticalSection(&CriticalSection); @@ -346,6 +465,11 @@ static void Cleanup() { } void llvm::sys::RunInterruptHandlers() { + // The interrupt handler may be called from an interrupt, but it may also be + // called manually (such as the case of report_fatal_error with no registered + // error handler). 
We must ensure that the critical section is properly + // initialized. + InitializeThreading(); Cleanup(); } @@ -356,9 +480,7 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) { STACKFRAME64 StackFrame; memset(&StackFrame, 0, sizeof(StackFrame)); - DWORD machineType; #if defined(_M_X64) - machineType = IMAGE_FILE_MACHINE_AMD64; StackFrame.AddrPC.Offset = ep->ContextRecord->Rip; StackFrame.AddrPC.Mode = AddrModeFlat; StackFrame.AddrStack.Offset = ep->ContextRecord->Rsp; @@ -366,7 +488,6 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) { StackFrame.AddrFrame.Offset = ep->ContextRecord->Rbp; StackFrame.AddrFrame.Mode = AddrModeFlat; #elif defined(_M_IX86) - machineType = IMAGE_FILE_MACHINE_I386; StackFrame.AddrPC.Offset = ep->ContextRecord->Eip; StackFrame.AddrPC.Mode = AddrModeFlat; StackFrame.AddrStack.Offset = ep->ContextRecord->Esp; @@ -377,81 +498,8 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) { HANDLE hProcess = GetCurrentProcess(); HANDLE hThread = GetCurrentThread(); - - // Initialize the symbol handler. - SymSetOptions(SYMOPT_DEFERRED_LOADS|SYMOPT_LOAD_LINES); - SymInitialize(hProcess, NULL, TRUE); - - while (true) { - if (!StackWalk64(machineType, hProcess, hThread, &StackFrame, - ep->ContextRecord, NULL, SymFunctionTableAccess64, - SymGetModuleBase64, NULL)) { - break; - } - - if (StackFrame.AddrFrame.Offset == 0) - break; - - // Print the PC in hexadecimal. - DWORD64 PC = StackFrame.AddrPC.Offset; -#if defined(_M_X64) - fprintf(stderr, "0x%016llX", PC); -#elif defined(_M_IX86) - fprintf(stderr, "0x%08lX", static_cast<DWORD>(PC)); -#endif - - // Print the parameters. Assume there are four. -#if defined(_M_X64) - fprintf(stderr, " (0x%016llX 0x%016llX 0x%016llX 0x%016llX)", - StackFrame.Params[0], - StackFrame.Params[1], - StackFrame.Params[2], - StackFrame.Params[3]); -#elif defined(_M_IX86) - fprintf(stderr, " (0x%08lX 0x%08lX 0x%08lX 0x%08lX)", - static_cast<DWORD>(StackFrame.Params[0]), - static_cast<DWORD>(StackFrame.Params[1]), - static_cast<DWORD>(StackFrame.Params[2]), - static_cast<DWORD>(StackFrame.Params[3])); -#endif - // Verify the PC belongs to a module in this process. - if (!SymGetModuleBase64(hProcess, PC)) { - fputs(" <unknown module>\n", stderr); - continue; - } - - // Print the symbol name. - char buffer[512]; - IMAGEHLP_SYMBOL64 *symbol = reinterpret_cast<IMAGEHLP_SYMBOL64 *>(buffer); - memset(symbol, 0, sizeof(IMAGEHLP_SYMBOL64)); - symbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL64); - symbol->MaxNameLength = 512 - sizeof(IMAGEHLP_SYMBOL64); - - DWORD64 dwDisp; - if (!SymGetSymFromAddr64(hProcess, PC, &dwDisp, symbol)) { - fputc('\n', stderr); - continue; - } - - buffer[511] = 0; - if (dwDisp > 0) - fprintf(stderr, ", %s() + 0x%llX bytes(s)", symbol->Name, dwDisp); - else - fprintf(stderr, ", %s", symbol->Name); - - // Print the source file and line number information. 
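Pulling the critical-section setup out of RegisterHandler() matters because RunInterruptHandlers() can now be reached without any handler registered (e.g. report_fatal_error with no error handler installed), yet it must still be able to enter the critical section. Condensed from the hunks above; the unsynchronized flag is safe only because the first call happens while the process is still single-threaded:

// Assumes <windows.h> (pulled in via WindowsSupport.h in the real file,
// which must come after the LLVM and standard headers).
#include <windows.h>

static CRITICAL_SECTION CriticalSection;
static bool CriticalSectionInitialized = false;

static void InitializeThreading() {
  if (CriticalSectionInitialized)
    return;
  // First call: no other threads exist yet, so a plain bool guard suffices.
  InitializeCriticalSection(&CriticalSection);
  CriticalSectionInitialized = true;
}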
- IMAGEHLP_LINE64 line; - DWORD dwLineDisp; - memset(&line, 0, sizeof(line)); - line.SizeOfStruct = sizeof(line); - if (SymGetLineFromAddr64(hProcess, PC, &dwLineDisp, &line)) { - fprintf(stderr, ", %s, line %lu", line.FileName, line.LineNumber); - if (dwLineDisp > 0) - fprintf(stderr, " + 0x%lX byte(s)", dwLineDisp); - } - - fputc('\n', stderr); - } + PrintStackTraceForThread(llvm::errs(), hProcess, hThread, StackFrame, + ep->ContextRecord); _exit(ep->ExceptionRecord->ExceptionCode); } diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp index 6ae7945..93aec7c 100644 --- a/lib/Support/YAMLParser.cpp +++ b/lib/Support/YAMLParser.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/YAMLParser.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp index 43a0e10..74e5414 100644 --- a/lib/Support/YAMLTraits.cpp +++ b/lib/Support/YAMLTraits.cpp @@ -7,13 +7,14 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/Errc.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/YAMLParser.h" -#include "llvm/Support/YAMLTraits.h" #include "llvm/Support/raw_ostream.h" #include <cctype> #include <cstring> @@ -168,9 +169,17 @@ void Input::endMapping() { } unsigned Input::beginSequence() { - if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) { + if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) return SQ->Entries.size(); + if (isa<EmptyHNode>(CurrentNode)) + return 0; + // Treat case where there's a scalar "null" value as an empty sequence. + if (ScalarHNode *SN = dyn_cast<ScalarHNode>(CurrentNode)) { + if (isNull(SN->value())) + return 0; } + // Any other type of HNode is an error. 
+ setError(CurrentNode, "not a sequence"); return 0; } @@ -192,12 +201,7 @@ void Input::postflightElement(void *SaveInfo) { CurrentNode = reinterpret_cast<HNode *>(SaveInfo); } -unsigned Input::beginFlowSequence() { - if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) { - return SQ->Entries.size(); - } - return 0; -} +unsigned Input::beginFlowSequence() { return beginSequence(); } bool Input::preflightFlowElement(unsigned index, void *&SaveInfo) { if (EC) diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index 44f6a6e..f66dfd3 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "TGParser.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index dff48f9..bb3db4b 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -32,6 +32,9 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", "Enable ARMv8 CRC-32 checksum instructions">; +def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true", + "Enable ARMv8.1a extensions", [FeatureCRC]>; + /// Cyclone has register move instructions which are "free". def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", "Has zero-cycle register moves">; @@ -89,6 +92,10 @@ def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8, FeatureNEON, FeatureCRC]>; +def : ProcessorModel<"generic-armv8.1-a", NoSchedModel, [FeatureV8_1a, + FeatureNEON, + FeatureCrypto]>; + def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>; def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>; // FIXME: Cortex-A72 is currently modelled as an Cortex-A57. 
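The YAMLTraits beginSequence() change above accepts a scalar "null" value as an empty sequence rather than an error, and beginFlowSequence() now simply delegates to it. The null test boils down to matching YAML's null spellings; a sketch of that check (the exact spelling set accepted by llvm::yaml::isNull is an assumption here — consult YAMLTraits.h for the authoritative list):

#include "llvm/ADT/StringRef.h"

// A scalar counts as an empty sequence only when it is one of YAML's
// null spellings; any other scalar where a sequence is expected is a
// type error, as the new setError() path above reports.
static bool looksLikeYAMLNull(llvm::StringRef S) {
  return S == "~" || S == "null" || S == "Null" || S == "NULL";
}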
diff --git a/lib/Target/AArch64/AArch64A53Fix835769.cpp b/lib/Target/AArch64/AArch64A53Fix835769.cpp index dd401c6..3bc5a54 100644 --- a/lib/Target/AArch64/AArch64A53Fix835769.cpp +++ b/lib/Target/AArch64/AArch64A53Fix835769.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp index 2cf3c22..bffd9e6 100644 --- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp +++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp @@ -142,7 +142,7 @@ private: int scavengeRegister(Chain *G, Color C, MachineBasicBlock &MBB); void scanInstruction(MachineInstr *MI, unsigned Idx, std::map<unsigned, Chain*> &Active, - std::set<std::unique_ptr<Chain>> &AllChains); + std::vector<std::unique_ptr<Chain>> &AllChains); void maybeKillChain(MachineOperand &MO, unsigned Idx, std::map<unsigned, Chain*> &RegChains); Color getColor(unsigned Register); @@ -287,12 +287,12 @@ public: raw_string_ostream OS(S); OS << "{"; - StartInst->print(OS, NULL, true); + StartInst->print(OS, /* SkipOpers= */true); OS << " -> "; - LastInst->print(OS, NULL, true); + LastInst->print(OS, /* SkipOpers= */true); if (KillInst) { OS << " (kill @ "; - KillInst->print(OS, NULL, true); + KillInst->print(OS, /* SkipOpers= */true); OS << ")"; } OS << "}"; @@ -307,6 +307,11 @@ public: //===----------------------------------------------------------------------===// bool AArch64A57FPLoadBalancing::runOnMachineFunction(MachineFunction &F) { + // Don't do anything if this isn't an A53 or A57. + if (!(F.getSubtarget<AArch64Subtarget>().isCortexA53() || + F.getSubtarget<AArch64Subtarget>().isCortexA57())) + return false; + bool Changed = false; DEBUG(dbgs() << "***** AArch64A57FPLoadBalancing *****\n"); @@ -331,7 +336,7 @@ bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock &MBB) { // been killed yet. This is keyed by register - all chains can only have one // "link" register between each inst in the chain. std::map<unsigned, Chain*> ActiveChains; - std::set<std::unique_ptr<Chain>> AllChains; + std::vector<std::unique_ptr<Chain>> AllChains; unsigned Idx = 0; for (auto &MI : MBB) scanInstruction(&MI, Idx++, ActiveChains, AllChains); @@ -598,10 +603,9 @@ bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C, return Changed; } -void AArch64A57FPLoadBalancing:: -scanInstruction(MachineInstr *MI, unsigned Idx, - std::map<unsigned, Chain*> &ActiveChains, - std::set<std::unique_ptr<Chain>> &AllChains) { +void AArch64A57FPLoadBalancing::scanInstruction( + MachineInstr *MI, unsigned Idx, std::map<unsigned, Chain *> &ActiveChains, + std::vector<std::unique_ptr<Chain>> &AllChains) { // Inspect "MI", updating ActiveChains and AllChains. 
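The std::set to std::vector switch in the load-balancing pass above changes the ownership container, not the ownership model: a vector of unique_ptrs keeps every Chain alive in deterministic insertion order (std::set would order by pointer value), while the ActiveChains map holds non-owning pointers keyed by register — the split scanInstruction() relies on. A reduced sketch of the idiom (Chain shrunk to a placeholder):

#include <map>
#include <memory>
#include <vector>

struct Chain {}; // stand-in for the pass's chain record

// Take ownership in the vector, but hand the map a borrowed pointer
// first, before std::move empties the unique_ptr.
static void startChain(std::vector<std::unique_ptr<Chain>> &AllChains,
                       std::map<unsigned, Chain *> &ActiveChains,
                       std::unique_ptr<Chain> G, unsigned DestReg) {
  ActiveChains[DestReg] = G.get();   // borrow before the move
  AllChains.push_back(std::move(G)); // the vector keeps it alive
}

// usage: startChain(AllChains, ActiveChains, std::make_unique<Chain>(), Reg);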
if (isMul(MI)) { @@ -620,7 +624,7 @@ scanInstruction(MachineInstr *MI, unsigned Idx, auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg)); ActiveChains[DestReg] = G.get(); - AllChains.insert(std::move(G)); + AllChains.push_back(std::move(G)); } else if (isMla(MI)) { @@ -664,7 +668,7 @@ scanInstruction(MachineInstr *MI, unsigned Idx, << TRI->getName(DestReg) << "\n"); auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg)); ActiveChains[DestReg] = G.get(); - AllChains.insert(std::move(G)); + AllChains.push_back(std::move(G)); } else { diff --git a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp index 287989f..716e1a3 100644 --- a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp +++ b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp @@ -41,6 +41,7 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index d64d851..1b4483a 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -12,12 +12,14 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/AArch64AddressingModes.h" #include "AArch64.h" #include "AArch64MCInstLower.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" #include "InstPrinter/AArch64InstPrinter.h" +#include "MCTargetDesc/AArch64MCExpr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" @@ -34,8 +36,10 @@ #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCLinkerOptimizationHint.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/Debug.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; #define DEBUG_TYPE "asm-printer" @@ -49,7 +53,7 @@ class AArch64AsmPrinter : public AsmPrinter { public: AArch64AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(OutContext, *this), - SM(*this), AArch64FI(nullptr), LOHLabelCounter(0) {} + SM(*this), AArch64FI(nullptr) {} const char *getPassName() const override { return "AArch64 Assembly Printer"; @@ -110,7 +114,6 @@ private: typedef std::map<const MachineInstr *, MCSymbol *> MInstToMCSymbol; MInstToMCSymbol LOHInstToLabel; - unsigned LOHLabelCounter; }; } // end of anonymous namespace @@ -219,6 +222,17 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, O << '#' << Imm; break; } + case MachineOperand::MO_GlobalAddress: { + const GlobalValue *GV = MO.getGlobal(); + MCSymbol *Sym = getSymbol(GV); + + // FIXME: Can we get anything other than a plain symbol here? 
+ assert(!MO.getTargetFlags() && "Unknown operand target flag!"); + + O << *Sym; + printOffset(MO.getOffset(), O); + break; + } } } @@ -450,7 +464,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { if (AArch64FI->getLOHRelated().count(MI)) { // Generate a label for LOH related instruction - MCSymbol *LOHLabel = GetTempSymbol("loh", LOHLabelCounter++); + MCSymbol *LOHLabel = createTempSymbol("loh"); // Associate the instruction with the label LOHInstToLabel[MI] = LOHLabel; OutStreamer.EmitLabel(LOHLabel); @@ -489,24 +503,57 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitToStreamer(OutStreamer, TmpInst); return; } - case AArch64::TLSDESC_BLR: { - MCOperand Callee, Sym; - MCInstLowering.lowerOperand(MI->getOperand(0), Callee); - MCInstLowering.lowerOperand(MI->getOperand(1), Sym); - - // First emit a relocation-annotation. This expands to no code, but requests + case AArch64::TLSDESC_CALLSEQ: { + /// lower this to: + /// adrp x0, :tlsdesc:var + /// ldr x1, [x0, #:tlsdesc_lo12:var] + /// add x0, x0, #:tlsdesc_lo12:var + /// .tlsdesccall var + /// blr x1 + /// (TPIDR_EL0 offset now in x0) + const MachineOperand &MO_Sym = MI->getOperand(0); + MachineOperand MO_TLSDESC_LO12(MO_Sym), MO_TLSDESC(MO_Sym); + MCOperand Sym, SymTLSDescLo12, SymTLSDesc; + MO_TLSDESC_LO12.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | + AArch64II::MO_NC); + MO_TLSDESC.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGE); + MCInstLowering.lowerOperand(MO_Sym, Sym); + MCInstLowering.lowerOperand(MO_TLSDESC_LO12, SymTLSDescLo12); + MCInstLowering.lowerOperand(MO_TLSDESC, SymTLSDesc); + + MCInst Adrp; + Adrp.setOpcode(AArch64::ADRP); + Adrp.addOperand(MCOperand::CreateReg(AArch64::X0)); + Adrp.addOperand(SymTLSDesc); + EmitToStreamer(OutStreamer, Adrp); + + MCInst Ldr; + Ldr.setOpcode(AArch64::LDRXui); + Ldr.addOperand(MCOperand::CreateReg(AArch64::X1)); + Ldr.addOperand(MCOperand::CreateReg(AArch64::X0)); + Ldr.addOperand(SymTLSDescLo12); + Ldr.addOperand(MCOperand::CreateImm(0)); + EmitToStreamer(OutStreamer, Ldr); + + MCInst Add; + Add.setOpcode(AArch64::ADDXri); + Add.addOperand(MCOperand::CreateReg(AArch64::X0)); + Add.addOperand(MCOperand::CreateReg(AArch64::X0)); + Add.addOperand(SymTLSDescLo12); + Add.addOperand(MCOperand::CreateImm(AArch64_AM::getShiftValue(0))); + EmitToStreamer(OutStreamer, Add); + + // Emit a relocation-annotation. This expands to no code, but requests // the following instruction gets an R_AARCH64_TLSDESC_CALL. MCInst TLSDescCall; TLSDescCall.setOpcode(AArch64::TLSDESCCALL); TLSDescCall.addOperand(Sym); EmitToStreamer(OutStreamer, TLSDescCall); - // Other than that it's just a normal indirect call to the function loaded - // from the descriptor. 
- MCInst BLR; - BLR.setOpcode(AArch64::BLR); - BLR.addOperand(Callee); - EmitToStreamer(OutStreamer, BLR); + MCInst Blr; + Blr.setOpcode(AArch64::BLR); + Blr.addOperand(MCOperand::CreateReg(AArch64::X1)); + EmitToStreamer(OutStreamer, Blr); return; } diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp index 3b74481..06ff9af 100644 --- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp +++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp @@ -62,10 +62,10 @@ struct LDTLSCleanup : public MachineFunctionPass { for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { switch (I->getOpcode()) { - case AArch64::TLSDESC_BLR: + case AArch64::TLSDESC_CALLSEQ: // Make sure it's a local dynamic access. - if (!I->getOperand(1).isSymbol() || - strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_")) + if (!I->getOperand(0).isSymbol() || + strcmp(I->getOperand(0).getSymbolName(), "_TLS_MODULE_BASE_")) break; if (TLSBaseAddrReg) diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp index 938dcb3..568f258 100644 --- a/lib/Target/AArch64/AArch64CollectLOH.cpp +++ b/lib/Target/AArch64/AArch64CollectLOH.cpp @@ -279,7 +279,7 @@ static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg, /// definition. It also considers definitions of ADRP instructions as uses and /// ignores other uses. The ADRPMode is used to collect the information for LOH /// that involves ADRP operations only. -static void initReachingDef(MachineFunction &MF, +static void initReachingDef(const MachineFunction &MF, InstrToInstrs *ColorOpToReachedUses, BlockToInstrPerColor &Gen, BlockToRegSet &Kill, BlockToSetOfInstrsPerColor &ReachableUses, @@ -288,7 +288,7 @@ static void initReachingDef(MachineFunction &MF, const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); unsigned NbReg = RegToId.size(); - for (MachineBasicBlock &MBB : MF) { + for (const MachineBasicBlock &MBB : MF) { auto &BBGen = Gen[&MBB]; BBGen = make_unique<const MachineInstr *[]>(NbReg); std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr); @@ -382,7 +382,7 @@ static void initReachingDef(MachineFunction &MF, /// op.reachedUses /// /// Out[bb] = Gen[bb] U (In[bb] - Kill[bb]) -static void reachingDefAlgorithm(MachineFunction &MF, +static void reachingDefAlgorithm(const MachineFunction &MF, InstrToInstrs *ColorOpToReachedUses, BlockToSetOfInstrsPerColor &In, BlockToSetOfInstrsPerColor &Out, @@ -392,7 +392,7 @@ static void reachingDefAlgorithm(MachineFunction &MF, bool HasChanged; do { HasChanged = false; - for (MachineBasicBlock &MBB : MF) { + for (const MachineBasicBlock &MBB : MF) { unsigned CurReg; for (CurReg = 0; CurReg < NbReg; ++CurReg) { SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg); @@ -401,7 +401,7 @@ static void reachingDefAlgorithm(MachineFunction &MF, SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg); unsigned Size = BBOutSet.size(); // In[bb][color] = U Out[bb.predecessors][color] - for (MachineBasicBlock *PredMBB : MBB.predecessors()) { + for (const MachineBasicBlock *PredMBB : MBB.predecessors()) { SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg); BBInSet.insert(PredOutSet.begin(), PredOutSet.end()); } @@ -433,7 +433,7 @@ static void reachingDefAlgorithm(MachineFunction &MF, /// @p DummyOp. /// \pre ColorOpToReachedUses is an array of InstrToInstrs with at least one /// entry per register.
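The reaching-definition helpers being constified here implement the classic forward dataflow spelled out in the comment above, Out[bb] = Gen[bb] U (In[bb] - Kill[bb]); the reachingDef driver's signature follows below. For reference, the same fixpoint in plain C++ over bit sets, using a hypothetical Block type rather than the pass's per-color structures:

    #include <vector>

    // Hypothetical CFG node; every bit set is assumed pre-sized to NumDefs.
    struct Block {
      std::vector<bool> Gen, Kill, In, Out;
      std::vector<Block *> Preds;
    };

    // Iterate Out[bb] = Gen[bb] U (In[bb] - Kill[bb]) until nothing changes.
    void reachingDefsFixpoint(std::vector<Block> &Blocks, unsigned NumDefs) {
      bool Changed;
      do {
        Changed = false;
        for (Block &B : Blocks) {
          // In[bb] = union of Out[pred] over all predecessors.
          for (Block *P : B.Preds)
            for (unsigned i = 0; i != NumDefs; ++i)
              B.In[i] = B.In[i] || P->Out[i];
          for (unsigned i = 0; i != NumDefs; ++i) {
            bool NewOut = B.Gen[i] || (B.In[i] && !B.Kill[i]);
            if (NewOut != B.Out[i]) {
              B.Out[i] = NewOut;
              Changed = true;
            }
          }
        }
      } while (Changed);
    }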
-static void reachingDef(MachineFunction &MF, +static void reachingDef(const MachineFunction &MF, InstrToInstrs *ColorOpToReachedUses, const MapRegToId &RegToId, bool ADRPMode = false, const MachineInstr *DummyOp = nullptr) { @@ -983,7 +983,7 @@ static void computeOthers(const InstrToInstrs &UseToDefs, /// Look for every register defined by potential LOHs candidates. /// Map these registers with dense id in @p RegToId and vice-versa in /// @p IdToReg. @p IdToReg is populated only in DEBUG mode. -static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId, +static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId, MapIdToReg &IdToReg, const TargetRegisterInfo *TRI) { unsigned CurRegId = 0; diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 61017c1..99cb641 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -3158,7 +3158,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). - MIB.addRegMask(TRI.getCallPreservedMask(CC)); + MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); CLI.Call = MIB; @@ -4563,7 +4563,7 @@ bool AArch64FastISel::selectShift(const Instruction *I) { unsigned ResultReg = 0; uint64_t ShiftVal = C->getZExtValue(); MVT SrcVT = RetVT; - bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true; + bool IsZExt = I->getOpcode() != Instruction::AShr; const Value *Op0 = I->getOperand(0); if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) { if (!isIntExtFree(ZExt)) { diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index ac11c4d..0a47dcb 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -65,7 +65,7 @@ public: /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, + unsigned ConstraintID, std::vector<SDValue> &OutOps) override; SDNode *SelectMLAV64LaneV128(SDNode *N); @@ -211,13 +211,20 @@ static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, } bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( - const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) { - assert(ConstraintCode == 'm' && "unexpected asm memory constraint"); - // Require the address to be in a register. That is safe for all AArch64 - // variants and it is hard to do anything much smarter without knowing - // how the operand is used. - OutOps.push_back(Op); - return false; + const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { + switch(ConstraintID) { + default: + llvm_unreachable("Unexpected asm memory constraint"); + case InlineAsm::Constraint_i: + case InlineAsm::Constraint_m: + case InlineAsm::Constraint_Q: + // Require the address to be in a register. That is safe for all AArch64 + // variants and it is hard to do anything much smarter without knowing + // how the operand is used. + OutOps.push_back(Op); + return false; + } + return true; } /// SelectArithImmed - Select an immediate value that can be represented as @@ -299,7 +306,7 @@ static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { } } -/// \brief Determine wether it is worth to fold V into an extended register. 
+/// \brief Determine whether it is worth to fold V into an extended register. bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { // it hurts if the value is used at least twice, unless we are optimizing // for code size. @@ -1055,7 +1062,7 @@ SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, SDValue Ops[] = {N->getOperand(2), // Mem operand; Chain}; - EVT ResTys[] = {MVT::Untyped, MVT::Other}; + const EVT ResTys[] = {MVT::Untyped, MVT::Other}; SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); SDValue SuperReg = SDValue(Ld, 0); @@ -1077,8 +1084,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, N->getOperand(2), // Incremental Chain}; - EVT ResTys[] = {MVT::i64, // Type of the write back register - MVT::Untyped, MVT::Other}; + const EVT ResTys[] = {MVT::i64, // Type of the write back register + MVT::Untyped, MVT::Other}; SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); @@ -1119,8 +1126,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc) { SDLoc dl(N); EVT VT = N->getOperand(2)->getValueType(0); - EVT ResTys[] = {MVT::i64, // Type of the write back register - MVT::Other}; // Type for the Chain + const EVT ResTys[] = {MVT::i64, // Type of the write back register + MVT::Other}; // Type for the Chain // Form a REG_SEQUENCE to force register allocation. bool Is128Bit = VT.getSizeInBits() == 128; @@ -1136,6 +1143,7 @@ SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, return St; } +namespace { /// WidenVector - Given a value in the V64 register class, produce the /// equivalent value in the V128 register class. class WidenVector { @@ -1156,6 +1164,7 @@ public: return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); } }; +} // namespace /// NarrowVector - Given a value in the V128 register class, produce the /// equivalent value in the V64 register class. 
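The namespace hunk above wraps WidenVector in an anonymous namespace, giving the helper internal linkage so it cannot collide with an identically named class in another translation unit. The idiom in isolation, with illustrative names:

    // Anonymous namespace: everything inside gets internal linkage, i.e. it is
    // invisible to other translation units, so no ODR clash is possible.
    namespace {
    class WidenHelper { // illustrative, not the real WidenVector
    public:
      explicit WidenHelper(int Seed) : Seed(Seed) {}
      int widened() const { return Seed * 2; }

    private:
      int Seed;
    };
    } // namespace

    int demoUse() { return WidenHelper(21).widened(); }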
@@ -1184,7 +1193,7 @@ SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, SDValue RegSeq = createQTuple(Regs); - EVT ResTys[] = {MVT::Untyped, MVT::Other}; + const EVT ResTys[] = {MVT::Untyped, MVT::Other}; unsigned LaneNo = cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); @@ -1224,8 +1233,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, SDValue RegSeq = createQTuple(Regs); - EVT ResTys[] = {MVT::i64, // Type of the write back register - MVT::Untyped, MVT::Other}; + const EVT ResTys[] = {MVT::i64, // Type of the write back register + MVT::Untyped, MVT::Other}; unsigned LaneNo = cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); @@ -1309,8 +1318,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, SDValue RegSeq = createQTuple(Regs); - EVT ResTys[] = {MVT::i64, // Type of the write back register - MVT::Other}; + const EVT ResTys[] = {MVT::i64, // Type of the write back register + MVT::Other}; unsigned LaneNo = cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index a1b324e..0c0e856 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -64,8 +64,16 @@ EnableAArch64ExtrGeneration("aarch64-extr-generation", cl::Hidden, static cl::opt<bool> EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden, - cl::desc("Allow AArch64 SLI/SRI formation"), - cl::init(false)); + cl::desc("Allow AArch64 SLI/SRI formation"), + cl::init(false)); + +// FIXME: The necessary dtprel relocations don't seem to be supported +// well in the GNU bfd and gold linkers at the moment. Therefore, by +// default, for now, fall back to GeneralDynamic code generation. +cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration( + "aarch64-elf-ldtls-generation", cl::Hidden, + cl::desc("Allow AArch64 Local Dynamic TLS code generation"), + cl::init(false)); AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI) @@ -362,9 +370,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FLOG10, MVT::v8f16, Expand); // AArch64 has implementations of a lot of rounding-like FP operations. - static MVT RoundingTypes[] = { MVT::f32, MVT::f64}; - for (unsigned I = 0; I < array_lengthof(RoundingTypes); ++I) { - MVT Ty = RoundingTypes[I]; + for (MVT Ty : {MVT::f32, MVT::f64}) { setOperationAction(ISD::FFLOOR, Ty, Legal); setOperationAction(ISD::FNEARBYINT, Ty, Legal); setOperationAction(ISD::FCEIL, Ty, Legal); @@ -561,9 +567,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, } // AArch64 has implementations of a lot of rounding-like FP operations. 
- static MVT RoundingVecTypes[] = {MVT::v2f32, MVT::v4f32, MVT::v2f64 }; - for (unsigned I = 0; I < array_lengthof(RoundingVecTypes); ++I) { - MVT Ty = RoundingVecTypes[I]; + for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) { setOperationAction(ISD::FFLOOR, Ty, Legal); setOperationAction(ISD::FNEARBYINT, Ty, Legal); setOperationAction(ISD::FCEIL, Ty, Legal); @@ -752,7 +756,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG"; case AArch64ISD::CSINC: return "AArch64ISD::CSINC"; case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER"; - case AArch64ISD::TLSDESC_CALL: return "AArch64ISD::TLSDESC_CALL"; + case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ"; case AArch64ISD::ADC: return "AArch64ISD::ADC"; case AArch64ISD::SBC: return "AArch64ISD::SBC"; case AArch64ISD::ADDS: return "AArch64ISD::ADDS"; @@ -811,6 +815,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz"; case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz"; case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz"; + case AArch64ISD::SADDV: return "AArch64ISD::SADDV"; + case AArch64ISD::UADDV: return "AArch64ISD::UADDV"; + case AArch64ISD::SMINV: return "AArch64ISD::SMINV"; + case AArch64ISD::UMINV: return "AArch64ISD::UMINV"; + case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV"; + case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV"; case AArch64ISD::NOT: return "AArch64ISD::NOT"; case AArch64ISD::BIT: return "AArch64ISD::BIT"; case AArch64ISD::CBZ: return "AArch64ISD::CBZ"; @@ -1247,7 +1257,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { case ISD::SMULO: case ISD::UMULO: { CC = AArch64CC::NE; - bool IsSigned = (Op.getOpcode() == ISD::SMULO) ? true : false; + bool IsSigned = Op.getOpcode() == ISD::SMULO; if (Op.getValueType() == MVT::i32) { unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; // For a 32 bit multiply with overflow check we want the instruction @@ -2784,13 +2794,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); if (IsThisReturn) { // For 'this' returns, use the X0-preserving mask if applicable - Mask = TRI->getThisReturnPreservedMask(CallConv); + Mask = TRI->getThisReturnPreservedMask(MF, CallConv); if (!Mask) { IsThisReturn = false; - Mask = TRI->getCallPreservedMask(CallConv); + Mask = TRI->getCallPreservedMask(MF, CallConv); } } else - Mask = TRI->getCallPreservedMask(CallConv); + Mask = TRI->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -3027,58 +3037,34 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, /// When accessing thread-local variables under either the general-dynamic or /// local-dynamic system, we make a "TLS-descriptor" call. The variable will /// have a descriptor, accessible via a PC-relative ADRP, and whose first entry -/// is a function pointer to carry out the resolution. This function takes the -/// address of the descriptor in X0 and returns the TPIDR_EL0 offset in X0. All -/// other registers (except LR, NZCV) are preserved. 
-/// -/// Thus, the ideal call sequence on AArch64 is: -/// -/// adrp x0, :tlsdesc:thread_var -/// ldr x8, [x0, :tlsdesc_lo12:thread_var] -/// add x0, x0, :tlsdesc_lo12:thread_var -/// .tlsdesccall thread_var -/// blr x8 -/// (TPIDR_EL0 offset now in x0). +/// is a function pointer to carry out the resolution. /// -/// The ".tlsdesccall" directive instructs the assembler to insert a particular -/// relocation to help the linker relax this sequence if it turns out to be too -/// conservative. +/// The sequence is: +/// adrp x0, :tlsdesc:var +/// ldr x1, [x0, #:tlsdesc_lo12:var] +/// add x0, x0, #:tlsdesc_lo12:var +/// .tlsdesccall var +/// blr x1 +/// (TPIDR_EL0 offset now in x0) /// -/// FIXME: we currently produce an extra, duplicated, ADRP instruction, but this -/// is harmless. -SDValue AArch64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr, - SDValue DescAddr, SDLoc DL, - SelectionDAG &DAG) const { +/// The above sequence must be produced unscheduled, to enable the linker to +/// optimize/relax this sequence. +/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the +/// above sequence, and expanded really late in the compilation flow, to ensure +/// the sequence is produced as per above. +SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL, + SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(); - // The function we need to call is simply the first entry in the GOT for this - // descriptor, load it in preparation. - SDValue Func = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, SymAddr); - - // TLS calls preserve all registers except those that absolutely must be - // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be - // silly). - const uint32_t *Mask = - Subtarget->getRegisterInfo()->getTLSCallPreservedMask(); - - // The function takes only one argument: the address of the descriptor itself - // in X0. - SDValue Glue, Chain; - Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue); - Glue = Chain.getValue(1); + SDValue Chain = DAG.getEntryNode(); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - // We're now ready to populate the argument list, as with a normal call: - SmallVector<SDValue, 6> Ops; + SmallVector<SDValue, 2> Ops; Ops.push_back(Chain); - Ops.push_back(Func); Ops.push_back(SymAddr); - Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT)); - Ops.push_back(DAG.getRegisterMask(Mask)); - Ops.push_back(Glue); - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(AArch64ISD::TLSDESC_CALL, DL, NodeTys, Ops); - Glue = Chain.getValue(1); + Chain = DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, Ops); + SDValue Glue = Chain.getValue(1); return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue); } @@ -3089,9 +3075,18 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, assert(Subtarget->isTargetELF() && "This function expects an ELF target"); assert(getTargetMachine().getCodeModel() == CodeModel::Small && "ELF TLS only supported in small memory model"); + // Different choices can be made for the maximum size of the TLS area for a + // module. For the small address model, the default TLS size is 16MiB and the + // maximum TLS size is 4GiB. + // FIXME: add -mtls-size command line option and make it control the 16MiB + // vs. 4GiB code sequence generation. 
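For a concrete trigger, the descriptor sequence documented above is what a plain general-dynamic TLS access compiles to. A minimal reproducer (names are illustrative; build as position-independent code for an AArch64 ELF target):

    // Compiling this with -fPIC takes the TLSDESC_CALLSEQ path: the access
    // below becomes the quoted adrp/ldr/add/.tlsdesccall/blr sequence, with
    // the TPIDR_EL0 offset handed back in x0.
    thread_local int tls_counter = 0;

    int bump() { return ++tls_counter; }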
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); + if (!EnableAArch64ELFLocalDynamicTLSGeneration) { + if (Model == TLSModel::LocalDynamic) + Model = TLSModel::GeneralDynamic; + } SDValue TPOff; EVT PtrVT = getPointerTy(); @@ -3102,17 +3097,20 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, if (Model == TLSModel::LocalExec) { SDValue HiVar = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1); + GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12); SDValue LoVar = DAG.getTargetGlobalAddress( GV, DL, PtrVT, 0, - AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC); + AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar, - DAG.getTargetConstant(16, MVT::i32)), - 0); - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar, - DAG.getTargetConstant(0, MVT::i32)), - 0); + SDValue TPWithOff_lo = + SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase, + HiVar, DAG.getTargetConstant(0, MVT::i32)), + 0); + SDValue TPWithOff = + SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo, + LoVar, DAG.getTargetConstant(0, MVT::i32)), + 0); + return TPWithOff; } else if (Model == TLSModel::InitialExec) { TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS); TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff); @@ -3127,19 +3125,6 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, DAG.getMachineFunction().getInfo<AArch64FunctionInfo>(); MFI->incNumLocalDynamicTLSAccesses(); - // Accesses used in this sequence go via the TLS descriptor which lives in - // the GOT. Prepare an address we can use to handle this. - SDValue HiDesc = DAG.getTargetExternalSymbol( - "_TLS_MODULE_BASE_", PtrVT, AArch64II::MO_TLS | AArch64II::MO_PAGE); - SDValue LoDesc = DAG.getTargetExternalSymbol( - "_TLS_MODULE_BASE_", PtrVT, - AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - - // First argument to the descriptor call is the address of the descriptor - // itself. - SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc); - DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc); - // The call needs a relocation too for linker relaxation. It doesn't make // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of // the address. @@ -3148,40 +3133,23 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, // Now we can calculate the offset from TPIDR_EL0 to this module's // thread-local area. - TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG); + TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG); // Now use :dtprel_whatever: operations to calculate this variable's offset // in its thread-storage area. 
SDValue HiVar = DAG.getTargetGlobalAddress( - GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_G1); + GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12); SDValue LoVar = DAG.getTargetGlobalAddress( GV, DL, MVT::i64, 0, - AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC); - - SDValue DTPOff = - SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar, - DAG.getTargetConstant(16, MVT::i32)), - 0); - DTPOff = - SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, DTPOff, LoVar, - DAG.getTargetConstant(0, MVT::i32)), - 0); - - TPOff = DAG.getNode(ISD::ADD, DL, PtrVT, TPOff, DTPOff); - } else if (Model == TLSModel::GeneralDynamic) { - // Accesses used in this sequence go via the TLS descriptor which lives in - // the GOT. Prepare an address we can use to handle this. - SDValue HiDesc = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGE); - SDValue LoDesc = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - // First argument to the descriptor call is the address of the descriptor - // itself. - SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc); - DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc); - + TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar, + DAG.getTargetConstant(0, MVT::i32)), + 0); + TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar, + DAG.getTargetConstant(0, MVT::i32)), + 0); + } else if (Model == TLSModel::GeneralDynamic) { // The call needs a relocation too for linker relaxation. It doesn't make // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of // the address. @@ -3189,7 +3157,7 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS); // Finally we can make a call to calculate the offset from tpidr_el0. - TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG); + TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG); } else llvm_unreachable("Unsupported ELF TLS access model"); @@ -3356,11 +3324,12 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, EVT VecVT; EVT EltVT; - SDValue EltMask, VecVal1, VecVal2; + uint64_t EltMask; + SDValue VecVal1, VecVal2; if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) { EltVT = MVT::i32; VecVT = MVT::v4i32; - EltMask = DAG.getConstant(0x80000000ULL, EltVT); + EltMask = 0x80000000ULL; if (!VT.isVector()) { VecVal1 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT, @@ -3378,7 +3347,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, // We want to materialize a mask with the high bit set, but the AdvSIMD // immediate moves cannot materialize that in a single instruction for // 64-bit elements. Instead, materialize zero and then negate it. - EltMask = DAG.getConstant(0, EltVT); + EltMask = 0; if (!VT.isVector()) { VecVal1 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT, @@ -3393,11 +3362,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, llvm_unreachable("Invalid type for copysign!"); } - std::vector<SDValue> BuildVectorOps; - for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i) - BuildVectorOps.push_back(EltMask); - - SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, BuildVectorOps); + SDValue BuildVec = DAG.getConstant(EltMask, VecVT); // If we couldn't materialize the mask above, then the mask vector will be // the zero vector, and we need to negate it here.
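The 0x80000000 element mask above is the standard sign-bit trick behind copysign: keep the magnitude bits of one input and the sign bit of the other. A scalar C++ model of what the f32 lanes compute:

    #include <cstdint>
    #include <cstring>

    // Scalar model of the vector lowering above for f32 lanes: clear Mag's
    // sign bit, then OR in Sgn's sign bit (mask 0x80000000).
    float copysignF32(float Mag, float Sgn) {
      uint32_t M, S;
      std::memcpy(&M, &Mag, sizeof(M));
      std::memcpy(&S, &Sgn, sizeof(S));
      uint32_t R = (M & 0x7fffffffu) | (S & 0x80000000u);
      float Out;
      std::memcpy(&Out, &R, sizeof(Out));
      return Out;
    }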
@@ -5927,8 +5892,10 @@ FailedModImm: if (VT.getVectorElementType().isFloatingPoint()) { SmallVector<SDValue, 8> Ops; - MVT NewType = - (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64; + EVT EltTy = VT.getVectorElementType(); + assert ((EltTy == MVT::f16 || EltTy == MVT::f32 || EltTy == MVT::f64) && + "Unsupported floating-point vector type"); + MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits()); for (unsigned i = 0; i < NumElts; ++i) Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i))); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts); @@ -6781,7 +6748,7 @@ bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, unsigned LZ = countLeadingZeros((uint64_t)Val); unsigned Shift = (63 - LZ) / 16; // MOVZ is free so return true for one or fewer MOVK. - return (Shift < 3) ? true : false; + return Shift < 3; } // Generate SUBS and CSEL for integer abs. @@ -6898,6 +6865,15 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, N->getOperand(0)); } } else { + // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) + APInt VNP1 = -Value + 1; + if (VNP1.isPowerOf2()) { + SDValue ShiftedVal = + DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), + DAG.getConstant(VNP1.logBase2(), MVT::i64)); + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N->getOperand(0), + ShiftedVal); + } // (mul x, -(2^N + 1)) => - (add (shl x, N), x) APInt VNM1 = -Value - 1; if (VNM1.isPowerOf2()) { @@ -6908,15 +6884,6 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0)); return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), Add); } - // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) - APInt VNP1 = -Value + 1; - if (VNP1.isPowerOf2()) { - SDValue ShiftedVal = - DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), - DAG.getConstant(VNP1.logBase2(), MVT::i64)); - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N->getOperand(0), - ShiftedVal); - } } } return SDValue(); @@ -7211,21 +7178,54 @@ static SDValue performBitcastCombine(SDNode *N, static SDValue performConcatVectorsCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { + SDLoc dl(N); + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + + // Optimize concat_vectors of truncated vectors, where the intermediate + // type is illegal, to avoid said illegality, e.g., + // (v4i16 (concat_vectors (v2i16 (truncate (v2i64))), + // (v2i16 (truncate (v2i64))))) + // -> + // (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))), + // (v4i32 (bitcast (v2i64))), + // <0, 2, 4, 6>))) + // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed + // on both input and result type, so we might generate worse code. + // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8. + if (N->getNumOperands() == 2 && + N0->getOpcode() == ISD::TRUNCATE && + N1->getOpcode() == ISD::TRUNCATE) { + SDValue N00 = N0->getOperand(0); + SDValue N10 = N1->getOperand(0); + EVT N00VT = N00.getValueType(); + + if (N00VT == N10.getValueType() && + (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) && + N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) { + MVT MidVT = (N00VT == MVT::v2i64 ? 
MVT::v4i32 : MVT::v8i16); + SmallVector<int, 8> Mask(MidVT.getVectorNumElements()); + for (size_t i = 0; i < Mask.size(); ++i) + Mask[i] = i * 2; + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getVectorShuffle( + MidVT, dl, + DAG.getNode(ISD::BITCAST, dl, MidVT, N00), + DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask)); + } + } + // Wait 'til after everything is legalized to try this. That way we have // legal vector types and such. if (DCI.isBeforeLegalizeOps()) return SDValue(); - SDLoc dl(N); - EVT VT = N->getValueType(0); - // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector // splat. The indexed instructions are going to be expecting a DUPLANE64, so // canonicalise to that. - if (N->getOperand(0) == N->getOperand(1) && VT.getVectorNumElements() == 2) { + if (N0 == N1 && VT.getVectorNumElements() == 2) { assert(VT.getVectorElementType().getSizeInBits() == 64); - return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, - WidenVector(N->getOperand(0), DAG), + return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG), DAG.getConstant(0, MVT::i64)); } @@ -7238,10 +7238,9 @@ static SDValue performConcatVectorsCombine(SDNode *N, // becomes // (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS)) - SDValue Op1 = N->getOperand(1); - if (Op1->getOpcode() != ISD::BITCAST) + if (N1->getOpcode() != ISD::BITCAST) return SDValue(); - SDValue RHS = Op1->getOperand(0); + SDValue RHS = N1->getOperand(0); MVT RHSTy = RHS.getValueType().getSimpleVT(); // If the RHS is not a vector, this is not the pattern we're looking for. if (!RHSTy.isVector()) @@ -7251,10 +7250,10 @@ static SDValue performConcatVectorsCombine(SDNode *N, MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(), RHSTy.getVectorNumElements() * 2); - return DAG.getNode( - ISD::BITCAST, dl, VT, - DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy, - DAG.getNode(ISD::BITCAST, dl, RHSTy, N->getOperand(0)), RHS)); + return DAG.getNode(ISD::BITCAST, dl, VT, + DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy, + DAG.getNode(ISD::BITCAST, dl, RHSTy, N0), + RHS)); } static SDValue tryCombineFixedPointConvert(SDNode *N, @@ -7651,6 +7650,15 @@ static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) { N->getOperand(0), N->getOperand(1), AndN.getOperand(0)); } +static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N, + SelectionDAG &DAG) { + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), N->getValueType(0), + DAG.getNode(Opc, SDLoc(N), + N->getOperand(1).getSimpleValueType(), + N->getOperand(1)), + DAG.getConstant(0, MVT::i64)); +} + static SDValue performIntrinsicCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { @@ -7663,6 +7671,18 @@ static SDValue performIntrinsicCombine(SDNode *N, case Intrinsic::aarch64_neon_vcvtfxu2fp: return tryCombineFixedPointConvert(N, DCI, DAG); break; + case Intrinsic::aarch64_neon_saddv: + return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG); + case Intrinsic::aarch64_neon_uaddv: + return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG); + case Intrinsic::aarch64_neon_sminv: + return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG); + case Intrinsic::aarch64_neon_uminv: + return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG); + case Intrinsic::aarch64_neon_smaxv: + return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG); + case Intrinsic::aarch64_neon_umaxv: + return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG); case Intrinsic::aarch64_neon_fmax: return 
DAG.getNode(AArch64ISD::FMAX, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2)); @@ -8792,9 +8812,11 @@ bool AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { } // For the real atomic operations, we have ldxr/stxr up to 128 bits, -bool AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { +TargetLoweringBase::AtomicRMWExpansionKind +AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - return Size <= 128; + return Size <= 128 ? AtomicRMWExpansionKind::LLSC + : AtomicRMWExpansionKind::None; } bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const { diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index e973364..5ff11e8 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -30,9 +30,9 @@ enum { WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses. CALL, // Function call. - // Almost the same as a normal call node, except that a TLSDesc relocation is - // needed so the linker can relax it correctly if possible. - TLSDESC_CALL, + // Produces the full sequence of instructions for getting the thread pointer + // offset of a variable into X0, using the TLSDesc model. + TLSDESC_CALLSEQ, ADRP, // Page address of a TargetGlobalAddress operand. ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand. LOADgot, // Load from automatically generated descriptor (e.g. Global @@ -141,6 +141,18 @@ enum { FCMLEz, FCMLTz, + // Vector across-lanes addition + // Only the lower result lane is defined. + SADDV, + UADDV, + + // Vector across-lanes min/max + // Only the lower result lane is defined. + SMINV, + UMINV, + SMAXV, + UMAXV, + // Vector bitwise negation NOT, @@ -335,7 +347,8 @@ public: bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override; bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + TargetLoweringBase::AtomicRMWExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; bool useLoadStackGuardNode() const override; TargetLoweringBase::LegalizeTypeAction @@ -399,8 +412,8 @@ private: SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerELFTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL, - SelectionDAG &DAG) const; + SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL, + SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; @@ -460,6 +473,16 @@ private: std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; + unsigned getInlineAsmMemConstraint( + const std::string &ConstraintCode) const override { + if (ConstraintCode == "Q") + return InlineAsm::Constraint_Q; + // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are + // followed by llvm_unreachable so we'll leave them unimplemented in + // the backend for now. 
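Stepping back to the performMulCombine hunk above (the getInlineAsmMemConstraint body resumes below): the reordering lets the -(2^N - 1) rewrite fire before the -(2^N + 1) one, and both rest on simple two's-complement identities. A small self-contained check:

    #include <cassert>
    #include <cstdint>

    // Identities used by performMulCombine:
    //   x * -(2^N - 1) == x - (x << N)
    //   x * -(2^N + 1) == -((x << N) + x)
    int main() {
      for (int64_t x : {0LL, 1LL, -7LL, 12345LL}) {
        for (unsigned N = 1; N < 16; ++N) {
          int64_t P2 = int64_t(1) << N;
          // Shift through uint64_t to mirror the DAG's wrap-around arithmetic
          // and avoid shifting a negative value.
          int64_t Shl = int64_t(uint64_t(x) << N);
          assert(x * -(P2 - 1) == x - Shl);
          assert(x * -(P2 + 1) == -(Shl + x));
        }
      }
      return 0;
    }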
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); + } + bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; bool mayBeEmittedAsTailCall(CallInst *CI) const override; bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 64cec55..8e0af2d 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -31,7 +31,7 @@ using namespace llvm; AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP), - RI(this, &STI), Subtarget(STI) {} + RI(STI.getTargetTriple()), Subtarget(STI) {} /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. @@ -2068,10 +2068,10 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, .setMIFlag(Flag); } -MachineInstr * -AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const { +MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + ArrayRef<unsigned> Ops, + int FrameIndex) const { // This is a bit of a hack. Consider this instruction: // // %vreg0<def> = COPY %SP; GPR64all:%vreg0 diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index d8f1274..fa4b8b7 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -129,10 +129,9 @@ public: const TargetRegisterInfo *TRI) const override; using TargetInstrInfo::foldMemoryOperandImpl; - MachineInstr * - foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const override; + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, + int FrameIndex) const override; bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 6e4c0b0..ec6fa5c 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -22,6 +22,8 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">, AssemblerPredicate<"FeatureCrypto", "crypto">; def HasCRC : Predicate<"Subtarget->hasCRC()">, AssemblerPredicate<"FeatureCRC", "crc">; +def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">, + AssemblerPredicate<"FeatureV8_1a", "v8.1a">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsCyclone : Predicate<"Subtarget->isCyclone()">; @@ -96,6 +98,19 @@ def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>; def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; + +// Generates the general dynamic sequences, i.e. 
+// adrp x0, :tlsdesc:var +// ldr x1, [x0, #:tlsdesc_lo12:var] +// add x0, x0, #:tlsdesc_lo12:var +// .tlsdesccall var +// blr x1 + +// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here) +// number of operands (the variable) +def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1, + [SDTCisPtrTy<0>]>; + def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4, [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, @@ -229,10 +244,11 @@ def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH, def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>; def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>; -def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL", - SDT_AArch64TLSDescCall, - [SDNPInGlue, SDNPOutGlue, SDNPHasChain, - SDNPVariadic]>; +def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ", + SDT_AArch64TLSDescCallSeq, + [SDNPInGlue, SDNPOutGlue, SDNPHasChain, + SDNPVariadic]>; + def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge", SDT_AArch64WrapperLarge>; @@ -244,6 +260,13 @@ def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>; def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>; +def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>; +def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>; +def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>; +def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>; +def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>; +def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; + //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -1049,15 +1072,16 @@ def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> { let AsmString = ".tlsdesccall $sym"; } -// Pseudo-instruction representing a BLR with attached TLSDESC relocation. It -// gets expanded to two MCInsts during lowering. -let isCall = 1, Defs = [LR] in -def TLSDESC_BLR - : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym), - [(AArch64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>; +// FIXME: maybe the scratch register used shouldn't be fixed to X1? +// FIXME: can "hasSideEffects be dropped? +let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1, + isCodeGenOnly = 1 in +def TLSDESC_CALLSEQ + : Pseudo<(outs), (ins i64imm:$sym), + [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>; +def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym), + (TLSDESC_CALLSEQ texternalsym:$sym)>; -def : Pat<(AArch64tlsdesc_call GPR64:$dest, texternalsym:$sym), - (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>; //===----------------------------------------------------------------------===// // Conditional branch (immediate) instruction. 
//===----------------------------------------------------------------------===// @@ -2326,8 +2350,15 @@ defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>; defm FMOV : UnscaledConversion<"fmov">; -def : Pat<(f32 (fpimm0)), (FMOVWSr WZR)>, Requires<[NoZCZ]>; -def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>; +// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable +let isReMaterializable = 1, isCodeGenOnly = 1 in { +def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>, + PseudoInstExpansion<(FMOVWSr FPR32:$Rd, WZR)>, + Requires<[NoZCZ]>; +def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>, + PseudoInstExpansion<(FMOVXDr FPR64:$Rd, XZR)>, + Requires<[NoZCZ]>; +} //===----------------------------------------------------------------------===// // Floating point conversion instruction. @@ -3416,10 +3447,10 @@ defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">; defm FMAXP : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">; defm FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">; defm FMINP : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">; -def : Pat<(i64 (int_aarch64_neon_saddv (v2i64 V128:$Rn))), - (ADDPv2i64p V128:$Rn)>; -def : Pat<(i64 (int_aarch64_neon_uaddv (v2i64 V128:$Rn))), - (ADDPv2i64p V128:$Rn)>; +def : Pat<(v2i64 (AArch64saddv V128:$Rn)), + (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; +def : Pat<(v2i64 (AArch64uaddv V128:$Rn)), + (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))), (FADDPv2i32p V64:$Rn)>; def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))), @@ -3709,10 +3740,6 @@ multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>; defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>; defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>; -defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>; -defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>; -defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>; -defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi32lane>; // Floating point vector extractions are codegen'd as either a sequence of @@ -3776,121 +3803,143 @@ defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>; defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>; defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>; -multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, Intrinsic intOp> { -// If there is a sign extension after this intrinsic, consume it as smov already -// performed it - def : Pat<(i32 (sext_inreg (i32 (intOp (v8i8 V64:$Rn))), i8)), - (i32 (SMOVvi8to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - (i64 0)))>; - def : Pat<(i32 (intOp (v8i8 V64:$Rn))), - (i32 (SMOVvi8to32 +// Patterns for across-vector intrinsics, that have a node equivalent, that +// returns a vector (with only the low lane defined) instead of a scalar. +// In effect, opNode is the same as (scalar_to_vector (IntNode)). +multiclass SIMDAcrossLanesIntrinsic<string baseOpc, + SDPatternOperator opNode> { +// If a lane instruction caught the vector_extract around opNode, we can +// directly match the latter to the instruction. 
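At the source level, the across-lanes machinery being refactored here is what ACLE reduction intrinsics lower through; the multiclass body resumes below with the patterns that pick the matching ADDV/SMAXV/UMINV instruction. A small reproducer, assuming an AArch64 toolchain with arm_neon.h:

    #include <arm_neon.h>

    // vaddvq_s32 becomes llvm.aarch64.neon.saddv; the combine shown earlier
    // rewrites that to an AArch64ISD::SADDV node plus an extract of lane 0,
    // which these patterns then select as a single ADDV instruction.
    int32_t sumAcrossLanes(int32x4_t V) { return vaddvq_s32(V); }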
+def : Pat<(v8i8 (opNode V64:$Rn)), + (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>; +def : Pat<(v16i8 (opNode V128:$Rn)), (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - (i64 0)))>; -// If there is a sign extension after this intrinsic, consume it as smov already -// performed it -def : Pat<(i32 (sext_inreg (i32 (intOp (v16i8 V128:$Rn))), i8)), - (i32 (SMOVvi8to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), - (i64 0)))>; -def : Pat<(i32 (intOp (v16i8 V128:$Rn))), - (i32 (SMOVvi8to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), - (i64 0)))>; + (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>; +def : Pat<(v4i16 (opNode V64:$Rn)), + (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>; +def : Pat<(v8i16 (opNode V128:$Rn)), + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>; +def : Pat<(v4i32 (opNode V128:$Rn)), + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>; + + +// If none did, fallback to the explicit patterns, consuming the vector_extract. +def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)), + (i32 0)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), + bsub), ssub)>; +def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), + bsub), ssub)>; +def : Pat<(i32 (vector_extract (insert_subvector undef, + (v4i16 (opNode V64:$Rn)), (i32 0)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), + hsub), ssub)>; +def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), + hsub), ssub)>; +def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), + ssub), ssub)>; + +} + +multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, + SDPatternOperator opNode> + : SIMDAcrossLanesIntrinsic<baseOpc, opNode> { // If there is a sign extension after this intrinsic, consume it as smov already // performed it -def : Pat<(i32 (sext_inreg (i32 (intOp (v4i16 V64:$Rn))), i16)), +def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef, + (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), i8)), + (i32 (SMOVvi8to32 + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), + (i64 0)))>; +def : Pat<(i32 (sext_inreg (i32 (vector_extract + (opNode (v16i8 V128:$Rn)), (i64 0))), i8)), + (i32 (SMOVvi8to32 + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), + (i64 0)))>; +def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef, + (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), i16)), (i32 (SMOVvi16to32 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), (i64 0)))>; -def : Pat<(i32 (intOp (v4i16 V64:$Rn))), +def : Pat<(i32 (sext_inreg (i32 (vector_extract + (opNode (v8i16 V128:$Rn)), (i64 0))), i16)), (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), - (i64 0)))>; -// If there is a sign extension after this intrinsic, consume it as smov already -// performed it -def : Pat<(i32 (sext_inreg (i32 (intOp (v8i16 V128:$Rn))), i16)), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - (i64 0)))>; -def : Pat<(i32 (intOp (v8i16 V128:$Rn))), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - (i64 0)))>; - -def : Pat<(i32 (intOp (v4i32 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub), - ssub))>; + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), + (i64 0)))>; } -multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, Intrinsic intOp> { -// If there is a masking operation keeping only what has been actually -// generated, consume it. - def : Pat<(i32 (and (i32 (intOp (v8i8 V64:$Rn))), maski8_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - ssub))>; - def : Pat<(i32 (intOp (v8i8 V64:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - ssub))>; +multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, + SDPatternOperator opNode> + : SIMDAcrossLanesIntrinsic<baseOpc, opNode> { // If there is a masking operation keeping only what has been actually // generated, consume it. -def : Pat<(i32 (and (i32 (intOp (v16i8 V128:$Rn))), maski8_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), - ssub))>; -def : Pat<(i32 (intOp (v16i8 V128:$Rn))), +def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef, + (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), maski8_or_more)), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), + ssub))>; +def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))), + maski8_or_more)), (i32 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), ssub))>; - -// If there is a masking operation keeping only what has been actually -// generated, consume it. -def : Pat<(i32 (and (i32 (intOp (v4i16 V64:$Rn))), maski16_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), - ssub))>; -def : Pat<(i32 (intOp (v4i16 V64:$Rn))), +def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef, + (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), maski16_or_more)), (i32 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), ssub))>; -// If there is a masking operation keeping only what has been actually -// generated, consume it. 
-def : Pat<(i32 (and (i32 (intOp (v8i16 V128:$Rn))), maski16_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - ssub))>; -def : Pat<(i32 (intOp (v8i16 V128:$Rn))), +def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))), + maski16_or_more)), (i32 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), ssub))>; +} -def : Pat<(i32 (intOp (v4i32 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub), - ssub))>; +defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>; +// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm +def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))), + (ADDPv2i32 V64:$Rn, V64:$Rn)>; -} +defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>; +// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm +def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))), + (ADDPv2i32 V64:$Rn, V64:$Rn)>; + +defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>; +def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))), + (SMAXPv2i32 V64:$Rn, V64:$Rn)>; + +defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>; +def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))), + (SMINPv2i32 V64:$Rn, V64:$Rn)>; + +defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>; +def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))), + (UMAXPv2i32 V64:$Rn, V64:$Rn)>; + +defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>; +def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))), + (UMINPv2i32 V64:$Rn, V64:$Rn)>; multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> { def : Pat<(i32 (intOp (v8i8 V64:$Rn))), @@ -3953,32 +4002,6 @@ def : Pat<(i64 (intOp (v4i32 V128:$Rn))), dsub))>; } -defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", int_aarch64_neon_saddv>; -// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm -def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", int_aarch64_neon_uaddv>; -// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm -def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", int_aarch64_neon_smaxv>; -def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", int_aarch64_neon_sminv>; -def : Pat<(i32 (int_aarch64_neon_sminv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", int_aarch64_neon_umaxv>; -def : Pat<(i32 (int_aarch64_neon_umaxv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", int_aarch64_neon_uminv>; -def : Pat<(i32 (int_aarch64_neon_uminv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub)>; - defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>; defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>; diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 8463ce6..b1499e2 100644 --- 
a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -63,16 +63,24 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { // If a matching instruction is found, MergeForward is set to true if the // merge is to remove the first instruction and replace the second with // a pair-wise insn, and false if the reverse is true. + // \p SExtIdx[out] gives the index of the result of the load pair that + // must be extended. The value of SExtIdx assumes that the paired load + // produces the value in this order: (I, returned iterator), i.e., + // -1 means no value has to be extended, 0 means I, and 1 means the + // returned iterator. MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, - bool &MergeForward, + bool &MergeForward, int &SExtIdx, unsigned Limit); // Merge the two instructions indicated into a single pair-wise instruction. // If MergeForward is true, erase the first instruction and fold its // operation into the second. If false, the reverse. Return the instruction // following the first instruction (which may change during processing). + // \p SExtIdx index of the result that must be extended for a paired load. + // -1 means none, 0 means I, and 1 means Paired. MachineBasicBlock::iterator mergePairedInsns(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Paired, bool MergeForward); + MachineBasicBlock::iterator Paired, bool MergeForward, + int SExtIdx); // Scan the instruction list to find a base register update that can // be combined with the current instruction (a load or store) using @@ -181,6 +189,43 @@ int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) { } } +static unsigned getMatchingNonSExtOpcode(unsigned Opc, + bool *IsValidLdStrOpc = nullptr) { + if (IsValidLdStrOpc) + *IsValidLdStrOpc = true; + switch (Opc) { + default: + if (IsValidLdStrOpc) + *IsValidLdStrOpc = false; + return UINT_MAX; + case AArch64::STRDui: + case AArch64::STURDi: + case AArch64::STRQui: + case AArch64::STURQi: + case AArch64::STRWui: + case AArch64::STURWi: + case AArch64::STRXui: + case AArch64::STURXi: + case AArch64::LDRDui: + case AArch64::LDURDi: + case AArch64::LDRQui: + case AArch64::LDURQi: + case AArch64::LDRWui: + case AArch64::LDURWi: + case AArch64::LDRXui: + case AArch64::LDURXi: + case AArch64::STRSui: + case AArch64::STURSi: + case AArch64::LDRSui: + case AArch64::LDURSi: + return Opc; + case AArch64::LDRSWui: + return AArch64::LDRWui; + case AArch64::LDURSWi: + return AArch64::LDURWi; + } +} + static unsigned getMatchingPairOpcode(unsigned Opc) { switch (Opc) { default: @@ -282,7 +327,7 @@ static unsigned getPostIndexedOpcode(unsigned Opc) { MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, - bool MergeForward) { + bool MergeForward, int SExtIdx) { MachineBasicBlock::iterator NextI = I; ++NextI; // If NextI is the second of the two instructions to be merged, we need @@ -292,11 +337,13 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, if (NextI == Paired) ++NextI; - bool IsUnscaled = isUnscaledLdst(I->getOpcode()); + unsigned Opc = + SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode()); + bool IsUnscaled = isUnscaledLdst(Opc); int OffsetStride = IsUnscaled && EnableAArch64UnscaledMemOp ? 
getMemSize(I) : 1; - unsigned NewOpc = getMatchingPairOpcode(I->getOpcode()); + unsigned NewOpc = getMatchingPairOpcode(Opc); // Insert our new paired instruction after whichever of the paired // instructions MergeForward indicates. MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; @@ -311,6 +358,11 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, Paired->getOperand(2).getImm() + OffsetStride) { RtMI = Paired; Rt2MI = I; + // Here we swapped the assumption made for SExtIdx. + // I.e., we turn ldp I, Paired into ldp Paired, I. + // Update the index accordingly. + if (SExtIdx != -1) + SExtIdx = (SExtIdx + 1) % 2; } else { RtMI = I; Rt2MI = Paired; @@ -337,8 +389,47 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, DEBUG(dbgs() << " "); DEBUG(Paired->print(dbgs())); DEBUG(dbgs() << " with instruction:\n "); - DEBUG(((MachineInstr *)MIB)->print(dbgs())); - DEBUG(dbgs() << "\n"); + + if (SExtIdx != -1) { + // Generate the sign extension for the proper result of the ldp. + // I.e., with X1, that would be: + // %W1<def> = KILL %W1, %X1<imp-def> + // %X1<def> = SBFMXri %X1<kill>, 0, 31 + MachineOperand &DstMO = MIB->getOperand(SExtIdx); + // Right now, DstMO has the extended register, since it comes from an + // extended opcode. + unsigned DstRegX = DstMO.getReg(); + // Get the W variant of that register. + unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32); + // Update the result of LDP to use the W instead of the X variant. + DstMO.setReg(DstRegW); + DEBUG(((MachineInstr *)MIB)->print(dbgs())); + DEBUG(dbgs() << "\n"); + // Make the machine verifier happy by providing a definition for + // the X register. + // Insert this definition right after the generated LDP, i.e., before + // InsertionPoint. + MachineInstrBuilder MIBKill = + BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(TargetOpcode::KILL), DstRegW) + .addReg(DstRegW) + .addReg(DstRegX, RegState::Define); + MIBKill->getOperand(2).setImplicit(); + // Create the sign extension. + MachineInstrBuilder MIBSXTW = + BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(AArch64::SBFMXri), DstRegX) + .addReg(DstRegX) + .addImm(0) + .addImm(31); + (void)MIBSXTW; + DEBUG(dbgs() << " Extend operand:\n "); + DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs())); + DEBUG(dbgs() << "\n"); + } else { + DEBUG(((MachineInstr *)MIB)->print(dbgs())); + DEBUG(dbgs() << "\n"); + } // Erase the old instructions. I->eraseFromParent(); @@ -396,7 +487,8 @@ static int alignTo(int Num, int PowOf2) { /// be combined with the current instruction into a load/store pair. MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, - bool &MergeForward, unsigned Limit) { + bool &MergeForward, int &SExtIdx, + unsigned Limit) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineBasicBlock::iterator MBBI = I; MachineInstr *FirstMI = I; @@ -436,7 +528,19 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // Now that we know this is a real instruction, count it. ++Count; - if (Opc == MI->getOpcode() && MI->getOperand(2).isImm()) { + bool CanMergeOpc = Opc == MI->getOpcode(); + SExtIdx = -1; + if (!CanMergeOpc) { + bool IsValidLdStrOpc; + unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc); + if (!IsValidLdStrOpc) + continue; + // Opc will be the first instruction in the pair. + SExtIdx = NonSExtOpc == (unsigned)Opc ? 
1 : 0; + CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode()); + } + + if (CanMergeOpc && MI->getOperand(2).isImm()) { // If we've found another instruction with the same opcode, check to see // if the base and offset are compatible with our starting instruction. // These instructions all have scaled immediate operands, so we just @@ -823,13 +927,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { } // Look ahead up to ScanLimit instructions for a pairable instruction. bool MergeForward = false; + int SExtIdx = -1; MachineBasicBlock::iterator Paired = - findMatchingInsn(MBBI, MergeForward, ScanLimit); + findMatchingInsn(MBBI, MergeForward, SExtIdx, ScanLimit); if (Paired != E) { // Merge the loads into a pair. Keeping the iterator straight is a // pain, so we let the merge routine tell us what the next instruction // is after it's done mucking about. - MBBI = mergePairedInsns(MBBI, Paired, MergeForward); + MBBI = mergePairedInsns(MBBI, Paired, MergeForward, SExtIdx); Modified = true; ++NumPairCreated; diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp index e57b0f4..b829341 100644 --- a/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -22,9 +22,12 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; +extern cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration; + AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, AsmPrinter &printer) : Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {} @@ -84,10 +87,16 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, if (MO.isGlobal()) { const GlobalValue *GV = MO.getGlobal(); Model = Printer.TM.getTLSModel(GV); + if (!EnableAArch64ELFLocalDynamicTLSGeneration && + Model == TLSModel::LocalDynamic) + Model = TLSModel::GeneralDynamic; + } else { assert(MO.isSymbol() && StringRef(MO.getSymbolName()) == "_TLS_MODULE_BASE_" && "unexpected external TLS symbol"); + // The general dynamic access sequence is used to get the + // address of _TLS_MODULE_BASE_. 
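The change above makes local-dynamic TLS lowering opt-in on ELF: unless EnableAArch64ELFLocalDynamicTLSGeneration is set, a local-dynamic access is demoted to general dynamic, which is always correct, just potentially slower. A minimal sketch of that selection logic, with selectTLSModel as a hypothetical free-function stand-in for the lowering code and a plain bool standing in for the cl::opt:

#include <cassert>
#include <string>

enum class TLSModel { GeneralDynamic, LocalDynamic, InitialExec, LocalExec };

// Hypothetical stand-in for the command-line flag referenced above.
static bool EnableLocalDynamic = false;

TLSModel selectTLSModel(bool IsGlobal, const std::string &SymName,
                        TLSModel Requested) {
  if (IsGlobal) {
    // Demote local dynamic to general dynamic when the dedicated
    // local-dynamic sequence is disabled.
    if (!EnableLocalDynamic && Requested == TLSModel::LocalDynamic)
      return TLSModel::GeneralDynamic;
    return Requested;
  }
  // The only external TLS symbol expected here is _TLS_MODULE_BASE_,
  // whose address is obtained via the general dynamic sequence.
  assert(SymName == "_TLS_MODULE_BASE_" && "unexpected external TLS symbol");
  return TLSModel::GeneralDynamic;
}
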
Model = TLSModel::GeneralDynamic; } switch (Model) { @@ -123,6 +132,8 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, RefFlags |= AArch64MCExpr::VK_G1; else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G0) RefFlags |= AArch64MCExpr::VK_G0; + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_HI12) + RefFlags |= AArch64MCExpr::VK_HI12; if (MO.getTargetFlags() & AArch64II::MO_NC) RefFlags |= AArch64MCExpr::VK_NC; diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp index 4690177..5394875 100644 --- a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp +++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp @@ -319,7 +319,7 @@ void A57ChainingConstraint::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, static bool regJustKilledBefore(const LiveIntervals &LIs, unsigned reg, const MachineInstr &MI) { - LiveInterval LI = LIs.getInterval(reg); + const LiveInterval &LI = LIs.getInterval(reg); SlotIndex SI = LIs.getInstructionIndex(&MI); return LI.expiredAt(SI); } diff --git a/lib/Target/AArch64/AArch64PromoteConstant.cpp b/lib/Target/AArch64/AArch64PromoteConstant.cpp index c037c86..e1b93bf 100644 --- a/lib/Target/AArch64/AArch64PromoteConstant.cpp +++ b/lib/Target/AArch64/AArch64PromoteConstant.cpp @@ -38,6 +38,7 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -189,9 +190,11 @@ private: IPI->second.push_back(&Use); // Transfer the dominated uses of IPI to NewPt // Inserting into the DenseMap may invalidate existing iterator. - // Keep a copy of the key to find the iterator to erase. + // Keep a copy of the key to find the iterator to erase. Keep a copy of the + // value so that we don't have to dereference IPI->second. Instruction *OldInstr = IPI->first; - InsertPts[NewPt] = std::move(IPI->second); + Uses OldUses = std::move(IPI->second); + InsertPts[NewPt] = std::move(OldUses); // Erase IPI. 
InsertPts.erase(OldInstr); } diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index 206cdbb..33c11fe 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -18,6 +18,7 @@ #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -37,9 +38,8 @@ static cl::opt<bool> ReserveX18("aarch64-reserve-x18", cl::Hidden, cl::desc("Reserve X18, making it unavailable as GPR")); -AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo *tii, - const AArch64Subtarget *sti) - : AArch64GenRegisterInfo(AArch64::LR), TII(tii), STI(sti) {} +AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT) + : AArch64GenRegisterInfo(AArch64::LR), TT(TT) {} const MCPhysReg * AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { @@ -55,7 +55,8 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } const uint32_t * -AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { +AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { if (CC == CallingConv::GHC) // This is academic becase all GHC calls are (supposed to be) tail calls return CSR_AArch64_NoRegs_RegMask; @@ -66,15 +67,16 @@ AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { } const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const { - if (STI->isTargetDarwin()) + if (TT.isOSDarwin()) return CSR_AArch64_TLS_Darwin_RegMask; - assert(STI->isTargetELF() && "only expect Darwin or ELF TLS"); + assert(TT.isOSBinFormatELF() && "only expect Darwin or ELF TLS"); return CSR_AArch64_TLS_ELF_RegMask; } const uint32_t * -AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { +AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { // This should return a register mask that is the same as that returned by // getCallPreservedMask but that additionally preserves the register used for // the first i64 argument (which must also be the register used to return a @@ -97,12 +99,12 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(AArch64::WSP); Reserved.set(AArch64::WZR); - if (TFI->hasFP(MF) || STI->isTargetDarwin()) { + if (TFI->hasFP(MF) || TT.isOSDarwin()) { Reserved.set(AArch64::FP); Reserved.set(AArch64::W29); } - if (STI->isTargetDarwin() || ReserveX18) { + if (TT.isOSDarwin() || ReserveX18) { Reserved.set(AArch64::X18); // Platform register Reserved.set(AArch64::W18); } @@ -129,10 +131,10 @@ bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF, return true; case AArch64::X18: case AArch64::W18: - return STI->isTargetDarwin() || ReserveX18; + return TT.isOSDarwin() || ReserveX18; case AArch64::FP: case AArch64::W29: - return TFI->hasFP(MF) || STI->isTargetDarwin(); + return TFI->hasFP(MF) || TT.isOSDarwin(); case AArch64::W19: case AArch64::X19: return hasBasePointer(MF); @@ -269,7 +271,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI, // The FP is only available if there is no dynamic realignment. We // don't know for sure yet whether we'll need that, so we guess based // on whether there are any local variables that would trigger it. 
- if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, FPOffset)) + if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, AArch64::FP, FPOffset)) return false; // If we can reference via the stack pointer or base pointer, try that. @@ -277,7 +279,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI, // to only disallow SP relative references in the live range of // the VLA(s). In practice, it's unclear how much difference that // would make, but it may be worth doing. - if (isFrameOffsetLegal(MI, Offset)) + if (isFrameOffsetLegal(MI, AArch64::SP, Offset)) return false; // The offset likely isn't legal; we want to allocate a virtual base register. @@ -285,6 +287,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI, } bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, + unsigned BaseReg, int64_t Offset) const { assert(Offset <= INT_MAX && "Offset too big to fit in int."); assert(MI && "Unable to get the legal offset for nil instruction."); @@ -302,10 +305,11 @@ void AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, DebugLoc DL; // Defaults to "unknown" if (Ins != MBB->end()) DL = Ins->getDebugLoc(); - + const MachineFunction &MF = *MBB->getParent(); + const AArch64InstrInfo *TII = + MF.getSubtarget<AArch64Subtarget>().getInstrInfo(); const MCInstrDesc &MCID = TII->get(AArch64::ADDXri); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - const MachineFunction &MF = *MBB->getParent(); MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF)); unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); @@ -324,6 +328,9 @@ void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, ++i; assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); } + const MachineFunction *MF = MI.getParent()->getParent(); + const AArch64InstrInfo *TII = + MF->getSubtarget<AArch64Subtarget>().getInstrInfo(); bool Done = rewriteAArch64FrameIndex(MI, i, BaseReg, Off, TII); assert(Done && "Unable to resolve frame index!"); (void)Done; @@ -337,6 +344,8 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const AArch64InstrInfo *TII = + MF.getSubtarget<AArch64Subtarget>().getInstrInfo(); const AArch64FrameLowering *TFI = static_cast<const AArch64FrameLowering *>( MF.getSubtarget().getFrameLowering()); @@ -389,10 +398,10 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, case AArch64::GPR64RegClassID: case AArch64::GPR32commonRegClassID: case AArch64::GPR64commonRegClassID: - return 32 - 1 // XZR/SP - - (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP - - (STI->isTargetDarwin() || ReserveX18) // X18 reserved as platform register - - hasBasePointer(MF); // X19 + return 32 - 1 // XZR/SP + - (TFI->hasFP(MF) || TT.isOSDarwin()) // FP + - (TT.isOSDarwin() || ReserveX18) // X18 reserved as platform register + - hasBasePointer(MF); // X19 case AArch64::FPR8RegClassID: case AArch64::FPR16RegClassID: case AArch64::FPR32RegClassID: diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h index 51a5034..c01bfa5 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/lib/Target/AArch64/AArch64RegisterInfo.h @@ -19,26 +19,24 @@ namespace llvm { -class AArch64InstrInfo; -class AArch64Subtarget; class MachineFunction; class RegScavenger; class TargetRegisterClass; +class Triple; struct AArch64RegisterInfo : public 
AArch64GenRegisterInfo { private: - const AArch64InstrInfo *TII; - const AArch64Subtarget *STI; + const Triple &TT; public: - AArch64RegisterInfo(const AArch64InstrInfo *tii, const AArch64Subtarget *sti); + AArch64RegisterInfo(const Triple &TT); bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; /// Code Generation virtual methods... - const MCPhysReg * - getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; - const uint32_t *getCallPreservedMask(CallingConv::ID) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const override; unsigned getCSRFirstUseCost() const override { // The cost will be compared against BlockFrequency where entry has the @@ -59,7 +57,8 @@ public: /// /// Should return NULL in the case that the calling convention does not have /// this property - const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; + const uint32_t *getThisReturnPreservedMask(const MachineFunction &MF, + CallingConv::ID) const; BitVector getReservedRegs(const MachineFunction &MF) const override; const TargetRegisterClass * @@ -73,7 +72,7 @@ public: bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override; - bool isFrameOffsetLegal(const MachineInstr *MI, + bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg, int64_t Offset) const override; void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg, int FrameIdx, diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index c613025..221d70d 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -48,7 +48,7 @@ AArch64Subtarget::AArch64Subtarget(const std::string &TT, const TargetMachine &TM, bool LittleEndian) : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false), - HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), + HasV8_1a(false), HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {} diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index d418cc5..bcab97d 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -41,6 +41,7 @@ protected: bool HasNEON; bool HasCrypto; bool HasCRC; + bool HasV8_1a; // HasZeroCycleRegMove - Has zero-cycle register mov instructions. 
bool HasZeroCycleRegMove; @@ -86,6 +87,7 @@ public: const AArch64RegisterInfo *getRegisterInfo() const override { return &getInstrInfo()->getRegisterInfo(); } + const Triple &getTargetTriple() const { return TargetTriple; } bool enableMachineScheduler() const override { return true; } bool enablePostMachineScheduler() const override { return isCortexA53() || isCortexA57(); @@ -99,6 +101,7 @@ public: bool hasNEON() const { return HasNEON; } bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } + bool hasV8_1a() const { return HasV8_1a; } bool isLittleEndian() const { return IsLittle; } diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index d73d0b3..f902f64 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -104,6 +104,16 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { return make_unique<AArch64_ELFTargetObjectFile>(); } +// Helper function to build a DataLayout string +static std::string computeDataLayout(StringRef TT, bool LittleEndian) { + Triple Triple(TT); + if (Triple.isOSBinFormatMachO()) + return "e-m:o-i64:64-i128:128-n32:64-S128"; + if (LittleEndian) + return "e-m:e-i64:64-i128:128-n32:64-S128"; + return "E-m:e-i64:64-i128:128-n32:64-S128"; +} + /// TargetMachine ctor - Create an AArch64 architecture model. /// AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT, @@ -112,16 +122,12 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool LittleEndian) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - // This nested ternary is horrible, but DL needs to be properly - // initialized - // before TLInfo is constructed. - DL(Triple(TT).isOSBinFormatMachO() - ? "e-m:o-i64:64-i128:128-n32:64-S128" - : (LittleEndian ? "e-m:e-i64:64-i128:128-n32:64-S128" - : "E-m:e-i64:64-i128:128-n32:64-S128")), + // This nested ternary is horrible, but DL needs to be properly + // initialized before TLInfo is constructed. + : LLVMTargetMachine(T, computeDataLayout(TT, LittleEndian), TT, CPU, FS, + Options, RM, CM, OL), TLOF(createTLOF(Triple(getTargetTriple()))), - Subtarget(TT, CPU, FS, *this, LittleEndian), isLittle(LittleEndian) { + isLittle(LittleEndian) { initAsmInfo(); } @@ -239,7 +245,7 @@ bool AArch64PassConfig::addPreISel() { // FIXME: On AArch64, this depends on the type. // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(). // and the offset has to be a multiple of the related size in bytes. - if (TM->getOptLevel() != CodeGenOpt::None) + if (TM->getOptLevel() == CodeGenOpt::Aggressive) addPass(createGlobalMergePass(TM, 4095)); if (TM->getOptLevel() != CodeGenOpt::None) addPass(createAArch64AddressTypePromotionPass()); @@ -287,10 +293,7 @@ void AArch64PassConfig::addPostRegAlloc() { // Change dead register definitions to refer to the zero register. if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination) addPass(createAArch64DeadRegisterDefinitions()); - if (TM->getOptLevel() != CodeGenOpt::None && - (TM->getSubtarget<AArch64Subtarget>().isCortexA53() || - TM->getSubtarget<AArch64Subtarget>().isCortexA57()) && - usingDefaultRegAlloc()) + if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc()) // Improve performance for some FP/SIMD code for A57. 
addPass(createAArch64A57FPLoadBalancing()); } diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h index 7143adf..ec34fad 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.h +++ b/lib/Target/AArch64/AArch64TargetMachine.h @@ -23,9 +23,7 @@ namespace llvm { class AArch64TargetMachine : public LLVMTargetMachine { protected: - const DataLayout DL; std::unique_ptr<TargetLoweringObjectFile> TLOF; - AArch64Subtarget Subtarget; mutable StringMap<std::unique_ptr<AArch64Subtarget>> SubtargetMap; public: @@ -35,11 +33,6 @@ public: CodeGenOpt::Level OL, bool IsLittleEndian); ~AArch64TargetMachine() override; - - const DataLayout *getDataLayout() const override { return &DL; } - const AArch64Subtarget *getSubtargetImpl() const override { - return &Subtarget; - } const AArch64Subtarget *getSubtargetImpl(const Function &F) const override; // Pass Pipeline Configuration diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp index 4069038..8ff58e9 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp +++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -13,6 +13,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/Dwarf.h" using namespace llvm; using namespace dwarf; @@ -23,6 +24,11 @@ void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx, InitializeELF(TM.Options.UseInitArray); } +AArch64_MachoTargetObjectFile::AArch64_MachoTargetObjectFile() + : TargetLoweringObjectFileMachO() { + SupportGOTPCRelWithOffset = false; +} + const MCExpr *AArch64_MachoTargetObjectFile::getTTypeGlobalReference( const GlobalValue *GV, unsigned Encoding, Mangler &Mang, const TargetMachine &TM, MachineModuleInfo *MMI, @@ -50,3 +56,18 @@ MCSymbol *AArch64_MachoTargetObjectFile::getCFIPersonalitySymbol( MachineModuleInfo *MMI) const { return TM.getSymbol(GV, Mang); } + +const MCExpr *AArch64_MachoTargetObjectFile::getIndirectSymViaGOTPCRel( + const MCSymbol *Sym, const MCValue &MV, int64_t Offset, + MachineModuleInfo *MMI, MCStreamer &Streamer) const { + assert((Offset+MV.getConstant() == 0) && + "AArch64 does not support GOT PC rel with extra offset"); + // On ARM64 Darwin, we can reference symbols with foo@GOT-., which + // is an indirect pc-relative reference. + const MCExpr *Res = + MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext()); + MCSymbol *PCSym = getContext().CreateTempSymbol(); + Streamer.EmitLabel(PCSym); + const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext()); + return MCBinaryExpr::CreateSub(Res, PC, getContext()); +} diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h index 2e595f9..d41f445 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.h +++ b/lib/Target/AArch64/AArch64TargetObjectFile.h @@ -24,6 +24,8 @@ class AArch64_ELFTargetObjectFile : public TargetLoweringObjectFileELF { /// AArch64_MachoTargetObjectFile - This TLOF implementation is used for Darwin.
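The new getIndirectSymViaGOTPCRel above emits a temporary label at the fixup point and returns the difference expression sym@GOT minus that label, i.e. a pc-relative reference to the symbol's GOT slot. A toy model of the expression tree it constructs, with hypothetical node types standing in for MCExpr:

#include <memory>
#include <string>

struct Expr {
  enum Kind { GOTRef, LabelRef, Sub } K;
  std::string Name;                // symbol or label name for leaf nodes
  std::unique_ptr<Expr> LHS, RHS;  // children for Sub
};

// Builds "Sym@GOT - PCLabel", the shape returned for foo@GOT-. on Darwin.
std::unique_ptr<Expr> gotPCRel(const std::string &Sym,
                               const std::string &PCLabel) {
  auto L = std::make_unique<Expr>();
  L->K = Expr::GOTRef;
  L->Name = Sym;
  auto R = std::make_unique<Expr>();
  R->K = Expr::LabelRef;
  R->Name = PCLabel;
  auto S = std::make_unique<Expr>();
  S->K = Expr::Sub;
  S->LHS = std::move(L);
  S->RHS = std::move(R);
  return S;
}
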
class AArch64_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { public: + AArch64_MachoTargetObjectFile(); + const MCExpr *getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding, Mangler &Mang, const TargetMachine &TM, @@ -33,6 +35,11 @@ public: MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, MachineModuleInfo *MMI) const override; + + const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym, + const MCValue &MV, int64_t Offset, + MachineModuleInfo *MMI, + MCStreamer &Streamer) const override; }; } // end namespace llvm diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 0646d85..0533355 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -10,6 +10,7 @@ #include "AArch64TargetTransformInfo.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" @@ -352,7 +353,7 @@ unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // We don't lower vector selects well that are wider than the register width. if (ValTy->isVectorTy() && ISD == ISD::SELECT) { // We would need this many instructions to hide the scalarization happening. - unsigned AmortizationCost = 20; + const unsigned AmortizationCost = 20; static const TypeConversionCostTblEntry<MVT::SimpleValueType> VectorSelectTbl[] = { { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost }, @@ -426,6 +427,15 @@ unsigned AArch64TTIImpl::getMaxInterleaveFactor() { void AArch64TTIImpl::getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP) { + // Enable partial unrolling and runtime unrolling. + BaseT::getUnrollingPreferences(L, UP); + + // For inner loop, it is more likely to be a hot one, and the runtime check + // can be promoted out from LICM pass, so the overhead is less, let's try + // a larger threshold to unroll more loops. + if (L->getLoopDepth() > 1) + UP.PartialThreshold *= 2; + // Disable partial & runtime unrolling on -Os. UP.PartialOptSizeThreshold = 0; } diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 1960c99..1219ffc 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -113,11 +113,10 @@ public: #define GET_OPERAND_DIAGNOSTIC_TYPES #include "AArch64GenAsmMatcher.inc" }; - AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, - const MCInstrInfo &MII, - const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(_STI) { - MCAsmParserExtension::Initialize(_Parser); + AArch64AsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser, + const MCInstrInfo &MII, const MCTargetOptions &Options) + : MCTargetAsmParser(), STI(STI) { + MCAsmParserExtension::Initialize(Parser); MCStreamer &S = getParser().getStreamer(); if (S.getTargetStreamer() == nullptr) new AArch64TargetStreamer(S); @@ -205,6 +204,8 @@ private: struct BarrierOp { unsigned Val; // Not the enum since not all values have names. 
+ const char *Data; + unsigned Length; }; struct SysRegOp { @@ -221,6 +222,8 @@ private: struct PrefetchOp { unsigned Val; + const char *Data; + unsigned Length; }; struct ShiftExtendOp { @@ -254,8 +257,7 @@ private: MCContext &Ctx; public: - AArch64Operand(KindTy K, MCContext &_Ctx) - : MCParsedAsmOperand(), Kind(K), Ctx(_Ctx) {} + AArch64Operand(KindTy K, MCContext &Ctx) : Kind(K), Ctx(Ctx) {} AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) { Kind = o.Kind; @@ -349,6 +351,11 @@ public: return Barrier.Val; } + StringRef getBarrierName() const { + assert(Kind == k_Barrier && "Invalid access!"); + return StringRef(Barrier.Data, Barrier.Length); + } + unsigned getReg() const override { assert(Kind == k_Register && "Invalid access!"); return Reg.RegNum; @@ -384,6 +391,11 @@ public: return Prefetch.Val; } + StringRef getPrefetchName() const { + assert(Kind == k_Prefetch && "Invalid access!"); + return StringRef(Prefetch.Data, Prefetch.Length); + } + AArch64_AM::ShiftExtendType getShiftExtendType() const { assert(Kind == k_ShiftExtend && "Invalid access!"); return ShiftExtend.Type; @@ -752,58 +764,47 @@ public: } bool isMovZSymbolG3() const { - static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 }; - return isMovWSymbol(Variants); + return isMovWSymbol(AArch64MCExpr::VK_ABS_G3); } bool isMovZSymbolG2() const { - static AArch64MCExpr::VariantKind Variants[] = { - AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S, - AArch64MCExpr::VK_TPREL_G2, AArch64MCExpr::VK_DTPREL_G2}; - return isMovWSymbol(Variants); + return isMovWSymbol({AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S, + AArch64MCExpr::VK_TPREL_G2, + AArch64MCExpr::VK_DTPREL_G2}); } bool isMovZSymbolG1() const { - static AArch64MCExpr::VariantKind Variants[] = { - AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S, + return isMovWSymbol({ + AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S, AArch64MCExpr::VK_GOTTPREL_G1, AArch64MCExpr::VK_TPREL_G1, AArch64MCExpr::VK_DTPREL_G1, - }; - return isMovWSymbol(Variants); + }); } bool isMovZSymbolG0() const { - static AArch64MCExpr::VariantKind Variants[] = { - AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S, - AArch64MCExpr::VK_TPREL_G0, AArch64MCExpr::VK_DTPREL_G0}; - return isMovWSymbol(Variants); + return isMovWSymbol({AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S, + AArch64MCExpr::VK_TPREL_G0, + AArch64MCExpr::VK_DTPREL_G0}); } bool isMovKSymbolG3() const { - static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 }; - return isMovWSymbol(Variants); + return isMovWSymbol(AArch64MCExpr::VK_ABS_G3); } bool isMovKSymbolG2() const { - static AArch64MCExpr::VariantKind Variants[] = { - AArch64MCExpr::VK_ABS_G2_NC}; - return isMovWSymbol(Variants); + return isMovWSymbol(AArch64MCExpr::VK_ABS_G2_NC); } bool isMovKSymbolG1() const { - static AArch64MCExpr::VariantKind Variants[] = { - AArch64MCExpr::VK_ABS_G1_NC, AArch64MCExpr::VK_TPREL_G1_NC, - AArch64MCExpr::VK_DTPREL_G1_NC - }; - return isMovWSymbol(Variants); + return isMovWSymbol({AArch64MCExpr::VK_ABS_G1_NC, + AArch64MCExpr::VK_TPREL_G1_NC, + AArch64MCExpr::VK_DTPREL_G1_NC}); } bool isMovKSymbolG0() const { - static AArch64MCExpr::VariantKind Variants[] = { - AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC, - AArch64MCExpr::VK_TPREL_G0_NC, AArch64MCExpr::VK_DTPREL_G0_NC - }; - return isMovWSymbol(Variants); + return isMovWSymbol( + {AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC, + AArch64MCExpr::VK_TPREL_G0_NC, 
AArch64MCExpr::VK_DTPREL_G0_NC}); } template<int RegWidth, int Shift> @@ -1608,10 +1609,14 @@ public: return Op; } - static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val, SMLoc S, + static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val, + StringRef Str, + SMLoc S, MCContext &Ctx) { auto Op = make_unique<AArch64Operand>(k_Barrier, Ctx); Op->Barrier.Val = Val; + Op->Barrier.Data = Str.data(); + Op->Barrier.Length = Str.size(); Op->StartLoc = S; Op->EndLoc = S; return Op; @@ -1642,10 +1647,14 @@ public: return Op; } - static std::unique_ptr<AArch64Operand> CreatePrefetch(unsigned Val, SMLoc S, + static std::unique_ptr<AArch64Operand> CreatePrefetch(unsigned Val, + StringRef Str, + SMLoc S, MCContext &Ctx) { auto Op = make_unique<AArch64Operand>(k_Prefetch, Ctx); Op->Prefetch.Val = Val; + Op->Barrier.Data = Str.data(); + Op->Barrier.Length = Str.size(); Op->StartLoc = S; Op->EndLoc = S; return Op; @@ -1673,9 +1682,8 @@ void AArch64Operand::print(raw_ostream &OS) const { << AArch64_AM::getFPImmFloat(getFPImm()) << ") >"; break; case k_Barrier: { - bool Valid; - StringRef Name = AArch64DB::DBarrierMapper().toString(getBarrier(), Valid); - if (Valid) + StringRef Name = getBarrierName(); + if (!Name.empty()) OS << "<barrier " << Name << ">"; else OS << "<barrier invalid #" << getBarrier() << ">"; @@ -1718,9 +1726,8 @@ void AArch64Operand::print(raw_ostream &OS) const { OS << "c" << getSysCR(); break; case k_Prefetch: { - bool Valid; - StringRef Name = AArch64PRFM::PRFMMapper().toString(getPrefetch(), Valid); - if (Valid) + StringRef Name = getPrefetchName(); + if (!Name.empty()) OS << "<prfop " << Name << ">"; else OS << "<prfop invalid #" << getPrefetch() << ">"; @@ -1963,7 +1970,11 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) { return MatchOperand_ParseFail; } - Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext())); + bool Valid; + auto Mapper = AArch64PRFM::PRFMMapper(); + StringRef Name = Mapper.toString(MCE->getValue(), Valid); + Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Name, + S, getContext())); return MatchOperand_Success; } @@ -1973,14 +1984,16 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) { } bool Valid; - unsigned prfop = AArch64PRFM::PRFMMapper().fromString(Tok.getString(), Valid); + auto Mapper = AArch64PRFM::PRFMMapper(); + unsigned prfop = Mapper.fromString(Tok.getString(), Valid); if (!Valid) { TokError("pre-fetch hint expected"); return MatchOperand_ParseFail; } Parser.Lex(); // Eat identifier token. 
- Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext())); + Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Tok.getString(), + S, getContext())); return MatchOperand_Success; } @@ -2582,8 +2595,11 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { Error(ExprLoc, "barrier operand out of range"); return MatchOperand_ParseFail; } - Operands.push_back( - AArch64Operand::CreateBarrier(MCE->getValue(), ExprLoc, getContext())); + bool Valid; + auto Mapper = AArch64DB::DBarrierMapper(); + StringRef Name = Mapper.toString(MCE->getValue(), Valid); + Operands.push_back( AArch64Operand::CreateBarrier(MCE->getValue(), Name, + ExprLoc, getContext())); return MatchOperand_Success; } @@ -2593,7 +2609,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { } bool Valid; - unsigned Opt = AArch64DB::DBarrierMapper().fromString(Tok.getString(), Valid); + auto Mapper = AArch64DB::DBarrierMapper(); + unsigned Opt = Mapper.fromString(Tok.getString(), Valid); if (!Valid) { TokError("invalid barrier option name"); return MatchOperand_ParseFail; @@ -2605,8 +2622,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { return MatchOperand_ParseFail; } - Operands.push_back( - AArch64Operand::CreateBarrier(Opt, getLoc(), getContext())); + Operands.push_back( AArch64Operand::CreateBarrier(Opt, Tok.getString(), + getLoc(), getContext())); Parser.Lex(); // Consume the option return MatchOperand_Success; @@ -2631,8 +2648,8 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) { assert(IsKnown == (MSRReg != -1U) && "register should be -1 if and only if it's unknown"); - uint32_t PStateField = - AArch64PState::PStateMapper().fromString(Tok.getString(), IsKnown); + auto PStateMapper = AArch64PState::PStateMapper(); + uint32_t PStateField = PStateMapper.fromString(Tok.getString(), IsKnown); assert(IsKnown == (PStateField != -1U) && "register should be -1 if and only if it's unknown"); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 423da65..84b63a0 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -18,6 +18,7 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachO.h" using namespace llvm; @@ -493,14 +494,28 @@ void ELFAArch64AsmBackend::processFixupValue( IsResolved = false; } +// Returns whether this fixup is based on an address in the .eh_frame section, +// and therefore should be byte swapped. +// FIXME: Should be replaced with something more principled. 
+static bool isByteSwappedFixup(const MCExpr *E) { + MCValue Val; + if (!E->EvaluateAsRelocatable(Val, nullptr, nullptr)) + return false; + + if (!Val.getSymA() || Val.getSymA()->getSymbol().isUndefined()) + return false; + + const MCSectionELF *SecELF = + dyn_cast<MCSectionELF>(&Val.getSymA()->getSymbol().getSection()); + return SecELF->getSectionName() == ".eh_frame"; +} + void ELFAArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const { // store fixups in .eh_frame section in big endian order if (!IsLittleEndian && Fixup.getKind() == FK_Data_4) { - const MCSection *Sec = Fixup.getValue()->FindAssociatedSection(); - const MCSectionELF *SecELF = dyn_cast_or_null<const MCSectionELF>(Sec); - if (SecELF && SecELF->getSectionName() == ".eh_frame") + if (isByteSwappedFixup(Fixup.getValue())) Value = ByteSwap_32(unsigned(Value)); } AArch64AsmBackend::applyFixup (Fixup, Data, DataSize, Value, IsPCRel); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index 8dc6c30..8f780d2 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -203,24 +203,27 @@ void AArch64TargetELFStreamer::emitInst(uint32_t Inst) { } namespace llvm { -MCStreamer * -createAArch64MCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useDwarfDirectory, - MCInstPrinter *InstPrint, MCCodeEmitter *CE, - MCAsmBackend *TAB, bool ShowInst) { - MCStreamer *S = llvm::createAsmStreamer( - Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst); - new AArch64TargetAsmStreamer(*S, OS); - return S; +MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool isVerboseAsm) { + return new AArch64TargetAsmStreamer(S, OS); } MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter); - new AArch64TargetELFStreamer(*S); if (RelaxAll) S->getAssembler().setRelaxAll(true); return S; } + +MCTargetStreamer * +createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { + Triple TT(STI.getTargetTriple()); + if (TT.getObjectFormat() == Triple::ELF) + return new AArch64TargetELFStreamer(S); + return nullptr; +} } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 4756a19..9ea49f0 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -38,9 +38,7 @@ class AArch64MCCodeEmitter : public MCCodeEmitter { AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT public: - AArch64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, - MCContext &ctx) - : Ctx(ctx) {} + AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) : Ctx(ctx) {} ~AArch64MCCodeEmitter() {} @@ -205,9 +203,8 @@ public: MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx) { - return new AArch64MCCodeEmitter(MCII, STI, Ctx); + return new AArch64MCCodeEmitter(MCII, Ctx); } /// getMachineOpValue - Return binary encoding of operand. 
If the machine diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp index e396df8..9e31508 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELF.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/Object/ELF.h" diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 0f7a6b8..38b399d 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -123,94 +123,61 @@ static MCInstPrinter *createAArch64MCInstPrinter(const Target &T, return nullptr; } -static MCStreamer *createMCStreamer(const Target &T, StringRef TT, - MCContext &Ctx, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, bool RelaxAll) { - Triple TheTriple(TT); - - if (TheTriple.isOSDarwin()) - return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll, - /*LabelSections*/ true); - +static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, + MCAsmBackend &TAB, raw_ostream &OS, + MCCodeEmitter *Emitter, bool RelaxAll) { return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll); } +static MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, + bool DWARFMustBeAtTheEnd) { + return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll, + DWARFMustBeAtTheEnd, + /*LabelSections*/ true); +} + // Force static initialization. extern "C" void LLVMInitializeAArch64TargetMC() { - // Register the MC asm info. - RegisterMCAsmInfoFn X(TheAArch64leTarget, createAArch64MCAsmInfo); - RegisterMCAsmInfoFn Y(TheAArch64beTarget, createAArch64MCAsmInfo); - RegisterMCAsmInfoFn Z(TheARM64Target, createAArch64MCAsmInfo); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheAArch64leTarget, - createAArch64MCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheAArch64beTarget, - createAArch64MCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheARM64Target, - createAArch64MCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheAArch64leTarget, - createAArch64MCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheAArch64beTarget, - createAArch64MCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheARM64Target, - createAArch64MCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheAArch64leTarget, - createAArch64MCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheAArch64beTarget, - createAArch64MCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheARM64Target, - createAArch64MCRegisterInfo); - - // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(TheAArch64leTarget, - createAArch64MCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheAArch64beTarget, - createAArch64MCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheARM64Target, - createAArch64MCSubtargetInfo); + for (Target *T : + {&TheAArch64leTarget, &TheAArch64beTarget, &TheARM64Target}) { + // Register the MC asm info. + RegisterMCAsmInfoFn X(*T, createAArch64MCAsmInfo); + + // Register the MC codegen info. 
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createAArch64MCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createAArch64MCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(*T, createAArch64MCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, createAArch64MCSubtargetInfo); + + // Register the MC Code Emitter + TargetRegistry::RegisterMCCodeEmitter(*T, createAArch64MCCodeEmitter); + + // Register the obj streamers. + TargetRegistry::RegisterELFStreamer(*T, createELFStreamer); + TargetRegistry::RegisterMachOStreamer(*T, createMachOStreamer); + + // Register the obj target streamer. + TargetRegistry::RegisterObjectTargetStreamer( + *T, createAArch64ObjectTargetStreamer); + + // Register the asm streamer. + TargetRegistry::RegisterAsmTargetStreamer(*T, + createAArch64AsmTargetStreamer); + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(*T, createAArch64MCInstPrinter); + } // Register the asm backend. - TargetRegistry::RegisterMCAsmBackend(TheAArch64leTarget, - createAArch64leAsmBackend); + for (Target *T : {&TheAArch64leTarget, &TheARM64Target}) + TargetRegistry::RegisterMCAsmBackend(*T, createAArch64leAsmBackend); TargetRegistry::RegisterMCAsmBackend(TheAArch64beTarget, createAArch64beAsmBackend); - TargetRegistry::RegisterMCAsmBackend(TheARM64Target, - createAArch64leAsmBackend); - - // Register the MC Code Emitter - TargetRegistry::RegisterMCCodeEmitter(TheAArch64leTarget, - createAArch64MCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheAArch64beTarget, - createAArch64MCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheARM64Target, - createAArch64MCCodeEmitter); - - // Register the object streamer. - TargetRegistry::RegisterMCObjectStreamer(TheAArch64leTarget, - createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheAArch64beTarget, - createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheARM64Target, createMCStreamer); - - // Register the asm streamer. - TargetRegistry::RegisterAsmStreamer(TheAArch64leTarget, - createAArch64MCAsmStreamer); - TargetRegistry::RegisterAsmStreamer(TheAArch64beTarget, - createAArch64MCAsmStreamer); - TargetRegistry::RegisterAsmStreamer(TheARM64Target, - createAArch64MCAsmStreamer); - - // Register the MCInstPrinter. 
- TargetRegistry::RegisterMCInstPrinter(TheAArch64leTarget, - createAArch64MCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheAArch64beTarget, - createAArch64MCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheARM64Target, - createAArch64MCInstPrinter); } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h index 1553115..7ce303b 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -28,8 +28,10 @@ class MCRegisterInfo; class MCObjectWriter; class MCStreamer; class MCSubtargetInfo; +class MCTargetStreamer; class StringRef; class Target; +class Triple; class raw_ostream; extern Target TheAArch64leTarget; @@ -37,9 +39,8 @@ extern Target TheAArch64beTarget; extern Target TheARM64Target; MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx); + const MCRegisterInfo &MRI, + MCContext &Ctx); MCAsmBackend *createAArch64leAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); @@ -53,11 +54,14 @@ MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI, MCObjectWriter *createAArch64MachObjectWriter(raw_ostream &OS, uint32_t CPUType, uint32_t CPUSubtype); -MCStreamer * -createAArch64MCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useDwarfDirectory, - MCInstPrinter *InstPrint, MCCodeEmitter *CE, - MCAsmBackend *TAB, bool ShowInst); +MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool isVerboseAsm); + +MCTargetStreamer *createAArch64ObjectTargetStreamer(MCStreamer &S, + const MCSubtargetInfo &STI); + } // End llvm namespace // Defines symbolic names for AArch64 registers. 
This defines a mapping from diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index bc6c7a9..160c1c5 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -19,10 +19,10 @@ using namespace llvm; StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const { - for (unsigned i = 0; i < NumPairs; ++i) { - if (Pairs[i].Value == Value) { + for (unsigned i = 0; i < NumMappings; ++i) { + if (Mappings[i].Value == Value) { Valid = true; - return Pairs[i].Name; + return Mappings[i].Name; } } @@ -32,10 +32,10 @@ StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const { uint32_t AArch64NamedImmMapper::fromString(StringRef Name, bool &Valid) const { std::string LowerCaseName = Name.lower(); - for (unsigned i = 0; i < NumPairs; ++i) { - if (Pairs[i].Name == LowerCaseName) { + for (unsigned i = 0; i < NumMappings; ++i) { + if (Mappings[i].Name == LowerCaseName) { Valid = true; - return Pairs[i].Value; + return Mappings[i].Value; } } @@ -47,7 +47,7 @@ bool AArch64NamedImmMapper::validImm(uint32_t Value) const { return Value < TooBigImm; } -const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATMappings[] = { {"s1e1r", S1E1R}, {"s1e2r", S1E2R}, {"s1e3r", S1E3R}, @@ -63,9 +63,9 @@ const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATPairs[] = { }; AArch64AT::ATMapper::ATMapper() - : AArch64NamedImmMapper(ATPairs, 0) {} + : AArch64NamedImmMapper(ATMappings, 0) {} -const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierMappings[] = { {"oshld", OSHLD}, {"oshst", OSHST}, {"osh", OSH}, @@ -81,9 +81,9 @@ const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierPairs[] }; AArch64DB::DBarrierMapper::DBarrierMapper() - : AArch64NamedImmMapper(DBarrierPairs, 16u) {} + : AArch64NamedImmMapper(DBarrierMappings, 16u) {} -const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCMappings[] = { {"zva", ZVA}, {"ivac", IVAC}, {"isw", ISW}, @@ -95,25 +95,25 @@ const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCPairs[] = { }; AArch64DC::DCMapper::DCMapper() - : AArch64NamedImmMapper(DCPairs, 0) {} + : AArch64NamedImmMapper(DCMappings, 0) {} -const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICMappings[] = { {"ialluis", IALLUIS}, {"iallu", IALLU}, {"ivau", IVAU} }; AArch64IC::ICMapper::ICMapper() - : AArch64NamedImmMapper(ICPairs, 0) {} + : AArch64NamedImmMapper(ICMappings, 0) {} -const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBMappings[] = { {"sy", SY}, }; AArch64ISB::ISBMapper::ISBMapper() - : AArch64NamedImmMapper(ISBPairs, 16) {} + : AArch64NamedImmMapper(ISBMappings, 16) {} -const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMMappings[] = { {"pldl1keep", PLDL1KEEP}, {"pldl1strm", PLDL1STRM}, {"pldl2keep", PLDL2KEEP}, @@ -135,18 +135,18 @@ const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMPairs[] = { }; AArch64PRFM::PRFMMapper::PRFMMapper() - : AArch64NamedImmMapper(PRFMPairs, 32) {} + : 
AArch64NamedImmMapper(PRFMMappings, 32) {} -const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStatePairs[] = { +const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStateMappings[] = { {"spsel", SPSel}, {"daifset", DAIFSet}, {"daifclr", DAIFClr} }; AArch64PState::PStateMapper::PStateMapper() - : AArch64NamedImmMapper(PStatePairs, 0) {} + : AArch64NamedImmMapper(PStateMappings, 0) {} -const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = { {"mdccsr_el0", MDCCSR_EL0}, {"dbgdtrrx_el0", DBGDTRRX_EL0}, {"mdrar_el1", MDRAR_EL1}, @@ -247,11 +247,11 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSPairs[] = { AArch64SysReg::MRSMapper::MRSMapper(uint64_t FeatureBits) : SysRegMapper(FeatureBits) { - InstPairs = &MRSPairs[0]; - NumInstPairs = llvm::array_lengthof(MRSPairs); + InstMappings = &MRSMappings[0]; + NumInstMappings = llvm::array_lengthof(MRSMappings); } -const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRMappings[] = { {"dbgdtrtx_el0", DBGDTRTX_EL0}, {"oslar_el1", OSLAR_EL1}, {"pmswinc_el0", PMSWINC_EL0}, @@ -271,12 +271,12 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRPairs[] = { AArch64SysReg::MSRMapper::MSRMapper(uint64_t FeatureBits) : SysRegMapper(FeatureBits) { - InstPairs = &MSRPairs[0]; - NumInstPairs = llvm::array_lengthof(MSRPairs); + InstMappings = &MSRMappings[0]; + NumInstMappings = llvm::array_lengthof(MSRMappings); } -const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings[] = { {"osdtrrx_el1", OSDTRRX_EL1}, {"osdtrtx_el1", OSDTRTX_EL1}, {"teecr32_el1", TEECR32_EL1}, @@ -756,7 +756,7 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegPairs[] }; const AArch64NamedImmMapper::Mapping -AArch64SysReg::SysRegMapper::CycloneSysRegPairs[] = { +AArch64SysReg::SysRegMapper::CycloneSysRegMappings[] = { {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3} }; @@ -765,29 +765,29 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { std::string NameLower = Name.lower(); // First search the registers shared by all - for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { - if (SysRegPairs[i].Name == NameLower) { + for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) { + if (SysRegMappings[i].Name == NameLower) { Valid = true; - return SysRegPairs[i].Value; + return SysRegMappings[i].Value; } } // Next search for target specific registers if (FeatureBits & AArch64::ProcCyclone) { - for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) { - if (CycloneSysRegPairs[i].Name == NameLower) { + for (unsigned i = 0; i < array_lengthof(CycloneSysRegMappings); ++i) { + if (CycloneSysRegMappings[i].Name == NameLower) { Valid = true; - return CycloneSysRegPairs[i].Value; + return CycloneSysRegMappings[i].Value; } } } // Now try the instruction-specific registers (either read-only or // write-only). 
- for (unsigned i = 0; i < NumInstPairs; ++i) { - if (InstPairs[i].Name == NameLower) { + for (unsigned i = 0; i < NumInstMappings; ++i) { + if (InstMappings[i].Name == NameLower) { Valid = true; - return InstPairs[i].Value; + return InstMappings[i].Value; } } @@ -816,26 +816,26 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { std::string AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const { // First search the registers shared by all - for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { - if (SysRegPairs[i].Value == Bits) { - return SysRegPairs[i].Name; + for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) { + if (SysRegMappings[i].Value == Bits) { + return SysRegMappings[i].Name; } } // Next search for target specific registers if (FeatureBits & AArch64::ProcCyclone) { - for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) { - if (CycloneSysRegPairs[i].Value == Bits) { - return CycloneSysRegPairs[i].Name; + for (unsigned i = 0; i < array_lengthof(CycloneSysRegMappings); ++i) { + if (CycloneSysRegMappings[i].Value == Bits) { + return CycloneSysRegMappings[i].Name; } } } // Now try the instruction-specific registers (either read-only or // write-only). - for (unsigned i = 0; i < NumInstPairs; ++i) { - if (InstPairs[i].Value == Bits) { - return InstPairs[i].Name; + for (unsigned i = 0; i < NumInstMappings; ++i) { + if (InstMappings[i].Value == Bits) { + return InstMappings[i].Name; } } @@ -850,7 +850,7 @@ AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const { + "_c" + utostr(CRm) + "_" + utostr(Op2); } -const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIMappings[] = { {"ipas2e1is", IPAS2E1IS}, {"ipas2le1is", IPAS2LE1IS}, {"vmalle1is", VMALLE1IS}, @@ -886,4 +886,4 @@ const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIPairs[] = { }; AArch64TLBI::TLBIMapper::TLBIMapper() - : AArch64NamedImmMapper(TLBIPairs, 0) {} + : AArch64NamedImmMapper(TLBIMappings, 0) {} diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h index c60b09a..2ae6f52 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -283,8 +283,8 @@ struct AArch64NamedImmMapper { }; template<int N> - AArch64NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm) - : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {} + AArch64NamedImmMapper(const Mapping (&Mappings)[N], uint32_t TooBigImm) + : Mappings(&Mappings[0]), NumMappings(N), TooBigImm(TooBigImm) {} StringRef toString(uint32_t Value, bool &Valid) const; uint32_t fromString(StringRef Name, bool &Valid) const; @@ -294,8 +294,8 @@ struct AArch64NamedImmMapper { /// N being 0 indicates no immediate syntax-form is allowed. 
bool validImm(uint32_t Value) const; protected: - const Mapping *Pairs; - size_t NumPairs; + const Mapping *Mappings; + size_t NumMappings; uint32_t TooBigImm; }; @@ -317,7 +317,7 @@ namespace AArch64AT { }; struct ATMapper : AArch64NamedImmMapper { - const static Mapping ATPairs[]; + const static Mapping ATMappings[]; ATMapper(); }; @@ -341,7 +341,7 @@ namespace AArch64DB { }; struct DBarrierMapper : AArch64NamedImmMapper { - const static Mapping DBarrierPairs[]; + const static Mapping DBarrierMappings[]; DBarrierMapper(); }; @@ -361,7 +361,7 @@ namespace AArch64DC { }; struct DCMapper : AArch64NamedImmMapper { - const static Mapping DCPairs[]; + const static Mapping DCMappings[]; DCMapper(); }; @@ -378,7 +378,7 @@ namespace AArch64IC { struct ICMapper : AArch64NamedImmMapper { - const static Mapping ICPairs[]; + const static Mapping ICMappings[]; ICMapper(); }; @@ -394,7 +394,7 @@ namespace AArch64ISB { SY = 0xf }; struct ISBMapper : AArch64NamedImmMapper { - const static Mapping ISBPairs[]; + const static Mapping ISBMappings[]; ISBMapper(); }; @@ -424,7 +424,7 @@ namespace AArch64PRFM { }; struct PRFMMapper : AArch64NamedImmMapper { - const static Mapping PRFMPairs[]; + const static Mapping PRFMMappings[]; PRFMMapper(); }; @@ -439,7 +439,7 @@ namespace AArch64PState { }; struct PStateMapper : AArch64NamedImmMapper { - const static Mapping PStatePairs[]; + const static Mapping PStateMappings[]; PStateMapper(); }; @@ -1134,11 +1134,11 @@ namespace AArch64SysReg { // burdening the common AArch64NamedImmMapper with abstractions only needed in // this one case. struct SysRegMapper { - static const AArch64NamedImmMapper::Mapping SysRegPairs[]; - static const AArch64NamedImmMapper::Mapping CycloneSysRegPairs[]; + static const AArch64NamedImmMapper::Mapping SysRegMappings[]; + static const AArch64NamedImmMapper::Mapping CycloneSysRegMappings[]; - const AArch64NamedImmMapper::Mapping *InstPairs; - size_t NumInstPairs; + const AArch64NamedImmMapper::Mapping *InstMappings; + size_t NumInstMappings; uint64_t FeatureBits; SysRegMapper(uint64_t FeatureBits) : FeatureBits(FeatureBits) { } @@ -1147,12 +1147,12 @@ namespace AArch64SysReg { }; struct MSRMapper : SysRegMapper { - static const AArch64NamedImmMapper::Mapping MSRPairs[]; + static const AArch64NamedImmMapper::Mapping MSRMappings[]; MSRMapper(uint64_t FeatureBits); }; struct MRSMapper : SysRegMapper { - static const AArch64NamedImmMapper::Mapping MRSPairs[]; + static const AArch64NamedImmMapper::Mapping MRSMappings[]; MRSMapper(uint64_t FeatureBits); }; @@ -1197,7 +1197,7 @@ namespace AArch64TLBI { }; struct TLBIMapper : AArch64NamedImmMapper { - const static Mapping TLBIPairs[]; + const static Mapping TLBIMappings[]; TLBIMapper(); }; @@ -1229,7 +1229,7 @@ namespace AArch64II { MO_NO_FLAG, - MO_FRAGMENT = 0x7, + MO_FRAGMENT = 0xf, /// MO_PAGE - A symbol operand with this flag represents the pc-relative /// offset of the 4K page containing the symbol. This is used with the @@ -1257,26 +1257,31 @@ namespace AArch64II { /// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction MO_G0 = 6, + /// MO_HI12 - This flag indicates that a symbol operand represents the bits + /// 13-24 of a 64-bit address, used in a arithmetic immediate-shifted-left- + /// by-12-bits instruction. + MO_HI12 = 7, + /// MO_GOT - This flag indicates that a symbol operand represents the /// address of the GOT entry for the symbol, rather than the address of /// the symbol itself. 
- MO_GOT = 8, + MO_GOT = 0x10, /// MO_NC - Indicates whether the linker is expected to check the symbol /// reference for overflow. For example in an ADRP/ADD pair of relocations /// the ADRP usually does check, but not the ADD. - MO_NC = 0x10, + MO_NC = 0x20, /// MO_TLS - Indicates that the operand being accessed is some kind of /// thread-local symbol. On Darwin, only one type of thread-local access /// exists (pre linker-relaxation), but on ELF the TLSModel used for the /// referee will affect interpretation. - MO_TLS = 0x20, + MO_TLS = 0x40, /// MO_CONSTPOOL - This flag indicates that a symbol operand represents /// the address of a constant pool entry for the symbol, rather than the /// address of the symbol itself. - MO_CONSTPOOL = 0x40 + MO_CONSTPOOL = 0x80 }; } // end namespace AArch64II diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp index 387f1f6..7a1865c 100644 --- a/lib/Target/ARM/A15SDOptimizer.cpp +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -27,12 +27,15 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" +#include "ARMSubtarget.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <map> @@ -678,8 +681,13 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { } bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { - TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo()); - TRI = Fn.getSubtarget().getRegisterInfo(); + const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>(); + // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be + // enabled when NEON is available. + if (!(STI.isCortexA15() && STI.hasNEON())) + return false; + TII = STI.getInstrInfo(); + TRI = STI.getRegisterInfo(); MRI = &Fn.getRegInfo(); bool Modified = false; diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index f080c60..ce0aed9 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -167,9 +167,12 @@ def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true", def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true", "Support ARM v6M instructions", [HasV6Ops]>; +def HasV6KOps : SubtargetFeature<"v6k", "HasV6KOps", "true", + "Support ARM v6k instructions", + [HasV6Ops]>; def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true", "Support ARM v6t2 instructions", - [HasV6MOps, FeatureThumb2]>; + [HasV6MOps, HasV6KOps, FeatureThumb2]>; def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true", "Support ARM v7 instructions", [HasV6T2Ops, FeaturePerfMon]>; @@ -177,6 +180,9 @@ def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true", "Support ARM v8 instructions", [HasV7Ops, FeatureVirtualization, FeatureMP]>; +def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true", + "Support ARM v8.1a instructions", + [HasV8Ops, FeatureAClass, FeatureCRC]>; //===----------------------------------------------------------------------===// // ARM Processors supported. 
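The renumbering just above widens MO_FRAGMENT from three bits (0x7) to four (0xf) so that the new MO_HI12 fragment kind fits, which in turn pushes every independent flag (MO_GOT, MO_NC, MO_TLS, MO_CONSTPOOL) up by one bit. A self-contained sketch of the layout idea, an enumerated field packed under a mask with boolean flags above it; the values here are illustrative, not the AArch64II ones:

    #include <cassert>
    #include <cstdint>

    enum : uint16_t {
      FragmentMask = 0xf,  // low 4 bits hold an enumerated value, not a bitmask
      PageFrag     = 1,    // example fragment kinds
      PageOffFrag  = 2,
      Hi12Frag     = 7,    // the new kind that no longer fit in 3 bits
      GotFlag      = 0x10, // independent flags must start above FragmentMask
      NcFlag       = 0x20,
      TlsFlag      = 0x40,
    };

    // The fragment is extracted by masking; the flags are tested individually.
    uint16_t fragment(uint16_t TargetFlags) { return TargetFlags & FragmentMask; }
    bool isTLS(uint16_t TargetFlags) { return (TargetFlags & TlsFlag) != 0; }

    int main() {
      uint16_t F = Hi12Frag | TlsFlag;
      assert(fragment(F) == Hi12Frag && isTLS(F) && !(F & GotFlag));
    }

This is also why the change has to touch every flag at once: the flags are positional, so growing the field re-bases everything above it.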
@@ -320,12 +326,6 @@ def : ProcNoItin<"iwmmxt", [HasV5TEOps]>; def : Processor<"arm1136j-s", ARMV6Itineraries, [HasV6Ops]>; def : Processor<"arm1136jf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2, FeatureHasSlowFPVMLx]>; -def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6Ops]>; -def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2, - FeatureHasSlowFPVMLx]>; -def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6Ops]>; -def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2, - FeatureHasSlowFPVMLx]>; // V6M Processors. def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6MOps, FeatureNoARM, @@ -337,6 +337,14 @@ def : Processor<"cortex-m1", ARMV6Itineraries, [HasV6MOps, FeatureNoARM, def : Processor<"sc000", ARMV6Itineraries, [HasV6MOps, FeatureNoARM, FeatureDB, FeatureMClass]>; +// V6K Processors. +def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6KOps]>; +def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6KOps, FeatureVFP2, + FeatureHasSlowFPVMLx]>; +def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6KOps]>; +def : Processor<"mpcore", ARMV6Itineraries, [HasV6KOps, FeatureVFP2, + FeatureHasSlowFPVMLx]>; + // V6T2 Processors. def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops, FeatureDSPThumb2]>; @@ -449,6 +457,14 @@ def : ProcessorModel<"cyclone", SwiftModel, FeatureDB,FeatureDSPThumb2, FeatureHasRAS, FeatureZCZeroing]>; +// V8.1 Processors +def : ProcNoItin<"generic-armv8.1-a", [HasV8Ops, FeatureV8_1a, + FeatureDB, FeatureFPARMv8, + FeatureNEON, FeatureDSPThumb2, + FeatureHWDiv, FeatureHWDivARM, + FeatureTrustZone, FeatureT2XtPk, + FeatureCrypto]>; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 2544a01..102def1 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -120,9 +120,6 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { OutStreamer.EndCOFFSymbolDef(); } - // Have common code print out the function header with linkage info etc. - EmitFunctionHeader(); - // Emit the rest of the function body. EmitFunctionBody(); @@ -438,65 +435,6 @@ void ARMAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { Triple TT(TM.getTargetTriple()); - if (TT.isOSBinFormatMachO()) { - Reloc::Model RelocM = TM.getRelocationModel(); - if (RelocM == Reloc::PIC_ || RelocM == Reloc::DynamicNoPIC) { - // Declare all the text sections up front (before the DWARF sections - // emitted by AsmPrinter::doInitialization) so the assembler will keep - // them together at the beginning of the object file. This helps - // avoid out-of-range branches that are due a fundamental limitation of - // the way symbol offsets are encoded with the current Darwin ARM - // relocations. - const TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<const TargetLoweringObjectFileMachO &>( - getObjFileLowering()); - - // Collect the set of sections our functions will go into. - SetVector<const MCSection *, SmallVector<const MCSection *, 8>, - SmallPtrSet<const MCSection *, 8> > TextSections; - // Default text section comes first. - TextSections.insert(TLOFMacho.getTextSection()); - // Now any user defined text sections from function attributes. 
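The ARM.td hunks above hinge on transitive feature implication: HasV6T2Ops now implies HasV6KOps, which implies HasV6Ops, so every existing v6t2-or-later core picks up the v6k instructions without editing any processor definition (the hasNOP() change later in this patch relies on exactly that), while the ARM1176 and MPCore models move down to plain v6k. A standalone model of that closure computation, with hypothetical feature names:

    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    // Hypothetical model of SubtargetFeature implication: enabling a feature
    // enables everything it lists as implied, transitively.
    using ImpliesMap = std::map<std::string, std::vector<std::string>>;

    std::set<std::string> closure(const ImpliesMap &Implies,
                                  const std::string &Root) {
      std::set<std::string> Enabled;
      std::vector<std::string> Work{Root};
      while (!Work.empty()) {
        std::string F = Work.back();
        Work.pop_back();
        if (!Enabled.insert(F).second)
          continue;                       // already processed
        auto It = Implies.find(F);
        if (It != Implies.end())
          Work.insert(Work.end(), It->second.begin(), It->second.end());
      }
      return Enabled;
    }

    int main() {
      ImpliesMap M = {
          {"v6t2", {"v6m", "v6k", "thumb2"}},
          {"v6m", {"v6"}},
          {"v6k", {"v6"}},   // the new hop: v6t2 -> v6k -> v6
      };
      auto S = closure(M, "v6t2");
      return S.count("v6k") && S.count("v6") ? 0 : 1;
    }

The worklist makes the implication graph order-independent: a feature is processed once no matter how many chains reach it.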
- for (Module::iterator F = M.begin(), e = M.end(); F != e; ++F) - if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage()) - TextSections.insert(TLOFMacho.SectionForGlobal(F, *Mang, TM)); - // Now the coalescable sections. - TextSections.insert(TLOFMacho.getTextCoalSection()); - TextSections.insert(TLOFMacho.getConstTextCoalSection()); - - // Emit the sections in the .s file header to fix the order. - for (unsigned i = 0, e = TextSections.size(); i != e; ++i) - OutStreamer.SwitchSection(TextSections[i]); - - if (RelocM == Reloc::DynamicNoPIC) { - const MCSection *sect = - OutContext.getMachOSection("__TEXT", "__symbol_stub4", - MachO::S_SYMBOL_STUBS, - 12, SectionKind::getText()); - OutStreamer.SwitchSection(sect); - } else { - const MCSection *sect = - OutContext.getMachOSection("__TEXT", "__picsymbolstub4", - MachO::S_SYMBOL_STUBS, - 16, SectionKind::getText()); - OutStreamer.SwitchSection(sect); - } - const MCSection *StaticInitSect = - OutContext.getMachOSection("__TEXT", "__StaticInit", - MachO::S_REGULAR | - MachO::S_ATTR_PURE_INSTRUCTIONS, - SectionKind::getText()); - OutStreamer.SwitchSection(StaticInitSect); - } - - // Compiling with debug info should not affect the code - // generation. Ensure the cstring section comes before the - // optional __DWARF secion. Otherwise, PC-relative loads would - // have to use different instruction sequences at "-g" in order to - // reach global data in the same object file. - OutStreamer.SwitchSection(getObjFileLowering().getCStringSection()); - } - // Use unified assembler syntax. OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified); @@ -669,7 +607,7 @@ void ARMAsmPrinter::emitAttributes() { std::string CPUString = STI.getCPUString(); - if (CPUString != "generic") { + if (CPUString.find("generic") != 0) { //CPUString doesn't start with "generic" // FIXME: remove krait check when GNU tools support krait cpu if (STI.isKrait()) { ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9"); @@ -723,7 +661,8 @@ void ARMAsmPrinter::emitAttributes() { // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture if (STI.hasV8Ops()) ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, - ARMBuildAttrs::AllowNeonARMv8); + STI.hasV8_1a() ? ARMBuildAttrs::AllowNeonARMv8_1a: + ARMBuildAttrs::AllowNeonARMv8); } else { if (STI.hasFPARMv8()) // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one @@ -960,10 +899,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { MCSymbol *MCSym; if (ACPV->isLSDA()) { - SmallString<128> Str; - raw_svector_ostream OS(Str); - OS << DL->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber(); - MCSym = OutContext.GetOrCreateSymbol(OS.str()); + MCSym = getCurExceptionSym(); } else if (ACPV->isBlockAddress()) { const BlockAddress *BA = cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(); diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 50cb954..e475ae4 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -103,13 +103,16 @@ private: const MachineInstr *MI); public: - unsigned getISAEncoding(const Function *F) override { + unsigned getISAEncoding() override { // ARM/Darwin adds ISA to the DWARF info for each function. Triple TT(TM.getTargetTriple()); if (!TT.isOSBinFormatMachO()) return 0; - const ARMSubtarget &STI = TM.getSubtarget<ARMSubtarget>(*F); - return STI.isThumb() ? 
ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm; + bool isThumb = TT.getArch() == Triple::thumb || + TT.getArch() == Triple::thumbeb || + TT.getSubArch() == Triple::ARMSubArch_v7m || + TT.getSubArch() == Triple::ARMSubArch_v6m; + return isThumb ? ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm; } private: diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 29ee22e..7ee3cb0 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -37,6 +37,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -4115,19 +4116,21 @@ enum ARMExeDomain { // std::pair<uint16_t, uint16_t> ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { - // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON - // if they are not predicated. - if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) - return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); - - // CortexA9 is particularly picky about mixing the two and wants these - // converted. - if (Subtarget.isCortexA9() && !isPredicated(MI) && - (MI->getOpcode() == ARM::VMOVRS || - MI->getOpcode() == ARM::VMOVSR || - MI->getOpcode() == ARM::VMOVS)) - return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); - + // If we don't have access to NEON instructions then we won't be able + // to swizzle anything to the NEON domain. Check to make sure. + if (Subtarget.hasNEON()) { + // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON + // if they are not predicated. + if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) + return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)); + + // CortexA9 is particularly picky about mixing the two and wants these + // converted. + if (Subtarget.isCortexA9() && !isPredicated(MI) && + (MI->getOpcode() == ARM::VMOVRS || MI->getOpcode() == ARM::VMOVSR || + MI->getOpcode() == ARM::VMOVS)) + return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)); + } // No other instructions can be swizzled, so just determine their domain. unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask; @@ -4220,6 +4223,9 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { // Zap the predicate operands. assert(!isPredicated(MI) && "Cannot predicate a VORRd"); + // Make sure we've got NEON instructions. 
+ assert(Subtarget.hasNEON() && "VORRd requires NEON"); + // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits) DstReg = MI->getOperand(0).getReg(); SrcReg = MI->getOperand(1).getReg(); @@ -4507,7 +4513,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, } bool ARMBaseInstrInfo::hasNOP() const { - return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0; + return (Subtarget.getFeatureBits() & ARM::HasV6KOps) != 0; } bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const { diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 7574727..a8c7657 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -45,26 +45,27 @@ using namespace llvm; -ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti) - : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), BasePtr(ARM::R6) { +ARMBaseRegisterInfo::ARMBaseRegisterInfo() + : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), BasePtr(ARM::R6) {} + +static unsigned getFramePointerReg(const ARMSubtarget &STI) { if (STI.isTargetMachO()) { if (STI.isTargetDarwin() || STI.isThumb1Only()) - FramePtr = ARM::R7; + return ARM::R7; else - FramePtr = ARM::R11; + return ARM::R11; } else if (STI.isTargetWindows()) - FramePtr = ARM::R11; + return ARM::R11; else // ARM EABI - FramePtr = STI.isThumb() ? ARM::R7 : ARM::R11; + return STI.isThumb() ? ARM::R7 : ARM::R11; } const MCPhysReg* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>(); const MCPhysReg *RegList = STI.isTargetDarwin() ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; - if (!MF) return RegList; - const Function *F = MF->getFunction(); if (F->getCallingConv() == CallingConv::GHC) { // GHC set of callee saved regs is empty as all those regs are @@ -89,8 +90,10 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return RegList; } -const uint32_t* -ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { +const uint32_t * +ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { + const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); if (CC == CallingConv::GHC) // This is academic becase all GHC calls are (supposed to be) tail calls return CSR_NoRegs_RegMask; @@ -102,8 +105,10 @@ ARMBaseRegisterInfo::getNoPreservedMask() const { return CSR_NoRegs_RegMask; } -const uint32_t* -ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { +const uint32_t * +ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { + const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); // This should return a register mask that is the same as that returned by // getCallPreservedMask but that additionally preserves the register used for // the first i32 argument (which must also be the register used to return a @@ -121,7 +126,8 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { BitVector ARMBaseRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); + const TargetFrameLowering *TFI = STI.getFrameLowering(); // FIXME: avoid re-calculating this every time. 
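The execution-domain hunks above follow a small protocol that is easy to miss: getExecutionDomain() returns a pair of (current domain, bitmask of domains the instruction could legally use), and the domain-fixing pass only calls setExecutionDomain() for a domain whose bit is set, which is why the query must stop advertising NEON whenever the rewrite could not honour it. A reduced standalone model, with illustrative enum values:

    #include <cstdint>
    #include <utility>

    enum Domain : unsigned { General = 0, VFP = 1, NEON = 2 };

    // (current domain, bitmask of legal domains) for one instruction.
    std::pair<uint16_t, uint16_t> getDomain(bool IsVMOVCopy, bool Predicated,
                                            bool HasNEON) {
      // A VFP register copy can be re-encoded as a NEON VORR, but only when
      // the subtarget has NEON at all and the copy is unpredicated.
      if (IsVMOVCopy && !Predicated && HasNEON)
        return {VFP, (1u << VFP) | (1u << NEON)};
      return {VFP, 1u << VFP};  // pinned to its natural domain
    }

    // The fixer moves an instruction only if its mask admits the new domain.
    unsigned choose(uint16_t AllowedMask, unsigned Preferred, unsigned Current) {
      return (AllowedMask & (1u << Preferred)) ? Preferred : Current;
    }

Keeping the query and the rewrite in agreement is precisely what the new assert enforces.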
BitVector Reserved(getNumRegs()); @@ -130,7 +136,7 @@ getReservedRegs(const MachineFunction &MF) const { Reserved.set(ARM::FPSCR); Reserved.set(ARM::APSR_NZCV); if (TFI->hasFP(MF)) - Reserved.set(FramePtr); + Reserved.set(getFramePointerReg(STI)); if (hasBasePointer(MF)) Reserved.set(BasePtr); // Some targets reserve R9. @@ -150,9 +156,9 @@ getReservedRegs(const MachineFunction &MF) const { return Reserved; } -const TargetRegisterClass* -ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) - const { +const TargetRegisterClass * +ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, + const MachineFunction &) const { const TargetRegisterClass *Super = RC; TargetRegisterClass::sc_iterator I = RC->getSuperClasses(); do { @@ -187,7 +193,8 @@ ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { unsigned ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); + const TargetFrameLowering *TFI = STI.getFrameLowering(); switch (RC->getID()) { default: @@ -283,29 +290,6 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg, } } -bool -ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { - // CortexA9 has a Write-after-write hazard for NEON registers. - if (!STI.isLikeA9()) - return false; - - switch (RC->getID()) { - case ARM::DPRRegClassID: - case ARM::DPR_8RegClassID: - case ARM::DPR_VFP2RegClassID: - case ARM::QPRRegClassID: - case ARM::QPR_8RegClassID: - case ARM::QPR_VFP2RegClassID: - case ARM::SPRRegClassID: - case ARM::SPR_8RegClassID: - // Avoid reusing S, D, and Q registers. - // Don't increase register pressure for QQ and QQQQ. - return true; - default: - return false; - } -} - bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -350,7 +334,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { return false; // Stack realignment requires a frame pointer. If we already started // register allocation with frame pointer elimination, it is too late now. - if (!MRI->canReserveReg(FramePtr)) + if (!MRI->canReserveReg(getFramePointerReg(MF.getSubtarget<ARMSubtarget>()))) return false; // We may also need a base pointer if there are dynamic allocas or stack // pointer adjustments around calls. @@ -384,10 +368,11 @@ cannotEliminateFrame(const MachineFunction &MF) const { unsigned ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); + const TargetFrameLowering *TFI = STI.getFrameLowering(); if (TFI->hasFP(MF)) - return FramePtr; + return getFramePointerReg(STI); return ARM::SP; } @@ -539,7 +524,6 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // The incoming offset is relating to the SP at the start of the function, // but when we access the local it'll be relative to the SP after local // allocation, so adjust our SP-relative offset by that allocation size. - Offset = -Offset; Offset += MFI->getLocalFrameSize(); // Assume that we'll have at least some spill slots allocated. // FIXME: This is a total SWAG number. 
We should run some statistics @@ -552,9 +536,8 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // on whether there are any local variables that would trigger it. unsigned StackAlign = TFI->getStackAlignment(); if (TFI->hasFP(MF) && - (MI->getDesc().TSFlags & ARMII::AddrModeMask) != ARMII::AddrModeT1_s && !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) { - if (isFrameOffsetLegal(MI, FPOffset)) + if (isFrameOffsetLegal(MI, getFrameRegister(MF), FPOffset)) return false; } // If we can reference via the stack pointer, try that. @@ -562,7 +545,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // to only disallow SP relative references in the live range of // the VLA(s). In practice, it's unclear how much difference that // would make, but it may be worth doing. - if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, Offset)) + if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, ARM::SP, Offset)) return false; // The offset likely isn't legal, we want to allocate a virtual base register. @@ -625,7 +608,7 @@ void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, (void)Done; } -bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, +bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg, int64_t Offset) const { const MCInstrDesc &Desc = MI->getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); @@ -669,7 +652,7 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, NumBits = 8; break; case ARMII::AddrModeT1_s: - NumBits = 8; + NumBits = (BaseReg == ARM::SP ? 8 : 5); Scale = 4; isSigned = false; break; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 17027c2..fdc1ef9 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -21,10 +21,6 @@ #include "ARMGenRegisterInfo.inc" namespace llvm { - class ARMSubtarget; - class ARMBaseInstrInfo; - class Type; - /// Register allocation hints. namespace ARMRI { enum { @@ -82,27 +78,22 @@ static inline bool isCalleeSavedRegister(unsigned Reg, class ARMBaseRegisterInfo : public ARMGenRegisterInfo { protected: - const ARMSubtarget &STI; - - /// FramePtr - ARM physical register used as frame ptr. - unsigned FramePtr; - /// BasePtr - ARM physical register used as a base ptr in complex stack /// frames. I.e., when we need a 3rd base, not just SP and FP, due to /// variable size stack objects. unsigned BasePtr; // Can be only subclassed. - explicit ARMBaseRegisterInfo(const ARMSubtarget &STI); + explicit ARMBaseRegisterInfo(); // Return the opcode that implements 'Op', or 0 if no opcode unsigned getOpcode(int Op) const; public: /// Code Generation virtual methods... 
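The register-info changes above and the header changes below share one theme: with subtarget state now varying per function, nothing subtarget-dependent may be cached in the shared ARMBaseRegisterInfo object, so the frame-pointer register is re-derived from the function's own subtarget at every query instead of being fixed at construction. A sketch of that derive-don't-cache shape, with simplified stand-in flags and register names:

    // Stand-ins for the real subtarget queries; not the LLVM API.
    struct Subtarget {
      bool IsMachO, IsDarwin, IsWindows, IsThumb, IsThumb1Only;
    };

    enum Reg { R7, R11, SP };

    // Mirrors the decision tree of getFramePointerReg() above.
    Reg framePointerReg(const Subtarget &STI) {
      if (STI.IsMachO)
        return (STI.IsDarwin || STI.IsThumb1Only) ? R7 : R11;
      if (STI.IsWindows)
        return R11;
      return STI.IsThumb ? R7 : R11;  // ARM EABI
    }

    // Every caller re-derives the answer for its own function's subtarget.
    Reg frameRegister(const Subtarget &STI, bool HasFP) {
      return HasFP ? framePointerReg(STI) : SP;
    }

The same reasoning removes the STI member entirely: a single register-info instance can now serve Thumb and ARM functions in one module.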
- const MCPhysReg * - getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; - const uint32_t *getCallPreservedMask(CallingConv::ID) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const override; const uint32_t *getNoPreservedMask() const; /// getThisReturnPreservedMask - Returns a call preserved mask specific to the @@ -113,7 +104,8 @@ public: /// /// Should return NULL in the case that the calling convention does not have /// this property - const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; + const uint32_t *getThisReturnPreservedMask(const MachineFunction &MF, + CallingConv::ID) const; BitVector getReservedRegs(const MachineFunction &MF) const override; @@ -124,7 +116,8 @@ public: getCrossCopyRegClass(const TargetRegisterClass *RC) const override; const TargetRegisterClass * - getLargestLegalSuperClass(const TargetRegisterClass *RC) const override; + getLargestLegalSuperClass(const TargetRegisterClass *RC, + const MachineFunction &MF) const override; unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; @@ -138,8 +131,6 @@ public: void updateRegAllocHint(unsigned Reg, unsigned NewReg, MachineFunction &MF) const override; - bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const override; - bool hasBasePointer(const MachineFunction &MF) const; bool canRealignStack(const MachineFunction &MF) const; @@ -152,7 +143,7 @@ public: int64_t Offset) const override; void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, int64_t Offset) const override; - bool isFrameOffsetLegal(const MachineInstr *MI, + bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg, int64_t Offset) const override; bool cannotEliminateFrame(const MachineFunction &MF) const; diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 375d394..9c8d228 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -2265,7 +2265,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). - MIB.addRegMask(TRI.getCallPreservedMask(CC)); + MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); // Finish off the call including any return values. SmallVector<unsigned, 4> UsedRegs; @@ -2416,7 +2416,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). - MIB.addRegMask(TRI.getCallPreservedMask(CC)); + MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); // Finish off the call including any return values. 
SmallVector<unsigned, 4> UsedRegs; diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 5a5bd57..830953b 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -293,7 +293,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); unsigned Align = STI.getFrameLowering()->getStackAlignment(); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); @@ -742,8 +742,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, "This emitEpilogue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned Align = STI.getFrameLowering()->getStackAlignment(); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 6ebf640..44cd1ef 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -257,7 +257,7 @@ private: /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. - bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, + bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) override; // Form pairs of consecutive R, S, D, or Q registers. @@ -3086,7 +3086,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { // Store exclusive double return a i32 value which is the return status // of the issued store. - EVT ResTys[] = { MVT::i32, MVT::Other }; + const EVT ResTys[] = {MVT::i32, MVT::Other}; bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); // Place arguments in the right order. @@ -3472,9 +3472,10 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ bool ARMDAGToDAGISel:: -SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, +SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { - assert(ConstraintCode == 'm' && "unexpected asm memory constraint"); + assert(ConstraintID == InlineAsm::Constraint_m && + "unexpected asm memory constraint"); // Require the address to be in a register. That is safe for all ARM // variants and it is hard to do anything much smarter without knowing // how the operand is used. 
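The SelectInlineAsmMemoryOperand change above swaps a raw constraint character for an enumerated constraint ID (InlineAsm::Constraint_m here): the string is classified once, and everything downstream switches on the enum, which also makes multi-letter memory constraints representable. A hypothetical reduction of the scheme; the strings and enum values below are illustrative, not the full LLVM constraint set:

    #include <string>

    enum MemConstraint { Constraint_Unknown, Constraint_m, Constraint_Q,
                         Constraint_Um };

    // Classify the textual constraint once, up front.
    MemConstraint classify(const std::string &Code) {
      if (Code == "m") return Constraint_m;
      if (Code == "Q") return Constraint_Q;
      if (Code == "Um") return Constraint_Um;  // inexpressible as one char
      return Constraint_Unknown;
    }

    // Handlers then dispatch on the ID rather than re-parsing the string.
    bool selectMemoryOperand(MemConstraint ID) {
      switch (ID) {
      case Constraint_m:
        return true;  // safe fallback: force the address into a register
      default:
        return false; // finer-grained addressing modes would go here
      }
    }

ARM's getInlineAsmMemConstraint() later in this patch maps every string to Constraint_m for now, with a FIXME to differentiate them.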
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 56290aa..3b1b8dd 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -23,6 +23,7 @@ #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -40,6 +41,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" #include "llvm/MC/MCSectionMachO.h" @@ -47,6 +49,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" #include <utility> using namespace llvm; @@ -568,14 +571,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::LOAD); // It is legal to extload from v4i8 to v4i16 or v4i32. - MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8, - MVT::v4i16, MVT::v2i16, - MVT::v2i32}; - for (unsigned i = 0; i < 6; ++i) { + for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16, + MVT::v2i32}) { for (MVT VT : MVT::integer_vector_valuetypes()) { - setLoadExtAction(ISD::EXTLOAD, VT, Tys[i], Legal); - setLoadExtAction(ISD::ZEXTLOAD, VT, Tys[i], Legal); - setLoadExtAction(ISD::SEXTLOAD, VT, Tys[i], Legal); + setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal); + setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal); + setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal); } } } @@ -614,6 +615,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FRINT, MVT::f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom); setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); } @@ -869,14 +876,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // Various VFP goodness if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) { - // int <-> fp are custom expanded into bit_convert + ARMISD ops. - if (Subtarget->hasVFP2()) { - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); - } - // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded. 
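The loop rewritten above (an indexed array replaced by a range-for) records per-type legality facts: an extending vector load is keyed by the triple of extension kind, result type, and memory type, and here v4i8-style memory may be extended directly to wider integer vectors. A standalone model of such a table, using strings as stand-ins for MVTs; anything not registered defaults to Expand:

    #include <initializer_list>
    #include <map>
    #include <string>
    #include <tuple>

    enum ExtKind { EXT, ZEXT, SEXT };
    enum Action { Expand, Legal };
    using Key = std::tuple<ExtKind, std::string, std::string>;

    struct ExtLoadTable {
      std::map<Key, Action> T;

      // Register all three extension kinds at once, as the hunk above does.
      void setLegal(const std::string &Dst, const std::string &Src) {
        for (ExtKind K : {EXT, ZEXT, SEXT})
          T[{K, Dst, Src}] = Legal;
      }

      Action get(ExtKind K, const std::string &Dst,
                 const std::string &Src) const {
        auto It = T.find({K, Dst, Src});
        return It == T.end() ? Expand : It->second;
      }
    };

    int main() {
      ExtLoadTable Tbl;
      Tbl.setLegal("v4i16", "v4i8");
      Tbl.setLegal("v4i32", "v4i8");
      return Tbl.get(SEXT, "v4i32", "v4i8") == Legal ? 0 : 1;
    }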
if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) { setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); @@ -1033,11 +1032,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::RBIT: return "ARMISD::RBIT"; - case ARMISD::FTOSI: return "ARMISD::FTOSI"; - case ARMISD::FTOUI: return "ARMISD::FTOUI"; - case ARMISD::SITOF: return "ARMISD::SITOF"; - case ARMISD::UITOF: return "ARMISD::UITOF"; - case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; case ARMISD::RRX: return "ARMISD::RRX"; @@ -1164,6 +1158,20 @@ const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const { return TargetLowering::getRegClassFor(VT); } +// memcpy, and other memory intrinsics, typically tries to use LDM/STM if the +// source/dest is aligned and the copy size is large enough. We therefore want +// to align such objects passed to memory intrinsics. +bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, + unsigned &PrefAlign) const { + if (!isa<MemIntrinsic>(CI)) + return false; + MinSize = 8; + // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1 + // cycle faster than 4-byte aligned LDM. + PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4); + return true; +} + // Create a fast isel object. FastISel * ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, @@ -1815,16 +1823,16 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo(); if (isThisReturn) { // For 'this' returns, use the R0-preserving mask if applicable - Mask = ARI->getThisReturnPreservedMask(CallConv); + Mask = ARI->getThisReturnPreservedMask(MF, CallConv); if (!Mask) { // Set isThisReturn to false if the calling convention is not one that // allows 'returned' to be modeled in this way, so LowerCallResult does // not try to pass 'this' straight through isThisReturn = false; - Mask = ARI->getCallPreservedMask(CallConv); + Mask = ARI->getCallPreservedMask(MF, CallConv); } } else - Mask = ARI->getCallPreservedMask(CallConv); + Mask = ARI->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -1857,60 +1865,61 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, /// on the stack. Remember the next parameter register to allocate, /// and then confiscate the rest of the parameter registers to insure /// this. -void -ARMTargetLowering::HandleByVal( - CCState *State, unsigned &size, unsigned Align) const { - unsigned reg = State->AllocateReg(GPRArgRegs); +void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, + unsigned Align) const { assert((State->getCallOrPrologue() == Prologue || State->getCallOrPrologue() == Call) && "unhandled ParmContext"); - if ((ARM::R0 <= reg) && (reg <= ARM::R3)) { - if (Subtarget->isAAPCS_ABI() && Align > 4) { - unsigned AlignInRegs = Align / 4; - unsigned Waste = (ARM::R4 - reg) % AlignInRegs; - for (unsigned i = 0; i < Waste; ++i) - reg = State->AllocateReg(GPRArgRegs); - } - if (reg != 0) { - unsigned excess = 4 * (ARM::R4 - reg); - - // Special case when NSAA != SP and parameter size greater than size of - // all remained GPR regs. In that case we can't split parameter, we must - // send it to stack. We also must set NCRN to R4, so waste all - // remained registers. 
- const unsigned NSAAOffset = State->getNextStackOffset(); - if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) { - while (State->AllocateReg(GPRArgRegs)) - ; - return; - } + // Byval (as with any stack) slots are always at least 4 byte aligned. + Align = std::max(Align, 4U); - // First register for byval parameter is the first register that wasn't - // allocated before this method call, so it would be "reg". - // If parameter is small enough to be saved in range [reg, r4), then - // the end (first after last) register would be reg + param-size-in-regs, - // else parameter would be splitted between registers and stack, - // end register would be r4 in this case. - unsigned ByValRegBegin = reg; - unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4; - State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd); - // Note, first register is allocated in the beginning of function already, - // allocate remained amount of registers we need. - for (unsigned i = reg+1; i != ByValRegEnd; ++i) - State->AllocateReg(GPRArgRegs); - // A byval parameter that is split between registers and memory needs its - // size truncated here. - // In the case where the entire structure fits in registers, we set the - // size in memory to zero. - if (size < excess) - size = 0; - else - size -= excess; - } + unsigned Reg = State->AllocateReg(GPRArgRegs); + if (!Reg) + return; + + unsigned AlignInRegs = Align / 4; + unsigned Waste = (ARM::R4 - Reg) % AlignInRegs; + for (unsigned i = 0; i < Waste; ++i) + Reg = State->AllocateReg(GPRArgRegs); + + if (!Reg) + return; + + unsigned Excess = 4 * (ARM::R4 - Reg); + + // Special case when NSAA != SP and parameter size greater than size of + // all remained GPR regs. In that case we can't split parameter, we must + // send it to stack. We also must set NCRN to R4, so waste all + // remained registers. + const unsigned NSAAOffset = State->getNextStackOffset(); + if (NSAAOffset != 0 && Size > Excess) { + while (State->AllocateReg(GPRArgRegs)) + ; + return; } + + // First register for byval parameter is the first register that wasn't + // allocated before this method call, so it would be "reg". + // If parameter is small enough to be saved in range [reg, r4), then + // the end (first after last) register would be reg + param-size-in-regs, + // else parameter would be splitted between registers and stack, + // end register would be r4 in this case. + unsigned ByValRegBegin = Reg; + unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4); + State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd); + // Note, first register is allocated in the beginning of function already, + // allocate remained amount of registers we need. + for (unsigned i = Reg + 1; i != ByValRegEnd; ++i) + State->AllocateReg(GPRArgRegs); + // A byval parameter that is split between registers and memory needs its + // size truncated here. + // In the case where the entire structure fits in registers, we set the + // size in memory to zero. + Size = std::max<int>(Size - Excess, 0); } + /// MatchingStackOffset - Return true if the given stack call argument is /// already available in the same position (relatively) of the caller's /// incoming argument stack. 
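Old and new code here implement the same AAPCS rule, which the rewrite states much more directly: a byval argument claims r0-r3 starting at the next free register (skipping registers to honour its alignment), anything beyond r3 spills to the stack, and once the stack is already in use a large argument may not be split at all. A standalone model under those assumptions, with register indices 0..3 standing in for r0..r3:

    #include <algorithm>

    struct ByValSplit {
      int FirstReg;        // first GPR holding part of the argument (4 if none)
      int LastReg;         // one past the last GPR used
      unsigned StackBytes; // remainder passed on the stack
    };

    ByValSplit handleByVal(int NextFree, unsigned Size, unsigned Align,
                           bool StackAlreadyUsed) {
      Align = std::max(Align, 4u);           // byval slots are >= 4-aligned
      if (NextFree >= 4)
        return {4, 4, Size};                 // no argument GPRs left at all

      // Skip GPRs so the argument starts on an Align boundary; this is the
      // "Waste" loop in HandleByVal.
      int AlignInRegs = static_cast<int>(Align / 4);
      NextFree += (4 - NextFree) % AlignInRegs;
      if (NextFree >= 4)
        return {4, 4, Size};

      unsigned Excess = 4u * (4 - NextFree); // bytes the remaining GPRs hold
      // If the stack is already in use and the argument cannot fit entirely
      // in registers, it must not be split: it all goes to the stack, and the
      // GPRs are burned so later arguments cannot slot in ahead of it.
      if (StackAlreadyUsed && Size > Excess)
        return {4, 4, Size};

      int Last = std::min<int>(NextFree + Size / 4, 4);
      unsigned StackBytes = Size > Excess ? Size - Excess : 0;
      return {NextFree, Last, StackBytes};
    }

The last line is the Size = std::max<int>(Size - Excess, 0) trim in the hunk: when the whole aggregate fits in registers, its in-memory size drops to zero.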
@@ -1991,7 +2000,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (isCalleeStructRet || isCallerStructRet) return false; - // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo:: + // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo:: // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation // support in the assembler and linker to be used. This would need to be @@ -2819,50 +2828,6 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); } -void -ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, - unsigned InRegsParamRecordIdx, - unsigned ArgSize, - unsigned &ArgRegsSize, - unsigned &ArgRegsSaveSize) - const { - unsigned NumGPRs; - if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { - unsigned RBegin, REnd; - CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); - NumGPRs = REnd - RBegin; - } else { - unsigned int firstUnalloced; - firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs); - NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0; - } - - unsigned Align = Subtarget->getFrameLowering()->getStackAlignment(); - ArgRegsSize = NumGPRs * 4; - - // If parameter is split between stack and GPRs... - if (NumGPRs && Align > 4 && - (ArgRegsSize < ArgSize || - InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) { - // Add padding for part of param recovered from GPRs. For example, - // if Align == 8, its last byte must be at address K*8 - 1. - // We need to do it, since remained (stack) part of parameter has - // stack alignment, and we need to "attach" "GPRs head" without gaps - // to it: - // Stack: - // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes... - // [ [padding] [GPRs head] ] [ Tail passed via stack .... - // - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - unsigned Padding = - OffsetToAlignment(ArgRegsSize + AFI->getArgRegsSaveSize(), Align); - ArgRegsSaveSize = ArgRegsSize + Padding; - } else - // We don't need to extend regs save size for byval parameters if they - // are passed via GPRs only. - ArgRegsSaveSize = ArgRegsSize; -} - // The remaining GPRs hold either the beginning of variable-argument // data, or the beginning of an aggregate passed by value (usually // byval). Either way, we allocate stack slots adjacent to the data @@ -2876,13 +2841,8 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, const Value *OrigArg, unsigned InRegsParamRecordIdx, - unsigned OffsetFromOrigArg, - unsigned ArgOffset, - unsigned ArgSize, - bool ForceMutable, - unsigned ByValStoreOffset, - unsigned TotalArgRegsSaveSize) const { - + int ArgOffset, + unsigned ArgSize) const { // Currently, two use-cases possible: // Case #1. Non-var-args function, and we meet first byval parameter. 
// Setup first unallocated register as first byval register; @@ -2897,82 +2857,39 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - unsigned firstRegToSaveIndex, lastRegToSaveIndex; unsigned RBegin, REnd; if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); - firstRegToSaveIndex = RBegin - ARM::R0; - lastRegToSaveIndex = REnd - ARM::R0; } else { - firstRegToSaveIndex = CCInfo.getFirstUnallocated(GPRArgRegs); - lastRegToSaveIndex = 4; - } - - unsigned ArgRegsSize, ArgRegsSaveSize; - computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize, - ArgRegsSize, ArgRegsSaveSize); - - // Store any by-val regs to their spots on the stack so that they may be - // loaded by deferencing the result of formal parameter pointer or va_next. - // Note: once stack area for byval/varargs registers - // was initialized, it can't be initialized again. - if (ArgRegsSaveSize) { - unsigned Padding = ArgRegsSaveSize - ArgRegsSize; - - if (Padding) { - assert(AFI->getStoredByValParamsPadding() == 0 && - "The only parameter may be padded."); - AFI->setStoredByValParamsPadding(Padding); - } - - int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize, - Padding + - ByValStoreOffset - - (int64_t)TotalArgRegsSaveSize, - false); - SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy()); - if (Padding) { - MFI->CreateFixedObject(Padding, - ArgOffset + ByValStoreOffset - - (int64_t)ArgRegsSaveSize, - false); - } - - SmallVector<SDValue, 4> MemOps; - for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex; - ++firstRegToSaveIndex, ++i) { - const TargetRegisterClass *RC; - if (AFI->isThumb1OnlyFunction()) - RC = &ARM::tGPRRegClass; - else - RC = &ARM::GPRRegClass; + unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs); + RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx]; + REnd = ARM::R4; + } - unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC); - SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); - SDValue Store = - DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i), - false, false, 0); - MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, - DAG.getConstant(4, getPointerTy())); - } + if (REnd != RBegin) + ArgOffset = -4 * (ARM::R4 - RBegin); - AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize()); + int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false); + SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy()); - if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); - return FrameIndex; - } else { - if (ArgSize == 0) { - // We cannot allocate a zero-byte object for the first variadic argument, - // so just make up a size. - ArgSize = 4; - } - // This will point to the next argument passed via stack. - return MFI->CreateFixedObject( - ArgSize, ArgOffset, !ForceMutable); + SmallVector<SDValue, 4> MemOps; + const TargetRegisterClass *RC = + AFI->isThumb1OnlyFunction() ? 
&ARM::tGPRRegClass : &ARM::GPRRegClass; + + for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) { + unsigned VReg = MF.addLiveIn(Reg, RC); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); + SDValue Store = + DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(OrigArg, 4 * i), false, false, 0); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, + DAG.getConstant(4, getPointerTy())); } + + if (!MemOps.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); + return FrameIndex; } // Setup stack frame, the va_list pointer will start from. @@ -2990,11 +2907,9 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, // the result of va_next. // If there is no regs to be stored, just point address after last // argument passed via stack. - int FrameIndex = - StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr, - CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable, - 0, TotalArgRegsSaveSize); - + int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr, + CCInfo.getInRegsParamsCount(), + CCInfo.getNextStackOffset(), 4); AFI->setVarArgsFrameIndex(FrameIndex); } @@ -3020,7 +2935,6 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, isVarArg)); SmallVector<SDValue, 16> ArgValues; - int lastInsIndex = -1; SDValue ArgValue; Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin(); unsigned CurArgIdx = 0; @@ -3030,50 +2944,40 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // We also increase this value in case of varargs function. AFI->setArgRegsSaveSize(0); - unsigned ByValStoreOffset = 0; - unsigned TotalArgRegsSaveSize = 0; - unsigned ArgRegsSaveSizeMaxAlign = 4; - // Calculate the amount of stack space that we need to allocate to store // byval and variadic arguments that are passed in registers. // We need to know this before we allocate the first byval or variadic // argument, as they will be allocated a stack slot below the CFA (Canonical // Frame Address, the stack pointer at entry to the function). 
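The computation that follows replaces the old per-argument padding bookkeeping with a single reduction: find the lowest argument register that any byval or variadic argument occupies, and size the save area as everything from there up to r4, stored contiguously just below the CFA so the register head and the stack tail read as one object. In sketch form, with register indices 0..4 standing in for r0..r4:

    #include <algorithm>
    #include <vector>

    // TotalArgRegsSaveSize = 4 * (r4 - ArgRegBegin), as computed below.
    int saveAreaBytes(const std::vector<int> &FirstRegPerByval,
                      int FirstUnallocatedForVarargs /* 4 if not variadic */) {
      int ArgRegBegin = 4;                 // 4 == r4 == "nothing to spill"
      for (int R : FirstRegPerByval)
        ArgRegBegin = std::min(ArgRegBegin, R);
      ArgRegBegin = std::min(ArgRegBegin, FirstUnallocatedForVarargs);
      return 4 * (4 - ArgRegBegin);
    }

    // A range [RBegin, r4) of spilled registers then sits at a fixed negative
    // offset from the CFA, matching StoreByValRegs' -4 * (R4 - RBegin) above.
    int frameOffsetFor(int RBegin) { return -4 * (4 - RBegin); }

Because r0-r3 are spilled as one contiguous block, dereferencing a byval pointer or walking va_list crosses from the register-save area into the caller's stack without any padding gaps to account for.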
+ unsigned ArgRegBegin = ARM::R4; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount()) + break; + CCValAssign &VA = ArgLocs[i]; - if (VA.isMemLoc()) { - int index = VA.getValNo(); - if (index != lastInsIndex) { - ISD::ArgFlagsTy Flags = Ins[index].Flags; - if (Flags.isByVal()) { - unsigned ExtraArgRegsSize; - unsigned ExtraArgRegsSaveSize; - computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProcessed(), - Flags.getByValSize(), - ExtraArgRegsSize, ExtraArgRegsSaveSize); - - TotalArgRegsSaveSize += ExtraArgRegsSaveSize; - if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign) - ArgRegsSaveSizeMaxAlign = Flags.getByValAlign(); - CCInfo.nextInRegsParam(); - } - lastInsIndex = index; - } - } + unsigned Index = VA.getValNo(); + ISD::ArgFlagsTy Flags = Ins[Index].Flags; + if (!Flags.isByVal()) + continue; + + assert(VA.isMemLoc() && "unexpected byval pointer in reg"); + unsigned RBegin, REnd; + CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd); + ArgRegBegin = std::min(ArgRegBegin, RBegin); + + CCInfo.nextInRegsParam(); } CCInfo.rewindByValRegsInfo(); - lastInsIndex = -1; + + int lastInsIndex = -1; if (isVarArg && MFI->hasVAStart()) { - unsigned ExtraArgRegsSize; - unsigned ExtraArgRegsSaveSize; - computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0, - ExtraArgRegsSize, ExtraArgRegsSaveSize); - TotalArgRegsSaveSize += ExtraArgRegsSaveSize; + unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs); + if (RegIdx != array_lengthof(GPRArgRegs)) + ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]); } - // If the arg regs save area contains N-byte aligned values, the - // bottom of it must be at least N-byte aligned. - TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign); - TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U); + + unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin); + AFI->setArgRegsSaveSize(TotalArgRegsSaveSize); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -3178,18 +3082,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, "Byval arguments cannot be implicit"); unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed(); - ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign()); - int FrameIndex = StoreByValRegs( - CCInfo, DAG, dl, Chain, CurOrigArg, - CurByValIndex, - Ins[VA.getValNo()].PartOffset, - VA.getLocMemOffset(), - Flags.getByValSize(), - true /*force mutable frames*/, - ByValStoreOffset, - TotalArgRegsSaveSize); - ByValStoreOffset += Flags.getByValSize(); - ByValStoreOffset = std::min(ByValStoreOffset, 16U); + int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg, + CurByValIndex, VA.getLocMemOffset(), + Flags.getByValSize()); InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy())); CCInfo.nextInRegsParam(); } else { @@ -3894,7 +3789,6 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); if (VT.isVector()) return LowerVectorFP_TO_INT(Op, DAG); - if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) { RTLIB::Libcall LC; if (Op.getOpcode() == ISD::FP_TO_SINT) @@ -3907,20 +3801,7 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { /*isSigned*/ false, SDLoc(Op)).first; } - SDLoc dl(Op); - unsigned Opc; - - switch (Op.getOpcode()) { - default: llvm_unreachable("Invalid opcode!"); - case ISD::FP_TO_SINT: - 
Opc = ARMISD::FTOSI; - break; - case ISD::FP_TO_UINT: - Opc = ARMISD::FTOUI; - break; - } - Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); - return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); + return Op; } static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { @@ -3960,7 +3841,6 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); if (VT.isVector()) return LowerVectorINT_TO_FP(Op, DAG); - if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) { RTLIB::Libcall LC; if (Op.getOpcode() == ISD::SINT_TO_FP) @@ -3973,21 +3853,7 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { /*isSigned*/ false, SDLoc(Op)).first; } - SDLoc dl(Op); - unsigned Opc; - - switch (Op.getOpcode()) { - default: llvm_unreachable("Invalid opcode!"); - case ISD::SINT_TO_FP: - Opc = ARMISD::SITOF; - break; - case ISD::UINT_TO_FP: - Opc = ARMISD::UITOF; - break; - } - - Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); - return DAG.getNode(Opc, dl, VT, Op); + return Op; } SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { @@ -7239,16 +7105,20 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, // Load an immediate to varEnd. unsigned varEnd = MRI.createVirtualRegister(TRC); - if (IsThumb2) { + if (Subtarget->useMovt(*MF)) { unsigned Vtmp = varEnd; if ((LoopSize & 0xFFFF0000) != 0) Vtmp = MRI.createVirtualRegister(TRC); - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp) - .addImm(LoopSize & 0xFFFF)); + AddDefaultPred(BuildMI(BB, dl, + TII->get(IsThumb2 ? ARM::t2MOVi16 : ARM::MOVi16), + Vtmp).addImm(LoopSize & 0xFFFF)); if ((LoopSize & 0xFFFF0000) != 0) - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd) - .addReg(Vtmp).addImm(LoopSize >> 16)); + AddDefaultPred(BuildMI(BB, dl, + TII->get(IsThumb2 ? ARM::t2MOVTi16 : ARM::MOVTi16), + varEnd) + .addReg(Vtmp) + .addImm(LoopSize >> 16)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); @@ -10076,6 +9946,28 @@ bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { return false; } +bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { + EVT VT = ExtVal.getValueType(); + + if (!isTypeLegal(VT)) + return false; + + // Don't create a loadext if we can fold the extension into a wide/long + // instruction. + // If there's more than one user instruction, the loadext is desirable no + // matter what. There can be two uses by the same instruction. + if (ExtVal->use_empty() || + !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode())) + return true; + + SDNode *U = *ExtVal->use_begin(); + if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB || + U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHL)) + return false; + + return true; +} + bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; @@ -10289,9 +10181,9 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { // Thumb2 and ARM modes can use cmn for negative immediates. 
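Concretely, the rule the lines below implement: an icmp immediate is legal if its absolute value encodes as an ARM modified immediate, since a negative immediate can be handled by flipping cmp rN, #imm into cmn rN, #-imm (std::abs now does the job of the removed llvm::abs64). A self-contained version, including the classic rotated-8-bit encodability check:

    #include <cstdint>

    // ARM modified immediate: an 8-bit value rotated right by an even amount.
    // Testing is easiest in the other direction: some even left-rotation of V
    // must fit in 8 bits.
    bool isSOImm(uint32_t V) {
      for (unsigned R = 0; R < 32; R += 2) {
        uint32_t Rot = (V << R) | (R ? (V >> (32 - R)) : 0);
        if (Rot <= 0xFF)
          return true;
      }
      return false;
    }

    bool isLegalICmpImmediate(int64_t Imm) {
      if (Imm == INT64_MIN)              // negating would overflow
        return false;
      int64_t A = Imm < 0 ? -Imm : Imm;  // abs: the cmn route for negatives
      return A <= 0xFFFFFFFF && isSOImm(static_cast<uint32_t>(A));
    }

Thumb2 has its own encodability test (getT2SOImmVal below), and Thumb1, with no cmn and only 8-bit immediates, simply requires 0 <= Imm <= 255.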
if (!Subtarget->isThumb()) - return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1; + return ARM_AM::getSOImmVal(std::abs(Imm)) != -1; if (Subtarget->isThumb2()) - return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1; + return ARM_AM::getT2SOImmVal(std::abs(Imm)) != -1; // Thumb1 doesn't have cmn, and only 8-bit immediates. return Imm >= 0 && Imm <= 255; } @@ -10302,7 +10194,7 @@ bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { /// immediate into a register. bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const { // Same encoding for add/sub, just flip the sign. - int64_t AbsImm = llvm::abs64(Imm); + int64_t AbsImm = std::abs(Imm); if (!Subtarget->isThumb()) return ARM_AM::getSOImmVal(AbsImm) != -1; if (Subtarget->isThumb2()) @@ -11198,9 +11090,12 @@ bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { // For the real atomic operations, we have ldrex/strex up to 32 bits, // and up to 64 bits on the non-M profiles -bool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { +TargetLoweringBase::AtomicRMWExpansionKind +ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - return Size <= (Subtarget->isMClass() ? 32U : 64U); + return (Size <= (Subtarget->isMClass() ? 32U : 64U)) + ? AtomicRMWExpansionKind::LLSC + : AtomicRMWExpansionKind::None; } // This has so far only been implemented for MachO. diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index ec1407d..dd4c954 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -65,11 +65,6 @@ namespace llvm { RBIT, // ARM bitreverse instruction - FTOSI, // FP to sint within a FP register. - FTOUI, // FP to uint within a FP register. - SITOF, // sint to FP within a FP register. - UITOF, // uint to FP within a FP register. - SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out. SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out. RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag. @@ -283,6 +278,8 @@ namespace llvm { using TargetLowering::isZExtFree; bool isZExtFree(SDValue Val, EVT VT2) const override; + bool isVectorLoadExtDesirable(SDValue ExtVal) const override; + bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; @@ -346,6 +343,12 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; + unsigned getInlineAsmMemConstraint( + const std::string &ConstraintCode) const override { + // FIXME: Map different constraints differently. + return InlineAsm::Constraint_m; + } + const ARMSubtarget* getSubtarget() const { return Subtarget; } @@ -360,6 +363,9 @@ namespace llvm { return true; } + bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, + unsigned &PrefAlign) const override; + /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. 
FastISel *createFastISel(FunctionLoweringInfo &funcInfo, @@ -404,7 +410,8 @@ namespace llvm { bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override; bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + TargetLoweringBase::AtomicRMWExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; bool useLoadStackGuardNode() const override; @@ -525,12 +532,8 @@ namespace llvm { SDLoc dl, SDValue &Chain, const Value *OrigArg, unsigned InRegsParamRecordIdx, - unsigned OffsetFromOrigArg, - unsigned ArgOffset, - unsigned ArgSize, - bool ForceMutable, - unsigned ByValStoreOffset, - unsigned TotalArgRegsSaveSize) const; + int ArgOffset, + unsigned ArgSize) const; void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, @@ -538,12 +541,6 @@ namespace llvm { unsigned TotalArgRegsSaveSize, bool ForceMutable = false) const; - void computeRegArea(CCState &CCInfo, MachineFunction &MF, - unsigned InRegsParamRecordIdx, - unsigned ArgSize, - unsigned &ArgRegsSize, - unsigned &ArgRegsSaveSize) const; - SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const override; diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 7d27cf3..e79608d 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -983,7 +983,12 @@ class ARMV5MOPat<dag pattern, dag result> : Pat<pattern, result> { class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> { list<Predicate> Predicates = [IsARM, HasV6]; } - +class VFPPat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [HasVFP2]; +} +class VFPNoNEONPat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [HasVFP2, DontUseNEONForFP]; +} //===----------------------------------------------------------------------===// // Thumb Instruction Format Definitions. // diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index bc617f0..7c004c9 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -30,8 +30,7 @@ using namespace llvm; ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI), RI(STI) { -} + : ARMBaseInstrInfo(STI), RI() {} /// getNoopForMachoTarget - Return the noop instruction to use for a noop. void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { @@ -146,6 +145,10 @@ namespace { return false; const ARMSubtarget &STI = static_cast<const ARMSubtarget &>(MF.getSubtarget()); + // Don't do this for Thumb1. 
+ if (STI.isThumb1Only()) + return false; + const TargetMachine &TM = MF.getTarget(); if (TM.getRelocationModel() != Reloc::PIC_) return false; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 126c552..c3984ca 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -199,6 +199,9 @@ def HasV6M : Predicate<"Subtarget->hasV6MOps()">, def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">, AssemblerPredicate<"HasV6T2Ops", "armv6t2">; def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; +def HasV6K : Predicate<"Subtarget->hasV6KOps()">, + AssemblerPredicate<"HasV6KOps", "armv6k">; +def NoV6K : Predicate<"!Subtarget->hasV6KOps()">; def HasV7 : Predicate<"Subtarget->hasV7Ops()">, AssemblerPredicate<"HasV7Ops", "armv7">; def HasV8 : Predicate<"Subtarget->hasV8Ops()">, @@ -223,6 +226,8 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">, AssemblerPredicate<"FeatureCrypto", "crypto">; def HasCRC : Predicate<"Subtarget->hasCRC()">, AssemblerPredicate<"FeatureCRC", "crc">; +def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">, + AssemblerPredicate<"FeatureV8_1a", "v8.1a">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16","half-float">; def HasDivide : Predicate<"Subtarget->hasDivide()">, @@ -1835,11 +1840,11 @@ def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary, let Inst{7-0} = imm; } -def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>; -def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6T2]>; -def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6T2]>; -def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>; -def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>; +def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6K]>; +def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6K]>; +def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6K]>; +def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6K]>; +def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6K]>; def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>; def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 2a7b4b5..a6a07a8 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -2790,7 +2790,7 @@ class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, imm:$lane)))))))]>; class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, SDNode MulOp, SDNode ShOp> + ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp> : N3VLane16<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), @@ -2826,7 +2826,7 @@ class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - SDNode MulOp, SDNode ShOp> + SDPatternOperator MulOp, SDPatternOperator ShOp> : N3VLane16<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), @@ -3674,7 +3674,7 @@ multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, multiclass N3VMulOpSL_HS<bits<4> op11_8, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass 
itinQ32, - string OpcodeStr, string Dt, SDNode ShOp> { + string OpcodeStr, string Dt, SDPatternOperator ShOp> { def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16, OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>; def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32, @@ -3711,27 +3711,38 @@ multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, } // Neon 3-argument intrinsics, -// element sizes of 8, 16 and 32 bits: -multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, - InstrItinClass itinD, InstrItinClass itinQ, +// element sizes of 16 and 32 bits: +multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, SDPatternOperator IntOp> { // 64-bit vector types. - def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD, - OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; - def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD, + def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16, OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; - def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD, + def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32, OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; // 128-bit vector types. - def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ, - OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; - def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ, + def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16, OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; - def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ, + def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32, OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; } +// element sizes of 8, 16 and 32 bits: +multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, SDPatternOperator IntOp> + :N3VInt3_HS <op24, op23, op11_8, op4, itinD16, itinD32, + itinQ16, itinQ32, OpcodeStr, Dt, IntOp>{ + // 64-bit vector types. + def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16, + OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; + // 128-bit vector types. 
+ def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16, + OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; +} // Neon Long Multiply-Op vector operations, // element sizes of 8, 16 and 32 bits: @@ -4305,6 +4316,147 @@ defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>; defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>; +let Predicates = [HasNEON, HasV8_1a] in { + // v8.1a Neon Rounding Double Multiply-Op vector operations, + // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate Long + // (Q += D * D) + defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", + null_frag>; + def : Pat<(v4i16 (int_arm_neon_vqadds + (v4i16 DPR:$src1), + (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), + (v4i16 DPR:$Vm))))), + (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; + def : Pat<(v2i32 (int_arm_neon_vqadds + (v2i32 DPR:$src1), + (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), + (v2i32 DPR:$Vm))))), + (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; + def : Pat<(v8i16 (int_arm_neon_vqadds + (v8i16 QPR:$src1), + (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), + (v8i16 QPR:$Vm))))), + (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; + def : Pat<(v4i32 (int_arm_neon_vqadds + (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), + (v4i32 QPR:$Vm))))), + (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; + + defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", + null_frag>; + def : Pat<(v4i16 (int_arm_neon_vqadds + (v4i16 DPR:$src1), + (v4i16 (int_arm_neon_vqrdmulh + (v4i16 DPR:$Vn), + (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), + imm:$lane)))))), + (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, + imm:$lane))>; + def : Pat<(v2i32 (int_arm_neon_vqadds + (v2i32 DPR:$src1), + (v2i32 (int_arm_neon_vqrdmulh + (v2i32 DPR:$Vn), + (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), + imm:$lane)))))), + (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, + imm:$lane))>; + def : Pat<(v8i16 (int_arm_neon_vqadds + (v8i16 QPR:$src1), + (v8i16 (int_arm_neon_vqrdmulh + (v8i16 QPR:$src2), + (v8i16 (NEONvduplane (v8i16 QPR:$src3), + imm:$lane)))))), + (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1), + (v8i16 QPR:$src2), + (v4i16 (EXTRACT_SUBREG + QPR:$src3, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; + def : Pat<(v4i32 (int_arm_neon_vqadds + (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqrdmulh + (v4i32 QPR:$src2), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), + imm:$lane)))))), + (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1), + (v4i32 QPR:$src2), + (v2i32 (EXTRACT_SUBREG + QPR:$src3, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + + // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract Long + // (Q -= D * D) + defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", + null_frag>; + def : Pat<(v4i16 (int_arm_neon_vqsubs + (v4i16 DPR:$src1), + (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), + (v4i16 DPR:$Vm))))), + (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; + def : Pat<(v2i32 (int_arm_neon_vqsubs + (v2i32 DPR:$src1), + (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), + (v2i32 DPR:$Vm))))), + (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; + def : Pat<(v8i16 (int_arm_neon_vqsubs + (v8i16 QPR:$src1), + (v8i16 
(int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), + (v8i16 QPR:$Vm))))), + (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; + def : Pat<(v4i32 (int_arm_neon_vqsubs + (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), + (v4i32 QPR:$Vm))))), + (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; + + defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", + null_frag>; + def : Pat<(v4i16 (int_arm_neon_vqsubs + (v4i16 DPR:$src1), + (v4i16 (int_arm_neon_vqrdmulh + (v4i16 DPR:$Vn), + (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), + imm:$lane)))))), + (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>; + def : Pat<(v2i32 (int_arm_neon_vqsubs + (v2i32 DPR:$src1), + (v2i32 (int_arm_neon_vqrdmulh + (v2i32 DPR:$Vn), + (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), + imm:$lane)))))), + (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, + imm:$lane))>; + def : Pat<(v8i16 (int_arm_neon_vqsubs + (v8i16 QPR:$src1), + (v8i16 (int_arm_neon_vqrdmulh + (v8i16 QPR:$src2), + (v8i16 (NEONvduplane (v8i16 QPR:$src3), + imm:$lane)))))), + (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1), + (v8i16 QPR:$src2), + (v4i16 (EXTRACT_SUBREG + QPR:$src3, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; + def : Pat<(v4i32 (int_arm_neon_vqsubs + (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqrdmulh + (v4i32 QPR:$src2), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), + imm:$lane)))))), + (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1), + (v4i32 QPR:$src2), + (v2i32 (EXTRACT_SUBREG + QPR:$src3, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; +} // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, "vqdmlal", "s", null_frag>; @@ -6158,6 +6310,21 @@ class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst> (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>; +class NVCVTIFPat<SDNode OpNode, NeonI Inst> + : NEONFPPat<(f32 (OpNode GPR:$a)), + (f32 (EXTRACT_SUBREG + (v2f32 (Inst + (INSERT_SUBREG + (v2f32 (IMPLICIT_DEF)), + (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))), + ssub_0))>; +class NVCVTFIPat<SDNode OpNode, NeonI Inst> + : NEONFPPat<(i32 (OpNode SPR:$a)), + (i32 (EXTRACT_SUBREG + (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$a, ssub_0))), + ssub_0))>; + def : N3VSPat<fadd, VADDfd>; def : N3VSPat<fsub, VSUBfd>; def : N3VSPat<fmul, VMULfd>; @@ -6173,10 +6340,22 @@ def : N2VSPat<fabs, VABSfd>; def : N2VSPat<fneg, VNEGfd>; def : N3VSPat<NEONfmax, VMAXfd>; def : N3VSPat<NEONfmin, VMINfd>; -def : N2VSPat<arm_ftosi, VCVTf2sd>; -def : N2VSPat<arm_ftoui, VCVTf2ud>; -def : N2VSPat<arm_sitof, VCVTs2fd>; -def : N2VSPat<arm_uitof, VCVTu2fd>; +def : NVCVTFIPat<fp_to_sint, VCVTf2sd>; +def : NVCVTFIPat<fp_to_uint, VCVTf2ud>; +def : NVCVTIFPat<sint_to_fp, VCVTs2fd>; +def : NVCVTIFPat<uint_to_fp, VCVTu2fd>; + +// NEON doesn't have any f64 conversions, so provide patterns to make +// sure the VFP conversions match when extracting from a vector. 
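// Concretely (illustration, not patch text): for
//   (f64 (sint_to_fp (extractelt (v2i32 DPR:$src), 1)))
// the patterns below select VSITOD on the lane's S subregister, i.e.
// vcvt.f64.s32 d0, s1, rather than first moving the lane out through a
// core register.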
+def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))), + (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; +def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))), + (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>; +def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))), + (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; +def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))), + (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>; + // Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers. def : Pat<(f32 (bitconvert GPR:$a)), diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index e0a9314..afff016 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -11,16 +11,10 @@ // //===----------------------------------------------------------------------===// -def SDT_FTOI : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>; -def SDT_ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>; def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>; def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; -def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>; -def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>; -def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>; -def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>; def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>; def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>; def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>; @@ -633,7 +627,7 @@ multiclass vcvt_inst<string opc, bits<2> rm, def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"), - [(set SPR:$Sd, (arm_ftosi (node SPR:$Sm)))]>, + []>, Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } @@ -641,7 +635,7 @@ multiclass vcvt_inst<string opc, bits<2> rm, def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"), - [(set SPR:$Sd, (arm_ftoui (node SPR:$Sm)))]>, + []>, Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } @@ -649,7 +643,7 @@ multiclass vcvt_inst<string opc, bits<2> rm, def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, (outs SPR:$Sd), (ins DPR:$Dm), NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"), - [(set SPR:$Sd, (arm_ftosi (f64 (node (f64 DPR:$Dm)))))]>, + []>, Requires<[HasFPARMv8, HasDPVFP]> { bits<5> Dm; @@ -664,7 +658,7 @@ multiclass vcvt_inst<string opc, bits<2> rm, def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, (outs SPR:$Sd), (ins DPR:$Dm), NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"), - [(set SPR:$Sd, (arm_ftoui (f64 (node (f64 DPR:$Dm)))))]>, + []>, Requires<[HasFPARMv8, HasDPVFP]> { bits<5> Dm; @@ -676,6 +670,27 @@ multiclass vcvt_inst<string opc, bits<2> rm, let Inst{8} = 1; } } + + let Predicates = [HasFPARMv8] in { + def : Pat<(i32 (fp_to_sint (node SPR:$a))), + (COPY_TO_REGCLASS + (!cast<Instruction>(NAME#"SS") SPR:$a), + GPR)>; + def : Pat<(i32 (fp_to_uint (node SPR:$a))), + (COPY_TO_REGCLASS + (!cast<Instruction>(NAME#"US") SPR:$a), + GPR)>; + } + let Predicates = [HasFPARMv8, HasDPVFP] in { + def : Pat<(i32 (fp_to_sint (node (f64 DPR:$a)))), + (COPY_TO_REGCLASS + (!cast<Instruction>(NAME#"SD") DPR:$a), + GPR)>; + def : Pat<(i32 (fp_to_uint (node (f64 DPR:$a)))), + (COPY_TO_REGCLASS + 
(!cast<Instruction>(NAME#"UD") DPR:$a), + GPR)>; + } } defm VCVTA : vcvt_inst<"a", 0b00, frnd>; @@ -980,14 +995,22 @@ class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm", - [(set DPR:$Dd, (f64 (arm_sitof SPR:$Sm)))]> { + []> { let Inst{7} = 1; // s32 } +let Predicates=[HasVFP2, HasDPVFP] in { + def : VFPPat<(f64 (sint_to_fp GPR:$a)), + (VSITOD (COPY_TO_REGCLASS GPR:$a, SPR))>; + + def : VFPPat<(f64 (sint_to_fp (i32 (load addrmode5:$a)))), + (VSITOD (VLDRS addrmode5:$a))>; +} + def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, (outs SPR:$Sd),(ins SPR:$Sm), IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm", - [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> { + []> { let Inst{7} = 1; // s32 // Some single precision VFP instructions may be executed on both NEON and @@ -995,17 +1018,31 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, let D = VFPNeonA8Domain; } +def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)), + (VSITOS (COPY_TO_REGCLASS GPR:$a, SPR))>; + +def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (load addrmode5:$a)))), + (VSITOS (VLDRS addrmode5:$a))>; + def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm", - [(set DPR:$Dd, (f64 (arm_uitof SPR:$Sm)))]> { + []> { let Inst{7} = 0; // u32 } +let Predicates=[HasVFP2, HasDPVFP] in { + def : VFPPat<(f64 (uint_to_fp GPR:$a)), + (VUITOD (COPY_TO_REGCLASS GPR:$a, SPR))>; + + def : VFPPat<(f64 (uint_to_fp (i32 (load addrmode5:$a)))), + (VUITOD (VLDRS addrmode5:$a))>; +} + def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm", - [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> { + []> { let Inst{7} = 0; // u32 // Some single precision VFP instructions may be executed on both NEON and @@ -1013,6 +1050,12 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, let D = VFPNeonA8Domain; } +def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)), + (VUITOS (COPY_TO_REGCLASS GPR:$a, SPR))>; + +def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (load addrmode5:$a)))), + (VUITOS (VLDRS addrmode5:$a))>; + // FP -> Int: class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, @@ -1055,14 +1098,22 @@ class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm", - [(set SPR:$Sd, (arm_ftosi (f64 DPR:$Dm)))]> { + []> { let Inst{7} = 1; // Z bit } +let Predicates=[HasVFP2, HasDPVFP] in { + def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))), + (COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>; + + def : VFPPat<(store (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr), + (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>; +} + def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm", - [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> { + []> { let Inst{7} = 1; // Z bit // Some single precision VFP instructions may be executed on both NEON and @@ -1070,17 +1121,31 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, let D = VFPNeonA8Domain; } +def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)), + (COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>; + +def : VFPNoNEONPat<(store (i32 (fp_to_sint (f32 SPR:$a))), addrmode5:$ptr), + (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>; 
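// The payoff of matching fp_to_sint by pattern (sketch, not patch text):
// for source like
//   int g(double d) { return (int)d; }
// the VTOSIZD pattern above yields vcvt.s32.f64 directly, and the store
// form *p = (int)d folds into VSTRS, so the converted value never has to
// visit a core register. Exact output still depends on the subtarget.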
+ def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm", - [(set SPR:$Sd, (arm_ftoui (f64 DPR:$Dm)))]> { + []> { let Inst{7} = 1; // Z bit } +let Predicates=[HasVFP2, HasDPVFP] in { + def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))), + (COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>; + + def : VFPPat<(store (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr), + (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>; +} + def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm", - [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> { + []> { let Inst{7} = 1; // Z bit // Some single precision VFP instructions may be executed on both NEON and @@ -1088,6 +1153,12 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, let D = VFPNeonA8Domain; } +def : VFPNoNEONPat<(i32 (fp_to_uint SPR:$a)), + (COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>; + +def : VFPNoNEONPat<(store (i32 (fp_to_uint (f32 SPR:$a))), addrmode5:$ptr), + (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>; + // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR. let Uses = [FPSCR] in { // FIXME: Verify encoding after integrated assembler is working. diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index a8d0981..eca8e28 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -19,7 +19,7 @@ #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "Thumb1RegisterInfo.h" +#include "ThumbRegisterInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -38,6 +38,7 @@ #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index ddfdb52..a68ab1b 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -149,11 +149,7 @@ public: unsigned getStoredByValParamsPadding() const { return StByValParamsPadding; } void setStoredByValParamsPadding(unsigned p) { StByValParamsPadding = p; } - unsigned getArgRegsSaveSize(unsigned Align = 0) const { - if (!Align) - return ArgRegsSaveSize; - return (ArgRegsSaveSize + Align - 1) & ~(Align - 1); - } + unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; } void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; } unsigned getReturnRegsCount() const { return ReturnRegsCount; } diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp index 80b4b48..e6e8cdf 100644 --- a/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -16,6 +16,4 @@ using namespace llvm; void ARMRegisterInfo::anchor() { } -ARMRegisterInfo::ARMRegisterInfo(const ARMSubtarget &sti) - : ARMBaseRegisterInfo(sti) { -} +ARMRegisterInfo::ARMRegisterInfo() : ARMBaseRegisterInfo() {} diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index b623173..e2e650e 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -23,7 +23,7 @@ class ARMSubtarget; struct ARMRegisterInfo : public ARMBaseRegisterInfo { virtual void anchor(); public: - 
ARMRegisterInfo(const ARMSubtarget &STI); + ARMRegisterInfo(); }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 89624dd..fbec9e6 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -129,6 +129,7 @@ void ARMSubtarget::initializeEnvironment() { HasV5TEOps = false; HasV6Ops = false; HasV6MOps = false; + HasV6KOps = false; HasV6T2Ops = false; HasV7Ops = false; HasV8Ops = false; @@ -165,6 +166,7 @@ void ARMSubtarget::initializeEnvironment() { HasTrustZone = false; HasCrypto = false; HasCRC = false; + HasV8_1a = false; HasZeroCycleZeroing = false; AllowsUnalignedMem = false; Thumb2DSP = false; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index f4deddf..f36cd5c 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -56,13 +56,14 @@ protected: ARMProcClassEnum ARMProcClass; /// HasV4TOps, HasV5TOps, HasV5TEOps, - /// HasV6Ops, HasV6MOps, HasV6T2Ops, HasV7Ops, HasV8Ops - + /// HasV6Ops, HasV6MOps, HasV6KOps, HasV6T2Ops, HasV7Ops, HasV8Ops - /// Specify whether target support specific ARM ISA variants. bool HasV4TOps; bool HasV5TOps; bool HasV5TEOps; bool HasV6Ops; bool HasV6MOps; + bool HasV6KOps; bool HasV6T2Ops; bool HasV7Ops; bool HasV8Ops; @@ -181,6 +182,9 @@ protected: /// HasCRC - if true, processor supports CRC instructions bool HasCRC; + /// HasV8_1a - if true, the processor has V8.1a: PAN and RDMA extensions + bool HasV8_1a; + /// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are /// particularly effective at zeroing a VFP register. bool HasZeroCycleZeroing; @@ -287,6 +291,7 @@ public: bool hasV5TEOps() const { return HasV5TEOps; } bool hasV6Ops() const { return HasV6Ops; } bool hasV6MOps() const { return HasV6MOps; } + bool hasV6KOps() const { return HasV6KOps; } bool hasV6T2Ops() const { return HasV6T2Ops; } bool hasV7Ops() const { return HasV7Ops; } bool hasV8Ops() const { return HasV8Ops; } @@ -311,6 +316,7 @@ public: bool hasNEON() const { return HasNEON; } bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } + bool hasV8_1a() const { return HasV8_1a; } bool hasVirtualization() const { return HasVirtualization; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index a97a058..1bee1b0 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -37,6 +37,11 @@ EnableAtomicTidy("arm-atomic-cfg-tidy", cl::Hidden, " to make use of cmpxchg flow-based information"), cl::init(true)); +static cl::opt<bool> +EnableARMLoadStoreOpt("arm-load-store-opt", cl::Hidden, + cl::desc("Enable ARM load/store optimization pass"), + cl::init(true)); + extern "C" void LLVMInitializeARMTarget() { // Register the target. RegisterTargetMachine<ARMLETargetMachine> X(TheARMLETarget); @@ -105,9 +110,11 @@ computeTargetABI(const Triple &TT, StringRef CPU, return TargetABI; } -static std::string computeDataLayout(const Triple &TT, - ARMBaseTargetMachine::ARMABI ABI, +static std::string computeDataLayout(StringRef TT, StringRef CPU, + const TargetOptions &Options, bool isLittle) { + const Triple Triple(TT); + auto ABI = computeTargetABI(Triple, CPU, Options); std::string Ret = ""; if (isLittle) @@ -117,7 +124,7 @@ static std::string computeDataLayout(const Triple &TT, // Big endian. 
Ret += "E"; - Ret += DataLayout::getManglingComponent(TT); + Ret += DataLayout::getManglingComponent(Triple); // Pointers are 32 bits and aligned to 32 bits. Ret += "-p:32:32"; @@ -147,7 +154,7 @@ static std::string computeDataLayout(const Triple &TT, // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit // aligned everywhere else. - if (TT.isOSNaCl()) + if (Triple.isOSNaCl()) Ret += "-S128"; else if (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS) Ret += "-S64"; @@ -164,9 +171,9 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT, + CPU, FS, Options, RM, CM, OL), TargetABI(computeTargetABI(Triple(TT), CPU, Options)), - DL(computeDataLayout(Triple(TT), TargetABI, isLittle)), TLOF(createTLOF(Triple(getTargetTriple()))), Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) { @@ -325,7 +332,7 @@ void ARMPassConfig::addIRPasses() { } bool ARMPassConfig::addPreISel() { - if (TM->getOptLevel() != CodeGenOpt::None) + if (TM->getOptLevel() == CodeGenOpt::Aggressive) // FIXME: This is using the thumb1 only constant value for // maximal global offset for merging globals. We may want // to look into using the old value for non-thumb1 code of @@ -339,32 +346,30 @@ bool ARMPassConfig::addPreISel() { bool ARMPassConfig::addInstSelector() { addPass(createARMISelDag(getARMTargetMachine(), getOptLevel())); - const ARMSubtarget *Subtarget = &getARMSubtarget(); - if (Subtarget->isTargetELF() && !Subtarget->isThumb1Only() && + if (Triple(TM->getTargetTriple()).isOSBinFormatELF() && TM->Options.EnableFastISel) addPass(createARMGlobalBaseRegPass()); return false; } void ARMPassConfig::addPreRegAlloc() { - if (getOptLevel() != CodeGenOpt::None) - addPass(createARMLoadStoreOptimizationPass(true)); - if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9()) + if (getOptLevel() != CodeGenOpt::None) { addPass(createMLxExpansionPass()); - // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be - // enabled when NEON is available. 
- if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA15() && - getARMSubtarget().hasNEON() && !DisableA15SDOptimization) { - addPass(createA15SDOptimizerPass()); + + if (EnableARMLoadStoreOpt) + addPass(createARMLoadStoreOptimizationPass(/* pre-register alloc */ true)); + + if (!DisableA15SDOptimization) + addPass(createA15SDOptimizerPass()); } } void ARMPassConfig::addPreSched2() { if (getOptLevel() != CodeGenOpt::None) { - addPass(createARMLoadStoreOptimizationPass()); + if (EnableARMLoadStoreOpt) + addPass(createARMLoadStoreOptimizationPass()); - if (getARMSubtarget().hasNEON()) - addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass)); + addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass)); } // Expand some pseudo instructions into multiple instructions to allow @@ -372,26 +377,21 @@ void ARMPassConfig::addPreSched2() { addPass(createARMExpandPseudoPass()); if (getOptLevel() != CodeGenOpt::None) { - if (!getARMSubtarget().isThumb1Only()) { - // in v8, IfConversion depends on Thumb instruction widths - if (getARMSubtarget().restrictIT() && - !getARMSubtarget().prefers32BitThumb()) - addPass(createThumb2SizeReductionPass()); + // in v8, IfConversion depends on Thumb instruction widths + if (getARMSubtarget().restrictIT()) + addPass(createThumb2SizeReductionPass()); + if (!getARMSubtarget().isThumb1Only()) addPass(&IfConverterID); - } } - if (getARMSubtarget().isThumb2()) - addPass(createThumb2ITBlockPass()); + addPass(createThumb2ITBlockPass()); } void ARMPassConfig::addPreEmitPass() { - if (getARMSubtarget().isThumb2()) { - if (!getARMSubtarget().prefers32BitThumb()) - addPass(createThumb2SizeReductionPass()); + addPass(createThumb2SizeReductionPass()); - // Constant island pass work on unbundled instructions. + // Constant island pass work on unbundled instructions. + if (getARMSubtarget().isThumb2()) addPass(&UnpackMachineBundlesID); - } addPass(createARMOptimizeBarriersPass()); addPass(createARMConstantIslandPass()); diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 7f6a1ee..20ca97b 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -30,7 +30,6 @@ public: } TargetABI; protected: - const DataLayout DL; std::unique_ptr<TargetLoweringObjectFile> TLOF; ARMSubtarget Subtarget; bool isLittle; @@ -45,9 +44,8 @@ public: bool isLittle); ~ARMBaseTargetMachine() override; - const ARMSubtarget *getSubtargetImpl() const override { return &Subtarget; } + const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; } const ARMSubtarget *getSubtargetImpl(const Function &F) const override; - const DataLayout *getDataLayout() const override { return &DL; } bool isLittleEndian() const { return isLittle; } /// \brief Get the TargetIRAnalysis for this target. 
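The new EnableARMLoadStoreOpt option makes the load/store optimizer easy to
bisect against at both of the points where it now runs. A usage sketch: the
option is cl::Hidden, so it is missing from -help output, but llc still
parses it (the input file name here is arbitrary):

  llc -mtriple=armv7a-none-eabi -O2 -arm-load-store-opt=false input.ll -o -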
diff --git a/lib/Target/ARM/Android.mk b/lib/Target/ARM/Android.mk index 55a5775..6694b53 100644 --- a/lib/Target/ARM/Android.mk +++ b/lib/Target/ARM/Android.mk @@ -4,6 +4,7 @@ arm_codegen_TBLGEN_TABLES := \ ARMGenRegisterInfo.inc \ ARMGenInstrInfo.inc \ ARMGenCodeEmitter.inc \ + ARMGenCodeEmitter.inc \ ARMGenMCCodeEmitter.inc \ ARMGenMCPseudoLowering.inc \ ARMGenAsmWriter.inc \ @@ -41,10 +42,9 @@ arm_codegen_SRC_FILES := \ MLxExpansionPass.cpp \ Thumb1FrameLowering.cpp \ Thumb1InstrInfo.cpp \ - Thumb1RegisterInfo.cpp \ + ThumbRegisterInfo.cpp \ Thumb2ITBlockPass.cpp \ Thumb2InstrInfo.cpp \ - Thumb2RegisterInfo.cpp \ Thumb2SizeReduction.cpp # For the host diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 59461e8..2215efb 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -276,6 +276,9 @@ class ARMAsmParser : public MCTargetAsmParser { bool hasD16() const { return STI.getFeatureBits() & ARM::FeatureD16; } + bool hasV8_1a() const { + return STI.getFeatureBits() & ARM::FeatureV8_1a; + } void SwitchMode() { uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb)); @@ -342,10 +345,10 @@ public: }; - ARMAsmParser(MCSubtargetInfo & _STI, MCAsmParser & _Parser, + ARMAsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(_STI), MII(MII), UC(_Parser) { - MCAsmParserExtension::Initialize(_Parser); + : STI(STI), MII(MII), UC(Parser) { + MCAsmParserExtension::Initialize(Parser); // Cache the MCRegisterInfo. MRI = getContext().getRegisterInfo(); diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 2530640..0b698197 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -40,10 +40,9 @@ add_llvm_target(ARMCodeGen MLxExpansionPass.cpp Thumb1FrameLowering.cpp Thumb1InstrInfo.cpp - Thumb1RegisterInfo.cpp + ThumbRegisterInfo.cpp Thumb2ITBlockPass.cpp Thumb2InstrInfo.cpp - Thumb2RegisterInfo.cpp Thumb2SizeReduction.cpp ) diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 16eea33..e15323d 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -637,12 +637,12 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, printRegName(O, MO1.getReg()); unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm()); - unsigned Op = ARM_AM::getAM5Op(MO2.getImm()); + ARM_AM::AddrOpc Op = ARM_AM::getAM5Op(MO2.getImm()); if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) { O << ", " << markup("<imm:") << "#" - << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm())) + << ARM_AM::getAddrOpcStr(Op) << ImmOffs * 4 << markup(">"); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMArchName.def b/lib/Target/ARM/MCTargetDesc/ARMArchName.def index 9f007a0..96a0c1a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMArchName.def +++ b/lib/Target/ARM/MCTargetDesc/ARMArchName.def @@ -30,6 +30,7 @@ ARM_ARCH_NAME("armv5t", ARMV5T, "5T", v5T) ARM_ARCH_NAME("armv5te", ARMV5TE, "5TE", v5TE) ARM_ARCH_NAME("armv6", ARMV6, "6", v6) ARM_ARCH_NAME("armv6j", ARMV6J, "6J", v6) +ARM_ARCH_NAME("armv6k", ARMV6K, "6K", v6K) ARM_ARCH_NAME("armv6t2", ARMV6T2, "6T2", v6T2) ARM_ARCH_NAME("armv6z", ARMV6Z, "6Z", v6KZ) ARM_ARCH_NAME("armv6zk", ARMV6ZK, "6ZK", v6KZ) @@ -43,6 +44,8 @@ ARM_ARCH_NAME("armv7-m", ARMV7M, "7-M", v7) ARM_ARCH_ALIAS("armv7m", ARMV7M) 
ARM_ARCH_NAME("armv8-a", ARMV8A, "8-A", v8) ARM_ARCH_ALIAS("armv8a", ARMV8A) +ARM_ARCH_NAME("armv8.1-a", ARMV8_1A, "8.1-A", v8) +ARM_ARCH_ALIAS("armv8.1a", ARMV8_1A) ARM_ARCH_NAME("iwmmxt", IWMMXT, "iwmmxt", v5TE) ARM_ARCH_NAME("iwmmxt2", IWMMXT2, "iwmmxt2", v5TE) diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 2b65520..9648ffa 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -783,6 +783,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { setAttributeItem(THUMB_ISA_use, AllowThumb32, false); break; + case ARM::ARMV6K: case ARM::ARMV6Z: case ARM::ARMV6ZK: setAttributeItem(ARM_ISA_use, Allowed, false); @@ -816,6 +817,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { break; case ARM::ARMV8A: + case ARM::ARMV8_1A: setAttributeItem(CPU_arch_profile, ApplicationProfile, false); setAttributeItem(ARM_ISA_use, Allowed, false); setAttributeItem(THUMB_ISA_use, AllowThumb32, false); @@ -913,9 +915,8 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPARMv8A, /* OverwriteExisting= */ false); - setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch, - ARMBuildAttrs::AllowNeonARMv8, - /* OverwriteExisting= */ false); + // 'Advanced_SIMD_arch' must be emitted not here, but within + // ARMAsmPrinter::emitAttributes(), depending on hasV8Ops() and hasV8_1a() break; case ARM::SOFTVFP: @@ -1362,25 +1363,29 @@ void ARMELFStreamer::emitUnwindRaw(int64_t Offset, namespace llvm { -MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useDwarfDirectory, - MCInstPrinter *InstPrint, MCCodeEmitter *CE, - MCAsmBackend *TAB, bool ShowInst) { - MCStreamer *S = llvm::createAsmStreamer( - Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst); - new ARMTargetAsmStreamer(*S, OS, *InstPrint, isVerboseAsm); - return S; +MCTargetStreamer *createARMTargetAsmStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool isVerboseAsm) { + return new ARMTargetAsmStreamer(S, OS, *InstPrint, isVerboseAsm); } MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S) { return new ARMTargetStreamer(S); } +MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S, + const MCSubtargetInfo &STI) { + Triple TT(STI.getTargetTriple()); + if (TT.getObjectFormat() == Triple::ELF) + return new ARMTargetELFStreamer(S); + return new ARMTargetStreamer(S); +} + MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter, bool RelaxAll, bool IsThumb) { ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb); - new ARMTargetELFStreamer(*S); // FIXME: This should eventually end up somewhere else where more // intelligent flag decisions can be made. For now we are just maintaining // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default. 
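The Advanced_SIMD_arch removal above deliberately defers the attribute to the
asm printer, which can see the subtarget. A minimal sketch of what the
ARMAsmPrinter::emitAttributes() side presumably looks like; this is an
assumption rather than code from this patch, and AllowNeonARMv8_1a is assumed
to be the v8.1a enumerator in ARMBuildAttrs:

// Sketch only; assumes llvm/Support/ARMBuildAttributes.h plus the ARM
// backend's ARMTargetStreamer and ARMSubtarget declarations.
static void emitSIMDArchAttr(ARMTargetStreamer &ATS, const ARMSubtarget &STI) {
  if (STI.hasV8_1a())
    ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
                      ARMBuildAttrs::AllowNeonARMv8_1a); // v8.1a NEON + RDMA
  else if (STI.hasV8Ops())
    ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
                      ARMBuildAttrs::AllowNeonARMv8);     // plain v8 NEON
}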
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index 66a1618..caa8736 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -59,6 +59,7 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) { // Exceptions handling switch (TheTriple.getOS()) { + case Triple::Bitrig: case Triple::NetBSD: ExceptionsType = ExceptionHandling::DwarfCFI; break; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index efbebd3..e48cabb 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -441,14 +441,12 @@ public: MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx) { return new ARMMCCodeEmitter(MCII, Ctx, true); } MCCodeEmitter *llvm::createARMBEMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx) { return new ARMMCCodeEmitter(MCII, Ctx, false); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp index 68d32b2..5b90de3 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp @@ -10,6 +10,7 @@ #include "ARMMCExpr.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" using namespace llvm; #define DEBUG_TYPE "armmcexpr" diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h index 06bf6c9..2be98d2 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h @@ -26,8 +26,8 @@ private: const VariantKind Kind; const MCExpr *Expr; - explicit ARMMCExpr(VariantKind _Kind, const MCExpr *_Expr) - : Kind(_Kind), Expr(_Expr) {} + explicit ARMMCExpr(VariantKind Kind, const MCExpr *Expr) + : Kind(Kind), Expr(Expr) {} public: /// @name Construction diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 8c19785..7ff7f9a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -153,6 +153,17 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { // Use CPU to figure out the exact features ARMArchFeature = "+v8"; break; + case Triple::ARMSubArch_v8_1a: + if (NoCPU) + // v8.1a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, + // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, + // FeatureT2XtPk, FeatureCrypto, FeatureCRC, FeatureV8_1a + ARMArchFeature = "+v8.1a,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm," + "+trustzone,+t2xtpk,+crypto,+crc"; + else + // Use CPU to figure out the exact features + ARMArchFeature = "+v8.1a"; + break; case Triple::ARMSubArch_v7m: isThumb = true; if (NoCPU) @@ -195,6 +206,9 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { case Triple::ARMSubArch_v6t2: ARMArchFeature = "+v6t2"; break; + case Triple::ARMSubArch_v6k: + ARMArchFeature = "+v6k"; + break; case Triple::ARMSubArch_v6m: isThumb = true; if (NoCPU) @@ -295,27 +309,18 @@ static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -// This is duplicated code. Refactor this. 
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT, - MCContext &Ctx, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, bool RelaxAll) { - Triple TheTriple(TT); +static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, + MCAsmBackend &MAB, raw_ostream &OS, + MCCodeEmitter *Emitter, bool RelaxAll) { + return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, + T.getArch() == Triple::thumb); +} - switch (TheTriple.getObjectFormat()) { - default: llvm_unreachable("unsupported object format"); - case Triple::MachO: { - MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, false); - new ARMTargetStreamer(*S); - return S; - } - case Triple::COFF: - assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported"); - return createARMWinCOFFStreamer(Ctx, MAB, *Emitter, OS); - case Triple::ELF: - return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, - TheTriple.getArch() == Triple::thumb); - } +static MCStreamer *createARMMachOStreamer(MCContext &Ctx, MCAsmBackend &MAB, + raw_ostream &OS, + MCCodeEmitter *Emitter, bool RelaxAll, + bool DWARFMustBeAtTheEnd) { + return createMachOStreamer(Ctx, MAB, OS, Emitter, false, DWARFMustBeAtTheEnd); } static MCInstPrinter *createARMMCInstPrinter(const Target &T, @@ -379,61 +384,53 @@ static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) { // Force static initialization. extern "C" void LLVMInitializeARMTargetMC() { - // Register the MC asm info. - RegisterMCAsmInfoFn X(TheARMLETarget, createARMMCAsmInfo); - RegisterMCAsmInfoFn Y(TheARMBETarget, createARMMCAsmInfo); - RegisterMCAsmInfoFn A(TheThumbLETarget, createARMMCAsmInfo); - RegisterMCAsmInfoFn B(TheThumbBETarget, createARMMCAsmInfo); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheARMLETarget, createARMMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheARMBETarget, createARMMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheThumbLETarget, - createARMMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheThumbBETarget, - createARMMCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheARMLETarget, createARMMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheARMBETarget, createARMMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheThumbLETarget, createARMMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheThumbBETarget, createARMMCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheARMLETarget, createARMMCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheARMBETarget, createARMMCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheThumbLETarget, createARMMCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheThumbBETarget, createARMMCRegisterInfo); - - // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(TheARMLETarget, - ARM_MC::createARMMCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheARMBETarget, - ARM_MC::createARMMCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheThumbLETarget, - ARM_MC::createARMMCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheThumbBETarget, - ARM_MC::createARMMCSubtargetInfo); - - // Register the MC instruction analyzer. 
- TargetRegistry::RegisterMCInstrAnalysis(TheARMLETarget, - createARMMCInstrAnalysis); - TargetRegistry::RegisterMCInstrAnalysis(TheARMBETarget, - createARMMCInstrAnalysis); - TargetRegistry::RegisterMCInstrAnalysis(TheThumbLETarget, - createARMMCInstrAnalysis); - TargetRegistry::RegisterMCInstrAnalysis(TheThumbBETarget, - createARMMCInstrAnalysis); + for (Target *T : {&TheARMLETarget, &TheARMBETarget, &TheThumbLETarget, + &TheThumbBETarget}) { + // Register the MC asm info. + RegisterMCAsmInfoFn X(*T, createARMMCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(*T, createARMMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createARMMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(*T, createARMMCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, + ARM_MC::createARMMCSubtargetInfo); + + // Register the MC instruction analyzer. + TargetRegistry::RegisterMCInstrAnalysis(*T, createARMMCInstrAnalysis); + + TargetRegistry::RegisterELFStreamer(*T, createELFStreamer); + TargetRegistry::RegisterCOFFStreamer(*T, createARMWinCOFFStreamer); + TargetRegistry::RegisterMachOStreamer(*T, createARMMachOStreamer); + + // Register the obj target streamer. + TargetRegistry::RegisterObjectTargetStreamer(*T, + createARMObjectTargetStreamer); + + // Register the asm streamer. + TargetRegistry::RegisterAsmTargetStreamer(*T, createARMTargetAsmStreamer); + + // Register the null TargetStreamer. + TargetRegistry::RegisterNullTargetStreamer(*T, createARMNullTargetStreamer); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(*T, createARMMCInstPrinter); + + // Register the MC relocation info. + TargetRegistry::RegisterMCRelocationInfo(*T, createARMMCRelocationInfo); + } // Register the MC Code Emitter - TargetRegistry::RegisterMCCodeEmitter(TheARMLETarget, - createARMLEMCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheARMBETarget, - createARMBEMCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheThumbLETarget, - createARMLEMCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheThumbBETarget, - createARMBEMCCodeEmitter); + for (Target *T : {&TheARMLETarget, &TheThumbLETarget}) + TargetRegistry::RegisterMCCodeEmitter(*T, createARMLEMCCodeEmitter); + for (Target *T : {&TheARMBETarget, &TheThumbBETarget}) + TargetRegistry::RegisterMCCodeEmitter(*T, createARMBEMCCodeEmitter); // Register the asm backend. TargetRegistry::RegisterMCAsmBackend(TheARMLETarget, createARMLEAsmBackend); @@ -442,44 +439,4 @@ extern "C" void LLVMInitializeARMTargetMC() { createThumbLEAsmBackend); TargetRegistry::RegisterMCAsmBackend(TheThumbBETarget, createThumbBEAsmBackend); - - // Register the object streamer. - TargetRegistry::RegisterMCObjectStreamer(TheARMLETarget, createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheARMBETarget, createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheThumbLETarget, createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheThumbBETarget, createMCStreamer); - - // Register the asm streamer. - TargetRegistry::RegisterAsmStreamer(TheARMLETarget, createMCAsmStreamer); - TargetRegistry::RegisterAsmStreamer(TheARMBETarget, createMCAsmStreamer); - TargetRegistry::RegisterAsmStreamer(TheThumbLETarget, createMCAsmStreamer); - TargetRegistry::RegisterAsmStreamer(TheThumbBETarget, createMCAsmStreamer); - - // Register the null TargetStreamer. 
- TargetRegistry::RegisterNullTargetStreamer(TheARMLETarget, - createARMNullTargetStreamer); - TargetRegistry::RegisterNullTargetStreamer(TheARMBETarget, - createARMNullTargetStreamer); - TargetRegistry::RegisterNullTargetStreamer(TheThumbLETarget, - createARMNullTargetStreamer); - TargetRegistry::RegisterNullTargetStreamer(TheThumbBETarget, - createARMNullTargetStreamer); - - // Register the MCInstPrinter. - TargetRegistry::RegisterMCInstPrinter(TheARMLETarget, createARMMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheARMBETarget, createARMMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheThumbLETarget, - createARMMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheThumbBETarget, - createARMMCInstPrinter); - - // Register the MC relocation info. - TargetRegistry::RegisterMCRelocationInfo(TheARMLETarget, - createARMMCRelocationInfo); - TargetRegistry::RegisterMCRelocationInfo(TheARMBETarget, - createARMMCRelocationInfo); - TargetRegistry::RegisterMCRelocationInfo(TheThumbLETarget, - createARMMCRelocationInfo); - TargetRegistry::RegisterMCRelocationInfo(TheThumbBETarget, - createARMMCRelocationInfo); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index c17e959..7e9ba66 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -32,6 +32,7 @@ class MCRelocationInfo; class MCTargetStreamer; class StringRef; class Target; +class Triple; class raw_ostream; extern Target TheARMLETarget, TheThumbLETarget; @@ -47,21 +48,20 @@ namespace ARM_MC { StringRef FS); } -MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useDwarfDirectory, - MCInstPrinter *InstPrint, MCCodeEmitter *CE, - MCAsmBackend *TAB, bool ShowInst); - MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S); +MCTargetStreamer *createARMTargetAsmStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool isVerboseAsm); +MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S, + const MCSubtargetInfo &STI); MCCodeEmitter *createARMLEMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx); MCCodeEmitter *createARMBEMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx); MCAsmBackend *createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI, @@ -80,10 +80,11 @@ MCAsmBackend *createThumbLEAsmBackend(const Target &T, const MCRegisterInfo &MRI MCAsmBackend *createThumbBEAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -/// createARMWinCOFFStreamer - Construct a PE/COFF machine code streamer which -/// will generate a PE/COFF object file. +// Construct a PE/COFF machine code streamer which will generate a PE/COFF +// object file. MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, - MCCodeEmitter &Emitter, raw_ostream &OS); + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll); /// createARMELFObjectWriter - Construct an ELF Mach-O object writer. 
MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
index 593fe34..173cc93 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -72,14 +72,10 @@ void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
  // opcode when r4 is not in .save directive.

  // Compute the consecutive registers from r4 to r11.
-  uint32_t Range = 0;
-  uint32_t Mask = (1u << 4);
-  for (uint32_t Bit = (1u << 5); Bit < (1u << 12); Bit <<= 1) {
-    if ((RegSave & Bit) == 0u)
-      break;
-    ++Range;
-    Mask |= Bit;
-  }
+  uint32_t Mask = RegSave & 0xff0u;
+  uint32_t Range = countTrailingOnes(Mask >> 5); // Exclude r4.
+  // Mask off non-consecutive registers. Keep r4.
+  Mask &= ~(0xffffffe0u << Range);

  // Emit this opcode when the mask covers every register.
  uint32_t UnmaskedReg = RegSave & 0xfff0u & (~Mask);
@@ -105,50 +101,24 @@ void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {

/// Emit unwind opcodes for .vsave directives
void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) {
-  size_t i = 32;
-
-  while (i > 16) {
-    uint32_t Bit = 1u << (i - 1);
-    if ((VFPRegSave & Bit) == 0u) {
-      --i;
-      continue;
-    }
-
-    uint32_t Range = 0;
-
-    --i;
-    Bit >>= 1;
-
-    while (i > 16 && (VFPRegSave & Bit)) {
-      --i;
-      ++Range;
-      Bit >>= 1;
+  // We only have 4 bits to save the offset in the opcode so look at the lower
+  // and upper 16 bits separately.
+  for (uint32_t Regs : {VFPRegSave & 0xffff0000u, VFPRegSave & 0x0000ffffu}) {
+    while (Regs) {
+      // Now look for a run of set bits. Remember the MSB and LSB of the run.
+      auto RangeMSB = 32 - countLeadingZeros(Regs);
+      auto RangeLen = countLeadingOnes(Regs << (32 - RangeMSB));
+      auto RangeLSB = RangeMSB - RangeLen;
+
+      int Opcode = RangeLSB >= 16
+                       ? ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16
+                       : ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD;
+
+      EmitInt16(Opcode | ((RangeLSB % 16) << 4) | (RangeLen - 1));
+
+      // Zero out bits we're done with.
+      Regs &= ~(-1u << RangeLSB);
    }
-
-    EmitInt16(ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 |
-              ((i - 16) << 4) | Range);
-  }
-
-  while (i > 0) {
-    uint32_t Bit = 1u << (i - 1);
-    if ((VFPRegSave & Bit) == 0u) {
-      --i;
-      continue;
-    }
-
-    uint32_t Range = 0;
-
-    --i;
-    Bit >>= 1;
-
-    while (i > 0 && (VFPRegSave & Bit)) {
-      --i;
-      ++Range;
-      Bit >>= 1;
-    }
-
-    EmitInt16(ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) |
-              Range);
  }
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index b344ced..dc707dc 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -37,10 +37,10 @@ void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) {
  }
}

-namespace llvm {
-MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
-                                     MCCodeEmitter &Emitter, raw_ostream &OS) {
-  return new ARMWinCOFFStreamer(Context, MAB, Emitter, OS);
-}
+MCStreamer *llvm::createARMWinCOFFStreamer(MCContext &Context,
+                                           MCAsmBackend &MAB, raw_ostream &OS,
+                                           MCCodeEmitter *Emitter,
+                                           bool RelaxAll) {
+  return new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS);
}
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 51e519d..ed2deea 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -382,6 +382,9 @@ bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
  TRI = Fn.getSubtarget().getRegisterInfo();
  MRI = &Fn.getRegInfo();
  const ARMSubtarget *STI = &Fn.getSubtarget<ARMSubtarget>();
+  // Only run this for CortexA9.
+  if (!STI->isCortexA9())
+    return false;
  isLikeA9 = STI->isLikeA9() || STI->isSwift();
  isSwift = STI->isSwift();

diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt
index f4d9be3..2d031d0 100644
--- a/lib/Target/ARM/README-Thumb.txt
+++ b/lib/Target/ARM/README-Thumb.txt
@@ -232,7 +232,7 @@ Make use of hi register variants of cmp: tCMPhir / tCMPZhir.
//===---------------------------------------------------------------------===//

Thumb1 immediate fields sometimes keep pre-scaled values. See
-Thumb1RegisterInfo::eliminateFrameIndex. This is inconsistent with ARM and
+ThumbRegisterInfo::eliminateFrameIndex. This is inconsistent with ARM and
Thumb2.

//===---------------------------------------------------------------------===//
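[Editor's sketch] The two rewritten unwind emitters above (EmitRegSave and
EmitVFPRegSave) replace explicit bit-scanning loops with run-of-set-bits
arithmetic. Below is a minimal standalone illustration of those computations;
the helpers spell out the assumed semantics of countTrailingOnes,
countLeadingZeros, and countLeadingOnes from llvm/Support/MathExtras.h, and the
register sets are chosen purely for illustration:

    #include <cassert>
    #include <cstdint>

    // Portable spellings of the assumed MathExtras.h helper semantics.
    static unsigned countTrailingOnes(uint32_t V) {
      unsigned N = 0;
      for (; V & 1u; V >>= 1) ++N;  // count consecutive low set bits
      return N;
    }
    static unsigned countLeadingZeros(uint32_t V) {
      unsigned N = 0;
      for (uint32_t Bit = 1u << 31; Bit && !(V & Bit); Bit >>= 1) ++N;
      return N;
    }
    static unsigned countLeadingOnes(uint32_t V) {
      return countLeadingZeros(~V);  // count consecutive high set bits
    }

    int main() {
      // EmitRegSave: .save {r4, r5, r6, r9} -> bits 4, 5, 6, 9 set.
      uint32_t RegSave = (1u << 4) | (1u << 5) | (1u << 6) | (1u << 9);
      uint32_t Mask = RegSave & 0xff0u;              // r4-r11 candidates
      uint32_t Range = countTrailingOnes(Mask >> 5); // run above r4: r5, r6
      Mask &= ~(0xffffffe0u << Range);               // keep r4..r6, drop r9
      assert(Range == 2 && Mask == 0x70u);           // r9 gets its own opcode

      // EmitVFPRegSave: bracket the highest run of set bits, e.g. d20-d22.
      uint32_t Regs = (1u << 20) | (1u << 21) | (1u << 22);
      unsigned RangeMSB = 32 - countLeadingZeros(Regs);              // 23
      unsigned RangeLen = countLeadingOnes(Regs << (32 - RangeMSB)); // 3
      unsigned RangeLSB = RangeMSB - RangeLen;                       // 20
      assert(RangeLSB == 20 && RangeLen == 3);
      // The emitted opcode encodes ((RangeLSB % 16) << 4) | (RangeLen - 1).
      return 0;
    }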
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 7dcc64e..c496cd7 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -41,7 +41,7 @@ static void emitSPUpdate(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator &MBBI,
                         const TargetInstrInfo &TII, DebugLoc dl,
-                         const Thumb1RegisterInfo &MRI,
+                         const ThumbRegisterInfo &MRI,
                         int NumBytes,
                         unsigned MIFlags = MachineInstr::NoFlags)  {
  emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
                            MRI, MIFlags);
@@ -53,8 +53,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  const Thumb1InstrInfo &TII =
      *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
-  const Thumb1RegisterInfo *RegInfo =
-      static_cast<const Thumb1RegisterInfo *>(STI.getRegisterInfo());
+  const ThumbRegisterInfo *RegInfo =
+      static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
  if (!hasReservedCallFrame(MF)) {
    // If we have alloca, convert as follows:
    // ADJCALLSTACKDOWN -> sub, sp, sp, amount
@@ -89,13 +89,12 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
-  const Thumb1RegisterInfo *RegInfo =
-      static_cast<const Thumb1RegisterInfo *>(STI.getRegisterInfo());
+  const ThumbRegisterInfo *RegInfo =
+      static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
  const Thumb1InstrInfo &TII =
      *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
-  unsigned Align = STI.getFrameLowering()->getStackAlignment();
-  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  unsigned NumBytes = MFI->getStackSize();
  assert(NumBytes >= ArgRegsSaveSize &&
         "ArgRegsSaveSize is included in NumBytes");
@@ -328,17 +327,16 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
  DebugLoc dl = MBBI->getDebugLoc();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  const Thumb1RegisterInfo *RegInfo =
-      static_cast<const Thumb1RegisterInfo *>(STI.getRegisterInfo());
+  const ThumbRegisterInfo *RegInfo =
+      static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
  const Thumb1InstrInfo &TII =
      *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
-  unsigned Align = STI.getFrameLowering()->getStackAlignment();
-  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  int NumBytes = (int)MFI->getStackSize();
  assert((unsigned)NumBytes >= ArgRegsSaveSize &&
         "ArgRegsSaveSize is included in NumBytes");
-  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs();
+  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  unsigned FramePtr = RegInfo->getFrameRegister(MF);

  if (!AFI->hasStackFrame()) {
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
index b785b28..cf93203 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -16,7 +16,7 @@
#include "ARMFrameLowering.h"
#include "Thumb1InstrInfo.h"
-#include "Thumb1RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
#include "llvm/Target/TargetFrameLowering.h"

namespace llvm {
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index c24f740..29aaa15 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -22,8 +22,7 @@ using namespace llvm;

Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
-    : ARMBaseInstrInfo(STI), RI(STI) {
-}
+    : ARMBaseInstrInfo(STI), RI() {}

/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
void Thumb1InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 9fba760..f3f493d 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -15,13 +15,13 @@
#define LLVM_LIB_TARGET_ARM_THUMB1INSTRINFO_H

#include "ARMBaseInstrInfo.h"
-#include "Thumb1RegisterInfo.h"
+#include "ThumbRegisterInfo.h"

namespace llvm {
class ARMSubtarget;

class Thumb1InstrInfo : public ARMBaseInstrInfo {
-  Thumb1RegisterInfo RI;
+  ThumbRegisterInfo RI;
public:
  explicit Thumb1InstrInfo(const ARMSubtarget &STI);

@@ -36,7 +36,7 @@ public:
  /// such, whenever a client has an instance of instruction info, it should
  /// always be able to get register info as well (through this method).
  ///
-  const Thumb1RegisterInfo &getRegisterInfo() const override { return RI; }
+  const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }

  void copyPhysReg(MachineBasicBlock &MBB,
                   MachineBasicBlock::iterator I, DebugLoc DL,
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index b657f2d..7bb2265 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -255,6 +255,8 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
  const ARMSubtarget &STI =
      static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+  if (!STI.isThumb2())
+    return false;
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
  TRI = STI.getRegisterInfo();
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 62c3752..26ca7e9 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -30,8 +30,7 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden, cl::init(false));

Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
-    : ARMBaseInstrInfo(STI), RI(STI) {
-}
+    : ARMBaseInstrInfo(STI), RI() {}

/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 46a1f6d..916ab06 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -15,14 +15,14 @@
#define LLVM_LIB_TARGET_ARM_THUMB2INSTRINFO_H

#include "ARMBaseInstrInfo.h"
-#include "Thumb2RegisterInfo.h"
+#include "ThumbRegisterInfo.h"

namespace llvm {
class ARMSubtarget;
class ScheduleHazardRecognizer;

class Thumb2InstrInfo : public ARMBaseInstrInfo {
-  Thumb2RegisterInfo RI;
+  ThumbRegisterInfo RI;
public:
  explicit Thumb2InstrInfo(const ARMSubtarget &STI);

@@ -60,7 +60,7 @@ public:
  /// such, whenever a client has an instance of instruction info, it should
  /// always be able to get register info as well (through this method).
  ///
-  const Thumb2RegisterInfo &getRegisterInfo() const override { return RI; }
+  const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }

private:
  void expandLoadStackGuard(MachineBasicBlock::iterator MI,
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
deleted file mode 100644
index 0d5d85a..0000000
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-//===-- Thumb2RegisterInfo.cpp - Thumb-2 Register Information -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Thumb-2 implementation of the TargetRegisterInfo
-// class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Thumb2RegisterInfo.h"
-#include "ARM.h"
-#include "ARMSubtarget.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-using namespace llvm;
-
-Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMSubtarget &sti)
-    : ARMBaseRegisterInfo(sti) {
-}
-
-/// emitLoadConstPool - Emits a load from constpool to materialize the
-/// specified immediate.
-void
-Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
-                                      MachineBasicBlock::iterator &MBBI,
-                                      DebugLoc dl,
-                                      unsigned DestReg, unsigned SubIdx,
-                                      int Val,
-                                      ARMCC::CondCodes Pred, unsigned PredReg,
-                                      unsigned MIFlags) const {
-  MachineFunction &MF = *MBB.getParent();
-  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
-  MachineConstantPool *ConstantPool = MF.getConstantPool();
-  const Constant *C = ConstantInt::get(
-      Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
-  unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
-
-  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
-      .addReg(DestReg, getDefRegState(true), SubIdx)
-      .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
-      .setMIFlags(MIFlags);
-}
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
deleted file mode 100644
index 1dd94cc..0000000
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Thumb-2 implementation of the TargetRegisterInfo
-// class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_ARM_THUMB2REGISTERINFO_H
-#define LLVM_LIB_TARGET_ARM_THUMB2REGISTERINFO_H
-
-#include "ARMBaseRegisterInfo.h"
-
-namespace llvm {
-
-class ARMSubtarget;
-
-struct Thumb2RegisterInfo : public ARMBaseRegisterInfo {
-public:
-  Thumb2RegisterInfo(const ARMSubtarget &STI);
-
-  /// emitLoadConstPool - Emits a load from constpool to materialize the
-  /// specified immediate.
-  void
-  emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
-                    DebugLoc dl, unsigned DestReg, unsigned SubIdx, int Val,
-                    ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0,
-                    unsigned MIFlags = MachineInstr::NoFlags) const override;
-};
-}
-
-#endif
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 2ee908b..e967e53 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/Function.h" // To access Function attributes
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

@@ -1002,6 +1003,9 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
  STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());

+  if (STI->isThumb1Only() || STI->prefers32BitThumb())
+    return false;
+
  TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());

  // Optimizing / minimizing size?
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/ThumbRegisterInfo.cpp
index 5e2cbdc..b5f9d7e 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===-- Thumb1RegisterInfo.cpp - Thumb-1 Register Information -------------===//
+//===-- ThumbRegisterInfo.cpp - Thumb-1 Register Information -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//

-#include "Thumb1RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
@@ -38,39 +38,35 @@ extern cl::opt<bool> ReuseFrameIndexVals;

using namespace llvm;

-Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMSubtarget &sti)
-    : ARMBaseRegisterInfo(sti) {
-}
+ThumbRegisterInfo::ThumbRegisterInfo() : ARMBaseRegisterInfo() {}
+
+const TargetRegisterClass *
+ThumbRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+                                             const MachineFunction &MF) const {
+  if (!MF.getSubtarget<ARMSubtarget>().isThumb1Only())
+    return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC, MF);

-const TargetRegisterClass*
-Thumb1RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
-  const {
  if (ARM::tGPRRegClass.hasSubClassEq(RC))
    return &ARM::tGPRRegClass;
-  return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC);
+  return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC, MF);
}

const TargetRegisterClass *
-Thumb1RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
-  const {
+ThumbRegisterInfo::getPointerRegClass(const MachineFunction &MF,
+                                      unsigned Kind) const {
+  if (!MF.getSubtarget<ARMSubtarget>().isThumb1Only())
+    return ARMBaseRegisterInfo::getPointerRegClass(MF, Kind);
  return &ARM::tGPRRegClass;
}

-/// emitLoadConstPool - Emits a load from constpool to materialize the
-/// specified immediate.
-void
-Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
-                                      MachineBasicBlock::iterator &MBBI,
-                                      DebugLoc dl,
-                                      unsigned DestReg, unsigned SubIdx,
-                                      int Val,
-                                      ARMCC::CondCodes Pred, unsigned PredReg,
-                                      unsigned MIFlags) const {
-  assert((isARMLowRegister(DestReg) ||
-          isVirtualRegister(DestReg)) &&
-         "Thumb1 does not have ldr to high register");
-
+static void emitThumb1LoadConstPool(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator &MBBI,
+                                    DebugLoc dl, unsigned DestReg,
+                                    unsigned SubIdx, int Val,
+                                    ARMCC::CondCodes Pred, unsigned PredReg,
+                                    unsigned MIFlags) {
  MachineFunction &MF = *MBB.getParent();
+  const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  MachineConstantPool *ConstantPool = MF.getConstantPool();
  const Constant *C = ConstantInt::get(
@@ -83,6 +79,42 @@ Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
      .setMIFlags(MIFlags);
}

+static void emitThumb2LoadConstPool(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator &MBBI,
+                                    DebugLoc dl, unsigned DestReg,
+                                    unsigned SubIdx, int Val,
+                                    ARMCC::CondCodes Pred, unsigned PredReg,
+                                    unsigned MIFlags) {
+  MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  MachineConstantPool *ConstantPool = MF.getConstantPool();
+  const Constant *C = ConstantInt::get(
+      Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
+  unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
+      .addReg(DestReg, getDefRegState(true), SubIdx)
+      .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
+      .setMIFlags(MIFlags);
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void ThumbRegisterInfo::emitLoadConstPool(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+    unsigned DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred,
+    unsigned PredReg, unsigned MIFlags) const {
+  MachineFunction &MF = *MBB.getParent();
+  const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+  if (STI.isThumb1Only()) {
+    assert((isARMLowRegister(DestReg) || isVirtualRegister(DestReg)) &&
+           "Thumb1 does not have ldr to high register");
+    return emitThumb1LoadConstPool(MBB, MBBI, dl, DestReg, SubIdx, Val, Pred,
+                                   PredReg, MIFlags);
+  }
+  return emitThumb2LoadConstPool(MBB, MBBI, dl, DestReg, SubIdx, Val, Pred,
+                                 PredReg, MIFlags);
+}

/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
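[Editor's sketch] The merged ThumbRegisterInfo::emitLoadConstPool above keeps a
single entry point and re-derives the ISA from the per-function subtarget,
since the merged register-info class no longer stores an ARMSubtarget (note its
new default constructor). A minimal illustration of that dispatch shape, using
stand-in types rather than the real LLVM classes:

    #include <cstdio>

    // Stand-ins for ARMSubtarget / MachineFunction; illustration only.
    struct Subtarget {
      bool Thumb1Only;
      bool isThumb1Only() const { return Thumb1Only; }
    };
    struct MachineFunction {
      Subtarget ST;
      const Subtarget &getSubtarget() const { return ST; }
    };

    // The register-info object is stateless: each call re-derives the
    // subtarget from the function being compiled, so one instance can
    // serve Thumb1 and Thumb2 functions alike.
    static void emitThumb1Load() { std::puts("tLDRpci path (low regs only)"); }
    static void emitThumb2Load() { std::puts("t2LDRpci path"); }

    void emitLoadConstPool(const MachineFunction &MF) {
      if (MF.getSubtarget().isThumb1Only())
        return emitThumb1Load();
      return emitThumb2Load();
    }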
@@ -317,12 +349,14 @@ static unsigned convertToNonSPOpcode(unsigned Opcode) {
  return Opcode;
}

-bool Thumb1RegisterInfo::
-rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
-                  unsigned FrameReg, int &Offset,
-                  const ARMBaseInstrInfo &TII) const {
+bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II,
+                                          unsigned FrameRegIdx,
+                                          unsigned FrameReg, int &Offset,
+                                          const ARMBaseInstrInfo &TII) const {
  MachineInstr &MI = *II;
  MachineBasicBlock &MBB = *MI.getParent();
+  assert(MBB.getParent()->getSubtarget<ARMSubtarget>().isThumb1Only() &&
+         "This isn't needed for thumb2!");
  DebugLoc dl = MI.getDebugLoc();
  MachineInstrBuilder MIB(*MBB.getParent(), &MI);
  unsigned Opcode = MI.getOpcode();
@@ -386,8 +420,13 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
  return Offset == 0;
}

-void Thumb1RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+void ThumbRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
                                          int64_t Offset) const {
+  const MachineFunction &MF = *MI.getParent()->getParent();
+  const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+  if (!STI.isThumb1Only())
+    return ARMBaseRegisterInfo::resolveFrameIndex(MI, BaseReg, Offset);
+
  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
  int Off = Offset; // ARM doesn't need the general 64-bit offsets
  unsigned i = 0;
@@ -403,12 +442,15 @@ void Thumb1RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,

/// saveScavengerRegister - Spill the register so it can be used by the
/// register scavenger. Return true.
-bool
-Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
-                                          MachineBasicBlock::iterator I,
-                                          MachineBasicBlock::iterator &UseMI,
-                                          const TargetRegisterClass *RC,
-                                          unsigned Reg) const {
+bool ThumbRegisterInfo::saveScavengerRegister(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+    MachineBasicBlock::iterator &UseMI, const TargetRegisterClass *RC,
+    unsigned Reg) const {
+
+  const ARMSubtarget &STI = MBB.getParent()->getSubtarget<ARMSubtarget>();
+  if (!STI.isThumb1Only())
+    return ARMBaseRegisterInfo::saveScavengerRegister(MBB, I, UseMI, RC, Reg);
+
  // Thumb1 can't use the emergency spill slot on the stack because
  // ldr/str immediate offsets must be positive, and if we're referencing
  // off the frame pointer (if, for example, there are alloca() calls in
@@ -452,14 +494,18 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
  return true;
}

-void
-Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
-                                        int SPAdj, unsigned FIOperandNum,
-                                        RegScavenger *RS) const {
-  unsigned VReg = 0;
+void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                            int SPAdj, unsigned FIOperandNum,
+                                            RegScavenger *RS) const {
  MachineInstr &MI = *II;
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
+  const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+  if (!STI.isThumb1Only())
+    return ARMBaseRegisterInfo::eliminateFrameIndex(II, SPAdj, FIOperandNum,
+                                                    RS);
+
+  unsigned VReg = 0;
  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc dl = MI.getDebugLoc();
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/ThumbRegisterInfo.h
index 5feaf52..23aaff3 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/ThumbRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl -*- C++ -*-===//
+//===- ThumbRegisterInfo.h - Thumb Register Information Impl -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,9 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the Thumb-1 implementation of the TargetRegisterInfo
-// class.
+// This file contains the Thumb implementation of the TargetRegisterInfo
+// class. With the exception of emitLoadConstPool, Thumb2 tracks
+// ARMBaseRegisterInfo; Thumb1 overrides the functions below.
//
//===----------------------------------------------------------------------===//

@@ -22,12 +23,13 @@ namespace llvm {
  class ARMSubtarget;
  class ARMBaseInstrInfo;

-struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
+struct ThumbRegisterInfo : public ARMBaseRegisterInfo {
public:
-  Thumb1RegisterInfo(const ARMSubtarget &STI);
+  ThumbRegisterInfo();

  const TargetRegisterClass *
-  getLargestLegalSuperClass(const TargetRegisterClass *RC) const override;
+  getLargestLegalSuperClass(const TargetRegisterClass *RC,
+                            const MachineFunction &MF) const override;

  const TargetRegisterClass *
  getPointerRegClass(const MachineFunction &MF,
diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp
index 07f62a9..b91b0e1 100644
--- a/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -17,16 +17,16 @@
#include "BPFSubtarget.h"
#include "BPFTargetMachine.h"
#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;

#define DEBUG_TYPE "bpf-isel"
diff --git a/lib/Target/BPF/BPFISelLowering.h b/lib/Target/BPF/BPFISelLowering.h
index 04d7908..5a6f0f7 100644
--- a/lib/Target/BPF/BPFISelLowering.h
+++ b/lib/Target/BPF/BPFISelLowering.h
@@ -20,6 +20,7 @@
#include "llvm/Target/TargetLowering.h"

namespace llvm {
+class BPFSubtarget;
namespace BPFISD {
enum {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
diff --git a/lib/Target/BPF/BPFRegisterInfo.h b/lib/Target/BPF/BPFRegisterInfo.h
index 364d6f6..7072dd0 100644
--- a/lib/Target/BPF/BPFRegisterInfo.h
+++ b/lib/Target/BPF/BPFRegisterInfo.h
@@ -25,8 +25,7 @@ struct BPFRegisterInfo : public BPFGenRegisterInfo {

  BPFRegisterInfo();

-  const MCPhysReg *
-  getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
+  const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;

  BitVector getReservedRegs(const MachineFunction &MF) const override;
diff --git a/lib/Target/BPF/BPFTargetMachine.cpp b/lib/Target/BPF/BPFTargetMachine.cpp
index 5245395..9487427 100644
--- a/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/lib/Target/BPF/BPFTargetMachine.cpp
@@ -35,9 +35,9 @@ BPFTargetMachine::BPFTargetMachine(const Target &T, StringRef TT,
                                   StringRef CPU, StringRef FS,
                                   const TargetOptions &Options,
                                   Reloc::Model RM, CodeModel::Model CM,
                                   CodeGenOpt::Level OL)
-    : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+    : LLVMTargetMachine(T, "e-m:e-p:64:64-i64:64-n32:64-S128", TT, CPU, FS,
+                        Options, RM, CM, OL),
      TLOF(make_unique<TargetLoweringObjectFileELF>()),
DL("e-m:e-p:64:64-i64:64-n32:64-S128"), Subtarget(TT, CPU, FS, *this) { initAsmInfo(); } diff --git a/lib/Target/BPF/BPFTargetMachine.h b/lib/Target/BPF/BPFTargetMachine.h index 821cffc..6aeafb9 100644 --- a/lib/Target/BPF/BPFTargetMachine.h +++ b/lib/Target/BPF/BPFTargetMachine.h @@ -20,7 +20,6 @@ namespace llvm { class BPFTargetMachine : public LLVMTargetMachine { std::unique_ptr<TargetLoweringObjectFile> TLOF; - const DataLayout DL; BPFSubtarget Subtarget; public: @@ -28,8 +27,10 @@ public: const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); - const DataLayout *getDataLayout() const override { return &DL; } - const BPFSubtarget *getSubtargetImpl() const override { return &Subtarget; } + const BPFSubtarget *getSubtargetImpl() const { return &Subtarget; } + const BPFSubtarget *getSubtargetImpl(const Function &) const override { + return &Subtarget; + } TargetPassConfig *createPassConfig(PassManagerBase &PM) override; diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp index b94693a..9c51d66 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp @@ -60,7 +60,6 @@ public: MCCodeEmitter *llvm::createBPFMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx) { return new BPFMCCodeEmitter(MRI); } diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp index f82f009..fd04001 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp @@ -61,13 +61,11 @@ static MCCodeGenInfo *createBPFMCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCStreamer *createBPFMCStreamer(const Target &T, StringRef TT, +static MCStreamer *createBPFMCStreamer(const Triple &T, MCContext &Ctx, MCAsmBackend &MAB, - raw_ostream &_OS, - MCCodeEmitter *_Emitter, - const MCSubtargetInfo &STI, + raw_ostream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { - return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll); + return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll); } static MCInstPrinter * @@ -104,7 +102,7 @@ extern "C" void LLVMInitializeBPFTargetMC() { TargetRegistry::RegisterMCAsmBackend(TheBPFTarget, createBPFAsmBackend); // Register the object streamer - TargetRegistry::RegisterMCObjectStreamer(TheBPFTarget, createBPFMCStreamer); + TargetRegistry::RegisterELFStreamer(TheBPFTarget, createBPFMCStreamer); // Register the MCInstPrinter. 
  TargetRegistry::RegisterMCInstPrinter(TheBPFTarget, createBPFMCInstPrinter);
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
index 55901cc..1fd2bec 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
@@ -33,7 +33,6 @@ extern Target TheBPFTarget;

MCCodeEmitter *createBPFMCCodeEmitter(const MCInstrInfo &MCII,
                                      const MCRegisterInfo &MRI,
-                                      const MCSubtargetInfo &STI,
                                      MCContext &Ctx);

MCAsmBackend *createBPFAsmBackend(const Target &T, const MCRegisterInfo &MRI,
diff --git a/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp b/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
index 818a992..87716e6 100644
--- a/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
+++ b/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
@@ -14,5 +14,5 @@ using namespace llvm;
Target llvm::TheBPFTarget;

extern "C" void LLVMInitializeBPFTargetInfo() {
-  RegisterTarget<Triple::bpf> X(TheBPFTarget, "bpf", "BPF");
+  RegisterTarget<Triple::bpf, /*HasJIT=*/true> X(TheBPFTarget, "bpf", "BPF");
}
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index c7fec52..d0e2010 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -1981,7 +1981,8 @@ void CppWriter::printModule(const std::string& fname,
  printEscapedString(mName);
  Out << "\", getGlobalContext());";
  if (!TheModule->getTargetTriple().empty()) {
-    nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");";
+    nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayoutStr()
+            << "\");";
  }
  if (!TheModule->getTargetTriple().empty()) {
    nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple()
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 4bae7f8..678a932 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -22,20 +22,13 @@ namespace llvm {

class formatted_raw_ostream;

-class CPPSubtarget : public TargetSubtargetInfo {
-};
-
struct CPPTargetMachine : public TargetMachine {
-  CPPTargetMachine(const Target &T, StringRef TT,
-                   StringRef CPU, StringRef FS, const TargetOptions &Options,
-                   Reloc::Model RM, CodeModel::Model CM,
-                   CodeGenOpt::Level OL)
-    : TargetMachine(T, TT, CPU, FS, Options), Subtarget() {}
-private:
-  CPPSubtarget Subtarget;
+  CPPTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+                   const TargetOptions &Options, Reloc::Model RM,
+                   CodeModel::Model CM, CodeGenOpt::Level OL)
+      : TargetMachine(T, "", TT, CPU, FS, Options) {}

public:
-  const CPPSubtarget *getSubtargetImpl() const override { return &Subtarget; }
  bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out,
                           CodeGenFileType FileType, bool DisableVerify,
                           AnalysisID StartAfter,
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index eaa8bef..c6ffb96 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -31,7 +31,6 @@ add_llvm_target(HexagonCodeGen
  HexagonRemoveSZExtArgs.cpp
  HexagonSelectionDAGInfo.cpp
  HexagonSplitConst32AndConst64.cpp
-  HexagonSplitTFRCondSets.cpp
  HexagonSubtarget.cpp
  HexagonTargetMachine.cpp
  HexagonTargetObjectFile.cpp
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
index e0a3b2f..dfe79f9 100644
--- a/lib/Target/Hexagon/Hexagon.h
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -36,7 +36,6 @@ namespace llvm {
  FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM);
  FunctionPass *createHexagonCFGOptimizer();
-  FunctionPass *createHexagonSplitTFRCondSets();
  FunctionPass *createHexagonSplitConst32AndConst64();
  FunctionPass *createHexagonExpandPredSpillCode();
  FunctionPass *createHexagonHardwareLoops();
diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td
index f892c9f..53a687c 100644
--- a/lib/Target/Hexagon/Hexagon.td
+++ b/lib/Target/Hexagon/Hexagon.td
@@ -28,10 +28,10 @@ def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Hexagon V5">;
//===----------------------------------------------------------------------===//
// Hexagon Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
-def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
-def NoV5T : Predicate<"!Subtarget->hasV5TOps()">;
-def UseMEMOP : Predicate<"Subtarget->useMemOps()">;
-def IEEERndNearV5T : Predicate<"Subtarget->modeIEEERndNear()">;
+def HasV5T : Predicate<"HST->hasV5TOps()">;
+def NoV5T : Predicate<"!HST->hasV5TOps()">;
+def UseMEMOP : Predicate<"HST->useMemOps()">;
+def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">;

//===----------------------------------------------------------------------===//
// Classes used for relation maps.
@@ -168,14 +168,6 @@ def getRegForm : InstrMapping {
  let ValueCols = [["reg"]];
}

-def getRegShlForm : InstrMapping {
-  let FilterClass = "ImmRegShl";
-  let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"];
-  let ColFields = ["InputType"];
-  let KeyCol = ["imm"];
-  let ValueCols = [["reg"]];
-}
-
//===----------------------------------------------------------------------===//
// Register File, Calling Conv, Instruction Descriptions
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index dd193f9..5a26045 100644
--- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -127,12 +127,21 @@ static bool isCombinableInstType(MachineInstr *MI,
  case Hexagon::A2_tfrsi: {
    // A transfer-immediate can be combined if its argument is a signed 8-bit
    // value.
-    assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
-    unsigned DestReg = MI->getOperand(0).getReg();
+    const MachineOperand &Op0 = MI->getOperand(0);
+    const MachineOperand &Op1 = MI->getOperand(1);
+    assert(Op0.isReg());
+
+    unsigned DestReg = Op0.getReg();
+    // Ensure that TargetFlags are MO_NO_FLAG for a global. This is a
+    // workaround for an ABI bug that prevents GOT relocations on combine
+    // instructions.
+    if (!Op1.isImm() && Op1.getTargetFlags() != HexagonII::MO_NO_FLAG)
+      return false;

-    // Only combine constant extended TFRI if we are in aggressive mode.
+    // Only combine constant extended A2_tfrsi if we are in aggressive mode.
+    bool NotExt = Op1.isImm() && isInt<8>(Op1.getImm());
    return Hexagon::IntRegsRegClass.contains(DestReg) &&
-           (ShouldCombineAggressively || isInt<8>(MI->getOperand(1).getImm()));
+           (ShouldCombineAggressively || NotExt);
  }

  case Hexagon::TFRI_V4: {
diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index 8176598..40059fb 100644
--- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -79,7 +79,166 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
         ++MII) {
      MachineInstr *MI = MII;
      int Opc = MI->getOpcode();
-      if (Opc == Hexagon::STriw_pred) {
+      if (Opc == Hexagon::S2_storerb_pci_pseudo ||
+          Opc == Hexagon::S2_storerh_pci_pseudo ||
+          Opc == Hexagon::S2_storeri_pci_pseudo ||
+          Opc == Hexagon::S2_storerd_pci_pseudo ||
+          Opc == Hexagon::S2_storerf_pci_pseudo) {
+        unsigned Opcode;
+        if (Opc == Hexagon::S2_storerd_pci_pseudo)
+          Opcode = Hexagon::S2_storerd_pci;
+        else if (Opc == Hexagon::S2_storeri_pci_pseudo)
+          Opcode = Hexagon::S2_storeri_pci;
+        else if (Opc == Hexagon::S2_storerh_pci_pseudo)
+          Opcode = Hexagon::S2_storerh_pci;
+        else if (Opc == Hexagon::S2_storerf_pci_pseudo)
+          Opcode = Hexagon::S2_storerf_pci;
+        else if (Opc == Hexagon::S2_storerb_pci_pseudo)
+          Opcode = Hexagon::S2_storerb_pci;
+        else
+          llvm_unreachable("wrong Opc");
+        MachineOperand &Op0 = MI->getOperand(0);
+        MachineOperand &Op1 = MI->getOperand(1);
+        MachineOperand &Op2 = MI->getOperand(2);
+        MachineOperand &Op3 = MI->getOperand(3); // Modifier value.
+        MachineOperand &Op4 = MI->getOperand(4);
+        // Emit a "C6 = Rn, C6 is the control register for M0".
+        BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+                Hexagon::C6)->addOperand(Op3);
+        // Replace the pseudo circ_ldd by the real circ_ldd.
+        MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+                                      TII->get(Opcode));
+        NewMI->addOperand(Op0);
+        NewMI->addOperand(Op1);
+        NewMI->addOperand(Op4);
+        NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+                                                    false, /*isDef*/
+                                                    false, /*isImpl*/
+                                                    true /*isKill*/));
+        NewMI->addOperand(Op2);
+        MII = MBB->erase(MI);
+        --MII;
+      } else if (Opc == Hexagon::L2_loadrd_pci_pseudo ||
+                 Opc == Hexagon::L2_loadri_pci_pseudo ||
+                 Opc == Hexagon::L2_loadrh_pci_pseudo ||
+                 Opc == Hexagon::L2_loadruh_pci_pseudo||
+                 Opc == Hexagon::L2_loadrb_pci_pseudo ||
+                 Opc == Hexagon::L2_loadrub_pci_pseudo) {
+        unsigned Opcode;
+        if (Opc == Hexagon::L2_loadrd_pci_pseudo)
+          Opcode = Hexagon::L2_loadrd_pci;
+        else if (Opc == Hexagon::L2_loadri_pci_pseudo)
+          Opcode = Hexagon::L2_loadri_pci;
+        else if (Opc == Hexagon::L2_loadrh_pci_pseudo)
+          Opcode = Hexagon::L2_loadrh_pci;
+        else if (Opc == Hexagon::L2_loadruh_pci_pseudo)
+          Opcode = Hexagon::L2_loadruh_pci;
+        else if (Opc == Hexagon::L2_loadrb_pci_pseudo)
+          Opcode = Hexagon::L2_loadrb_pci;
+        else if (Opc == Hexagon::L2_loadrub_pci_pseudo)
+          Opcode = Hexagon::L2_loadrub_pci;
+        else
+          llvm_unreachable("wrong Opc");
+
+        MachineOperand &Op0 = MI->getOperand(0);
+        MachineOperand &Op1 = MI->getOperand(1);
+        MachineOperand &Op2 = MI->getOperand(2);
+        MachineOperand &Op4 = MI->getOperand(4); // Modifier value.
+        MachineOperand &Op5 = MI->getOperand(5);
+        // Emit a "C6 = Rn, C6 is the control register for M0".
+        BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+                Hexagon::C6)->addOperand(Op4);
+        // Replace the pseudo circ_ldd by the real circ_ldd.
+        MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+                                      TII->get(Opcode));
+        NewMI->addOperand(Op1);
+        NewMI->addOperand(Op0);
+        NewMI->addOperand(Op2);
+        NewMI->addOperand(Op5);
+        NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+                                                    false, /*isDef*/
+                                                    false, /*isImpl*/
+                                                    true /*isKill*/));
+        MII = MBB->erase(MI);
+        --MII;
+      } else if (Opc == Hexagon::L2_loadrd_pbr_pseudo ||
+                 Opc == Hexagon::L2_loadri_pbr_pseudo ||
+                 Opc == Hexagon::L2_loadrh_pbr_pseudo ||
+                 Opc == Hexagon::L2_loadruh_pbr_pseudo||
+                 Opc == Hexagon::L2_loadrb_pbr_pseudo ||
+                 Opc == Hexagon::L2_loadrub_pbr_pseudo) {
+        unsigned Opcode;
+        if (Opc == Hexagon::L2_loadrd_pbr_pseudo)
+          Opcode = Hexagon::L2_loadrd_pbr;
+        else if (Opc == Hexagon::L2_loadri_pbr_pseudo)
+          Opcode = Hexagon::L2_loadri_pbr;
+        else if (Opc == Hexagon::L2_loadrh_pbr_pseudo)
+          Opcode = Hexagon::L2_loadrh_pbr;
+        else if (Opc == Hexagon::L2_loadruh_pbr_pseudo)
+          Opcode = Hexagon::L2_loadruh_pbr;
+        else if (Opc == Hexagon::L2_loadrb_pbr_pseudo)
+          Opcode = Hexagon::L2_loadrb_pbr;
+        else if (Opc == Hexagon::L2_loadrub_pbr_pseudo)
+          Opcode = Hexagon::L2_loadrub_pbr;
+        else
+          llvm_unreachable("wrong Opc");
+        MachineOperand &Op0 = MI->getOperand(0);
+        MachineOperand &Op1 = MI->getOperand(1);
+        MachineOperand &Op2 = MI->getOperand(2);
+        MachineOperand &Op4 = MI->getOperand(4); // Modifier value.
+        // Emit a "C6 = Rn, C6 is the control register for M0".
+        BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+                Hexagon::C6)->addOperand(Op4);
+        // Replace the pseudo brev_ldd by the real brev_ldd.
+        MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+                                      TII->get(Opcode));
+        NewMI->addOperand(Op1);
+        NewMI->addOperand(Op0);
+        NewMI->addOperand(Op2);
+        NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+                                                    false, /*isDef*/
+                                                    false, /*isImpl*/
+                                                    true /*isKill*/));
+        MII = MBB->erase(MI);
+        --MII;
+      } else if (Opc == Hexagon::S2_storerd_pbr_pseudo ||
+                 Opc == Hexagon::S2_storeri_pbr_pseudo ||
+                 Opc == Hexagon::S2_storerh_pbr_pseudo ||
+                 Opc == Hexagon::S2_storerb_pbr_pseudo ||
+                 Opc == Hexagon::S2_storerf_pbr_pseudo) {
+        unsigned Opcode;
+        if (Opc == Hexagon::S2_storerd_pbr_pseudo)
+          Opcode = Hexagon::S2_storerd_pbr;
+        else if (Opc == Hexagon::S2_storeri_pbr_pseudo)
+          Opcode = Hexagon::S2_storeri_pbr;
+        else if (Opc == Hexagon::S2_storerh_pbr_pseudo)
+          Opcode = Hexagon::S2_storerh_pbr;
+        else if (Opc == Hexagon::S2_storerf_pbr_pseudo)
+          Opcode = Hexagon::S2_storerf_pbr;
+        else if (Opc == Hexagon::S2_storerb_pbr_pseudo)
+          Opcode = Hexagon::S2_storerb_pbr;
+        else
+          llvm_unreachable("wrong Opc");
+        MachineOperand &Op0 = MI->getOperand(0);
+        MachineOperand &Op1 = MI->getOperand(1);
+        MachineOperand &Op2 = MI->getOperand(2);
+        MachineOperand &Op3 = MI->getOperand(3); // Modifier value.
+        // Emit a "C6 = Rn, C6 is the control register for M0".
+        BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+                Hexagon::C6)->addOperand(Op3);
+        // Replace the pseudo brev_ldd by the real brev_ldd.
+        MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+                                      TII->get(Opcode));
+        NewMI->addOperand(Op0);
+        NewMI->addOperand(Op1);
+        NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+                                                    false, /*isDef*/
+                                                    false, /*isImpl*/
+                                                    true /*isKill*/));
+        NewMI->addOperand(Op2);
+        MII = MBB->erase(MI);
+        --MII;
+      } else if (Opc == Hexagon::STriw_pred) {
        // STriw_pred [R30], ofst, SrcReg;
        unsigned FP = MI->getOperand(0).getReg();
        assert(FP == QST.getRegisterInfo()->getFrameRegister() &&
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 2b1992f..65d689b 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -140,7 +140,7 @@ bool HexagonFrameLowering::hasTailCall(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  unsigned RetOpcode = MBBI->getOpcode();

-  return RetOpcode == Hexagon::TCRETURNtg || RetOpcode == Hexagon::TCRETURNtext;
+  return RetOpcode == Hexagon::TCRETURNi || RetOpcode == Hexagon::TCRETURNr;
}

void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 1577c33..c47ee9c 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -690,7 +690,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,

  // If the induction variable bump is not a power of 2, quit.
  // Otherwise we'd need a general integer division.
-  if (!isPowerOf2_64(abs64(IVBump)))
+  if (!isPowerOf2_64(std::abs(IVBump)))
    return nullptr;

  MachineBasicBlock *PH = Loop->getLoopPreheader();
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index fb056b5..aaccac8 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -45,37 +45,25 @@ namespace llvm {
///
namespace {
class HexagonDAGToDAGISel : public SelectionDAGISel {
-  /// Subtarget - Keep a pointer to the Hexagon Subtarget around so that we can
-  /// make the right decision when generating code for different targets.
-  const HexagonSubtarget *Subtarget;
-
-  // Keep a reference to HexagonTargetMachine.
-  const HexagonTargetMachine& TM;
  DenseMap<const GlobalValue *, unsigned> GlobalAddressUseCountMap;
+  const HexagonTargetMachine& HTM;
+  const HexagonSubtarget *HST;
public:
-  explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine,
+  explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm,
                               CodeGenOpt::Level OptLevel)
-      : SelectionDAGISel(targetmachine, OptLevel), TM(targetmachine) {
+      : SelectionDAGISel(tm, OptLevel), HTM(tm) {
    initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry());
  }
-  bool hasNumUsesBelowThresGA(SDNode *N) const;
-  SDNode *Select(SDNode *N) override;
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    // Reset the subtarget each time through.
+    HST = &MF.getSubtarget<HexagonSubtarget>();
+    SelectionDAGISel::runOnMachineFunction(MF);
+    return true;
+  }

-  // Complex Pattern Selectors.
-  inline bool foldGlobalAddress(SDValue &N, SDValue &R);
-  inline bool foldGlobalAddressGP(SDValue &N, SDValue &R);
-  bool foldGlobalAddressImpl(SDValue &N, SDValue &R, bool ShouldLookForGP);
-  bool SelectADDRri(SDValue& N, SDValue &R1, SDValue &R2);
-  bool SelectADDRriS11_0(SDValue& N, SDValue &R1, SDValue &R2);
-  bool SelectADDRriS11_1(SDValue& N, SDValue &R1, SDValue &R2);
-  bool SelectADDRriS11_2(SDValue& N, SDValue &R1, SDValue &R2);
-  bool SelectMEMriS11_2(SDValue& Addr, SDValue &Base, SDValue &Offset);
-  bool SelectADDRriS11_3(SDValue& N, SDValue &R1, SDValue &R2);
-  bool SelectADDRrr(SDValue &Addr, SDValue &Base, SDValue &Offset);
-  bool SelectADDRriU6_0(SDValue& N, SDValue &R1, SDValue &R2);
-  bool SelectADDRriU6_1(SDValue& N, SDValue &R1, SDValue &R2);
-  bool SelectADDRriU6_2(SDValue& N, SDValue &R1, SDValue &R2);
+  virtual void PreprocessISelDAG() override;
+
+  SDNode *Select(SDNode *N) override;

  // Complex Pattern Selectors.
  inline bool SelectAddrGA(SDValue &N, SDValue &R);
@@ -87,18 +75,12 @@ public:
    return "Hexagon DAG->DAG Pattern Instruction Selection";
  }

-  bool runOnMachineFunction(MachineFunction &MF) override {
-    Subtarget = &MF.getSubtarget<HexagonSubtarget>();
-    return SelectionDAGISel::runOnMachineFunction(MF);
-  }
-
+  SDNode *SelectFrameIndex(SDNode *N);
  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
-                                    char ConstraintCode,
+                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;
-  bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset);
-
  SDNode *SelectLoad(SDNode *N);
  SDNode *SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl);
  SDNode *SelectIndexedLoad(LoadSDNode *LD, SDLoc dl);
@@ -110,99 +92,98 @@ public:
  SDNode *SelectIndexedStore(StoreSDNode *ST, SDLoc dl);
  SDNode *SelectStore(SDNode *N);
  SDNode *SelectSHL(SDNode *N);
-  SDNode *SelectSelect(SDNode *N);
-  SDNode *SelectTruncate(SDNode *N);
  SDNode *SelectMul(SDNode *N);
  SDNode *SelectZeroExtend(SDNode *N);
-  SDNode *SelectIntrinsicWOChain(SDNode *N);
  SDNode *SelectIntrinsicWChain(SDNode *N);
+  SDNode *SelectIntrinsicWOChain(SDNode *N);
  SDNode *SelectConstant(SDNode *N);
  SDNode *SelectConstantFP(SDNode *N);
  SDNode *SelectAdd(SDNode *N);
-  bool isConstExtProfitable(SDNode *N) const;
-
-// XformMskToBitPosU5Imm - Returns the bit position which
-// the single bit 32 bit mask represents.
-// Used in Clr and Set bit immediate memops.
-SDValue XformMskToBitPosU5Imm(uint32_t Imm) {
-  int32_t bitPos;
-  bitPos = Log2_32(Imm);
-  assert(bitPos >= 0 && bitPos < 32 &&
-         "Constant out of range for 32 BitPos Memops");
-  return CurDAG->getTargetConstant(bitPos, MVT::i32);
-}
-
-// XformMskToBitPosU4Imm - Returns the bit position which the single bit 16 bit
-// mask represents. Used in Clr and Set bit immediate memops.
-SDValue XformMskToBitPosU4Imm(uint16_t Imm) {
-  return XformMskToBitPosU5Imm(Imm);
-}
+  SDNode *SelectBitOp(SDNode *N);
+
+  // XformMskToBitPosU5Imm - Returns the bit position which
+  // the single bit 32 bit mask represents.
+  // Used in Clr and Set bit immediate memops.
+  SDValue XformMskToBitPosU5Imm(uint32_t Imm) {
+    int32_t bitPos;
+    bitPos = Log2_32(Imm);
+    assert(bitPos >= 0 && bitPos < 32 &&
+           "Constant out of range for 32 BitPos Memops");
+    return CurDAG->getTargetConstant(bitPos, MVT::i32);
+  }

-// XformMskToBitPosU3Imm - Returns the bit position which the single bit 8 bit
-// mask represents. Used in Clr and Set bit immediate memops.
-SDValue XformMskToBitPosU3Imm(uint8_t Imm) {
-  return XformMskToBitPosU5Imm(Imm);
-}
+  // XformMskToBitPosU4Imm - Returns the bit position which the single-bit
+  // 16 bit mask represents. Used in Clr and Set bit immediate memops.
+  SDValue XformMskToBitPosU4Imm(uint16_t Imm) {
+    return XformMskToBitPosU5Imm(Imm);
+  }

-// Return true if there is exactly one bit set in V, i.e., if V is one of the
-// following integers: 2^0, 2^1, ..., 2^31.
-bool ImmIsSingleBit(uint32_t v) const {
-  return isPowerOf2_32(v);
-}
+  // XformMskToBitPosU3Imm - Returns the bit position which the single-bit
+  // 8 bit mask represents. Used in Clr and Set bit immediate memops.
+  SDValue XformMskToBitPosU3Imm(uint8_t Imm) {
+    return XformMskToBitPosU5Imm(Imm);
+  }

-// XformM5ToU5Imm - Return a target constant with the specified value, of type
-// i32 where the negative literal is transformed into a positive literal for
-// use in -= memops.
-inline SDValue XformM5ToU5Imm(signed Imm) {
-  assert( (Imm >= -31 && Imm <= -1)  && "Constant out of range for Memops");
-  return CurDAG->getTargetConstant( - Imm, MVT::i32);
-}
+  // Return true if there is exactly one bit set in V, i.e., if V is one of the
+  // following integers: 2^0, 2^1, ..., 2^31.
+  bool ImmIsSingleBit(uint32_t v) const {
+    return isPowerOf2_32(v);
+  }
+  // XformM5ToU5Imm - Return a target constant with the specified value, of
+  // type i32 where the negative literal is transformed into a positive literal
+  // for use in -= memops.
+  inline SDValue XformM5ToU5Imm(signed Imm) {
+    assert( (Imm >= -31 && Imm <= -1)  && "Constant out of range for Memops");
+    return CurDAG->getTargetConstant( - Imm, MVT::i32);
+  }

-// XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
-// [1..128], used in cmpb.gtu instructions.
-inline SDValue XformU7ToU7M1Imm(signed Imm) {
-  assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op");
-  return CurDAG->getTargetConstant(Imm - 1, MVT::i8);
-}
+  // XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
+  // [1..128], used in cmpb.gtu instructions.
+  inline SDValue XformU7ToU7M1Imm(signed Imm) {
+    assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op");
+    return CurDAG->getTargetConstant(Imm - 1, MVT::i8);
+  }

-// XformS8ToS8M1Imm - Return a target constant decremented by 1.
-inline SDValue XformSToSM1Imm(signed Imm) {
-  return CurDAG->getTargetConstant(Imm - 1, MVT::i32);
-}
+  // XformS8ToS8M1Imm - Return a target constant decremented by 1.
+  inline SDValue XformSToSM1Imm(signed Imm) {
+    return CurDAG->getTargetConstant(Imm - 1, MVT::i32);
+  }

-// XformU8ToU8M1Imm - Return a target constant decremented by 1.
-inline SDValue XformUToUM1Imm(unsigned Imm) {
-  assert((Imm >= 1) && "Cannot decrement unsigned int less than 1");
-  return CurDAG->getTargetConstant(Imm - 1, MVT::i32);
-}
+  // XformU8ToU8M1Imm - Return a target constant decremented by 1.
+  inline SDValue XformUToUM1Imm(unsigned Imm) {
+    assert((Imm >= 1) && "Cannot decrement unsigned int less than 1");
+    return CurDAG->getTargetConstant(Imm - 1, MVT::i32);
+  }

-// XformSToSM2Imm - Return a target constant decremented by 2.
-inline SDValue XformSToSM2Imm(unsigned Imm) {
-  return CurDAG->getTargetConstant(Imm - 2, MVT::i32);
-}
+  // XformSToSM2Imm - Return a target constant decremented by 2.
+  inline SDValue XformSToSM2Imm(unsigned Imm) {
+    return CurDAG->getTargetConstant(Imm - 2, MVT::i32);
+  }

-// XformSToSM3Imm - Return a target constant decremented by 3.
-inline SDValue XformSToSM3Imm(unsigned Imm) {
-  return CurDAG->getTargetConstant(Imm - 3, MVT::i32);
-}
+  // XformSToSM3Imm - Return a target constant decremented by 3.
+  inline SDValue XformSToSM3Imm(unsigned Imm) {
+    return CurDAG->getTargetConstant(Imm - 3, MVT::i32);
+  }

-// Include the pieces autogenerated from the target description.
-#include "HexagonGenDAGISel.inc"
+  // Include the pieces autogenerated from the target description.
+  #include "HexagonGenDAGISel.inc"

private:
-  bool isValueExtension(SDValue const &Val, unsigned FromBits, SDValue &Src);
-};
+  bool isValueExtension(const SDValue &Val, unsigned FromBits, SDValue &Src);
+}; // end HexagonDAGToDAGISel
} // end anonymous namespace

/// createHexagonISelDag - This pass converts a legalized DAG into a
/// Hexagon-specific DAG, ready for instruction scheduling.
///
-FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM,
-                                         CodeGenOpt::Level OptLevel) {
+namespace llvm {
+FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
+                                   CodeGenOpt::Level OptLevel) {
  return new HexagonDAGToDAGISel(TM, OptLevel);
}
+}

static void initializePassOnce(PassRegistry &Registry) {
  const char *Name = "Hexagon DAG->DAG Pattern Instruction Selection";
@@ -216,76 +197,6 @@ void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) {
}

-static bool IsS11_0_Offset(SDNode * S) {
-  ConstantSDNode *N = cast<ConstantSDNode>(S);
-
-  // immS16 predicate - True if the immediate fits in a 16-bit sign extended
-  // field.
-  int64_t v = (int64_t)N->getSExtValue();
-  return isInt<11>(v);
-}
-
-
-static bool IsS11_1_Offset(SDNode * S) {
-  ConstantSDNode *N = cast<ConstantSDNode>(S);
-
-  // immS16 predicate - True if the immediate fits in a 16-bit sign extended
-  // field.
-  int64_t v = (int64_t)N->getSExtValue();
-  return isShiftedInt<11,1>(v);
-}
-
-
-static bool IsS11_2_Offset(SDNode * S) {
-  ConstantSDNode *N = cast<ConstantSDNode>(S);
-
-  // immS16 predicate - True if the immediate fits in a 16-bit sign extended
-  // field.
-  int64_t v = (int64_t)N->getSExtValue();
-  return isShiftedInt<11,2>(v);
-}
-
-
-static bool IsS11_3_Offset(SDNode * S) {
-  ConstantSDNode *N = cast<ConstantSDNode>(S);
-
-  // immS16 predicate - True if the immediate fits in a 16-bit sign extended
-  // field.
-  int64_t v = (int64_t)N->getSExtValue();
-  return isShiftedInt<11,3>(v);
-}
-
-
-static bool IsU6_0_Offset(SDNode * S) {
-  ConstantSDNode *N = cast<ConstantSDNode>(S);
-
-  // u6 predicate - True if the immediate fits in a 6-bit unsigned extended
-  // field.
-  int64_t v = (int64_t)N->getSExtValue();
-  return isUInt<6>(v);
-}
-
-
-static bool IsU6_1_Offset(SDNode * S) {
-  ConstantSDNode *N = cast<ConstantSDNode>(S);
-
-  // u6 predicate - True if the immediate fits in a 6-bit unsigned extended
-  // field.
-  int64_t v = (int64_t)N->getSExtValue();
-  return isShiftedUInt<6,1>(v);
-}
-
-
-static bool IsU6_2_Offset(SDNode * S) {
-  ConstantSDNode *N = cast<ConstantSDNode>(S);
-
-  // u6 predicate - True if the immediate fits in a 6-bit unsigned extended
-  // field.
-  int64_t v = (int64_t)N->getSExtValue();
-  return isShiftedUInt<6,2>(v);
-}
-
-
// Intrinsics that return a predicate.
static unsigned doesIntrinsicReturnPredicate(unsigned ID)
{
@@ -332,216 +243,119 @@ static unsigned doesIntrinsicReturnPredicate(unsigned ID)
  }
}

-static bool OffsetFitsS11(EVT MemType, int64_t Offset) {
-  if (MemType == MVT::i64 && isShiftedInt<11,3>(Offset)) {
-    return true;
-  }
-  if (MemType == MVT::i32 && isShiftedInt<11,2>(Offset)) {
-    return true;
-  }
-  if (MemType == MVT::i16 && isShiftedInt<11,1>(Offset)) {
-    return true;
-  }
-  if (MemType == MVT::i8 && isInt<11>(Offset)) {
-    return true;
-  }
-  return false;
-}
-
-
-//
-// Try to lower loads of GlobalAddresses into base+offset loads. Custom
-// lowering for GlobalAddress nodes has already turned it into a
-// CONST32.
-//
-SDNode *HexagonDAGToDAGISel::SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl) {
-  SDValue Chain = LD->getChain();
-  SDNode* Const32 = LD->getBasePtr().getNode();
-  unsigned Opcode = 0;
-
-  if (Const32->getOpcode() == HexagonISD::CONST32 &&
-      ISD::isNormalLoad(LD)) {
-    SDValue Base = Const32->getOperand(0);
-    EVT LoadedVT = LD->getMemoryVT();
-    int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
-    if (Offset != 0 && OffsetFitsS11(LoadedVT, Offset)) {
-      MVT PointerTy = getTargetLowering()->getPointerTy();
-      const GlobalValue* GV =
-        cast<GlobalAddressSDNode>(Base)->getGlobal();
-      SDValue TargAddr =
-        CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
-      SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
-                                               dl, PointerTy,
-                                               TargAddr);
-      // Figure out base + offset opcode
-      if (LoadedVT == MVT::i64) Opcode = Hexagon::L2_loadrd_io;
-      else if (LoadedVT == MVT::i32) Opcode = Hexagon::L2_loadri_io;
-      else if (LoadedVT == MVT::i16) Opcode = Hexagon::L2_loadrh_io;
-      else if (LoadedVT == MVT::i8) Opcode = Hexagon::L2_loadrb_io;
-      else llvm_unreachable("unknown memory type");
-
-      // Build indexed load.
- SDValue TargetConstOff = CurDAG->getTargetConstant(Offset, PointerTy); - SDNode* Result = CurDAG->getMachineNode(Opcode, dl, - LD->getValueType(0), - MVT::Other, - SDValue(NewBase,0), - TargetConstOff, - Chain); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = LD->getMemOperand(); - cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); - ReplaceUses(LD, Result); - return Result; - } - } - - return SelectCode(LD); -} - - SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD, unsigned Opcode, - SDLoc dl) -{ + SDLoc dl) { SDValue Chain = LD->getChain(); EVT LoadedVT = LD->getMemoryVT(); SDValue Base = LD->getBasePtr(); SDValue Offset = LD->getOffset(); SDNode *OffsetNode = Offset.getNode(); int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); - SDValue N1 = LD->getOperand(1); - SDValue CPTmpN1_0; - SDValue CPTmpN1_1; - - if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) && - N1.getNode()->getValueType(0) == MVT::i32) { - const HexagonInstrInfo *TII = Subtarget->getInstrInfo(); - if (TII->isValidAutoIncImm(LoadedVT, Val)) { - SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32); - SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, - MVT::Other, Base, TargetConst, - Chain); - SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64, - SDValue(Result_1, 0)); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = LD->getMemOperand(); - cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); - const SDValue Froms[] = { SDValue(LD, 0), - SDValue(LD, 1), - SDValue(LD, 2) - }; - const SDValue Tos[] = { SDValue(Result_2, 0), - SDValue(Result_1, 1), - SDValue(Result_1, 2) - }; - ReplaceUses(Froms, Tos, 3); - return Result_2; - } - SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); - SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); - SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, - MVT::Other, Base, TargetConst0, + + const HexagonInstrInfo &TII = *HST->getInstrInfo(); + if (TII.isValidAutoIncImm(LoadedVT, Val)) { + SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, + MVT::Other, Base, TargetConst, Chain); - SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, - MVT::i64, SDValue(Result_1, 0)); - SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, - MVT::i32, Base, TargetConstVal, - SDValue(Result_1, 1)); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64, + SDValue(Result_1, 0)); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = LD->getMemOperand(); cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); const SDValue Froms[] = { SDValue(LD, 0), SDValue(LD, 1), - SDValue(LD, 2) - }; + SDValue(LD, 2) }; const SDValue Tos[] = { SDValue(Result_2, 0), - SDValue(Result_3, 0), - SDValue(Result_1, 1) - }; + SDValue(Result_1, 1), + SDValue(Result_1, 2) }; ReplaceUses(Froms, Tos, 3); return Result_2; } - return SelectCode(LD); + + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other, + Base, TargetConst0, Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64, + SDValue(Result_1, 0)); + SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, + Base, 
TargetConstVal, + SDValue(Result_1, 1)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) }; + const SDValue Tos[] = { SDValue(Result_2, 0), + SDValue(Result_3, 0), + SDValue(Result_1, 1) }; + ReplaceUses(Froms, Tos, 3); + return Result_2; } SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD, unsigned Opcode, - SDLoc dl) -{ + SDLoc dl) { SDValue Chain = LD->getChain(); EVT LoadedVT = LD->getMemoryVT(); SDValue Base = LD->getBasePtr(); SDValue Offset = LD->getOffset(); SDNode *OffsetNode = Offset.getNode(); int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); - SDValue N1 = LD->getOperand(1); - SDValue CPTmpN1_0; - SDValue CPTmpN1_1; - - if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) && - N1.getNode()->getValueType(0) == MVT::i32) { - const HexagonInstrInfo *TII = Subtarget->getInstrInfo(); - if (TII->isValidAutoIncImm(LoadedVT, Val)) { - SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); - SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); - SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, - MVT::i32, MVT::Other, Base, - TargetConstVal, Chain); - SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, - TargetConst0); - SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl, - MVT::i64, MVT::Other, - SDValue(Result_2,0), - SDValue(Result_1,0)); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = LD->getMemOperand(); - cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); - const SDValue Froms[] = { SDValue(LD, 0), - SDValue(LD, 1), - SDValue(LD, 2) - }; - const SDValue Tos[] = { SDValue(Result_3, 0), - SDValue(Result_1, 1), - SDValue(Result_1, 2) - }; - ReplaceUses(Froms, Tos, 3); - return Result_3; - } - // Generate an indirect load. - SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + const HexagonInstrInfo &TII = *HST->getInstrInfo(); + if (TII.isValidAutoIncImm(LoadedVT, Val)) { SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, - MVT::Other, - Base, TargetConst0, Chain); - SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, - TargetConst0); - SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl, + MVT::i32, MVT::Other, Base, + TargetConstVal, Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A4_combineir, dl, MVT::i64, MVT::Other, - SDValue(Result_2,0), + TargetConst0, SDValue(Result_1,0)); - // Add offset to base. - SDNode* Result_4 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, - Base, TargetConstVal, - SDValue(Result_1, 1)); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = LD->getMemOperand(); cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); const SDValue Froms[] = { SDValue(LD, 0), SDValue(LD, 1), - SDValue(LD, 2) - }; - const SDValue Tos[] = { SDValue(Result_3, 0), // Load value. - SDValue(Result_4, 0), // New address. - SDValue(Result_1, 1) - }; + SDValue(LD, 2) }; + const SDValue Tos[] = { SDValue(Result_2, 0), + SDValue(Result_1, 1), + SDValue(Result_1, 2) }; ReplaceUses(Froms, Tos, 3); - return Result_3; + return Result_2; } - return SelectCode(LD); + // Generate an indirect load. 
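[Editor's note] Both paths of SelectIndexedLoadZeroExtend64 now funnel the 32-bit load through A4_combineir with a zero immediate. What that node computes, modeled on plain integers (illustrative helper, not an LLVM API); the indirect-load fallback continues just below.

```cpp
#include <cstdint>

// combine(#imm, Rs): the immediate goes to the high word, the register
// to the low word. With #0 this is exactly a 32->64-bit zero-extension.
uint64_t combineIR(int32_t ImmHi, uint32_t RegLo) {
  return ((uint64_t)(uint32_t)ImmHi << 32) | RegLo;
}
// zero-extend: uint64_t Wide = combineIR(0, Loaded32);
```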
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, + MVT::Other, Base, TargetConst0, + Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A4_combineir, dl, + MVT::i64, MVT::Other, + TargetConst0, + SDValue(Result_1,0)); + // Add offset to base. + SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, + Base, TargetConstVal, + SDValue(Result_1, 1)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) }; + const SDValue Tos[] = { SDValue(Result_2, 0), // Load value. + SDValue(Result_3, 0), // New address. + SDValue(Result_1, 1) }; + ReplaceUses(Froms, Tos, 3); + return Result_2; } @@ -555,45 +369,44 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) { EVT LoadedVT = LD->getMemoryVT(); unsigned Opcode = 0; - // Check for zero ext loads. - bool zextval = (LD->getExtensionType() == ISD::ZEXTLOAD); + // Check for zero extended loads. Treat any-extend loads as zero extended + // loads. + ISD::LoadExtType ExtType = LD->getExtensionType(); + bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD); // Figure out the opcode. - const HexagonInstrInfo *TII = Subtarget->getInstrInfo(); + const HexagonInstrInfo &TII = *HST->getInstrInfo(); if (LoadedVT == MVT::i64) { - if (TII->isValidAutoIncImm(LoadedVT, Val)) + if (TII.isValidAutoIncImm(LoadedVT, Val)) Opcode = Hexagon::L2_loadrd_pi; else Opcode = Hexagon::L2_loadrd_io; } else if (LoadedVT == MVT::i32) { - if (TII->isValidAutoIncImm(LoadedVT, Val)) + if (TII.isValidAutoIncImm(LoadedVT, Val)) Opcode = Hexagon::L2_loadri_pi; else Opcode = Hexagon::L2_loadri_io; } else if (LoadedVT == MVT::i16) { - if (TII->isValidAutoIncImm(LoadedVT, Val)) - Opcode = zextval ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi; + if (TII.isValidAutoIncImm(LoadedVT, Val)) + Opcode = IsZeroExt ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi; else - Opcode = zextval ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io; + Opcode = IsZeroExt ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io; } else if (LoadedVT == MVT::i8) { - if (TII->isValidAutoIncImm(LoadedVT, Val)) - Opcode = zextval ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi; + if (TII.isValidAutoIncImm(LoadedVT, Val)) + Opcode = IsZeroExt ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi; else - Opcode = zextval ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io; + Opcode = IsZeroExt ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io; } else llvm_unreachable("unknown memory type"); - // For zero ext i64 loads, we need to add combine instructions. - if (LD->getValueType(0) == MVT::i64 && - LD->getExtensionType() == ISD::ZEXTLOAD) { + // For zero extended i64 loads, we need to add combine instructions. + if (LD->getValueType(0) == MVT::i64 && IsZeroExt) return SelectIndexedLoadZeroExtend64(LD, Opcode, dl); - } - if (LD->getValueType(0) == MVT::i64 && - LD->getExtensionType() == ISD::SEXTLOAD) { - // Handle sign ext i64 loads. + // Handle sign extended i64 loads. 
+ if (LD->getValueType(0) == MVT::i64 && ExtType == ISD::SEXTLOAD) return SelectIndexedLoadSignExtend64(LD, Opcode, dl); - } - if (TII->isValidAutoIncImm(LoadedVT, Val)) { + + if (TII.isValidAutoIncImm(LoadedVT, Val)) { SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); SDNode* Result = CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), @@ -649,7 +462,7 @@ SDNode *HexagonDAGToDAGISel::SelectLoad(SDNode *N) { if (AM != ISD::UNINDEXED) { result = SelectIndexedLoad(LD, dl); } else { - result = SelectBaseOffsetLoad(LD, dl); + result = SelectCode(LD); } return result; @@ -665,13 +478,12 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) { // Get the constant value. int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); EVT StoredVT = ST->getMemoryVT(); + EVT ValueVT = Value.getValueType(); // Offset value must be within representable range // and must have correct alignment properties. - const HexagonInstrInfo *TII = Subtarget->getInstrInfo(); - if (TII->isValidAutoIncImm(StoredVT, Val)) { - SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, MVT::i32), Value, - Chain}; + const HexagonInstrInfo &TII = *HST->getInstrInfo(); + if (TII.isValidAutoIncImm(StoredVT, Val)) { unsigned Opcode = 0; // Figure out the post inc version of opcode. @@ -681,6 +493,13 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) { else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_pi; else llvm_unreachable("unknown memory type"); + if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) { + assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store"); + Value = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg, + dl, MVT::i32, Value); + } + SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, MVT::i32), Value, + Chain}; // Build post increment store. SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other, Ops); @@ -694,7 +513,8 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) { } // Note: Order of operands matches the def of instruction: - // def STrid : STInst<(outs), (ins MEMri:$addr, DoubleRegs:$src1), ... + // def S2_storerd_io + // : STInst<(outs), (ins IntRegs:$base, imm:$offset, DoubleRegs:$src1), ... // and it differs for POST_ST* for instance. SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, MVT::i32), Value, Chain}; @@ -724,61 +544,6 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) { return Result_2; } - -SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST, - SDLoc dl) { - SDValue Chain = ST->getChain(); - SDNode* Const32 = ST->getBasePtr().getNode(); - SDValue Value = ST->getValue(); - unsigned Opcode = 0; - - // Try to lower stores of GlobalAdresses into indexed stores. Custom - // lowering for GlobalAddress nodes has already turned it into a - // CONST32. Avoid truncating stores for the moment. Post-inc stores - // do the same. Don't think there's a reason for it, so will file a - // bug to fix. 
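[Editor's note] Both the load and store selectors above gate the post-increment forms on isValidAutoIncImm(). As a rough model (the exact encoding is an assumption here, not stated by the patch), Hexagon post-increment immediates are small signed values scaled by the access size, e.g. memw(Rx++#s4:2). The deleted SelectBaseOffsetStore body continues below.

```cpp
#include <cstdint>

// Hypothetical sketch of the auto-increment gate, assuming a signed
// 4-bit field scaled by the access width.
bool validAutoIncSketch(unsigned AccessBytes, int64_t Inc) {
  if (Inc % AccessBytes != 0)          // must step by whole elements
    return false;
  int64_t Scaled = Inc / AccessBytes;
  return Scaled >= -8 && Scaled <= 7;  // fits #s4
}
```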
- if ((Const32->getOpcode() == HexagonISD::CONST32) && - !(Value.getValueType() == MVT::i64 && ST->isTruncatingStore())) { - SDValue Base = Const32->getOperand(0); - if (Base.getOpcode() == ISD::TargetGlobalAddress) { - EVT StoredVT = ST->getMemoryVT(); - int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset(); - if (Offset != 0 && OffsetFitsS11(StoredVT, Offset)) { - MVT PointerTy = getTargetLowering()->getPointerTy(); - const GlobalValue* GV = - cast<GlobalAddressSDNode>(Base)->getGlobal(); - SDValue TargAddr = - CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0); - SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set, - dl, PointerTy, - TargAddr); - - // Figure out base + offset opcode - if (StoredVT == MVT::i64) Opcode = Hexagon::S2_storerd_io; - else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_io; - else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_io; - else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_io; - else llvm_unreachable("unknown memory type"); - - SDValue Ops[] = {SDValue(NewBase,0), - CurDAG->getTargetConstant(Offset,PointerTy), - Value, Chain}; - // build indexed store - SDNode* Result = CurDAG->getMachineNode(Opcode, dl, - MVT::Other, Ops); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = ST->getMemOperand(); - cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); - ReplaceUses(ST, Result); - return Result; - } - } - } - - return SelectCode(ST); -} - - SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) { SDLoc dl(N); StoreSDNode *ST = cast<StoreSDNode>(N); @@ -789,7 +554,7 @@ SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) { return SelectIndexedStore(ST, dl); } - return SelectBaseOffsetStore(ST, dl); + return SelectCode(ST); } SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { @@ -875,187 +640,6 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { return SelectCode(N); } - -SDNode *HexagonDAGToDAGISel::SelectSelect(SDNode *N) { - SDLoc dl(N); - SDValue N0 = N->getOperand(0); - if (N0.getOpcode() == ISD::SETCC) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::SIGN_EXTEND_INREG) { - SDValue N000 = N00.getOperand(0); - SDValue N001 = N00.getOperand(1); - if (cast<VTSDNode>(N001)->getVT() == MVT::i16) { - SDValue N01 = N0.getOperand(1); - SDValue N02 = N0.getOperand(2); - - // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2, - // i16:Other),IntRegs:i32:$src1, SETLT:Other),IntRegs:i32:$src1, - // IntRegs:i32:$src2) - // Emits: (MAXh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2) - // Pattern complexity = 9 cost = 1 size = 0. - if (cast<CondCodeSDNode>(N02)->get() == ISD::SETLT) { - SDValue N1 = N->getOperand(1); - if (N01 == N1) { - SDValue N2 = N->getOperand(2); - if (N000 == N2 && - N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 && - N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) { - SDNode *SextNode = CurDAG->getMachineNode(Hexagon::A2_sxth, dl, - MVT::i32, N000); - SDNode *Result = CurDAG->getMachineNode(Hexagon::A2_max, dl, - MVT::i32, - SDValue(SextNode, 0), - N1); - ReplaceUses(N, Result); - return Result; - } - } - } - - // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2, - // i16:Other), IntRegs:i32:$src1, SETGT:Other), IntRegs:i32:$src1, - // IntRegs:i32:$src2) - // Emits: (MINh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2) - // Pattern complexity = 9 cost = 1 size = 0. 
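[Editor's note] The SETLT and SETGT pattern comments above translate directly into source code: the idiom the deleted SelectSelect() matcher recognized is min/max of an int against a sign-extended short (the SETGT case's matching code continues just below).

```cpp
#include <cstdint>

int32_t maxShortInt(int16_t A, int32_t B) {
  return A < B ? B : A;   // SETLT form -> A2_sxth + A2_max
}
int32_t minShortInt(int16_t A, int32_t B) {
  return A > B ? B : A;   // SETGT form -> A2_sxth + A2_min
}
```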
- if (cast<CondCodeSDNode>(N02)->get() == ISD::SETGT) { - SDValue N1 = N->getOperand(1); - if (N01 == N1) { - SDValue N2 = N->getOperand(2); - if (N000 == N2 && - N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 && - N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) { - SDNode *SextNode = CurDAG->getMachineNode(Hexagon::A2_sxth, dl, - MVT::i32, N000); - SDNode *Result = CurDAG->getMachineNode(Hexagon::A2_min, dl, - MVT::i32, - SDValue(SextNode, 0), - N1); - ReplaceUses(N, Result); - return Result; - } - } - } - } - } - } - - return SelectCode(N); -} - - -SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) { - SDLoc dl(N); - SDValue Shift = N->getOperand(0); - - // - // %conv.i = sext i32 %tmp1 to i64 - // %conv2.i = sext i32 %add to i64 - // %mul.i = mul nsw i64 %conv2.i, %conv.i - // %shr5.i = lshr i64 %mul.i, 32 - // %conv3.i = trunc i64 %shr5.i to i32 - // - // --- match with the following --- - // - // %conv3.i = mpy (%tmp1, %add) - // - // Trunc to i32. - if (N->getValueType(0) == MVT::i32) { - // Trunc from i64. - if (Shift.getNode()->getValueType(0) == MVT::i64) { - // Trunc child is logical shift right. - if (Shift.getOpcode() != ISD::SRL) { - return SelectCode(N); - } - - SDValue ShiftOp0 = Shift.getOperand(0); - SDValue ShiftOp1 = Shift.getOperand(1); - - // Shift by const 32 - if (ShiftOp1.getOpcode() != ISD::Constant) { - return SelectCode(N); - } - - int32_t ShiftConst = - cast<ConstantSDNode>(ShiftOp1.getNode())->getSExtValue(); - if (ShiftConst != 32) { - return SelectCode(N); - } - - // Shifting a i64 signed multiply - SDValue Mul = ShiftOp0; - if (Mul.getOpcode() != ISD::MUL) { - return SelectCode(N); - } - - SDValue MulOp0 = Mul.getOperand(0); - SDValue MulOp1 = Mul.getOperand(1); - - SDValue OP0; - SDValue OP1; - - // Handle sign_extend and sextload - if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) { - SDValue Sext0 = MulOp0.getOperand(0); - if (Sext0.getNode()->getValueType(0) != MVT::i32) { - return SelectCode(N); - } - - OP0 = Sext0; - } else if (MulOp0.getOpcode() == ISD::LOAD) { - LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode()); - if (LD->getMemoryVT() != MVT::i32 || - LD->getExtensionType() != ISD::SEXTLOAD || - LD->getAddressingMode() != ISD::UNINDEXED) { - return SelectCode(N); - } - - SDValue Chain = LD->getChain(); - SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); - OP0 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32, - MVT::Other, - LD->getBasePtr(), - TargetConst0, Chain), 0); - } else { - return SelectCode(N); - } - - // Same goes for the second operand. - if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) { - SDValue Sext1 = MulOp1.getOperand(0); - if (Sext1.getNode()->getValueType(0) != MVT::i32) - return SelectCode(N); - - OP1 = Sext1; - } else if (MulOp1.getOpcode() == ISD::LOAD) { - LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode()); - if (LD->getMemoryVT() != MVT::i32 || - LD->getExtensionType() != ISD::SEXTLOAD || - LD->getAddressingMode() != ISD::UNINDEXED) { - return SelectCode(N); - } - - SDValue Chain = LD->getChain(); - SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); - OP1 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32, - MVT::Other, - LD->getBasePtr(), - TargetConst0, Chain), 0); - } else { - return SelectCode(N); - } - - // Generate a mpy instruction. 
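[Editor's note] The IR quoted in the deleted comments is the standard signed high-multiply idiom. In C++ it is the one-liner below, which the matcher collapsed into a single M2_mpy_up (its node-creation code continues below).

```cpp
#include <cstdint>

// 32x32 -> upper 32 bits of the signed product: trunc(lshr(mul, 32)).
int32_t mulHighSigned(int32_t A, int32_t B) {
  return (int32_t)(((int64_t)A * (int64_t)B) >> 32);
}
```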
- SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpy_up, dl, MVT::i32, - OP0, OP1); - ReplaceUses(N, Result); - return Result; - } - } - - return SelectCode(N); -} - - SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) { SDLoc dl(N); if (N->getValueType(0) == MVT::i32) { @@ -1134,6 +718,36 @@ SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) { // SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) { SDLoc dl(N); + + SDValue Op0 = N->getOperand(0); + EVT OpVT = Op0.getValueType(); + unsigned OpBW = OpVT.getSizeInBits(); + + // Special handling for zero-extending a vector of booleans. + if (OpVT.isVector() && OpVT.getVectorElementType() == MVT::i1 && OpBW <= 64) { + SDNode *Mask = CurDAG->getMachineNode(Hexagon::C2_mask, dl, MVT::i64, Op0); + unsigned NE = OpVT.getVectorNumElements(); + EVT ExVT = N->getValueType(0); + unsigned ES = ExVT.getVectorElementType().getSizeInBits(); + uint64_t MV = 0, Bit = 1; + for (unsigned i = 0; i < NE; ++i) { + MV |= Bit; + Bit <<= ES; + } + SDValue Ones = CurDAG->getTargetConstant(MV, MVT::i64); + SDNode *OnesReg = CurDAG->getMachineNode(Hexagon::CONST64_Int_Real, dl, + MVT::i64, Ones); + if (ExVT.getSizeInBits() == 32) { + SDNode *And = CurDAG->getMachineNode(Hexagon::A2_andp, dl, MVT::i64, + SDValue(Mask,0), SDValue(OnesReg,0)); + SDValue SubR = CurDAG->getTargetConstant(Hexagon::subreg_loreg, MVT::i32); + return CurDAG->getMachineNode(Hexagon::EXTRACT_SUBREG, dl, ExVT, + SDValue(And,0), SubR); + } + return CurDAG->getMachineNode(Hexagon::A2_andp, dl, ExVT, + SDValue(Mask,0), SDValue(OnesReg,0)); + } + SDNode *IsIntrinsic = N->getOperand(0).getNode(); if ((IsIntrinsic->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) { unsigned ID = @@ -1141,7 +755,7 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) { if (doesIntrinsicReturnPredicate(ID)) { // Now we need to differentiate target data types. if (N->getValueType(0) == MVT::i64) { - // Convert the zero_extend to Rs = Pd followed by COMBINE_rr(0,Rs). + // Convert the zero_extend to Rs = Pd followed by A2_combinew(0,Rs). SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl, MVT::i32, @@ -1171,6 +785,203 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) { } // +// Checking for intrinsics circular load/store, and bitreverse load/store +// instrisics in order to select the correct lowered operation. +// +SDNode *HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) { + unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + if (IntNo == Intrinsic::hexagon_circ_ldd || + IntNo == Intrinsic::hexagon_circ_ldw || + IntNo == Intrinsic::hexagon_circ_lduh || + IntNo == Intrinsic::hexagon_circ_ldh || + IntNo == Intrinsic::hexagon_circ_ldub || + IntNo == Intrinsic::hexagon_circ_ldb) { + SDLoc dl(N); + SDValue Chain = N->getOperand(0); + SDValue Base = N->getOperand(2); + SDValue Load = N->getOperand(3); + SDValue ModifierExpr = N->getOperand(4); + SDValue Offset = N->getOperand(5); + + // We need to add the rerurn type for the load. This intrinsic has + // two return types, one for the load and one for the post-increment. + // Only the *_ld instructions push the extra return type, and bump the + // result node operand number correspondingly. 
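[Editor's note] For readers unfamiliar with the circ intrinsics: a circular load advances the pointer and wraps it inside a fixed buffer, with the buffer length held in a modifier register. A scalar model with illustrative names (the real intrinsic signatures are not shown in this patch); the per-width opcode table follows below.

```cpp
#include <cstddef>
#include <cstdint>

// Hypothetical model of a circular word load: read the element, then
// bump the index modulo the buffer length.
int32_t circLoadW(const int32_t *Buf, size_t LenWords, size_t &Idx) {
  int32_t V = Buf[Idx];
  Idx = (Idx + 1) % LenWords;   // wrap-around step
  return V;
}
```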
+ std::vector<EVT> ResTys; + unsigned opc; + unsigned memsize, align; + MVT MvtSize = MVT::i32; + + if (IntNo == Intrinsic::hexagon_circ_ldd) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i64); + opc = Hexagon::L2_loadrd_pci_pseudo; + memsize = 8; + align = 8; + } else if (IntNo == Intrinsic::hexagon_circ_ldw) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadri_pci_pseudo; + memsize = 4; + align = 4; + } else if (IntNo == Intrinsic::hexagon_circ_ldh) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrh_pci_pseudo; + memsize = 2; + align = 2; + MvtSize = MVT::i16; + } else if (IntNo == Intrinsic::hexagon_circ_lduh) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadruh_pci_pseudo; + memsize = 2; + align = 2; + MvtSize = MVT::i16; + } else if (IntNo == Intrinsic::hexagon_circ_ldb) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrb_pci_pseudo; + memsize = 1; + align = 1; + MvtSize = MVT::i8; + } else if (IntNo == Intrinsic::hexagon_circ_ldub) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrub_pci_pseudo; + memsize = 1; + align = 1; + MvtSize = MVT::i8; + } else + llvm_unreachable("no opc"); + + ResTys.push_back(MVT::Other); + + // Copy over the arguments, which are the same mostly. + SmallVector<SDValue, 5> Ops; + Ops.push_back(Base); + Ops.push_back(Load); + Ops.push_back(ModifierExpr); + int32_t Val = cast<ConstantSDNode>(Offset.getNode())->getSExtValue(); + Ops.push_back(CurDAG->getTargetConstant(Val, MVT::i32)); + Ops.push_back(Chain); + SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops); + + SDValue ST; + MachineMemOperand *Mem = + MF->getMachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOStore, memsize, align); + if (MvtSize != MVT::i32) + ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load, + MvtSize, Mem); + else + ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem); + + SDNode* Store = SelectStore(ST.getNode()); + + const SDValue Froms[] = { SDValue(N, 0), + SDValue(N, 1) }; + const SDValue Tos[] = { SDValue(Result, 0), + SDValue(Store, 0) }; + ReplaceUses(Froms, Tos, 2); + return Result; + } + + if (IntNo == Intrinsic::hexagon_brev_ldd || + IntNo == Intrinsic::hexagon_brev_ldw || + IntNo == Intrinsic::hexagon_brev_ldh || + IntNo == Intrinsic::hexagon_brev_lduh || + IntNo == Intrinsic::hexagon_brev_ldb || + IntNo == Intrinsic::hexagon_brev_ldub) { + SDLoc dl(N); + SDValue Chain = N->getOperand(0); + SDValue Base = N->getOperand(2); + SDValue Load = N->getOperand(3); + SDValue ModifierExpr = N->getOperand(4); + + // We need to add the rerurn type for the load. This intrinsic has + // two return types, one for the load and one for the post-increment. 
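[Editor's note] The brev variants handled next use bit-reversed addressing, the access pattern of radix-2 FFTs. What bit-reversing an index means, as a standalone sketch; the opcode table for the brev forms follows below.

```cpp
#include <cstdint>

// Reverse the low `Bits` bits of an index; element i of a 2^Bits table
// is fetched at Table[bitReverse(i, Bits)].
uint32_t bitReverse(uint32_t X, unsigned Bits) {
  uint32_t R = 0;
  for (unsigned I = 0; I < Bits; ++I)
    R |= ((X >> I) & 1u) << (Bits - 1 - I);
  return R;
}
```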
+ std::vector<EVT> ResTys; + unsigned opc; + unsigned memsize, align; + MVT MvtSize = MVT::i32; + + if (IntNo == Intrinsic::hexagon_brev_ldd) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i64); + opc = Hexagon::L2_loadrd_pbr_pseudo; + memsize = 8; + align = 8; + } else if (IntNo == Intrinsic::hexagon_brev_ldw) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadri_pbr_pseudo; + memsize = 4; + align = 4; + } else if (IntNo == Intrinsic::hexagon_brev_ldh) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrh_pbr_pseudo; + memsize = 2; + align = 2; + MvtSize = MVT::i16; + } else if (IntNo == Intrinsic::hexagon_brev_lduh) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadruh_pbr_pseudo; + memsize = 2; + align = 2; + MvtSize = MVT::i16; + } else if (IntNo == Intrinsic::hexagon_brev_ldb) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrb_pbr_pseudo; + memsize = 1; + align = 1; + MvtSize = MVT::i8; + } else if (IntNo == Intrinsic::hexagon_brev_ldub) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrub_pbr_pseudo; + memsize = 1; + align = 1; + MvtSize = MVT::i8; + } else + llvm_unreachable("no opc"); + + ResTys.push_back(MVT::Other); + + // Copy over the arguments, which are the same mostly. + SmallVector<SDValue, 4> Ops; + Ops.push_back(Base); + Ops.push_back(Load); + Ops.push_back(ModifierExpr); + Ops.push_back(Chain); + SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops); + SDValue ST; + MachineMemOperand *Mem = + MF->getMachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOStore, memsize, align); + if (MvtSize != MVT::i32) + ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load, + MvtSize, Mem); + else + ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem); + + SDNode* Store = SelectStore(ST.getNode()); + + const SDValue Froms[] = { SDValue(N, 0), + SDValue(N, 1) }; + const SDValue Tos[] = { SDValue(Result, 0), + SDValue(Store, 0) }; + ReplaceUses(Froms, Tos, 2); + return Result; + } + + return SelectCode(N); +} + +// // Checking for intrinsics which have predicate registers as operand(s) // and lowering to the actual intrinsic. // @@ -1217,37 +1028,20 @@ SDNode *HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) { return SelectCode(N); } - // // Map predicate true (encoded as -1 in LLVM) to a XOR. // SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) { SDLoc dl(N); if (N->getValueType(0) == MVT::i1) { - SDNode* Result; + SDNode* Result = 0; int32_t Val = cast<ConstantSDNode>(N)->getSExtValue(); if (Val == -1) { - // Create the IntReg = 1 node. 
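[Editor's note] The deleted sequence below built an all-ones predicate as xor(not(Pd), Pd), four machine nodes for what TFR_PdTrue now does in one. The Boolean identity it relied on:

```cpp
#include <cstdint>

// For every bit p, (~p ^ p) == 1, so xor(not(Pd), Pd) is all-ones
// regardless of Pd's contents.
static_assert(((~UINT32_C(0) ^ UINT32_C(0)) & 1u) == 1u &&
              ((~UINT32_C(1) ^ UINT32_C(1)) & 1u) == 1u,
              "xor(not p, p) is identically true");
```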
- SDNode* IntRegTFR = - CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, - CurDAG->getTargetConstant(0, MVT::i32)); - - // Pd = IntReg - SDNode* Pd = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::i1, - SDValue(IntRegTFR, 0)); - - // not(Pd) - SDNode* NotPd = CurDAG->getMachineNode(Hexagon::C2_not, dl, MVT::i1, - SDValue(Pd, 0)); - - // xor(not(Pd)) - Result = CurDAG->getMachineNode(Hexagon::C2_xor, dl, MVT::i1, - SDValue(Pd, 0), SDValue(NotPd, 0)); - - // We have just built: - // Rs = Pd - // Pd = xor(not(Pd), Pd) - + Result = CurDAG->getMachineNode(Hexagon::TFR_PdTrue, dl, MVT::i1); + } else if (Val == 0) { + Result = CurDAG->getMachineNode(Hexagon::TFR_PdFalse, dl, MVT::i1); + } + if (Result) { ReplaceUses(N, Result); return Result; } @@ -1283,347 +1077,282 @@ SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) { return Result; } - -SDNode *HexagonDAGToDAGISel::Select(SDNode *N) { - if (N->isMachineOpcode()) { - N->setNodeId(-1); - return nullptr; // Already selected. - } - - - switch (N->getOpcode()) { - case ISD::Constant: - return SelectConstant(N); - - case ISD::ConstantFP: - return SelectConstantFP(N); - - case ISD::ADD: - return SelectAdd(N); - - case ISD::SHL: - return SelectSHL(N); - - case ISD::LOAD: - return SelectLoad(N); - - case ISD::STORE: - return SelectStore(N); - - case ISD::SELECT: - return SelectSelect(N); - - case ISD::TRUNCATE: - return SelectTruncate(N); - - case ISD::MUL: - return SelectMul(N); - - case ISD::ZERO_EXTEND: - return SelectZeroExtend(N); - - case ISD::INTRINSIC_WO_CHAIN: - return SelectIntrinsicWOChain(N); - } - - return SelectCode(N); -} - - // -// Hexagon_TODO: Five functions for ADDRri?! Surely there must be a better way -// to define these instructions. +// Map the following, where possible. +// AND/FABS -> clrbit +// OR -> setbit +// XOR/FNEG ->toggle_bit. // -bool HexagonDAGToDAGISel::SelectADDRri(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. - - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return true; - } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return true; -} +SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) { + SDLoc dl(N); + EVT ValueVT = N->getValueType(0); + // We handle only 32 and 64-bit bit ops. + if (!(ValueVT == MVT::i32 || ValueVT == MVT::i64 || + ValueVT == MVT::f32 || ValueVT == MVT::f64)) + return SelectCode(N); -bool HexagonDAGToDAGISel::SelectADDRriS11_0(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. + // We handly only fabs and fneg for V5. 
+ unsigned Opc = N->getOpcode(); + if ((Opc == ISD::FABS || Opc == ISD::FNEG) && !HST->hasV5TOps()) + return SelectCode(N); - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_0_Offset(Offset.getNode())); + int64_t Val = 0; + if (Opc != ISD::FABS && Opc != ISD::FNEG) { + if (N->getOperand(1).getOpcode() == ISD::Constant) + Val = cast<ConstantSDNode>((N)->getOperand(1))->getSExtValue(); + else + return SelectCode(N); } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_0_Offset(Offset.getNode())); -} - -bool HexagonDAGToDAGISel::SelectADDRriS11_1(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. - - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_1_Offset(Offset.getNode())); + if (Opc == ISD::AND) { + if (((ValueVT == MVT::i32) && + (!((Val & 0x80000000) || (Val & 0x7fffffff)))) || + ((ValueVT == MVT::i64) && + (!((Val & 0x8000000000000000) || (Val & 0x7fffffff))))) + // If it's simple AND, do the normal op. + return SelectCode(N); + else + Val = ~Val; } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_1_Offset(Offset.getNode())); -} - - -bool HexagonDAGToDAGISel::SelectADDRriS11_2(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_2_Offset(Offset.getNode())); + // If OR or AND is being fed by shl, srl and, sra don't do this change, + // because Hexagon provide |= &= on shl, srl, and sra. + // Traverse the DAG to see if there is shl, srl and sra. + if (Opc == ISD::OR || Opc == ISD::AND) { + switch (N->getOperand(0)->getOpcode()) { + default: break; + case ISD::SRA: + case ISD::SRL: + case ISD::SHL: + return SelectCode(N); + } } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_2_Offset(Offset.getNode())); -} + // Make sure it's power of 2. + unsigned bitpos = 0; + if (Opc != ISD::FABS && Opc != ISD::FNEG) { + if (((ValueVT == MVT::i32) && !isPowerOf2_32(Val)) || + ((ValueVT == MVT::i64) && !isPowerOf2_64(Val))) + return SelectCode(N); -bool HexagonDAGToDAGISel::SelectADDRriU6_0(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. - - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsU6_0_Offset(Offset.getNode())); + // Get the bit position. + bitpos = countTrailingZeros(uint64_t(Val)); + } else { + // For fabs and fneg, it's always the 31st bit. + bitpos = 31; } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsU6_0_Offset(Offset.getNode())); -} + unsigned BitOpc = 0; + // Set the right opcode for bitwise operations. 
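[Editor's note] The opcode switch that follows works because of the IEEE-754 layout: the sign is the top bit of the representation, so FABS is a clear of bit 31 (f32) or bit 63 (f64) and FNEG is a toggle. The same trick on scalars:

```cpp
#include <cstdint>
#include <cstring>

float fnegBits(float X) {               // S2_togglebit_i #31
  uint32_t U;
  std::memcpy(&U, &X, sizeof U);
  U ^= UINT32_C(1) << 31;
  std::memcpy(&X, &U, sizeof X);
  return X;
}

float fabsBits(float X) {               // S2_clrbit_i #31
  uint32_t U;
  std::memcpy(&U, &X, sizeof U);
  U &= ~(UINT32_C(1) << 31);
  std::memcpy(&X, &U, sizeof X);
  return X;
}
```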
+ switch(Opc) { + default: llvm_unreachable("Only bit-wise/abs/neg operations are allowed."); + case ISD::AND: + case ISD::FABS: + BitOpc = Hexagon::S2_clrbit_i; + break; + case ISD::OR: + BitOpc = Hexagon::S2_setbit_i; + break; + case ISD::XOR: + case ISD::FNEG: + BitOpc = Hexagon::S2_togglebit_i; + break; + } -bool HexagonDAGToDAGISel::SelectADDRriU6_1(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. + SDNode *Result; + // Get the right SDVal for the opcode. + SDValue SDVal = CurDAG->getTargetConstant(bitpos, MVT::i32); - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsU6_1_Offset(Offset.getNode())); + if (ValueVT == MVT::i32 || ValueVT == MVT::f32) { + Result = CurDAG->getMachineNode(BitOpc, dl, ValueVT, + N->getOperand(0), SDVal); + } else { + // 64-bit gymnastic to use REG_SEQUENCE. But it's worth it. + EVT SubValueVT; + if (ValueVT == MVT::i64) + SubValueVT = MVT::i32; + else + SubValueVT = MVT::f32; + + SDNode *Reg = N->getOperand(0).getNode(); + SDValue RegClass = CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, + MVT::i64); + + SDValue SubregHiIdx = CurDAG->getTargetConstant(Hexagon::subreg_hireg, + MVT::i32); + SDValue SubregLoIdx = CurDAG->getTargetConstant(Hexagon::subreg_loreg, + MVT::i32); + + SDValue SubregHI = CurDAG->getTargetExtractSubreg(Hexagon::subreg_hireg, dl, + MVT::i32, SDValue(Reg, 0)); + + SDValue SubregLO = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg, dl, + MVT::i32, SDValue(Reg, 0)); + + // Clear/set/toggle hi or lo registers depending on the bit position. + if (SubValueVT != MVT::f32 && bitpos < 32) { + SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT, + SubregLO, SDVal); + const SDValue Ops[] = { RegClass, SubregHI, SubregHiIdx, + SDValue(Result0, 0), SubregLoIdx }; + Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, + dl, ValueVT, Ops); + } else { + if (Opc != ISD::FABS && Opc != ISD::FNEG) + SDVal = CurDAG->getTargetConstant(bitpos-32, MVT::i32); + SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT, + SubregHI, SDVal); + const SDValue Ops[] = { RegClass, SDValue(Result0, 0), SubregHiIdx, + SubregLO, SubregLoIdx }; + Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, + dl, ValueVT, Ops); + } } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsU6_1_Offset(Offset.getNode())); + + ReplaceUses(N, Result); + return Result; } -bool HexagonDAGToDAGISel::SelectADDRriU6_2(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. 
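[Editor's note] The REG_SEQUENCE path above has a simple scalar reading: pick whichever 32-bit half holds the target bit, apply the single-bit op there, and reassemble the pair.

```cpp
#include <cstdint>

uint64_t setBit64(uint64_t V, unsigned Pos) {
  uint32_t Lo = (uint32_t)V, Hi = (uint32_t)(V >> 32);
  if (Pos < 32)
    Lo |= UINT32_C(1) << Pos;           // operate on subreg_loreg
  else
    Hi |= UINT32_C(1) << (Pos - 32);    // operate on subreg_hireg
  return ((uint64_t)Hi << 32) | Lo;     // REG_SEQUENCE of the halves
}
```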
+SDNode *HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) { + int FX = cast<FrameIndexSDNode>(N)->getIndex(); + SDValue FI = CurDAG->getTargetFrameIndex(FX, MVT::i32); + SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32); + SDLoc DL(N); - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsU6_2_Offset(Offset.getNode())); - } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsU6_2_Offset(Offset.getNode())); -} + SDNode *R = CurDAG->getMachineNode(Hexagon::TFR_FI, DL, MVT::i32, FI, Zero); + if (N->getHasDebugValue()) + CurDAG->TransferDbgValues(SDValue(N, 0), SDValue(R, 0)); + return R; +} -bool HexagonDAGToDAGISel::SelectMEMriS11_2(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() != ISD::ADD) { - return(SelectADDRriS11_2(Addr, Base, Offset)); +SDNode *HexagonDAGToDAGISel::Select(SDNode *N) { + if (N->isMachineOpcode()) { + N->setNodeId(-1); + return nullptr; // Already selected. } - return SelectADDRriS11_2(Addr, Base, Offset); -} + switch (N->getOpcode()) { + case ISD::Constant: + return SelectConstant(N); + case ISD::ConstantFP: + return SelectConstantFP(N); -bool HexagonDAGToDAGISel::SelectADDRriS11_3(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. + case ISD::FrameIndex: + return SelectFrameIndex(N); - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_3_Offset(Offset.getNode())); - } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_3_Offset(Offset.getNode())); -} + case ISD::ADD: + return SelectAdd(N); -bool HexagonDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, - SDValue &R2) { - if (Addr.getOpcode() == ISD::FrameIndex) return false; - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. - - if (Addr.getOpcode() == ISD::ADD) { - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) - if (isInt<13>(CN->getSExtValue())) - return false; // Let the reg+imm pattern catch this! - R1 = Addr.getOperand(0); - R2 = Addr.getOperand(1); - return true; - } + case ISD::SHL: + return SelectSHL(N); - R1 = Addr; + case ISD::LOAD: + return SelectLoad(N); - return true; -} + case ISD::STORE: + return SelectStore(N); + case ISD::MUL: + return SelectMul(N); -// Handle generic address case. It is accessed from inlined asm =m constraints, -// which could have any kind of pointer. -bool HexagonDAGToDAGISel::SelectAddr(SDNode *Op, SDValue Addr, - SDValue &Base, SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. 
+ case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::FABS: + case ISD::FNEG: + return SelectBitOp(N); - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return true; - } + case ISD::ZERO_EXTEND: + return SelectZeroExtend(N); - if (Addr.getOpcode() == ISD::ADD) { - Base = Addr.getOperand(0); - Offset = Addr.getOperand(1); - return true; + case ISD::INTRINSIC_W_CHAIN: + return SelectIntrinsicWChain(N); + + case ISD::INTRINSIC_WO_CHAIN: + return SelectIntrinsicWOChain(N); } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return true; + return SelectCode(N); } bool HexagonDAGToDAGISel:: -SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, +SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { - SDValue Op0, Op1; - - switch (ConstraintCode) { - case 'o': // Offsetable. - case 'v': // Not offsetable. - default: return true; - case 'm': // Memory. - if (!SelectAddr(Op.getNode(), Op, Op0, Op1)) - return true; + SDValue Inp = Op, Res; + + switch (ConstraintID) { + default: + return true; + case InlineAsm::Constraint_i: + case InlineAsm::Constraint_o: // Offsetable. + case InlineAsm::Constraint_v: // Not offsetable. + case InlineAsm::Constraint_m: // Memory. + if (SelectAddrFI(Inp, Res)) + OutOps.push_back(Res); + else + OutOps.push_back(Inp); break; } - OutOps.push_back(Op0); - OutOps.push_back(Op1); + OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); return false; } -bool HexagonDAGToDAGISel::isConstExtProfitable(SDNode *N) const { - unsigned UseCount = 0; - for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { - UseCount++; - } - - return (UseCount <= 1); - -} - -//===--------------------------------------------------------------------===// -// Return 'true' if use count of the global address is below threshold. -//===--------------------------------------------------------------------===// -bool HexagonDAGToDAGISel::hasNumUsesBelowThresGA(SDNode *N) const { - assert(N->getOpcode() == ISD::TargetGlobalAddress && - "Expecting a target global address"); - - // Always try to fold the address. - if (TM.getOptLevel() == CodeGenOpt::Aggressive) - return true; - - GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); - DenseMap<const GlobalValue *, unsigned>::const_iterator GI = - GlobalAddressUseCountMap.find(GA->getGlobal()); - - if (GI == GlobalAddressUseCountMap.end()) - return false; - - return GI->second <= MaxNumOfUsesForConstExtenders; -} - -//===--------------------------------------------------------------------===// -// Return true if the non-GP-relative global address can be folded. -//===--------------------------------------------------------------------===// -inline bool HexagonDAGToDAGISel::foldGlobalAddress(SDValue &N, SDValue &R) { - return foldGlobalAddressImpl(N, R, false); -} - -//===--------------------------------------------------------------------===// -// Return true if the GP-relative global address can be folded. 
-//===--------------------------------------------------------------------===// -inline bool HexagonDAGToDAGISel::foldGlobalAddressGP(SDValue &N, SDValue &R) { - return foldGlobalAddressImpl(N, R, true); -} +void HexagonDAGToDAGISel::PreprocessISelDAG() { + SelectionDAG &DAG = *CurDAG; + std::vector<SDNode*> Nodes; + for (auto I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I) + Nodes.push_back(I); + + // Simplify: (or (select c x 0) z) -> (select c (or x z) z) + // (or (select c 0 y) z) -> (select c z (or y z)) + // This may not be the right thing for all targets, so do it here. + for (auto I: Nodes) { + if (I->getOpcode() != ISD::OR) + continue; + + auto IsZero = [] (const SDValue &V) -> bool { + if (ConstantSDNode *SC = dyn_cast<ConstantSDNode>(V.getNode())) + return SC->isNullValue(); + return false; + }; + auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool { + if (Op.getOpcode() != ISD::SELECT) + return false; + return IsZero(Op.getOperand(1)) || IsZero(Op.getOperand(2)); + }; -//===--------------------------------------------------------------------===// -// Fold offset of the global address if number of uses are below threshold. -//===--------------------------------------------------------------------===// -bool HexagonDAGToDAGISel::foldGlobalAddressImpl(SDValue &N, SDValue &R, - bool ShouldLookForGP) { - if (N.getOpcode() == ISD::ADD) { - SDValue N0 = N.getOperand(0); - SDValue N1 = N.getOperand(1); - if ((ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32_GP)) || - (!ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32))) { - ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1); - GlobalAddressSDNode *GA = - dyn_cast<GlobalAddressSDNode>(N0.getOperand(0)); - - if (Const && GA && - (GA->getOpcode() == ISD::TargetGlobalAddress)) { - if ((N0.getOpcode() == HexagonISD::CONST32) && - !hasNumUsesBelowThresGA(GA)) - return false; - R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), - SDLoc(Const), - N.getValueType(), - GA->getOffset() + - (uint64_t)Const->getSExtValue()); - return true; + SDValue N0 = I->getOperand(0), N1 = I->getOperand(1); + EVT VT = I->getValueType(0); + bool SelN0 = IsSelect0(N0); + SDValue SOp = SelN0 ? N0 : N1; + SDValue VOp = SelN0 ? 
N1 : N0; + + if (SOp.getOpcode() == ISD::SELECT && SOp.getNode()->hasOneUse()) { + SDValue SC = SOp.getOperand(0); + SDValue SX = SOp.getOperand(1); + SDValue SY = SOp.getOperand(2); + SDLoc DLS = SOp; + if (IsZero(SY)) { + SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp); + SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp); + DAG.ReplaceAllUsesWith(I, NewSel.getNode()); + } else if (IsZero(SX)) { + SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp); + SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr); + DAG.ReplaceAllUsesWith(I, NewSel.getNode()); } } } - return false; } + bool HexagonDAGToDAGISel::SelectAddrFI(SDValue& N, SDValue &R) { if (N.getOpcode() != ISD::FrameIndex) return false; @@ -1681,8 +1410,8 @@ bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R, return false; } -bool HexagonDAGToDAGISel::isValueExtension(SDValue const &Val, - unsigned FromBits, SDValue &Src) { +bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val, + unsigned FromBits, SDValue &Src) { unsigned Opc = Val.getOpcode(); switch (Opc) { case ISD::SIGN_EXTEND: diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 0072994..a2209ab 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -164,6 +164,12 @@ CC_Hexagon (unsigned ValNo, MVT ValVT, LocInfo = CCValAssign::ZExt; else LocInfo = CCValAssign::AExt; + } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) { + LocVT = MVT::i32; + LocInfo = CCValAssign::BCvt; + } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) { + LocVT = MVT::i64; + LocInfo = CCValAssign::BCvt; } if (LocVT == MVT::i32 || LocVT == MVT::f32) { @@ -239,6 +245,12 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT, LocInfo = CCValAssign::ZExt; else LocInfo = CCValAssign::AExt; + } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) { + LocVT = MVT::i32; + LocInfo = CCValAssign::BCvt; + } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) { + LocVT = MVT::i64; + LocInfo = CCValAssign::BCvt; } if (LocVT == MVT::i32 || LocVT == MVT::f32) { @@ -764,7 +776,7 @@ LowerBR_JT(SDValue Op, SelectionDAG &DAG) const BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock())); } - SDValue JumpTableBase = DAG.getNode(HexagonISD::WrapperJT, dl, + SDValue JumpTableBase = DAG.getNode(HexagonISD::JT, dl, getPointerTy(), TargetJT); SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index, DAG.getConstant(2, MVT::i32)); @@ -944,6 +956,192 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { false, 0); } +// Creates a SPLAT instruction for a constant value VAL. +static SDValue createSplat(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue Val) { + if (VT.getSimpleVT() == MVT::v4i8) + return DAG.getNode(HexagonISD::VSPLATB, dl, VT, Val); + + if (VT.getSimpleVT() == MVT::v4i16) + return DAG.getNode(HexagonISD::VSPLATH, dl, VT, Val); + + return SDValue(); +} + +static bool isSExtFree(SDValue N) { + // A sign-extend of a truncate of a sign-extend is free. + if (N.getOpcode() == ISD::TRUNCATE && + N.getOperand(0).getOpcode() == ISD::AssertSext) + return true; + // We have sign-extended loads. 
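[Editor's note] Returning to the new PreprocessISelDAG above: the rewrite is safe because OR distributes over a select with a zero arm. A quick exhaustive check of both identities (isSExtFree() resumes just below).

```cpp
#include <cassert>
#include <cstdint>

int main() {
  for (int C = 0; C <= 1; ++C)
    for (uint32_t X = 0; X < 16; ++X)
      for (uint32_t Z = 0; Z < 16; ++Z) {
        assert(((C ? X : 0u) | Z) == (C ? (X | Z) : Z));   // (or (select c x 0) z)
        assert(((C ? 0u : X) | Z) == (C ? Z : (X | Z)));   // (or (select c 0 y) z)
      }
  return 0;
}
```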
+ if (N.getOpcode() == ISD::LOAD) + return true; + return false; +} + +SDValue HexagonTargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue InpVal = Op.getOperand(0); + if (isa<ConstantSDNode>(InpVal)) { + uint64_t V = cast<ConstantSDNode>(InpVal)->getZExtValue(); + return DAG.getTargetConstant(countPopulation(V), MVT::i64); + } + SDValue PopOut = DAG.getNode(HexagonISD::POPCOUNT, dl, MVT::i32, InpVal); + return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, PopOut); +} + +SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue Cmp = Op.getOperand(2); + ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get(); + + EVT VT = Op.getValueType(); + EVT LHSVT = LHS.getValueType(); + EVT RHSVT = RHS.getValueType(); + + if (LHSVT == MVT::v2i16) { + assert(ISD::isSignedIntSetCC(CC) || ISD::isUnsignedIntSetCC(CC)); + unsigned ExtOpc = ISD::isSignedIntSetCC(CC) ? ISD::SIGN_EXTEND + : ISD::ZERO_EXTEND; + SDValue LX = DAG.getNode(ExtOpc, dl, MVT::v2i32, LHS); + SDValue RX = DAG.getNode(ExtOpc, dl, MVT::v2i32, RHS); + SDValue SC = DAG.getNode(ISD::SETCC, dl, MVT::v2i1, LX, RX, Cmp); + return SC; + } + + // Treat all other vector types as legal. + if (VT.isVector()) + return Op; + + // Equals and not equals should use sign-extend, not zero-extend, since + // we can represent small negative values in the compare instructions. + // The LLVM default is to use zero-extend arbitrarily in these cases. + if ((CC == ISD::SETEQ || CC == ISD::SETNE) && + (RHSVT == MVT::i8 || RHSVT == MVT::i16) && + (LHSVT == MVT::i8 || LHSVT == MVT::i16)) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS); + if (C && C->getAPIntValue().isNegative()) { + LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS); + RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS); + return DAG.getNode(ISD::SETCC, dl, Op.getValueType(), + LHS, RHS, Op.getOperand(2)); + } + if (isSExtFree(LHS) || isSExtFree(RHS)) { + LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS); + RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS); + return DAG.getNode(ISD::SETCC, dl, Op.getValueType(), + LHS, RHS, Op.getOperand(2)); + } + } + return SDValue(); +} + +SDValue HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) + const { + SDValue PredOp = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2); + EVT OpVT = Op1.getValueType(); + SDLoc DL(Op); + + if (OpVT == MVT::v2i16) { + SDValue X1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op1); + SDValue X2 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op2); + SDValue SL = DAG.getNode(ISD::VSELECT, DL, MVT::v2i32, PredOp, X1, X2); + SDValue TR = DAG.getNode(ISD::TRUNCATE, DL, MVT::v2i16, SL); + return TR; + } + + return SDValue(); +} + +// Handle only specific vector loads. +SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc DL(Op); + LoadSDNode *LoadNode = cast<LoadSDNode>(Op); + SDValue Chain = LoadNode->getChain(); + SDValue Ptr = Op.getOperand(1); + SDValue LoweredLoad; + SDValue Result; + SDValue Base = LoadNode->getBasePtr(); + ISD::LoadExtType Ext = LoadNode->getExtensionType(); + unsigned Alignment = LoadNode->getAlignment(); + SDValue LoadChain; + + if(Ext == ISD::NON_EXTLOAD) + Ext = ISD::ZEXTLOAD; + + if (VT == MVT::v4i16) { + if (Alignment == 2) { + SDValue Loads[4]; + // Base load. 
+ Loads[0] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Base, + LoadNode->getPointerInfo(), MVT::i16, + LoadNode->isVolatile(), + LoadNode->isNonTemporal(), + LoadNode->isInvariant(), + Alignment); + // Base+2 load. + SDValue Increment = DAG.getConstant(2, MVT::i32); + Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); + Loads[1] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, + LoadNode->getPointerInfo(), MVT::i16, + LoadNode->isVolatile(), + LoadNode->isNonTemporal(), + LoadNode->isInvariant(), + Alignment); + // SHL 16, then OR base and base+2. + SDValue ShiftAmount = DAG.getConstant(16, MVT::i32); + SDValue Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[1], ShiftAmount); + SDValue Tmp2 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[0]); + // Base + 4. + Increment = DAG.getConstant(4, MVT::i32); + Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); + Loads[2] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, + LoadNode->getPointerInfo(), MVT::i16, + LoadNode->isVolatile(), + LoadNode->isNonTemporal(), + LoadNode->isInvariant(), + Alignment); + // Base + 6. + Increment = DAG.getConstant(6, MVT::i32); + Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); + Loads[3] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, + LoadNode->getPointerInfo(), MVT::i16, + LoadNode->isVolatile(), + LoadNode->isNonTemporal(), + LoadNode->isInvariant(), + Alignment); + // SHL 16, then OR base+4 and base+6. + Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[3], ShiftAmount); + SDValue Tmp4 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[2]); + // Combine to i64. This could be optimised out later if we can + // affect reg allocation of this code. + Result = DAG.getNode(HexagonISD::COMBINE, DL, MVT::i64, Tmp4, Tmp2); + LoadChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + Loads[0].getValue(1), Loads[1].getValue(1), + Loads[2].getValue(1), Loads[3].getValue(1)); + } else { + // Perform default type expansion. + Result = DAG.getLoad(MVT::i64, DL, Chain, Ptr, LoadNode->getPointerInfo(), + LoadNode->isVolatile(), LoadNode->isNonTemporal(), + LoadNode->isInvariant(), LoadNode->getAlignment()); + LoadChain = Result.getValue(1); + } + } else + llvm_unreachable("Custom lowering unsupported load"); + + Result = DAG.getNode(ISD::BITCAST, DL, VT, Result); + // Since we pretend to lower a load, we need the original chain + // info attached to the result. + SDValue Ops[] = { Result, LoadChain }; + + return DAG.getMergeValues(Ops, DL); +} + + SDValue HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { EVT ValTy = Op.getValueType(); @@ -1028,6 +1226,19 @@ SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, return DAG.getNode(HexagonISD::CONST32, dl, getPointerTy(), Result); } +// Specifies that for loads and stores VT can be promoted to PromotedLdStVT. 
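[Editor's note] The align-2 expansion above, read as scalar code: four halfword loads, each pair shifted and OR'ed into a word, and the two words combined into the 64-bit result (little-endian element order). The promoteLdStType() helper announced in the comment above follows.

```cpp
#include <cstdint>
#include <cstring>

uint64_t loadV4i16Align2(const void *P) {
  uint16_t H[4];
  std::memcpy(H, P, sizeof H);                  // the four MVT::i16 loads
  uint32_t Lo = ((uint32_t)H[1] << 16) | H[0];  // SHL 16, OR base / base+2
  uint32_t Hi = ((uint32_t)H[3] << 16) | H[2];  // SHL 16, OR base+4 / base+6
  return ((uint64_t)Hi << 32) | Lo;             // HexagonISD::COMBINE
}
```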
+void HexagonTargetLowering::promoteLdStType(EVT VT, EVT PromotedLdStVT) { + if (VT != PromotedLdStVT) { + setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); + AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), + PromotedLdStVT.getSimpleVT()); + + setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); + AddPromotedToType(ISD::STORE, VT.getSimpleVT(), + PromotedLdStVT.getSimpleVT()); + } +} + SDValue HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); @@ -1045,14 +1256,105 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, : TargetLowering(TM), Subtarget(&STI) { // Set up the register classes. + addRegisterClass(MVT::v2i1, &Hexagon::PredRegsRegClass); // bbbbaaaa + addRegisterClass(MVT::v4i1, &Hexagon::PredRegsRegClass); // ddccbbaa + addRegisterClass(MVT::v8i1, &Hexagon::PredRegsRegClass); // hgfedcba addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass); - addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass); + addRegisterClass(MVT::v4i8, &Hexagon::IntRegsRegClass); + addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass); + promoteLdStType(MVT::v4i8, MVT::i32); + promoteLdStType(MVT::v2i16, MVT::i32); if (Subtarget->hasV5TOps()) { addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass); addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass); } + addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass); + addRegisterClass(MVT::v8i8, &Hexagon::DoubleRegsRegClass); + addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass); + addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass); + promoteLdStType(MVT::v8i8, MVT::i64); + + // Custom lower v4i16 load only. Let v4i16 store to be + // promoted for now. + setOperationAction(ISD::LOAD, MVT::v4i16, Custom); + AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::i64); + setOperationAction(ISD::STORE, MVT::v4i16, Promote); + AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::i64); + promoteLdStType(MVT::v2i32, MVT::i64); + + for (unsigned i = (unsigned) MVT::FIRST_VECTOR_VALUETYPE; + i <= (unsigned) MVT::LAST_VECTOR_VALUETYPE; ++i) { + MVT::SimpleValueType VT = (MVT::SimpleValueType) i; + + // Hexagon does not have support for the following operations, + // so they need to be expanded. + setOperationAction(ISD::SELECT, VT, Expand); + setOperationAction(ISD::SDIV, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); + setOperationAction(ISD::ROTL, VT, Expand); + setOperationAction(ISD::ROTR, VT, Expand); + setOperationAction(ISD::FDIV, VT, Expand); + setOperationAction(ISD::FNEG, VT, Expand); + setOperationAction(ISD::UMUL_LOHI, VT, Expand); + setOperationAction(ISD::SMUL_LOHI, VT, Expand); + setOperationAction(ISD::UDIVREM, VT, Expand); + setOperationAction(ISD::SDIVREM, VT, Expand); + setOperationAction(ISD::FPOW, VT, Expand); + setOperationAction(ISD::CTPOP, VT, Expand); + setOperationAction(ISD::CTLZ, VT, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); + setOperationAction(ISD::CTTZ, VT, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); + + // Expand all any extend loads. + for (unsigned j = (unsigned) MVT::FIRST_VECTOR_VALUETYPE; + j <= (unsigned) MVT::LAST_VECTOR_VALUETYPE; ++j) + setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType) j, VT, Expand); + + // Expand all trunc stores. 
+ for (unsigned TargetVT = (unsigned) MVT::FIRST_VECTOR_VALUETYPE; + TargetVT <= (unsigned) MVT::LAST_VECTOR_VALUETYPE; ++TargetVT) + setTruncStoreAction(VT, (MVT::SimpleValueType) TargetVT, Expand); + + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); + setOperationAction(ISD::ConstantPool, VT, Expand); + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); + setOperationAction(ISD::BUILD_VECTOR, VT, Expand); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Expand); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Expand); + setOperationAction(ISD::CONCAT_VECTORS, VT, Expand); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRL, VT, Custom); + + if (!isTypeLegal(VT)) + continue; + + setOperationAction(ISD::ADD, VT, Legal); + setOperationAction(ISD::SUB, VT, Legal); + setOperationAction(ISD::MUL, VT, Legal); + + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); + } + + setOperationAction(ISD::SETCC, MVT::v2i16, Custom); + setOperationAction(ISD::VSELECT, MVT::v2i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); + + setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass); computeRegisterProperties(Subtarget->getRegisterInfo()); @@ -1308,9 +1610,14 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, // Turn FP extload into load/fextend. for (MVT VT : MVT::fp_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); - // Hexagon has a i1 sign extending load. - for (MVT VT : MVT::integer_valuetypes()) - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand); + + // No extending loads from i32. + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand); + } + // Turn FP truncstore into trunc + store. setTruncStoreAction(MVT::f64, MVT::f32, Expand); @@ -1358,6 +1665,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT, MVT::f64, Expand); } + // Hexagon needs to optimize cases with negative constants. 
+ setOperationAction(ISD::SETCC, MVT::i16, Custom); + setOperationAction(ISD::SETCC, MVT::i8, Custom); + if (EmitJumpTables) { setOperationAction(ISD::BR_JT, MVT::Other, Custom); } else { @@ -1415,9 +1726,17 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTLZ, MVT::i64, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); + setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTR, MVT::i32, Expand); setOperationAction(ISD::BSWAP, MVT::i32, Expand); + setOperationAction(ISD::ROTL, MVT::i64, Expand); + setOperationAction(ISD::ROTR, MVT::i64, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); + setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); + setOperationAction(ISD::BR_CC, MVT::i64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f64, Expand); @@ -1429,7 +1748,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - + setOperationAction(ISD::MULHS, MVT::i64, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); @@ -1463,27 +1782,63 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - default: return nullptr; - case HexagonISD::CONST32: return "HexagonISD::CONST32"; - case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP"; - case HexagonISD::CONST32_Int_Real: return "HexagonISD::CONST32_Int_Real"; - case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC"; - case HexagonISD::CMPICC: return "HexagonISD::CMPICC"; - case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC"; - case HexagonISD::BRICC: return "HexagonISD::BRICC"; - case HexagonISD::BRFCC: return "HexagonISD::BRFCC"; - case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC"; - case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC"; - case HexagonISD::Hi: return "HexagonISD::Hi"; - case HexagonISD::Lo: return "HexagonISD::Lo"; - case HexagonISD::FTOI: return "HexagonISD::FTOI"; - case HexagonISD::ITOF: return "HexagonISD::ITOF"; - case HexagonISD::CALLv3: return "HexagonISD::CALLv3"; - case HexagonISD::CALLv3nr: return "HexagonISD::CALLv3nr"; - case HexagonISD::CALLR: return "HexagonISD::CALLR"; - case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; - case HexagonISD::BR_JT: return "HexagonISD::BR_JT"; - case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; + default: return nullptr; + case HexagonISD::CONST32: return "HexagonISD::CONST32"; + case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP"; + case HexagonISD::CONST32_Int_Real: return "HexagonISD::CONST32_Int_Real"; + case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC"; + case HexagonISD::CMPICC: return "HexagonISD::CMPICC"; + case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC"; + case HexagonISD::BRICC: return "HexagonISD::BRICC"; + case HexagonISD::BRFCC: return "HexagonISD::BRFCC"; + case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC"; + case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC"; + case HexagonISD::Hi: return "HexagonISD::Hi"; + case HexagonISD::Lo: return "HexagonISD::Lo"; + 
case HexagonISD::JT: return "HexagonISD::JT"; + case HexagonISD::CP: return "HexagonISD::CP"; + case HexagonISD::POPCOUNT: return "HexagonISD::POPCOUNT"; + case HexagonISD::COMBINE: return "HexagonISD::COMBINE"; + case HexagonISD::PACKHL: return "HexagonISD::PACKHL"; + case HexagonISD::VSPLATB: return "HexagonISD::VSPLATB"; + case HexagonISD::VSPLATH: return "HexagonISD::VSPLATH"; + case HexagonISD::SHUFFEB: return "HexagonISD::SHUFFEB"; + case HexagonISD::SHUFFEH: return "HexagonISD::SHUFFEH"; + case HexagonISD::SHUFFOB: return "HexagonISD::SHUFFOB"; + case HexagonISD::SHUFFOH: return "HexagonISD::SHUFFOH"; + case HexagonISD::VSXTBH: return "HexagonISD::VSXTBH"; + case HexagonISD::VSXTBW: return "HexagonISD::VSXTBW"; + case HexagonISD::VSRAW: return "HexagonISD::VSRAW"; + case HexagonISD::VSRAH: return "HexagonISD::VSRAH"; + case HexagonISD::VSRLW: return "HexagonISD::VSRLW"; + case HexagonISD::VSRLH: return "HexagonISD::VSRLH"; + case HexagonISD::VSHLW: return "HexagonISD::VSHLW"; + case HexagonISD::VSHLH: return "HexagonISD::VSHLH"; + case HexagonISD::VCMPBEQ: return "HexagonISD::VCMPBEQ"; + case HexagonISD::VCMPBGT: return "HexagonISD::VCMPBGT"; + case HexagonISD::VCMPBGTU: return "HexagonISD::VCMPBGTU"; + case HexagonISD::VCMPHEQ: return "HexagonISD::VCMPHEQ"; + case HexagonISD::VCMPHGT: return "HexagonISD::VCMPHGT"; + case HexagonISD::VCMPHGTU: return "HexagonISD::VCMPHGTU"; + case HexagonISD::VCMPWEQ: return "HexagonISD::VCMPWEQ"; + case HexagonISD::VCMPWGT: return "HexagonISD::VCMPWGT"; + case HexagonISD::VCMPWGTU: return "HexagonISD::VCMPWGTU"; + case HexagonISD::INSERT_ri: return "HexagonISD::INSERT_ri"; + case HexagonISD::INSERT_rd: return "HexagonISD::INSERT_rd"; + case HexagonISD::INSERT_riv: return "HexagonISD::INSERT_riv"; + case HexagonISD::INSERT_rdv: return "HexagonISD::INSERT_rdv"; + case HexagonISD::EXTRACTU_ri: return "HexagonISD::EXTRACTU_ri"; + case HexagonISD::EXTRACTU_rd: return "HexagonISD::EXTRACTU_rd"; + case HexagonISD::EXTRACTU_riv: return "HexagonISD::EXTRACTU_riv"; + case HexagonISD::EXTRACTU_rdv: return "HexagonISD::EXTRACTU_rdv"; + case HexagonISD::FTOI: return "HexagonISD::FTOI"; + case HexagonISD::ITOF: return "HexagonISD::ITOF"; + case HexagonISD::CALLv3: return "HexagonISD::CALLv3"; + case HexagonISD::CALLv3nr: return "HexagonISD::CALLv3nr"; + case HexagonISD::CALLR: return "HexagonISD::CALLR"; + case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; + case HexagonISD::BR_JT: return "HexagonISD::BR_JT"; + case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN"; } } @@ -1505,6 +1860,474 @@ bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { return ((VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32)); } +// shouldExpandBuildVectorWithShuffles +// Should we expand the build vector with shuffles? +bool +HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT, + unsigned DefinedValues) const { + + // Hexagon vector shuffles operate on element sizes of bytes or halfwords. + EVT EltVT = VT.getVectorElementType(); + int EltBits = EltVT.getSizeInBits(); + if ((EltBits != 8) && (EltBits != 16)) + return false; + + return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); +} + +// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3). V1 and +// V2 are the two vectors to select data from, V3 is the permutation.
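// Editorial sketch (standalone C++, not part of the patch): the values a
// byte or halfword splat materializes. The splat paths in the shuffle
// lowering below reduce to these replications; helper names are illustrative.
#include <cstdint>
uint32_t splatB(uint8_t b)  { return b * 0x01010101u; } // all v4i8 lanes = b
uint32_t splatH(uint16_t h) { return h * 0x00010001u; } // all v2i16 lanes = h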
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { + const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + SDLoc dl(Op); + EVT VT = Op.getValueType(); + + if (V2.getOpcode() == ISD::UNDEF) + V2 = V1; + + if (SVN->isSplat()) { + int Lane = SVN->getSplatIndex(); + if (Lane == -1) Lane = 0; + + // Test if V1 is a SCALAR_TO_VECTOR. + if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) + return createSplat(DAG, dl, VT, V1.getOperand(0)); + + // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR + // (and probably will turn into a SCALAR_TO_VECTOR once legalization + // reaches it). + if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && + !isa<ConstantSDNode>(V1.getOperand(0))) { + bool IsScalarToVector = true; + for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) + if (V1.getOperand(i).getOpcode() != ISD::UNDEF) { + IsScalarToVector = false; + break; + } + if (IsScalarToVector) + return createSplat(DAG, dl, VT, V1.getOperand(0)); + } + return createSplat(DAG, dl, VT, DAG.getConstant(Lane, MVT::i32)); + } + + // FIXME: We need to support more general vector shuffles. See the + // comment below (carried over from the ARM backend) for the general + // case; for now, let Expand handle these. + return SDValue(); + + // If the shuffle is not directly supported and it has 4 elements, use + // the PerfectShuffle-generated table to synthesize it from other shuffles. +} + +// Report true if the BUILD_VECTOR repeats the same base element +// in all of its operands. +static bool isCommonSplatElement(BuildVectorSDNode *BVN) { + unsigned NElts = BVN->getNumOperands(); + SDValue V0 = BVN->getOperand(0); + + for (unsigned i = 1, e = NElts; i != e; ++i) { + if (BVN->getOperand(i) != V0) + return false; + } + return true; +} + +// LowerVECTOR_SHIFT - Lower a vector shift. Try to convert +// <VT> = SHL/SRA/SRL <VT> by <VT> to the Hexagon-specific +// <VT> = SHL/SRA/SRL <VT> by <i32>.
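// Editorial sketch (standalone C++, not part of the patch): the uniform-shift
// case the lowering below matches. When the shift-amount operand is a splat
// BUILD_VECTOR, every lane shifts by the same scalar amount, which is what
// the VSRAH/VSRLH/VSHLH (and the W variants) nodes encode.
#include <cstdint>
void vsrahModel(int16_t lane[4], unsigned amt) {
  for (int i = 0; i < 4; ++i)
    lane[i] = (int16_t)(lane[i] >> amt); // arithmetic shift per halfword
}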
+static SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) { + BuildVectorSDNode *BVN = 0; + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + SDValue V3; + SDLoc dl(Op); + EVT VT = Op.getValueType(); + + if ((BVN = dyn_cast<BuildVectorSDNode>(V1.getNode())) && + isCommonSplatElement(BVN)) + V3 = V2; + else if ((BVN = dyn_cast<BuildVectorSDNode>(V2.getNode())) && + isCommonSplatElement(BVN)) + V3 = V1; + else + return SDValue(); + + SDValue CommonSplat = BVN->getOperand(0); + SDValue Result; + + if (VT.getSimpleVT() == MVT::v4i16) { + switch (Op.getOpcode()) { + case ISD::SRA: + Result = DAG.getNode(HexagonISD::VSRAH, dl, VT, V3, CommonSplat); + break; + case ISD::SHL: + Result = DAG.getNode(HexagonISD::VSHLH, dl, VT, V3, CommonSplat); + break; + case ISD::SRL: + Result = DAG.getNode(HexagonISD::VSRLH, dl, VT, V3, CommonSplat); + break; + default: + return SDValue(); + } + } else if (VT.getSimpleVT() == MVT::v2i32) { + switch (Op.getOpcode()) { + case ISD::SRA: + Result = DAG.getNode(HexagonISD::VSRAW, dl, VT, V3, CommonSplat); + break; + case ISD::SHL: + Result = DAG.getNode(HexagonISD::VSHLW, dl, VT, V3, CommonSplat); + break; + case ISD::SRL: + Result = DAG.getNode(HexagonISD::VSRLW, dl, VT, V3, CommonSplat); + break; + default: + return SDValue(); + } + } else { + return SDValue(); + } + + return DAG.getNode(ISD::BITCAST, dl, VT, Result); +} + +SDValue +HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { + BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); + SDLoc dl(Op); + EVT VT = Op.getValueType(); + + unsigned Size = VT.getSizeInBits(); + + // A vector larger than 64 bits cannot be represented in Hexagon. + // Expand will split the vector. + if (Size > 64) + return SDValue(); + + APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + unsigned NElts = BVN->getNumOperands(); + + // Try to generate a SPLAT instruction. + if ((VT.getSimpleVT() == MVT::v4i8 || VT.getSimpleVT() == MVT::v4i16) && + (BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, + HasAnyUndefs, 0, true) && SplatBitSize <= 16)) { + unsigned SplatBits = APSplatBits.getZExtValue(); + int32_t SextVal = ((int32_t) (SplatBits << (32 - SplatBitSize)) >> + (32 - SplatBitSize)); + return createSplat(DAG, dl, VT, DAG.getConstant(SextVal, MVT::i32)); + } + + // Try to generate COMBINE to build v2i32 vectors. + if (VT.getSimpleVT() == MVT::v2i32) { + SDValue V0 = BVN->getOperand(0); + SDValue V1 = BVN->getOperand(1); + + if (V0.getOpcode() == ISD::UNDEF) + V0 = DAG.getConstant(0, MVT::i32); + if (V1.getOpcode() == ISD::UNDEF) + V1 = DAG.getConstant(0, MVT::i32); + + ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(V0); + ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(V1); + // If the element isn't a constant, it is in a register: + // generate a COMBINE Register Register instruction. + if (!C0 || !C1) + return DAG.getNode(HexagonISD::COMBINE, dl, VT, V1, V0); + + // If one of the operands is an 8 bit integer constant, generate + // a COMBINE Immediate Immediate instruction. + if (isInt<8>(C0->getSExtValue()) || + isInt<8>(C1->getSExtValue())) + return DAG.getNode(HexagonISD::COMBINE, dl, VT, V1, V0); + } + + // Try to generate a S2_packhl to build v2i16 vectors. 
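// Editorial note (standalone C++, not part of the patch): the splat path
// above sign-extends an n-bit immediate with a shift-left/shift-right pair;
// restated here under the same assumption the code makes, that right shift
// of a negative signed int is arithmetic.
#include <cstdint>
int32_t signExtendN(uint32_t bits, unsigned n) { // requires 1 <= n <= 32
  return (int32_t)(bits << (32u - n)) >> (32u - n);
}
// signExtendN(0xFF, 8) == -1, signExtendN(0x7F, 8) == 127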
+ if (VT.getSimpleVT() == MVT::v2i16) { + for (unsigned i = 0, e = NElts; i != e; ++i) { + if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) + continue; + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(BVN->getOperand(i)); + // If the element isn't a constant, it is in a register: + // generate a S2_packhl instruction. + if (!Cst) { + SDValue pack = DAG.getNode(HexagonISD::PACKHL, dl, MVT::v4i16, + BVN->getOperand(1), BVN->getOperand(0)); + + return DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::v2i16, + pack); + } + } + } + + // In the general case, generate a CONST32 or a CONST64 for constant vectors, + // and insert_vector_elt for all the other cases. + uint64_t Res = 0; + unsigned EltSize = Size / NElts; + SDValue ConstVal; + uint64_t Mask = ~uint64_t(0ULL) >> (64 - EltSize); + bool HasNonConstantElements = false; + + for (unsigned i = 0, e = NElts; i != e; ++i) { + // LLVM's BUILD_VECTOR operands are in Little Endian mode, whereas Hexagon's + // combine, const64, etc. are Big Endian. + unsigned OpIdx = NElts - i - 1; + SDValue Operand = BVN->getOperand(OpIdx); + if (Operand.getOpcode() == ISD::UNDEF) + continue; + + int64_t Val = 0; + if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Operand)) + Val = Cst->getSExtValue(); + else + HasNonConstantElements = true; + + Val &= Mask; + Res = (Res << EltSize) | Val; + } + + if (Size == 64) + ConstVal = DAG.getConstant(Res, MVT::i64); + else + ConstVal = DAG.getConstant(Res, MVT::i32); + + // When there are non constant operands, add them with INSERT_VECTOR_ELT to + // ConstVal, the constant part of the vector. + if (HasNonConstantElements) { + EVT EltVT = VT.getVectorElementType(); + SDValue Width = DAG.getConstant(EltVT.getSizeInBits(), MVT::i64); + SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width, + DAG.getConstant(32, MVT::i64)); + + for (unsigned i = 0, e = NElts; i != e; ++i) { + // LLVM's BUILD_VECTOR operands are in Little Endian mode, whereas Hexagon + // is Big Endian. + unsigned OpIdx = NElts - i - 1; + SDValue Operand = BVN->getOperand(OpIdx); + if (dyn_cast<ConstantSDNode>(Operand)) + // This operand is already in ConstVal. 
+ continue; + + if (VT.getSizeInBits() == 64 && + Operand.getValueType().getSizeInBits() == 32) { + SDValue C = DAG.getConstant(0, MVT::i32); + Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand); + } + + SDValue Idx = DAG.getConstant(OpIdx, MVT::i64); + SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width); + SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset); + const SDValue Ops[] = {ConstVal, Operand, Combined}; + + if (VT.getSizeInBits() == 32) + ConstVal = DAG.getNode(HexagonISD::INSERT_riv, dl, MVT::i32, Ops); + else + ConstVal = DAG.getNode(HexagonISD::INSERT_rdv, dl, MVT::i64, Ops); + } + } + + return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal); +} + +SDValue +HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + EVT VT = Op.getValueType(); + unsigned NElts = Op.getNumOperands(); + SDValue Vec = Op.getOperand(0); + EVT VecVT = Vec.getValueType(); + SDValue Width = DAG.getConstant(VecVT.getSizeInBits(), MVT::i64); + SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width, + DAG.getConstant(32, MVT::i64)); + SDValue ConstVal = DAG.getConstant(0, MVT::i64); + + ConstantSDNode *W = dyn_cast<ConstantSDNode>(Width); + ConstantSDNode *S = dyn_cast<ConstantSDNode>(Shifted); + + if ((VecVT.getSimpleVT() == MVT::v2i16) && (NElts == 2) && W && S) { + if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) { + // We are trying to concat two v2i16 to a single v4i16. + SDValue Vec0 = Op.getOperand(1); + SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec); + return DAG.getNode(ISD::BITCAST, dl, VT, Combined); + } + } + + if ((VecVT.getSimpleVT() == MVT::v4i8) && (NElts == 2) && W && S) { + if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) { + // We are trying to concat two v4i8 to a single v8i8. + SDValue Vec0 = Op.getOperand(1); + SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec); + return DAG.getNode(ISD::BITCAST, dl, VT, Combined); + } + } + + for (unsigned i = 0, e = NElts; i != e; ++i) { + unsigned OpIdx = NElts - i - 1; + SDValue Operand = Op.getOperand(OpIdx); + + if (VT.getSizeInBits() == 64 && + Operand.getValueType().getSizeInBits() == 32) { + SDValue C = DAG.getConstant(0, MVT::i32); + Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand); + } + + SDValue Idx = DAG.getConstant(OpIdx, MVT::i64); + SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width); + SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset); + const SDValue Ops[] = {ConstVal, Operand, Combined}; + + if (VT.getSizeInBits() == 32) + ConstVal = DAG.getNode(HexagonISD::INSERT_riv, dl, MVT::i32, Ops); + else + ConstVal = DAG.getNode(HexagonISD::INSERT_rdv, dl, MVT::i64, Ops); + } + + return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal); +} + +SDValue +HexagonTargetLowering::LowerEXTRACT_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + int VTN = VT.isVector() ? VT.getVectorNumElements() : 1; + SDLoc dl(Op); + SDValue Idx = Op.getOperand(1); + SDValue Vec = Op.getOperand(0); + EVT VecVT = Vec.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); + int EltSize = EltVT.getSizeInBits(); + SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT ? + EltSize : VTN * EltSize, MVT::i64); + + // Constant element number. 
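// Editorial sketch (standalone C++, not part of the patch): the variable-index
// (_riv/_rdv) insert and extract nodes used above and below take a single i64
// operand that packs the field width into bits 63..32 and the bit offset into
// bits 31..0, mirroring the Width << 32 | Offset computation in the code; the
// constant-index forms pass width and offset as separate operands instead.
#include <cstdint>
uint64_t packWidthOffset(uint32_t widthBits, uint32_t offsetBits) {
  return ((uint64_t)widthBits << 32) | offsetBits;
}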
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Idx)) { + SDValue Offset = DAG.getConstant(C->getZExtValue() * EltSize, MVT::i32); + const SDValue Ops[] = {Vec, Width, Offset}; + + ConstantSDNode *W = dyn_cast<ConstantSDNode>(Width); + assert(W && "Non constant width in LowerEXTRACT_VECTOR"); + + SDValue N; + // For certain extracts, it is a simple _hi/_lo subreg. + if (VecVT.getSimpleVT() == MVT::v2i32) { + // v2i32 -> i32 extract. + if (C->getZExtValue() == 0) + N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, + MVT::i32, Vec); + else if (C->getZExtValue() == 1) + N = DAG.getTargetExtractSubreg(Hexagon::subreg_hireg, dl, + MVT::i32, Vec); + else + llvm_unreachable("Bad offset"); + } else if ((VecVT.getSimpleVT() == MVT::v4i16) && + (W->getZExtValue() == 32)) { + // v4i16 -> v2i16/i32 extract. + if (C->getZExtValue() == 0) + N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, + MVT::i32, Vec); + else if (C->getZExtValue() == 2) + N = DAG.getTargetExtractSubreg(Hexagon::subreg_hireg, dl, + MVT::i32, Vec); + else + llvm_unreachable("Bad offset"); + } else if ((VecVT.getSimpleVT() == MVT::v8i8) && + (W->getZExtValue() == 32)) { + // v8i8 -> v4i8/i32 extract. + if (C->getZExtValue() == 0) + N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, + MVT::i32, Vec); + else if (C->getZExtValue() == 4) + N = DAG.getTargetExtractSubreg(Hexagon::subreg_hireg, dl, + MVT::i32, Vec); + else + llvm_unreachable("Bad offset"); + } else if (VecVT.getSizeInBits() == 32) { + N = DAG.getNode(HexagonISD::EXTRACTU_ri, dl, MVT::i32, Ops); + } else { + N = DAG.getNode(HexagonISD::EXTRACTU_rd, dl, MVT::i64, Ops); + if (VT.getSizeInBits() == 32) + N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N); + } + + return DAG.getNode(ISD::BITCAST, dl, VT, N); + } + + // Variable element number. + SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx, + DAG.getConstant(EltSize, MVT::i32)); + SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width, + DAG.getConstant(32, MVT::i64)); + SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset); + + const SDValue Ops[] = {Vec, Combined}; + + SDValue N; + if (VecVT.getSizeInBits() == 32) { + N = DAG.getNode(HexagonISD::EXTRACTU_riv, dl, MVT::i32, Ops); + } else { + N = DAG.getNode(HexagonISD::EXTRACTU_rdv, dl, MVT::i64, Ops); + if (VT.getSizeInBits() == 32) + N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N); + } + return DAG.getNode(ISD::BITCAST, dl, VT, N); +} + +SDValue +HexagonTargetLowering::LowerINSERT_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + int VTN = VT.isVector() ? VT.getVectorNumElements() : 1; + SDLoc dl(Op); + SDValue Vec = Op.getOperand(0); + SDValue Val = Op.getOperand(1); + SDValue Idx = Op.getOperand(2); + EVT VecVT = Vec.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); + int EltSize = EltVT.getSizeInBits(); + SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::INSERT_VECTOR_ELT ? + EltSize : VTN * EltSize, MVT::i64); + + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Idx)) { + SDValue Offset = DAG.getConstant(C->getSExtValue() * EltSize, MVT::i32); + const SDValue Ops[] = {Vec, Val, Width, Offset}; + + SDValue N; + if (VT.getSizeInBits() == 32) + N = DAG.getNode(HexagonISD::INSERT_ri, dl, MVT::i32, Ops); + else + N = DAG.getNode(HexagonISD::INSERT_rd, dl, MVT::i64, Ops); + + return DAG.getNode(ISD::BITCAST, dl, VT, N); + } + + // Variable element number.
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx, + DAG.getConstant(EltSize, MVT::i32)); + SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width, + DAG.getConstant(32, MVT::i64)); + SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset); + + if (VT.getSizeInBits() == 64 && + Val.getValueType().getSizeInBits() == 32) { + SDValue C = DAG.getConstant(0, MVT::i32); + Val = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Val); + } + + const SDValue Ops[] = {Vec, Val, Combined}; + + SDValue N; + if (VT.getSizeInBits() == 32) + N = DAG.getNode(HexagonISD::INSERT_riv, dl, MVT::i32, Ops); + else + N = DAG.getNode(HexagonISD::INSERT_rdv, dl, MVT::i64, Ops); + + return DAG.getNode(ISD::BITCAST, dl, VT, N); +} + bool HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { // Assuming the caller does not have either a signext or zeroext modifier, and @@ -1549,7 +2372,19 @@ SDValue HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); - case ISD::ConstantPool: return LowerConstantPool(Op, DAG); + case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); + case ISD::INSERT_SUBVECTOR: return LowerINSERT_VECTOR(Op, DAG); + case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR(Op, DAG); + case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_VECTOR(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR(Op, DAG); + case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); + case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); + case ISD::SRA: + case ISD::SHL: + case ISD::SRL: + return LowerVECTOR_SHIFT(Op, DAG); + case ISD::ConstantPool: + return LowerConstantPool(Op, DAG); case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); // Frame & Return address. Currently unimplemented. case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); @@ -1561,9 +2396,14 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); + // Custom lower some vector loads. + case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::SELECT: return Op; + case ISD::SETCC: return LowerSETCC(Op, DAG); + case ISD::VSELECT: return LowerVSELECT(Op, DAG); + case ISD::CTPOP: return LowerCTPOP(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::INLINEASM: return LowerINLINEASM(Op, DAG); diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 151c28f..34b1ebb 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -37,6 +37,10 @@ bool isPositiveHalfWord(SDNode *N); ADJDYNALLOC, ARGEXTEND, + PIC_ADD, + AT_GOT, + AT_PCREL, + CMPICC, // Compare two GPR operands, set icc. CMPFCC, // Compare two FP operands, set fcc. BRICC, // Branch to dest on icc condition @@ -54,23 +58,44 @@ bool isPositiveHalfWord(SDNode *N); CALLR, RET_FLAG, // Return with a flag operand. - BR_JT, // Jump table. - BARRIER, // Memory barrier + BR_JT, // Branch through jump table. + BARRIER, // Memory barrier. + JT, // Jump table. + CP, // Constant pool. 
POPCOUNT, COMBINE, - WrapperJT, - WrapperCP, - WrapperCombineII, - WrapperCombineRR, - WrapperCombineRI_V4, - WrapperCombineIR_V4, - WrapperPackhl, - WrapperSplatB, - WrapperSplatH, - WrapperShuffEB, - WrapperShuffEH, - WrapperShuffOB, - WrapperShuffOH, + PACKHL, + VSPLATB, + VSPLATH, + SHUFFEB, + SHUFFEH, + SHUFFOB, + SHUFFOH, + VSXTBH, + VSXTBW, + VSRAW, + VSRAH, + VSRLW, + VSRLH, + VSHLW, + VSHLH, + VCMPBEQ, + VCMPBGT, + VCMPBGTU, + VCMPHEQ, + VCMPHGT, + VCMPHGTU, + VCMPWEQ, + VCMPWGT, + VCMPWGTU, + INSERT_ri, + INSERT_rd, + INSERT_riv, + INSERT_rdv, + EXTRACTU_ri, + EXTRACTU_rd, + EXTRACTU_riv, + EXTRACTU_rdv, TC_RETURN, EH_RETURN, DCFETCH @@ -85,6 +110,8 @@ bool isPositiveHalfWord(SDNode *N); bool CanReturnSmallStruct(const Function* CalleeFn, unsigned& RetSize) const; + void promoteLdStType(EVT VT, EVT PromotedLdStVT); + public: const HexagonSubtarget *Subtarget; explicit HexagonTargetLowering(const TargetMachine &TM, @@ -110,10 +137,17 @@ bool isPositiveHalfWord(SDNode *N); bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; - SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + // Should we expand the build vector with shuffles? + bool shouldExpandBuildVectorWithShuffles(EVT VT, + unsigned DefinedValues) const override; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; const char *getTargetNodeName(unsigned Opcode) const override; - SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const; @@ -137,9 +171,13 @@ bool isPositiveHalfWord(SDNode *N); const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -170,6 +208,15 @@ bool isPositiveHalfWord(SDNode *N); const std::string &Constraint, MVT VT) const override; + unsigned getInlineAsmMemConstraint( + const std::string &ConstraintCode) const override { + if (ConstraintCode == "o") + return InlineAsm::Constraint_o; + else if (ConstraintCode == "v") + return InlineAsm::Constraint_v; + return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); + } + // Intrinsics SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; /// isLegalAddressingMode - Return true if the addressing mode represented diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td index 3d04678..36a7e9f 100644 --- a/lib/Target/Hexagon/HexagonInstrFormats.td +++ b/lib/Target/Hexagon/HexagonInstrFormats.td @@ -76,7 +76,7 @@ class OpcodeHexagon { class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, string cstr, InstrItinClass itin, 
IType type> - : Instruction, OpcodeHexagon { + : Instruction { let Namespace = "Hexagon"; dag OutOperandList = outs; @@ -84,18 +84,18 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, let AsmString = asmstr; let Pattern = pattern; let Constraints = cstr; - let Itinerary = itin;
- let Size = 4;
-
- // SoftFail is a field the disassembler can use to provide a way for
- // instructions to not match without killing the whole decode process. It is
- // mainly used for ARM, but Tablegen expects this field to exist or it fails
- // to build the decode table.
- field bits<32> SoftFail = 0;
-
- // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
-
- // Instruction type according to the ISA.
+ let Itinerary = itin; + let Size = 4; + + // SoftFail is a field the disassembler can use to provide a way for + // instructions to not match without killing the whole decode process. It is + // mainly used for ARM, but Tablegen expects this field to exist or it fails + // to build the decode table. + field bits<32> SoftFail = 0; + + // *** Must match MCTargetDesc/HexagonBaseInfo.h *** + + // Instruction type according to the ISA. IType Type = type; let TSFlags{4-0} = Type.Value; @@ -197,7 +197,7 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, let mayLoad = 1 in class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon; let mayLoad = 1 in class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [], @@ -217,7 +217,7 @@ class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [], let mayLoad = 1 in class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon; // ST Instruction Class in V2/V3 can take SLOT0 only. // ST Instruction Class in V4 can take SLOT0 & SLOT1. @@ -225,7 +225,7 @@ class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], let mayStore = 1 in class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon; class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = ""> @@ -234,7 +234,7 @@ class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [], let mayStore = 1 in class ST0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = ST_tc_ld_SLOT0> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon; // ST Instruction Class in V2/V3 can take SLOT0 only. // ST Instruction Class in V4 can take SLOT0 & SLOT1. @@ -247,13 +247,14 @@ class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [], // In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1. class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = ST_tc_3stall_SLOT0> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeSYSTEM>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeSYSTEM>, + OpcodeHexagon; // ALU32 Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU32>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU32>, OpcodeHexagon; // ALU64 Instruction Class in V2/V3. // XTYPE Instruction Class in V4. @@ -261,7 +262,8 @@ class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], // Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4. 
class ALU64Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>, + OpcodeHexagon; class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23> @@ -274,7 +276,8 @@ class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [], // Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4. class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>, + OpcodeHexagon; // M Instruction Class in V2/V3. // XTYPE Instruction Class in V4. @@ -290,7 +293,8 @@ class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [], // Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4. class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>, + OpcodeHexagon; // S Instruction Class in V2/V3. // XTYPE Instruction Class in V4. @@ -304,34 +308,37 @@ class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [], // Definition of the instruction class NOT CHANGED. class JInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = J_tc_2early_SLOT23> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJ>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJ>, OpcodeHexagon; // JR Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = J_tc_2early_SLOT2> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJR>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJR>, OpcodeHexagon; // CR Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. 
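// Editorial sketch (standalone C++, not part of the patch): how the
// OpcodeDuplex class defined below packs a 32-bit duplex word. ICLASS is
// split across Inst{31-29} and Inst{13}; the packet parse bits, Inst{15-14},
// are zero, which is what marks a duplex; the two 13-bit sub-instructions
// occupy Inst{28-16} and Inst{12-0}.
#include <cstdint>
uint32_t encodeDuplex(uint8_t iclass, uint16_t subHi, uint16_t subLo) {
  uint32_t w = 0;
  w |= (uint32_t)((iclass >> 1) & 0x7) << 29; // IClass{3-1} -> Inst{31-29}
  w |= (uint32_t)(iclass & 0x1) << 13;        // IClass{0}   -> Inst{13}
  w |= (uint32_t)(subHi & 0x1FFF) << 16;      // ISubHi      -> Inst{28-16}
  w |= (uint32_t)(subLo & 0x1FFF);            // ISubLo      -> Inst{12-0}
  return w;                                   // Inst{15-14} stay 0 (duplex)
}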
class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = CR_tc_2early_SLOT3> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCR>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCR>, OpcodeHexagon; let isCodeGenOnly = 1, isPseudo = 1 in class Endloop<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = J_tc_2early_SLOT0123> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeENDLOOP>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeENDLOOP>, + OpcodeHexagon; let isCodeGenOnly = 1, isPseudo = 1 in class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = ""> - : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>, + OpcodeHexagon; let isCodeGenOnly = 1, isPseudo = 1 in class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr=""> - : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>, + OpcodeHexagon; //===----------------------------------------------------------------------===// // Instruction Classes Definitions - diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td index 5fec80b..7f7b2c9 100644 --- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td +++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td @@ -17,10 +17,88 @@ // *** Must match BaseInfo.h *** //----------------------------------------------------------------------------// -def TypeMEMOP : IType<9>; -def TypeNV : IType<10>; +def TypeMEMOP : IType<9>; +def TypeNV : IType<10>; +def TypeDUPLEX : IType<11>; def TypeCOMPOUND : IType<12>; -def TypePREFIX : IType<30>; +def TypeAG_VX : IType<28>; +def TypeAG_VM : IType<29>; +def TypePREFIX : IType<30>; + +// Duplex Instruction Class Declaration +//===----------------------------------------------------------------------===// + +class OpcodeDuplex { + field bits<32> Inst = ?; // Default to an invalid insn. + bits<4> IClass = 0; // ICLASS + bits<13> ISubHi = 0; // Low sub-insn + bits<13> ISubLo = 0; // High sub-insn + + let Inst{31-29} = IClass{3-1}; + let Inst{13} = IClass{0}; + let Inst{15-14} = 0; + let Inst{28-16} = ISubHi; + let Inst{12-0} = ISubLo; +} + +class InstDuplex<bits<4> iClass, list<dag> pattern = [], + string cstr = ""> + : Instruction, OpcodeDuplex { + let Namespace = "Hexagon"; + IType Type = TypeDUPLEX; // uses slot 0,1 + let isCodeGenOnly = 1; + let hasSideEffects = 0; + dag OutOperandList = (outs); + dag InOperandList = (ins); + let IClass = iClass; + let Constraints = cstr; + let Itinerary = DUPLEX; + let Size = 4; + + // SoftFail is a field the disassembler can use to provide a way for + // instructions to not match without killing the whole decode process. It is + // mainly used for ARM, but Tablegen expects this field to exist or it fails + // to build the decode table. + field bits<32> SoftFail = 0; + + // *** Must match MCTargetDesc/HexagonBaseInfo.h *** + + let TSFlags{4-0} = Type.Value; + + // Predicated instructions. + bits<1> isPredicated = 0; + let TSFlags{6} = isPredicated; + bits<1> isPredicatedFalse = 0; + let TSFlags{7} = isPredicatedFalse; + bits<1> isPredicatedNew = 0; + let TSFlags{8} = isPredicatedNew; + + // New-value insn helper fields. + bits<1> isNewValue = 0; + let TSFlags{9} = isNewValue; // New-value consumer insn. 
+ bits<1> hasNewValue = 0; + let TSFlags{10} = hasNewValue; // New-value producer insn. + bits<3> opNewValue = 0; + let TSFlags{13-11} = opNewValue; // New-value produced operand. + bits<1> isNVStorable = 0; + let TSFlags{14} = isNVStorable; // Store that can become new-value store. + bits<1> isNVStore = 0; + let TSFlags{15} = isNVStore; // New-value store insn. + + // Immediate extender helper fields. + bits<1> isExtendable = 0; + let TSFlags{16} = isExtendable; // Insn may be extended. + bits<1> isExtended = 0; + let TSFlags{17} = isExtended; // Insn must be extended. + bits<3> opExtendable = 0; + let TSFlags{20-18} = opExtendable; // Which operand may be extended. + bits<1> isExtentSigned = 0; + let TSFlags{21} = isExtentSigned; // Signed or unsigned range. + bits<5> opExtentBits = 0; + let TSFlags{26-22} = opExtentBits; //Number of bits of range before extending. + bits<2> opExtentAlign = 0; + let TSFlags{28-27} = opExtentAlign; // Alignment exponent before extending. +} //----------------------------------------------------------------------------// // Instruction Classes Definitions @@ -31,7 +109,7 @@ def TypePREFIX : IType<30>; // class NVInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeNV>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeNV>, OpcodeHexagon; class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0> @@ -56,7 +134,8 @@ class NCJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], let mayLoad = 1, mayStore = 1 in class MEMInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeMEMOP>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeMEMOP>, + OpcodeHexagon; class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0> @@ -65,8 +144,9 @@ class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], let isCodeGenOnly = 1 in class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []> : InstHexagon<outs, ins, asmstr, pattern, "", EXTENDER_tc_1_SLOT0123, - TypePREFIX>; + TypePREFIX>, OpcodeHexagon; class CJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = ""> - : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>, + OpcodeHexagon; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 9bae12c..fbf1ca9 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -62,10 +62,8 @@ const int Hexagon_MEMB_AUTOINC_MIN = -8; void HexagonInstrInfo::anchor() {} HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST) - : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP), - RI(ST), Subtarget(ST) { -} - + : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP), + RI(), Subtarget(ST) {} /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of @@ -159,15 +157,19 @@ HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, } BuildMI(&MBB, DL, 
get(BOpc)).addMBB(TBB); } else { - BuildMI(&MBB, DL, - get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB); + // If Cond[0] is a basic block, insert ENDLOOP0. + if (Cond[0].isMBB()) + BuildMI(&MBB, DL, get(Hexagon::ENDLOOP0)).addMBB(Cond[0].getMBB()); + else + BuildMI(&MBB, DL, + get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB); } return 1; } + // We don't handle ENDLOOP0 with a conditional branch in AnalyzeBranch. BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB); BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); - return 2; } @@ -211,9 +213,11 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return false; --I; } - + + bool JumpToBlock = I->getOpcode() == Hexagon::J2_jump && + I->getOperand(0).isMBB(); // Delete the JMP if it's equivalent to a fall-through. - if (AllowModify && I->getOpcode() == Hexagon::J2_jump && + if (AllowModify && JumpToBlock && MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { DEBUG(dbgs()<< "\nErasing the jump to successor block\n";); I->eraseFromParent(); @@ -243,6 +247,14 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, } while(I); int LastOpcode = LastInst->getOpcode(); + int SecLastOpcode = SecondLastInst ? SecondLastInst->getOpcode() : 0; + // If the branch target is not a basic block, it could be a tail call. + // (It is, if the target is a function.) + if (LastOpcode == Hexagon::J2_jump && !LastInst->getOperand(0).isMBB()) + return true; + if (SecLastOpcode == Hexagon::J2_jump && + !SecondLastInst->getOperand(0).isMBB()) + return true; bool LastOpcodeHasJMP_c = PredOpcodeHasJMP_c(LastOpcode); bool LastOpcodeHasNot = PredOpcodeHasNot(LastOpcode); @@ -270,8 +282,6 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return true; } - int SecLastOpcode = SecondLastInst->getOpcode(); - bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode); bool SecLastOpcodeHasNot = PredOpcodeHasNot(SecLastOpcode); if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::J2_jump)) { @@ -308,30 +318,35 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { - int BOpc = Hexagon::J2_jump; - int BccOpc = Hexagon::J2_jumpt; - int BccOpcNot = Hexagon::J2_jumpf; - MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) return 0; --I; - if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc && - I->getOpcode() != BccOpcNot) - return 0; - - // Remove the branch. - I->eraseFromParent(); + unsigned Opc1 = I->getOpcode(); + switch (Opc1) { + case Hexagon::J2_jump: + case Hexagon::J2_jumpt: + case Hexagon::J2_jumpf: + case Hexagon::ENDLOOP0: + I->eraseFromParent(); + break; + default: + return 0; + } I = MBB.end(); if (I == MBB.begin()) return 1; --I; - if (I->getOpcode() != BccOpc && I->getOpcode() != BccOpcNot) - return 1; - - // Remove the branch. 
- I->eraseFromParent(); - return 2; + unsigned Opc2 = I->getOpcode(); + switch (Opc2) { + case Hexagon::J2_jumpt: + case Hexagon::J2_jumpf: + case Hexagon::ENDLOOP0: + I->eraseFromParent(); + return 2; + default: + return 1; + } } @@ -549,12 +564,95 @@ void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, SmallVectorImpl<MachineInstr*> &NewMIs) const { llvm_unreachable("Unimplemented"); } +bool +HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + const HexagonRegisterInfo &TRI = getRegisterInfo(); + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + MachineBasicBlock &MBB = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::TFR_PdTrue: { + unsigned Reg = MI->getOperand(0).getReg(); + BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); + MBB.erase(MI); + return true; + } + case Hexagon::TFR_PdFalse: { + unsigned Reg = MI->getOperand(0).getReg(); + BuildMI(MBB, MI, DL, get(Hexagon::C2_andn), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); + MBB.erase(MI); + return true; + } + case Hexagon::VMULW: { + // Expand a 64-bit vector multiply into 2 32-bit scalar multiplies. + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned Src1Reg = MI->getOperand(1).getReg(); + unsigned Src2Reg = MI->getOperand(2).getReg(); + unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg); + unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg); + unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg); + unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg); + BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi), + TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi) + .addReg(Src2SubHi); + BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi), + TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo) + .addReg(Src2SubLo); + MBB.erase(MI); + MRI.clearKillFlags(Src1SubHi); + MRI.clearKillFlags(Src1SubLo); + MRI.clearKillFlags(Src2SubHi); + MRI.clearKillFlags(Src2SubLo); + return true; + } + case Hexagon::VMULW_ACC: { + // Expand 64-bit vector multiply with addition into 2 scalar multiplies. 
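// Editorial sketch (standalone C++, not part of the patch): the arithmetic
// performed by the VMULW expansion above. The 64-bit register pair holds a
// v2i32, and the product is formed by two independent 32-bit multiplies on
// the halves; the VMULW_ACC case, whose body continues below, does the same
// with a per-half multiply-accumulate.
#include <cstdint>
uint64_t vmulwModel(uint64_t a, uint64_t b) {
  uint32_t lo = (uint32_t)a * (uint32_t)b;                 // low halves
  uint32_t hi = (uint32_t)(a >> 32) * (uint32_t)(b >> 32); // high halves
  return ((uint64_t)hi << 32) | lo;
}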
+ unsigned DstReg = MI->getOperand(0).getReg(); + unsigned Src1Reg = MI->getOperand(1).getReg(); + unsigned Src2Reg = MI->getOperand(2).getReg(); + unsigned Src3Reg = MI->getOperand(3).getReg(); + unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg); + unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg); + unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg); + unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg); + unsigned Src3SubHi = TRI.getSubReg(Src3Reg, Hexagon::subreg_hireg); + unsigned Src3SubLo = TRI.getSubReg(Src3Reg, Hexagon::subreg_loreg); + BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci), + TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi) + .addReg(Src2SubHi).addReg(Src3SubHi); + BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci), + TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo) + .addReg(Src2SubLo).addReg(Src3SubLo); + MBB.erase(MI); + MRI.clearKillFlags(Src1SubHi); + MRI.clearKillFlags(Src1SubLo); + MRI.clearKillFlags(Src2SubHi); + MRI.clearKillFlags(Src2SubLo); + MRI.clearKillFlags(Src3SubHi); + MRI.clearKillFlags(Src3SubLo); + return true; + } + case Hexagon::TCRETURNi: + MI->setDesc(get(Hexagon::J2_jump)); + return true; + case Hexagon::TCRETURNr: + MI->setDesc(get(Hexagon::J2_jumpr)); + return true; + } + + return false; +} MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - int FI) const { + MachineInstr *MI, + ArrayRef<unsigned> Ops, + int FI) const { // Hexagon_TODO: Implement. return nullptr; } @@ -641,7 +739,7 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { switch(Opc) { case Hexagon::A2_tfrsi: - return isInt<12>(MI->getOperand(1).getImm()); + return (isOperandExtended(MI, 1) && isConstExtended(MI)) || isInt<12>(MI->getOperand(1).getImm()); case Hexagon::S2_storerd_io: return isShiftedUInt<6,3>(MI->getOperand(1).getImm()); @@ -1036,6 +1134,8 @@ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, // bool HexagonInstrInfo:: ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { + if (!Cond.empty() && Cond[0].isMBB()) + return true; if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) { Cond.erase(Cond.begin()); } else { @@ -1521,7 +1621,6 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const { switch (MI->getOpcode()) { default: llvm_unreachable("Unknown .new type"); - // store new value byte case Hexagon::S4_storerb_ur: return Hexagon::S4_storerbnew_ur; @@ -1531,6 +1630,20 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const { case Hexagon::S4_storeri_ur: return Hexagon::S4_storerinew_ur; + case Hexagon::S2_storerb_pci: + return Hexagon::S2_storerb_pci; + + case Hexagon::S2_storeri_pci: + return Hexagon::S2_storeri_pci; + + case Hexagon::S2_storerh_pci: + return Hexagon::S2_storerh_pci; + + case Hexagon::S2_storerd_pci: + return Hexagon::S2_storerd_pci; + + case Hexagon::S2_storerf_pci: + return Hexagon::S2_storerf_pci; } return 0; } @@ -1647,7 +1760,7 @@ bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const { // We currently only handle isGlobal() because it is the only kind of // object we are going to end up with here for now. // In the future we probably should add isSymbol(), etc. 
- if (MO.isGlobal() || MO.isSymbol()) + if (MO.isGlobal() || MO.isSymbol() || MO.isBlockAddress()) return true; // If the extendable operand is not 'Immediate' type, the instruction should diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 6acfbec..2644248 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -26,7 +26,7 @@ namespace llvm { struct EVT; - +class HexagonSubtarget; class HexagonInstrInfo : public HexagonGenInstrInfo { virtual void anchor(); const HexagonRegisterInfo RI; @@ -102,15 +102,21 @@ public: const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs) const; - MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, + /// expandPostRAPseudo - This function is called for all pseudo instructions + /// that remain after register allocation. Many pseudo instructions are + /// created to help register allocation. This is the place to convert them + /// into real instructions. The target can edit MI in place, or it can insert + /// new instructions and erase MI. The function should return true if + /// anything was changed. + bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override; + + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, int FrameIndex) const override; - MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const override { + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, + MachineInstr *LoadMI) const override { return nullptr; } diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index 60635cf..19cf993 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -104,10 +104,16 @@ def : T_CMP_pat <C2_cmpgtui, setugt, u9ImmPred>; //===----------------------------------------------------------------------===// // ALU32/ALU + //===----------------------------------------------------------------------===// +// Add. 
+ +def SDT_Int32Leaf : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>; +def SDT_Int32Unary : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + def SDTHexagonI64I32I32 : SDTypeProfile<1, 2, [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>; +def HexagonPACKHL : SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>; let hasSideEffects = 0, hasNewValue = 1, InputType = "reg" in class T_ALU32_3op<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit OpsRev, @@ -243,6 +249,9 @@ let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0 in { def C2_ccombinewnewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 1>; } +def: BinOp32_pat<HexagonCOMBINE, A2_combinew, i64>; +def: BinOp32_pat<HexagonPACKHL, S2_packhl, i64>; + let hasSideEffects = 0, hasNewValue = 1, isCompare = 1, InputType = "reg" in class T_ALU32_3op_cmp<string mnemonic, bits<2> MinOp, bit IsNeg, bit IsComm> : ALU32_rr<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt), @@ -321,7 +330,7 @@ let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1, def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8Ext:$s8, s8Imm:$S8), "$Rdd = combine(#$s8, #$S8)", [(set (i64 DoubleRegs:$Rdd), - (i64 (HexagonCOMBINE(i32 s8ExtPred:$s8), (i32 s8ImmPred:$S8))))]> { + (i64 (HexagonCOMBINE(i32 s32ImmPred:$s8), (i32 s8ImmPred:$S8))))]> { bits<5> Rdd; bits<8> s8; bits<8> S8; @@ -406,7 +415,7 @@ multiclass Addri_base<string mnemonic, SDNode OpNode> { defm addi : Addri_base<"add", add>, ImmRegRel, PredNewRel; -def: Pat<(i32 (add I32:$Rs, s16ExtPred:$s16)), +def: Pat<(i32 (add I32:$Rs, s32ImmPred:$s16)), (i32 (A2_addi I32:$Rs, imm:$s16))>; //===----------------------------------------------------------------------===// @@ -420,7 +429,7 @@ class T_ALU32ri_logical <string mnemonic, SDNode OpNode, bits<2> MinOp> : ALU32_ri <(outs IntRegs:$Rd), (ins IntRegs:$Rs, s10Ext:$s10), "$Rd = "#mnemonic#"($Rs, #$s10)" , - [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s10ExtPred:$s10))]> { + [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10))]> { bits<5> Rd; bits<5> Rs; bits<10> s10; @@ -465,7 +474,7 @@ def A2_nop: ALU32Inst <(outs), (ins), "nop" > { let Inst{27-24} = 0b1111; } -def: Pat<(sub s10ExtPred:$s10, IntRegs:$Rs), +def: Pat<(sub s32ImmPred:$s10, IntRegs:$Rs), (A2_subri imm:$s10, IntRegs:$Rs)>; // Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs). @@ -613,7 +622,7 @@ let InputType = "imm", isExtendable = 1, isExtentSigned = 1, isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16, isMoveImm = 1, isPredicated = 0, isPredicable = 1, isReMaterializable = 1 in def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16Ext:$s16), "$Rd = #$s16", - [(set (i32 IntRegs:$Rd), s16ExtPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>, + [(set (i32 IntRegs:$Rd), s32ImmPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>, ImmRegRel, PredRel { bits<5> Rd; bits<16> s16; @@ -637,9 +646,13 @@ def A2_tfrpi : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1), // TODO: see if this instruction can be deleted.. 
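// Editorial sketch (standalone C++, not part of the patch): the value that
// A2_combineii above materializes. combine(#s8, #S8) builds a 64-bit pair
// whose high word is the sign-extended first immediate and whose low word is
// the second, the same {hi, lo} convention HexagonCOMBINE uses throughout.
#include <cstdint>
uint64_t combineIIModel(int8_t hiImm, int8_t loImm) {
  return ((uint64_t)(uint32_t)(int32_t)hiImm << 32) | (uint32_t)(int32_t)loImm;
}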
let isExtendable = 1, opExtendable = 1, opExtentBits = 6, - isAsmParserOnly = 1 in -def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u6Ext:$src1), + isAsmParserOnly = 1 in { +def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u64Imm:$src1), "$dst = #$src1">; +def TFRI64_V2_ext : ALU64_rr<(outs DoubleRegs:$dst), + (ins s8Ext:$src1, s8Imm:$src2), + "$dst = combine(##$src1, #$src2)">; +} //===----------------------------------------------------------------------===// // ALU32/ALU - @@ -677,11 +690,11 @@ let opExtendable = 3 in def C2_muxir : T_MUX1<0b0, (ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8), "$Rd = mux($Pu, $Rs, #$s8)">; -def : Pat<(i32 (select I1:$Pu, s8ExtPred:$s8, I32:$Rs)), - (C2_muxri I1:$Pu, s8ExtPred:$s8, I32:$Rs)>; +def : Pat<(i32 (select I1:$Pu, s32ImmPred:$s8, I32:$Rs)), + (C2_muxri I1:$Pu, s32ImmPred:$s8, I32:$Rs)>; -def : Pat<(i32 (select I1:$Pu, I32:$Rs, s8ExtPred:$s8)), - (C2_muxir I1:$Pu, I32:$Rs, s8ExtPred:$s8)>; +def : Pat<(i32 (select I1:$Pu, I32:$Rs, s32ImmPred:$s8)), + (C2_muxir I1:$Pu, I32:$Rs, s32ImmPred:$s8)>; // C2_muxii: Scalar mux immediates. let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, @@ -690,7 +703,7 @@ def C2_muxii: ALU32Inst <(outs IntRegs:$Rd), (ins PredRegs:$Pu, s8Ext:$s8, s8Imm:$S8), "$Rd = mux($Pu, #$s8, #$S8)" , [(set (i32 IntRegs:$Rd), - (i32 (select I1:$Pu, s8ExtPred:$s8, s8ImmPred:$S8)))] > { + (i32 (select I1:$Pu, s32ImmPred:$s8, s8ImmPred:$S8)))] > { bits<5> Rd; bits<2> Pu; bits<8> s8; @@ -706,6 +719,12 @@ def C2_muxii: ALU32Inst <(outs IntRegs:$Rd), let Inst{4-0} = Rd; } +let isCodeGenOnly = 1, isPseudo = 1 in +def MUX64_rr : ALU64_rr<(outs DoubleRegs:$Rd), + (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt), + ".error \"should not emit\" ", []>; + + //===----------------------------------------------------------------------===// // template class for non-predicated alu32_2op instructions // - aslh, asrh, sxtb, sxth, zxth @@ -987,6 +1006,17 @@ def: T_vcmp_pat<A2_vcmpwgtu, setugt, v2i32>; //===----------------------------------------------------------------------===// // ALU32/PRED + //===----------------------------------------------------------------------===// +// No bits needed. If cmp.ge is found the assembler parser will +// transform it to cmp.gt subtracting 1 from the immediate. 
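TFRI64_V4 and the new TFRI64_V2_ext above materialize a 64-bit immediate through a combine of two halves, with the constant extender carrying the wide part ("$dst = combine(##$src1, #$src2)"). The arithmetic being spelled out, sketched for a 32/32 split:

#include <cstdint>

// A 64-bit immediate as combine(#hi, #lo): the high word goes to the upper
// register of the pair, the low word to the lower register.
constexpr uint32_t hiWord(uint64_t V) { return uint32_t(V >> 32); }
constexpr uint32_t loWord(uint64_t V) { return uint32_t(V); }
constexpr uint64_t combineWords(uint32_t Hi, uint32_t Lo) {
  return (uint64_t(Hi) << 32) | Lo;
}

static_assert(combineWords(hiWord(0x123456789abcdef0ULL),
                           loWord(0x123456789abcdef0ULL)) ==
                  0x123456789abcdef0ULL,
              "split and recombine round-trips");

int main() {}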
+let isPseudo = 1 in { +def C2_cmpgei: ALU32Inst < + (outs PredRegs:$Pd), (ins IntRegs:$Rs, s8Ext:$s8), + "$Pd = cmp.ge($Rs, #$s8)">; +def C2_cmpgeui: ALU32Inst < + (outs PredRegs:$Pd), (ins IntRegs:$Rs, u8Ext:$s8), + "$Pd = cmp.geu($Rs, #$s8)">; +} + //===----------------------------------------------------------------------===// // ALU32/PRED - @@ -1742,27 +1772,29 @@ def L2_loadalignb_io: T_loadalign_io <"memb_fifo", 0b0100, s11_0Ext>; multiclass Loadx_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred, InstHexagon MI> { def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>; + def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))), + (VT (MI AddrFI:$fi, imm:$Off))>; def: Pat<(VT (Load (add (i32 IntRegs:$Rs), ImmPred:$Off))), (VT (MI IntRegs:$Rs, imm:$Off))>; def: Pat<(VT (Load (i32 IntRegs:$Rs))), (VT (MI IntRegs:$Rs, 0))>; } let AddedComplexity = 20 in { - defm: Loadx_pat<load, i32, s11_2ExtPred, L2_loadri_io>; - defm: Loadx_pat<load, i64, s11_3ExtPred, L2_loadrd_io>; - defm: Loadx_pat<atomic_load_8 , i32, s11_0ExtPred, L2_loadrub_io>; - defm: Loadx_pat<atomic_load_16, i32, s11_1ExtPred, L2_loadruh_io>; - defm: Loadx_pat<atomic_load_32, i32, s11_2ExtPred, L2_loadri_io>; - defm: Loadx_pat<atomic_load_64, i64, s11_3ExtPred, L2_loadrd_io>; - - defm: Loadx_pat<extloadi1, i32, s11_0ExtPred, L2_loadrub_io>; - defm: Loadx_pat<extloadi8, i32, s11_0ExtPred, L2_loadrub_io>; - defm: Loadx_pat<extloadi16, i32, s11_1ExtPred, L2_loadruh_io>; - defm: Loadx_pat<sextloadi8, i32, s11_0ExtPred, L2_loadrb_io>; - defm: Loadx_pat<sextloadi16, i32, s11_1ExtPred, L2_loadrh_io>; - defm: Loadx_pat<zextloadi1, i32, s11_0ExtPred, L2_loadrub_io>; - defm: Loadx_pat<zextloadi8, i32, s11_0ExtPred, L2_loadrub_io>; - defm: Loadx_pat<zextloadi16, i32, s11_1ExtPred, L2_loadruh_io>; + defm: Loadx_pat<load, i32, s30_2ImmPred, L2_loadri_io>; + defm: Loadx_pat<load, i64, s29_3ImmPred, L2_loadrd_io>; + defm: Loadx_pat<atomic_load_8 , i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<atomic_load_16, i32, s31_1ImmPred, L2_loadruh_io>; + defm: Loadx_pat<atomic_load_32, i32, s30_2ImmPred, L2_loadri_io>; + defm: Loadx_pat<atomic_load_64, i64, s29_3ImmPred, L2_loadrd_io>; + + defm: Loadx_pat<extloadi1, i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<extloadi8, i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<extloadi16, i32, s31_1ImmPred, L2_loadruh_io>; + defm: Loadx_pat<sextloadi8, i32, s32_0ImmPred, L2_loadrb_io>; + defm: Loadx_pat<sextloadi16, i32, s31_1ImmPred, L2_loadrh_io>; + defm: Loadx_pat<zextloadi1, i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<zextloadi8, i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<zextloadi16, i32, s31_1ImmPred, L2_loadruh_io>; // No sextloadi1. 
} @@ -2707,7 +2739,7 @@ class T_MType_mpy_ri <bit isNeg, Operand ImmOp, list<dag> pattern> let isExtendable = 1, opExtentBits = 8, opExtendable = 2 in def M2_mpysip : T_MType_mpy_ri <0, u8Ext, - [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u8ExtPred:$u8))]>; + [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u32ImmPred:$u8))]>; def M2_mpysin : T_MType_mpy_ri <1, u8Imm, [(set (i32 IntRegs:$Rd), (ineg (mul IntRegs:$Rs, @@ -2729,7 +2761,7 @@ let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9, def M2_mpysmi : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2), "$dst = mpyi($src1, #$src2)", [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), - s9ExtPred:$src2))]>, ImmRegRel; + s32ImmPred:$src2))]>, ImmRegRel; let hasNewValue = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 3, InputType = "imm" in @@ -2780,7 +2812,7 @@ class T_MType_acc_rr <string mnemonic, bits<3> MajOp, bits<3> MinOp, let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23 in { def M2_macsip : T_MType_acc_ri <"+= mpyi", 0b010, u8Ext, [(set (i32 IntRegs:$dst), - (add (mul IntRegs:$src2, u8ExtPred:$src3), + (add (mul IntRegs:$src2, u32ImmPred:$src3), IntRegs:$src1))]>, ImmRegRel; def M2_maci : T_MType_acc_rr <"+= mpyi", 0b000, 0b000, 0, @@ -2793,7 +2825,7 @@ let CextOpcode = "ADD_acc" in { let isExtentSigned = 1 in def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8Ext, [(set (i32 IntRegs:$dst), - (add (add (i32 IntRegs:$src2), s8_16ExtPred:$src3), + (add (add (i32 IntRegs:$src2), s16_16ImmPred:$src3), (i32 IntRegs:$src1)))]>, ImmRegRel; def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0, @@ -2825,9 +2857,9 @@ class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp> (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>; -def : T_MType_acc_pat1 <M2_macsin, mul, sub, u8ExtPred>; +def : T_MType_acc_pat1 <M2_macsin, mul, sub, u32ImmPred>; -def : T_MType_acc_pat1 <M2_naccii, add, sub, s8_16ExtPred>; +def : T_MType_acc_pat1 <M2_naccii, add, sub, s16_16ImmPred>; def : T_MType_acc_pat2 <M2_nacci, add, sub>; //===----------------------------------------------------------------------===// @@ -3514,7 +3546,8 @@ let addrMode = BaseImmOffset, InputType = "imm" in { } // Patterns for generating stores, where the address takes different forms: -// - frameindex,, +// - frameindex, +// - frameindex + offset, // - base + offset, // - simple (base address without offset). // These would usually be used together (via Storex_pat defined below), but @@ -3522,6 +3555,10 @@ let addrMode = BaseImmOffset, InputType = "imm" in { // AddedComplexity) to the individual patterns. 
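The C2_cmpgei/C2_cmpgeui pseudos introduced a couple of hunks above exist only for the assembler: as the comment there says, cmp.ge is accepted and rewritten as cmp.gt with the immediate decremented. The signed identity behind that rewrite, checked in a sketch (assuming #imm-1 is still representable after the transformation):

#include <cstdint>

// cmp.ge(Rs, #imm)  ==  cmp.gt(Rs, #imm - 1)  for signed immediates, as long
// as the decremented immediate does not underflow the encodable range.
constexpr bool cmpGe(int32_t Rs, int32_t Imm) { return Rs >= Imm; }
constexpr bool cmpGtMapped(int32_t Rs, int32_t Imm) { return Rs > Imm - 1; }

static_assert(cmpGe(5, 5) == cmpGtMapped(5, 5), "boundary case");
static_assert(cmpGe(4, 5) == cmpGtMapped(4, 5), "just below");
static_assert(cmpGe(-3, -7) == cmpGtMapped(-3, -7), "negative immediate");

int main() {}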
class Storex_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI> : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>; +class Storex_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, + InstHexagon MI> + : Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; class Storex_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, InstHexagon MI> : Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)), @@ -3537,6 +3574,10 @@ class Storexm_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod, InstHexagon MI> : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>; +class Storexm_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, + PatFrag ValueMod, InstHexagon MI> + : Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; class Storexm_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, PatFrag ValueMod, InstHexagon MI> : Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)), @@ -3548,14 +3589,16 @@ class Storexm_simple_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod, multiclass Storex_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred, InstHexagon MI> { - def: Storex_fi_pat <Store, Value, MI>; - def: Storex_add_pat <Store, Value, ImmPred, MI>; + def: Storex_fi_pat <Store, Value, MI>; + def: Storex_fi_add_pat <Store, Value, ImmPred, MI>; + def: Storex_add_pat <Store, Value, ImmPred, MI>; } multiclass Storexm_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred, PatFrag ValueMod, InstHexagon MI> { - def: Storexm_fi_pat <Store, Value, ValueMod, MI>; - def: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>; + def: Storexm_fi_pat <Store, Value, ValueMod, MI>; + def: Storexm_fi_add_pat <Store, Value, ImmPred, ValueMod, MI>; + def: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>; } // Regular stores in the DAG have two operands: value and address. @@ -3567,15 +3610,15 @@ class SwapSt<PatFrag F> : PatFrag<(ops node:$val, node:$ptr), F.Fragment>; let AddedComplexity = 20 in { - defm: Storex_pat<truncstorei8, I32, s11_0ExtPred, S2_storerb_io>; - defm: Storex_pat<truncstorei16, I32, s11_1ExtPred, S2_storerh_io>; - defm: Storex_pat<store, I32, s11_2ExtPred, S2_storeri_io>; - defm: Storex_pat<store, I64, s11_3ExtPred, S2_storerd_io>; + defm: Storex_pat<truncstorei8, I32, s32_0ImmPred, S2_storerb_io>; + defm: Storex_pat<truncstorei16, I32, s31_1ImmPred, S2_storerh_io>; + defm: Storex_pat<store, I32, s30_2ImmPred, S2_storeri_io>; + defm: Storex_pat<store, I64, s29_3ImmPred, S2_storerd_io>; - defm: Storex_pat<SwapSt<atomic_store_8>, I32, s11_0ExtPred, S2_storerb_io>; - defm: Storex_pat<SwapSt<atomic_store_16>, I32, s11_1ExtPred, S2_storerh_io>; - defm: Storex_pat<SwapSt<atomic_store_32>, I32, s11_2ExtPred, S2_storeri_io>; - defm: Storex_pat<SwapSt<atomic_store_64>, I64, s11_3ExtPred, S2_storerd_io>; + defm: Storex_pat<SwapSt<atomic_store_8>, I32, s32_0ImmPred, S2_storerb_io>; + defm: Storex_pat<SwapSt<atomic_store_16>, I32, s31_1ImmPred, S2_storerh_io>; + defm: Storex_pat<SwapSt<atomic_store_32>, I32, s30_2ImmPred, S2_storeri_io>; + defm: Storex_pat<SwapSt<atomic_store_64>, I64, s29_3ImmPred, S2_storerd_io>; } // Simple patterns should be tried with the least priority. 
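The load and store pattern lists above move to predicates such as s32_0ImmPred, s31_1ImmPred, s30_2ImmPred and s29_3ImmPred: a signed offset that fits in 32 bits overall and is aligned to the access size. A sketch of that check, modeled on llvm::isShiftedInt; the reading of sN_M as N significant bits scaled by 2^M is inferred from the patterns, so treat the details as an assumption:

#include <cstdint>

// An sN_M-style predicate: V is a multiple of 2^M (the hardware encodes it
// scaled by the access size) and the scaled value still fits in N+M bits.
// s30_2 is then a word-aligned offset that fits in 32 bits.
template <unsigned N, unsigned M> constexpr bool isShiftedIntNM(int64_t V) {
  return (V % (INT64_C(1) << M)) == 0 &&
         V >= -(INT64_C(1) << (N + M - 1)) && V < (INT64_C(1) << (N + M - 1));
}

static_assert(isShiftedIntNM<30, 2>(4), "word-aligned offset accepted");
static_assert(!isShiftedIntNM<30, 2>(6), "misaligned offset rejected");
static_assert(isShiftedIntNM<31, 1>(-2), "halfword predicate, negative offset");

int main() {}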
@@ -3590,9 +3633,9 @@ def: Storex_simple_pat<SwapSt<atomic_store_32>, I32, S2_storeri_io>; def: Storex_simple_pat<SwapSt<atomic_store_64>, I64, S2_storerd_io>; let AddedComplexity = 20 in { - defm: Storexm_pat<truncstorei8, I64, s11_0ExtPred, LoReg, S2_storerb_io>; - defm: Storexm_pat<truncstorei16, I64, s11_1ExtPred, LoReg, S2_storerh_io>; - defm: Storexm_pat<truncstorei32, I64, s11_2ExtPred, LoReg, S2_storeri_io>; + defm: Storexm_pat<truncstorei8, I64, s32_0ImmPred, LoReg, S2_storerb_io>; + defm: Storexm_pat<truncstorei16, I64, s31_1ImmPred, LoReg, S2_storerh_io>; + defm: Storexm_pat<truncstorei32, I64, s30_2ImmPred, LoReg, S2_storeri_io>; } def: Storexm_simple_pat<truncstorei8, I64, LoReg, S2_storerb_io>; @@ -4321,6 +4364,14 @@ def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)), // XTYPE/PERM + //===----------------------------------------------------------------------===// +def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add (i32 IntRegs:$b), 3))), + (i32 8)), + (i32 (zextloadi8 (add (i32 IntRegs:$b), 2)))), + (i32 16)), + (shl (i32 (zextloadi8 (add (i32 IntRegs:$b), 1))), (i32 8))), + (zextloadi8 (i32 IntRegs:$b))), + (A2_swiz (L2_loadri_io IntRegs:$b, 0))>; + //===----------------------------------------------------------------------===// // XTYPE/PERM - //===----------------------------------------------------------------------===// @@ -4364,7 +4415,7 @@ def C2_pxfer_map: SInst<(outs PredRegs:$dst), (ins PredRegs:$src), // Patterns for loads of i1: def: Pat<(i1 (load AddrFI:$fi)), (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>; -def: Pat<(i1 (load (add (i32 IntRegs:$Rs), s11_0ExtPred:$Off))), +def: Pat<(i1 (load (add (i32 IntRegs:$Rs), s32ImmPred:$Off))), (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>; def: Pat<(i1 (load (i32 IntRegs:$Rs))), (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>; @@ -4375,7 +4426,7 @@ def I1toI32: OutPatFrag<(ops node:$Rs), def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_tfrrp (i32 $Rs)))>; -defm: Storexm_pat<store, I1, s11_0ExtPred, I1toI32, S2_storerb_io>; +defm: Storexm_pat<store, I1, s32ImmPred, I1toI32, S2_storerb_io>; def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>; //===----------------------------------------------------------------------===// @@ -4474,6 +4525,12 @@ def Y2_barrier : SYSInst<(outs), (ins), //===----------------------------------------------------------------------===// // SYSTEM/SUPER - //===----------------------------------------------------------------------===// + +// Generate frameindex addresses. +let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1, + isPseudo = 1, isCodeGenOnly = 1, hasSideEffects = 0 in +def TFR_FI: ALU32_ri<(outs IntRegs:$Rd), (ins IntRegs:$fi, s32Imm:$Off), "">; + //===----------------------------------------------------------------------===// // CRUSER - Type. 
//===----------------------------------------------------------------------===// @@ -4519,6 +4576,11 @@ class LOOP_rBase<string mnemonic, Operand brOp, bit mustExtend = 0> multiclass LOOP_ri<string mnemonic> { def i : LOOP_iBase<mnemonic, brtarget>; def r : LOOP_rBase<mnemonic, brtarget>; + + let isCodeGenOnly = 1, isExtended = 1, opExtendable = 0 in { + def iext: LOOP_iBase<mnemonic, brtargetExt, 1>; + def rext: LOOP_rBase<mnemonic, brtargetExt, 1>; + } } @@ -4676,36 +4738,6 @@ def Y4_trace: CRInst <(outs), (ins IntRegs:$Rs), let Inst{20-16} = Rs; } -let AddedComplexity = 100, isPredicated = 1, isCodeGenOnly = 1 in -def TFR_condset_ri : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, s12Imm:$src3), - "Error; should not emit", - [(set (i32 IntRegs:$dst), - (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2), - s12ImmPred:$src3)))]>; - -let AddedComplexity = 100, isPredicated = 1, isCodeGenOnly = 1 in -def TFR_condset_ir : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, s12Imm:$src2, IntRegs:$src3), - "Error; should not emit", - [(set (i32 IntRegs:$dst), - (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2, - (i32 IntRegs:$src3))))]>; - -let AddedComplexity = 100, isPredicated = 1, isCodeGenOnly = 1 in -def TFR_condset_ii : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, s12Imm:$src2, s12Imm:$src3), - "Error; should not emit", - [(set (i32 IntRegs:$dst), - (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2, - s12ImmPred:$src3)))]>; - -// Generate frameindex addresses. -let isReMaterializable = 1, isCodeGenOnly = 1 in -def TFR_FI : ALU32_ri<(outs IntRegs:$dst), (ins FrameIndex:$src1), - "$dst = add($src1)", - [(set (i32 IntRegs:$dst), ADDRri:$src1)]>; - // Support for generating global address. // Taken from X86InstrInfo.td. 
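The XTYPE/PERM pattern added two hunks up recognizes a 32-bit value assembled from four single-byte loads, shifts and ors, and selects it as one word load feeding A2_swiz. Assuming swiz is Hexagon's register byte reverse, this is what the selected instruction computes:

#include <cstdint>

// A2_swiz reverses the four bytes of a 32-bit register:
// bits 31..24 swap with 7..0, and 23..16 with 15..8.
constexpr uint32_t swiz(uint32_t V) {
  return (V << 24) | ((V & 0xff00u) << 8) | ((V >> 8) & 0xff00u) | (V >> 24);
}

static_assert(swiz(0x11223344u) == 0x44332211u, "bytes reversed");
static_assert(swiz(swiz(0xdeadbeefu)) == 0xdeadbeefu, "swiz is an involution");

int main() {}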
def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, @@ -4750,30 +4782,29 @@ def HI_PIC : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label), "$dst.h = #HI($label@GOTREL)", []>; -let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0, - isAsmParserOnly = 1 in -def LOi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value), - "$dst.l = #LO($imm_value)", - []>; - +let isReMaterializable = 1, isMoveImm = 1, + isCodeGenOnly = 1, hasSideEffects = 0 in +def HI_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.h = #HI($global@GOT)", + []>; -let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0, - isAsmParserOnly = 1 in -def HIi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value), - "$dst.h = #HI($imm_value)", - []>; +let isReMaterializable = 1, isMoveImm = 1, + isCodeGenOnly = 1, hasSideEffects = 0 in +def LO_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.l = #LO($global@GOT)", + []>; -let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0, - isAsmParserOnly = 1 in -def LO_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt), - "$dst.l = #LO($jt)", - []>; +let isReMaterializable = 1, isMoveImm = 1, + isCodeGenOnly = 1, hasSideEffects = 0 in +def HI_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.h = #HI($global@GOTREL)", + []>; -let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0, - isAsmParserOnly = 1 in -def HI_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt), - "$dst.h = #HI($jt)", - []>; +let isReMaterializable = 1, isMoveImm = 1, + isCodeGenOnly = 1, hasSideEffects = 0 in +def LO_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.l = #LO($global@GOTREL)", + []>; // This pattern is incorrect. When we add small data, we should change // this pattern to use memw(#foo). @@ -4785,31 +4816,19 @@ def CONST32 : CONSTLDInst<(outs IntRegs:$dst), (ins globaladdress:$global), (load (HexagonCONST32 tglobaltlsaddr:$global)))]>; let isReMaterializable = 1, isMoveImm = 1 in -def CONST32_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), - "$dst = CONST32(#$global)", - [(set (i32 IntRegs:$dst), - (HexagonCONST32 tglobaladdr:$global))]>; - -let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in def CONST32_set_jt : CONSTLDInst<(outs IntRegs:$dst), (ins jumptablebase:$jt), "$dst = CONST32(#$jt)", [(set (i32 IntRegs:$dst), (HexagonCONST32 tjumptable:$jt))]>; let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in -def CONST32GP_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), - "$dst = CONST32(#$global)", - [(set (i32 IntRegs:$dst), - (HexagonCONST32_GP tglobaladdr:$global))]>; - -let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in def CONST32_Int_Real : CONSTLDInst<(outs IntRegs:$dst), (ins i32imm:$global), "$dst = CONST32(#$global)", [(set (i32 IntRegs:$dst), imm:$global) ]>; -// Map BlockAddress lowering to CONST32_Int_Real -def : Pat<(HexagonCONST32_GP tblockaddress:$addr), - (CONST32_Int_Real tblockaddress:$addr)>; +// Map TLS addresses to a CONST32 instruction +def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16Ext:$addr)>; +def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16Ext:$label)>; let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in def CONST32_Label : LDInst2<(outs IntRegs:$dst), (ins bblabel:$label), @@ -4869,21 +4888,17 @@ let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0, def TCRETURNr : T_JMPr; // Direct tail-calls.
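The HI_GOT/LO_GOT and HI_GOTREL/LO_GOTREL pairs defined above build a 32-bit address in two register halves, "$dst.h = #HI(...)" then "$dst.l = #LO(...)", with the suffix selecting the relocation against the symbol. The split and reassembly being encoded:

#include <cstdint>

// #HI(x) and #LO(x): the upper and lower 16-bit halves of a 32-bit address.
constexpr uint16_t hi16(uint32_t Addr) { return uint16_t(Addr >> 16); }
constexpr uint16_t lo16(uint32_t Addr) { return uint16_t(Addr & 0xffffu); }

// Writing $dst.h and then $dst.l reassembles the full 32-bit value.
constexpr uint32_t materialize(uint32_t Addr) {
  return (uint32_t(hi16(Addr)) << 16) | lo16(Addr);
}

static_assert(materialize(0xcafef00du) == 0xcafef00du, "hi/lo round-trips");

int main() {}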
-let isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0, -isTerminator = 1, isCodeGenOnly = 1 in { - def TCRETURNtg : JInst<(outs), (ins calltarget:$dst), "jump $dst", - [], "", J_tc_2early_SLOT23>; - def TCRETURNtext : JInst<(outs), (ins calltarget:$dst), "jump $dst", - [], "", J_tc_2early_SLOT23>; -} +let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0, + isTerminator = 1, isCodeGenOnly = 1 in +def TCRETURNi : JInst<(outs), (ins calltarget:$dst), "", []>; //Tail calls. def: Pat<(HexagonTCRet tglobaladdr:$dst), - (TCRETURNtg tglobaladdr:$dst)>; + (TCRETURNi tglobaladdr:$dst)>; def: Pat<(HexagonTCRet texternalsym:$dst), - (TCRETURNtext texternalsym:$dst)>; + (TCRETURNi texternalsym:$dst)>; def: Pat<(HexagonTCRet (i32 IntRegs:$dst)), - (TCRETURNr (i32 IntRegs:$dst))>; + (TCRETURNr IntRegs:$dst)>; // Map from r0 = and(r1, 65535) to r0 = zxth(r1) def: Pat<(and (i32 IntRegs:$src1), 65535), @@ -4900,19 +4915,19 @@ def: Pat<(add (i1 PredRegs:$src1), -1), (C2_not PredRegs:$src1)>; // Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i). -def: Pat<(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s8ExtPred:$src3), - (C2_muxii PredRegs:$src1, s8ExtPred:$src3, s8ImmPred:$src2)>; +def: Pat<(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s32ImmPred:$src3), + (C2_muxii PredRegs:$src1, s32ImmPred:$src3, s8ImmPred:$src2)>; // Map from p0 = pnot(p0); r0 = select(p0, #i, r1) // => r0 = C2_muxir(p0, r1, #i) -def: Pat<(select (not (i1 PredRegs:$src1)), s8ExtPred:$src2, +def: Pat<(select (not (i1 PredRegs:$src1)), s32ImmPred:$src2, (i32 IntRegs:$src3)), - (C2_muxir PredRegs:$src1, IntRegs:$src3, s8ExtPred:$src2)>; + (C2_muxir PredRegs:$src1, IntRegs:$src3, s32ImmPred:$src2)>; // Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) // => r0 = C2_muxri (p0, #i, r1) -def: Pat<(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s8ExtPred:$src3), - (C2_muxri PredRegs:$src1, s8ExtPred:$src3, IntRegs:$src2)>; +def: Pat<(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s32ImmPred:$src3), + (C2_muxri PredRegs:$src1, s32ImmPred:$src3, IntRegs:$src2)>; // Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump. def: Pat<(brcond (not (i1 PredRegs:$src1)), bb:$offset), @@ -4952,26 +4967,6 @@ def: Pat<(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)), bb:$offset), (J2_jumpf (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ImmPred:$src2)), bb:$offset)>; -// cmp.lt(r0, r1) -> cmp.gt(r1, r0) -def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - bb:$offset), - (J2_jumpt (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)), bb:$offset)>; - -def : Pat <(brcond (i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - bb:$offset), - (J2_jumpf (C2_cmpgtup (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)), - bb:$offset)>; - -def : Pat <(brcond (i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - bb:$offset), - (J2_jumpf (C2_cmpgtu (i32 IntRegs:$src1), (i32 IntRegs:$src2)), - bb:$offset)>; - -def : Pat <(brcond (i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - bb:$offset), - (J2_jumpf (C2_cmpgtup (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), - bb:$offset)>; - // Map from a 64-bit select to an emulated 64-bit mux. // Hexagon does not support 64-bit MUXes; so emulate with combines. 
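Several of the rewritten mux patterns above fold a negated predicate by swapping the select operands instead of materializing the not: select(!p, a, b) is select(p, b, a). The law, verified in a sketch:

#include <cstdint>

constexpr int32_t mux(bool P, int32_t T, int32_t F) { return P ? T : F; }

// select(not(p), a, b) == select(p, b, a): the operand swap used by the
// C2_muxir/C2_muxri rewrites.
constexpr bool swapLaw(bool P, int32_t A, int32_t B) {
  return mux(!P, A, B) == mux(P, B, A);
}

static_assert(swapLaw(true, 10, -7), "predicate true");
static_assert(swapLaw(false, 10, -7), "predicate false");

int main() {}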
def: Pat<(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2), @@ -4987,10 +4982,6 @@ def: Pat<(select (i1 PredRegs:$src1), (i1 PredRegs:$src2), (i1 PredRegs:$src3)), (C2_or (C2_and PredRegs:$src1, PredRegs:$src2), (C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>; -// Map Pd = load(addr) -> Rs = load(addr); Pd = Rs. -def : Pat<(i1 (load ADDRriS11_2:$addr)), - (i1 (C2_tfrrp (i32 (L2_loadrb_io AddrFI:$addr, 0))))>; - // Map for truncating from 64 immediates to 32 bit immediates. def: Pat<(i32 (trunc (i64 DoubleRegs:$src))), (LoReg DoubleRegs:$src)>; @@ -4999,42 +4990,10 @@ def: Pat<(i32 (trunc (i64 DoubleRegs:$src))), def: Pat<(i1 (trunc (i64 DoubleRegs:$src))), (C2_tfrrp (LoReg DoubleRegs:$src))>; -// Map memb(Rs) = Rdd -> memb(Rs) = Rt. -def : Pat<(truncstorei8 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), - (S2_storerb_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), - subreg_loreg)))>; - -// Map memh(Rs) = Rdd -> memh(Rs) = Rt. -def : Pat<(truncstorei16 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), - (S2_storerh_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), - subreg_loreg)))>; -// Map memw(Rs) = Rdd -> memw(Rs) = Rt -def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), - (S2_storeri_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), - subreg_loreg)))>; - -// Map memw(Rs) = Rdd -> memw(Rs) = Rt. -def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), - (S2_storeri_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), - subreg_loreg)))>; - -// Map from i1 = constant<-1>; memw(addr) = i1 -> r0 = 1; memw(addr) = r0. -def : Pat<(store (i1 -1), ADDRriS11_2:$addr), - (S2_storerb_io AddrFI:$addr, 0, (A2_tfrsi 1))>; - - -// Map from i1 = constant<-1>; store i1 -> r0 = 1; store r0. -def : Pat<(store (i1 -1), ADDRriS11_2:$addr), - (S2_storerb_io AddrFI:$addr, 0, (A2_tfrsi 1))>; - -// Map from memb(Rs) = Pd -> Rt = mux(Pd, #0, #1); store Rt. -def : Pat<(store (i1 PredRegs:$src1), ADDRriS11_2:$addr), - (S2_storerb_io AddrFI:$addr, 0, (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0)) )>; - // rs <= rt -> !(rs > rt). let AddedComplexity = 30 in -def: Pat<(i1 (setle (i32 IntRegs:$src1), s10ExtPred:$src2)), - (C2_not (C2_cmpgti IntRegs:$src1, s10ExtPred:$src2))>; +def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>; // rs <= rt -> !(rs > rt). def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))), @@ -5048,13 +5007,8 @@ def: Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), // Hexagon_TODO: We should improve on this. // rs != rt -> !(rs == rt). let AddedComplexity = 30 in -def: Pat<(i1 (setne (i32 IntRegs:$src1), s10ExtPred:$src2)), - (C2_not (C2_cmpeqi IntRegs:$src1, s10ExtPred:$src2))>; - -// Map cmpne(Rs) -> !cmpeqe(Rs). -// rs != rt -> !(rs == rt). -def : Pat <(i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - (i1 (C2_not (i1 (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src2)))))>; +def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_not (C2_cmpeqi IntRegs:$src1, s32ImmPred:$src2))>; // Convert setne back to xor for hexagon since we compute w/ pred registers. 
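The comparison rewrites in this stretch all bottom out in Hexagon's gt/eq compares: rs <= rt becomes !(rs > rt), rs != rt becomes !(rs == rt), and lt is gt with the operands swapped. The identities, checked:

#include <cstdint>

// Only cmp.gt and cmp.eq (plus their unsigned forms) exist; the remaining
// orderings come from complementing the predicate or swapping operands.
constexpr bool le(int32_t A, int32_t B) { return !(A > B); }  // setle
constexpr bool ne(int32_t A, int32_t B) { return !(A == B); } // setne
constexpr bool lt(int32_t A, int32_t B) { return B > A; }     // swapped gt

static_assert(le(3, 3) && !le(4, 3), "le via negated gt");
static_assert(ne(1, 2) && !ne(2, 2), "ne via negated eq");
static_assert(lt(1, 2) && !lt(2, 2), "lt via swapped gt");

int main() {}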
def: Pat<(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))), @@ -5072,8 +5026,8 @@ def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))), // cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1) let AddedComplexity = 30 in -def: Pat<(i1 (setge (i32 IntRegs:$src1), s8ExtPred:$src2)), - (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ExtPred:$src2))>; +def: Pat<(i1 (setge (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>; // Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss). // rss >= rtt -> !(rtt > rss). @@ -5084,20 +5038,21 @@ def: Pat<(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), // !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1). // rs < rt -> !(rs >= rt). let AddedComplexity = 30 in -def: Pat<(i1 (setlt (i32 IntRegs:$src1), s8ExtPred:$src2)), - (C2_not (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ExtPred:$src2)))>; +def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_not (C2_cmpgti IntRegs:$src1, + (DEC_CONST_SIGNED s32ImmPred:$src2)))>; // Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs) def: Pat<(i1 (setuge (i32 IntRegs:$src1), 0)), (C2_cmpeq IntRegs:$src1, IntRegs:$src1)>; // Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1) -def: Pat<(i1 (setuge (i32 IntRegs:$src1), u8ExtPred:$src2)), - (C2_cmpgtui IntRegs:$src1, (DEC_CONST_UNSIGNED u8ExtPred:$src2))>; +def: Pat<(i1 (setuge (i32 IntRegs:$src1), u32ImmPred:$src2)), + (C2_cmpgtui IntRegs:$src1, (DEC_CONST_UNSIGNED u32ImmPred:$src2))>; // Generate cmpgtu(Rs, #u9) -def: Pat<(i1 (setugt (i32 IntRegs:$src1), u9ExtPred:$src2)), - (C2_cmpgtui IntRegs:$src1, u9ExtPred:$src2)>; +def: Pat<(i1 (setugt (i32 IntRegs:$src1), u32ImmPred:$src2)), + (C2_cmpgtui IntRegs:$src1, u32ImmPred:$src2)>; // Map from Rs >= Rt -> !(Rt > Rs). // rs >= rt -> !(rt > rs). @@ -5118,11 +5073,6 @@ def: Pat<(i32 (sext (i1 PredRegs:$src1))), def: Pat<(i64 (sext (i1 PredRegs:$src1))), (A2_combinew (A2_tfrsi -1), (C2_muxii PredRegs:$src1, -1, 0))>; -// Convert sign-extended load back to load and sign extend. -// i32 -> i64 -def: Pat <(i64 (sextloadi32 ADDRriS11_2:$src1)), - (i64 (A2_sxtw (L2_loadri_io AddrFI:$src1, 0)))>; - // Zero extends. // i1 -> i32 def: Pat<(i32 (zext (i1 PredRegs:$src1))), @@ -5136,12 +5086,6 @@ def: Pat<(i32 (anyext (i1 PredRegs:$src1))), def: Pat<(i64 (anyext (i1 PredRegs:$src1))), (A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>; -def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), - (i32 32))), - (i64 (zextloadi32 ADDRriS11_2:$srcLow)))), - (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), - (L2_loadri_io AddrFI:$srcLow, 0)))>; - // Multiply 64-bit unsigned and use upper result. 
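The predicate-extension patterns above map sext(i1) to mux(p, -1, 0) and zext/anyext(i1) to mux(p, 1, 0), with the i64 variants pairing that with a combine or sxtw. In plain terms:

#include <cstdint>

// sext i1 -> i32 is all-ones or zero; zext i1 -> i32 is one or zero.
constexpr int32_t sextI1(bool P) { return P ? -1 : 0; } // C2_muxii(p, -1, 0)
constexpr int32_t zextI1(bool P) { return P ? 1 : 0; }  // C2_muxii(p, 1, 0)

static_assert(sextI1(true) == -1 && sextI1(false) == 0, "sign extension");
static_assert(zextI1(true) == 1 && zextI1(false) == 0, "zero extension");

int main() {}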
def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), (A2_addp @@ -5186,10 +5130,13 @@ let AddedComplexity = 100 in def: Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)), (i32 IntRegs:$src1)>; -def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>; +def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>; +def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>; -def : Pat<(HexagonWrapperJT tjumptable:$dst), - (i32 (CONST32_set_jt tjumptable:$dst))>; +def: Pat<(HexagonJT tjumptable:$dst), + (CONST32_set_jt tjumptable:$dst)>; +def: Pat<(HexagonCP tconstpool :$dst), + (CONST32_set_jt tconstpool:$dst)>; // XTYPE/SHIFT // @@ -5626,6 +5573,43 @@ let hasNewValue = 1 in { def S2_insertp_rp : T_S3op_insert<"insert", DoubleRegs>; def S2_insertp : T_S2op_insert <0b0011, DoubleRegs, u6Imm>; + +def SDTHexagonINSERT_ri : SDTypeProfile<1, 4, [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, + SDTCisVT<2, i32>, + SDTCisVT<3, i32>, + SDTCisVT<4, i32>]>; +def SDTHexagonINSERT_rd : SDTypeProfile<1, 4, [SDTCisVT<0, i64>, + SDTCisVT<1, i64>, + SDTCisVT<2, i64>, + SDTCisVT<3, i32>, + SDTCisVT<4, i32>]>; +def SDTHexagonINSERT_riv : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, + SDTCisVT<2, i32>, + SDTCisVT<3, i64>]>; +def SDTHexagonINSERT_rdv : SDTypeProfile<1, 3, [SDTCisVT<0, i64>, + SDTCisVT<1, i64>, + SDTCisVT<2, i64>, + SDTCisVT<3, i64>]>; +def HexagonINSERT_ri : SDNode<"HexagonISD::INSERT_ri", SDTHexagonINSERT_ri>; +def HexagonINSERT_rd : SDNode<"HexagonISD::INSERT_rd", SDTHexagonINSERT_rd>; +def HexagonINSERT_riv: SDNode<"HexagonISD::INSERT_riv", SDTHexagonINSERT_riv>; +def HexagonINSERT_rdv: SDNode<"HexagonISD::INSERT_rdv", SDTHexagonINSERT_rdv>; + +def: Pat<(HexagonINSERT_ri I32:$Rs, I32:$Rt, u5ImmPred:$u1, u5ImmPred:$u2), + (S2_insert I32:$Rs, I32:$Rt, u5ImmPred:$u1, u5ImmPred:$u2)>; + +def: Pat<(HexagonINSERT_rd I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2), + (S2_insertp I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2)>; + +def: Pat<(HexagonINSERT_riv I32:$Rs, I32:$Rt, I64:$Ru), + (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>; + +def: Pat<(HexagonINSERT_rdv I64:$Rs, I64:$Rt, I64:$Ru), + (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>; + + //===----------------------------------------------------------------------===// // Template class for 'extract bitfield' instructions //===----------------------------------------------------------------------===// @@ -5692,6 +5676,37 @@ let hasNewValue = 1 in { def S2_extractu : T_S2op_extract <"extractu", 0b1101, IntRegs, u5Imm>; } +def SDTHexagonEXTRACTU_ri : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, + SDTCisVT<2, i32>, + SDTCisVT<3, i32>]>; +def SDTHexagonEXTRACTU_rd : SDTypeProfile<1, 3, [SDTCisVT<0, i64>, + SDTCisVT<1, i64>, + SDTCisVT<2, i32>, + SDTCisVT<3, i32>]>; +def SDTHexagonEXTRACTU_riv : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, + SDTCisVT<2, i64>]>; +def SDTHexagonEXTRACTU_rdv : SDTypeProfile<1, 2, [SDTCisVT<0, i64>, + SDTCisVT<1, i64>, + SDTCisVT<2, i64>]>; +def HexagonEXTRACTU_ri : SDNode<"HexagonISD::EXTRACTU_ri", SDTHexagonEXTRACTU_ri>; +def HexagonEXTRACTU_rd : SDNode<"HexagonISD::EXTRACTU_rd", SDTHexagonEXTRACTU_rd>; +def HexagonEXTRACTU_riv: SDNode<"HexagonISD::EXTRACTU_riv", SDTHexagonEXTRACTU_riv>; +def HexagonEXTRACTU_rdv: SDNode<"HexagonISD::EXTRACTU_rdv", SDTHexagonEXTRACTU_rdv>; + +def: Pat<(HexagonEXTRACTU_ri I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3), + (S2_extractu I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3)>; + +def: Pat<(HexagonEXTRACTU_rd I64:$src1, 
u6ImmPred:$src2, u6ImmPred:$src3), + (S2_extractup I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3)>; + +def: Pat<(HexagonEXTRACTU_riv I32:$src1, I64:$src2), + (S2_extractu_rp I32:$src1, I64:$src2)>; + +def: Pat<(HexagonEXTRACTU_rdv I64:$src1, I64:$src2), + (S2_extractup_rp I64:$src1, I64:$src2)>; + // Change the sign of the immediate for Rd=-mpyi(Rs,#u8) def: Pat<(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)), (M2_mpysin IntRegs:$src1, u8ImmPred:$src2)>; @@ -5728,6 +5743,22 @@ def S2_tableidxw : tableidxRaw<"tableidxw", 0b10>; def S2_tableidxd : tableidxRaw<"tableidxd", 0b11>; //===----------------------------------------------------------------------===// +// Template class for 'table index' instructions which are assembler mapped +// to their :raw format. +//===----------------------------------------------------------------------===// +let isPseudo = 1 in +class tableidx_goodsyntax <string mnemonic> + : SInst <(outs IntRegs:$Rx), + (ins IntRegs:$_dst_, IntRegs:$Rs, u4Imm:$u4, u5Imm:$u5), + "$Rx = "#mnemonic#"($Rs, #$u4, #$u5)", + [], "$Rx = $_dst_" >; + +def S2_tableidxb_goodsyntax : tableidx_goodsyntax<"tableidxb">; +def S2_tableidxh_goodsyntax : tableidx_goodsyntax<"tableidxh">; +def S2_tableidxw_goodsyntax : tableidx_goodsyntax<"tableidxw">; +def S2_tableidxd_goodsyntax : tableidx_goodsyntax<"tableidxd">; + +//===----------------------------------------------------------------------===// // V3 Instructions + //===----------------------------------------------------------------------===// @@ -5761,4 +5792,4 @@ include "HexagonInstrInfoV5.td" // ALU32/64/Vector + //===----------------------------------------------------------------------===/// -include "HexagonInstrInfoVector.td"
\ No newline at end of file +include "HexagonInstrInfoVector.td" diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 0e4dde3..918b482 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -11,6 +11,25 @@ // //===----------------------------------------------------------------------===// +def DuplexIClass0: InstDuplex < 0 >; +def DuplexIClass1: InstDuplex < 1 >; +def DuplexIClass2: InstDuplex < 2 >; +let isExtendable = 1 in { + def DuplexIClass3: InstDuplex < 3 >; + def DuplexIClass4: InstDuplex < 4 >; + def DuplexIClass5: InstDuplex < 5 >; + def DuplexIClass6: InstDuplex < 6 >; + def DuplexIClass7: InstDuplex < 7 >; +} +def DuplexIClass8: InstDuplex < 8 >; +def DuplexIClass9: InstDuplex < 9 >; +def DuplexIClassA: InstDuplex < 0xA >; +def DuplexIClassB: InstDuplex < 0xB >; +def DuplexIClassC: InstDuplex < 0xC >; +def DuplexIClassD: InstDuplex < 0xD >; +def DuplexIClassE: InstDuplex < 0xE >; +def DuplexIClassF: InstDuplex < 0xF >; + def addrga: PatLeaf<(i32 AddrGA:$Addr)>; def addrgp: PatLeaf<(i32 AddrGP:$Addr)>; @@ -137,6 +156,9 @@ def: T_cmp32_rr_pat<A4_rcmpeq, CmpInReg<seteq>, i32>; def: T_cmp32_rr_pat<A4_rcmpneq, CmpInReg<setne>, i32>; def: T_cmp32_rr_pat<C4_cmpneq, setne, i1>; +def: T_cmp32_rr_pat<C4_cmplteu, setule, i1>; + +def: T_cmp32_rr_pat<C4_cmplteu, RevCmp<setuge>, i1>; class T_CMP_rrbh<string mnemonic, bits<3> MinOp, bit IsComm> : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt), @@ -247,10 +269,10 @@ class T_RCMP_EQ_ri<string mnemonic, bit IsNeg> def A4_rcmpeqi : T_RCMP_EQ_ri<"cmp.eq", 0>; def A4_rcmpneqi : T_RCMP_EQ_ri<"!cmp.eq", 1>; -def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s8ExtPred:$s8)))), - (A4_rcmpeqi IntRegs:$Rs, s8ExtPred:$s8)>; -def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s8ExtPred:$s8)))), - (A4_rcmpneqi IntRegs:$Rs, s8ExtPred:$s8)>; +def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s32ImmPred:$s8)))), + (A4_rcmpeqi IntRegs:$Rs, s32ImmPred:$s8)>; +def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s32ImmPred:$s8)))), + (A4_rcmpneqi IntRegs:$Rs, s32ImmPred:$s8)>; // Preserve the S2_tstbit_r generation def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))), @@ -292,16 +314,15 @@ let opExtendable = 1 in def A4_combineir : T_Combine1<0b01, (ins s8Ext:$s8, IntRegs:$Rs), "$Rdd = combine(#$s8, $Rs)">; -def HexagonWrapperCombineRI_V4 : - SDNode<"HexagonISD::WrapperCombineRI_V4", SDTHexagonI64I32I32>; -def HexagonWrapperCombineIR_V4 : - SDNode<"HexagonISD::WrapperCombineIR_V4", SDTHexagonI64I32I32>; +// The complexity of the combines involving immediates should be greater +// than the complexity of the combine with two registers. +let AddedComplexity = 50 in { +def: Pat<(HexagonCOMBINE IntRegs:$r, s32ImmPred:$i), + (A4_combineri IntRegs:$r, s32ImmPred:$i)>; -def : Pat <(HexagonWrapperCombineRI_V4 IntRegs:$r, s8ExtPred:$i), - (A4_combineri IntRegs:$r, s8ExtPred:$i)>; - -def : Pat <(HexagonWrapperCombineIR_V4 s8ExtPred:$i, IntRegs:$r), - (A4_combineir s8ExtPred:$i, IntRegs:$r)>; +def: Pat<(HexagonCOMBINE s32ImmPred:$i, IntRegs:$r), + (A4_combineir s32ImmPred:$i, IntRegs:$r)>; +} // A4_combineii: Set two small immediates. let hasSideEffects = 0, isExtendable = 1, opExtentBits = 6, opExtendable = 2 in @@ -322,7 +343,7 @@ def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8Imm:$s8, u6Ext:$U6), // The complexity of the combine with two immediates should be greater than // the complexity of a combine involving a register. 
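The HexagonINSERT and HexagonEXTRACTU nodes added a few hunks up select directly onto S2_insert/S2_insertp and S2_extractu/S2_extractup. Assuming the usual bitfield semantics (and 0 < width with width + offset <= 32 for the 32-bit forms), this is the operation those patterns hand to the hardware:

#include <cstdint>

// extractu(Rs, #w, #off): take w bits of Rs starting at off, zero-filled.
constexpr uint32_t extractu(uint32_t Rs, unsigned W, unsigned Off) {
  return (Rs >> Off) & ((UINT32_C(1) << W) - 1);
}

// insert(Rx, Rs, #w, #off): replace bits [off, off+w) of Rx with the low w
// bits of Rs, leaving the rest of Rx intact.
constexpr uint32_t insertBits(uint32_t Rx, uint32_t Rs, unsigned W,
                              unsigned Off) {
  const uint32_t M = ((UINT32_C(1) << W) - 1) << Off;
  return (Rx & ~M) | ((Rs << Off) & M);
}

static_assert(extractu(0xabcd1234u, 8, 16) == 0xcd, "pull out one byte field");
static_assert(insertBits(0xffffffffu, 0, 8, 4) == 0xfffff00fu, "zero a field");

int main() {}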
let AddedComplexity = 75 in -def: Pat<(HexagonCOMBINE s8ImmPred:$s8, u6ExtPred:$u6), +def: Pat<(HexagonCOMBINE s8ImmPred:$s8, u32ImmPred:$u6), (A4_combineii imm:$s8, imm:$u6)>; //===----------------------------------------------------------------------===// @@ -346,20 +367,22 @@ multiclass Loadxm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod, PatLeaf ImmPred, InstHexagon MI> { def: Pat<(VT (Load AddrFI:$fi)), (VT (ValueMod (MI AddrFI:$fi, 0)))>; + def: Pat<(VT (Load (add AddrFI:$fi, ImmPred:$Off))), + (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>; def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))), (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>; def: Pat<(VT (Load (i32 IntRegs:$Rs))), (VT (ValueMod (MI IntRegs:$Rs, 0)))>; } -defm: Loadxm_pat<extloadi1, i64, Zext64, s11_0ExtPred, L2_loadrub_io>; -defm: Loadxm_pat<extloadi8, i64, Zext64, s11_0ExtPred, L2_loadrub_io>; -defm: Loadxm_pat<extloadi16, i64, Zext64, s11_1ExtPred, L2_loadruh_io>; -defm: Loadxm_pat<zextloadi1, i64, Zext64, s11_0ExtPred, L2_loadrub_io>; -defm: Loadxm_pat<zextloadi8, i64, Zext64, s11_0ExtPred, L2_loadrub_io>; -defm: Loadxm_pat<zextloadi16, i64, Zext64, s11_1ExtPred, L2_loadruh_io>; -defm: Loadxm_pat<sextloadi8, i64, Sext64, s11_0ExtPred, L2_loadrb_io>; -defm: Loadxm_pat<sextloadi16, i64, Sext64, s11_1ExtPred, L2_loadrh_io>; +defm: Loadxm_pat<extloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<extloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<extloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>; +defm: Loadxm_pat<zextloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<zextloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<zextloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>; +defm: Loadxm_pat<sextloadi8, i64, Sext64, s32_0ImmPred, L2_loadrb_io>; +defm: Loadxm_pat<sextloadi16, i64, Sext64, s31_1ImmPred, L2_loadrh_io>; // Map Rdd = anyext(Rs) -> Rdd = combine(#0, Rs). def: Pat<(i64 (anyext (i32 IntRegs:$src1))), (Zext64 IntRegs:$src1)>; @@ -635,19 +658,6 @@ def: Pat<(i64 (zext (i1 PredRegs:$src1))), def: Pat<(i64 (zext (i32 IntRegs:$src1))), (Zext64 IntRegs:$src1)>; -// zext i32->i64 -def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)), - (i64 (A4_combineir 0, (L2_loadri_io AddrFI:$src1, 0)))>; - -let AddedComplexity = 100 in -def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), - (i64 (A4_combineir 0, (L2_loadri_io IntRegs:$src1, - s11_2ExtPred:$offset)))>; - -// anyext i32->i64 -def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)), - (i64 (A4_combineir 0, (L2_loadri_io AddrFI:$src1, 0)))>; - //===----------------------------------------------------------------------===// // LD - //===----------------------------------------------------------------------===// @@ -768,8 +778,8 @@ multiclass T_StoreAbsReg_Pats <InstHexagon MI, RegisterClass RC, ValueType VT, PatFrag stOp> { def : Pat<(stOp (VT RC:$src4), (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), - u0AlwaysExtPred:$src3)), - (MI IntRegs:$src1, u2ImmPred:$src2, u0AlwaysExtPred:$src3, RC:$src4)>; + u32ImmPred:$src3)), + (MI IntRegs:$src1, u2ImmPred:$src2, u32ImmPred:$src3, RC:$src4)>; def : Pat<(stOp (VT RC:$src4), (add (shl IntRegs:$src1, u2ImmPred:$src2), @@ -1157,17 +1167,17 @@ let AddedComplexity = 40 in { // is not extendable. This could cause problems during removing the frame // indices, since the offset with respect to R29/R30 may not fit in the // u6 field. 
- def: Storexm_add_pat<truncstorei8, s8ExtPred, u6_0ImmPred, ToImmByte, + def: Storexm_add_pat<truncstorei8, s32ImmPred, u6_0ImmPred, ToImmByte, S4_storeirb_io>; - def: Storexm_add_pat<truncstorei16, s8ExtPred, u6_1ImmPred, ToImmHalf, + def: Storexm_add_pat<truncstorei16, s32ImmPred, u6_1ImmPred, ToImmHalf, S4_storeirh_io>; - def: Storexm_add_pat<store, s8ExtPred, u6_2ImmPred, ToImmWord, + def: Storexm_add_pat<store, s32ImmPred, u6_2ImmPred, ToImmWord, S4_storeiri_io>; } -def: Storexm_simple_pat<truncstorei8, s8ExtPred, ToImmByte, S4_storeirb_io>; -def: Storexm_simple_pat<truncstorei16, s8ExtPred, ToImmHalf, S4_storeirh_io>; -def: Storexm_simple_pat<store, s8ExtPred, ToImmWord, S4_storeiri_io>; +def: Storexm_simple_pat<truncstorei8, s32ImmPred, ToImmByte, S4_storeirb_io>; +def: Storexm_simple_pat<truncstorei16, s32ImmPred, ToImmHalf, S4_storeirh_io>; +def: Storexm_simple_pat<store, s32ImmPred, ToImmWord, S4_storeiri_io>; // memb(Rx++#s4:0:circ(Mu))=Rt // memb(Rx++I:circ(Mu))=Rt @@ -1798,6 +1808,49 @@ def: LogLogNot_pat<or, and, C4_or_andn>; def: LogLogNot_pat<or, or, C4_or_orn>; //===----------------------------------------------------------------------===// +// PIC: Support for PIC compilations. The patterns and SD nodes defined +// below are needed to support code generation for PIC +//===----------------------------------------------------------------------===// + +def SDT_HexagonPICAdd + : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; +def SDT_HexagonGOTAdd + : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +def SDT_HexagonGOTAddInternal : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; +def SDT_HexagonGOTAddInternalJT : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; +def SDT_HexagonGOTAddInternalBA : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; + +def Hexagonpic_add : SDNode<"HexagonISD::PIC_ADD", SDT_HexagonPICAdd>; +def Hexagonat_got : SDNode<"HexagonISD::AT_GOT", SDT_HexagonGOTAdd>; +def Hexagongat_pcrel : SDNode<"HexagonISD::AT_PCREL", + SDT_HexagonGOTAddInternal>; +def Hexagongat_pcrel_jt : SDNode<"HexagonISD::AT_PCREL", + SDT_HexagonGOTAddInternalJT>; +def Hexagongat_pcrel_ba : SDNode<"HexagonISD::AT_PCREL", + SDT_HexagonGOTAddInternalBA>; + +// PIC: Map from a block address computation to a PC-relative add +def: Pat<(Hexagongat_pcrel_ba tblockaddress:$src1), + (C4_addipc u32ImmPred:$src1)>; + +// PIC: Map from the computation to generate a GOT pointer to a PC-relative add +def: Pat<(Hexagonpic_add texternalsym:$src1), + (C4_addipc u32ImmPred:$src1)>; + +// PIC: Map from a jump table address computation to a PC-relative add +def: Pat<(Hexagongat_pcrel_jt tjumptable:$src1), + (C4_addipc u32ImmPred:$src1)>; + +// PIC: Map from a GOT-relative symbol reference to a load +def: Pat<(Hexagonat_got (i32 IntRegs:$src1), tglobaladdr:$src2), + (L2_loadri_io IntRegs:$src1, s30_2ImmPred:$src2)>; + +// PIC: Map from a static symbol reference to a PC-relative add +def: Pat<(Hexagongat_pcrel tglobaladdr:$src1), + (C4_addipc u32ImmPred:$src1)>; + +//===----------------------------------------------------------------------===// // CR - //===----------------------------------------------------------------------===// @@ -1836,7 +1889,7 @@ def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Ru, s6Ext:$s6), "$Rd = add($Rs, add($Ru, #$s6))" , [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs), - (add (i32 IntRegs:$Ru), s6_16ExtPred:$s6)))], + (add (i32 IntRegs:$Ru), s16_16ImmPred:$s6)))], "", ALU64_tc_2_SLOT23> { bits<5> Rd; bits<5> Rs; @@ -1877,19 +1930,19 @@ def 
S4_subaddi: ALU64Inst <(outs IntRegs:$Rd), } // Rd=add(Rs,sub(#s6,Ru)) -def: Pat<(add (i32 IntRegs:$src1), (sub s6_10ExtPred:$src2, +def: Pat<(add (i32 IntRegs:$src1), (sub s32ImmPred:$src2, (i32 IntRegs:$src3))), - (S4_subaddi IntRegs:$src1, s6_10ExtPred:$src2, IntRegs:$src3)>; + (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>; // Rd=sub(add(Rs,#s6),Ru) -def: Pat<(sub (add (i32 IntRegs:$src1), s6_10ExtPred:$src2), +def: Pat<(sub (add (i32 IntRegs:$src1), s32ImmPred:$src2), (i32 IntRegs:$src3)), - (S4_subaddi IntRegs:$src1, s6_10ExtPred:$src2, IntRegs:$src3)>; + (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>; // Rd=add(sub(Rs,Ru),#s6) def: Pat<(add (sub (i32 IntRegs:$src1), (i32 IntRegs:$src3)), - (s6_10ExtPred:$src2)), - (S4_subaddi IntRegs:$src1, s6_10ExtPred:$src2, IntRegs:$src3)>; + (s32ImmPred:$src2)), + (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>; // Add or subtract doublewords with carry. @@ -2042,7 +2095,7 @@ def S4_or_andix: (ins IntRegs:$Ru, IntRegs:$_src_, s10Ext:$s10), "$Rx = or($Ru, and($_src_, #$s10))" , [(set (i32 IntRegs:$Rx), - (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s10ExtPred:$s10)))] , + (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s32ImmPred:$s10)))] , "$_src_ = $Rx", ALU64_tc_2_SLOT23> { bits<5> Rx; bits<5> Ru; @@ -2187,7 +2240,7 @@ class T_CompOR <string mnemonic, bits<2> MajOp, SDNode OpNode> (ins IntRegs:$src1, IntRegs:$Rs, s10Ext:$s10), "$Rx |= "#mnemonic#"($Rs, #$s10)", [(set (i32 IntRegs:$Rx), (or (i32 IntRegs:$src1), - (OpNode (i32 IntRegs:$Rs), s10ExtPred:$s10)))], + (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10)))], "$src1 = $Rx", ALU64_tc_2_SLOT23>, ImmRegRel { bits<5> Rx; bits<5> Rs; @@ -2349,7 +2402,7 @@ def M4_mpyri_addi : MInst<(outs IntRegs:$Rd), "$Rd = add(#$u6, mpyi($Rs, #$U6))" , [(set (i32 IntRegs:$Rd), (add (mul (i32 IntRegs:$Rs), u6ImmPred:$U6), - u6ExtPred:$u6))] ,"",ALU64_tc_3x_SLOT23> { + u32ImmPred:$u6))] ,"",ALU64_tc_3x_SLOT23> { bits<5> Rd; bits<6> u6; bits<5> Rs; @@ -2374,7 +2427,7 @@ def M4_mpyrr_addi : MInst <(outs IntRegs:$Rd), (ins u6Ext:$u6, IntRegs:$Rs, IntRegs:$Rt), "$Rd = add(#$u6, mpyi($Rs, $Rt))" , [(set (i32 IntRegs:$Rd), - (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u6ExtPred:$u6))], + (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u32ImmPred:$u6))], "", ALU64_tc_3x_SLOT23>, ImmRegRel { bits<5> Rd; bits<6> u6; @@ -2424,7 +2477,7 @@ def M4_mpyri_addr_u2 : T_AddMpy<0b0, u6_2ImmPred, let isExtendable = 1, opExtentBits = 6, opExtendable = 3, CextOpcode = "ADD_MPY", InputType = "imm" in -def M4_mpyri_addr : T_AddMpy<0b1, u6ExtPred, +def M4_mpyri_addr : T_AddMpy<0b1, u32ImmPred, (ins IntRegs:$src1, IntRegs:$src3, u6Ext:$src2)>, ImmRegRel; // Rx=add(Ru,mpyi(Rx,Rs)) @@ -2447,17 +2500,6 @@ def M4_mpyrr_addr: MInst_acc <(outs IntRegs:$Rx), let Inst{20-16} = Rs; } -// Rd=add(##,mpyi(Rs,#U6)) -def : Pat <(add (mul (i32 IntRegs:$src2), u6ImmPred:$src3), - (HexagonCONST32 tglobaladdr:$src1)), - (i32 (M4_mpyri_addi tglobaladdr:$src1, IntRegs:$src2, - u6ImmPred:$src3))>; - -// Rd=add(##,mpyi(Rs,Rt)) -def : Pat <(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)), - (HexagonCONST32 tglobaladdr:$src1)), - (i32 (M4_mpyrr_addi tglobaladdr:$src1, IntRegs:$src2, - IntRegs:$src3))>; // Vector reduce multiply word by signed half (32x16) //Rdd=vrmpyweh(Rss,Rtt)[:<<1] @@ -2569,7 +2611,7 @@ class T_S4_ShiftOperate<string MnOp, string MnSh, SDNode Op, SDNode Sh, : MInst_acc<(outs IntRegs:$Rd), (ins u8Ext:$u8, IntRegs:$Rx, u5Imm:$U5), "$Rd = "#MnOp#"(#$u8, "#MnSh#"($Rx, #$U5))", [(set (i32 
IntRegs:$Rd), - (Op (Sh I32:$Rx, u5ImmPred:$U5), u8ExtPred:$u8))], + (Op (Sh I32:$Rx, u5ImmPred:$U5), u32ImmPred:$u8))], "$Rd = $Rx", Itin> { bits<5> Rd; @@ -2904,7 +2946,7 @@ let isExtendable = 1, opExtendable = 1, isExtentSigned = 0 in { // mem[bh](Rs+#u6) += #U5 //===----------------------------------------------------------------------===// -multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred, +multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred, InstHexagon MI, SDNode OpNode> { let AddedComplexity = 180 in def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend), @@ -2912,24 +2954,24 @@ multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred, (MI IntRegs:$addr, 0, u5ImmPred:$addend)>; let AddedComplexity = 190 in - def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, ExtPred:$offset)), + def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, ImmPred:$offset)), u5ImmPred:$addend), - (add IntRegs:$base, ExtPred:$offset)), - (MI IntRegs:$base, ExtPred:$offset, u5ImmPred:$addend)>; + (add IntRegs:$base, ImmPred:$offset)), + (MI IntRegs:$base, ImmPred:$offset, u5ImmPred:$addend)>; } -multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred, +multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred, InstHexagon addMI, InstHexagon subMI> { - defm: MemOpi_u5Pats<ldOp, stOp, ExtPred, addMI, add>; - defm: MemOpi_u5Pats<ldOp, stOp, ExtPred, subMI, sub>; + defm: MemOpi_u5Pats<ldOp, stOp, ImmPred, addMI, add>; + defm: MemOpi_u5Pats<ldOp, stOp, ImmPred, subMI, sub>; } multiclass MemOpi_u5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { // Half Word - defm: MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred, + defm: MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u31_1ImmPred, L4_iadd_memoph_io, L4_isub_memoph_io>; // Byte - defm: MemOpi_u5ALUOp <ldOpByte, truncstorei8, u6ExtPred, + defm: MemOpi_u5ALUOp <ldOpByte, truncstorei8, u32ImmPred, L4_iadd_memopb_io, L4_isub_memopb_io>; } @@ -2939,7 +2981,7 @@ let Predicates = [UseMEMOP] in { defm: MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend // Word - defm: MemOpi_u5ALUOp <load, store, u6_2ExtPred, L4_iadd_memopw_io, + defm: MemOpi_u5ALUOp <load, store, u30_2ImmPred, L4_iadd_memopw_io, L4_isub_memopw_io>; } @@ -2950,7 +2992,7 @@ let Predicates = [UseMEMOP] in { // mem[bh](Rs+#u6) += #m5 //===----------------------------------------------------------------------===// -multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred, +multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred, PatLeaf immPred, SDNodeXForm xformFunc, InstHexagon MI> { let AddedComplexity = 190 in @@ -2958,18 +3000,18 @@ multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred, (MI IntRegs:$addr, 0, (xformFunc immPred:$subend))>; let AddedComplexity = 195 in - def: Pat<(stOp (add (ldOp (add IntRegs:$base, extPred:$offset)), + def: Pat<(stOp (add (ldOp (add IntRegs:$base, ImmPred:$offset)), immPred:$subend), - (add IntRegs:$base, extPred:$offset)), - (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$subend))>; + (add IntRegs:$base, ImmPred:$offset)), + (MI IntRegs:$base, ImmPred:$offset, (xformFunc immPred:$subend))>; } multiclass MemOpi_m5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { // Half Word - defm: MemOpi_m5Pats <ldOpHalf, truncstorei16, u6_1ExtPred, m5HImmPred, + defm: MemOpi_m5Pats <ldOpHalf, truncstorei16, u31_1ImmPred, m5HImmPred, MEMOPIMM_HALF, L4_isub_memoph_io>; // Byte - defm: MemOpi_m5Pats <ldOpByte, truncstorei8, u6ExtPred, m5BImmPred, + defm: 
MemOpi_m5Pats <ldOpByte, truncstorei8, u32ImmPred, m5BImmPred, MEMOPIMM_BYTE, L4_isub_memopb_io>; } @@ -2979,7 +3021,7 @@ let Predicates = [UseMEMOP] in { defm: MemOpi_m5ExtType<extloadi8, extloadi16>; // any extend // Word - defm: MemOpi_m5Pats <load, store, u6_2ExtPred, m5ImmPred, + defm: MemOpi_m5Pats <load, store, u30_2ImmPred, m5ImmPred, MEMOPIMM, L4_isub_memopw_io>; } @@ -3008,16 +3050,16 @@ multiclass MemOpi_bitPats <PatFrag ldOp, PatFrag stOp, PatLeaf immPred, multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf> { // Byte - clrbit - defm: MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u6ExtPred, + defm: MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u32ImmPred, CLRMEMIMM_BYTE, L4_iand_memopb_io, and>; // Byte - setbit - defm: MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u6ExtPred, + defm: MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u32ImmPred, SETMEMIMM_BYTE, L4_ior_memopb_io, or>; // Half Word - clrbit - defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u6_1ExtPred, + defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u31_1ImmPred, CLRMEMIMM_SHORT, L4_iand_memoph_io, and>; // Half Word - setbit - defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u6_1ExtPred, + defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u31_1ImmPred, SETMEMIMM_SHORT, L4_ior_memoph_io, or>; } @@ -3030,9 +3072,9 @@ let Predicates = [UseMEMOP] in { // memw(Rs+#0) = [clrbit|setbit](#U5) // memw(Rs+#u6:2) = [clrbit|setbit](#U5) - defm: MemOpi_bitPats<load, store, Clr5ImmPred, u6_2ExtPred, CLRMEMIMM, + defm: MemOpi_bitPats<load, store, Clr5ImmPred, u30_2ImmPred, CLRMEMIMM, L4_iand_memopw_io, and>; - defm: MemOpi_bitPats<load, store, Set5ImmPred, u6_2ExtPred, SETMEMIMM, + defm: MemOpi_bitPats<load, store, Set5ImmPred, u30_2ImmPred, SETMEMIMM, L4_ior_memopw_io, or>; } @@ -3070,11 +3112,11 @@ multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf extPred, multiclass MemOPr_ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { // Half Word - defm: MemOPr_ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred, + defm: MemOPr_ALUOp <ldOpHalf, truncstorei16, u31_1ImmPred, L4_add_memoph_io, L4_sub_memoph_io, L4_and_memoph_io, L4_or_memoph_io>; // Byte - defm: MemOPr_ALUOp <ldOpByte, truncstorei8, u6ExtPred, + defm: MemOPr_ALUOp <ldOpByte, truncstorei8, u32ImmPred, L4_add_memopb_io, L4_sub_memopb_io, L4_and_memopb_io, L4_or_memopb_io>; } @@ -3086,7 +3128,7 @@ let Predicates = [UseMEMOP] in { defm: MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend defm: MemOPr_ExtType<extloadi8, extloadi16>; // any extend // Word - defm: MemOPr_ALUOp <load, store, u6_2ExtPred, L4_add_memopw_io, + defm: MemOPr_ALUOp <load, store, u30_2ImmPred, L4_add_memopw_io, L4_sub_memopw_io, L4_and_memopw_io, L4_or_memopw_io>; } @@ -3110,23 +3152,23 @@ def C4_cmpneqi : T_CMP <"cmp.eq", 0b00, 1, s10Ext>; def C4_cmpltei : T_CMP <"cmp.gt", 0b01, 1, s10Ext>; def C4_cmplteui : T_CMP <"cmp.gtu", 0b10, 1, u9Ext>; -def : T_CMP_pat <C4_cmpneqi, setne, s10ExtPred>; -def : T_CMP_pat <C4_cmpltei, setle, s10ExtPred>; +def : T_CMP_pat <C4_cmpneqi, setne, s32ImmPred>; +def : T_CMP_pat <C4_cmpltei, setle, s32ImmPred>; def : T_CMP_pat <C4_cmplteui, setule, u9ImmPred>; // rs <= rt -> !(rs > rt). 
/* -def: Pat<(i1 (setle (i32 IntRegs:$src1), s10ExtPred:$src2)), - (C2_not (C2_cmpgti IntRegs:$src1, s10ExtPred:$src2))>; -// (C4_cmpltei IntRegs:$src1, s10ExtPred:$src2)>; +def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>; +// (C4_cmpltei IntRegs:$src1, s32ImmPred:$src2)>; */ // Map cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1). -def: Pat<(i1 (setlt (i32 IntRegs:$src1), s8ExtPred:$src2)), - (C4_cmpltei IntRegs:$src1, (DEC_CONST_SIGNED s8ExtPred:$src2))>; +def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C4_cmpltei IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>; // rs != rt -> !(rs == rt). -def: Pat<(i1 (setne (i32 IntRegs:$src1), s10ExtPred:$src2)), - (C4_cmpneqi IntRegs:$src1, s10ExtPred:$src2)>; +def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C4_cmpneqi IntRegs:$src1, s32ImmPred:$src2)>; // SDNode for converting immediate C to C-1. def DEC_CONST_BYTE : SDNodeXForm<imm, [{ @@ -3136,168 +3178,6 @@ def DEC_CONST_BYTE : SDNodeXForm<imm, [{ }]>; // For the sequence -// zext( seteq ( and(Rs, 255), u8)) -// Generate -// Pd=cmpb.eq(Rs, #u8) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 -def : Pat <(i32 (zext (i1 (seteq (i32 (and (i32 IntRegs:$Rs), 255)), - u8ExtPred:$u8)))), - (i32 (TFR_condset_ii (i1 (A4_cmpbeqi (i32 IntRegs:$Rs), - (u8ExtPred:$u8))), - 1, 0))>; - -// For the sequence -// zext( setne ( and(Rs, 255), u8)) -// Generate -// Pd=cmpb.eq(Rs, #u8) -// if (Pd.new) Rd=#0 -// if (!Pd.new) Rd=#1 -def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 IntRegs:$Rs), 255)), - u8ExtPred:$u8)))), - (i32 (TFR_condset_ii (i1 (A4_cmpbeqi (i32 IntRegs:$Rs), - (u8ExtPred:$u8))), - 0, 1))>; - -// For the sequence -// zext( seteq (Rs, and(Rt, 255))) -// Generate -// Pd=cmpb.eq(Rs, Rt) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 -def : Pat <(i32 (zext (i1 (seteq (i32 IntRegs:$Rt), - (i32 (and (i32 IntRegs:$Rs), 255)))))), - (i32 (TFR_condset_ii (i1 (A4_cmpbeq (i32 IntRegs:$Rs), - (i32 IntRegs:$Rt))), - 1, 0))>; - -// For the sequence -// zext( setne (Rs, and(Rt, 255))) -// Generate -// Pd=cmpb.eq(Rs, Rt) -// if (Pd.new) Rd=#0 -// if (!Pd.new) Rd=#1 -def : Pat <(i32 (zext (i1 (setne (i32 IntRegs:$Rt), - (i32 (and (i32 IntRegs:$Rs), 255)))))), - (i32 (TFR_condset_ii (i1 (A4_cmpbeq (i32 IntRegs:$Rs), - (i32 IntRegs:$Rt))), - 0, 1))>; - -// For the sequence -// zext( setugt ( and(Rs, 255), u8)) -// Generate -// Pd=cmpb.gtu(Rs, #u8) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 -def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 255)), - u8ExtPred:$u8)))), - (i32 (TFR_condset_ii (i1 (A4_cmpbgtui (i32 IntRegs:$Rs), - (u8ExtPred:$u8))), - 1, 0))>; - -// For the sequence -// zext( setugt ( and(Rs, 254), u8)) -// Generate -// Pd=cmpb.gtu(Rs, #u8) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 -def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 254)), - u8ExtPred:$u8)))), - (i32 (TFR_condset_ii (i1 (A4_cmpbgtui (i32 IntRegs:$Rs), - (u8ExtPred:$u8))), - 1, 0))>; - -// For the sequence -// zext( setult ( Rs, Rt)) -// Generate -// Pd=cmp.ltu(Rs, Rt) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 -// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs) -def : Pat <(i32 (zext (i1 (setult (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rt), - (i32 IntRegs:$Rs))), - 1, 0))>; - -// For the sequence -// zext( setlt ( Rs, Rt)) -// Generate -// Pd=cmp.lt(Rs, Rt) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 -// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs) -def : Pat <(i32 (zext (i1 (setlt (i32 
IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rt), - (i32 IntRegs:$Rs))), - 1, 0))>; - -// For the sequence -// zext( setugt ( Rs, Rt)) -// Generate -// Pd=cmp.gtu(Rs, Rt) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 -def : Pat <(i32 (zext (i1 (setugt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rs), - (i32 IntRegs:$Rt))), - 1, 0))>; - -// This pattern interefers with coremark performance, not implementing at this -// time. -// For the sequence -// zext( setgt ( Rs, Rt)) -// Generate -// Pd=cmp.gt(Rs, Rt) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 - -// For the sequence -// zext( setuge ( Rs, Rt)) -// Generate -// Pd=cmp.ltu(Rs, Rt) -// if (Pd.new) Rd=#0 -// if (!Pd.new) Rd=#1 -// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs) -def : Pat <(i32 (zext (i1 (setuge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rt), - (i32 IntRegs:$Rs))), - 0, 1))>; - -// For the sequence -// zext( setge ( Rs, Rt)) -// Generate -// Pd=cmp.lt(Rs, Rt) -// if (Pd.new) Rd=#0 -// if (!Pd.new) Rd=#1 -// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs) -def : Pat <(i32 (zext (i1 (setge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rt), - (i32 IntRegs:$Rs))), - 0, 1))>; - -// For the sequence -// zext( setule ( Rs, Rt)) -// Generate -// Pd=cmp.gtu(Rs, Rt) -// if (Pd.new) Rd=#0 -// if (!Pd.new) Rd=#1 -def : Pat <(i32 (zext (i1 (setule (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rs), - (i32 IntRegs:$Rt))), - 0, 1))>; - -// For the sequence -// zext( setle ( Rs, Rt)) -// Generate -// Pd=cmp.gt(Rs, Rt) -// if (Pd.new) Rd=#0 -// if (!Pd.new) Rd=#1 -def : Pat <(i32 (zext (i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rs), - (i32 IntRegs:$Rt))), - 0, 1))>; - -// For the sequence // zext( setult ( and(Rs, 255), u8)) // Use the isdigit transformation below @@ -3381,26 +3261,17 @@ defm L4_return: LD_MISC_L4_RETURN <"dealloc_return">, PredNewRel; // Restore registers and dealloc return function call. let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, Defs = [R29, R30, R31, PC], isPredicable = 0, isAsmParserOnly = 1 in { - def RESTORE_DEALLOC_RET_JMP_V4 : JInst<(outs), - (ins calltarget:$dst), - "jump $dst", - []>; + def RESTORE_DEALLOC_RET_JMP_V4 : T_JMP<"">; } // Restore registers and dealloc frame before a tail call. let isCall = 1, Defs = [R29, R30, R31, PC], isAsmParserOnly = 1 in { - def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : JInst<(outs), - (ins calltarget:$dst), - "call $dst", - []>; + def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<"">, PredRel; } // Save registers function call. 
let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in { - def SAVE_REGISTERS_CALL_V4 : JInst<(outs), - (ins calltarget:$dst), - "call $dst // Save_calle_saved_registers", - []>; + def SAVE_REGISTERS_CALL_V4 : T_Call<"">, PredRel; } //===----------------------------------------------------------------------===// @@ -3472,7 +3343,7 @@ class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp, //===----------------------------------------------------------------------===// class T_StoreAbs <string mnemonic, RegisterClass RC, Operand ImmOp, bits<2> MajOp, bit isHalf> - : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, u0AlwaysExt, 1, isHalf>, + : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, u32Imm, 1, isHalf>, AddrModeRel { string ImmOpStr = !cast<string>(ImmOp); let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, @@ -3513,7 +3384,7 @@ multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC, let hasSideEffects = 0, isPredicable = 1, mayStore = 1, isNVStore = 1, isNewValue = 1, opNewValue = 1 in class T_StoreAbsGP_NV <string mnemonic, Operand ImmOp, bits<2>MajOp, bit isAbs> - : NVInst_V4<(outs), (ins u0AlwaysExt:$addr, IntRegs:$src), + : NVInst_V4<(outs), (ins u32Imm:$addr, IntRegs:$src), mnemonic # !if(isAbs, "(##", "(#")#"$addr) = $src.new", [], "", V2LDST_tc_st_SLOT0> { bits<19> addr; @@ -3743,7 +3614,7 @@ class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp, class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp, bits<3> MajOp> - : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, u0AlwaysExt, 1>, AddrModeRel { + : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, u32Imm, 1>, AddrModeRel { string ImmOpStr = !cast<string>(ImmOp); let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, @@ -3903,17 +3774,17 @@ def: Pat<(i64 (ctlz I64:$src1)), (Zext64 (S2_cl0p I64:$src1))>; def: Pat<(i64 (cttz I64:$src1)), (Zext64 (S2_ct0p I64:$src1))>; let AddedComplexity = 30 in { - def: Storea_pat<truncstorei8, I32, u0AlwaysExtPred, S2_storerbabs>; - def: Storea_pat<truncstorei16, I32, u0AlwaysExtPred, S2_storerhabs>; - def: Storea_pat<store, I32, u0AlwaysExtPred, S2_storeriabs>; + def: Storea_pat<truncstorei8, I32, u32ImmPred, S2_storerbabs>; + def: Storea_pat<truncstorei16, I32, u32ImmPred, S2_storerhabs>; + def: Storea_pat<store, I32, u32ImmPred, S2_storeriabs>; } let AddedComplexity = 30 in { - def: Loada_pat<load, i32, u0AlwaysExtPred, L4_loadri_abs>; - def: Loada_pat<sextloadi8, i32, u0AlwaysExtPred, L4_loadrb_abs>; - def: Loada_pat<zextloadi8, i32, u0AlwaysExtPred, L4_loadrub_abs>; - def: Loada_pat<sextloadi16, i32, u0AlwaysExtPred, L4_loadrh_abs>; - def: Loada_pat<zextloadi16, i32, u0AlwaysExtPred, L4_loadruh_abs>; + def: Loada_pat<load, i32, u32ImmPred, L4_loadri_abs>; + def: Loada_pat<sextloadi8, i32, u32ImmPred, L4_loadrb_abs>; + def: Loada_pat<zextloadi8, i32, u32ImmPred, L4_loadrub_abs>; + def: Loada_pat<sextloadi16, i32, u32ImmPred, L4_loadrh_abs>; + def: Loada_pat<zextloadi16, i32, u32ImmPred, L4_loadruh_abs>; } // Indexed store word - global address. 
@@ -4012,6 +3883,18 @@ def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhabs>; def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storeriabs>; def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdabs>; +let Constraints = "@earlyclobber $dst" in +def Insert4 : PseudoM<(outs DoubleRegs:$dst), (ins IntRegs:$a, IntRegs:$b, + IntRegs:$c, IntRegs:$d), + ".error \"Should never try to emit Insert4\"", + [(set (i64 DoubleRegs:$dst), + (or (or (or (shl (i64 (zext (i32 (and (i32 IntRegs:$b), (i32 65535))))), + (i32 16)), + (i64 (zext (i32 (and (i32 IntRegs:$a), (i32 65535)))))), + (shl (i64 (anyext (i32 (and (i32 IntRegs:$c), (i32 65535))))), + (i32 32))), + (shl (i64 (anyext (i32 IntRegs:$d))), (i32 48))))]>; + //===----------------------------------------------------------------------===// // :raw form of boundscheck:hi:lo insns //===----------------------------------------------------------------------===// @@ -4116,7 +3999,7 @@ class CJInst_tstbit_R0<string px, bit np, string tnt> : InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2), ""#px#" = tstbit($Rs, #0); if (" #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", - [], "", COMPOUND, TypeCOMPOUND> { + [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { bits<4> Rs; bits<11> r9_2; @@ -4162,7 +4045,7 @@ class CJInst_RR<string px, string op, bit np, string tnt> : InstHexagon<(outs), (ins IntRegs:$Rs, IntRegs:$Rt, brtarget:$r9_2), ""#px#" = cmp."#op#"($Rs, $Rt); if (" #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", - [], "", COMPOUND, TypeCOMPOUND> { + [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { bits<4> Rs; bits<4> Rt; bits<11> r9_2; @@ -4216,7 +4099,7 @@ class CJInst_RU5<string px, string op, bit np, string tnt> : InstHexagon<(outs), (ins IntRegs:$Rs, u5Imm:$U5, brtarget:$r9_2), ""#px#" = cmp."#op#"($Rs, #$U5); if (" #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", - [], "", COMPOUND, TypeCOMPOUND> { + [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { bits<4> Rs; bits<5> U5; bits<11> r9_2; @@ -4271,7 +4154,7 @@ class CJInst_Rn1<string px, string op, bit np, string tnt> : InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2), ""#px#" = cmp."#op#"($Rs,#-1); if (" #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", - [], "", COMPOUND, TypeCOMPOUND> { + [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { bits<4> Rs; bits<11> r9_2; diff --git a/lib/Target/Hexagon/HexagonInstrInfoV5.td b/lib/Target/Hexagon/HexagonInstrInfoV5.td index 19b0935..337f4ea 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV5.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV5.td @@ -139,11 +139,11 @@ def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss), let Inst{20-16} = Rss; } -defm: Loadx_pat<load, f32, s11_2ExtPred, L2_loadri_io>; -defm: Loadx_pat<load, f64, s11_3ExtPred, L2_loadrd_io>; +defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>; +defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>; -defm: Storex_pat<store, F32, s11_2ExtPred, S2_storeri_io>; -defm: Storex_pat<store, F64, s11_3ExtPred, S2_storerd_io>; +defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>; +defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>; def: Storex_simple_pat<store, F32, S2_storeri_io>; def: Storex_simple_pat<store, F64, S2_storerd_io>; diff --git a/lib/Target/Hexagon/HexagonInstrInfoVector.td b/lib/Target/Hexagon/HexagonInstrInfoVector.td index 6e67b6e..f4fb946 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoVector.td +++ b/lib/Target/Hexagon/HexagonInstrInfoVector.td @@ -20,6 +20,34 @@ def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>; def
V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>; def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>; + +multiclass bitconvert_32<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a IntRegs:$src))), + (b IntRegs:$src)>; + def : Pat <(a (bitconvert (b IntRegs:$src))), + (a IntRegs:$src)>; +} + +multiclass bitconvert_64<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a DoubleRegs:$src))), + (b DoubleRegs:$src)>; + def : Pat <(a (bitconvert (b DoubleRegs:$src))), + (a DoubleRegs:$src)>; +} + +// Bit convert vector types. +defm : bitconvert_32<v4i8, i32>; +defm : bitconvert_32<v2i16, i32>; +defm : bitconvert_32<v2i16, v4i8>; + +defm : bitconvert_64<v8i8, i64>; +defm : bitconvert_64<v4i16, i64>; +defm : bitconvert_64<v2i32, i64>; +defm : bitconvert_64<v8i8, v4i16>; +defm : bitconvert_64<v8i8, v2i32>; +defm : bitconvert_64<v4i16, v2i32>; + + // Vector shift support. Vector shifting in Hexagon is rather different // from internal representation of LLVM. // LLVM assumes all shifts (in vector case) will have the form @@ -44,6 +72,12 @@ class vshift_v2i32<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp> let Inst{12-8} = src2; } +def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), + (A2_svaddh IntRegs:$src1, IntRegs:$src2)>; + +def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), + (A2_svsubh IntRegs:$src1, IntRegs:$src2)>; + def S2_asr_i_vw : vshift_v2i32<sra, "vasrw", 0b010, 0b000>; def S2_lsr_i_vw : vshift_v2i32<srl, "vlsrw", 0b010, 0b001>; def S2_asl_i_vw : vshift_v2i32<shl, "vaslw", 0b010, 0b010>; @@ -52,6 +86,87 @@ def S2_asr_i_vh : vshift_v4i16<sra, "vasrh", 0b100, 0b000>; def S2_lsr_i_vh : vshift_v4i16<srl, "vlsrh", 0b100, 0b001>; def S2_asl_i_vh : vshift_v4i16<shl, "vaslh", 0b100, 0b010>; + +def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>; +def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>; + +// Replicate the low 8 bits of the 32-bit input register into each of the +// four bytes of the 32-bit destination register. +def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>; + +// Replicate the low 16 bits of the 32-bit input register into each of the +// four halfwords of the 64-bit destination register.
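As an aside for readers of the two splat patterns here (S2_vsplatrb above, S2_vsplatrh just below), the following standalone C++ models what the two instructions compute on scalar registers; it is an illustrative sketch, not part of the patch, and the function names are invented:

    #include <cstdint>

    // Model of S2_vsplatrb: copy the low byte of Rs into all four bytes
    // of a 32-bit result (the v4i8 splat).
    uint32_t splatrb(uint32_t rs) {
      uint32_t b = rs & 0xFF;
      return b * 0x01010101u;            // b | b<<8 | b<<16 | b<<24
    }

    // Model of S2_vsplatrh: copy the low halfword of Rs into all four
    // halfwords of a 64-bit result (the v4i16 splat).
    uint64_t splatrh(uint32_t rs) {
      uint64_t h = rs & 0xFFFF;
      return h * 0x0001000100010001ull;  // h replicated into each lane
    }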
+def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>; + + +class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type> + : Pat <(Op Type:$Rss, Type:$Rtt), + (MI Type:$Rss, Type:$Rtt)>; + +def: VArith_pat <A2_vaddub, add, V8I8>; +def: VArith_pat <A2_vaddh, add, V4I16>; +def: VArith_pat <A2_vaddw, add, V2I32>; +def: VArith_pat <A2_vsubub, sub, V8I8>; +def: VArith_pat <A2_vsubh, sub, V4I16>; +def: VArith_pat <A2_vsubw, sub, V2I32>; + +def: VArith_pat <A2_and, and, V2I16>; +def: VArith_pat <A2_xor, xor, V2I16>; +def: VArith_pat <A2_or, or, V2I16>; + +def: VArith_pat <A2_andp, and, V8I8>; +def: VArith_pat <A2_andp, and, V4I16>; +def: VArith_pat <A2_andp, and, V2I32>; +def: VArith_pat <A2_orp, or, V8I8>; +def: VArith_pat <A2_orp, or, V4I16>; +def: VArith_pat <A2_orp, or, V2I32>; +def: VArith_pat <A2_xorp, xor, V8I8>; +def: VArith_pat <A2_xorp, xor, V4I16>; +def: VArith_pat <A2_xorp, xor, V2I32>; + +def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c), + (i32 u5ImmPred:$c))))), + (S2_asr_i_vw V2I32:$b, imm:$c)>; +def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c), + (i32 u5ImmPred:$c))))), + (S2_lsr_i_vw V2I32:$b, imm:$c)>; +def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c), + (i32 u5ImmPred:$c))))), + (S2_asl_i_vw V2I32:$b, imm:$c)>; + +def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))), + (S2_asr_i_vh V4I16:$b, imm:$c)>; +def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))), + (S2_lsr_i_vh V4I16:$b, imm:$c)>; +def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))), + (S2_asl_i_vh V4I16:$b, imm:$c)>; + + +def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>; +def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>; + +def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>; +def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>; +def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>; +def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>; +def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>; +def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>; + +def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5ImmPred:$u5)), + (S2_asr_i_vw V2I32:$Rs, imm:$u5)>; +def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4ImmPred:$u4)), + (S2_asr_i_vh V4I16:$Rs, imm:$u4)>; +def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5ImmPred:$u5)), + (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>; +def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4ImmPred:$u4)), + (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>; +def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5ImmPred:$u5)), + (S2_asl_i_vw V2I32:$Rs, imm:$u5)>; +def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4ImmPred:$u4)), + (S2_asl_i_vh V4I16:$Rs, imm:$u4)>; + // Vector shift words by register def S2_asr_r_vw : T_S3op_shiftVect < "vasrw", 0b00, 0b00>; def S2_lsr_r_vw : T_S3op_shiftVect < "vlsrw", 0b00, 0b01>; @@ -63,3 +178,306 @@ def S2_asr_r_vh : T_S3op_shiftVect < "vasrh", 0b01, 0b00>; def S2_lsr_r_vh : T_S3op_shiftVect < "vlsrh", 0b01, 0b01>; def S2_asl_r_vh : T_S3op_shiftVect < "vaslh", 0b01, 0b10>; def S2_lsl_r_vh : T_S3op_shiftVect < "vlslh", 0b01, 0b11>; + +class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Value> + : Pat <(Op Value:$Rs, I32:$Rt), + (MI Value:$Rs, I32:$Rt)>; + +def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>; +def: vshift_rr_pat 
<S2_asr_r_vh, HexagonVSRAH, V4I16>; +def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>; +def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>; +def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>; +def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>; + + +def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2, + [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>; +def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2, + [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>; +def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2, + [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>; + +def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>; +def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>; +def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>; +def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>; +def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>; +def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>; +def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>; +def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>; +def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>; + + +class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value> + : Pat <(i1 (Op Value:$Rs, Value:$Rt)), + (MI Value:$Rs, Value:$Rt)>; + +def: vcmp_i1_pat<A2_vcmpbeq, HexagonVCMPBEQ, V8I8>; +def: vcmp_i1_pat<A4_vcmpbgt, HexagonVCMPBGT, V8I8>; +def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>; + +def: vcmp_i1_pat<A2_vcmpheq, HexagonVCMPHEQ, V4I16>; +def: vcmp_i1_pat<A2_vcmphgt, HexagonVCMPHGT, V4I16>; +def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>; + +def: vcmp_i1_pat<A2_vcmpweq, HexagonVCMPWEQ, V2I32>; +def: vcmp_i1_pat<A2_vcmpwgt, HexagonVCMPWGT, V2I32>; +def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>; + + +class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy> + : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)), + (MI InVal:$Rs, InVal:$Rt)>; + +def: vcmp_vi1_pat<A2_vcmpweq, seteq, V2I32, v2i1>; +def: vcmp_vi1_pat<A2_vcmpwgt, setgt, V2I32, v2i1>; +def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>; + +def: vcmp_vi1_pat<A2_vcmpheq, seteq, V4I16, v4i1>; +def: vcmp_vi1_pat<A2_vcmphgt, setgt, V4I16, v4i1>; +def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>; + + +// Hexagon doesn't have a vector multiply with C semantics. +// Instead, generate a pseudo instruction that gets expanded into two +// scalar MPYI instructions. +// This is expanded by ExpandPostRAPseudos. +let isPseudo = 1 in +def VMULW : PseudoM<(outs DoubleRegs:$Rd), + (ins DoubleRegs:$Rs, DoubleRegs:$Rt), + ".error \"Should never try to emit VMULW\"", + [(set V2I32:$Rd, (mul V2I32:$Rs, V2I32:$Rt))]>; + +let isPseudo = 1 in +def VMULW_ACC : PseudoM<(outs DoubleRegs:$Rd), + (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt), + ".error \"Should never try to emit VMULW_ACC\"", + [(set V2I32:$Rd, (add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)))], + "$Rd = $Rx">; + +// Add two v4i8 vectors: Hexagon does not have an insn for this one, so we +// use the double add v8i8, and use only the low part of the result. +def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))), + (LoReg (A2_vaddub (Zext64 $Rs), (Zext64 $Rt)))>; + +// Subtract two v4i8 vectors: Hexagon does not have an insn for this one, so we +// use the double sub v8i8, and use only the low part of the result.
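A hedged aside on the widen-operate-truncate idiom used by the v4i8 add pattern above and the sub pattern just below: the C++ here is an illustrative scalar model only, and the lane layout assumes Hexagon's little-endian packing.

    #include <cstdint>

    // Model of: Zext64 both v4i8 operands, do the eight-lane byte add
    // (A2_vaddub) -- the four high lanes are zero and harmless -- then
    // keep only the low word (LoReg).
    uint32_t add_v4i8(uint32_t rs, uint32_t rt) {
      uint64_t a = rs, b = rt;                 // Zext64
      uint64_t res = 0;
      // Per-byte add; masking each lane keeps carries from crossing
      // lane boundaries, which is exactly what A2_vaddub guarantees.
      for (int i = 0; i < 8; ++i) {
        uint64_t lane =
            (((a >> (8 * i)) & 0xFF) + ((b >> (8 * i)) & 0xFF)) & 0xFF;
        res |= lane << (8 * i);
      }
      return static_cast<uint32_t>(res);       // LoReg
    }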
+def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))), + (LoReg (A2_vsubub (Zext64 $Rs), (Zext64 $Rt)))>; + +// +// No 32 bit vector mux. +// +def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)), + (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>; +def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)), + (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>; + +// +// 64-bit vector mux. +// +def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)), + (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>; +def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)), + (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>; +def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)), + (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>; + +// +// No 32 bit vector compare. +// +def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)), + (A2_vcmpbeq (Zext64 $Rs), (Zext64 $Rt))>; +def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)), + (A4_vcmpbgt (Zext64 $Rs), (Zext64 $Rt))>; +def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)), + (A2_vcmpbgtu (Zext64 $Rs), (Zext64 $Rt))>; + +def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)), + (A2_vcmpheq (Zext64 $Rs), (Zext64 $Rt))>; +def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)), + (A2_vcmphgt (Zext64 $Rs), (Zext64 $Rt))>; +def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)), + (A2_vcmphgtu (Zext64 $Rs), (Zext64 $Rt))>; + + +class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Value, + ValueType CmpTy> + : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)), + (InvMI Value:$Rt, Value:$Rs)>; + +// Map from a compare operation to the corresponding instruction with the +// order of operands reversed, e.g. x > y --> cmp.lt(y,x). +def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, i1>; +def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, v8i1>; +def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, i1>; +def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, v4i1>; +def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, i1>; +def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, v2i1>; + +def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, i1>; +def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, v8i1>; +def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>; +def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>; +def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>; +def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>; + +// Map from vcmpne(Rss) -> !vcmpew(Rss). +// rs != rt -> !(rs == rt). 
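The InvertCmp_pat mappings above and the vcmpne pattern just below rest on two elementary identities, restated in C++ for clarity (illustrative only):

    // x < y  ==  y > x (signed or unsigned), so the setlt/setult patterns
    // reuse the "gt"/"gtu" vector compares with operands swapped.
    bool lt_via_gt(int x, int y) { return y > x; }

    // x != y  ==  !(x == y), so setne is C2_not of the matching "eq" compare.
    bool ne_via_eq(int x, int y) { return !(x == y); }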
+def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)), + (C2_not (v2i1 (A2_vcmpweq V2I32:$Rs, V2I32:$Rt)))>; + + +// Truncate: from vector B copy all 'E'ven 'B'yte elements: +// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6]; +def: Pat<(v4i8 (trunc V4I16:$Rs)), + (S2_vtrunehb V4I16:$Rs)>; + +// Truncate: from vector B copy all 'O'dd 'B'yte elements: +// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7]; +// S2_vtrunohb + +// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements: +// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2]; +// S2_vtrunewh + +def: Pat<(v2i16 (trunc V2I32:$Rs)), + (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>; + + +def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>; +def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>; + +def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>; +def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>; + +def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; +def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; +def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; +def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; +def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>; +def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>; + +// Sign extends a v2i8 into a v2i32. +def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)), + (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>; + +// Sign extends a v2i16 into a v2i32. +def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)), + (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>; + + +// Multiplies two v2i16 and returns a v2i32. We use the saturating multiply +// here, as Hexagon does not provide a non-saturating vector multiply, and +// saturation cannot affect a result that has twice the precision of the +// operands. + +// Multiplies two v2i16 vectors: as Hexagon does not have a multiply with +// C (wrap-around) semantics for this type, this pattern uses the halfword +// multiply vmpyh, which takes two v2i16 and returns a v2i32. The result is +// then truncated back to a v2i16, which reproduces the unsigned wrap-around +// semantics of C. +def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt), + (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>; + +def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)), + (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)), + (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>; + +// Multiplies two v4i16 vectors. +def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)), + (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)), + (vmpyh (LoReg $Rs), (LoReg $Rt)))>; + +def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt), + (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))), + (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>; + +// Multiplies two v4i8 vectors. +def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), + (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>, + Requires<[HasV5T]>; + +def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), + (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>; + +// Multiplies two v8i8 vectors.
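Before the v8i8 case below, a concrete restatement of the vmpyh-based multiply patterns above may help: the sketch is assumption-laden C++, not part of the patch, and shows why truncating the widened 32-bit lane products reproduces plain wrap-around 16-bit multiplication.

    #include <cstdint>

    // Model of the v4i16 multiply: vmpyh forms a 32-bit product per pair
    // of 16-bit lanes, and S2_vtrunewh gathers the low halfword of each
    // product. Since only the low 16 bits survive, the saturating widening
    // multiply is indistinguishable from a wrap-around multiply.
    uint64_t mul_v4i16(uint64_t rs, uint64_t rt) {
      uint64_t res = 0;
      for (int i = 0; i < 4; ++i) {
        uint32_t a = (rs >> (16 * i)) & 0xFFFF;
        uint32_t b = (rt >> (16 * i)) & 0xFFFF;
        uint64_t lane = (uint64_t)(a * b) & 0xFFFF;  // low half of product
        res |= lane << (16 * i);
      }
      return res;
    }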
+def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), + (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))), + (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>, + Requires<[HasV5T]>; + +def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), + (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))), + (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>; + + +class shuffler<SDNode Op, string Str> + : SInst<(outs DoubleRegs:$a), (ins DoubleRegs:$b, DoubleRegs:$c), + "$a = " # Str # "($b, $c)", + [(set (i64 DoubleRegs:$a), + (i64 (Op (i64 DoubleRegs:$b), (i64 DoubleRegs:$c))))], + "", S_3op_tc_1_SLOT23>; + +def SDTHexagonBinOp64 : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>; + +def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>; +def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>; +def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>; +def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>; + +class ShufflePat<InstHexagon MI, SDNode Op> + : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)), + (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>; + +// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b +def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>; + +// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b +def: ShufflePat<S2_shuffob, HexagonSHUFFOB>; + +// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h +def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>; + +// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h +def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>; + + +// Truncated store from v4i16 to v4i8. +def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), + [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>; + +// Truncated store from v2i32 to v2i16. +def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), + [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>; + +def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt), + (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs), + (LoReg $Rs))))>; + +def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt), + (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>; + + +// Zero and sign extended load from v2i8 into v2i16. 
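For the extending-load patterns that follow, a rough C++ model of the zero-extending case may be useful; the name is invented for illustration, and the sketch assumes little-endian byte order, which Hexagon uses.

    #include <cstdint>
    #include <cstring>

    // Model of: L2_loadruh_io loads the two packed bytes as one halfword,
    // S2_vzxtbh unpacks bytes to halfwords, and LoReg keeps the low two
    // lanes -- i.e. a v2i8 -> v2i16 zero-extending load.
    uint32_t zextload_v2i8(const uint8_t *p) {
      uint16_t raw;
      std::memcpy(&raw, p, sizeof raw);   // the scalar halfword load
      uint32_t lo = raw & 0xFF;           // lane 0, zero-extended
      uint32_t hi = (raw >> 8) & 0xFF;    // lane 1, zero-extended
      return lo | (hi << 16);             // packed v2i16
    }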
+def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr), + [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>; + +def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr), + [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>; + +def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)), + (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>; + +def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)), + (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>; + +def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)), + (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>; + +def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)), + (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>; diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td index c0551e8..4275230 100644 --- a/lib/Target/Hexagon/HexagonIntrinsics.td +++ b/lib/Target/Hexagon/HexagonIntrinsics.td @@ -690,16 +690,15 @@ def: T_RR_pat<A2_combine_hl, int_hexagon_A2_combine_hl>; def: T_RR_pat<A2_combine_lh, int_hexagon_A2_combine_lh>; def: T_RR_pat<A2_combine_ll, int_hexagon_A2_combine_ll>; -def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s8ExtPred, s8ImmPred>; +def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s32ImmPred, s8ImmPred>; -def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs), - (I32:$Rt))), +def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs), (I32:$Rt))), (i32 (C2_mux (C2_tfrrp IntRegs:$Rp), IntRegs:$Rs, IntRegs:$Rt))>; // Mux -def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s8ExtPred>; -def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s8ExtPred>; -def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s8ExtPred, s8ImmPred>; +def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s32ImmPred>; +def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s32ImmPred>; +def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32ImmPred, s8ImmPred>; // Shift halfword def : T_R_pat<A2_aslh, int_hexagon_A2_aslh>; @@ -720,17 +719,17 @@ def : T_RR_pat<C2_cmpeq, int_hexagon_C2_cmpeq>; def : T_RR_pat<C2_cmpgt, int_hexagon_C2_cmpgt>; def : T_RR_pat<C2_cmpgtu, int_hexagon_C2_cmpgtu>; -def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s10ExtPred>; -def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s10ExtPred>; -def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u9ExtPred>; +def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s32ImmPred>; +def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s32ImmPred>; +def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u32ImmPred>; -def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s8ExtPred:$src2)), +def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s32ImmPred:$src2)), (i32 (C2_cmpgti (I32:$src1), - (DEC_CONST_SIGNED s8ExtPred:$src2)))>; + (DEC_CONST_SIGNED s32ImmPred:$src2)))>; -def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u8ExtPred:$src2)), +def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u32ImmPred:$src2)), (i32 (C2_cmpgtui (I32:$src1), - (DEC_CONST_UNSIGNED u8ExtPred:$src2)))>; + (DEC_CONST_UNSIGNED u32ImmPred:$src2)))>; // The instruction, Pd=cmp.geu(Rs, #u8) -> Pd=cmp.eq(Rs,Rs) when #u8 == 0. 
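The DEC_CONST_SIGNED/DEC_CONST_UNSIGNED rewrites above lean on one integer identity, restated here in C++ (illustrative; it assumes the constant does not underflow when decremented):

    // x >= c  ==  x > c - 1, so cmp.ge/cmp.geu can be emitted as
    // cmp.gt/cmp.gtu with a decremented immediate. The unsigned c == 0
    // case (always true) is peeled off by the cmp.eq(Rs,Rs) pattern
    // that follows.
    bool ge_via_gt(int x, int c) { return x > c - 1; }  // == (x >= c)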
def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), 0)), @@ -1258,6 +1257,30 @@ def: Pat<(i32 (int_hexagon_S2_storew_locked (I32:$Rs), (I32:$Rt))), def: Pat<(i32 (int_hexagon_S4_stored_locked (I32:$Rs), (I64:$Rt))), (i32 (C2_tfrpr (S4_stored_locked (I32:$Rs), (I64:$Rt))))>; +/******************************************************************** +* ST +*********************************************************************/ + +class T_stb_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Val> + : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru), + (MI I32:$Rs, Val:$Rt, I32:$Ru)>; + +def : T_stb_pat <S2_storerh_pbr_pseudo, int_hexagon_brev_sth, I32>; +def : T_stb_pat <S2_storerb_pbr_pseudo, int_hexagon_brev_stb, I32>; +def : T_stb_pat <S2_storeri_pbr_pseudo, int_hexagon_brev_stw, I32>; +def : T_stb_pat <S2_storerf_pbr_pseudo, int_hexagon_brev_sthhi, I32>; +def : T_stb_pat <S2_storerd_pbr_pseudo, int_hexagon_brev_std, I64>; + +class T_stc_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Imm, PatLeaf Val> + : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s), + (MI I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s)>; + +def: T_stc_pat<S2_storerb_pci_pseudo, int_hexagon_circ_stb, s4_0ImmPred, I32>; +def: T_stc_pat<S2_storerh_pci_pseudo, int_hexagon_circ_sth, s4_1ImmPred, I32>; +def: T_stc_pat<S2_storeri_pci_pseudo, int_hexagon_circ_stw, s4_2ImmPred, I32>; +def: T_stc_pat<S2_storerd_pci_pseudo, int_hexagon_circ_std, s4_3ImmPred, I64>; +def: T_stc_pat<S2_storerf_pci_pseudo, int_hexagon_circ_sthhi, s4_1ImmPred, I32>; + include "HexagonIntrinsicsV3.td" include "HexagonIntrinsicsV4.td" include "HexagonIntrinsicsV5.td" diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/lib/Target/Hexagon/HexagonIntrinsicsV4.td index 8d068eb..c80a188 100644 --- a/lib/Target/Hexagon/HexagonIntrinsicsV4.td +++ b/lib/Target/Hexagon/HexagonIntrinsicsV4.td @@ -234,17 +234,17 @@ def: T_RR_pat<A4_orn, int_hexagon_A4_orn>; *********************************************************************/ // Combine Words Into Doublewords. 
-def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s8ExtPred>; -def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s8ExtPred>; +def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32ImmPred>; +def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32ImmPred>; /******************************************************************** * ALU32/PRED * *********************************************************************/ // Compare -def : T_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s10ExtPred>; -def : T_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s10ExtPred>; -def : T_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u9ExtPred>; +def : T_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32ImmPred>; +def : T_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32ImmPred>; +def : T_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32ImmPred>; def: T_RR_pat<A4_rcmpeq, int_hexagon_A4_rcmpeq>; def: T_RR_pat<A4_rcmpneq, int_hexagon_A4_rcmpneq>; diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index 806d448..81af4db 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -40,6 +40,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -199,10 +200,7 @@ static bool commonChecksToProhibitNewValueJump(bool afterRA, // of registers by individual passes in the backend. At this time, // we don't know the scope of usage and definitions of these // instructions. - if (MII->getOpcode() == Hexagon::TFR_condset_ii || - MII->getOpcode() == Hexagon::TFR_condset_ri || - MII->getOpcode() == Hexagon::TFR_condset_ir || - MII->getOpcode() == Hexagon::LDriw_pred || + if (MII->getOpcode() == Hexagon::LDriw_pred || MII->getOpcode() == Hexagon::STriw_pred) return false; } diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td index 318ca72..450f594 100644 --- a/lib/Target/Hexagon/HexagonOperands.td +++ b/lib/Target/Hexagon/HexagonOperands.td @@ -66,162 +66,131 @@ def nOneImm : Operand<i32>; // Immediate predicates // def s32ImmPred : PatLeaf<(i32 imm), [{ - // s32ImmPred predicate - True if the immediate fits in a 32-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<32>(v); }]>; -def s32_24ImmPred : PatLeaf<(i32 imm), [{ - // s32_24ImmPred predicate - True if the immediate fits in a 32-bit sign - // extended field that is a multiple of 0x1000000. 
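For reference while reading the block of rewritten immediate predicates below: they call the isInt/isShiftedInt (and unsigned counterpart) templates from llvm/Support/MathExtras.h. The C++ here models, rather than reproduces, those helpers.

    #include <cstdint>

    // isInt<N>: v fits in an N-bit two's-complement field.
    template <unsigned N> bool isIntModel(int64_t v) {
      return v >= -(int64_t(1) << (N - 1)) && v < (int64_t(1) << (N - 1));
    }

    // isShiftedInt<N, S>: v is a multiple of 2^S and v >> S fits in N
    // signed bits; e.g. isShiftedInt<4,2> accepts -32, -28, ..., 24, 28.
    template <unsigned N, unsigned S> bool isShiftedIntModel(int64_t v) {
      return (v % (int64_t(1) << S)) == 0 && isIntModel<N + S>(v);
    }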
+def s32_0ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isInt<32>(v); +}]>; + +def s31_1ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<31,1>(v); +}]>; + +def s30_2ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<30,2>(v); +}]>; + +def s29_3ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<29,3>(v); +}]>; + +def s22_10ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<22,10>(v); +}]>; + +def s8_24ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<32,24>(v); + return isShiftedInt<8,24>(v); }]>; -def s32_16s8ImmPred : PatLeaf<(i32 imm), [{ - // s32_16s8ImmPred predicate - True if the immediate fits in a 32-bit sign - // extended field that is a multiple of 0x10000. +def s16_16ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<24,16>(v); + return isShiftedInt<16,16>(v); }]>; def s26_6ImmPred : PatLeaf<(i32 imm), [{ - // s26_6ImmPred predicate - True if the immediate fits in a 32-bit - // sign extended field. int64_t v = (int64_t)N->getSExtValue(); return isShiftedInt<26,6>(v); }]>; - def s16ImmPred : PatLeaf<(i32 imm), [{ - // s16ImmPred predicate - True if the immediate fits in a 16-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<16>(v); }]>; - def s13ImmPred : PatLeaf<(i32 imm), [{ - // s13ImmPred predicate - True if the immediate fits in a 13-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<13>(v); }]>; - def s12ImmPred : PatLeaf<(i32 imm), [{ - // s12ImmPred predicate - True if the immediate fits in a 12-bit - // sign extended field. int64_t v = (int64_t)N->getSExtValue(); return isInt<12>(v); }]>; def s11_0ImmPred : PatLeaf<(i32 imm), [{ - // s11_0ImmPred predicate - True if the immediate fits in a 11-bit - // sign extended field. int64_t v = (int64_t)N->getSExtValue(); return isInt<11>(v); }]>; - def s11_1ImmPred : PatLeaf<(i32 imm), [{ - // s11_1ImmPred predicate - True if the immediate fits in a 12-bit - // sign extended field and is a multiple of 2. int64_t v = (int64_t)N->getSExtValue(); return isShiftedInt<11,1>(v); }]>; - def s11_2ImmPred : PatLeaf<(i32 imm), [{ - // s11_2ImmPred predicate - True if the immediate fits in a 13-bit - // sign extended field and is a multiple of 4. int64_t v = (int64_t)N->getSExtValue(); return isShiftedInt<11,2>(v); }]>; - def s11_3ImmPred : PatLeaf<(i32 imm), [{ - // s11_3ImmPred predicate - True if the immediate fits in a 14-bit - // sign extended field and is a multiple of 8. int64_t v = (int64_t)N->getSExtValue(); return isShiftedInt<11,3>(v); }]>; - def s10ImmPred : PatLeaf<(i32 imm), [{ - // s10ImmPred predicate - True if the immediate fits in a 10-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<10>(v); }]>; - def s9ImmPred : PatLeaf<(i32 imm), [{ - // s9ImmPred predicate - True if the immediate fits in a 9-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<9>(v); }]>; def m9ImmPred : PatLeaf<(i32 imm), [{ - // m9ImmPred predicate - True if the immediate fits in a 9-bit magnitude - // field. The range of m9 is -255 to 255.
int64_t v = (int64_t)N->getSExtValue(); return isInt<9>(v) && (v != -256); }]>; def s8ImmPred : PatLeaf<(i32 imm), [{ - // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<8>(v); }]>; - def s8Imm64Pred : PatLeaf<(i64 imm), [{ - // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<8>(v); }]>; - def s6ImmPred : PatLeaf<(i32 imm), [{ - // s6ImmPred predicate - True if the immediate fits in a 6-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<6>(v); }]>; - def s4_0ImmPred : PatLeaf<(i32 imm), [{ - // s4_0ImmPred predicate - True if the immediate fits in a 4-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<4>(v); }]>; - def s4_1ImmPred : PatLeaf<(i32 imm), [{ - // s4_1ImmPred predicate - True if the immediate fits in a 4-bit sign extended - // field of 2. int64_t v = (int64_t)N->getSExtValue(); return isShiftedInt<4,1>(v); }]>; - def s4_2ImmPred : PatLeaf<(i32 imm), [{ - // s4_2ImmPred predicate - True if the immediate fits in a 4-bit sign extended - // field that is a multiple of 4. int64_t v = (int64_t)N->getSExtValue(); return isShiftedInt<4,2>(v); }]>; - def s4_3ImmPred : PatLeaf<(i32 imm), [{ - // s4_3ImmPred predicate - True if the immediate fits in a 4-bit sign extended - // field that is a multiple of 8. int64_t v = (int64_t)N->getSExtValue(); return isShiftedInt<4,3>(v); }]>; @@ -233,56 +202,61 @@ def u64ImmPred : PatLeaf<(i64 imm), [{ }]>; def u32ImmPred : PatLeaf<(i32 imm), [{ - // u32ImmPred predicate - True if the immediate fits in a 32-bit field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<32>(v); }]>; +def u32_0ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<32>(v); +}]>; + +def u31_1ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<31,1>(v); +}]>; + +def u30_2ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<30,2>(v); +}]>; + +def u29_3ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<29,3>(v); +}]>; + def u26_6ImmPred : PatLeaf<(i32 imm), [{ - // u26_6ImmPred - True if the immediate fits in a 32-bit field and - // is a multiple of 64. int64_t v = (int64_t)N->getSExtValue(); return isShiftedUInt<26,6>(v); }]>; def u16ImmPred : PatLeaf<(i32 imm), [{ - // u16ImmPred predicate - True if the immediate fits in a 16-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<16>(v); }]>; def u16_s8ImmPred : PatLeaf<(i32 imm), [{ - // u16_s8ImmPred predicate - True if the immediate fits in a 16-bit sign - // extended s8 field. int64_t v = (int64_t)N->getSExtValue(); return isShiftedUInt<16,8>(v); }]>; def u16_0ImmPred : PatLeaf<(i32 imm), [{ - // True if the immediate fits in a 16-bit unsigned field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<16>(v); }]>; def u11_3ImmPred : PatLeaf<(i32 imm), [{ - // True if the immediate fits in a 14-bit unsigned field, and the lowest - // three bits are 0. int64_t v = (int64_t)N->getSExtValue(); return isShiftedUInt<11,3>(v); }]>; def u9ImmPred : PatLeaf<(i32 imm), [{ - // u9ImmPred predicate - True if the immediate fits in a 9-bit unsigned - // field. 
int64_t v = (int64_t)N->getSExtValue(); return isUInt<9>(v); }]>; - def u8ImmPred : PatLeaf<(i32 imm), [{ - // u8ImmPred predicate - True if the immediate fits in a 8-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<8>(v); }]>; @@ -294,81 +268,56 @@ def u7StrictPosImmPred : ImmLeaf<i32, [{ }]>; def u7ImmPred : PatLeaf<(i32 imm), [{ - // u7ImmPred predicate - True if the immediate fits in a 7-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<7>(v); }]>; - def u6ImmPred : PatLeaf<(i32 imm), [{ - // u6ImmPred predicate - True if the immediate fits in a 6-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<6>(v); }]>; def u6_0ImmPred : PatLeaf<(i32 imm), [{ - // u6_0ImmPred predicate - True if the immediate fits in a 6-bit unsigned - // field. Same as u6ImmPred. int64_t v = (int64_t)N->getSExtValue(); return isUInt<6>(v); }]>; def u6_1ImmPred : PatLeaf<(i32 imm), [{ - // u6_1ImmPred predicate - True if the immediate fits in a 7-bit unsigned - // field that is 1 bit alinged - multiple of 2. int64_t v = (int64_t)N->getSExtValue(); return isShiftedUInt<6,1>(v); }]>; def u6_2ImmPred : PatLeaf<(i32 imm), [{ - // u6_2ImmPred predicate - True if the immediate fits in a 8-bit unsigned - // field that is 2 bits alinged - multiple of 4. int64_t v = (int64_t)N->getSExtValue(); return isShiftedUInt<6,2>(v); }]>; def u6_3ImmPred : PatLeaf<(i32 imm), [{ - // u6_3ImmPred predicate - True if the immediate fits in a 9-bit unsigned - // field that is 3 bits alinged - multiple of 8. int64_t v = (int64_t)N->getSExtValue(); return isShiftedUInt<6,3>(v); }]>; def u5ImmPred : PatLeaf<(i32 imm), [{ - // u5ImmPred predicate - True if the immediate fits in a 5-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<5>(v); }]>; def u4ImmPred : PatLeaf<(i32 imm), [{ - // u4ImmPred predicate - True if the immediate fits in a 4-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<4>(v); }]>; def u3ImmPred : PatLeaf<(i32 imm), [{ - // u3ImmPred predicate - True if the immediate fits in a 3-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<3>(v); }]>; - def u2ImmPred : PatLeaf<(i32 imm), [{ - // u2ImmPred predicate - True if the immediate fits in a 2-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<2>(v); }]>; - def u1ImmPred : PatLeaf<(i1 imm), [{ - // u1ImmPred predicate - True if the immediate fits in a 1-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<1>(v); }]>; @@ -511,212 +460,6 @@ let PrintMethod = "printExtOperand" in { def u6_3Ext : Operand<i32>; } -let PrintMethod = "printImmOperand" in -def u0AlwaysExt : Operand<i32>; - -// Predicates for constant extendable operands -def s16ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<16>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit signed field. - return isConstExtProfitable(Node) && isInt<32>(v); -}]>; - -def s10ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<10>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit signed field. 
- return isConstExtProfitable(Node) && isInt<32>(v); -}]>; - -def s9ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<9>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isInt<32>(v); -}]>; - -def s8ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<8>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit signed field. - return isConstExtProfitable(Node) && isInt<32>(v); -}]>; - -def s8_16ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<8>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can't fit in a 16-bit signed field. This is required to avoid - // unnecessary constant extenders. - return isConstExtProfitable(Node) && !isInt<16>(v); -}]>; - -def s6ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<6>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isInt<32>(v); -}]>; - -def s6_16ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<6>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can't fit in a 16-bit signed field. This is required to avoid - // unnecessary constant extenders. - return isConstExtProfitable(Node) && !isInt<16>(v); -}]>; - -def s6_10ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<6>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can't fit in a 10-bit signed field. This is required to avoid - // unnecessary constant extenders. - return isConstExtProfitable(Node) && !isInt<10>(v); -}]>; - -def s11_0ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<11>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit signed field. - return isConstExtProfitable(Node) && isInt<32>(v); -}]>; - -def s11_1ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<12>(v)) - return isShiftedInt<11,1>(v); - - // Return true if extending this immediate is profitable and the low 1 bit - // is zero (2-byte aligned). - return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 2) == 0); -}]>; - -def s11_2ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<13>(v)) - return isShiftedInt<11,2>(v); - - // Return true if extending this immediate is profitable and the low 2-bits - // are zero (4-byte aligned). - return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 4) == 0); -}]>; - -def s11_3ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<14>(v)) - return isShiftedInt<11,3>(v); - - // Return true if extending this immediate is profitable and the low 3-bits - // are zero (8-byte aligned). - return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 8) == 0); -}]>; - -def u0AlwaysExtPred : PatLeaf<(i32 imm), [{ - // Predicate for an unsigned 32-bit value that always needs to be extended. 
- if (isConstExtProfitable(Node)) { - int64_t v = (int64_t)N->getSExtValue(); - return isUInt<32>(v); - } - return false; -}]>; - -def u6ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isUInt<6>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v); -}]>; - -def u7ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isUInt<7>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v); -}]>; - -def u8ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isUInt<8>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v); -}]>; - -def u9ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isUInt<9>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v); -}]>; - -def u6_1ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isUInt<7>(v)) - return isShiftedUInt<6,1>(v); - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 2) == 0); -}]>; - -def u6_2ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isUInt<8>(v)) - return isShiftedUInt<6,2>(v); - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 4) == 0); -}]>; - -def u6_3ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isUInt<9>(v)) - return isShiftedUInt<6,3>(v); - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 8) == 0); -}]>; - // This complex pattern exists only to create a machine instruction operand // of type "frame index". There doesn't seem to be a way to do that directly @@ -729,41 +472,8 @@ def AddrFI : ComplexPattern<i32, 1, "SelectAddrFI", [frameindex], []>; def AddrGA : ComplexPattern<i32, 1, "SelectAddrGA", [], []>; def AddrGP : ComplexPattern<i32, 1, "SelectAddrGP", [], []>; -// Addressing modes. - -def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>; -def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>; -def ADDRriS11_0 : ComplexPattern<i32, 2, "SelectADDRriS11_0", [frameindex], []>; -def ADDRriS11_1 : ComplexPattern<i32, 2, "SelectADDRriS11_1", [frameindex], []>; -def ADDRriS11_2 : ComplexPattern<i32, 2, "SelectADDRriS11_2", [frameindex], []>; -def ADDRriS11_3 : ComplexPattern<i32, 2, "SelectADDRriS11_3", [frameindex], []>; -def ADDRriU6_0 : ComplexPattern<i32, 2, "SelectADDRriU6_0", [frameindex], []>; -def ADDRriU6_1 : ComplexPattern<i32, 2, "SelectADDRriU6_1", [frameindex], []>; -def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>; - // Address operands. 
-def MEMrr : Operand<i32> { - let PrintMethod = "printMEMrrOperand"; - let MIOperandInfo = (ops IntRegs, IntRegs); -} - -def MEMri : Operand<i32> { - let PrintMethod = "printMEMriOperand"; - let MIOperandInfo = (ops IntRegs, IntRegs); -} - -def MEMri_s11_2 : Operand<i32>, - ComplexPattern<i32, 2, "SelectMEMriS11_2", []> { - let PrintMethod = "printMEMriOperand"; - let MIOperandInfo = (ops IntRegs, s11Imm); -} - -def FrameIndex : Operand<i32> { - let PrintMethod = "printFrameIndexOperand"; - let MIOperandInfo = (ops IntRegs, s11Imm); -} - let PrintMethod = "printGlobalOperand" in { def globaladdress : Operand<i32>; def globaladdressExt : Operand<i32>; diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp index afd3a17..503bfdb 100644 --- a/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/lib/Target/Hexagon/HexagonPeephole.cpp @@ -271,15 +271,8 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { switch (Op) { case Hexagon::C2_mux: case Hexagon::C2_muxii: - case Hexagon::TFR_condset_ii: NewOp = Op; break; - case Hexagon::TFR_condset_ri: - NewOp = Hexagon::TFR_condset_ir; - break; - case Hexagon::TFR_condset_ir: - NewOp = Hexagon::TFR_condset_ri; - break; case Hexagon::C2_muxri: NewOp = Hexagon::C2_muxir; break; diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 3df98d6..86eaee8 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -37,11 +37,8 @@ using namespace llvm; - -HexagonRegisterInfo::HexagonRegisterInfo(HexagonSubtarget &st) - : HexagonGenRegisterInfo(Hexagon::R31), - Subtarget(st) { -} +HexagonRegisterInfo::HexagonRegisterInfo() + : HexagonGenRegisterInfo(Hexagon::R31) {} const MCPhysReg * HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { @@ -51,7 +48,7 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0 }; - switch(Subtarget.getHexagonArchVersion()) { + switch (MF->getSubtarget<HexagonSubtarget>().getHexagonArchVersion()) { case HexagonSubtarget::V4: case HexagonSubtarget::V5: return CalleeSavedRegsV3; @@ -89,7 +86,7 @@ HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, }; - switch(Subtarget.getHexagonArchVersion()) { + switch (MF->getSubtarget<HexagonSubtarget>().getHexagonArchVersion()) { case HexagonSubtarget::V4: case HexagonSubtarget::V5: return CalleeSavedRegClassesV3; @@ -122,7 +119,9 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset -= 2 * Hexagon_WordSize; } - const unsigned FrameSize = MFI.getStackSize(); + unsigned FrameSize = MFI.getStackSize(); + if (MI.getOpcode() == Hexagon::TFR_FI) + MI.setDesc(TII.get(Hexagon::A2_addi)); if (!MFI.hasVarSizedObjects() && TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset)) && diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h index a83b502..dc6dd2a 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -37,19 +37,11 @@ #define HEXAGON_RESERVED_REG_2 Hexagon::R11 namespace llvm { - -class HexagonSubtarget; -class HexagonInstrInfo; -class Type; - struct HexagonRegisterInfo : public HexagonGenRegisterInfo { - HexagonSubtarget &Subtarget; - - HexagonRegisterInfo(HexagonSubtarget &st); + HexagonRegisterInfo(); /// Code Generation virtual methods... 
- const MCPhysReg * - getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; const TargetRegisterClass* const* getCalleeSavedRegClasses(const MachineFunction *MF = nullptr) const; diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp index ce6a39a..1a4c7ae 100644 --- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp +++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp @@ -71,6 +71,7 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { return true; const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo(); + const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); // Loop over all of the basic blocks for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); @@ -82,82 +83,78 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { while (MII != MIE) { MachineInstr *MI = MII; int Opc = MI->getOpcode(); - if (Opc == Hexagon::CONST32_set) { + if (Opc == Hexagon::CONST32_set_jt) { int DestReg = MI->getOperand(0).getReg(); MachineOperand &Symbol = MI->getOperand (1); - - BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::LO), DestReg).addOperand(Symbol); BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::HI), DestReg).addOperand(Symbol); - // MBB->erase returns the iterator to the next instruction, which is the - // one we want to process next - MII = MBB->erase (MI); - continue; - } - else if (Opc == Hexagon::CONST32_set_jt) { - int DestReg = MI->getOperand(0).getReg(); - MachineOperand &Symbol = MI->getOperand (1); + TII->get(Hexagon::A2_tfrsi), DestReg).addOperand(Symbol); - BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::LO_jt), DestReg).addOperand(Symbol); - BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::HI_jt), DestReg).addOperand(Symbol); // MBB->erase returns the iterator to the next instruction, which is the // one we want to process next MII = MBB->erase (MI); continue; } - else if (Opc == Hexagon::CONST32_Label) { + else if (Opc == Hexagon::CONST32_Int_Real && + MI->getOperand(1).isBlockAddress()) { int DestReg = MI->getOperand(0).getReg(); MachineOperand &Symbol = MI->getOperand (1); BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::LO_PIC), DestReg).addOperand(Symbol); + TII->get(Hexagon::LO), DestReg).addOperand(Symbol); BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::HI_PIC), DestReg).addOperand(Symbol); + TII->get(Hexagon::HI), DestReg).addOperand(Symbol); // MBB->erase returns the iterator to the next instruction, which is the // one we want to process next MII = MBB->erase (MI); continue; } - else if (Opc == Hexagon::CONST32_Int_Real) { + + else if (Opc == Hexagon::CONST32_Int_Real || + Opc == Hexagon::CONST32_Float_Real) { int DestReg = MI->getOperand(0).getReg(); - int64_t ImmValue = MI->getOperand(1).getImm (); - BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::LOi), DestReg).addImm(ImmValue); - BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::HIi), DestReg).addImm(ImmValue); + // We have to convert an FP immediate into its corresponding integer + // representation + int64_t ImmValue; + if (Opc == Hexagon::CONST32_Float_Real) { + APFloat Val = MI->getOperand(1).getFPImm()->getValueAPF(); + ImmValue = *Val.bitcastToAPInt().getRawData(); + } + else + ImmValue = MI->getOperand(1).getImm(); + + BuildMI(*MBB, MII, MI->getDebugLoc(), + 
TII->get(Hexagon::A2_tfrsi), DestReg).addImm(ImmValue); MII = MBB->erase (MI); continue; } - else if (Opc == Hexagon::CONST64_Int_Real) { + else if (Opc == Hexagon::CONST64_Int_Real || + Opc == Hexagon::CONST64_Float_Real) { int DestReg = MI->getOperand(0).getReg(); - int64_t ImmValue = MI->getOperand(1).getImm (); - unsigned DestLo = Fn.getSubtarget().getRegisterInfo()->getSubReg( - DestReg, Hexagon::subreg_loreg); - unsigned DestHi = Fn.getSubtarget().getRegisterInfo()->getSubReg( - DestReg, Hexagon::subreg_hireg); + + // We have to convert an FP immediate into its corresponding integer + // representation + int64_t ImmValue; + if (Opc == Hexagon::CONST64_Float_Real) { + APFloat Val = MI->getOperand(1).getFPImm()->getValueAPF(); + ImmValue = *Val.bitcastToAPInt().getRawData(); + } + else + ImmValue = MI->getOperand(1).getImm(); + + unsigned DestLo = TRI->getSubReg(DestReg, Hexagon::subreg_loreg); + unsigned DestHi = TRI->getSubReg(DestReg, Hexagon::subreg_hireg); int32_t LowWord = (ImmValue & 0xFFFFFFFF); int32_t HighWord = (ImmValue >> 32) & 0xFFFFFFFF; - // Lower Registers Lower Half - BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::LOi), DestLo).addImm(LowWord); - // Lower Registers Higher Half + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::A2_tfrsi), DestLo).addImm(LowWord); BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::HIi), DestLo).addImm(LowWord); - // Higher Registers Lower Half - BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::LOi), DestHi).addImm(HighWord); - // Higher Registers Higher Half. - BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::HIi), DestHi).addImm(HighWord); + TII->get(Hexagon::A2_tfrsi), DestHi).addImm(HighWord); MII = MBB->erase (MI); continue; - } + } ++MII; } } diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp deleted file mode 100644 index 8873bb9..0000000 --- a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp +++ /dev/null @@ -1,172 +0,0 @@ -//===-- HexagonSplitTFRCondSets.cpp - split TFR condsets into xfers -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -// -//===----------------------------------------------------------------------===// -// This pass tries to provide opportunities for better optimization of muxes. -// The default code generated for something like: flag = (a == b) ? 1 : 3; -// would be: -// -// {p0 = cmp.eq(r0,r1)} -// {r3 = mux(p0,#1,#3)} -// -// This requires two packets. If we use .new predicated immediate transfers, -// then we can do this in a single packet, e.g.: -// -// {p0 = cmp.eq(r0,r1) -// if (p0.new) r3 = #1 -// if (!p0.new) r3 = #3} -// -// Note that the conditional assignments are not generated in .new form here. -// We assume opptimisically that they will be formed later. 
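The CONST32_Float_Real/CONST64_Float_Real cases above reinterpret a floating-point immediate as raw integer bits through APFloat before emitting A2_tfrsi. A minimal standalone sketch of that bitcast, using the same LLVM APFloat API as the hunk (the helper name is illustrative, not from the patch):

    #include "llvm/ADT/APFloat.h"
    #include "llvm/ADT/APInt.h"

    static int64_t fpBitsToInt(const llvm::APFloat &Val) {
      // bitcastToAPInt() preserves the bit pattern; getRawData() points at
      // the APInt's 64-bit words, and a float/double fits in the first one.
      return *Val.bitcastToAPInt().getRawData();
    }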
-// -//===----------------------------------------------------------------------===// - -#include "Hexagon.h" -#include "HexagonMachineFunctionInfo.h" -#include "HexagonSubtarget.h" -#include "HexagonTargetMachine.h" -#include "llvm/CodeGen/LatencyPriorityQueue.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/CodeGen/SchedulerRegistry.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" - -using namespace llvm; - -#define DEBUG_TYPE "xfer" - -namespace llvm { - void initializeHexagonSplitTFRCondSetsPass(PassRegistry&); -} - - -namespace { - -class HexagonSplitTFRCondSets : public MachineFunctionPass { - public: - static char ID; - HexagonSplitTFRCondSets() : MachineFunctionPass(ID) { - initializeHexagonSplitTFRCondSetsPass(*PassRegistry::getPassRegistry()); - } - - const char *getPassName() const override { - return "Hexagon Split TFRCondSets"; - } - bool runOnMachineFunction(MachineFunction &Fn) override; -}; - - -char HexagonSplitTFRCondSets::ID = 0; - - -bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) { - - const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo(); - - // Loop over all of the basic blocks. - for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); - MBBb != MBBe; ++MBBb) { - MachineBasicBlock* MBB = MBBb; - // Traverse the basic block. - for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); - ++MII) { - MachineInstr *MI = MII; - switch(MI->getOpcode()) { - case Hexagon::TFR_condset_ri: { - int DestReg = MI->getOperand(0).getReg(); - int SrcReg1 = MI->getOperand(2).getReg(); - - // Do not emit the predicated copy if the source and the destination - // is the same register. - if (DestReg != SrcReg1) { - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::A2_tfrt), DestReg). - addReg(MI->getOperand(1).getReg()).addReg(SrcReg1); - } - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::C2_cmoveif), DestReg). - addReg(MI->getOperand(1).getReg()). - addImm(MI->getOperand(3).getImm()); - - MII = MBB->erase(MI); - --MII; - break; - } - case Hexagon::TFR_condset_ir: { - int DestReg = MI->getOperand(0).getReg(); - int SrcReg2 = MI->getOperand(3).getReg(); - - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::C2_cmoveit), DestReg). - addReg(MI->getOperand(1).getReg()). - addImm(MI->getOperand(2).getImm()); - - // Do not emit the predicated copy if the source and - // the destination is the same register. - if (DestReg != SrcReg2) { - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::A2_tfrf), DestReg). 
- addReg(MI->getOperand(1).getReg()).addReg(SrcReg2); - } - MII = MBB->erase(MI); - --MII; - break; - } - case Hexagon::TFR_condset_ii: { - int DestReg = MI->getOperand(0).getReg(); - int SrcReg1 = MI->getOperand(1).getReg(); - - int Immed1 = MI->getOperand(2).getImm(); - int Immed2 = MI->getOperand(3).getImm(); - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::C2_cmoveit), - DestReg).addReg(SrcReg1).addImm(Immed1); - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::C2_cmoveif), - DestReg).addReg(SrcReg1).addImm(Immed2); - MII = MBB->erase(MI); - --MII; - break; - } - } - } - } - return true; -} - -} - -//===----------------------------------------------------------------------===// -// Public Constructor Functions -//===----------------------------------------------------------------------===// - -static void initializePassOnce(PassRegistry &Registry) { - const char *Name = "Hexagon Split TFRCondSets"; - PassInfo *PI = new PassInfo(Name, "hexagon-split-tfr", - &HexagonSplitTFRCondSets::ID, nullptr, false, - false); - Registry.registerPass(*PI, true); -} - -void llvm::initializeHexagonSplitTFRCondSetsPass(PassRegistry &Registry) { - CALL_ONCE_INITIALIZATION(initializePassOnce) -} - -FunctionPass *llvm::createHexagonSplitTFRCondSets() { - return new HexagonSplitTFRCondSets(); -} diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index 380f023..1717ae3 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -48,6 +48,10 @@ EnableIEEERndNear( cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Generate non-chopped conversion from fp to int.")); +static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon MI Scheduling")); + HexagonSubtarget & HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { // If the programmer has not specified a Hexagon version, default to -mv4. @@ -91,3 +95,9 @@ HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS, // Pin the vtable to this file. void HexagonSubtarget::anchor() {} + +bool HexagonSubtarget::enableMachineScheduler() const { + if (DisableHexagonMISched.getNumOccurrences()) + return !DisableHexagonMISched; + return true; +} diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h index 57de546..780567b 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.h +++ b/lib/Target/Hexagon/HexagonSubtarget.h @@ -85,6 +85,11 @@ public: bool hasV5TOps() const { return getHexagonArchVersion() >= V5; } bool hasV5TOpsOnly() const { return getHexagonArchVersion() == V5; } bool modeIEEERndNear() const { return ModeIEEERndNear; } + bool enableMachineScheduler() const override; + // Always use the TargetLowering default scheduler. + // FIXME: This will use the vliw scheduler which is probably just hurting + // compiler time and will be removed eventually anyway. 
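The enableMachineScheduler() definition added to HexagonSubtarget.cpp above turns the relocated -disable-hexagon-misched flag into a tri-state check: cl::opt's getNumOccurrences() distinguishes "flag given on the command line" from "left at its default". A minimal sketch of that idiom with a hypothetical option name:

    #include "llvm/Support/CommandLine.h"

    static llvm::cl::opt<bool> DisableFoo("disable-foo", llvm::cl::Hidden,
                                          llvm::cl::init(false));

    static bool enableFoo() {
      if (DisableFoo.getNumOccurrences()) // user passed the flag explicitly
        return !DisableFoo;               // honor -disable-foo[=true|false]
      return true;                        // flag absent: default to enabled
    }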
+ bool enableMachineSchedDefaultSched() const override { return false; } const std::string &getCPUString () const { return CPUString; } diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 64f75a3..48b0bc8 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -29,10 +29,6 @@ using namespace llvm; static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops", cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target")); -static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Disable Hexagon MI Scheduling")); - static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Hexagon CFG Optimization")); @@ -69,9 +65,10 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + : LLVMTargetMachine(T, "e-m:e-p:32:32-i1:32-i64:64-a:0-n32", TT, CPU, FS, + Options, RM, CM, OL), TLOF(make_unique<HexagonTargetObjectFile>()), - DL("e-m:e-p:32:32-i1:32-i64:64-a:0-n32"), Subtarget(TT, CPU, FS, *this) { + Subtarget(TT, CPU, FS, *this) { initAsmInfo(); } @@ -82,16 +79,7 @@ namespace { class HexagonPassConfig : public TargetPassConfig { public: HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) { - // FIXME: Rather than calling enablePass(&MachineSchedulerID) below, define - // HexagonSubtarget::enableMachineScheduler() { return true; }. - // That will bypass the SelectionDAG VLIW scheduler, which is probably just - // hurting compile time and will be removed eventually anyway. - if (DisableHexagonMISched) - disablePass(&MachineSchedulerID); - else - enablePass(&MachineSchedulerID); - } + : TargetPassConfig(TM, PM) {} HexagonTargetMachine &getHexagonTargetMachine() const { return getTM<HexagonTargetMachine>(); @@ -159,9 +147,6 @@ void HexagonPassConfig::addPreEmitPass() { // Expand Spill code for predicate registers. addPass(createHexagonExpandPredSpillCode(), false); - // Split up TFRcondsets into conditional transfers. - addPass(createHexagonSplitTFRCondSets(), false); - // Create Packets. if (!NoOpt) { if (!DisableHardwareLoops) diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h index e0b3a9b..5774f7e 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/lib/Target/Hexagon/HexagonTargetMachine.h @@ -24,7 +24,6 @@ class Module; class HexagonTargetMachine : public LLVMTargetMachine { std::unique_ptr<TargetLoweringObjectFile> TLOF; - const DataLayout DL; // Calculates type size & alignment. 
HexagonSubtarget Subtarget; public: @@ -33,8 +32,7 @@ public: Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); ~HexagonTargetMachine() override; - const DataLayout *getDataLayout() const override { return &DL; } - const HexagonSubtarget *getSubtargetImpl() const override { + const HexagonSubtarget *getSubtargetImpl(const Function &) const override { return &Subtarget; } static unsigned getModuleMatchQuality(const Module &M); diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index c123640..4ca628e 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -389,7 +389,9 @@ static bool IsLoopN(MachineInstr *MI) { /// callee-saved register. static bool DoesModifyCalleeSavedReg(MachineInstr *MI, const TargetRegisterInfo *TRI) { - for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(); *CSR; ++CSR) { + for (const MCPhysReg *CSR = + TRI->getCalleeSavedRegs(MI->getParent()->getParent()); + *CSR; ++CSR) { unsigned CalleeSavedReg = *CSR; if (MI->modifiesRegister(CalleeSavedReg, TRI)) return true; @@ -401,10 +403,7 @@ static bool DoesModifyCalleeSavedReg(MachineInstr *MI, // or new-value store. bool HexagonPacketizerList::isNewifiable(MachineInstr* MI) { const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - if ( isCondInst(MI) || QII->mayBeNewStore(MI)) - return true; - else - return false; + return isCondInst(MI) || QII->mayBeNewStore(MI); } bool HexagonPacketizerList::isCondInst (MachineInstr* MI) { diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp index 56c9dc7..4a3ac8c 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp @@ -11,6 +11,7 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "hexagon-elf-writer" diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index a5a09ba..eac7d6d 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -49,9 +49,8 @@ void emitLittleEndian(uint64_t Binary, raw_ostream &OS) { } HexagonMCCodeEmitter::HexagonMCCodeEmitter(MCInstrInfo const &aMII, - MCSubtargetInfo const &aMST, MCContext &aMCT) - : MST(aMST), MCT(aMCT), MCII (aMII) {} + : MCT(aMCT), MCII(aMII) {} void HexagonMCCodeEmitter::EncodeInstruction(MCInst const &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, @@ -75,15 +74,10 @@ HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO, llvm_unreachable("Only Immediates and Registers implemented right now"); } -MCSubtargetInfo const &HexagonMCCodeEmitter::getSubtargetInfo() const { - return MST; -} - MCCodeEmitter *llvm::createHexagonMCCodeEmitter(MCInstrInfo const &MII, MCRegisterInfo const &MRI, - MCSubtargetInfo const &MST, MCContext &MCT) { - return new HexagonMCCodeEmitter(MII, MST, MCT); + return new HexagonMCCodeEmitter(MII, MCT); } #include "HexagonGenMCCodeEmitter.inc" diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h index db1d707..768c10e 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h @@ -26,13 +26,11 @@ 
namespace llvm { class HexagonMCCodeEmitter : public MCCodeEmitter { - MCSubtargetInfo const &MST; MCContext &MCT; MCInstrInfo const &MCII; public: - HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCSubtargetInfo const &aMST, - MCContext &aMCT); + HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCContext &aMCT); MCSubtargetInfo const &getSubtargetInfo() const; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 09a305b..c63bf32 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -47,15 +47,6 @@ static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) { return X; } -static MCStreamer * -createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *CE, - bool RelaxAll) { - MCELFStreamer *ES = new MCELFStreamer(Context, MAB, OS, CE); - return ES; -} - - static MCSubtargetInfo * createHexagonMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { MCSubtargetInfo *X = new MCSubtargetInfo(); @@ -75,16 +66,6 @@ static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCStreamer *createMCStreamer(Target const &T, StringRef TT, - MCContext &Context, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - MCSubtargetInfo const &STI, bool RelaxAll) { - MCStreamer *ES = createHexagonELFStreamer(Context, MAB, OS, Emitter, RelaxAll); - new MCTargetStreamer(*ES); - return ES; -} - - static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { @@ -135,7 +116,4 @@ extern "C" void LLVMInitializeHexagonTargetMC() { // Register the asm backend TargetRegistry::RegisterMCAsmBackend(TheHexagonTarget, createHexagonAsmBackend); - - // Register the obj streamer - TargetRegistry::RegisterMCObjectStreamer(TheHexagonTarget, createMCStreamer); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h index f074b65..17072d9 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -34,7 +34,6 @@ MCInstrInfo *createHexagonMCInstrInfo(); MCCodeEmitter *createHexagonMCCodeEmitter(MCInstrInfo const &MCII, MCRegisterInfo const &MRI, - MCSubtargetInfo const &MST, MCContext &MCT); MCAsmBackend *createHexagonAsmBackend(Target const &T, diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h index 586f5d9..241f1d6 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h @@ -14,6 +14,8 @@ #ifndef LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430MCTARGETDESC_H #define LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430MCTARGETDESC_H +#include "llvm/Support/DataTypes.h" + namespace llvm { class Target; diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index 2f70cde..591ceb5 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -104,7 +104,7 @@ namespace { bool MatchWrapper(SDValue N, MSP430ISelAddressMode &AM); bool MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM); - bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, + bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) override; // Include the 
pieces autogenerated from the target description. @@ -280,12 +280,12 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue N, } bool MSP430DAGToDAGISel:: -SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, +SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { SDValue Op0, Op1; - switch (ConstraintCode) { + switch (ConstraintID) { default: return true; - case 'm': // memory + case InlineAsm::Constraint_m: // memory if (!SelectAddr(Op, Op0, Op1)) return true; break; diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 9266c3b..68868b6 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -102,6 +102,12 @@ namespace llvm { const std::string &Constraint, MVT VT) const override; + unsigned getInlineAsmMemConstraint( + const std::string &ConstraintCode) const override { + // FIXME: Map different constraints differently. + return InlineAsm::Constraint_m; + } + /// isTruncateFree - Return true if it's free to truncate a value of type /// Ty1 to type Ty2. e.g. On msp430 it's free to truncate a i16 value in /// register R15W to i8 by referencing its sub-register R15B. diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h index 3f88a69..0cfa4a4 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/lib/Target/MSP430/MSP430RegisterInfo.h @@ -26,8 +26,7 @@ public: MSP430RegisterInfo(); /// Code Generation virtual methods... - const MCPhysReg * - getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; BitVector getReservedRegs(const MachineFunction &MF) const override; const TargetRegisterClass* diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp index 7468519..3dda3bf 100644 --- a/lib/Target/MSP430/MSP430Subtarget.cpp +++ b/lib/Target/MSP430/MSP430Subtarget.cpp @@ -25,7 +25,8 @@ using namespace llvm; void MSP430Subtarget::anchor() { } -MSP430Subtarget &MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { +MSP430Subtarget & +MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { ParseSubtargetFeatures("generic", FS); return *this; } diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index 348e672..d6cc4ae 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -30,10 +30,11 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, StringRef TT, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + : LLVMTargetMachine(T, "e-m:e-p:16:16-i32:16:32-a:16-n8:16", TT, CPU, FS, + Options, RM, CM, OL), TLOF(make_unique<TargetLoweringObjectFileELF>()), // FIXME: Check DataLayout string. 
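Both target machines touched here (Hexagon earlier, MSP430 above) now pass their layout string straight to the LLVMTargetMachine constructor instead of keeping a separate DataLayout member. Reading the MSP430 string by the standard grammar: "e" little-endian, "m:e" ELF-style name mangling, "p:16:16" 16-bit pointers with 16-bit alignment, "i32:16:32" i32 with 16-bit ABI and 32-bit preferred alignment, "a:16" 16-bit aggregate alignment, "n8:16" native integer widths. A hedged sketch of querying such a string directly (not code from this patch):

    #include "llvm/IR/DataLayout.h"

    static bool msp430LayoutSanityCheck() {
      llvm::DataLayout DL("e-m:e-p:16:16-i32:16:32-a:16-n8:16");
      return DL.isLittleEndian() &&             // leading 'e'
             DL.getPointerSizeInBits() == 16;   // 'p:16:16'
    }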
- DL("e-m:e-p:16:16-i32:16:32-a:16-n8:16"), Subtarget(TT, CPU, FS, *this) { + Subtarget(TT, CPU, FS, *this) { initAsmInfo(); } diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h index c6a6a70..6ccd30d 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.h +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -25,7 +25,6 @@ namespace llvm { /// class MSP430TargetMachine : public LLVMTargetMachine { std::unique_ptr<TargetLoweringObjectFile> TLOF; - const DataLayout DL; // Calculates type size & alignment MSP430Subtarget Subtarget; public: @@ -35,8 +34,7 @@ public: CodeGenOpt::Level OL); ~MSP430TargetMachine() override; - const DataLayout *getDataLayout() const override { return &DL; } - const MSP430Subtarget *getSubtargetImpl() const override { + const MSP430Subtarget *getSubtargetImpl(const Function &F) const override { return &Subtarget; } TargetPassConfig *createPassConfig(PassManagerBase &PM) override; diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 1040bf7..6401bc1 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include <memory> using namespace llvm; @@ -53,7 +54,13 @@ public: } unsigned getATRegNum() const { return ATReg; } - bool setATReg(unsigned Reg); + bool setATReg(unsigned Reg) { + if (Reg > 31) + return false; + + ATReg = Reg; + return true; + } bool isReorder() const { return Reorder; } void setReorder() { Reorder = true; } @@ -193,6 +200,9 @@ class MipsAsmParser : public MCTargetAsmParser { bool expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions); + void createNop(bool hasShortDelaySlot, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions); + bool reportParseError(Twine ErrorMsg); bool reportParseError(SMLoc Loc, Twine ErrorMsg); @@ -236,6 +246,8 @@ class MipsAsmParser : public MCTargetAsmParser { bool parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI, StringRef Directive); + bool parseInternalDirectiveReallowModule(); + MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol); bool eatComma(StringRef ErrorStr); @@ -1365,22 +1377,11 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, } } + // If this instruction has a delay slot and .set reorder is active, + // emit a NOP after it. if (MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder()) { - // If this instruction has a delay slot and .set reorder is active, - // emit a NOP after it. 
Instructions.push_back(Inst); - MCInst NopInst; - if (hasShortDelaySlot(Inst.getOpcode())) { - NopInst.setOpcode(Mips::MOVE16_MM); - NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); - NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); - } else { - NopInst.setOpcode(Mips::SLL); - NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); - NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); - NopInst.addOperand(MCOperand::CreateImm(0)); - } - Instructions.push_back(NopInst); + createNop(hasShortDelaySlot(Inst.getOpcode()), IDLoc, Instructions); return false; } @@ -1584,10 +1585,10 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, bool MipsAsmParser::needsExpansion(MCInst &Inst) { switch (Inst.getOpcode()) { - case Mips::LoadImm32Reg: - case Mips::LoadAddr32Imm: - case Mips::LoadAddr32Reg: - case Mips::LoadImm64Reg: + case Mips::LoadImm32: + case Mips::LoadImm64: + case Mips::LoadAddrImm32: + case Mips::LoadAddrReg32: case Mips::B_MM_Pseudo: case Mips::LWM_MM: case Mips::SWM_MM: @@ -1603,17 +1604,17 @@ bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) { switch (Inst.getOpcode()) { default: llvm_unreachable("unimplemented expansion"); - case Mips::LoadImm32Reg: + case Mips::LoadImm32: return expandLoadImm(Inst, IDLoc, Instructions); - case Mips::LoadImm64Reg: + case Mips::LoadImm64: if (!isGP64bit()) { Error(IDLoc, "instruction requires a 64-bit architecture"); return true; } return expandLoadImm(Inst, IDLoc, Instructions); - case Mips::LoadAddr32Imm: + case Mips::LoadAddrImm32: return expandLoadAddressImm(Inst, IDLoc, Instructions); - case Mips::LoadAddr32Reg: + case Mips::LoadAddrReg32: return expandLoadAddressReg(Inst, IDLoc, Instructions); case Mips::B_MM_Pseudo: return expandUncondBranchMMPseudo(Inst, IDLoc, Instructions); @@ -1982,14 +1983,10 @@ bool MipsAsmParser::expandUncondBranchMMPseudo( } Instructions.push_back(Inst); - if (AssemblerOptions.back()->isReorder()) { - // If .set reorder is active, emit a NOP after the branch instruction. - MCInst NopInst; - NopInst.setOpcode(Mips::MOVE16_MM); - NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); - NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); - Instructions.push_back(NopInst); - } + // If .set reorder is active, emit a NOP after the branch instruction. + if (AssemblerOptions.back()->isReorder()) + createNop(true, IDLoc, Instructions); + return false; } @@ -2132,6 +2129,22 @@ MipsAsmParser::expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc, return false; } +void MipsAsmParser::createNop(bool hasShortDelaySlot, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions) { + MCInst NopInst; + if (hasShortDelaySlot) { + NopInst.setOpcode(Mips::MOVE16_MM); + NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); + NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); + } else { + NopInst.setOpcode(Mips::SLL); + NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); + NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); + NopInst.addOperand(MCOperand::CreateImm(0)); + } + Instructions.push_back(NopInst); +} + unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) { // As described by the Mips32r2 spec, the registers Rd and Rs for // jalr.hb must be different. 
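The new createNop() helper above centralizes the assembler's two NOP shapes: for an ordinary delay slot it builds SLL $zero, $zero, 0 (the canonical MIPS NOP, which encodes as the all-zero word), and for a microMIPS short delay slot the 16-bit MOVE16 $zero, $zero. Usage, exactly as in the ".set reorder" path above:

    // After emitting an instruction with a delay slot under ".set reorder":
    Instructions.push_back(Inst);
    createNop(hasShortDelaySlot(Inst.getOpcode()), IDLoc, Instructions);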
@@ -2370,14 +2383,6 @@ int MipsAsmParser::matchMSA128CtrlRegisterName(StringRef Name) { return CC; } -bool MipsAssemblerOptions::setATReg(unsigned Reg) { - if (Reg > 31) - return false; - - ATReg = Reg; - return true; -} - int MipsAsmParser::getATReg(SMLoc Loc) { int AT = AssemblerOptions.back()->getATRegNum(); if (AT == 0) @@ -4429,9 +4434,25 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".module") return parseDirectiveModule(); + if (IDVal == ".llvm_internal_mips_reallow_module_directive") + return parseInternalDirectiveReallowModule(); + return true; } +bool MipsAsmParser::parseInternalDirectiveReallowModule() { + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + getTargetStreamer().reallowModuleDirective(); + + getParser().Lex(); // Eat EndOfStatement token. + return false; +} + extern "C" void LLVMInitializeMipsAsmParser() { RegisterMCAsmParser<MipsAsmParser> X(TheMipsTarget); RegisterMCAsmParser<MipsAsmParser> Y(TheMipselTarget); diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h index dd0e54c..243b73d 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -32,10 +32,9 @@ class MipsAsmBackend : public MCAsmBackend { bool Is64Bit; // 32 or 64 bit words public: - MipsAsmBackend(const Target &T, Triple::OSType _OSType, bool _isLittle, - bool _is64Bit) - : MCAsmBackend(), OSType(_OSType), IsLittle(_isLittle), - Is64Bit(_is64Bit) {} + MipsAsmBackend(const Target &T, Triple::OSType OSType, bool IsLittle, + bool Is64Bit) + : MCAsmBackend(), OSType(OSType), IsLittle(IsLittle), Is64Bit(Is64Bit) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const override; diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index e14dc8d..a68bf16 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -38,9 +38,9 @@ namespace { MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, bool _isN64, bool IsLittleEndian) - : MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS, - /*HasRelocationAddend*/ (_isN64) ? true : false, - /*IsN64*/ _isN64) {} + : MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS, + /*HasRelocationAddend*/ _isN64, + /*IsN64*/ _isN64) {} MipsELFObjectWriter::~MipsELFObjectWriter() {} @@ -54,9 +54,11 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, switch (Kind) { default: llvm_unreachable("invalid fixup kind!"); + case Mips::fixup_Mips_32: case FK_Data_4: Type = ELF::R_MIPS_32; break; + case Mips::fixup_Mips_64: case FK_Data_8: Type = ELF::R_MIPS_64; break; @@ -262,12 +264,10 @@ MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, } } -MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS, - uint8_t OSABI, +MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS, uint8_t OSABI, bool IsLittleEndian, bool Is64Bit) { - MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI, - (Is64Bit) ? 
true : false, - IsLittleEndian); + MCELFObjectTargetWriter *MOTW = + new MipsELFObjectWriter(Is64Bit, OSABI, Is64Bit, IsLittleEndian); return createELFObjectWriter(MOTW, OS, IsLittleEndian); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp index 18c4a20..93f60df 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -69,11 +69,9 @@ void MipsELFStreamer::EmitMipsOptionRecords() { I->EmitMipsOptionRecord(); } -namespace llvm { -MCELFStreamer *createMipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, - bool RelaxAll) { - return new MipsELFStreamer(Context, MAB, OS, Emitter, STI); -} +MCELFStreamer *llvm::createMipsELFStreamer(MCContext &Context, + MCAsmBackend &MAB, raw_ostream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll) { + return new MipsELFStreamer(Context, MAB, OS, Emitter); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h index bc76d8a..6b834c6 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h @@ -34,7 +34,7 @@ class MipsELFStreamer : public MCELFStreamer { public: MipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS, - MCCodeEmitter *Emitter, const MCSubtargetInfo &STI) + MCCodeEmitter *Emitter) : MCELFStreamer(Context, MAB, OS, Emitter) { RegInfoRecord = new MipsRegInfoRecord(this, Context); @@ -69,6 +69,6 @@ public: MCELFStreamer *createMipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS, MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, bool RelaxAll); + bool RelaxAll); } // namespace llvm. #endif diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h index fa8d6a6..e601963 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h +++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h @@ -18,7 +18,7 @@ namespace Mips { // one can have multiple fixup types for a given relocation and thus need // to be uniquely named. // - // This table *must* be in the save order of + // This table *must* be in the same order of // MCFixupKindInfo Infos[Mips::NumTargetFixupKinds] // in MipsAsmBackend.cpp. 
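The comment fix above ("save order" to "same order") documents a real invariant: the Mips::Fixups enum and the MCFixupKindInfo table in MipsAsmBackend.cpp are parallel arrays indexed by fixup kind. A compile-time guard for that kind of pairing might look like this (illustrative two-entry table, not the real Mips list):

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/MC/MCFixupKindInfo.h"

    enum Fixups { fixup_A, fixup_B, NumTargetFixupKinds };

    static const llvm::MCFixupKindInfo Infos[] = {
        // name     offset bits flags
        {"fixup_A", 0,     32,  0},
        {"fixup_B", 0,     16,  0},
    };

    static_assert(llvm::array_lengthof(Infos) == NumTargetFixupKinds,
                  "fixup info table must mirror the Fixups enum");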
// diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 8208725..1c2f2da 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -35,14 +35,12 @@ namespace llvm { MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx) { return new MipsMCCodeEmitter(MCII, Ctx, false); } MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx) { return new MipsMCCodeEmitter(MCII, Ctx, true); } @@ -451,7 +449,7 @@ getSImm9AddiuspValue(const MCInst &MI, unsigned OpNo, } unsigned MipsMCCodeEmitter:: -getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups, +getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { int64_t Res; @@ -500,6 +498,9 @@ getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups, switch(cast<MCSymbolRefExpr>(Expr)->getKind()) { default: llvm_unreachable("Unknown fixup kind!"); break; + case MCSymbolRefExpr::VK_None: + FixupKind = Mips::fixup_Mips_32; // FIXME: This is ok for O32/N32 but not N64. + break; case MCSymbolRefExpr::VK_Mips_GPOFF_HI : FixupKind = Mips::fixup_Mips_GPOFF_HI; break; diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h index e756b47..e6b5be7 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h @@ -25,7 +25,6 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg); MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, bool RelaxAll); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 9b56067..6f3f37b 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -106,96 +106,73 @@ static MCInstPrinter *createMipsMCInstPrinter(const Target &T, return new MipsInstPrinter(MAI, MII, MRI); } -static MCStreamer *createMCStreamer(const Target &T, StringRef TT, - MCContext &Context, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, bool RelaxAll) { +static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, + MCAsmBackend &MAB, raw_ostream &OS, + MCCodeEmitter *Emitter, bool RelaxAll) { MCStreamer *S; - if (!Triple(TT).isOSNaCl()) - S = createMipsELFStreamer(Context, MAB, OS, Emitter, STI, RelaxAll); + if (!T.isOSNaCl()) + S = createMipsELFStreamer(Context, MAB, OS, Emitter, RelaxAll); else - S = createMipsNaClELFStreamer(Context, MAB, OS, Emitter, STI, RelaxAll); - new MipsTargetELFStreamer(*S, STI); + S = createMipsNaClELFStreamer(Context, MAB, OS, Emitter, RelaxAll); return S; } -static MCStreamer * -createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useDwarfDirectory, - MCInstPrinter *InstPrint, MCCodeEmitter *CE, - MCAsmBackend *TAB, bool ShowInst) { - MCStreamer *S = llvm::createAsmStreamer( - Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst); - new MipsTargetAsmStreamer(*S, OS); - return S; +static MCTargetStreamer *createMipsAsmTargetStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool isVerboseAsm) { + return new MipsTargetAsmStreamer(S, OS); 
} static MCTargetStreamer *createMipsNullTargetStreamer(MCStreamer &S) { return new MipsTargetStreamer(S); } +static MCTargetStreamer * +createMipsObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { + return new MipsTargetELFStreamer(S, STI); +} + extern "C" void LLVMInitializeMipsTargetMC() { - // Register the MC asm info. - RegisterMCAsmInfoFn X(TheMipsTarget, createMipsMCAsmInfo); - RegisterMCAsmInfoFn Y(TheMipselTarget, createMipsMCAsmInfo); - RegisterMCAsmInfoFn A(TheMips64Target, createMipsMCAsmInfo); - RegisterMCAsmInfoFn B(TheMips64elTarget, createMipsMCAsmInfo); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheMipsTarget, - createMipsMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheMipselTarget, - createMipsMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheMips64Target, - createMipsMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheMips64elTarget, - createMipsMCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheMipsTarget, createMipsMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheMipselTarget, createMipsMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheMips64Target, createMipsMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheMips64elTarget, - createMipsMCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheMipsTarget, createMipsMCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheMipselTarget, createMipsMCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheMips64Target, createMipsMCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheMips64elTarget, - createMipsMCRegisterInfo); + for (Target *T : {&TheMipsTarget, &TheMipselTarget, &TheMips64Target, + &TheMips64elTarget}) { + // Register the MC asm info. + RegisterMCAsmInfoFn X(*T, createMipsMCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(*T, createMipsMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createMipsMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(*T, createMipsMCRegisterInfo); + + // Register the elf streamer. + TargetRegistry::RegisterELFStreamer(*T, createMCStreamer); + + // Register the asm target streamer. + TargetRegistry::RegisterAsmTargetStreamer(*T, createMipsAsmTargetStreamer); + + TargetRegistry::RegisterNullTargetStreamer(*T, + createMipsNullTargetStreamer); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, createMipsMCSubtargetInfo); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(*T, createMipsMCInstPrinter); + + TargetRegistry::RegisterObjectTargetStreamer( + *T, createMipsObjectTargetStreamer); + } // Register the MC Code Emitter - TargetRegistry::RegisterMCCodeEmitter(TheMipsTarget, - createMipsMCCodeEmitterEB); - TargetRegistry::RegisterMCCodeEmitter(TheMipselTarget, - createMipsMCCodeEmitterEL); - TargetRegistry::RegisterMCCodeEmitter(TheMips64Target, - createMipsMCCodeEmitterEB); - TargetRegistry::RegisterMCCodeEmitter(TheMips64elTarget, - createMipsMCCodeEmitterEL); - - // Register the object streamer. 
- TargetRegistry::RegisterMCObjectStreamer(TheMipsTarget, createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheMipselTarget, createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheMips64Target, createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheMips64elTarget, - createMCStreamer); - - // Register the asm streamer. - TargetRegistry::RegisterAsmStreamer(TheMipsTarget, createMCAsmStreamer); - TargetRegistry::RegisterAsmStreamer(TheMipselTarget, createMCAsmStreamer); - TargetRegistry::RegisterAsmStreamer(TheMips64Target, createMCAsmStreamer); - TargetRegistry::RegisterAsmStreamer(TheMips64elTarget, createMCAsmStreamer); - - TargetRegistry::RegisterNullTargetStreamer(TheMipsTarget, - createMipsNullTargetStreamer); - TargetRegistry::RegisterNullTargetStreamer(TheMipselTarget, - createMipsNullTargetStreamer); - TargetRegistry::RegisterNullTargetStreamer(TheMips64Target, - createMipsNullTargetStreamer); - TargetRegistry::RegisterNullTargetStreamer(TheMips64elTarget, - createMipsNullTargetStreamer); + for (Target *T : {&TheMipsTarget, &TheMips64Target}) + TargetRegistry::RegisterMCCodeEmitter(*T, createMipsMCCodeEmitterEB); + + for (Target *T : {&TheMipselTarget, &TheMips64elTarget}) + TargetRegistry::RegisterMCCodeEmitter(*T, createMipsMCCodeEmitterEL); // Register the asm backend. TargetRegistry::RegisterMCAsmBackend(TheMipsTarget, @@ -207,23 +184,4 @@ extern "C" void LLVMInitializeMipsTargetMC() { TargetRegistry::RegisterMCAsmBackend(TheMips64elTarget, createMipsAsmBackendEL64); - // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(TheMipsTarget, - createMipsMCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheMipselTarget, - createMipsMCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheMips64Target, - createMipsMCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheMips64elTarget, - createMipsMCSubtargetInfo); - - // Register the MCInstPrinter. 
- TargetRegistry::RegisterMCInstPrinter(TheMipsTarget, - createMipsMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheMipselTarget, - createMipsMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheMips64Target, - createMipsMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheMips64elTarget, - createMipsMCInstPrinter); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h index 9528b4e..92f394a 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h @@ -35,11 +35,9 @@ extern Target TheMips64elTarget; MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx); MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx); MCAsmBackend *createMipsAsmBackendEB32(const Target &T, diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp index 92b8455..1adfdf9 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp @@ -37,8 +37,8 @@ const unsigned LoadStoreStackMaskReg = Mips::T7; class MipsNaClELFStreamer : public MipsELFStreamer { public: MipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, - MCCodeEmitter *Emitter, const MCSubtargetInfo &STI) - : MipsELFStreamer(Context, TAB, OS, Emitter, STI), PendingCall(false) {} + MCCodeEmitter *Emitter) + : MipsELFStreamer(Context, TAB, OS, Emitter), PendingCall(false) {} ~MipsNaClELFStreamer() {} @@ -254,10 +254,8 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg) { MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, bool RelaxAll) { - MipsNaClELFStreamer *S = new MipsNaClELFStreamer(Context, TAB, OS, Emitter, - STI); + MipsNaClELFStreamer *S = new MipsNaClELFStreamer(Context, TAB, OS, Emitter); if (RelaxAll) S->getAssembler().setRelaxAll(true); diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index 64d7cab..5790a5c 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -62,7 +62,7 @@ void MipsTargetStreamer::emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) { void MipsTargetStreamer::emitDirectiveSetArch(StringRef Arch) { forbidModuleDirective(); } -void MipsTargetStreamer::emitDirectiveSetMips0() {} +void MipsTargetStreamer::emitDirectiveSetMips0() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips1() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips2() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips3() { forbidModuleDirective(); } @@ -78,8 +78,8 @@ void MipsTargetStreamer::emitDirectiveSetMips64R2() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips64R3() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips64R5() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips64R6() { forbidModuleDirective(); } -void MipsTargetStreamer::emitDirectiveSetPop() {} -void MipsTargetStreamer::emitDirectiveSetPush() {} +void MipsTargetStreamer::emitDirectiveSetPop() { forbidModuleDirective(); } +void MipsTargetStreamer::emitDirectiveSetPush() { 
forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetDsp() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetNoDsp() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {} @@ -91,6 +91,10 @@ void MipsTargetStreamer::emitDirectiveModuleOddSPReg(bool Enabled, if (!Enabled && !IsO32ABI) report_fatal_error("+nooddspreg is only valid for O32"); } +void MipsTargetStreamer::emitDirectiveSetFp( + MipsABIFlagsSection::FpABIKind Value) { + forbidModuleDirective(); +} MipsTargetAsmStreamer::MipsTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS) @@ -198,7 +202,10 @@ void MipsTargetAsmStreamer::emitDirectiveSetArch(StringRef Arch) { MipsTargetStreamer::emitDirectiveSetArch(Arch); } -void MipsTargetAsmStreamer::emitDirectiveSetMips0() { OS << "\t.set\tmips0\n"; } +void MipsTargetAsmStreamer::emitDirectiveSetMips0() { + OS << "\t.set\tmips0\n"; + MipsTargetStreamer::emitDirectiveSetMips0(); +} void MipsTargetAsmStreamer::emitDirectiveSetMips1() { OS << "\t.set\tmips1\n"; @@ -285,9 +292,15 @@ void MipsTargetAsmStreamer::emitDirectiveSetNoDsp() { MipsTargetStreamer::emitDirectiveSetNoDsp(); } -void MipsTargetAsmStreamer::emitDirectiveSetPop() { OS << "\t.set\tpop\n"; } +void MipsTargetAsmStreamer::emitDirectiveSetPop() { + OS << "\t.set\tpop\n"; + MipsTargetStreamer::emitDirectiveSetPop(); +} -void MipsTargetAsmStreamer::emitDirectiveSetPush() { OS << "\t.set\tpush\n"; } +void MipsTargetAsmStreamer::emitDirectiveSetPush() { + OS << "\t.set\tpush\n"; + MipsTargetStreamer::emitDirectiveSetPush(); +} // Print a 32 bit hex number with all numbers. static void printHex32(unsigned Value, raw_ostream &OS) { @@ -346,15 +359,13 @@ void MipsTargetAsmStreamer::emitDirectiveModuleFP( void MipsTargetAsmStreamer::emitDirectiveSetFp( MipsABIFlagsSection::FpABIKind Value) { + MipsTargetStreamer::emitDirectiveSetFp(Value); + StringRef ModuleValue; OS << "\t.set\tfp="; OS << ABIFlagsSection.getFpABIString(Value) << "\n"; } -void MipsTargetAsmStreamer::emitMipsAbiFlags() { - // No action required for text output. -} - void MipsTargetAsmStreamer::emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) { MipsTargetStreamer::emitDirectiveModuleOddSPReg(Enabled, IsO32ABI); @@ -367,10 +378,7 @@ MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI) : MipsTargetStreamer(S), MicroMipsEnabled(false), STI(STI) { MCAssembler &MCA = getStreamer().getAssembler(); - Triple T(STI.getTargetTriple()); - Pic = (MCA.getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_) - ? 
true - : false; + Pic = MCA.getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_; uint64_t Features = STI.getFeatureBits(); diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index e20df2f..2aab739 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -642,8 +642,10 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { LW_FM_MM<0xc>; /// Arithmetic Instructions (3-Operand, R-Type) - def ADDu_MM : MMRel, ArithLogicR<"addu", GPR32Opnd>, ADD_FM_MM<0, 0x150>; - def SUBu_MM : MMRel, ArithLogicR<"subu", GPR32Opnd>, ADD_FM_MM<0, 0x1d0>; + def ADDu_MM : MMRel, ArithLogicR<"addu", GPR32Opnd, 1, II_ADDU, add>, + ADD_FM_MM<0, 0x150>; + def SUBu_MM : MMRel, ArithLogicR<"subu", GPR32Opnd, 0, II_SUBU, sub>, + ADD_FM_MM<0, 0x1d0>; def MUL_MM : MMRel, ArithLogicR<"mul", GPR32Opnd>, ADD_FM_MM<0, 0x210>; def ADD_MM : MMRel, ArithLogicR<"add", GPR32Opnd>, ADD_FM_MM<0, 0x110>; def SUB_MM : MMRel, ArithLogicR<"sub", GPR32Opnd>, ADD_FM_MM<0, 0x190>; @@ -883,6 +885,8 @@ def : MipsPat<(i32 immSExt16:$imm), (ADDiu_MM ZERO, immSExt16:$imm)>; def : MipsPat<(i32 immZExt16:$imm), (ORi_MM ZERO, immZExt16:$imm)>; +def : MipsPat<(not GPR32:$in), + (NOR_MM GPR32Opnd:$in, ZERO)>; def : MipsPat<(add GPRMM16:$src, immSExtAddiur2:$imm), (ADDIUR2_MM GPRMM16:$src, immSExtAddiur2:$imm)>; diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h index cb09c1a..671d7a8 100644 --- a/lib/Target/Mips/Mips.h +++ b/lib/Target/Mips/Mips.h @@ -20,8 +20,13 @@ namespace llvm { class MipsTargetMachine; + class ModulePass; class FunctionPass; + ModulePass *createMipsOs16Pass(MipsTargetMachine &TM); + ModulePass *createMips16HardFloatPass(MipsTargetMachine &TM); + + FunctionPass *createMipsModuleISelDagPass(MipsTargetMachine &TM); FunctionPass *createMipsOptimizePICCallPass(MipsTargetMachine &TM); FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM); FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM); diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index 01c548e..ca24741 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -58,22 +58,22 @@ def MipsInstrInfo : InstrInfo; //===----------------------------------------------------------------------===// def FeatureNoABICalls : SubtargetFeature<"noabicalls", "NoABICalls", "true", - "Disable SVR4-style position-independent code.">; + "Disable SVR4-style position-independent code">; def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true", - "General Purpose Registers are 64-bit wide.">; + "General Purpose Registers are 64-bit wide">; def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true", - "Support 64-bit FP registers.">; + "Support 64-bit FP registers">; def FeatureFPXX : SubtargetFeature<"fpxx", "IsFPXX", "true", - "Support for FPXX.">; + "Support for FPXX">; def FeatureNaN2008 : SubtargetFeature<"nan2008", "IsNaN2008bit", "true", - "IEEE 754-2008 NaN encoding.">; + "IEEE 754-2008 NaN encoding">; def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat", "true", "Only supports single precision float">; def FeatureNoOddSPReg : SubtargetFeature<"nooddspreg", "UseOddSPReg", "false", "Disable odd numbered single-precision " "registers">; def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU", - "true", "Enable vector FPU instructions.">; + "true", "Enable vector FPU instructions">; def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1", "Mips I ISA Support [highly experimental]">; 
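The MicroMips pattern added above maps a bitwise not onto NOR with $zero, which is sound because nor(a, b) = ~(a | b) degenerates to ~a when b is zero. A one-line compile-time check of the identity (plain C++, illustrative only):

    #include <cstdint>

    // nor(a, b) computes ~(a | b); with b == 0 this is exactly ~a, which is
    // why ISel can select (not x) to "nor $dst, $x, $zero".
    constexpr uint32_t nor(uint32_t a, uint32_t b) { return ~(a | b); }

    static_assert(nor(0x0F0Fu, 0) == 0xFFFFF0F0u, "nor(x, 0) must equal ~x");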
def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2", diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp index 32dc90a..893fc7c 100644 --- a/lib/Target/Mips/Mips16HardFloat.cpp +++ b/lib/Target/Mips/Mips16HardFloat.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "Mips16HardFloat.h" +#include "MipsTargetMachine.h" #include "llvm/IR/Module.h" #include "llvm/IR/Value.h" #include "llvm/Support/Debug.h" @@ -19,38 +19,51 @@ #include <algorithm> #include <string> -#define DEBUG_TYPE "mips16-hard-float" +using namespace llvm; -static void inlineAsmOut - (LLVMContext &C, StringRef AsmString, BasicBlock *BB ) { - std::vector<llvm::Type *> AsmArgTypes; - std::vector<llvm::Value*> AsmArgs; - llvm::FunctionType *AsmFTy = - llvm::FunctionType::get(Type::getVoidTy(C), - AsmArgTypes, false); - llvm::InlineAsm *IA = - llvm::InlineAsm::get(AsmFTy, AsmString, "", true, - /* IsAlignStack */ false, - llvm::InlineAsm::AD_ATT); - CallInst::Create(IA, AsmArgs, "", BB); -} +#define DEBUG_TYPE "mips16-hard-float" namespace { + class Mips16HardFloat : public ModulePass { + public: + static char ID; -class InlineAsmHelper { - LLVMContext &C; - BasicBlock *BB; -public: - InlineAsmHelper(LLVMContext &C_, BasicBlock *BB_) : - C(C_), BB(BB_) { - } + Mips16HardFloat(MipsTargetMachine &TM_) : ModulePass(ID), TM(TM_) {} - void Out(StringRef AsmString) { - inlineAsmOut(C, AsmString, BB); - } + const char *getPassName() const override { + return "MIPS16 Hard Float Pass"; + } -}; + bool runOnModule(Module &M) override; + + protected: + const MipsTargetMachine &TM; + }; + + class InlineAsmHelper { + LLVMContext &C; + BasicBlock *BB; + public: + InlineAsmHelper(LLVMContext &C_, BasicBlock *BB_) : + C(C_), BB(BB_) { + } + + void Out(StringRef AsmString) { + std::vector<llvm::Type *> AsmArgTypes; + std::vector<llvm::Value*> AsmArgs; + + llvm::FunctionType *AsmFTy = llvm::FunctionType::get(Type::getVoidTy(C), + AsmArgTypes, false); + llvm::InlineAsm *IA = llvm::InlineAsm::get(AsmFTy, AsmString, "", true, + /* IsAlignStack */ false, + llvm::InlineAsm::AD_ATT); + CallInst::Create(IA, AsmArgs, "", BB); + } + }; + + char Mips16HardFloat::ID = 0; } + // // Return types that matter for hard float are: // float, double, complex float, and complex double @@ -154,11 +167,11 @@ static bool needsFPStubFromParams(Function &F) { if (F.arg_size() >=1) { Type *ArgType = F.getFunctionType()->getParamType(0); switch (ArgType->getTypeID()) { - case Type::FloatTyID: - case Type::DoubleTyID: - return true; - default: - break; + case Type::FloatTyID: + case Type::DoubleTyID: + return true; + default: + break; } } return false; @@ -182,10 +195,8 @@ static bool needsFPHelperFromSig(Function &F) { // We swap between FP and Integer registers to allow Mips16 and Mips32 to // interoperate // - -static void swapFPIntParams - (FPParamVariant PV, Module *M, InlineAsmHelper &IAH, - bool LE, bool ToFP) { +static void swapFPIntParams(FPParamVariant PV, Module *M, InlineAsmHelper &IAH, + bool LE, bool ToFP) { //LLVMContext &Context = M->getContext(); std::string MI = ToFP? "mtc1 ": "mfc1 "; switch (PV) { @@ -242,6 +253,7 @@ static void swapFPIntParams return; } } + // // Make sure that we know we already need a stub for this function. 
// Having called needsFPHelperFromSig @@ -297,8 +309,8 @@ static void assureFPCallStub(Function &F, Module *M, break; case CFRet: if (LE) { - IAH.Out("mfc1 $$2,$$f0"); - IAH.Out("mfc1 $$3,$$f2"); + IAH.Out("mfc1 $$2,$$f0"); + IAH.Out("mfc1 $$3,$$f2"); } else { IAH.Out("mfc1 $$3,$$f0"); IAH.Out("mfc1 $$3,$$f2"); @@ -331,28 +343,27 @@ static void assureFPCallStub(Function &F, Module *M, // // Functions that are llvm intrinsics and don't need helpers. // -static const char *IntrinsicInline[] = - {"fabs", - "fabsf", - "llvm.ceil.f32", "llvm.ceil.f64", - "llvm.copysign.f32", "llvm.copysign.f64", - "llvm.cos.f32", "llvm.cos.f64", - "llvm.exp.f32", "llvm.exp.f64", - "llvm.exp2.f32", "llvm.exp2.f64", - "llvm.fabs.f32", "llvm.fabs.f64", - "llvm.floor.f32", "llvm.floor.f64", - "llvm.fma.f32", "llvm.fma.f64", - "llvm.log.f32", "llvm.log.f64", - "llvm.log10.f32", "llvm.log10.f64", - "llvm.nearbyint.f32", "llvm.nearbyint.f64", - "llvm.pow.f32", "llvm.pow.f64", - "llvm.powi.f32", "llvm.powi.f64", - "llvm.rint.f32", "llvm.rint.f64", - "llvm.round.f32", "llvm.round.f64", - "llvm.sin.f32", "llvm.sin.f64", - "llvm.sqrt.f32", "llvm.sqrt.f64", - "llvm.trunc.f32", "llvm.trunc.f64", - }; +static const char *IntrinsicInline[] = { + "fabs", "fabsf", + "llvm.ceil.f32", "llvm.ceil.f64", + "llvm.copysign.f32", "llvm.copysign.f64", + "llvm.cos.f32", "llvm.cos.f64", + "llvm.exp.f32", "llvm.exp.f64", + "llvm.exp2.f32", "llvm.exp2.f64", + "llvm.fabs.f32", "llvm.fabs.f64", + "llvm.floor.f32", "llvm.floor.f64", + "llvm.fma.f32", "llvm.fma.f64", + "llvm.log.f32", "llvm.log.f64", + "llvm.log10.f32", "llvm.log10.f64", + "llvm.nearbyint.f32", "llvm.nearbyint.f64", + "llvm.pow.f32", "llvm.pow.f64", + "llvm.powi.f32", "llvm.powi.f64", + "llvm.rint.f32", "llvm.rint.f64", + "llvm.round.f32", "llvm.round.f64", + "llvm.sin.f32", "llvm.sin.f64", + "llvm.sqrt.f32", "llvm.sqrt.f64", + "llvm.trunc.f32", "llvm.trunc.f64", +}; static bool isIntrinsicInline(Function *F) { return std::binary_search(std::begin(IntrinsicInline), @@ -384,9 +395,10 @@ static bool fixupFPReturnAndCall(Function &F, Module *M, Type *T = RVal->getType(); FPReturnVariant RV = whichFPReturnVariant(T); if (RV == NoFPRet) continue; - static const char* Helper[NoFPRet] = - {"__mips16_ret_sf", "__mips16_ret_df", "__mips16_ret_sc", - "__mips16_ret_dc"}; + static const char* Helper[NoFPRet] = { + "__mips16_ret_sf", "__mips16_ret_df", "__mips16_ret_sc", + "__mips16_ret_dc" + }; const char *Name = Helper[RV]; AttributeSet A; Value *Params[] = {RVal}; @@ -406,33 +418,33 @@ static bool fixupFPReturnAndCall(Function &F, Module *M, Value *F = (M->getOrInsertFunction(Name, A, MyVoid, T, nullptr)); CallInst::Create(F, Params, "", &Inst ); } else if (const CallInst *CI = dyn_cast<CallInst>(I)) { - const Value* V = CI->getCalledValue(); - const Type* T = nullptr; - if (V) T = V->getType(); - const PointerType *PFT=nullptr; - if (T) PFT = dyn_cast<PointerType>(T); - const FunctionType *FT=nullptr; - if (PFT) FT = dyn_cast<FunctionType>(PFT->getElementType()); - Function *F_ = CI->getCalledFunction(); - if (FT && needsFPReturnHelper(*FT) && - !(F_ && isIntrinsicInline(F_))) { + const Value* V = CI->getCalledValue(); + const Type* T = nullptr; + if (V) T = V->getType(); + const PointerType *PFT=nullptr; + if (T) PFT = dyn_cast<PointerType>(T); + const FunctionType *FT=nullptr; + if (PFT) FT = dyn_cast<FunctionType>(PFT->getElementType()); + Function *F_ = CI->getCalledFunction(); + if (FT && needsFPReturnHelper(*FT) && + !(F_ && isIntrinsicInline(F_))) { + Modified=true; + 
F.addFnAttr("saveS2"); + } + if (F_ && !isIntrinsicInline(F_)) { + // pic mode calls are handled by already defined + // helper functions + if (needsFPReturnHelper(*F_)) { Modified=true; F.addFnAttr("saveS2"); } - if (F_ && !isIntrinsicInline(F_)) { - // pic mode calls are handled by already defined - // helper functions - if (needsFPReturnHelper(*F_)) { + if (TM.getRelocationModel() != Reloc::PIC_ ) { + if (needsFPHelperFromSig(*F_)) { + assureFPCallStub(*F_, M, TM); Modified=true; - F.addFnAttr("saveS2"); - } - if (TM.getRelocationModel() != Reloc::PIC_ ) { - if (needsFPHelperFromSig(*F_)) { - assureFPCallStub(*F_, M, TM); - Modified=true; - } } } + } } } return Modified; @@ -489,7 +501,6 @@ static void removeUseSoftFloat(Function &F) { F.addAttributes(AttributeSet::FunctionIndex, A); } -namespace llvm { // // This pass only makes sense when the underlying chip has floating point but @@ -530,11 +541,7 @@ bool Mips16HardFloat::runOnModule(Module &M) { return Modified; } -char Mips16HardFloat::ID = 0; - -} -ModulePass *llvm::createMips16HardFloat(MipsTargetMachine &TM) { +ModulePass *llvm::createMips16HardFloatPass(MipsTargetMachine &TM) { return new Mips16HardFloat(TM); } - diff --git a/lib/Target/Mips/Mips16HardFloat.h b/lib/Target/Mips/Mips16HardFloat.h deleted file mode 100644 index 586cc25..0000000 --- a/lib/Target/Mips/Mips16HardFloat.h +++ /dev/null @@ -1,43 +0,0 @@ -//===---- Mips16HardFloat.h for Mips16 Hard Float --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines a phase which implements part of the floating point -// interoperability between Mips16 and Mips32 code. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_MIPS_MIPS16HARDFLOAT_H -#define LLVM_LIB_TARGET_MIPS_MIPS16HARDFLOAT_H - -#include "MCTargetDesc/MipsMCTargetDesc.h" -#include "MipsTargetMachine.h" -#include "llvm/Pass.h" -#include "llvm/Target/TargetMachine.h" - -using namespace llvm; - -namespace llvm { - -class Mips16HardFloat : public ModulePass { -public: - static char ID; - - Mips16HardFloat(MipsTargetMachine &TM_) : ModulePass(ID), TM(TM_) {} - - const char *getPassName() const override { return "MIPS16 Hard Float Pass"; } - bool runOnModule(Module &M) override; - -protected: - const MipsTargetMachine &TM; -}; - -ModulePass *createMips16HardFloat(MipsTargetMachine &TM); - -} -#endif diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 976becc..00d4495 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -1,4 +1,3 @@ - //===-- Mips16InstrInfo.cpp - Mips16 Instruction Information --------------===// // // The LLVM Compiler Infrastructure @@ -25,6 +24,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include <cctype> using namespace llvm; @@ -32,7 +32,7 @@ using namespace llvm; #define DEBUG_TYPE "mips16-instrinfo" Mips16InstrInfo::Mips16InstrInfo(const MipsSubtarget &STI) - : MipsInstrInfo(STI, Mips::Bimm16), RI(STI) {} + : MipsInstrInfo(STI, Mips::Bimm16), RI() {} const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const { return RI; diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h index e7d0c07..f9b7387 100644 --- a/lib/Target/Mips/Mips16InstrInfo.h +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -18,7 +18,7 @@ #include "MipsInstrInfo.h" namespace llvm { - +class MipsSubtarget; class Mips16InstrInfo : public MipsInstrInfo { const Mips16RegisterInfo RI; diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp index c45acc4..ebd51d7 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.cpp +++ b/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -41,8 +41,7 @@ using namespace llvm; #define DEBUG_TYPE "mips16-registerinfo" -Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST) - : MipsRegisterInfo(ST) {} +Mips16RegisterInfo::Mips16RegisterInfo() : MipsRegisterInfo() {} bool Mips16RegisterInfo::requiresRegisterScavenging (const MachineFunction &MF) const { @@ -65,7 +64,7 @@ bool Mips16RegisterInfo::saveScavengerRegister const TargetRegisterClass *RC, unsigned Reg) const { DebugLoc DL; - const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); + const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo(); TII.copyPhysReg(MBB, I, DL, Mips::T0, Reg, true); TII.copyPhysReg(MBB, UseMI, DL, Reg, Mips::T0, true); return true; @@ -106,7 +105,7 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) FrameReg = Mips::SP; else { - const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); if (TFI->hasFP(MF)) { FrameReg = Mips::S0; } @@ -140,7 +139,7 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, DebugLoc DL = II->getDebugLoc(); unsigned NewImm; const Mips16InstrInfo &TII = - *static_cast<const Mips16InstrInfo *>(Subtarget.getInstrInfo()); + *static_cast<const Mips16InstrInfo 
*>(MF.getSubtarget().getInstrInfo()); FrameReg = TII.loadImmediate(FrameReg, Offset, MBB, II, DL, NewImm); Offset = SignExtend64<16>(NewImm); IsKill = true; diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h index 3cdf836..d67a79b 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.h +++ b/lib/Target/Mips/Mips16RegisterInfo.h @@ -21,7 +21,7 @@ class Mips16InstrInfo; class Mips16RegisterInfo : public MipsRegisterInfo { public: - Mips16RegisterInfo(const MipsSubtarget &Subtarget); + Mips16RegisterInfo(); bool requiresRegisterScavenging(const MachineFunction &MF) const override; diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 776e473..b1cb7f7 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -604,7 +604,7 @@ def : MipsInstAlias<"syncws", (SYNC 0x5), 0>; // Assembler Pseudo Instructions //===----------------------------------------------------------------------===// -class LoadImm64<string instr_asm, Operand Od, RegisterOperand RO> : +class LoadImmediate64<string instr_asm, Operand Od, RegisterOperand RO> : MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm64), !strconcat(instr_asm, "\t$rt, $imm64")> ; -def LoadImm64Reg : LoadImm64<"dli", imm64, GPR64Opnd>; +def LoadImm64 : LoadImmediate64<"dli", imm64, GPR64Opnd>; diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index c662e13..1eb3b2c 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -252,6 +252,7 @@ void MipsAsmPrinter::printSavedRegsBitmask() { // Set the CPU and FPU Bitmasks const MachineFrameInfo *MFI = MF->getFrameInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); // size of stack area to which FP callee-saved regs are saved. unsigned CPURegSize = Mips::GPR32RegClass.getSize(); @@ -267,8 +268,7 @@ void MipsAsmPrinter::printSavedRegsBitmask() { if (Mips::GPR32RegClass.contains(Reg)) break; - unsigned RegNum = - TM.getSubtargetImpl()->getRegisterInfo()->getEncodingValue(Reg); + unsigned RegNum = TRI->getEncodingValue(Reg); if (Mips::AFGR64RegClass.contains(Reg)) { FPUBitmask |= (3 << RegNum); CSFPRegsSize += AFGR64RegSize; @@ -283,8 +283,7 @@ void MipsAsmPrinter::printSavedRegsBitmask() { // Set CPU Bitmask. for (; i != e; ++i) { unsigned Reg = CSI[i].getReg(); - unsigned RegNum = - TM.getSubtargetImpl()->getRegisterInfo()->getEncodingValue(Reg); + unsigned RegNum = TRI->getEncodingValue(Reg); CPUBitmask |= (1 << RegNum); } @@ -309,7 +308,7 @@ void MipsAsmPrinter::printSavedRegsBitmask() { /// Frame Directive void MipsAsmPrinter::emitFrameDirective() { - const TargetRegisterInfo &RI = *TM.getSubtargetImpl()->getRegisterInfo(); + const TargetRegisterInfo &RI = *MF->getSubtarget().getRegisterInfo(); unsigned stackReg = RI.getFrameRegister(*MF); unsigned returnReg = RI.getRARegister(); @@ -438,7 +437,7 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock* // Print out an operand for an inline asm expression. bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, - unsigned AsmVariant,const char *ExtraCode, + unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { // Does this asm operand have a single letter operand modifier? 
if (ExtraCode && ExtraCode[0]) { @@ -540,18 +539,24 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { - int Offset = 0; + assert(OpNum + 1 < MI->getNumOperands() && "Insufficient operands"); + const MachineOperand &BaseMO = MI->getOperand(OpNum); + const MachineOperand &OffsetMO = MI->getOperand(OpNum + 1); + assert(BaseMO.isReg() && "Unexpected base pointer for inline asm memory operand."); + assert(OffsetMO.isImm() && "Unexpected offset for inline asm memory operand."); + int Offset = OffsetMO.getImm(); + // Currently we are expecting either no ExtraCode or 'D' if (ExtraCode) { if (ExtraCode[0] == 'D') - Offset = 4; + Offset += 4; else return true; // Unknown modifier. + // FIXME: M = high order bits + // FIXME: L = low order bits } - const MachineOperand &MO = MI->getOperand(OpNum); - assert(MO.isReg() && "unexpected inline asm memory operand"); - O << Offset << "($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")"; + O << Offset << "($" << MipsInstPrinter::getRegisterName(BaseMO.getReg()) << ")"; return false; } diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index abee185..dcd88f2 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -123,7 +123,7 @@ def CC_MipsN_SoftFloat : CallingConv<[ ]>; def CC_MipsN : CallingConv<[ - CCIfType<[i8, i16, i32], + CCIfType<[i8, i16, i32, i64], CCIfSubtargetNot<"isLittle()", CCIfInReg<CCPromoteToUpperBitsInType<i64>>>>, diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index ac03c0b..606964d 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -140,7 +140,7 @@ namespace { /// memory instruction can be moved to a delay slot. class MemDefsUses : public InspectMemInstr { public: - MemDefsUses(const MachineFrameInfo *MFI); + MemDefsUses(const DataLayout &DL, const MachineFrameInfo *MFI); private: typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType; @@ -158,6 +158,7 @@ namespace { const MachineFrameInfo *MFI; SmallPtrSet<ValueType, 4> Uses, Defs; + const DataLayout &DL; /// Flags indicating whether loads or stores with no underlying objects have /// been seen. @@ -212,8 +213,8 @@ namespace { /// moved to the delay slot. Returns true on success. template<typename IterTy> bool searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End, - RegDefsUses &RegDU, InspectMemInstr &IM, - IterTy &Filler, Iter Slot) const; + RegDefsUses &RegDU, InspectMemInstr &IM, Iter Slot, + IterTy &Filler) const; /// This function searches in the backward direction for an instruction that /// can be moved to the delay slot. Returns true on success. 
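The PrintAsmMemoryOperand hunk in the MipsAsmPrinter diff above changes how inline-asm memory operands print: the operand is now a (base register, immediate offset) pair read from two machine operands, and the 'D' modifier (second word of a double-word value) adds 4 to the real displacement instead of replacing it — the old code printed 0(reg), or a bare 4(reg) under 'D', regardless of the actual address. A minimal standalone sketch of the new arithmetic, in plain C++ rather than the LLVM API, with a made-up base register and offsets:

#include <cstdio>

// Mirror of the rewritten logic: take the operand's real displacement and
// let the 'D' modifier select the second 32-bit word by adding 4 to it.
static void printMemOperand(const char *Base, int Offset, bool DModifier) {
  if (DModifier)
    Offset += 4; // the old code printed a constant 4 here, dropping Offset
  std::printf("%d($%s)\n", Offset, Base);
}

int main() {
  printMemOperand("sp", 8, false); // %0  -> "8($sp)"
  printMemOperand("sp", 8, true);  // %D0 -> "12($sp)"
  return 0;
}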
@@ -320,7 +321,8 @@ void RegDefsUses::setCallerSaved(const MachineInstr &MI) { CallerSavedRegs.reset(Mips::ZERO); CallerSavedRegs.reset(Mips::ZERO_64); - for (const MCPhysReg *R = TRI.getCalleeSavedRegs(); *R; ++R) + for (const MCPhysReg *R = TRI.getCalleeSavedRegs(MI.getParent()->getParent()); + *R; ++R) for (MCRegAliasIterator AI(*R, &TRI, true); AI.isValid(); ++AI) CallerSavedRegs.reset(*AI); @@ -427,9 +429,9 @@ bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) { return true; } -MemDefsUses::MemDefsUses(const MachineFrameInfo *MFI_) - : InspectMemInstr(false), MFI(MFI_), SeenNoObjLoad(false), - SeenNoObjStore(false) {} +MemDefsUses::MemDefsUses(const DataLayout &DL, const MachineFrameInfo *MFI_) + : InspectMemInstr(false), MFI(MFI_), DL(DL), SeenNoObjLoad(false), + SeenNoObjStore(false) {} bool MemDefsUses::hasHazard_(const MachineInstr &MI) { bool HasHazard = false; @@ -482,7 +484,7 @@ getUnderlyingObjects(const MachineInstr &MI, const Value *V = (*MI.memoperands_begin())->getValue(); SmallVector<Value *, 4> Objs; - GetUnderlyingObjects(const_cast<Value *>(V), Objs); + GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL); for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), E = Objs.end(); I != E; ++I) { @@ -639,8 +641,8 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) { template<typename IterTy> bool Filler::searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End, - RegDefsUses &RegDU, InspectMemInstr& IM, - IterTy &Filler, Iter Slot) const { + RegDefsUses &RegDU, InspectMemInstr& IM, Iter Slot, + IterTy &Filler) const { for (IterTy I = Begin; I != End; ++I) { // skip debug value if (I->isDebugValue()) @@ -688,13 +690,13 @@ bool Filler::searchBackward(MachineBasicBlock &MBB, Iter Slot) const { return false; RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo()); - MemDefsUses MemDU(MBB.getParent()->getFrameInfo()); + MemDefsUses MemDU(*TM.getDataLayout(), MBB.getParent()->getFrameInfo()); ReverseIter Filler; RegDU.init(*Slot); - if (!searchRange(MBB, ReverseIter(Slot), MBB.rend(), RegDU, MemDU, Filler, - Slot)) + if (!searchRange(MBB, ReverseIter(Slot), MBB.rend(), RegDU, MemDU, Slot, + Filler)) return false; MBB.splice(std::next(Slot), &MBB, std::next(Filler).base()); @@ -714,7 +716,7 @@ bool Filler::searchForward(MachineBasicBlock &MBB, Iter Slot) const { RegDU.setCallerSaved(*Slot); - if (!searchRange(MBB, std::next(Slot), MBB.end(), RegDU, NM, Filler, Slot)) + if (!searchRange(MBB, std::next(Slot), MBB.end(), RegDU, NM, Slot, Filler)) return false; MBB.splice(std::next(Slot), &MBB, Filler); @@ -754,11 +756,11 @@ bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const { IM.reset(new LoadFromStackOrConst()); } else { const MachineFrameInfo *MFI = MBB.getParent()->getFrameInfo(); - IM.reset(new MemDefsUses(MFI)); + IM.reset(new MemDefsUses(*TM.getDataLayout(), MFI)); } - if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Filler, - Slot)) + if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Slot, + Filler)) return false; insertDelayFiller(Filler, BrMap); diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index 7d69659..7de0081 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -89,6 +89,7 @@ class MipsFastISel final : public FastISel { private: // Selection routines. 
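// The MipsFastISel additions that follow wire bitwise And/Or/Xor into fast
// instruction selection: isTypeSupported accepts i32 plus the
// sign/zero-extendable i1/i8/i16 cases, emitLogicalOp canonicalizes a
// constant operand onto the RHS and emits the register-register AND/OR/XOR,
// and fastSelectInstruction dispatches the three IR opcodes to
// selectLogicalOp (see the later hunks in this file).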
+ bool selectLogicalOp(const Instruction *I); bool selectLoad(const Instruction *I); bool selectStore(const Instruction *I); bool selectBranch(const Instruction *I); @@ -102,6 +103,7 @@ private: // Utility helper routines. bool isTypeLegal(Type *Ty, MVT &VT); + bool isTypeSupported(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); bool computeAddress(const Value *Obj, Address &Addr); bool computeCallAddress(const Value *V, Address &Addr); @@ -129,6 +131,9 @@ private: unsigned getRegEnsuringSimpleIntegerWidening(const Value *, bool IsUnsigned); + unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS, + const Value *RHS); + unsigned materializeFP(const ConstantFP *CFP, MVT VT); unsigned materializeGV(const GlobalValue *GV, MVT VT); unsigned materializeInt(const Constant *C, MVT VT); @@ -210,6 +215,43 @@ CCAssignFn *MipsFastISel::CCAssignFnForCall(CallingConv::ID CC) const { return CC_MipsO32; } +unsigned MipsFastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, + const Value *LHS, const Value *RHS) { + // Canonicalize immediates to the RHS first. + if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS)) + std::swap(LHS, RHS); + + unsigned Opc; + if (ISDOpc == ISD::AND) { + Opc = Mips::AND; + } else if (ISDOpc == ISD::OR) { + Opc = Mips::OR; + } else if (ISDOpc == ISD::XOR) { + Opc = Mips::XOR; + } else + llvm_unreachable("unexpected opcode"); + + unsigned LHSReg = getRegForValue(LHS); + unsigned ResultReg = createResultReg(&Mips::GPR32RegClass); + if (!ResultReg) + return 0; + + unsigned RHSReg; + if (!LHSReg) + return 0; + + if (const auto *C = dyn_cast<ConstantInt>(RHS)) + RHSReg = materializeInt(C, MVT::i32); + else + RHSReg = getRegForValue(RHS); + + if (!RHSReg) + return 0; + + emitInst(Opc, ResultReg).addReg(LHSReg).addReg(RHSReg); + return ResultReg; +} + unsigned MipsFastISel::materializeInt(const Constant *C, MVT VT) { if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) return 0; @@ -421,6 +463,21 @@ bool MipsFastISel::isTypeLegal(Type *Ty, MVT &VT) { return TLI.isTypeLegal(VT); } +bool MipsFastISel::isTypeSupported(Type *Ty, MVT &VT) { + if (Ty->isVectorTy()) + return false; + + if (isTypeLegal(Ty, VT)) + return true; + + // If this is a type than can be sign or zero-extended to a basic operation + // go ahead and accept it now. + if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) + return true; + + return false; +} + bool MipsFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) { if (isTypeLegal(Ty, VT)) return true; @@ -671,6 +728,33 @@ bool MipsFastISel::emitStore(MVT VT, unsigned SrcReg, Address &Addr, return false; } +bool MipsFastISel::selectLogicalOp(const Instruction *I) { + MVT VT; + if (!isTypeSupported(I->getType(), VT)) + return false; + + unsigned ResultReg; + switch (I->getOpcode()) { + default: + llvm_unreachable("Unexpected instruction."); + case Instruction::And: + ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); + break; + case Instruction::Or: + ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); + break; + case Instruction::Xor: + ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); + break; + } + + if (!ResultReg) + return false; + + updateValueMap(I, ResultReg); + return true; +} + bool MipsFastISel::selectLoad(const Instruction *I) { // Atomic loads need special handling. if (cast<LoadInst>(I)->isAtomic()) @@ -1083,7 +1167,7 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) { // Add a register mask with the call-preserved registers. 
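// (API change threaded through from the target-independent layer:
// getCallPreservedMask now takes the MachineFunction along with the calling
// convention, so preserved-register masks can vary with per-function
// subtarget features instead of being fixed per TargetMachine; the matching
// signature change in MipsRegisterInfo appears further down.)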
// Proper defs for return values will be added by setPhysRegsDeadExcept(). - MIB.addRegMask(TRI.getCallPreservedMask(CC)); + MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); CLI.Call = MIB; @@ -1312,6 +1396,10 @@ bool MipsFastISel::fastSelectInstruction(const Instruction *I) { return selectLoad(I); case Instruction::Store: return selectStore(I); + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + return selectLogicalOp(I); case Instruction::Br: return selectBranch(I); case Instruction::Ret: @@ -1354,7 +1442,7 @@ unsigned MipsFastISel::getRegEnsuringSimpleIntegerWidening(const Value *V, void MipsFastISel::simplifyAddress(Address &Addr) { if (!isInt<16>(Addr.getOffset())) { unsigned TempReg = - materialize32BitInt(Addr.getOffset(), &Mips::GPR32RegClass); + materialize32BitInt(Addr.getOffset(), &Mips::GPR32RegClass); unsigned DestReg = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::ADDu, DestReg).addReg(TempReg).addReg(Addr.getReg()); Addr.setReg(DestReg); diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 21fc8ce..c78c329 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -230,9 +230,18 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { } bool MipsDAGToDAGISel:: -SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, +SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { - assert(ConstraintCode == 'm' && "unexpected asm memory constraint"); - OutOps.push_back(Op); - return false; + // All memory constraints can at least accept raw pointers. + switch(ConstraintID) { + default: + llvm_unreachable("Unexpected asm memory constraint"); + case InlineAsm::Constraint_i: + case InlineAsm::Constraint_m: + case InlineAsm::Constraint_R: + case InlineAsm::Constraint_ZC: + OutOps.push_back(Op); + return false; + } + return true; } diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h index 6b72877..aec731e 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.h +++ b/lib/Target/Mips/MipsISelDAGToDAG.h @@ -125,7 +125,7 @@ private: virtual void processFunctionAfterISel(MachineFunction &MF) = 0; bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, + unsigned ConstraintID, std::vector<SDValue> &OutOps) override; }; } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 9253b2e..e4bae03 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -617,6 +617,33 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue performCMovFPCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget &Subtarget) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + SDValue ValueIfTrue = N->getOperand(0), ValueIfFalse = N->getOperand(2); + + ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(ValueIfFalse); + if (!FalseC || FalseC->getZExtValue()) + return SDValue(); + + // Since RHS (False) is 0, we swap the order of the True/False operands + // (obviously also inverting the condition) so that we can + // take advantage of conditional moves using the $0 register. + // Example: + // return (a != 0) ? x : 0; + // load $reg, x + // movz $reg, $0, a + unsigned Opc = (N->getOpcode() == MipsISD::CMovFP_T) ? 
MipsISD::CMovFP_F : + MipsISD::CMovFP_T; + + SDValue FCC = N->getOperand(1), Glue = N->getOperand(3); + return DAG.getNode(Opc, SDLoc(N), ValueIfFalse.getValueType(), + ValueIfFalse, FCC, ValueIfTrue, Glue); +} + static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { @@ -750,6 +777,9 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) return performDivRemCombine(N, DAG, DCI, Subtarget); case ISD::SELECT: return performSELECTCombine(N, DAG, DCI, Subtarget); + case MipsISD::CMovFP_F: + case MipsISD::CMovFP_T: + return performCMovFPCombine(N, DAG, DCI, Subtarget); case ISD::AND: return performANDCombine(N, DAG, DCI, Subtarget); case ISD::OR: @@ -2451,7 +2481,8 @@ getOpndList(SmallVectorImpl<SDValue> &Ops, // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(CLI.CallConv); + const uint32_t *Mask = + TRI->getCallPreservedMask(CLI.DAG.getMachineFunction(), CLI.CallConv); assert(Mask && "Missing call preserved mask for calling convention"); if (Subtarget.inMips16HardFloat()) { if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(CLI.Callee)) { @@ -3001,6 +3032,15 @@ MipsTargetLowering::CanLowerReturn(CallingConv::ID CallConv, return CCInfo.CheckReturn(Outs, RetCC_Mips); } +bool +MipsTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { + if (Subtarget.hasMips3() && Subtarget.abiUsesSoftFloat()) { + if (Type == MVT::i32) + return true; + } + return IsSigned; +} + SDValue MipsTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, @@ -3133,6 +3173,10 @@ getConstraintType(const std::string &Constraint) const return C_Memory; } } + + if (Constraint == "ZC") + return C_Memory; + return TargetLowering::getConstraintType(Constraint); } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 9f86a43..40b6661 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -475,6 +475,8 @@ namespace llvm { const SmallVectorImpl<SDValue> &OutVals, SDLoc dl, SelectionDAG &DAG) const override; + bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override; + // Inline asm support ConstraintType getConstraintType(const std::string &Constraint) const override; @@ -503,6 +505,15 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; + unsigned getInlineAsmMemConstraint( + const std::string &ConstraintCode) const override { + if (ConstraintCode == "R") + return InlineAsm::Constraint_R; + else if (ConstraintCode == "ZC") + return InlineAsm::Constraint_ZC; + return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); + } + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index db149d4..7b2b289 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -29,7 +29,7 @@ #include "MipsGenInstrInfo.inc" namespace llvm { - +class MipsSubtarget; class MipsInstrInfo : public MipsGenInstrInfo { virtual void anchor(); protected: diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 04a16b3..c937d2b 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ 
-1000,7 +1000,7 @@ class ExtBase<string opstr, RegisterOperand RO, Operand PosOpnd, SDPatternOperator Op = null_frag>: InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, size_ext:$size), !strconcat(opstr, " $rt, $rs, $pos, $size"), - [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size))], NoItinerary, + [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size))], II_EXT, FrmR, opstr>, ISA_MIPS32R2; class InsBase<string opstr, RegisterOperand RO, Operand PosOpnd, @@ -1008,7 +1008,7 @@ class InsBase<string opstr, RegisterOperand RO, Operand PosOpnd, InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, size_ins:$size, RO:$src), !strconcat(opstr, " $rt, $rs, $pos, $size"), [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size, RO:$src))], - NoItinerary, FrmR, opstr>, ISA_MIPS32R2 { + II_INS, FrmR, opstr>, ISA_MIPS32R2 { let Constraints = "$src = $rt"; } @@ -1140,12 +1140,13 @@ def XORi : MMRel, ArithLogicI<"xori", uimm16, GPR32Opnd, II_XORI, immZExt16, xor>, ADDI_FM<0xe>; def LUi : MMRel, LoadUpper<"lui", GPR32Opnd, uimm16>, LUI_FM; - +let AdditionalPredicates = [NotInMicroMips] in { /// Arithmetic Instructions (3-Operand, R-Type) def ADDu : MMRel, ArithLogicR<"addu", GPR32Opnd, 1, II_ADDU, add>, ADD_FM<0, 0x21>; def SUBu : MMRel, ArithLogicR<"subu", GPR32Opnd, 0, II_SUBU, sub>, ADD_FM<0, 0x23>; +} let Defs = [HI0, LO0] in def MUL : MMRel, ArithLogicR<"mul", GPR32Opnd, 1, II_MUL, mul>, ADD_FM<0x1c, 2>, ISA_MIPS32_NOT_32R6_64R6; @@ -1579,6 +1580,8 @@ def : MipsInstAlias<"sltu $rt, $rs, $imm", (SLTiu GPR32Opnd:$rt, GPR32Opnd:$rs, simm16:$imm), 0>; def : MipsInstAlias<"xor $rs, $rt, $imm", (XORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>; +def : MipsInstAlias<"xor $rs, $imm", + (XORi GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>; def : MipsInstAlias<"or $rs, $rt, $imm", (ORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>; def : MipsInstAlias<"or $rs, $imm", @@ -1639,20 +1642,21 @@ def : MipsInstAlias<"sync", // Assembler Pseudo Instructions //===----------------------------------------------------------------------===// -class LoadImm32<string instr_asm, Operand Od, RegisterOperand RO> : +class LoadImmediate32<string instr_asm, Operand Od, RegisterOperand RO> : MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32), !strconcat(instr_asm, "\t$rt, $imm32")> ; -def LoadImm32Reg : LoadImm32<"li", uimm5, GPR32Opnd>; +def LoadImm32 : LoadImmediate32<"li", uimm5, GPR32Opnd>; -class LoadAddress<string instr_asm, Operand MemOpnd, RegisterOperand RO> : +class LoadAddressFromReg32<string instr_asm, Operand MemOpnd, + RegisterOperand RO> : MipsAsmPseudoInst<(outs RO:$rt), (ins MemOpnd:$addr), !strconcat(instr_asm, "\t$rt, $addr")> ; -def LoadAddr32Reg : LoadAddress<"la", mem, GPR32Opnd>; +def LoadAddrReg32 : LoadAddressFromReg32<"la", mem, GPR32Opnd>; -class LoadAddressImm<string instr_asm, Operand Od, RegisterOperand RO> : +class LoadAddressFromImm32<string instr_asm, Operand Od, RegisterOperand RO> : MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32), !strconcat(instr_asm, "\t$rt, $imm32")> ; -def LoadAddr32Imm : LoadAddressImm<"la", uimm5, GPR32Opnd>; +def LoadAddrImm32 : LoadAddressFromImm32<"la", uimm5, GPR32Opnd>; def JalTwoReg : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins GPR32Opnd:$rs), "jal\t$rd, $rs"> ; @@ -1761,9 +1765,11 @@ def : WrapperPat<tblockaddress, ADDiu, GPR32>; def : WrapperPat<tjumptable, ADDiu, GPR32>; def : WrapperPat<tglobaltlsaddr, ADDiu, GPR32>; +let AdditionalPredicates = [NotInMicroMips] in { // Mips does not have "not", so we expand our way def : MipsPat<(not GPR32:$in), (NOR GPR32Opnd:$in, ZERO)>; +} // 
extended loads def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>; diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index 821392e..5258181 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -22,6 +22,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" using namespace llvm; diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp index 30b93dc..09e722d 100644 --- a/lib/Target/Mips/MipsMachineFunction.cpp +++ b/lib/Target/Mips/MipsMachineFunction.cpp @@ -79,14 +79,19 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() { if (GlobalBaseReg) return GlobalBaseReg; + MipsSubtarget const &STI = + static_cast<const MipsSubtarget &>(MF.getSubtarget()); + const TargetRegisterClass *RC = - static_cast<const MipsSubtarget &>(MF.getSubtarget()).inMips16Mode() + STI.inMips16Mode() ? &Mips::CPU16RegsRegClass - : static_cast<const MipsTargetMachine &>(MF.getTarget()) - .getABI() - .IsN64() - ? &Mips::GPR64RegClass - : &Mips::GPR32RegClass; + : STI.inMicroMipsMode() + ? &Mips::GPRMM16RegClass + : static_cast<const MipsTargetMachine &>(MF.getTarget()) + .getABI() + .IsN64() + ? &Mips::GPR64RegClass + : &Mips::GPR32RegClass; return GlobalBaseReg = MF.getRegInfo().createVirtualRegister(RC); } diff --git a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp index b011e8f..b18a673 100644 --- a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp @@ -8,15 +8,36 @@ // //===----------------------------------------------------------------------===// -#include "MipsISelDAGToDAG.h" -#include "MipsModuleISelDAGToDAG.h" -#include "llvm/Support/Casting.h" +#include "Mips.h" +#include "MipsTargetMachine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +using namespace llvm; + #define DEBUG_TYPE "mips-isel" -namespace llvm { +namespace { + class MipsModuleDAGToDAGISel : public MachineFunctionPass { + public: + static char ID; + + explicit MipsModuleDAGToDAGISel(MipsTargetMachine &TM_) + : MachineFunctionPass(ID), TM(TM_) {} + + // Pass Name + const char *getPassName() const override { + return "MIPS DAG->DAG Pattern Instruction Selection"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + protected: + MipsTargetMachine &TM; + }; + + char MipsModuleDAGToDAGISel::ID = 0; +} bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { DEBUG(errs() << "In MipsModuleDAGToDAGISel::runMachineFunction\n"); @@ -24,13 +45,6 @@ bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { return false; } -char MipsModuleDAGToDAGISel::ID = 0; - -} - - -llvm::FunctionPass *llvm::createMipsModuleISelDag(MipsTargetMachine &TM) { +llvm::FunctionPass *llvm::createMipsModuleISelDagPass(MipsTargetMachine &TM) { return new MipsModuleDAGToDAGISel(TM); } - - diff --git a/lib/Target/Mips/MipsModuleISelDAGToDAG.h b/lib/Target/Mips/MipsModuleISelDAGToDAG.h deleted file mode 100644 index 85bae47..0000000 --- a/lib/Target/Mips/MipsModuleISelDAGToDAG.h +++ /dev/null @@ -1,58 +0,0 @@ -//===---- MipsModuleISelDAGToDAG.h - Change Subtarget --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file defines a pass used to change the subtarget for the -// Mips Instruction selector. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_MIPS_MIPSMODULEISELDAGTODAG_H -#define LLVM_LIB_TARGET_MIPS_MIPSMODULEISELDAGTODAG_H - -#include "Mips.h" -#include "MipsSubtarget.h" -#include "MipsTargetMachine.h" -#include "llvm/CodeGen/SelectionDAGISel.h" - - -//===----------------------------------------------------------------------===// -// Instruction Selector Implementation -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// MipsModuleDAGToDAGISel - MIPS specific code to select MIPS machine -// instructions for SelectionDAG operations. -//===----------------------------------------------------------------------===// -namespace llvm { - -class MipsModuleDAGToDAGISel : public MachineFunctionPass { -public: - - static char ID; - - explicit MipsModuleDAGToDAGISel(MipsTargetMachine &TM_) - : MachineFunctionPass(ID), TM(TM_) {} - - // Pass Name - const char *getPassName() const override { - return "MIPS DAG->DAG Pattern Instruction Selection"; - } - - bool runOnMachineFunction(MachineFunction &MF) override; - -protected: - MipsTargetMachine &TM; -}; - -/// createMipsISelDag - This pass converts a legalized DAG into a -/// MIPS-specific DAG, ready for instruction scheduling. -FunctionPass *createMipsModuleISelDag(MipsTargetMachine &TM); -} - -#endif diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp index 7aae964..b6cd791 100644 --- a/lib/Target/Mips/MipsOs16.cpp +++ b/lib/Target/Mips/MipsOs16.cpp @@ -11,14 +11,16 @@ // //===----------------------------------------------------------------------===// -#include "MipsOs16.h" +#include "llvm/IR/Instructions.h" +#include "Mips.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#define DEBUG_TYPE "mips-os16" +using namespace llvm; +#define DEBUG_TYPE "mips-os16" static cl::opt<std::string> Mips32FunctionMask( "mips32-function-mask", @@ -27,70 +29,83 @@ static cl::opt<std::string> Mips32FunctionMask( cl::Hidden); namespace { + class MipsOs16 : public ModulePass { + public: + static char ID; + + MipsOs16() : ModulePass(ID) {} + + const char *getPassName() const override { + return "MIPS Os16 Optimization"; + } + + bool runOnModule(Module &M) override; + }; + + char MipsOs16::ID = 0; +} - // Figure out if we need float point based on the function signature. - // We need to move variables in and/or out of floating point - // registers because of the ABI - // - bool needsFPFromSig(Function &F) { - Type* RetType = F.getReturnType(); - switch (RetType->getTypeID()) { +// Figure out if we need float point based on the function signature. 
+// We need to move variables in and/or out of floating point +// registers because of the ABI +// +static bool needsFPFromSig(Function &F) { + Type* RetType = F.getReturnType(); + switch (RetType->getTypeID()) { + case Type::FloatTyID: + case Type::DoubleTyID: + return true; + default: + ; + } + if (F.arg_size() >=1) { + Argument &Arg = F.getArgumentList().front(); + switch (Arg.getType()->getTypeID()) { case Type::FloatTyID: case Type::DoubleTyID: return true; default: ; } - if (F.arg_size() >=1) { - Argument &Arg = F.getArgumentList().front(); - switch (Arg.getType()->getTypeID()) { - case Type::FloatTyID: - case Type::DoubleTyID: - return true; - default: - ; - } - } - return false; } + return false; +} - // Figure out if the function will need floating point operations - // - bool needsFP(Function &F) { - if (needsFPFromSig(F)) - return true; - for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); +// Figure out if the function will need floating point operations +// +static bool needsFP(Function &F) { + if (needsFPFromSig(F)) + return true; + for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - const Instruction &Inst = *I; - switch (Inst.getOpcode()) { - case Instruction::FAdd: - case Instruction::FSub: - case Instruction::FMul: - case Instruction::FDiv: - case Instruction::FRem: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::FCmp: + const Instruction &Inst = *I; + switch (Inst.getOpcode()) { + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::FCmp: + return true; + default: + ; + } + if (const CallInst *CI = dyn_cast<CallInst>(I)) { + DEBUG(dbgs() << "Working on call" << "\n"); + Function &F_ = *CI->getCalledFunction(); + if (needsFPFromSig(F_)) return true; - default: - ; - } - if (const CallInst *CI = dyn_cast<CallInst>(I)) { - DEBUG(dbgs() << "Working on call" << "\n"); - Function &F_ = *CI->getCalledFunction(); - if (needsFPFromSig(F_)) - return true; - } } - return false; - } + } + return false; } -namespace llvm { bool MipsOs16::runOnModule(Module &M) { @@ -136,12 +151,6 @@ bool MipsOs16::runOnModule(Module &M) { return modified; } -char MipsOs16::ID = 0; - -} - -ModulePass *llvm::createMipsOs16(MipsTargetMachine &TM) { +ModulePass *llvm::createMipsOs16Pass(MipsTargetMachine &TM) { return new MipsOs16; } - - diff --git a/lib/Target/Mips/MipsOs16.h b/lib/Target/Mips/MipsOs16.h deleted file mode 100644 index 77183ec..0000000 --- a/lib/Target/Mips/MipsOs16.h +++ /dev/null @@ -1,47 +0,0 @@ -//===---- MipsOs16.h for Mips Option -Os16 --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines an optimization phase for the MIPS target. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_MIPS_MIPSOS16_H -#define LLVM_LIB_TARGET_MIPS_MIPSOS16_H - -#include "MCTargetDesc/MipsMCTargetDesc.h" -#include "MipsTargetMachine.h" -#include "llvm/Pass.h" -#include "llvm/Target/TargetMachine.h" - -using namespace llvm; - -namespace llvm { - -class MipsOs16 : public ModulePass { - -public: - static char ID; - - MipsOs16() : ModulePass(ID) { - - } - - const char *getPassName() const override { - return "MIPS Os16 Optimization"; - } - - bool runOnModule(Module &M) override; - -}; - -ModulePass *createMipsOs16(MipsTargetMachine &TM); - -} - -#endif diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 2110c03..0ea48b1 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -43,14 +43,14 @@ using namespace llvm; #define GET_REGINFO_TARGET_DESC #include "MipsGenRegisterInfo.inc" -MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST) - : MipsGenRegisterInfo(Mips::RA), Subtarget(ST) {} +MipsRegisterInfo::MipsRegisterInfo() : MipsGenRegisterInfo(Mips::RA) {} unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; } const TargetRegisterClass * MipsRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const { + const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>(); return Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; } @@ -63,7 +63,7 @@ MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, case Mips::GPR32RegClassID: case Mips::GPR64RegClassID: case Mips::DSPRRegClassID: { - const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); return 28 - TFI->hasFP(MF); } case Mips::FGR32RegClassID: @@ -82,6 +82,7 @@ MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, /// Mips Callee Saved Registers const MCPhysReg * MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + const MipsSubtarget &Subtarget = MF->getSubtarget<MipsSubtarget>(); if (Subtarget.isSingleFloat()) return CSR_SingleFloatOnly_SaveList; @@ -100,8 +101,10 @@ MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_O32_SaveList; } -const uint32_t* -MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const { +const uint32_t * +MipsRegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const { + const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>(); if (Subtarget.isSingleFloat()) return CSR_SingleFloatOnly_RegMask; @@ -135,6 +138,7 @@ getReservedRegs(const MachineFunction &MF) const { }; BitVector Reserved(getNumRegs()); + const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>(); typedef TargetRegisterClass::const_iterator RegIter; for (unsigned I = 0; I < array_lengthof(ReservedGPR32); ++I) @@ -257,6 +261,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned MipsRegisterInfo:: getFrameRegister(const MachineFunction &MF) const { + const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>(); const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); bool IsN64 = static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI().IsN64(); diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 9ec4a38..031b93e 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -21,15 +21,9 @@ 
#include "MipsGenRegisterInfo.inc" namespace llvm { -class MipsSubtarget; -class Type; - class MipsRegisterInfo : public MipsGenRegisterInfo { -protected: - const MipsSubtarget &Subtarget; - public: - MipsRegisterInfo(const MipsSubtarget &Subtarget); + MipsRegisterInfo(); /// getRegisterNumbering - Given the enum value for some register, e.g. /// Mips::RA, return the number that it corresponds to (e.g. 31). @@ -47,9 +41,9 @@ public: unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; - const MCPhysReg * - getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; - const uint32_t *getCallPreservedMask(CallingConv::ID) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const override; static const uint32_t *getMips16RetHelperMask(); BitVector getReservedRegs(const MachineFunction &MF) const override; diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 0761ded..a598c3f 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -258,8 +258,12 @@ SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag, CurDAG->getTargetConstant(Mips::sub_32, VT)); } - SDNode *AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT, - SDValue(Carry, 0), RHS); + // Generate a second addition only if we know that RHS is not a + // constant-zero node. + SDNode *AddCarry = Carry; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS); + if (!C || C->getZExtValue()) + AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT, SDValue(Carry, 0), RHS); return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, SDValue(AddCarry, 0)); @@ -378,6 +382,17 @@ bool MipsSEDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base, selectAddrDefault(Addr, Base, Offset); } +bool MipsSEDAGToDAGISel::selectAddrRegImm9(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 9)) + return true; + + return false; +} + bool MipsSEDAGToDAGISel::selectAddrRegImm10(SDValue Addr, SDValue &Base, SDValue &Offset) const { if (selectAddrFrameIndex(Addr, Base, Offset)) @@ -401,6 +416,17 @@ bool MipsSEDAGToDAGISel::selectAddrRegImm12(SDValue Addr, SDValue &Base, return false; } +bool MipsSEDAGToDAGISel::selectAddrRegImm16(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 16)) + return true; + + return false; +} + bool MipsSEDAGToDAGISel::selectIntAddrMM(SDValue Addr, SDValue &Base, SDValue &Offset) const { return selectAddrRegImm12(Addr, Base, Offset) || @@ -912,6 +938,60 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) { return std::make_pair(false, nullptr); } +bool MipsSEDAGToDAGISel:: +SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, + std::vector<SDValue> &OutOps) { + SDValue Base, Offset; + + switch(ConstraintID) { + default: + llvm_unreachable("Unexpected asm memory constraint"); + // All memory constraints can at least accept raw pointers. 
+ case InlineAsm::Constraint_i: + case InlineAsm::Constraint_R: + OutOps.push_back(Op); + OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); + return false; + case InlineAsm::Constraint_m: + if (selectAddrRegImm16(Op, Base, Offset)) { + OutOps.push_back(Base); + OutOps.push_back(Offset); + return false; + } + OutOps.push_back(Op); + OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); + return false; + case InlineAsm::Constraint_ZC: + // ZC matches whatever the pref, ll, and sc instructions can handle for the + // given subtarget. + if (Subtarget->inMicroMipsMode()) { + // On microMIPS, they can handle 12-bit offsets. + if (selectAddrRegImm12(Op, Base, Offset)) { + OutOps.push_back(Base); + OutOps.push_back(Offset); + return false; + } + } else if (Subtarget->hasMips32r6()) { + // On MIPS32r6/MIPS64r6, they can only handle 9-bit offsets. + if (selectAddrRegImm9(Op, Base, Offset)) { + OutOps.push_back(Base); + OutOps.push_back(Offset); + return false; + } + } else if (selectAddrRegImm16(Op, Base, Offset)) { + // Prior to MIPS32r6/MIPS64r6, they can handle 16-bit offsets. + OutOps.push_back(Base); + OutOps.push_back(Offset); + return false; + } + // In all cases, 0-bit offsets are acceptable. + OutOps.push_back(Op); + OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); + return false; + } + return true; +} + FunctionPass *llvm::createMipsSEISelDag(MipsTargetMachine &TM) { return new MipsSEDAGToDAGISel(TM); } diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h index 2d24eb4..a11fcf4 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -56,12 +56,18 @@ private: bool selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const override; + bool selectAddrRegImm9(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + bool selectAddrRegImm10(SDValue Addr, SDValue &Base, SDValue &Offset) const; bool selectAddrRegImm12(SDValue Addr, SDValue &Base, SDValue &Offset) const; + bool selectAddrRegImm16(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + bool selectIntAddrMM(SDValue Addr, SDValue &Base, SDValue &Offset) const override; @@ -111,6 +117,10 @@ private: // Insert instructions to initialize the global base register in the // first MBB of the function. void initGlobalBaseReg(MachineFunction &MF); + + bool SelectInlineAsmMemoryOperand(const SDValue &Op, + unsigned ConstraintID, + std::vector<SDValue> &OutOps) override; }; FunctionPass *createMipsSEISelDag(MipsTargetMachine &TM); diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index 74f291f..b992579 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -27,7 +27,7 @@ using namespace llvm; MipsSEInstrInfo::MipsSEInstrInfo(const MipsSubtarget &STI) : MipsInstrInfo(STI, STI.getRelocationModel() == Reloc::PIC_ ? 
Mips::B : Mips::J), - RI(STI) {} + RI() {} const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const { return RI; diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp index 55c6638..b89207e 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -18,6 +18,7 @@ #include "MipsMachineFunction.h" #include "MipsSEInstrInfo.h" #include "MipsSubtarget.h" +#include "MipsTargetMachine.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -41,8 +42,7 @@ using namespace llvm; #define DEBUG_TYPE "mips-reg-info" -MipsSERegisterInfo::MipsSERegisterInfo(const MipsSubtarget &ST) - : MipsRegisterInfo(ST) {} +MipsSERegisterInfo::MipsSERegisterInfo() : MipsRegisterInfo() {} bool MipsSERegisterInfo:: requiresRegisterScavenging(const MachineFunction &MF) const { @@ -110,6 +110,8 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, MachineFunction &MF = *MI.getParent()->getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + bool isN64 = + static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI().IsN64(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); int MinCSFI = 0; @@ -132,7 +134,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, unsigned FrameReg; if ((FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) || EhDataRegFI) - FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; + FrameReg = isN64 ? Mips::SP_64 : Mips::SP; else FrameReg = getFrameRegister(MF); @@ -165,9 +167,9 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, // (where n < 16) and doesn't, but does fit into 16-bits then use an ADDiu MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = II->getDebugLoc(); - unsigned ADDiu = Subtarget.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; + unsigned ADDiu = isN64 ? Mips::DADDiu : Mips::ADDiu; const TargetRegisterClass *RC = - Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; + isN64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo(); unsigned Reg = RegInfo.createVirtualRegister(RC); const MipsSEInstrInfo &TII = @@ -183,7 +185,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, // instructions. MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = II->getDebugLoc(); - unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu; + unsigned ADDu = isN64 ? 
Mips::DADDu : Mips::ADDu; unsigned NewImm = 0; const MipsSEInstrInfo &TII = *static_cast<const MipsSEInstrInfo *>( diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h index 6b70d07..ebae190 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.h +++ b/lib/Target/Mips/MipsSERegisterInfo.h @@ -22,7 +22,7 @@ class MipsSEInstrInfo; class MipsSERegisterInfo : public MipsRegisterInfo { public: - MipsSERegisterInfo(const MipsSubtarget &Subtarget); + MipsSERegisterInfo(); bool requiresRegisterScavenging(const MachineFunction &MF) const override; diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td index ea98199..54b5d28 100644 --- a/lib/Target/Mips/MipsSchedule.td +++ b/lib/Target/Mips/MipsSchedule.td @@ -65,7 +65,9 @@ def II_DSRL32 : InstrItinClass; def II_DSRLV : InstrItinClass; def II_DSUBU : InstrItinClass; def II_DSUB : InstrItinClass; +def II_EXT : InstrItinClass; // Any EXT instruction def II_FLOOR : InstrItinClass; +def II_INS : InstrItinClass; // Any INS instruction def II_LB : InstrItinClass; def II_LBU : InstrItinClass; def II_LD : InstrItinClass; @@ -198,6 +200,8 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData<II_DSUB , [InstrStage<1, [ALU]>]>, InstrItinData<II_DROTR , [InstrStage<1, [ALU]>]>, InstrItinData<II_DROTRV , [InstrStage<1, [ALU]>]>, + InstrItinData<II_EXT , [InstrStage<1, [ALU]>]>, + InstrItinData<II_INS , [InstrStage<1, [ALU]>]>, InstrItinData<II_LUI , [InstrStage<1, [ALU]>]>, InstrItinData<II_MOVF , [InstrStage<1, [ALU]>]>, InstrItinData<II_MOVN , [InstrStage<1, [ALU]>]>, diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 86c8931..79f6617 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -14,14 +14,11 @@ #include "MipsTargetMachine.h" #include "Mips.h" #include "Mips16FrameLowering.h" -#include "Mips16HardFloat.h" #include "Mips16ISelDAGToDAG.h" #include "Mips16ISelLowering.h" #include "Mips16InstrInfo.h" #include "MipsFrameLowering.h" #include "MipsInstrInfo.h" -#include "MipsModuleISelDAGToDAG.h" -#include "MipsOs16.h" #include "MipsSEFrameLowering.h" #include "MipsSEISelDAGToDAG.h" #include "MipsSEISelLowering.h" @@ -34,6 +31,7 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" + using namespace llvm; #define DEBUG_TYPE "mips" @@ -46,8 +44,12 @@ extern "C" void LLVMInitializeMipsTarget() { RegisterTargetMachine<MipselTargetMachine> B(TheMips64elTarget); } -static std::string computeDataLayout(bool isLittle, MipsABIInfo &ABI) { +static std::string computeDataLayout(StringRef TT, StringRef CPU, + const TargetOptions &Options, + bool isLittle) { std::string Ret = ""; + MipsABIInfo ABI = + MipsABIInfo::computeTargetABI(Triple(TT), CPU, Options.MCOptions); // There are both little and big endian mips. 
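// (computeDataLayout now takes the triple, CPU, and target options and
// recomputes the ABI locally: the data-layout string must be handed to the
// LLVMTargetMachine base-class constructor, i.e. before the ABI member of
// MipsTargetMachine has been initialized, so it can no longer be taken from
// the ABI field that the old signature relied on.)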
if (isLittle) @@ -86,11 +88,11 @@ MipsTargetMachine::MipsTargetMachine(const Target &T, StringRef TT, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT, + CPU, FS, Options, RM, CM, OL), isLittle(isLittle), TLOF(make_unique<MipsTargetObjectFile>()), ABI(MipsABIInfo::computeTargetABI(Triple(TT), CPU, Options.MCOptions)), - DL(computeDataLayout(isLittle, ABI)), Subtarget(nullptr), - DefaultSubtarget(TT, CPU, FS, isLittle, *this), + Subtarget(nullptr), DefaultSubtarget(TT, CPU, FS, isLittle, *this), NoMips16Subtarget(TT, CPU, FS.empty() ? "-mips16" : FS.str() + ",-mips16", isLittle, *this), Mips16Subtarget(TT, CPU, FS.empty() ? "+mips16" : FS.str() + ",+mips16", @@ -209,14 +211,14 @@ void MipsPassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); addPass(createAtomicExpandPass(&getMipsTargetMachine())); if (getMipsSubtarget().os16()) - addPass(createMipsOs16(getMipsTargetMachine())); + addPass(createMipsOs16Pass(getMipsTargetMachine())); if (getMipsSubtarget().inMips16HardFloat()) - addPass(createMips16HardFloat(getMipsTargetMachine())); + addPass(createMips16HardFloatPass(getMipsTargetMachine())); } // Install an instruction selector pass using // the ISelDag to gen Mips code. bool MipsPassConfig::addInstSelector() { - addPass(createMipsModuleISelDag(getMipsTargetMachine())); + addPass(createMipsModuleISelDagPass(getMipsTargetMachine())); addPass(createMips16ISelDag(getMipsTargetMachine())); addPass(createMipsSEISelDag(getMipsTargetMachine())); return false; diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index afd0cea..5427d6a 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -31,7 +31,6 @@ class MipsTargetMachine : public LLVMTargetMachine { std::unique_ptr<TargetLoweringObjectFile> TLOF; // Selected ABI MipsABIInfo ABI; - const DataLayout DL; // Calculates type size & alignment MipsSubtarget *Subtarget; MipsSubtarget DefaultSubtarget; MipsSubtarget NoMips16Subtarget; @@ -47,8 +46,7 @@ public: TargetIRAnalysis getTargetIRAnalysis() override; - const DataLayout *getDataLayout() const override { return &DL; } - const MipsSubtarget *getSubtargetImpl() const override { + const MipsSubtarget *getSubtargetImpl() const { if (Subtarget) return Subtarget; return &DefaultSubtarget; diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp index c07693e..723b63b 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -9,6 +9,7 @@ #include "MipsTargetObjectFile.h" #include "MipsSubtarget.h" +#include "MipsTargetMachine.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" @@ -44,7 +45,7 @@ void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ SmallBSSSection = getContext().getELFSection(".sbss", ELF::SHT_NOBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); - this->TM = &TM; + this->TM = &static_cast<const MipsTargetMachine &>(TM); } // A address must be loaded from a small section if its size is less than the @@ -84,7 +85,8 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, bool MipsTargetObjectFile:: IsGlobalInSmallSectionImpl(const GlobalValue *GV, const TargetMachine &TM) const { - const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>(); + 
const MipsSubtarget &Subtarget = + *static_cast<const MipsTargetMachine &>(TM).getSubtargetImpl(); // Return if small section is not available. if (!Subtarget.useSmallSection()) @@ -127,9 +129,11 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, /// Return true if this constant should be placed into small data section. bool MipsTargetObjectFile:: IsConstantInSmallSection(const Constant *CN, const TargetMachine &TM) const { - return ( - TM.getSubtarget<MipsSubtarget>().useSmallSection() && LocalSData && - IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(CN->getType()))); + return (static_cast<const MipsTargetMachine &>(TM) + .getSubtargetImpl() + ->useSmallSection() && + LocalSData && IsInSmallSection(TM.getDataLayout()->getTypeAllocSize( + CN->getType()))); } const MCSection *MipsTargetObjectFile:: diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h index 3a2b298..45ed9d0 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.h +++ b/lib/Target/Mips/MipsTargetObjectFile.h @@ -13,11 +13,11 @@ #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" namespace llvm { - +class MipsTargetMachine; class MipsTargetObjectFile : public TargetLoweringObjectFileELF { const MCSection *SmallDataSection; const MCSection *SmallBSSSection; - const TargetMachine *TM; + const MipsTargetMachine *TM; public: void Initialize(MCContext &Ctx, const TargetMachine &TM) override; diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h index b3b8296..1ff041d 100644 --- a/lib/Target/Mips/MipsTargetStreamer.h +++ b/lib/Target/Mips/MipsTargetStreamer.h @@ -92,9 +92,9 @@ public: } virtual void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI); - virtual void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value){}; - virtual void emitMipsAbiFlags(){}; + virtual void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value); void forbidModuleDirective() { ModuleDirectiveAllowed = false; } + void reallowModuleDirective() { ModuleDirectiveAllowed = true; } bool isModuleDirectiveAllowed() { return ModuleDirectiveAllowed; } // This method enables template classes to set internal abi flags @@ -197,7 +197,6 @@ public: bool Is32BitABI) override; void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override; void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value) override; - void emitMipsAbiFlags() override; }; // This part is for ELF object output @@ -240,7 +239,7 @@ public: // ABI Flags void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override; - void emitMipsAbiFlags() override; + void emitMipsAbiFlags(); }; } #endif diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt index 3a4a19d..cdd2f1f 100644 --- a/lib/Target/NVPTX/CMakeLists.txt +++ b/lib/Target/NVPTX/CMakeLists.txt @@ -29,7 +29,6 @@ set(NVPTXCodeGen_sources NVPTXTargetMachine.cpp NVPTXTargetTransformInfo.cpp NVPTXUtilities.cpp - NVPTXutil.cpp NVVMReflect.cpp ) diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp index 11d737e..b9df3d1 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -39,6 +39,8 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(StringRef TT) { InlineAsmEnd = " inline asm"; SupportsDebugInformation = CompileForDebugging; + // PTX does not allow .align on functions. 
+ HasFunctionAlignment = false; HasDotTypeDotSizeDirective = false; Data8bitsDirective = " .b8 "; diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp index 158ca90..2b4d864 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp @@ -71,35 +71,23 @@ static MCInstPrinter *createNVPTXMCInstPrinter(const Target &T, // Force static initialization. extern "C" void LLVMInitializeNVPTXTargetMC() { - // Register the MC asm info. - RegisterMCAsmInfo<NVPTXMCAsmInfo> X(TheNVPTXTarget32); - RegisterMCAsmInfo<NVPTXMCAsmInfo> Y(TheNVPTXTarget64); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget32, - createNVPTXMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget64, - createNVPTXMCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget32, createNVPTXMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget64, createNVPTXMCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget32, - createNVPTXMCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget64, - createNVPTXMCRegisterInfo); - - // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget32, - createNVPTXMCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget64, - createNVPTXMCSubtargetInfo); - - // Register the MCInstPrinter. - TargetRegistry::RegisterMCInstPrinter(TheNVPTXTarget32, - createNVPTXMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheNVPTXTarget64, - createNVPTXMCInstPrinter); + for (Target *T : {&TheNVPTXTarget32, &TheNVPTXTarget64}) { + // Register the MC asm info. + RegisterMCAsmInfo<NVPTXMCAsmInfo> X(*T); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(*T, createNVPTXMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createNVPTXMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(*T, createNVPTXMCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, createNVPTXMCSubtargetInfo); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(*T, createNVPTXMCInstPrinter); + } } diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h index 98821d2..bfd5123 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h @@ -14,6 +14,8 @@ #ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H #define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H +#include <stdint.h> + namespace llvm { class Target; diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp index 1f37696..4f3ccf4 100644 --- a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp +++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp @@ -12,11 +12,33 @@ //===----------------------------------------------------------------------===// #include "NVPTXAllocaHoisting.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" +using namespace llvm; -namespace llvm { +namespace { +// Hoisting the alloca instructions in the non-entry blocks to the entry +// block. 
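A few hunks up, LLVMInitializeNVPTXTargetMC collapses its duplicated 32-bit and 64-bit registration blocks into one range-for over an initializer list of Target pointers; the PowerPC registrations get the same treatment later in this commit. The C++11 idiom in isolation, with toy types standing in for the TargetRegistry machinery:

#include <cstdio>
#include <initializer_list>

struct Target { const char *Name; };
static Target T32{"target32"}, T64{"target64"};

// One copy of all the Register* calls instead of one block per target.
static void registerEverything(Target &T) {
  std::printf("registering %s\n", T.Name);
}

int main() {
  // {&T32, &T64} deduces std::initializer_list<Target *>.
  for (Target *T : {&T32, &T64})
    registerEverything(*T);
}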
+class NVPTXAllocaHoisting : public FunctionPass { +public: + static char ID; // Pass ID + NVPTXAllocaHoisting() : FunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<MachineFunctionAnalysis>(); + AU.addPreserved<StackProtector>(); + } + + const char *getPassName() const override { + return "NVPTX specific alloca hoisting"; + } + + bool runOnFunction(Function &function) override; +}; +} // namespace bool NVPTXAllocaHoisting::runOnFunction(Function &function) { bool functionModified = false; @@ -36,11 +58,15 @@ bool NVPTXAllocaHoisting::runOnFunction(Function &function) { return functionModified; } -char NVPTXAllocaHoisting::ID = 1; -static RegisterPass<NVPTXAllocaHoisting> -X("alloca-hoisting", "Hoisting alloca instructions in non-entry " - "blocks to the entry block"); +char NVPTXAllocaHoisting::ID = 0; + +namespace llvm { +void initializeNVPTXAllocaHoistingPass(PassRegistry &); +} -FunctionPass *createAllocaHoisting() { return new NVPTXAllocaHoisting(); } +INITIALIZE_PASS( + NVPTXAllocaHoisting, "alloca-hoisting", + "Hoisting alloca instructions in non-entry blocks to the entry block", + false, false) -} // end namespace llvm +FunctionPass *llvm::createAllocaHoisting() { return new NVPTXAllocaHoisting; } diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h index c343980..7a6fc7d 100644 --- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h +++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h @@ -14,38 +14,10 @@ #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H #define LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/StackProtector.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/Pass.h" - namespace llvm { - class FunctionPass; -class Function; - -// Hoisting the alloca instructions in the non-entry blocks to the entry -// block. -class NVPTXAllocaHoisting : public FunctionPass { -public: - static char ID; // Pass ID - NVPTXAllocaHoisting() : FunctionPass(ID) {} - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<DataLayoutPass>(); - AU.addPreserved<MachineFunctionAnalysis>(); - AU.addPreserved<StackProtector>(); - } - - const char *getPassName() const override { - return "NVPTX specific alloca hoisting"; - } - - bool runOnFunction(Function &function) override; -}; extern FunctionPass *createAllocaHoisting(); - } // end namespace llvm #endif diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 833db04..cc58b07 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -504,8 +504,7 @@ void NVPTXAsmPrinter::EmitFunctionBodyEnd() { void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const { unsigned RegNo = MI->getOperand(0).getReg(); - const TargetRegisterInfo *TRI = nvptxSubtarget->getRegisterInfo(); - if (TRI->isVirtualRegister(RegNo)) { + if (TargetRegisterInfo::isVirtualRegister(RegNo)) { OutStreamer.AddComment(Twine("implicit-def: ") + getVirtualRegisterName(RegNo)); } else { @@ -522,15 +521,15 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, // If none of reqntid* is specified, don't output reqntid directive. 
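The NVPTXAllocaHoisting rewrite here moves the pass class out of its header and into an anonymous namespace, so the header exports only a factory function. A compact sketch of that encapsulation pattern, using invented stand-ins (PassBase, createHoistingPass) rather than llvm::FunctionPass and INITIALIZE_PASS:

#include <memory>

struct PassBase {                      // stand-in for FunctionPass
  virtual ~PassBase() = default;
  virtual bool run() = 0;
};

namespace {
struct AllocaHoistingLike final : PassBase {
  bool run() override { return false; } // "false": nothing was changed
};
} // namespace

// The only symbol the header declares; the concrete type stays file-local.
std::unique_ptr<PassBase> createHoistingPass() {
  return std::make_unique<AllocaHoistingLike>();
}

int main() { return createHoistingPass()->run() ? 1 : 0; }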
unsigned reqntidx, reqntidy, reqntidz; bool specified = false; - if (llvm::getReqNTIDx(F, reqntidx) == false) + if (!llvm::getReqNTIDx(F, reqntidx)) reqntidx = 1; else specified = true; - if (llvm::getReqNTIDy(F, reqntidy) == false) + if (!llvm::getReqNTIDy(F, reqntidy)) reqntidy = 1; else specified = true; - if (llvm::getReqNTIDz(F, reqntidz) == false) + if (!llvm::getReqNTIDz(F, reqntidz)) reqntidz = 1; else specified = true; @@ -544,15 +543,15 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, // If none of maxntid* is specified, don't output maxntid directive. unsigned maxntidx, maxntidy, maxntidz; specified = false; - if (llvm::getMaxNTIDx(F, maxntidx) == false) + if (!llvm::getMaxNTIDx(F, maxntidx)) maxntidx = 1; else specified = true; - if (llvm::getMaxNTIDy(F, maxntidy) == false) + if (!llvm::getMaxNTIDy(F, maxntidy)) maxntidy = 1; else specified = true; - if (llvm::getMaxNTIDz(F, maxntidz) == false) + if (!llvm::getMaxNTIDz(F, maxntidz)) maxntidz = 1; else specified = true; @@ -673,7 +672,7 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) { } for (const User *UU : U->users()) - if (usedInOneFunc(UU, oneFunc) == false) + if (!usedInOneFunc(UU, oneFunc)) return false; return true; @@ -687,7 +686,7 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) { * 3. Is the global variable referenced only in one function? */ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { - if (gv->hasInternalLinkage() == false) + if (!gv->hasInternalLinkage()) return false; const PointerType *Pty = gv->getType(); if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED) @@ -696,7 +695,7 @@ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { const Function *oneFunc = nullptr; bool flag = usedInOneFunc(gv, oneFunc); - if (flag == false) + if (!flag) return false; if (!oneFunc) return false; @@ -1472,7 +1471,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { } } - if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) { + if (!PAL.hasAttribute(paramIndex + 1, Attribute::ByVal)) { if (Ty->isAggregateType() || Ty->isVectorTy()) { // Just print .param .align <a> .b8 .param[size]; // <a> = PAL.getparamalignment @@ -1788,7 +1787,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, break; } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { if (const ConstantInt *constInt = dyn_cast<ConstantInt>( - ConstantFoldConstantExpression(Cexpr, TD))) { + ConstantFoldConstantExpression(Cexpr, *TD))) { int int32 = (int)(constInt->getZExtValue()); ptr = (unsigned char *)&int32; aggBuffer->addBytes(ptr, 4, Bytes); @@ -1810,7 +1809,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, break; } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { if (const ConstantInt *constInt = dyn_cast<ConstantInt>( - ConstantFoldConstantExpression(Cexpr, TD))) { + ConstantFoldConstantExpression(Cexpr, *TD))) { long long int64 = (long long)(constInt->getZExtValue()); ptr = (unsigned char *)&int64; aggBuffer->addBytes(ptr, 8, Bytes); @@ -2085,13 +2084,6 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, } } - -// Force static initialization. 
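The reqntid/maxntid edits above, like many NVPTX changes in this commit, are mechanical boolean cleanups: comparisons against false become logical negation, and ternaries that yield true/false collapse into their condition. In miniature:

#include <cassert>

static bool getValue(unsigned &Out) { Out = 7; return true; }

int main() {
  unsigned V = 0;
  if (!getValue(V))        // was: if (getValue(V) == false)
    V = 1;
  bool Small = V < 8;      // was: V < 8 ? true : false
  assert(Small && V == 7);
  return 0;
}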
-extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() { - RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); - RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); -} - void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { std::stringstream temp; LineReader *reader = this->getReader(filename.str()); diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index 7e6b5e8..9b11e70 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -92,8 +92,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { bool EmitGeneric; public: - AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP) - : size(_size), buffer(_size), O(_O), AP(_AP) { + AggBuffer(unsigned size, raw_ostream &O, NVPTXAsmPrinter &AP) + : size(size), buffer(size), O(O), AP(AP) { curpos = 0; numSymbols = 0; EmitGeneric = AP.EmitGeneric; diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp index f3a095d..6d7c99c 100644 --- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp +++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp @@ -123,10 +123,9 @@ bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP( // => // %0 = gep X, indices // %1 = addrspacecast %0 - GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(Cast->getOperand(0), - Indices, - GEP->getName(), - GEPI); + GetElementPtrInst *NewGEPI = GetElementPtrInst::Create( + GEP->getSourceElementType(), Cast->getOperand(0), Indices, + GEP->getName(), GEPI); NewGEPI->setIsInBounds(GEP->isInBounds()); GEP->replaceAllUsesWith( new AddrSpaceCastInst(NewGEPI, GEP->getType(), "", GEPI)); diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp index 86d134b..850c020 100644 --- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -343,6 +343,7 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C, // GetElementPtrConstantExpr return cast<GEPOperator>(C)->isInBounds() ? Builder.CreateGEP( + cast<GEPOperator>(C)->getSourceElementType(), NewOperands[0], makeArrayRef(&NewOperands[1], NumOperands - 1)) : Builder.CreateInBoundsGEP( diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index e01c780..52c5e1b 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -78,10 +78,7 @@ bool NVPTXDAGToDAGISel::usePrecSqrtF32() const { return UsePrecSqrtF32; } else { // Otherwise, use sqrt.approx if fast math is enabled - if (TM.Options.UnsafeFPMath) - return false; - else - return true; + return !TM.Options.UnsafeFPMath; } } @@ -5044,12 +5041,12 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. 
bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand( - const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) { + const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { SDValue Op0, Op1; - switch (ConstraintCode) { + switch (ConstraintID) { default: return true; - case 'm': // memory + case InlineAsm::Constraint_m: // memory if (SelectDirectAddr(Op, Op0)) { OutOps.push_back(Op0); OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index ca432b5..6d845c9 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -48,7 +48,7 @@ public: const NVPTXSubtarget *Subtarget; bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, + unsigned ConstraintID, std::vector<SDValue> &OutOps) override; private: // Include the pieces autogenerated from the target description. diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 1dc81f7..ff74e6e 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -930,7 +930,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, } first = false; - if (Outs[OIdx].Flags.isByVal() == false) { + if (!Outs[OIdx].Flags.isByVal()) { if (Ty->isAggregateType() || Ty->isVectorTy()) { unsigned align = 0; const CallInst *CallI = cast<CallInst>(CS->getInstruction()); @@ -1075,7 +1075,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, EVT VT = Outs[OIdx].VT; Type *Ty = Args[i].Ty; - if (Outs[OIdx].Flags.isByVal() == false) { + if (!Outs[OIdx].Flags.isByVal()) { if (Ty->isAggregateType()) { // aggregate SmallVector<EVT, 16> vtparts; @@ -1459,7 +1459,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ObjectVT) == NumElts && "Vector was not scalarized"); unsigned sz = EltVT.getSizeInBits(); - bool needTruncate = sz < 8 ? true : false; + bool needTruncate = sz < 8; if (NumElts == 1) { // Just a simple load @@ -1577,7 +1577,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, for (unsigned i = 0, e = Ins.size(); i != e; ++i) { unsigned sz = VTs[i].getSizeInBits(); unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]); - bool needTruncate = sz < 8 ? true : false; + bool needTruncate = sz < 8; if (VTs[i].isInteger() && (sz < 8)) sz = 8; @@ -1940,9 +1940,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { } // Then any remaining arguments - for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) { - Ops.push_back(N->getOperand(i)); - } + Ops.append(N->op_begin() + 2, N->op_end()); SDValue NewSt = DAG.getMemIntrinsicNode( Opcode, DL, DAG.getVTList(MVT::Other), Ops, @@ -2118,7 +2116,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( // to newly created nodes. The SDNodes for params have to // appear in the same order as their order of appearance // in the original function. "idx+1" holds that order. 
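The SelectInlineAsmMemoryOperand change above swaps a raw constraint character for an unsigned ConstraintID such as InlineAsm::Constraint_m, so backends switch over an enum instead of letters. A toy model of that migration; the names are illustrative, and the return convention (true meaning "not handled") follows the hunk above:

enum ConstraintID { Constraint_Unknown, Constraint_m };

// Parsing happens once, centrally, instead of in every backend.
static ConstraintID parseConstraint(char C) {
  return C == 'm' ? Constraint_m : Constraint_Unknown;
}

static bool selectMemoryOperand(ConstraintID ID) {
  switch (ID) {
  case Constraint_m:
    return false;          // handled
  default:
    return true;           // unhandled
  }
}

int main() { return selectMemoryOperand(parseConstraint('m')) ? 1 : 0; }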
- if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) { + if (!PAL.hasAttribute(i + 1, Attribute::ByVal)) { if (Ty->isAggregateType()) { SmallVector<EVT, 16> vtparts; SmallVector<uint64_t, 16> offsets; @@ -4494,7 +4492,6 @@ NVPTXTargetObjectFile::~NVPTXTargetObjectFile() { delete DwarfLocSection; delete DwarfARangesSection; delete DwarfRangesSection; - delete DwarfMacroInfoSection; } const MCSection * diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 1b4da2c..8594364 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -497,6 +497,12 @@ public: std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; + unsigned getInlineAsmMemConstraint( + const std::string &ConstraintCode) const override { + // FIXME: Map different constraints differently. + return InlineAsm::Constraint_m; + } + const NVPTXTargetMachine *nvTM; // PTX always uses 32-bit shift amounts diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index f0c3663..578401a 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "NVPTXLowerAggrCopies.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -22,10 +24,33 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "nvptx" using namespace llvm; -namespace llvm { FunctionPass *createLowerAggrCopies(); } +namespace { +// actual analysis class, which is a functionpass +struct NVPTXLowerAggrCopies : public FunctionPass { + static char ID; + + NVPTXLowerAggrCopies() : FunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<MachineFunctionAnalysis>(); + AU.addPreserved<StackProtector>(); + } + + bool runOnFunction(Function &F) override; + + static const unsigned MaxAggrCopySize = 128; + + const char *getPassName() const override { + return "Lower aggregate copies/intrinsics into loops"; + } +}; +} // namespace char NVPTXLowerAggrCopies::ID = 0; @@ -104,7 +129,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { SmallVector<MemTransferInst *, 4> aggrMemcpys; SmallVector<MemSetInst *, 4> aggrMemsets; - const DataLayout *DL = &getAnalysis<DataLayoutPass>().getDataLayout(); + const DataLayout &DL = F.getParent()->getDataLayout(); LLVMContext &Context = F.getParent()->getContext(); // @@ -117,10 +142,10 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { ++II) { if (LoadInst *load = dyn_cast<LoadInst>(II)) { - if (load->hasOneUse() == false) + if (!load->hasOneUse()) continue; - if (DL->getTypeStoreSize(load->getType()) < MaxAggrCopySize) + if (DL.getTypeStoreSize(load->getType()) < MaxAggrCopySize) continue; User *use = load->user_back(); @@ -166,7 +191,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { StoreInst *store = dyn_cast<StoreInst>(*load->user_begin()); Value *srcAddr = load->getOperand(0); Value *dstAddr = store->getOperand(1); - unsigned numLoads = DL->getTypeStoreSize(load->getType()); + unsigned numLoads = DL.getTypeStoreSize(load->getType()); Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads); convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(), diff 
--git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h index da301d5..3c39f53 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h @@ -15,35 +15,10 @@ #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H #define LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/StackProtector.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/Pass.h" - namespace llvm { +class FunctionPass; -// actual analysis class, which is a functionpass -struct NVPTXLowerAggrCopies : public FunctionPass { - static char ID; - - NVPTXLowerAggrCopies() : FunctionPass(ID) {} - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<DataLayoutPass>(); - AU.addPreserved<MachineFunctionAnalysis>(); - AU.addPreserved<StackProtector>(); - } - - bool runOnFunction(Function &F) override; - - static const unsigned MaxAggrCopySize = 128; - - const char *getPassName() const override { - return "Lower aggregate copies/intrinsics into loops"; - } -}; - -extern FunctionPass *createLowerAggrCopies(); +FunctionPass *createLowerAggrCopies(); } #endif diff --git a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp index 3149399..68dfbb7 100644 --- a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp +++ b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp @@ -35,7 +35,8 @@ namespace llvm { void initializeNVPTXLowerStructArgsPass(PassRegistry &); } -class LLVM_LIBRARY_VISIBILITY NVPTXLowerStructArgs : public FunctionPass { +namespace { +class NVPTXLowerStructArgs : public FunctionPass { bool runOnFunction(Function &F) override; void handleStructPtrArgs(Function &); @@ -48,6 +49,7 @@ public: return "Copy structure (byval *) arguments to stack"; } }; +} // namespace char NVPTXLowerStructArgs::ID = 1; diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h index d39a394..f075b8b 100644 --- a/lib/Target/NVPTX/NVPTXMCExpr.h +++ b/lib/Target/NVPTX/NVPTXMCExpr.h @@ -29,8 +29,8 @@ private: const VariantKind Kind; const APFloat Flt; - explicit NVPTXFloatMCExpr(VariantKind _Kind, APFloat _Flt) - : Kind(_Kind), Flt(_Flt) {} + explicit NVPTXFloatMCExpr(VariantKind Kind, APFloat Flt) + : Kind(Kind), Flt(Flt) {} public: /// @name Construction diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp index 5ca96e4..6e97f9ef 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -78,7 +78,7 @@ NVPTXRegisterInfo::NVPTXRegisterInfo() : NVPTXGenRegisterInfo(0) {} /// NVPTX Callee Saved Registers const MCPhysReg * -NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { +NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *) const { static const MCPhysReg CalleeSavedRegs[] = { 0 }; return CalleeSavedRegs; } diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h index 75b8f15..c310a9c 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.h +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h @@ -35,8 +35,7 @@ public: //------------------------------------------------------ // NVPTX callee saved registers - const MCPhysReg * - getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; BitVector getReservedRegs(const MachineFunction &MF) const override; diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h index 
f1d3cb4..0d2627d 100644 --- a/lib/Target/NVPTX/NVPTXSection.h +++ b/lib/Target/NVPTX/NVPTXSection.h @@ -26,7 +26,7 @@ namespace llvm { class NVPTXSection : public MCSection { virtual void anchor(); public: - NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K) {} + NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K, nullptr) {} virtual ~NVPTXSection() {} /// Override this as NVPTX has its own way of printing switching @@ -36,11 +36,8 @@ public: const MCExpr *Subsection) const override {} /// Base address of PTX sections is zero. - bool isBaseAddressKnownZero() const override { return true; } bool UseCodeAlign() const override { return false; } bool isVirtualSection() const override { return false; } - std::string getLabelBeginName() const override { return ""; } - std::string getLabelEndName() const override { return ""; } }; } // end namespace llvm diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 1a267a6..1b6bc71 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -50,6 +50,7 @@ using namespace llvm; namespace llvm { void initializeNVVMReflectPass(PassRegistry&); void initializeGenericToNVVMPass(PassRegistry&); +void initializeNVPTXAllocaHoistingPass(PassRegistry &); void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); void initializeNVPTXLowerStructArgsPass(PassRegistry &); @@ -64,6 +65,7 @@ extern "C" void LLVMInitializeNVPTXTarget() { // but it's very NVPTX-specific. initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); + initializeNVPTXAllocaHoistingPass(*PassRegistry::getPassRegistry()); initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); initializeNVPTXFavorNonGenericAddrSpacesPass( *PassRegistry::getPassRegistry()); @@ -86,9 +88,10 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64bit) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), is64bit(is64bit), - TLOF(make_unique<NVPTXTargetObjectFile>()), - DL(computeDataLayout(is64bit)), Subtarget(TT, CPU, FS, *this) { + : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM, + CM, OL), + is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()), + Subtarget(TT, CPU, FS, *this) { if (Triple(TT).getOS() == Triple::NVCL) drvInterface = NVPTX::NVCL; else @@ -183,8 +186,7 @@ void NVPTXPassConfig::addIRPasses() { } bool NVPTXPassConfig::addInstSelector() { - const NVPTXSubtarget &ST = - getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>(); + const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl(); addPass(createLowerAggrCopies()); addPass(createAllocaHoisting()); diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index a81abfe..b8df5af 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -27,7 +27,6 @@ namespace llvm { class NVPTXTargetMachine : public LLVMTargetMachine { bool is64bit; std::unique_ptr<TargetLoweringObjectFile> TLOF; - const DataLayout DL; // Calculates type size & alignment NVPTX::DrvInterface drvInterface; NVPTXSubtarget Subtarget; @@ -40,8 +39,10 @@ public: CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit); ~NVPTXTargetMachine() override; - const DataLayout 
*getDataLayout() const override { return &DL; } - const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; } + const NVPTXSubtarget *getSubtargetImpl(const Function &) const override { + return &Subtarget; + } + const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; } bool is64Bit() const { return is64bit; } NVPTX::DrvInterface getDrvInterface() const { return drvInterface; } ManagedStringPool *getManagedStrPool() const { diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h index 00ceca5..5d9ab0d 100644 --- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h +++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h @@ -41,7 +41,6 @@ public: DwarfLocSection = nullptr; DwarfARangesSection = nullptr; DwarfRangesSection = nullptr; - DwarfMacroInfoSection = nullptr; } virtual ~NVPTXTargetObjectFile(); @@ -83,8 +82,6 @@ public: new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); DwarfRangesSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); - DwarfMacroInfoSection = - new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); } const MCSection *getSectionForConstant(SectionKind Kind, diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp index cf1feac..1f178af 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -293,12 +293,9 @@ bool llvm::isKernelFunction(const Function &F) { unsigned x = 0; bool retval = llvm::findOneNVVMAnnotation( &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], x); - if (retval == false) { + if (!retval) { // There is no NVVM metadata, check the calling convention - if (F.getCallingConv() == llvm::CallingConv::PTX_Kernel) - return true; - else - return false; + return F.getCallingConv() == llvm::CallingConv::PTX_Kernel; } return (x == 1); } @@ -307,7 +304,7 @@ bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) { std::vector<unsigned> Vs; bool retval = llvm::findAllNVVMAnnotation( &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], Vs); - if (retval == false) + if (!retval) return false; for (int i = 0, e = Vs.size(); i < e; i++) { unsigned v = Vs[i]; diff --git a/lib/Target/NVPTX/NVPTXutil.cpp b/lib/Target/NVPTX/NVPTXutil.cpp deleted file mode 100644 index 5f074b3..0000000 --- a/lib/Target/NVPTX/NVPTXutil.cpp +++ /dev/null @@ -1,90 +0,0 @@ -//===-- NVPTXutil.cpp - Functions exported to CodeGen --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the functions that can be used in CodeGen. 
-// -//===----------------------------------------------------------------------===// - -#include "NVPTXutil.h" -#include "NVPTX.h" - -using namespace llvm; - -namespace llvm { - -bool isParamLoad(const MachineInstr *MI) { - if ((MI->getOpcode() != NVPTX::LD_i32_avar) && - (MI->getOpcode() != NVPTX::LD_i64_avar)) - return false; - if (MI->getOperand(2).isImm() == false) - return false; - if (MI->getOperand(2).getImm() != NVPTX::PTXLdStInstCode::PARAM) - return false; - return true; -} - -#define DATA_MASK 0x7f -#define DIGIT_WIDTH 7 -#define MORE_BYTES 0x80 - -static int encode_leb128(uint64_t val, int *nbytes, char *space, int splen) { - char *a; - char *end = space + splen; - - a = space; - do { - unsigned char uc; - - if (a >= end) - return 1; - uc = val & DATA_MASK; - val >>= DIGIT_WIDTH; - if (val != 0) - uc |= MORE_BYTES; - *a = uc; - a++; - } while (val); - *nbytes = a - space; - return 0; -} - -#undef DATA_MASK -#undef DIGIT_WIDTH -#undef MORE_BYTES - -uint64_t encode_leb128(const char *str) { - union { - uint64_t x; - char a[8]; - } temp64; - - temp64.x = 0; - - for (unsigned i = 0, e = strlen(str); i != e; ++i) - temp64.a[i] = str[e - 1 - i]; - - char encoded[16]; - int nbytes; - - int retval = encode_leb128(temp64.x, &nbytes, encoded, 16); - - (void) retval; - assert(retval == 0 && "Encoding to leb128 failed"); - - assert(nbytes <= 8 && - "Cannot support register names with leb128 encoding > 8 bytes"); - - temp64.x = 0; - for (int i = 0; i < nbytes; ++i) - temp64.a[i] = encoded[i]; - - return temp64.x; -} - -} // end namespace llvm diff --git a/lib/Target/NVPTX/NVPTXutil.h b/lib/Target/NVPTX/NVPTXutil.h deleted file mode 100644 index 1915dac..0000000 --- a/lib/Target/NVPTX/NVPTXutil.h +++ /dev/null @@ -1,25 +0,0 @@ -//===-- NVPTXutil.h - Functions exported to CodeGen --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the functions that can be used in CodeGen. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H -#define LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H - -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstr.h" - -namespace llvm { -bool isParamLoad(const MachineInstr *); -uint64_t encode_leb128(const char *str); -} - -#endif diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp index a8d6b95..5e375b7 100644 --- a/lib/Target/NVPTX/NVVMReflect.cpp +++ b/lib/Target/NVPTX/NVVMReflect.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_os_ostream.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include <map> #include <sstream> @@ -137,6 +138,26 @@ bool NVVMReflect::handleFunction(Function *ReflectFunction) { // ConstantArray can be found successfully, see if it can be // found in VarMap. If so, replace the uses of CallInst with the // value found in VarMap. If not, replace the use with value 0. 
+ + // IR for __nvvm_reflect calls differs between CUDA versions: + // CUDA 6.5 and earlier uses this sequence: + // %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8 + // (i8 addrspace(4)* getelementptr inbounds + // ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0)) + // %reflect = tail call i32 @__nvvm_reflect(i8* %ptr) + // + // Value returned by Sym->getOperand(0) is a Constant with a + // ConstantDataSequential operand which can be converted to string and used + // for lookup. + // + // CUDA 7.0 does it slightly differently: + // %reflect = call i32 @__nvvm_reflect(i8* addrspacecast + // (i8 addrspace(1)* getelementptr inbounds + // ([8 x i8], [8 x i8] addrspace(1)* @str, i32 0, i32 0) to i8*)) + // + // In this case, we get a Constant with a GlobalVariable operand and we need + // to dig deeper to find its initializer with the string we'll use for lookup. + for (User *U : ReflectFunction->users()) { assert(isa<CallInst>(U) && "Only a call instruction can use _reflect"); CallInst *Reflect = cast<CallInst>(U); @@ -158,16 +179,23 @@ bool NVVMReflect::handleFunction(Function *ReflectFunction) { const Value *Sym = GEP->getOperand(0); assert(isa<Constant>(Sym) && "Format of _reflect function not recognized"); - const Constant *SymStr = cast<Constant>(Sym); + const Value *Operand = cast<Constant>(Sym)->getOperand(0); + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand)) { + // For CUDA-7.0 style __nvvm_reflect calls we need to find operand's + // initializer. + assert(GV->hasInitializer() && + "Format of _reflect function not recognized"); + const Constant *Initializer = GV->getInitializer(); + Operand = Initializer; + } - assert(isa<ConstantDataSequential>(SymStr->getOperand(0)) && + assert(isa<ConstantDataSequential>(Operand) && "Format of _reflect function not recognized"); - - assert(cast<ConstantDataSequential>(SymStr->getOperand(0))->isCString() && + assert(cast<ConstantDataSequential>(Operand)->isCString() && "Format of _reflect function not recognized"); std::string ReflectArg = - cast<ConstantDataSequential>(SymStr->getOperand(0))->getAsString(); + cast<ConstantDataSequential>(Operand)->getAsString(); ReflectArg = ReflectArg.substr(0, ReflectArg.size() - 1); DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n"); diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index bf00e73..99a1633 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -271,9 +271,9 @@ class PPCAsmParser : public MCTargetAsmParser { public: - PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, - const MCInstrInfo &_MII, const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(_STI), MII(_MII) { + PPCAsmParser(MCSubtargetInfo &STI, MCAsmParser &, const MCInstrInfo &MII, + const MCTargetOptions &Options) + : MCTargetAsmParser(), STI(STI), MII(MII) { // Check for 64-bit vs. 32-bit pointer mode. 
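Once either of the two IR shapes described above yields the constant string, the pass drops the trailing NUL that getAsString() preserves and consults its variable map, substituting 0 for unknown names. A rough standalone model of that final step; the map contents (here __CUDA_FTZ) are only an example:

#include <map>
#include <string>

static int lookupReflectArg(const std::string &Raw,
                            const std::map<std::string, int> &VarMap) {
  // The IR-level string is NUL-terminated; drop the last byte before lookup.
  std::string Key = Raw.substr(0, Raw.size() - 1);
  auto It = VarMap.find(Key);
  return It == VarMap.end() ? 0 : It->second; // unknown names become 0
}

int main() {
  std::map<std::string, int> VarMap{{"__CUDA_FTZ", 1}};
  std::string Raw("__CUDA_FTZ");
  Raw.push_back('\0');
  return lookupReflectArg(Raw, VarMap) == 1 ? 0 : 1;
}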
Triple TheTriple(STI.getTargetTriple()); IsPPC64 = (TheTriple.getArch() == Triple::ppc64 || @@ -425,7 +425,9 @@ public: bool isToken() const override { return Kind == Token; } bool isImm() const override { return Kind == Immediate || Kind == Expression; } + bool isU1Imm() const { return Kind == Immediate && isUInt<1>(getImm()); } bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); } + bool isU3Imm() const { return Kind == Immediate && isUInt<3>(getImm()); } bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); } bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); } bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); } diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index 0ed0723..a9f5fc7 100644 --- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -189,6 +189,12 @@ static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo, return decodeRegisterClass(Inst, RegNo, CRRegs); } +static DecodeStatus DecodeCRRC0RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, CRRegs); +} + static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index c287fbe..311a4f2 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -214,6 +214,13 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, printOperand(MI, OpNo+1, O); } +void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned int Value = MI->getOperand(OpNo).getImm(); + assert(Value <= 1 && "Invalid u1imm argument!"); + O << (unsigned int)Value; +} + void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); @@ -221,6 +228,13 @@ void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo, O << (unsigned int)Value; } +void PPCInstPrinter::printU3ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned int Value = MI->getOperand(OpNo).getImm(); + assert(Value <= 8 && "Invalid u3imm argument!"); + O << (unsigned int)Value; +} + void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 6ead19b..8718743 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -43,7 +43,9 @@ public: void printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, const char *Modifier = nullptr); + void printU1ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU3ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp 
b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 2b4f2d8..d8fab5b 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -45,6 +45,10 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) { void PPCELFMCAsmInfo::anchor() { } PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) { + // FIXME: This is not always needed. For example, it is not needed in the + // v2 abi. + NeedsLocalForSize = true; + if (is64Bit) { PointerSize = CalleeSaveStackSlotSize = 8; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 06d380e..b9f0afb 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -14,6 +14,7 @@ #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" #include "llvm/ADT/Statistic.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -39,10 +40,10 @@ class PPCMCCodeEmitter : public MCCodeEmitter { bool IsLittleEndian; public: - PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx, bool isLittle) - : MCII(mcii), CTX(ctx), IsLittleEndian(isLittle) { - } - + PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) + : MCII(mcii), CTX(ctx), + IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {} + ~PPCMCCodeEmitter() {} unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo, @@ -158,14 +159,11 @@ public: }; } // end anonymous namespace - + MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx) { - Triple TT(STI.getTargetTriple()); - bool IsLittleEndian = TT.getArch() == Triple::ppc64le; - return new PPCMCCodeEmitter(MCII, Ctx, IsLittleEndian); + return new PPCMCCodeEmitter(MCII, Ctx); } unsigned PPCMCCodeEmitter:: diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h index f0a6bb9..1c840d9 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h @@ -36,9 +36,8 @@ private: int64_t EvaluateAsInt64(int64_t Value) const; - explicit PPCMCExpr(VariantKind _Kind, const MCExpr *_Expr, - bool _IsDarwin) - : Kind(_Kind), Expr(_Expr), IsDarwin(_IsDarwin) {} + explicit PPCMCExpr(VariantKind Kind, const MCExpr *Expr, bool IsDarwin) + : Kind(Kind), Expr(Expr), IsDarwin(IsDarwin) {} public: /// @name Construction diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index f2da389..2f7a768 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -145,6 +145,7 @@ public: } void emitTCEntry(const MCSymbol &S) override { // Creates a R_PPC64_TOC relocation + Streamer.EmitValueToAlignment(8); Streamer.EmitSymbolValue(&S, 8); } void emitMachine(StringRef CPU) override { @@ -222,32 +223,19 @@ public: }; } -// This is duplicated code. Refactor this. 
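In the PPCMCCodeEmitter hunk above, the emitter stops re-deriving endianness from the subtarget triple and instead asks the MCAsmInfo already reachable through its MCContext, which removes a constructor parameter. The dependency-trimming pattern, with stand-in types rather than the MC classes:

struct AsmInfo {
  bool Little;
  bool isLittleEndian() const { return Little; }
};

struct Context {
  const AsmInfo *AI;
  const AsmInfo *getAsmInfo() const { return AI; }
};

struct CodeEmitter {
  bool IsLittleEndian;
  // One source of truth: the context the emitter already receives.
  explicit CodeEmitter(const Context &Ctx)
      : IsLittleEndian(Ctx.getAsmInfo()->isLittleEndian()) {}
};

int main() {
  AsmInfo AI{true};
  Context Ctx{&AI};
  return CodeEmitter(Ctx).IsLittleEndian ? 0 : 1;
}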
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT, - MCContext &Ctx, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, bool RelaxAll) { - if (Triple(TT).isOSDarwin()) { - MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll); - new PPCTargetMachOStreamer(*S); - return S; - } - - MCStreamer *S = createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll); - new PPCTargetELFStreamer(*S); - return S; +static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool isVerboseAsm) { + return new PPCTargetAsmStreamer(S, OS); } -static MCStreamer * -createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useDwarfDirectory, - MCInstPrinter *InstPrint, MCCodeEmitter *CE, - MCAsmBackend *TAB, bool ShowInst) { - - MCStreamer *S = llvm::createAsmStreamer( - Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst); - new PPCTargetAsmStreamer(*S, OS); - return S; +static MCTargetStreamer * +createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { + Triple TT(STI.getTargetTriple()); + if (TT.getObjectFormat() == Triple::ELF) + return new PPCTargetELFStreamer(S); + return new PPCTargetMachOStreamer(S); } static MCInstPrinter *createPPCMCInstPrinter(const Target &T, @@ -261,60 +249,36 @@ static MCInstPrinter *createPPCMCInstPrinter(const Target &T, } extern "C" void LLVMInitializePowerPCTargetMC() { - // Register the MC asm info. - RegisterMCAsmInfoFn C(ThePPC32Target, createPPCMCAsmInfo); - RegisterMCAsmInfoFn D(ThePPC64Target, createPPCMCAsmInfo); - RegisterMCAsmInfoFn E(ThePPC64LETarget, createPPCMCAsmInfo); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(ThePPC32Target, createPPCMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(ThePPC64Target, createPPCMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(ThePPC64LETarget, - createPPCMCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(ThePPC64LETarget, - createPPCMCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(ThePPC32Target, createPPCMCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(ThePPC64Target, createPPCMCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(ThePPC64LETarget, createPPCMCRegisterInfo); - - // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(ThePPC32Target, - createPPCMCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(ThePPC64Target, - createPPCMCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(ThePPC64LETarget, - createPPCMCSubtargetInfo); - - // Register the MC Code Emitter - TargetRegistry::RegisterMCCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(ThePPC64LETarget, - createPPCMCCodeEmitter); - + for (Target *T : {&ThePPC32Target, &ThePPC64Target, &ThePPC64LETarget}) { + // Register the MC asm info. + RegisterMCAsmInfoFn C(*T, createPPCMCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(*T, createPPCMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createPPCMCInstrInfo); + + // Register the MC register info. 
+ TargetRegistry::RegisterMCRegInfo(*T, createPPCMCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, createPPCMCSubtargetInfo); + + // Register the MC Code Emitter + TargetRegistry::RegisterMCCodeEmitter(*T, createPPCMCCodeEmitter); + // Register the asm backend. - TargetRegistry::RegisterMCAsmBackend(ThePPC32Target, createPPCAsmBackend); - TargetRegistry::RegisterMCAsmBackend(ThePPC64Target, createPPCAsmBackend); - TargetRegistry::RegisterMCAsmBackend(ThePPC64LETarget, createPPCAsmBackend); - - // Register the object streamer. - TargetRegistry::RegisterMCObjectStreamer(ThePPC32Target, createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(ThePPC64Target, createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(ThePPC64LETarget, createMCStreamer); - - // Register the asm streamer. - TargetRegistry::RegisterAsmStreamer(ThePPC32Target, createMCAsmStreamer); - TargetRegistry::RegisterAsmStreamer(ThePPC64Target, createMCAsmStreamer); - TargetRegistry::RegisterAsmStreamer(ThePPC64LETarget, createMCAsmStreamer); - - // Register the MCInstPrinter. - TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(ThePPC64LETarget, - createPPCMCInstPrinter); + TargetRegistry::RegisterMCAsmBackend(*T, createPPCAsmBackend); + + // Register the object target streamer. + TargetRegistry::RegisterObjectTargetStreamer(*T, + createObjectTargetStreamer); + + // Register the asm target streamer. + TargetRegistry::RegisterAsmTargetStreamer(*T, createAsmTargetStreamer); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(*T, createPPCMCInstPrinter); + } } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 68f7f7a..8b1e3b4 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -34,10 +34,9 @@ class raw_ostream; extern Target ThePPC32Target; extern Target ThePPC64Target; extern Target ThePPC64LETarget; - + MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx); MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI, diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index f53add5..f175f6d 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -18,7 +18,7 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// // PowerPC Subtarget features. 
// - + //===----------------------------------------------------------------------===// // CPU Directives // //===----------------------------------------------------------------------===// @@ -112,14 +112,21 @@ def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true", def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true", "Enable POWER8 Altivec instructions", [FeatureAltivec]>; +def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true", + "Enable POWER8 Crypto instructions", + [FeatureP8Altivec]>; def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true", "Enable POWER8 vector instructions", [FeatureVSX, FeatureP8Altivec]>; - +def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics", + "HasPartwordAtomics", "true", + "Enable l[bh]arx and st[bh]cx.">; def FeatureInvariantFunctionDescriptors : SubtargetFeature<"invariant-function-descriptors", "HasInvariantFunctionDescriptors", "true", "Assume function descriptors are invariant">; +def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true", + "Enable Hardware Transactional Memory instructions">; def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true", "Treat mftb as deprecated">; @@ -256,11 +263,11 @@ def ProcessorFeatures { [DirectivePwr8, FeatureAltivec, FeatureP8Altivec, FeatureVSX, FeatureP8Vector, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, - FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, + FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureHTM, FeatureFPRND, FeatureFPCVT, FeatureISEL, - FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, + FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, FeatureP8Crypto, Feature64Bit /*, Feature64BitRegs */, FeatureICBT, - DeprecatedMFTB, DeprecatedDST]; + FeaturePartwordAtomic, DeprecatedMFTB, DeprecatedDST]; } def : ProcessorModel<"970", G5Model, @@ -339,7 +346,7 @@ def : ProcessorModel<"pwr7", P7Model, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, - Feature64Bit /*, Feature64BitRegs */, + Feature64Bit /*, Feature64BitRegs */, FeaturePartwordAtomic, DeprecatedMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>; def : Processor<"ppc", G3Itineraries, [Directive32]>; diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 1327290..cd60906 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -69,12 +69,11 @@ namespace { protected: MapVector<MCSymbol*, MCSymbol*> TOC; const PPCSubtarget *Subtarget; - uint64_t TOCLabelID; StackMaps SM; public: explicit PPCAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) - : AsmPrinter(TM, std::move(Streamer)), TOCLabelID(0), SM(*this) {} + : AsmPrinter(TM, std::move(Streamer)), SM(*this) {} const char *getPassName() const override { return "PowerPC Assembly Printer"; @@ -321,17 +320,9 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, /// exists for it. If not, create one. Then return a symbol that references /// the TOC entry. MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) { - const DataLayout *DL = TM.getDataLayout(); MCSymbol *&TOCEntry = TOC[Sym]; - - // To avoid name clash check if the name already exists. 
- while (!TOCEntry) { - if (OutContext.LookupSymbol(Twine(DL->getPrivateGlobalPrefix()) + - "C" + Twine(TOCLabelID++)) == nullptr) { - TOCEntry = GetTempSymbol("C", TOCLabelID); - } - } - + if (!TOCEntry) + TOCEntry = createTempSymbol("C"); return TOCEntry; } @@ -1068,8 +1059,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { OutStreamer.SwitchSection(Section); OutStreamer.EmitLabel(CurrentFnSym); OutStreamer.EmitValueToAlignment(8); - MCSymbol *Symbol1 = - OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName())); + MCSymbol *Symbol1 = CurrentFnSymForSize; // Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function // entry point. OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext), @@ -1082,11 +1072,6 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { // Emit a null environment pointer. OutStreamer.EmitIntValue(0, 8 /* size */); OutStreamer.SwitchSection(Current.first, Current.second); - - MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol( - ".L." + Twine(CurrentFnSym->getName())); - OutStreamer.EmitLabel(RealFnSym); - CurrentFnSymForSize = RealFnSym; } diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 5af8aab..c595f44 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -171,8 +171,7 @@ bool PPCCTRLoops::runOnFunction(Function &F) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); SE = &getAnalysis<ScalarEvolution>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; + DL = &F.getParent()->getDataLayout(); auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); LibInfo = TLIP ? &TLIP->getTLI() : nullptr; @@ -533,7 +532,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { // selected branch. MadeChange = true; - SCEVExpander SCEVE(*SE, "loopcnt"); + SCEVExpander SCEVE(*SE, Preheader->getModule()->getDataLayout(), "loopcnt"); LLVMContext &C = SE->getContext(); Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) : Type::getInt32Ty(C); diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 54532b5..fbd7b6d 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -675,8 +675,18 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { case PPC::STFS: Opc = PPC::STFSX; break; case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) - .addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg); + + auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) + .addReg(SrcReg); + + // If we have an index register defined, we use it in the store instruction; + // otherwise we use X0 as the base, since the vector instructions read a + // base of X0 as zero when computing the effective address, regardless of + // the register's actual contents. + if (IndexReg) + MIB.addReg(Addr.Base.Reg).addReg(IndexReg); + else + MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg); } return true; @@ -1532,7 +1542,7 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) { // Add a register mask with the call-preserved registers. Proper // defs for return values will be added by setPhysRegsDeadExcept().
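The TOC hunk at the top of this block replaces a label-probing loop with a single map lookup: operator[] default-constructs a null slot, and the null test decides whether a temp symbol still has to be created. The same create-on-first-use idiom over std::map, with a string standing in for the MCSymbol pointer:

#include <map>
#include <string>

static std::string &lookUpOrCreate(std::map<int, std::string> &TOC, int Key) {
  std::string &Entry = TOC[Key];        // default-constructed on first access
  if (Entry.empty())                    // analogue of the null-pointer test
    Entry = "C" + std::to_string(Key);  // stand-in for createTempSymbol("C")
  return Entry;
}

int main() {
  std::map<int, std::string> TOC;
  // Both calls must hand back the very same entry.
  return &lookUpOrCreate(TOC, 1) == &lookUpOrCreate(TOC, 1) ? 0 : 1;
}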
- MIB.addRegMask(TRI.getCallPreservedMask(CC)); + MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); CLI.Call = MIB; diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index b10e854..3ac8e94 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -186,20 +186,34 @@ namespace { /// register can be improved, but it is wrong to substitute Reg+Reg for /// Reg in an asm, because the load or store opcode would have to change. bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, + unsigned ConstraintID, std::vector<SDValue> &OutOps) override { - // We need to make sure that this one operand does not end up in r0 - // (because we might end up lowering this as 0(%op)). - const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo(); - const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1); - SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32); - SDValue NewOp = - SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, - SDLoc(Op), Op.getValueType(), - Op, RC), 0); - - OutOps.push_back(NewOp); - return false; + + switch(ConstraintID) { + default: + errs() << "ConstraintID: " << ConstraintID << "\n"; + llvm_unreachable("Unexpected asm memory constraint"); + case InlineAsm::Constraint_es: + case InlineAsm::Constraint_i: + case InlineAsm::Constraint_m: + case InlineAsm::Constraint_o: + case InlineAsm::Constraint_Q: + case InlineAsm::Constraint_Z: + case InlineAsm::Constraint_Zy: + // We need to make sure that this one operand does not end up in r0 + // (because we might end up lowering this as 0(%op)). + const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo(); + const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1); + SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32); + SDValue NewOp = + SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, + SDLoc(Op), Op.getValueType(), + Op, RC), 0); + + OutOps.push_back(NewOp); + return false; + } + return true; } void InsertVRSaveCode(MachineFunction &MF); @@ -2105,7 +2119,7 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) { // getVCmpInst: return the vector compare instruction for the specified // vector type and condition code. Since this is for altivec specific code, -// only support the altivec types (v16i8, v8i16, v4i32, and v4f32). +// only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32). 
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, bool HasVSX, bool &Swap, bool &Negate) { Swap = false; @@ -2184,6 +2198,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, return PPC::VCMPEQUH; else if (VecVT == MVT::v4i32) return PPC::VCMPEQUW; + else if (VecVT == MVT::v2i64) + return PPC::VCMPEQUD; break; case ISD::SETGT: if (VecVT == MVT::v16i8) @@ -2192,6 +2208,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, return PPC::VCMPGTSH; else if (VecVT == MVT::v4i32) return PPC::VCMPGTSW; + else if (VecVT == MVT::v2i64) + return PPC::VCMPGTSD; break; case ISD::SETUGT: if (VecVT == MVT::v16i8) @@ -2200,6 +2218,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, return PPC::VCMPGTUH; else if (VecVT == MVT::v4i32) return PPC::VCMPGTUW; + else if (VecVT == MVT::v2i64) + return PPC::VCMPGTUD; break; default: break; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 147e94b..871531e 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -516,7 +516,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); } - setOperationAction(ISD::MUL, MVT::v4i32, Custom); + + if (Subtarget.hasP8Altivec()) + setOperationAction(ISD::MUL, MVT::v4i32, Legal); + else + setOperationAction(ISD::MUL, MVT::v4i32, Custom); + setOperationAction(ISD::MUL, MVT::v8i16, Custom); setOperationAction(ISD::MUL, MVT::v16i8, Custom); @@ -574,15 +579,24 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass); addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass); - // VSX v2i64 only supports non-arithmetic operations. - setOperationAction(ISD::ADD, MVT::v2i64, Expand); - setOperationAction(ISD::SUB, MVT::v2i64, Expand); + if (Subtarget.hasP8Altivec()) { + setOperationAction(ISD::SHL, MVT::v2i64, Legal); + setOperationAction(ISD::SRA, MVT::v2i64, Legal); + setOperationAction(ISD::SRL, MVT::v2i64, Legal); - setOperationAction(ISD::SHL, MVT::v2i64, Expand); - setOperationAction(ISD::SRA, MVT::v2i64, Expand); - setOperationAction(ISD::SRL, MVT::v2i64, Expand); + setOperationAction(ISD::SETCC, MVT::v2i64, Legal); + } + else { + setOperationAction(ISD::SHL, MVT::v2i64, Expand); + setOperationAction(ISD::SRA, MVT::v2i64, Expand); + setOperationAction(ISD::SRL, MVT::v2i64, Expand); - setOperationAction(ISD::SETCC, MVT::v2i64, Custom); + setOperationAction(ISD::SETCC, MVT::v2i64, Custom); + + // VSX v2i64 only supports non-arithmetic operations. + setOperationAction(ISD::ADD, MVT::v2i64, Expand); + setOperationAction(ISD::SUB, MVT::v2i64, Expand); + } setOperationAction(ISD::LOAD, MVT::v2i64, Promote); AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64); @@ -892,6 +906,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, MaxStoresPerMemcpyOptSize = 8; MaxStoresPerMemmove = 32; MaxStoresPerMemmoveOptSize = 8; + } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) { + // The A2 also benefits from (very) aggressive inlining of memcpy and + // friends. The overhead of a function call, even when warm, can be + // over one hundred cycles.
+ MaxStoresPerMemset = 128; + MaxStoresPerMemcpy = 128; + MaxStoresPerMemmove = 128; } } @@ -981,8 +1002,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::STBRX: return "PPCISD::STBRX"; case PPCISD::LFIWAX: return "PPCISD::LFIWAX"; case PPCISD::LFIWZX: return "PPCISD::LFIWZX"; - case PPCISD::LARX: return "PPCISD::LARX"; - case PPCISD::STCX: return "PPCISD::STCX"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; case PPCISD::BDNZ: return "PPCISD::BDNZ"; case PPCISD::BDZ: return "PPCISD::BDZ"; @@ -1384,17 +1403,10 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { // immediate field for would be zero, and we prefer to use vxor for it. if (ValSizeInBytes < ByteSize) return SDValue(); - // If the element value is larger than the splat value, cut it in half and - // check to see if the two halves are equal. Continue doing this until we - // get to ByteSize. This allows us to handle 0x01010101 as 0x01. - while (ValSizeInBytes > ByteSize) { - ValSizeInBytes >>= 1; - - // If the top half equals the bottom half, we're still ok. - if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) != - (Value & ((1 << (8*ValSizeInBytes))-1))) - return SDValue(); - } + // If the element value is larger than the splat value, check if it consists + // of a repeated bit pattern of size ByteSize. + if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8)) + return SDValue(); // Properly sign extend the value. int MaskVal = SignExtend32(Value, ByteSize * 8); @@ -2436,27 +2448,16 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, return false; } -/// GetFPR - Get the set of FP registers that should be allocated for arguments, +/// FPR - The set of FP registers that should be allocated for arguments, /// on Darwin. -static const MCPhysReg *GetFPR() { - static const MCPhysReg FPR[] = { - PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, - PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 - }; +static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, + PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, + PPC::F11, PPC::F12, PPC::F13}; - return FPR; -} - -/// GetQFPR - Get the set of QPX registers that should be allocated for -/// arguments. -static const MCPhysReg *GetQFPR() { - static const MCPhysReg QFPR[] = { - PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, - PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13 - }; - - return QFPR; -} +/// QFPR - The set of QPX registers that should be allocated for arguments. +static const MCPhysReg QFPR[] = { + PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, + PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13}; /// CalculateStackSlotSize - Calculates the size reserved for this argument on /// the stack. 
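The get_VSPLTI_elt hunk above replaces a hand-rolled halving loop with a single APInt::isSplat query. Both compute the same predicate: is the wide constant just the ByteSize-byte value repeated? A self-contained sketch of that equivalence, with the function name and driver invented for illustration (the in-tree code builds an APInt and calls isSplat directly):

#include <cassert>
#include <cstdint>

// True if the low ValBytes bytes of Value are a repetition of its low
// ByteSize bytes, e.g. 0x01010101 is 0x01 repeated. This mirrors the removed
// halving loop; the patch instead asks
// APInt(ValBytes * 8, Value).isSplat(ByteSize * 8).
static bool isRepeatedBytePattern(uint32_t Value, unsigned ValBytes,
                                  unsigned ByteSize) {
  while (ValBytes > ByteSize) {
    ValBytes >>= 1;
    uint32_t Mask = (1u << (8 * ValBytes)) - 1;
    // If the top half does not equal the bottom half, it is not a splat.
    if (((Value >> (8 * ValBytes)) & Mask) != (Value & Mask))
      return false;
  }
  return true;
}

int main() {
  assert(isRepeatedBytePattern(0x01010101u, 4, 1));  // 0x01 repeated
  assert(!isRepeatedBytePattern(0x01020102u, 4, 1)); // no 1-byte period
  assert(isRepeatedBytePattern(0x01020102u, 4, 2));  // but a 2-byte period
  return 0;
}

Besides being shorter, the APInt form avoids the shift-width traps of doing this arithmetic on raw integers.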
@@ -2880,9 +2881,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - - static const MCPhysReg *FPR = GetFPR(); - static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 @@ -2892,8 +2890,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 }; - static const MCPhysReg *QFPR = GetQFPR(); - const unsigned Num_GPR_Regs = array_lengthof(GPR); const unsigned Num_FPR_Regs = 13; const unsigned Num_VR_Regs = array_lengthof(VR); @@ -3291,9 +3287,6 @@ PPCTargetLowering::LowerFormalArguments_Darwin( PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - - static const MCPhysReg *FPR = GetFPR(); - static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 @@ -4187,7 +4180,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + const uint32_t *Mask = + TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -4582,8 +4576,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const MCPhysReg *FPR = GetFPR(); - static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 @@ -4593,8 +4585,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 }; - static const MCPhysReg *QFPR = GetQFPR(); - const unsigned NumGPRs = array_lengthof(GPR); const unsigned NumFPRs = 13; const unsigned NumVRs = array_lengthof(VR); @@ -5280,8 +5270,6 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const MCPhysReg *FPR = GetFPR(); - static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 @@ -6418,7 +6406,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, SDLoc dl) { assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); - static const EVT VTys[] = { // canonical VT to use for each size. + static const MVT VTys[] = { // canonical VT to use for each size. MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 }; @@ -7045,7 +7033,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, /// altivec comparison. If it is, return true and fill in Opc/isDot with /// information about the intrinsic. 
static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, - bool &isDot) { + bool &isDot, const PPCSubtarget &Subtarget) { unsigned IntrinsicID = cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); CompareOpc = -1; @@ -7058,29 +7046,83 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpequd_p: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 199; + isDot = 1; + } + else + return false; + + break; case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpgtsd_p: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 967; + isDot = 1; + } + else + return false; + + break; case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpgtud_p: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 711; + isDot = 1; + } + else + return false; + break; + // Normal Comparisons. case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpequd: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 199; + isDot = 0; + } + else + return false; + + break; case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpgtsd: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 967; + isDot = 0; + } + else + return false; + + break; case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpgtud: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 711; + isDot = 0; + } + else + return false; + + break; } return true; } @@ -7094,7 +7136,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SDLoc dl(Op); int CompareOpc; bool isDot; - if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) + if (!getAltivecCompareInfo(Op, CompareOpc, isDot, Subtarget)) return SDValue(); // Don't custom lower most intrinsics. // If this is a non-dot comparison, make the VCMP node and we are done. 
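The EmitAtomicBinary hunk that follows replaces the bool is64bit parameter with an explicit AtomicSize, so the same reservation loop can be stamped out for 1-, 2-, 4-, and 8-byte operations via lbarx/stbcx., lharx/sthcx., lwarx/stwcx., and ldarx/stdcx. respectively. As a minimal sketch of the loop shape being generated, here is the 4-byte case expressed at source level; the mnemonics in the comments are the canonical POWER expansion, and the register names are invented:

#include <atomic>
#include <cstdint>

// What the custom inserter builds for ATOMIC_LOAD_ADD_I32. On a POWER target
// a relaxed fetch_add compiles to the same loop the hunk emits:
//   loop: lwarx  rD, 0, rPtr      ; load word and reserve
//         add    rT, rIncr, rD    ; BinOpcode (ADD4 here; 0 means plain swap)
//         stwcx. rT, 0, rPtr      ; store conditional, sets CR0
//         bne-   loop             ; reservation lost, retry
uint32_t fetchAdd(std::atomic<uint32_t> &Obj, uint32_t Incr) {
  return Obj.fetch_add(Incr, std::memory_order_relaxed);
}

The bne- is driven by CR0, which the store-conditional sets; the cmpxchg expansion later in the patch reuses this same CR0 flow with the new part-word mnemonics.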
@@ -7738,10 +7780,36 @@ Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, MachineBasicBlock * PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, - bool is64bit, unsigned BinOpcode) const { + unsigned AtomicSize, + unsigned BinOpcode) const { // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + auto LoadMnemonic = PPC::LDARX; + auto StoreMnemonic = PPC::STDCX; + switch (AtomicSize) { + default: + llvm_unreachable("Unexpected size of atomic entity"); + case 1: + LoadMnemonic = PPC::LBARX; + StoreMnemonic = PPC::STBCX; + assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics."); + break; + case 2: + LoadMnemonic = PPC::LHARX; + StoreMnemonic = PPC::STHCX; + assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics."); + break; + case 4: + LoadMnemonic = PPC::LWARX; + StoreMnemonic = PPC::STWCX; + break; + case 8: + LoadMnemonic = PPC::LDARX; + StoreMnemonic = PPC::STDCX; + break; + } + const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); MachineFunction::iterator It = BB; @@ -7763,7 +7831,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned TmpReg = (!BinOpcode) ? incr : - RegInfo.createVirtualRegister( is64bit ? &PPC::G8RCRegClass + RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass); // thisMBB: @@ -7778,11 +7846,11 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, // bne- loopMBB // fallthrough --> exitMBB BB = loopMBB; - BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) + BuildMI(BB, dl, TII->get(LoadMnemonic), dest) .addReg(ptrA).addReg(ptrB); if (BinOpcode) BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); - BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) + BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(TmpReg).addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); @@ -7800,6 +7868,10 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, bool is8bit, // operation unsigned BinOpcode) const { + // If we support part-word atomic mnemonics, just use them + if (Subtarget.hasPartwordAtomics()) + return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode); + // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
const TargetInstrInfo *TII = Subtarget.getInstrInfo(); // In 64 bit mode we have to use 64 bits for addresses, even though the @@ -8365,68 +8437,96 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4); + BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::AND); + BB = EmitAtomicBinary(MI, BB, 4, PPC::AND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::AND8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::OR); + BB = EmitAtomicBinary(MI, BB, 4, PPC::OR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::OR8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::XOR); + BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::NAND); + BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF); + BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8); else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, 0); else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, 0); else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32) - BB = EmitAtomicBinary(MI, BB, false, 0); + BB = EmitAtomicBinary(MI, BB, 4, 
0); else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64) - BB = EmitAtomicBinary(MI, BB, true, 0); + BB = EmitAtomicBinary(MI, BB, 8, 0); else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || - MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) { + MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 || + (Subtarget.hasPartwordAtomics() && + MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) || + (Subtarget.hasPartwordAtomics() && + MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) { bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; + auto LoadMnemonic = PPC::LDARX; + auto StoreMnemonic = PPC::STDCX; + switch(MI->getOpcode()) { + default: + llvm_unreachable("Compare and swap of unknown size"); + case PPC::ATOMIC_CMP_SWAP_I8: + LoadMnemonic = PPC::LBARX; + StoreMnemonic = PPC::STBCX; + assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics."); + break; + case PPC::ATOMIC_CMP_SWAP_I16: + LoadMnemonic = PPC::LHARX; + StoreMnemonic = PPC::STHCX; + assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics."); + break; + case PPC::ATOMIC_CMP_SWAP_I32: + LoadMnemonic = PPC::LWARX; + StoreMnemonic = PPC::STWCX; + break; + case PPC::ATOMIC_CMP_SWAP_I64: + LoadMnemonic = PPC::LDARX; + StoreMnemonic = PPC::STDCX; + break; + } unsigned dest = MI->getOperand(0).getReg(); unsigned ptrA = MI->getOperand(1).getReg(); unsigned ptrB = MI->getOperand(2).getReg(); @@ -8452,18 +8552,18 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB->addSuccessor(loop1MBB); // loop1MBB: - // l[wd]arx dest, ptr + // l[bhwd]arx dest, ptr // cmp[wd] dest, oldval // bne- midMBB // loop2MBB: - // st[wd]cx. newval, ptr + // st[bhwd]cx. newval, ptr // bne- loopMBB // b exitBB // midMBB: - // st[wd]cx. dest, ptr + // st[bhwd]cx. dest, ptr // exitBB: BB = loop1MBB; - BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) + BuildMI(BB, dl, TII->get(LoadMnemonic), dest) .addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0) .addReg(oldval).addReg(dest); @@ -8473,7 +8573,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB->addSuccessor(midMBB); BB = loop2MBB; - BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) + BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(newval).addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); @@ -8482,7 +8582,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB->addSuccessor(exitMBB); BB = midMBB; - BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) + BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(dest).addReg(ptrA).addReg(ptrB); BB->addSuccessor(exitMBB); @@ -8682,6 +8782,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg()) .addReg(isEQ ?
PPC::CR0EQ : PPC::CR0GT); + } else if (MI->getOpcode() == PPC::TCHECK_RET) { + DebugLoc Dl = MI->getDebugLoc(); + MachineRegisterInfo &RegInfo = F->getRegInfo(); + unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); + BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); + return BB; } else { llvm_unreachable("Unexpected instr type to insert"); } @@ -10184,7 +10290,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && - getAltivecCompareInfo(LHS, CompareOpc, isDot)) { + getAltivecCompareInfo(LHS, CompareOpc, isDot, Subtarget)) { assert(isDot && "Can't compare against a vector result!"); // If this is a comparison against something other than 0/1, then we know @@ -10297,14 +10403,17 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, case Intrinsic::ppc_altivec_vcmpequb_p: case Intrinsic::ppc_altivec_vcmpequh_p: case Intrinsic::ppc_altivec_vcmpequw_p: + case Intrinsic::ppc_altivec_vcmpequd_p: case Intrinsic::ppc_altivec_vcmpgefp_p: case Intrinsic::ppc_altivec_vcmpgtfp_p: case Intrinsic::ppc_altivec_vcmpgtsb_p: case Intrinsic::ppc_altivec_vcmpgtsh_p: case Intrinsic::ppc_altivec_vcmpgtsw_p: + case Intrinsic::ppc_altivec_vcmpgtsd_p: case Intrinsic::ppc_altivec_vcmpgtub_p: case Intrinsic::ppc_altivec_vcmpgtuh_p: case Intrinsic::ppc_altivec_vcmpgtuw_p: + case Intrinsic::ppc_altivec_vcmpgtud_p: KnownZero = ~1U; // All bits but the low one are known to be zero. break; } @@ -10914,11 +11023,27 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { + const Function *F = MF.getFunction(); + // When expanding a memset, require at least two QPX instructions to cover + // the cost of loading the value to be stored from the constant pool. + if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) && + (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) && + !F->hasFnAttribute(Attribute::NoImplicitFloat)) { + return MVT::v4f64; + } + + // We should use Altivec/VSX loads and stores when available. For unaligned + // addresses, unaligned VSX loads are only fast starting with the P8. + if (Subtarget.hasAltivec() && Size >= 16 && + (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) || + ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) + return MVT::v4i32; + if (Subtarget.isPPC64()) { return MVT::i64; - } else { - return MVT::i32; } + + return MVT::i32; } /// \brief Returns true if it is beneficial to convert a load of a constant diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 04afe88..8afd7ef 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -166,14 +166,6 @@ namespace llvm { /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register. MFFS, - /// LARX = This corresponds to PPC l{w|d}arx instrcution: load and - /// reserve indexed. This is used to implement atomic operations. - LARX, - - /// STCX = This corresponds to PPC stcx. instrcution: store conditional - /// indexed. This is used to implement atomic operations. - STCX, - /// TC_RETURN - A tail call return. 
/// operand #0 chain /// operand #1 callee (register or absolute) @@ -489,7 +481,8 @@ namespace llvm { EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const override; MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, - MachineBasicBlock *MBB, bool is64Bit, + MachineBasicBlock *MBB, + unsigned AtomicSize, unsigned BinOpcode) const; MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB, @@ -526,6 +519,21 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; + unsigned getInlineAsmMemConstraint( + const std::string &ConstraintCode) const override { + if (ConstraintCode == "es") + return InlineAsm::Constraint_es; + else if (ConstraintCode == "o") + return InlineAsm::Constraint_o; + else if (ConstraintCode == "Q") + return InlineAsm::Constraint_Q; + else if (ConstraintCode == "Z") + return InlineAsm::Constraint_Z; + else if (ConstraintCode == "Zy") + return InlineAsm::Constraint_Zy; + return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); + } + /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 69c0d7d..183d088 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -235,15 +235,19 @@ let usesCustomInserter = 1 in { } // Instructions to support atomic operations +let mayLoad = 1, hasSideEffects = 0 in { def LDARX : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr), - "ldarx $rD, $ptr", IIC_LdStLDARX, - [(set i64:$rD, (PPClarx xoaddr:$ptr))]>; + "ldarx $rD, $ptr", IIC_LdStLDARX, []>; + +// Instruction to support lock versions of atomics +// (EH=1 - see Power ISA 2.07 Book II 4.4.2) +def LDARXL : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr), + "ldarx $rD, $ptr, 1", IIC_LdStLDARX, []>, isDOT; +} -let Defs = [CR0] in +let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst), - "stdcx. $rS, $dst", IIC_LdStSTDCX, - [(PPCstcx i64:$rS, xoaddr:$dst)]>, - isDOT; + "stdcx. $rS, $dst", IIC_LdStSTDCX, []>, isDOT; let Interpretation64Bit = 1, isCodeGenOnly = 1 in { let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in @@ -325,6 +329,12 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { Requires<[In64BitMode]>; } +def MFSPR8 : XFXForm_1<31, 339, (outs g8rc:$RT), (ins i32imm:$SPR), + "mfspr $RT, $SPR", IIC_SprMFSPR>; +def MTSPR8 : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, g8rc:$RT), + "mtspr $SPR, $RT", IIC_SprMTSPR>; + + //===----------------------------------------------------------------------===// // 64-bit SPR manipulation instrs. @@ -696,7 +706,7 @@ def ISEL8 : AForm_4<31, 15, // Sign extending loads. -let canFoldAsLoad = 1, PPC970_Unit = 2 in { +let PPC970_Unit = 2 in { let Interpretation64Bit = 1, isCodeGenOnly = 1 in def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src), "lha $rD, $src", IIC_LdStLHA, @@ -752,7 +762,7 @@ def LWAUX : XForm_1<31, 373, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), let Interpretation64Bit = 1, isCodeGenOnly = 1 in { // Zero extending loads. 
-let canFoldAsLoad = 1, PPC970_Unit = 2 in { +let PPC970_Unit = 2 in { def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src), "lbz $rD, $src", IIC_LdStLoad, [(set i64:$rD, (zextloadi8 iaddr:$src))]>; @@ -810,7 +820,7 @@ def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), // Full 8-byte loads. -let canFoldAsLoad = 1, PPC970_Unit = 2 in { +let PPC970_Unit = 2 in { def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src), "ld $rD, $src", IIC_LdStLD, [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64; diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index f6acd6e..123808b 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -269,6 +269,16 @@ class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, !strconcat(opc, " $vD, $vB"), IIC_VecFP, [(set OutTy:$vD, (IntID InTy:$vB))]>; +class VXBX_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty> + : VXForm_BX<xo, (outs vrrc:$vD), (ins vrrc:$vA), + !strconcat(opc, " $vD, $vA"), IIC_VecFP, + [(set Ty:$vD, (IntID Ty:$vA))]>; + +class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty> + : VXForm_CR<xo, (outs vrrc:$vD), (ins vrrc:$vA, u1imm:$ST, u4imm:$SIX), + !strconcat(opc, " $vD, $vA, $ST, $SIX"), IIC_VecFP, + [(set Ty:$vD, (IntID Ty:$vA, imm:$ST, imm:$SIX))]>; + //===----------------------------------------------------------------------===// // Instruction Definitions. @@ -342,7 +352,7 @@ def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB), "mtvscr $vB", IIC_LdStLoad, [(int_ppc_altivec_mtvscr v4i32:$vB)]>; -let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads. +let PPC970_Unit = 2 in { // Loads. def LVEBX: XForm_1<31, 7, (outs vrrc:$vD), (ins memrr:$src), "lvebx $vD, $src", IIC_LdStLoad, [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; @@ -750,7 +760,7 @@ def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>; def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>; def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>; def VCMPGTUWo : VCMPo<646, "vcmpgtuw. 
$vD, $vA, $vB", v4i32>; - + let isCodeGenOnly = 1 in { def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins), "vxor $vD, $vD, $vD", IIC_VecFP, @@ -939,8 +949,50 @@ def : Pat<(v4f32 (fnearbyint v4f32:$vA)), } // end HasAltivec def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">; +def HasP8Crypto : Predicate<"PPCSubTarget->hasP8Crypto()">; let Predicates = [HasP8Altivec] in { +let isCommutable = 1 in { +def VMULESW : VX1_Int_Ty2<904, "vmulesw", int_ppc_altivec_vmulesw, + v2i64, v4i32>; +def VMULEUW : VX1_Int_Ty2<648, "vmuleuw", int_ppc_altivec_vmuleuw, + v2i64, v4i32>; +def VMULOSW : VX1_Int_Ty2<392, "vmulosw", int_ppc_altivec_vmulosw, + v2i64, v4i32>; +def VMULOUW : VX1_Int_Ty2<136, "vmulouw", int_ppc_altivec_vmulouw, + v2i64, v4i32>; +def VMULUWM : VXForm_1<137, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmuluwm $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (mul v4i32:$vA, v4i32:$vB))]>; +def VMAXSD : VX1_Int_Ty<450, "vmaxsd", int_ppc_altivec_vmaxsd, v2i64>; +def VMAXUD : VX1_Int_Ty<194, "vmaxud", int_ppc_altivec_vmaxud, v2i64>; +def VMINSD : VX1_Int_Ty<962, "vminsd", int_ppc_altivec_vminsd, v2i64>; +def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>; +} // isCommutable + +// Vector shifts +def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>; +def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vsld $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (shl v2i64:$vA, v2i64:$vB))]>; +def VSRD : VXForm_1<1732, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vsrd $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (srl v2i64:$vA, v2i64:$vB))]>; +def VSRAD : VXForm_1<964, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vsrad $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (sra v2i64:$vA, v2i64:$vB))]>; + +// Vector Integer Arithmetic Instructions +let isCommutable = 1 in { +def VADDUDM : VXForm_1<192, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vaddudm $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (add v2i64:$vA, v2i64:$vB))]>; +} // isCommutable + +def VSUBUDM : VXForm_1<1216, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vsubudm $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (sub v2i64:$vA, v2i64:$vB))]>; + // Count Leading Zeros def VCLZB : VXForm_2<1794, (outs vrrc:$vD), (ins vrrc:$vB), "vclzb $vD, $vB", IIC_VecGeneral, @@ -992,4 +1044,42 @@ def VORC : VXForm_1<1348, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vorc $vD, $vA, $vB", IIC_VecGeneral, [(set v4i32:$vD, (or v4i32:$vA, (vnot_ppc v4i32:$vB)))]>; + +// i64 element comparisons. +def VCMPEQUD : VCMP <199, "vcmpequd $vD, $vA, $vB" , v2i64>; +def VCMPEQUDo : VCMPo<199, "vcmpequd. $vD, $vA, $vB", v2i64>; +def VCMPGTSD : VCMP <967, "vcmpgtsd $vD, $vA, $vB" , v2i64>; +def VCMPGTSDo : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>; +def VCMPGTUD : VCMP <711, "vcmpgtud $vD, $vA, $vB" , v2i64>; +def VCMPGTUDo : VCMPo<711, "vcmpgtud. 
$vD, $vA, $vB", v2i64>; + +// The cryptography instructions that do not require Category:Vector.Crypto +def VPMSUMB : VX1_Int_Ty<1032, "vpmsumb", + int_ppc_altivec_crypto_vpmsumb, v16i8>; +def VPMSUMH : VX1_Int_Ty<1096, "vpmsumh", + int_ppc_altivec_crypto_vpmsumh, v8i16>; +def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw", + int_ppc_altivec_crypto_vpmsumw, v4i32>; +def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd", + int_ppc_altivec_crypto_vpmsumd, v2i64>; +def VPERMXOR : VA1a_Int_Ty<45, "vpermxor", + int_ppc_altivec_crypto_vpermxor, v16i8>; + } // end HasP8Altivec + +// Crypto instructions (from builtins) +let Predicates = [HasP8Crypto] in { +def VSHASIGMAW : VXCR_Int_Ty<1666, "vshasigmaw", + int_ppc_altivec_crypto_vshasigmaw, v4i32>; +def VSHASIGMAD : VXCR_Int_Ty<1730, "vshasigmad", + int_ppc_altivec_crypto_vshasigmad, v2i64>; +def VCIPHER : VX1_Int_Ty<1288, "vcipher", int_ppc_altivec_crypto_vcipher, + v2i64>; +def VCIPHERLAST : VX1_Int_Ty<1289, "vcipherlast", + int_ppc_altivec_crypto_vcipherlast, v2i64>; +def VNCIPHER : VX1_Int_Ty<1352, "vncipher", + int_ppc_altivec_crypto_vncipher, v2i64>; +def VNCIPHERLAST : VX1_Int_Ty<1353, "vncipherlast", + int_ppc_altivec_crypto_vncipherlast, v2i64>; +def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>; +} // HasP8Crypto diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index 506a2d0..b7a7a1f 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -693,6 +693,60 @@ class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let A = 0; } +class XForm_htm0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bit R; + + bit RC = 1; + + let Inst{6-9} = 0; + let Inst{10} = R; + let Inst{11-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +class XForm_htm1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bit A; + + bit RC = 1; + + let Inst{6} = A; + let Inst{7-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +class XForm_htm2<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bit L; + + bit RC = 0; // set by isDOT + + let Inst{7-9} = 0; + let Inst{10} = L; + let Inst{11-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +class XForm_htm3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> BF; + + bit RC = 0; + + let Inst{6-8} = BF; + let Inst{9-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + // XX*-Form (VSX) class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> @@ -1470,6 +1524,39 @@ class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr, let Inst{21-31} = xo; } +/// VXForm_CR - VX crypto instructions with "VRT, VRA, ST, SIX" +class VXForm_CR<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VA; + bits<1> ST; + bits<4> SIX; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = VA; + let Inst{16} = ST; + let Inst{17-20} = SIX; + let Inst{21-31} = xo; +} + +/// VXForm_BX - VX crypto instructions with "VRT, VRA, 0 - like vsbox" +class VXForm_BX<bits<11> 
xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VA; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = VA; + let Inst{16-20} = 0; + let Inst{21-31} = xo; +} + // E-4 VXR-Form class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> diff --git a/lib/Target/PowerPC/PPCInstrHTM.td b/lib/Target/PowerPC/PPCInstrHTM.td new file mode 100644 index 0000000..20e6a62 --- /dev/null +++ b/lib/Target/PowerPC/PPCInstrHTM.td @@ -0,0 +1,172 @@ +//===-- PPCInstrHTM.td - The PowerPC Hardware Transactional Memory -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hardware Transactional Memory extension to the +// PowerPC instruction set. +// +//===----------------------------------------------------------------------===// + + + +def HasHTM : Predicate<"PPCSubTarget->hasHTM()">; + +def HTM_get_imm : SDNodeXForm<imm, [{ + return getI32Imm (N->getZExtValue()); +}]>; + +let hasSideEffects = 1, usesCustomInserter = 1 in { +def TCHECK_RET : Pseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>; +} + + +let Predicates = [HasHTM] in { + +def TBEGIN : XForm_htm0 <31, 654, + (outs crrc0:$ret), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>; + +def TEND : XForm_htm1 <31, 686, + (outs crrc0:$ret), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>; + +def TABORT : XForm_base_r3xo <31, 910, + (outs crrc0:$ret), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR, + []>, isDOT { + let RST = 0; + let B = 0; +} + +def TABORTWC : XForm_base_r3xo <31, 782, + (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B), + "tabortwc. $RTS, $A, $B", IIC_SprMTSPR, []>, + isDOT; + +def TABORTWCI : XForm_base_r3xo <31, 846, + (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B), + "tabortwci. $RTS, $A, $B", IIC_SprMTSPR, []>, + isDOT; + +def TABORTDC : XForm_base_r3xo <31, 814, + (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B), + "tabortdc. $RTS, $A, $B", IIC_SprMTSPR, []>, + isDOT; + +def TABORTDCI : XForm_base_r3xo <31, 878, + (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B), + "tabortdci. $RTS, $A, $B", IIC_SprMTSPR, []>, + isDOT; + +def TSR : XForm_htm2 <31, 750, + (outs crrc0:$ret), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>, + isDOT; + +def TCHECK : XForm_htm3 <31, 718, + (outs), (ins crrc:$BF), "tcheck $BF", IIC_SprMTSPR, []>; + + +def TRECLAIM : XForm_base_r3xo <31, 942, + (outs crrc:$ret), (ins gprc:$A), "treclaim. $A", + IIC_SprMTSPR, []>, + isDOT { + let RST = 0; + let B = 0; +} + +def TRECHKPT : XForm_base_r3xo <31, 1006, + (outs crrc:$ret), (ins), "trechkpt.", IIC_SprMTSPR, []>, + isDOT { + let RST = 0; + let A = 0; + let B = 0; +} + +// Builtins + +// All HTM instructions, with the exception of tcheck, set CR0 with the +// value of the MSR Transaction State (TS) bits that exist before the +// instruction is executed. For tbegin., the EQ bit in CR0 can be used +// to determine whether the transaction was successfully started (0) or +// failed (1). We use an XORI pattern to 'flip' the bit to match the +// tbegin builtin API which defines a return value of 1 as success. 
+ +def : Pat<(int_ppc_tbegin i32:$R), + (XORI + (EXTRACT_SUBREG ( + TBEGIN (HTM_get_imm imm:$R)), sub_eq), + 1)>; + +def : Pat<(int_ppc_tend i32:$R), + (TEND (HTM_get_imm imm:$R))>; + + +def : Pat<(int_ppc_tabort i32:$R), + (TABORT $R)>; + +def : Pat<(int_ppc_tabortwc i32:$TO, i32:$RA, i32:$RB), + (TABORTWC (HTM_get_imm imm:$TO), $RA, $RB)>; + +def : Pat<(int_ppc_tabortwci i32:$TO, i32:$RA, i32:$SI), + (TABORTWCI (HTM_get_imm imm:$TO), $RA, (HTM_get_imm imm:$SI))>; + +def : Pat<(int_ppc_tabortdc i32:$TO, i32:$RA, i32:$RB), + (TABORTDC (HTM_get_imm imm:$TO), $RA, $RB)>; + +def : Pat<(int_ppc_tabortdci i32:$TO, i32:$RA, i32:$SI), + (TABORTDCI (HTM_get_imm imm:$TO), $RA, (HTM_get_imm imm:$SI))>; + +def : Pat<(int_ppc_tcheck), + (TCHECK_RET)>; + +def : Pat<(int_ppc_treclaim i32:$RA), + (TRECLAIM $RA)>; + +def : Pat<(int_ppc_trechkpt), + (TRECHKPT)>; + +def : Pat<(int_ppc_tsr i32:$L), + (TSR (HTM_get_imm imm:$L))>; + +def : Pat<(int_ppc_get_texasr), + (MFSPR8 130)>; + +def : Pat<(int_ppc_get_texasru), + (MFSPR8 131)>; + +def : Pat<(int_ppc_get_tfhar), + (MFSPR8 128)>; + +def : Pat<(int_ppc_get_tfiar), + (MFSPR8 129)>; + + +def : Pat<(int_ppc_set_texasr i64:$V), + (MTSPR8 130, $V)>; + +def : Pat<(int_ppc_set_texasru i64:$V), + (MTSPR8 131, $V)>; + +def : Pat<(int_ppc_set_tfhar i64:$V), + (MTSPR8 128, $V)>; + +def : Pat<(int_ppc_set_tfiar i64:$V), + (MTSPR8 129, $V)>; + + +// Extended mnemonics +def : Pat<(int_ppc_tendall), + (TEND 1)>; + +def : Pat<(int_ppc_tresume), + (TSR 1)>; + +def : Pat<(int_ppc_tsuspend), + (TSR 0)>; + +def : Pat<(i64 (int_ppc_ttest)), + (RLDICL (i64 (COPY (TABORTWCI 0, ZERO, 0))), 36, 28)>; + +} // [HasHTM] diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index fe9474a..c9c2949 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -61,7 +61,7 @@ void PPCInstrInfo::anchor() {} PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI) : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP), - Subtarget(STI), RI(STI) {} + Subtarget(STI), RI(STI.getTargetMachine()) {} /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for /// this target when scheduling the DAG. 
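The patterns above make the HTM intrinsics reachable from IR; the XORI over the EQ bit of tbegin.'s CR0 result is what gives the builtin the 1-means-success convention described in the comment. A hedged sketch of how that convention is consumed, assuming a compiler that exposes the PowerPC HTM builtins __builtin_tbegin and __builtin_tend and a target with the htm feature (the function and variable names are illustrative):

// Try the update inside a hardware transaction; fall back to an atomic
// read-modify-write when the transaction fails to start or aborts.
static int Counter;

void increment() {
  if (__builtin_tbegin(0)) {  // returns 1 when the transaction started
    ++Counter;                // executes transactionally
    __builtin_tend(0);        // commit
  } else {
    __atomic_fetch_add(&Counter, 1, __ATOMIC_SEQ_CST);  // fallback path
  }
}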
@@ -113,9 +113,8 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, const MachineOperand &DefMO = DefMI->getOperand(DefIdx); unsigned Reg = DefMO.getReg(); - const TargetRegisterInfo *TRI = &getRegisterInfo(); bool IsRegCR; - if (TRI->isVirtualRegister(Reg)) { + if (TargetRegisterInfo::isVirtualRegister(Reg)) { const MachineRegisterInfo *MRI = &DefMI->getParent()->getParent()->getRegInfo(); IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) || @@ -697,6 +696,33 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB, .addReg(Cond[1].getReg(), 0, SubIdx); } +static unsigned getCRBitValue(unsigned CRBit) { + unsigned Ret = 4; + if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT || + CRBit == PPC::CR2LT || CRBit == PPC::CR3LT || + CRBit == PPC::CR4LT || CRBit == PPC::CR5LT || + CRBit == PPC::CR6LT || CRBit == PPC::CR7LT) + Ret = 3; + if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT || + CRBit == PPC::CR2GT || CRBit == PPC::CR3GT || + CRBit == PPC::CR4GT || CRBit == PPC::CR5GT || + CRBit == PPC::CR6GT || CRBit == PPC::CR7GT) + Ret = 2; + if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ || + CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ || + CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ || + CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ) + Ret = 1; + if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN || + CRBit == PPC::CR2UN || CRBit == PPC::CR3UN || + CRBit == PPC::CR4UN || CRBit == PPC::CR5UN || + CRBit == PPC::CR6UN || CRBit == PPC::CR7UN) + Ret = 0; + + assert(Ret != 4 && "Invalid CR bit register"); + return Ret; +} + void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -742,6 +768,32 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, SrcReg = SuperReg; } + // Different class register copy + if (PPC::CRBITRCRegClass.contains(SrcReg) && + PPC::GPRCRegClass.contains(DestReg)) { + unsigned CRReg = getCRFromCRBit(SrcReg); + BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg) + .addReg(CRReg, getKillRegState(KillSrc)); + // Rotate the CR bit in the CR fields to be the least significant bit and + // then mask with 0x1 (MB = ME = 31).
+ BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg) + .addReg(DestReg, RegState::Kill) + .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg))) + .addImm(31) + .addImm(31); + return; + } else if (PPC::CRRCRegClass.contains(SrcReg) && + PPC::G8RCRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(PPC::MFOCRF8), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } else if (PPC::CRRCRegClass.contains(SrcReg) && + PPC::GPRCRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + unsigned Opc; if (PPC::GPRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::OR; diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 4add6f9..7fd076a 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -63,7 +63,7 @@ enum PPC970_Unit { }; } // end namespace PPCII - +class PPCSubtarget; class PPCInstrInfo : public PPCGenInstrInfo { PPCSubtarget &Subtarget; const PPCRegisterInfo RI; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 1a045b1..5eff156 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -46,13 +46,6 @@ def SDT_PPCstbrx : SDTypeProfile<0, 3, [ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> ]>; -def SDT_PPClarx : SDTypeProfile<1, 1, [ - SDTCisInt<0>, SDTCisPtrTy<1> -]>; -def SDT_PPCstcx : SDTypeProfile<0, 2, [ - SDTCisInt<0>, SDTCisPtrTy<1> -]>; - def SDT_PPCTC_ret : SDTypeProfile<0, 2, [ SDTCisPtrTy<0>, SDTCisVT<1, i32> ]>; @@ -225,12 +218,6 @@ def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -// Instructions to support atomic operations -def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx, - [SDNPHasChain, SDNPMayLoad]>; -def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx, - [SDNPHasChain, SDNPMayStore]>; - // Instructions to support dynamic alloca.
def SDTDynOp : SDTypeProfile<1, 2, []>; def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>; @@ -445,6 +432,18 @@ def PPCRegCRRCAsmOperand : AsmOperandClass { def crrc : RegisterOperand<CRRC> { let ParserMatchClass = PPCRegCRRCAsmOperand; } +def crrc0 : RegisterOperand<CRRC0> { + let ParserMatchClass = PPCRegCRRCAsmOperand; +} + +def PPCU1ImmAsmOperand : AsmOperandClass { + let Name = "U1Imm"; let PredicateMethod = "isU1Imm"; + let RenderMethod = "addImmOperands"; +} +def u1imm : Operand<i32> { + let PrintMethod = "printU1ImmOperand"; + let ParserMatchClass = PPCU1ImmAsmOperand; +} def PPCU2ImmAsmOperand : AsmOperandClass { let Name = "U2Imm"; let PredicateMethod = "isU2Imm"; @@ -455,6 +454,15 @@ def u2imm : Operand<i32> { let ParserMatchClass = PPCU2ImmAsmOperand; } +def PPCU3ImmAsmOperand : AsmOperandClass { + let Name = "U3Imm"; let PredicateMethod = "isU3Imm"; + let RenderMethod = "addImmOperands"; +} +def u3imm : Operand<i32> { + let PrintMethod = "printU3ImmOperand"; + let ParserMatchClass = PPCU3ImmAsmOperand; +} + def PPCU4ImmAsmOperand : AsmOperandClass { let Name = "U4Imm"; let PredicateMethod = "isU4Imm"; let RenderMethod = "addImmOperands"; @@ -715,7 +723,7 @@ def IsPPC6xx : Predicate<"PPCSubTarget->isPPC6xx()">; def IsE500 : Predicate<"PPCSubTarget->isE500()">; def HasSPE : Predicate<"PPCSubTarget->HasSPE()">; def HasICBT : Predicate<"PPCSubTarget->hasICBT()">; - +def HasPartwordAtomics : Predicate<"PPCSubTarget->hasPartwordAtomics()">; def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">; @@ -1446,15 +1454,44 @@ let usesCustomInserter = 1 in { } // Instructions to support atomic operations +let mayLoad = 1, hasSideEffects = 0 in { +def LBARX : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src), + "lbarx $rD, $src", IIC_LdStLWARX, []>, + Requires<[HasPartwordAtomics]>; + +def LHARX : XForm_1<31, 116, (outs gprc:$rD), (ins memrr:$src), + "lharx $rD, $src", IIC_LdStLWARX, []>, + Requires<[HasPartwordAtomics]>; + def LWARX : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src), - "lwarx $rD, $src", IIC_LdStLWARX, - [(set i32:$rD, (PPClarx xoaddr:$src))]>; + "lwarx $rD, $src", IIC_LdStLWARX, []>; + +// Instructions to support lock versions of atomics +// (EH=1 - see Power ISA 2.07 Book II 4.4.2) +def LBARXL : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src), + "lbarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT, + Requires<[HasPartwordAtomics]>; + +def LHARXL : XForm_1<31, 116, (outs gprc:$rD), (ins memrr:$src), + "lharx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT, + Requires<[HasPartwordAtomics]>; + +def LWARXL : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src), + "lwarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT; +} + +let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in { +def STBCX : XForm_1<31, 694, (outs), (ins gprc:$rS, memrr:$dst), + "stbcx. $rS, $dst", IIC_LdStSTWCX, []>, + isDOT, Requires<[HasPartwordAtomics]>; + +def STHCX : XForm_1<31, 726, (outs), (ins gprc:$rS, memrr:$dst), + "sthcx. $rS, $dst", IIC_LdStSTWCX, []>, + isDOT, Requires<[HasPartwordAtomics]>; -let Defs = [CR0] in def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst), - "stwcx. $rS, $dst", IIC_LdStSTWCX, - [(PPCstcx i32:$rS, xoaddr:$dst)]>, - isDOT; + "stwcx. 
$rS, $dst", IIC_LdStSTWCX, []>, isDOT; +} let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>; @@ -1473,7 +1510,7 @@ def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB), // // Unindexed (r+i) Loads. -let canFoldAsLoad = 1, PPC970_Unit = 2 in { +let PPC970_Unit = 2 in { def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src), "lbz $rD, $src", IIC_LdStLoad, [(set i32:$rD, (zextloadi8 iaddr:$src))]>; @@ -1570,7 +1607,7 @@ def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), // Indexed (r+r) Loads. // -let canFoldAsLoad = 1, PPC970_Unit = 2 in { +let PPC970_Unit = 2 in { def LBZX : XForm_1<31, 87, (outs gprc:$rD), (ins memrr:$src), "lbzx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (zextloadi8 xaddr:$src))]>; @@ -2683,6 +2720,7 @@ include "PPCInstrSPE.td" include "PPCInstr64Bit.td" include "PPCInstrVSX.td" include "PPCInstrQPX.td" +include "PPCInstrHTM.td" def crnot : OutPatFrag<(ops node:$in), (CRNOR $in, $in)>; diff --git a/lib/Target/PowerPC/PPCInstrQPX.td b/lib/Target/PowerPC/PPCInstrQPX.td index c984d46..5c66b42 100644 --- a/lib/Target/PowerPC/PPCInstrQPX.td +++ b/lib/Target/PowerPC/PPCInstrQPX.td @@ -501,7 +501,7 @@ let Uses = [RM] in { "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>; // Load indexed instructions - let mayLoad = 1, canFoldAsLoad = 1 in { + let mayLoad = 1 in { def QVLFDX : XForm_1<31, 583, (outs qfrc:$FRT), (ins memrr:$src), "qvlfdx $FRT, $src", IIC_LdStLFD, diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index d6cb3a0..ec04da4 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -66,7 +66,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects. 
let Uses = [RM] in { // Load indexed instructions - let mayLoad = 1, canFoldAsLoad = 1 in { + let mayLoad = 1 in { def LXSDX : XX1Form<31, 588, (outs vsfrc:$XT), (ins memrr:$src), "lxsdx $XT, $src", IIC_LdStLFD, diff --git a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp index efd2d92..005bcaf 100644 --- a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp +++ b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp @@ -104,7 +104,7 @@ FunctionPass *llvm::createPPCLoopDataPrefetchPass() { return new PPCLoopDataPref bool PPCLoopDataPrefetch::runOnFunction(Function &F) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); SE = &getAnalysis<ScalarEvolution>(); - DL = F.getParent()->getDataLayout(); + DL = &F.getParent()->getDataLayout(); AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); @@ -192,7 +192,7 @@ bool PPCLoopDataPrefetch::runOnLoop(Loop *L) { const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, K->second); if (const SCEVConstant *ConstPtrDiff = dyn_cast<SCEVConstant>(PtrDiff)) { - int64_t PD = abs64(ConstPtrDiff->getValue()->getSExtValue()); + int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue()); if (PD < (int64_t) CacheLineSize) { DupPref = true; break; @@ -211,7 +211,7 @@ bool PPCLoopDataPrefetch::runOnLoop(Loop *L) { PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec)); Type *I8Ptr = Type::getInt8PtrTy((*I)->getContext(), PtrAddrSpace); - SCEVExpander SCEVE(*SE, "prefaddr"); + SCEVExpander SCEVE(*SE, J->getModule()->getDataLayout(), "prefaddr"); Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI); IRBuilder<> Builder(MemI); diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp index df65227..092a4ef 100644 --- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp +++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" @@ -84,7 +85,6 @@ namespace { PPCTargetMachine *TM; LoopInfo *LI; ScalarEvolution *SE; - const DataLayout *DL; }; } @@ -141,9 +141,6 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); SE = &getAnalysis<ScalarEvolution>(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; - bool MadeChange = false; for (LoopInfo::iterator I = LI->begin(), E = LI->end(); @@ -158,9 +155,6 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) { bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { bool MadeChange = false; - if (!DL) - return MadeChange; - // Only prep. the inner-most loop if (!L->empty()) return MadeChange; @@ -261,6 +255,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { Value *BasePtr = GetPointerOperand(MemI); assert(BasePtr && "No pointer operand"); + Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext()); Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(), BasePtr->getType()->getPointerAddressSpace()); @@ -280,7 +275,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { MemI->hasName() ? 
MemI->getName() + ".phi" : "", Header->getFirstNonPHI()); - SCEVExpander SCEVE(*SE, "pistart"); + SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart"); Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy, LoopPredecessor->getTerminator()); @@ -295,8 +290,8 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { } Instruction *InsPoint = Header->getFirstInsertionPt(); - GetElementPtrInst *PtrInc = - GetElementPtrInst::Create(NewPHI, BasePtrIncSCEV->getValue(), + GetElementPtrInst *PtrInc = GetElementPtrInst::Create( + I8Ty, NewPHI, BasePtrIncSCEV->getValue(), MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint); PtrInc->setIsInBounds(IsPtrInBounds(BasePtr)); for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); @@ -341,9 +336,9 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { PtrIP = PtrIP->getParent()->getFirstInsertionPt(); else if (!PtrIP) PtrIP = I->second; - - GetElementPtrInst *NewPtr = - GetElementPtrInst::Create(PtrInc, Diff->getValue(), + + GetElementPtrInst *NewPtr = GetElementPtrInst::Create( + I8Ty, PtrInc, Diff->getValue(), I->second->hasName() ? I->second->getName() + ".off" : "", PtrIP); if (!PtrIP) NewPtr->insertAfter(cast<Instruction>(PtrInc)); diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 819738b..0965cb3 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -184,6 +184,9 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, llvm_unreachable("unknown operand type"); case MachineOperand::MO_Register: assert(!MO.getSubReg() && "Subregs should be eliminated!"); + assert(MO.getReg() > PPC::NoRegister && + MO.getReg() < PPC::NUM_TARGET_REGS && + "Invalid register for this target!"); MCOp = MCOperand::CreateReg(MO.getReg()); break; case MachineOperand::MO_Immediate: diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index c9a9684..0e568d3 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -18,6 +18,7 @@ #include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" #include "PPCSubtarget.h" +#include "PPCTargetMachine.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -56,11 +57,11 @@ static cl::opt<bool> AlwaysBasePointer("ppc-always-use-base-pointer", cl::Hidden, cl::init(false), cl::desc("Force the use of a base pointer in every function")); -PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST) - : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR, - ST.isPPC64() ? 0 : 1, - ST.isPPC64() ? 0 : 1), - Subtarget(ST) { +PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM) + : PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR, + TM.isPPC64() ? 0 : 1, + TM.isPPC64() ? 0 : 1), + TM(TM) { ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX; ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX; ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX; @@ -87,18 +88,19 @@ PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) // Note that PPCInstrInfo::FoldImmediate also directly uses this Kind value // when it checks for ZERO folding. 
if (Kind == 1) { - if (Subtarget.isPPC64()) + if (TM.isPPC64()) return &PPC::G8RC_NOX0RegClass; return &PPC::GPRC_NOR0RegClass; } - if (Subtarget.isPPC64()) + if (TM.isPPC64()) return &PPC::G8RCRegClass; return &PPC::GPRCRegClass; } const MCPhysReg* PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + const PPCSubtarget &Subtarget = MF->getSubtarget<PPCSubtarget>(); if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) { if (Subtarget.hasVSX()) return CSR_64_AllRegs_VSX_SaveList; @@ -108,28 +110,28 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } if (Subtarget.isDarwinABI()) - return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ? - CSR_Darwin64_Altivec_SaveList : - CSR_Darwin64_SaveList) : - (Subtarget.hasAltivec() ? - CSR_Darwin32_Altivec_SaveList : - CSR_Darwin32_SaveList); + return TM.isPPC64() + ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_SaveList + : CSR_Darwin64_SaveList) + : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_SaveList + : CSR_Darwin32_SaveList); // On PPC64, we might need to save r2 (but only if it is not reserved). bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2); - return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ? - (SaveR2 ? CSR_SVR464_R2_Altivec_SaveList : - CSR_SVR464_Altivec_SaveList) : - (SaveR2 ? CSR_SVR464_R2_SaveList : - CSR_SVR464_SaveList)) : - (Subtarget.hasAltivec() ? - CSR_SVR432_Altivec_SaveList : - CSR_SVR432_SaveList); + return TM.isPPC64() + ? (Subtarget.hasAltivec() + ? (SaveR2 ? CSR_SVR464_R2_Altivec_SaveList + : CSR_SVR464_Altivec_SaveList) + : (SaveR2 ? CSR_SVR464_R2_SaveList : CSR_SVR464_SaveList)) + : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_SaveList + : CSR_SVR432_SaveList); } -const uint32_t* -PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { +const uint32_t * +PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); if (CC == CallingConv::AnyReg) { if (Subtarget.hasVSX()) return CSR_64_AllRegs_VSX_RegMask; @@ -139,19 +141,15 @@ PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { } if (Subtarget.isDarwinABI()) - return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ? - CSR_Darwin64_Altivec_RegMask : - CSR_Darwin64_RegMask) : - (Subtarget.hasAltivec() ? - CSR_Darwin32_Altivec_RegMask : - CSR_Darwin32_RegMask); - - return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ? - CSR_SVR464_Altivec_RegMask : - CSR_SVR464_RegMask) : - (Subtarget.hasAltivec() ? - CSR_SVR432_Altivec_RegMask : - CSR_SVR432_RegMask); + return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_RegMask + : CSR_Darwin64_RegMask) + : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_RegMask + : CSR_Darwin32_RegMask); + + return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR464_Altivec_RegMask + : CSR_SVR464_RegMask) + : (Subtarget.hasAltivec() ? 
CSR_SVR432_Altivec_RegMask + : CSR_SVR432_RegMask); } const uint32_t* @@ -160,15 +158,13 @@ PPCRegisterInfo::getNoPreservedMask() const { } void PPCRegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const { - unsigned PseudoRegs[] = { PPC::ZERO, PPC::ZERO8, PPC::RM }; - for (unsigned i = 0, ie = array_lengthof(PseudoRegs); i != ie; ++i) { - unsigned Reg = PseudoRegs[i]; - Mask[Reg / 32] &= ~(1u << (Reg % 32)); - } + for (unsigned PseudoReg : {PPC::ZERO, PPC::ZERO8, PPC::RM}) + Mask[PseudoReg / 32] &= ~(1u << (PseudoReg % 32)); } BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering()); @@ -207,7 +203,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { } // On PPC64, r13 is the thread pointer. Never allocate this register. - if (Subtarget.isPPC64()) { + if (TM.isPPC64()) { Reserved.set(PPC::R13); Reserved.set(PPC::X1); @@ -238,15 +234,15 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::R31); if (hasBasePointer(MF)) { - if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() && - MF.getTarget().getRelocationModel() == Reloc::PIC_) + if (Subtarget.isSVR4ABI() && !TM.isPPC64() && + TM.getRelocationModel() == Reloc::PIC_) Reserved.set(PPC::R29); else Reserved.set(PPC::R30); } - if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() && - MF.getTarget().getRelocationModel() == Reloc::PIC_) + if (Subtarget.isSVR4ABI() && !TM.isPPC64() && + TM.getRelocationModel() == Reloc::PIC_) Reserved.set(PPC::R30); // Reserve Altivec registers when Altivec is unavailable. @@ -260,6 +256,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); const unsigned DefaultSafety = 1; @@ -291,8 +288,10 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, } } -const TargetRegisterClass *PPCRegisterInfo::getLargestLegalSuperClass( - const TargetRegisterClass *RC) const { +const TargetRegisterClass * +PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, + const MachineFunction &MF) const { + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); if (Subtarget.hasVSX()) { // With VSX, we can inflate various sub-register classes to the full VSX // register set. @@ -303,7 +302,7 @@ const TargetRegisterClass *PPCRegisterInfo::getLargestLegalSuperClass( return &PPC::VSRCRegClass; } - return TargetRegisterInfo::getLargestLegalSuperClass(RC); + return TargetRegisterInfo::getLargestLegalSuperClass(RC, MF); } //===----------------------------------------------------------------------===// @@ -326,10 +325,11 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { MachineFunction &MF = *MBB.getParent(); // Get the frame info. MachineFrameInfo *MFI = MF.getFrameInfo(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); // Get the instruction info. const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); // Determine whether 64-bit pointers are used. - bool LP64 = Subtarget.isPPC64(); + bool LP64 = TM.isPPC64(); DebugLoc dl = MI.getDebugLoc(); // Get the maximum call stack size. 
@@ -443,10 +443,11 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); - bool LP64 = Subtarget.isPPC64(); + bool LP64 = TM.isPPC64(); const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; @@ -487,10 +488,11 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); - bool LP64 = Subtarget.isPPC64(); + bool LP64 = TM.isPPC64(); const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; @@ -522,37 +524,6 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, MBB.erase(II); } -static unsigned getCRFromCRBit(unsigned SrcReg) { - unsigned Reg = 0; - if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT || - SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN) - Reg = PPC::CR0; - else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT || - SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN) - Reg = PPC::CR1; - else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT || - SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN) - Reg = PPC::CR2; - else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT || - SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN) - Reg = PPC::CR3; - else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT || - SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN) - Reg = PPC::CR4; - else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT || - SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN) - Reg = PPC::CR5; - else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT || - SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN) - Reg = PPC::CR6; - else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT || - SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN) - Reg = PPC::CR7; - - assert(Reg != 0 && "Invalid CR bit register"); - return Reg; -} - void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex) const { // Get the instruction. @@ -560,10 +531,11 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); - bool LP64 = Subtarget.isPPC64(); + bool LP64 = TM.isPPC64(); const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; @@ -603,10 +575,11 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II, // Get the instruction's basic block. 
MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); - bool LP64 = Subtarget.isPPC64(); + bool LP64 = TM.isPPC64(); const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; @@ -650,6 +623,7 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II, // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); @@ -675,6 +649,7 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II, // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); @@ -697,14 +672,14 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II, bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const { - + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); // For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4 // ABI, return true to prevent allocating an additional frame slot. // For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0 // is arbitrary and will be subsequently ignored. For 32-bit, we have // previously created the stack slot if needed, so return its FrameIdx. if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) { - if (Subtarget.isPPC64()) + if (TM.isPPC64()) FrameIdx = 0; else { const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); @@ -757,6 +732,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineBasicBlock &MBB = *MI.getParent(); // Get the basic block's function. MachineFunction &MF = *MBB.getParent(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); // Get the instruction info. const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); // Get the frame info. @@ -847,7 +823,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // The offset doesn't fit into a single register, scavenge one to build the // offset in. - bool is64Bit = Subtarget.isPPC64(); + bool is64Bit = TM.isPPC64(); const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC; @@ -885,23 +861,25 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const { + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); - if (!Subtarget.isPPC64()) + if (!TM.isPPC64()) return TFI->hasFP(MF) ? PPC::R31 : PPC::R1; else return TFI->hasFP(MF) ? 
PPC::X31 : PPC::X1; } unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const { + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); if (!hasBasePointer(MF)) return getFrameRegister(MF); - if (Subtarget.isPPC64()) + if (TM.isPPC64()) return PPC::X30; if (Subtarget.isSVR4ABI() && - MF.getTarget().getRelocationModel() == Reloc::PIC_) + TM.getRelocationModel() == Reloc::PIC_) return PPC::R29; return PPC::R30; @@ -927,6 +905,7 @@ bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const { } bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const { + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); unsigned StackAlign = Subtarget.getFrameLowering()->getStackAlignment(); @@ -964,7 +943,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { MachineBasicBlock &MBB = *MI->getParent(); MachineFunction &MF = *MBB.getParent(); - + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering()); unsigned StackEst = @@ -983,7 +962,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // The frame pointer will point to the end of the stack, so estimate the // offset as the difference between the object offset and the FP location. - return !isFrameOffsetLegal(MI, Offset); + return !isFrameOffsetLegal(MI, getBaseRegister(MF), Offset); } /// Insert defining instruction(s) for BaseReg to @@ -992,7 +971,7 @@ void PPCRegisterInfo:: materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg, int FrameIdx, int64_t Offset) const { - unsigned ADDriOpc = Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI; + unsigned ADDriOpc = TM.isPPC64() ? 
PPC::ADDI8 : PPC::ADDI; MachineBasicBlock::iterator Ins = MBB->begin(); DebugLoc DL; // Defaults to "unknown" @@ -1000,6 +979,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, DL = Ins->getDebugLoc(); const MachineFunction &MF = *MBB->getParent(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); const MCInstrDesc &MCID = TII.get(ADDriOpc); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); @@ -1025,6 +1005,7 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); const MCInstrDesc &MCID = MI.getDesc(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -1033,6 +1014,7 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, } bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, + unsigned BaseReg, int64_t Offset) const { unsigned FIOperandNum = 0; while (!MI->getOperand(FIOperandNum).isFI()) { diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 4c2ef90..d304e1d 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -22,15 +22,44 @@ #include "PPCGenRegisterInfo.inc" namespace llvm { -class PPCSubtarget; -class TargetInstrInfo; -class Type; + +inline static unsigned getCRFromCRBit(unsigned SrcReg) { + unsigned Reg = 0; + if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT || + SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN) + Reg = PPC::CR0; + else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT || + SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN) + Reg = PPC::CR1; + else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT || + SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN) + Reg = PPC::CR2; + else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT || + SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN) + Reg = PPC::CR3; + else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT || + SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN) + Reg = PPC::CR4; + else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT || + SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN) + Reg = PPC::CR5; + else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT || + SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN) + Reg = PPC::CR6; + else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT || + SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN) + Reg = PPC::CR7; + + assert(Reg != 0 && "Invalid CR bit register"); + return Reg; +} + class PPCRegisterInfo : public PPCGenRegisterInfo { DenseMap<unsigned, unsigned> ImmToIdxMap; - const PPCSubtarget &Subtarget; + const PPCTargetMachine &TM; public: - PPCRegisterInfo(const PPCSubtarget &SubTarget); + PPCRegisterInfo(const PPCTargetMachine &TM); /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. @@ -40,13 +69,14 @@ public: unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; - const TargetRegisterClass* - getLargestLegalSuperClass(const TargetRegisterClass *RC) const override; + const TargetRegisterClass * + getLargestLegalSuperClass(const TargetRegisterClass *RC, + const MachineFunction &MF) const override; /// Code Generation virtual methods... 
- const MCPhysReg * - getCalleeSavedRegs(const MachineFunction* MF =nullptr) const override; - const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const override; const uint32_t *getNoPreservedMask() const; void adjustStackMapLiveOutMask(uint32_t *Mask) const override; @@ -97,7 +127,7 @@ public: int64_t Offset) const override; void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, int64_t Offset) const override; - bool isFrameOffsetLegal(const MachineInstr *MI, + bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg, int64_t Offset) const override; // Debug information queries. diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 9a7df96..6ca68ed 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -341,6 +341,8 @@ def CRBITRC : RegisterClass<"PPC", [i1], 32, def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6, CR7, CR2, CR3, CR4)>; +def CRRC0 : RegisterClass<"PPC", [i32], 32, (add CR0)>; + // The CTR registers are not allocatable because they're used by the // decrement-and-branch instructions, and thus need to stay live across // multiple basic blocks. diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td index 2f3a1f9..d0954a1 100644 --- a/lib/Target/PowerPC/PPCSchedule.td +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -124,401 +124,3 @@ include "PPCScheduleP8.td" include "PPCScheduleA2.td" include "PPCScheduleE500mc.td" include "PPCScheduleE5500.td" - -//===----------------------------------------------------------------------===// -// Instruction to itinerary class map - When add new opcodes to the supported -// set, refer to the following table to determine which itinerary class the -// opcode belongs. -// -// opcode itinerary class -// ====== =============== -// add IIC_IntSimple -// addc IIC_IntGeneral -// adde IIC_IntGeneral -// addi IIC_IntSimple -// addic IIC_IntGeneral -// addic. IIC_IntGeneral -// addis IIC_IntSimple -// addme IIC_IntGeneral -// addze IIC_IntGeneral -// and IIC_IntSimple -// andc IIC_IntSimple -// andi. IIC_IntGeneral -// andis. 
IIC_IntGeneral -// b IIC_BrB -// bc IIC_BrB -// bcctr IIC_BrB -// bclr IIC_BrB -// cmp IIC_IntCompare -// cmpi IIC_IntCompare -// cmpl IIC_IntCompare -// cmpli IIC_IntCompare -// cntlzd IIC_IntRotateD -// cntlzw IIC_IntGeneral -// crand IIC_BrCR -// crandc IIC_BrCR -// creqv IIC_BrCR -// crnand IIC_BrCR -// crnor IIC_BrCR -// cror IIC_BrCR -// crorc IIC_BrCR -// crxor IIC_BrCR -// dcba IIC_LdStDCBA -// dcbf IIC_LdStDCBF -// dcbi IIC_LdStDCBI -// dcbst IIC_LdStDCBF -// dcbt IIC_LdStLoad -// dcbtst IIC_LdStLoad -// dcbz IIC_LdStDCBF -// divd IIC_IntDivD -// divdu IIC_IntDivD -// divw IIC_IntDivW -// divwu IIC_IntDivW -// dss IIC_LdStDSS -// dst IIC_LdStDSS -// dstst IIC_LdStDSS -// eciwx IIC_LdStLoad -// ecowx IIC_LdStLoad -// eieio IIC_LdStLoad -// eqv IIC_IntSimple -// extsb IIC_IntSimple -// extsh IIC_IntSimple -// extsw IIC_IntSimple -// fabs IIC_FPGeneral -// fadd IIC_FPAddSub -// fadds IIC_FPGeneral -// fcfid IIC_FPGeneral -// fcmpo IIC_FPCompare -// fcmpu IIC_FPCompare -// fctid IIC_FPGeneral -// fctidz IIC_FPGeneral -// fctiw IIC_FPGeneral -// fctiwz IIC_FPGeneral -// fdiv IIC_FPDivD -// fdivs IIC_FPDivS -// fmadd IIC_FPFused -// fmadds IIC_FPGeneral -// fmr IIC_FPGeneral -// fmsub IIC_FPFused -// fmsubs IIC_FPGeneral -// fmul IIC_FPFused -// fmuls IIC_FPGeneral -// fnabs IIC_FPGeneral -// fneg IIC_FPGeneral -// fnmadd IIC_FPFused -// fnmadds IIC_FPGeneral -// fnmsub IIC_FPFused -// fnmsubs IIC_FPGeneral -// fres IIC_FPRes -// frsp IIC_FPGeneral -// frsqrte IIC_FPGeneral -// fsel IIC_FPGeneral -// fsqrt IIC_FPSqrtD -// fsqrts IIC_FPSqrtS -// fsub IIC_FPAddSub -// fsubs IIC_FPGeneral -// icbi IIC_LdStICBI -// isel IIC_IntISEL -// isync IIC_SprISYNC -// lbz IIC_LdStLoad -// lbzu IIC_LdStLoadUpd -// lbzux IIC_LdStLoadUpdX -// lbzx IIC_LdStLoad -// ld IIC_LdStLD -// ldarx IIC_LdStLDARX -// ldu IIC_LdStLDU -// ldux IIC_LdStLDUX -// ldx IIC_LdStLD -// lfd IIC_LdStLFD -// lfdu IIC_LdStLFDU -// lfdux IIC_LdStLFDUX -// lfdx IIC_LdStLFD -// lfs IIC_LdStLFD -// lfsu IIC_LdStLFDU -// lfsux IIC_LdStLFDUX -// lfsx IIC_LdStLFD -// lha IIC_LdStLHA -// lhau IIC_LdStLHAU -// lhaux IIC_LdStLHAUX -// lhax IIC_LdStLHA -// lhbrx IIC_LdStLoad -// lhz IIC_LdStLoad -// lhzu IIC_LdStLoadUpd -// lhzux IIC_LdStLoadUpdX -// lhzx IIC_LdStLoad -// lmw IIC_LdStLMW -// lswi IIC_LdStLMW -// lswx IIC_LdStLMW -// lvebx IIC_LdStLVecX -// lvehx IIC_LdStLVecX -// lvewx IIC_LdStLVecX -// lvsl IIC_LdStLVecX -// lvsr IIC_LdStLVecX -// lvx IIC_LdStLVecX -// lvxl IIC_LdStLVecX -// lwa IIC_LdStLWA -// lwarx IIC_LdStLWARX -// lwaux IIC_LdStLHAUX -// lwax IIC_LdStLHA -// lwbrx IIC_LdStLoad -// lwz IIC_LdStLoad -// lwzu IIC_LdStLoadUpd -// lwzux IIC_LdStLoadUpdX -// lwzx IIC_LdStLoad -// mcrf IIC_BrMCR -// mcrfs IIC_FPGeneral -// mcrxr IIC_BrMCRX -// mfcr IIC_SprMFCR -// mffs IIC_IntMFFS -// mfmsr IIC_SprMFMSR -// mfspr IIC_SprMFSPR -// mfsr IIC_SprMFSR -// mfsrin IIC_SprMFSR -// mftb IIC_SprMFTB -// mfvscr IIC_IntMFVSCR -// mtcrf IIC_BrMCRX -// mtfsb0 IIC_IntMTFSB0 -// mtfsb1 IIC_IntMTFSB0 -// mtfsf IIC_IntMTFSB0 -// mtfsfi IIC_IntMTFSB0 -// mtmsr IIC_SprMTMSR -// mtmsrd IIC_LdStLD -// mtspr IIC_SprMTSPR -// mtsr IIC_SprMTSR -// mtsrd IIC_IntMTSRD -// mtsrdin IIC_IntMTSRD -// mtsrin IIC_SprMTSRIN -// mtvscr IIC_IntMFVSCR -// mulhd IIC_IntMulHD -// mulhdu IIC_IntMulHD -// mulhw IIC_IntMulHW -// mulhwu IIC_IntMulHWU -// mulld IIC_IntMulHD -// mulli IIC_IntMulLI -// mullw IIC_IntMulHW -// nand IIC_IntSimple -// neg IIC_IntSimple -// nor IIC_IntSimple -// or IIC_IntSimple -// orc IIC_IntSimple -// ori IIC_IntSimple -// oris 
IIC_IntSimple -// rfi IIC_SprRFI -// rfid IIC_IntRFID -// rldcl IIC_IntRotateD -// rldcr IIC_IntRotateD -// rldic IIC_IntRotateDI -// rldicl IIC_IntRotateDI -// rldicr IIC_IntRotateDI -// rldimi IIC_IntRotateDI -// rlwimi IIC_IntRotate -// rlwinm IIC_IntGeneral -// rlwnm IIC_IntGeneral -// sc IIC_SprSC -// slbia IIC_LdStSLBIA -// slbie IIC_LdStSLBIE -// sld IIC_IntRotateD -// slw IIC_IntGeneral -// srad IIC_IntRotateD -// sradi IIC_IntRotateDI -// sraw IIC_IntShift -// srawi IIC_IntShift -// srd IIC_IntRotateD -// srw IIC_IntGeneral -// stb IIC_LdStStore -// stbu IIC_LdStStoreUpd -// stbux IIC_LdStStoreUpd -// stbx IIC_LdStStore -// std IIC_LdStSTD -// stdcx. IIC_LdStSTDCX -// stdu IIC_LdStSTDU -// stdux IIC_LdStSTDUX -// stdx IIC_LdStSTD -// stfd IIC_LdStSTFD -// stfdu IIC_LdStSTFDU -// stfdux IIC_LdStSTFDU -// stfdx IIC_LdStSTFD -// stfiwx IIC_LdStSTFD -// stfs IIC_LdStSTFD -// stfsu IIC_LdStSTFDU -// stfsux IIC_LdStSTFDU -// stfsx IIC_LdStSTFD -// sth IIC_LdStStore -// sthbrx IIC_LdStStore -// sthu IIC_LdStStoreUpd -// sthux IIC_LdStStoreUpd -// sthx IIC_LdStStore -// stmw IIC_LdStLMW -// stswi IIC_LdStLMW -// stswx IIC_LdStLMW -// stvebx IIC_LdStSTVEBX -// stvehx IIC_LdStSTVEBX -// stvewx IIC_LdStSTVEBX -// stvx IIC_LdStSTVEBX -// stvxl IIC_LdStSTVEBX -// stw IIC_LdStStore -// stwbrx IIC_LdStStore -// stwcx. IIC_LdStSTWCX -// stwu IIC_LdStStoreUpd -// stwux IIC_LdStStoreUpd -// stwx IIC_LdStStore -// subf IIC_IntGeneral -// subfc IIC_IntGeneral -// subfe IIC_IntGeneral -// subfic IIC_IntGeneral -// subfme IIC_IntGeneral -// subfze IIC_IntGeneral -// sync IIC_LdStSync -// td IIC_IntTrapD -// tdi IIC_IntTrapD -// tlbia IIC_LdStSLBIA -// tlbie IIC_LdStDCBF -// tlbsync IIC_SprTLBSYNC -// tw IIC_IntTrapW -// twi IIC_IntTrapW -// vaddcuw IIC_VecGeneral -// vaddfp IIC_VecFP -// vaddsbs IIC_VecGeneral -// vaddshs IIC_VecGeneral -// vaddsws IIC_VecGeneral -// vaddubm IIC_VecGeneral -// vaddubs IIC_VecGeneral -// vadduhm IIC_VecGeneral -// vadduhs IIC_VecGeneral -// vadduwm IIC_VecGeneral -// vadduws IIC_VecGeneral -// vand IIC_VecGeneral -// vandc IIC_VecGeneral -// vavgsb IIC_VecGeneral -// vavgsh IIC_VecGeneral -// vavgsw IIC_VecGeneral -// vavgub IIC_VecGeneral -// vavguh IIC_VecGeneral -// vavguw IIC_VecGeneral -// vcfsx IIC_VecFP -// vcfux IIC_VecFP -// vcmpbfp IIC_VecFPCompare -// vcmpeqfp IIC_VecFPCompare -// vcmpequb IIC_VecGeneral -// vcmpequh IIC_VecGeneral -// vcmpequw IIC_VecGeneral -// vcmpgefp IIC_VecFPCompare -// vcmpgtfp IIC_VecFPCompare -// vcmpgtsb IIC_VecGeneral -// vcmpgtsh IIC_VecGeneral -// vcmpgtsw IIC_VecGeneral -// vcmpgtub IIC_VecGeneral -// vcmpgtuh IIC_VecGeneral -// vcmpgtuw IIC_VecGeneral -// vctsxs IIC_VecFP -// vctuxs IIC_VecFP -// vexptefp IIC_VecFP -// vlogefp IIC_VecFP -// vmaddfp IIC_VecFP -// vmaxfp IIC_VecFPCompare -// vmaxsb IIC_VecGeneral -// vmaxsh IIC_VecGeneral -// vmaxsw IIC_VecGeneral -// vmaxub IIC_VecGeneral -// vmaxuh IIC_VecGeneral -// vmaxuw IIC_VecGeneral -// vmhaddshs IIC_VecComplex -// vmhraddshs IIC_VecComplex -// vminfp IIC_VecFPCompare -// vminsb IIC_VecGeneral -// vminsh IIC_VecGeneral -// vminsw IIC_VecGeneral -// vminub IIC_VecGeneral -// vminuh IIC_VecGeneral -// vminuw IIC_VecGeneral -// vmladduhm IIC_VecComplex -// vmrghb IIC_VecPerm -// vmrghh IIC_VecPerm -// vmrghw IIC_VecPerm -// vmrglb IIC_VecPerm -// vmrglh IIC_VecPerm -// vmrglw IIC_VecPerm -// vmsubfp IIC_VecFP -// vmsummbm IIC_VecComplex -// vmsumshm IIC_VecComplex -// vmsumshs IIC_VecComplex -// vmsumubm IIC_VecComplex -// vmsumuhm IIC_VecComplex -// vmsumuhs IIC_VecComplex 
-// vmulesb IIC_VecComplex -// vmulesh IIC_VecComplex -// vmuleub IIC_VecComplex -// vmuleuh IIC_VecComplex -// vmulosb IIC_VecComplex -// vmulosh IIC_VecComplex -// vmuloub IIC_VecComplex -// vmulouh IIC_VecComplex -// vnor IIC_VecGeneral -// vor IIC_VecGeneral -// vperm IIC_VecPerm -// vpkpx IIC_VecPerm -// vpkshss IIC_VecPerm -// vpkshus IIC_VecPerm -// vpkswss IIC_VecPerm -// vpkswus IIC_VecPerm -// vpkuhum IIC_VecPerm -// vpkuhus IIC_VecPerm -// vpkuwum IIC_VecPerm -// vpkuwus IIC_VecPerm -// vrefp IIC_VecFPRound -// vrfim IIC_VecFPRound -// vrfin IIC_VecFPRound -// vrfip IIC_VecFPRound -// vrfiz IIC_VecFPRound -// vrlb IIC_VecGeneral -// vrlh IIC_VecGeneral -// vrlw IIC_VecGeneral -// vrsqrtefp IIC_VecFP -// vsel IIC_VecGeneral -// vsl IIC_VecVSL -// vslb IIC_VecGeneral -// vsldoi IIC_VecPerm -// vslh IIC_VecGeneral -// vslo IIC_VecPerm -// vslw IIC_VecGeneral -// vspltb IIC_VecPerm -// vsplth IIC_VecPerm -// vspltisb IIC_VecPerm -// vspltish IIC_VecPerm -// vspltisw IIC_VecPerm -// vspltw IIC_VecPerm -// vsr IIC_VecVSR -// vsrab IIC_VecGeneral -// vsrah IIC_VecGeneral -// vsraw IIC_VecGeneral -// vsrb IIC_VecGeneral -// vsrh IIC_VecGeneral -// vsro IIC_VecPerm -// vsrw IIC_VecGeneral -// vsubcuw IIC_VecGeneral -// vsubfp IIC_VecFP -// vsubsbs IIC_VecGeneral -// vsubshs IIC_VecGeneral -// vsubsws IIC_VecGeneral -// vsububm IIC_VecGeneral -// vsububs IIC_VecGeneral -// vsubuhm IIC_VecGeneral -// vsubuhs IIC_VecGeneral -// vsubuwm IIC_VecGeneral -// vsubuws IIC_VecGeneral -// vsum2sws IIC_VecComplex -// vsum4sbs IIC_VecComplex -// vsum4shs IIC_VecComplex -// vsum4ubs IIC_VecComplex -// vsumsws IIC_VecComplex -// vupkhpx IIC_VecPerm -// vupkhsb IIC_VecPerm -// vupkhsh IIC_VecPerm -// vupklpx IIC_VecPerm -// vupklsb IIC_VecPerm -// vupklsh IIC_VecPerm -// vxor IIC_VecGeneral -// xor IIC_IntSimple -// xori IIC_IntSimple -// xoris IIC_IntSimple -// diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index c91428d..ed88803 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -69,6 +69,7 @@ void PPCSubtarget::initializeEnvironment() { HasVSX = false; HasP8Vector = false; HasP8Altivec = false; + HasP8Crypto = false; HasFCPSGN = false; HasFSQRT = false; HasFRE = false; @@ -94,7 +95,9 @@ void PPCSubtarget::initializeEnvironment() { HasLazyResolverStubs = false; HasICBT = false; HasInvariantFunctionDescriptors = false; + HasPartwordAtomics = false; IsQPXStackUnaligned = false; + HasHTM = false; } void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 247a96d..b4c1bb1 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -90,6 +90,7 @@ protected: bool HasVSX; bool HasP8Vector; bool HasP8Altivec; + bool HasP8Crypto; bool HasFCPSGN; bool HasFSQRT; bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES; @@ -113,6 +114,8 @@ protected: bool IsLittleEndian; bool HasICBT; bool HasInvariantFunctionDescriptors; + bool HasPartwordAtomics; + bool HasHTM; /// When targeting QPX running a stock PPC64 Linux kernel where the stack /// alignment has not been changed, we need to keep the 16-byte alignment @@ -218,6 +221,7 @@ public: bool hasVSX() const { return HasVSX; } bool hasP8Vector() const { return HasP8Vector; } bool hasP8Altivec() const { return HasP8Altivec; } + bool hasP8Crypto() const { return HasP8Crypto; } bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } bool 
hasPOPCNTD() const { return HasPOPCNTD; } @@ -234,6 +238,7 @@ public: bool hasInvariantFunctionDescriptors() const { return HasInvariantFunctionDescriptors; } + bool hasPartwordAtomics() const { return HasPartwordAtomics; } bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; } unsigned getPlatformStackAlignment() const { @@ -242,6 +247,7 @@ public: return 16; } + bool hasHTM() const { return HasHTM; } const Triple &getTargetTriple() const { return TargetTriple; } diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index b219e93..7267529 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -160,11 +160,10 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, computeFSAdditions(FS, OL, TT), Options, RM, - CM, OL), + : LLVMTargetMachine(T, getDataLayoutString(Triple(TT)), TT, CPU, + computeFSAdditions(FS, OL, TT), Options, RM, CM, OL), TLOF(createTLOF(Triple(getTargetTriple()))), - TargetABI(computeTargetABI(Triple(TT), Options)), - DL(getDataLayoutString(Triple(TT))), Subtarget(TT, CPU, TargetFS, *this) { + TargetABI(computeTargetABI(Triple(TT), Options)) { initAsmInfo(); } @@ -208,7 +207,15 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const { // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); - I = llvm::make_unique<PPCSubtarget>(TargetTriple, CPU, FS, *this); + I = llvm::make_unique<PPCSubtarget>( + TargetTriple, CPU, + // FIXME: It would be good to have the subtarget additions here + // not necessary. Anything that turns them on/off (overrides) ends + // up being put at the end of the feature string, but the defaults + // shouldn't require adding them. Fixing this means pulling Feature64Bit + // out of most of the target cpus in the .td file and making it set only + // as part of initialization via the TargetTriple. 
+ computeFSAdditions(FS, getOptLevel(), getTargetTriple()), *this); } return I.get(); } diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 6508484..7a49058 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -29,10 +29,6 @@ public: private: std::unique_ptr<TargetLoweringObjectFile> TLOF; PPCABI TargetABI; - // Calculates type size & alignment - const DataLayout DL; - PPCSubtarget Subtarget; - mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap; public: @@ -42,8 +38,6 @@ public: ~PPCTargetMachine() override; - const DataLayout *getDataLayout() const override { return &DL; } - const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; } const PPCSubtarget *getSubtargetImpl(const Function &F) const override; // Pass Pipeline Configuration diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 073bbb0..b46acd4 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -192,6 +192,10 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L, BaseT::getUnrollingPreferences(L, UP); } +bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) { + return LoopHasReductions; +} + unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) { if (Vector && !ST->hasAltivec() && !ST->hasQPX()) return 0; diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h index cef7079..21acea1 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -78,6 +78,7 @@ public: /// \name Vector TTI Implementations /// @{ + bool enableAggressiveInterleaving(bool LoopHasReductions); unsigned getNumberOfRegisters(bool Vector); unsigned getRegisterBitWidth(bool Vector); unsigned getMaxInterleaveFactor(); diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index 4132b04..dfe988f 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -621,3 +621,10 @@ void foo() { bar(x); __asm__("" ::: "cr2"); } + +//===----------------------------------------------------------------------===// + +Instruction fusion was introduced in ISA 2.06 and more opportunities added in +ISA 2.07. LLVM needs to add infrastructure to recognize fusion opportunities +and force instruction pairs to be scheduled together. + diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt index 1e4c6fb..43d87d3 100644 --- a/lib/Target/PowerPC/README_ALTIVEC.txt +++ b/lib/Target/PowerPC/README_ALTIVEC.txt @@ -209,3 +209,107 @@ vector float f(vector float a, vector float b) { return b; } +//===----------------------------------------------------------------------===// + +We should do a little better with eliminating dead stores. 
+The stores to the stack are dead since %a and %b are not needed
+
+; Function Attrs: nounwind
+define <16 x i8> @test_vpmsumb() #0 {
+ entry:
+ %a = alloca <16 x i8>, align 16
+ %b = alloca <16 x i8>, align 16
+ store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %a, align 16
+ store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %b, align 16
+ %0 = load <16 x i8>* %a, align 16
+ %1 = load <16 x i8>* %b, align 16
+ %2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1)
+ ret <16 x i8> %2
+}
+
+
+; Function Attrs: nounwind readnone
+declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1
+
+
+Produces the following code with -mtriple=powerpc64-unknown-linux-gnu:
+# BB#0: # %entry
+ addis 3, 2, .LCPI0_0@toc@ha
+ addis 4, 2, .LCPI0_1@toc@ha
+ addi 3, 3, .LCPI0_0@toc@l
+ addi 4, 4, .LCPI0_1@toc@l
+ lxvw4x 0, 0, 3
+ addi 3, 1, -16
+ lxvw4x 35, 0, 4
+ stxvw4x 0, 0, 3
+ ori 2, 2, 0
+ lxvw4x 34, 0, 3
+ addi 3, 1, -32
+ stxvw4x 35, 0, 3
+ vpmsumb 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+
+The two stxvw4x instructions are not needed.
+With -mtriple=powerpc64le-unknown-linux-gnu, the associated permutes
+are present too.
+
+//===----------------------------------------------------------------------===//
+
+The following example is found in test/CodeGen/PowerPC/vec_add_sub_doubleword.ll:
+
+define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind {
+ %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0
+ %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1
+ %result = add <2 x i64> %x, %tmpvec2
+ ret <2 x i64> %result
+}
+
+This will generate the following instruction sequence:
+ std 5, -8(1)
+ std 5, -16(1)
+ addi 3, 1, -16
+ ori 2, 2, 0
+ lxvd2x 35, 0, 3
+ vaddudm 2, 2, 3
+ blr
+
+This will almost certainly cause a load-hit-store hazard.
+Since val is a value parameter, it should not need to be saved onto
+the stack, unless it's being done to set up the vector register. Instead,
+it would be better to splat the value into a vector register, and then
+remove the (dead) stores to the stack (a short C-level sketch of the
+intended splat appears a few notes below).
+
+//===----------------------------------------------------------------------===//
+
+At the moment we always generate a lxsdx in preference to lfd, or stxsdx in
+preference to stfd. When we have a reg-immediate addressing mode, this is a
+poor choice, since we have to load the address into an index register. This
+should be fixed for P7/P8.
+
+//===----------------------------------------------------------------------===//
+
+Right now, ShuffleKind 0 is supported only on BE, and ShuffleKind 2 only on LE.
+However, we could actually support both kinds on either endianness, if we check
+for the appropriate shufflevector pattern for each case ... this would cause
+some additional shufflevectors to be recognized and implemented via the
+"swapped" form.
+
+//===----------------------------------------------------------------------===//
+
+There is a utility program called PerfectShuffle that generates a table of the
+shortest instruction sequence for implementing a shufflevector operation on
+PowerPC. However, this was designed for big-endian code generation. We could
+modify this program to create a little endian version of the table. The table
+is used in PPCISelLowering.cpp, PPCTargetLowering::LOWERVECTOR_SHUFFLE().
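A short standalone sketch (not part of this patch) of the splat-in-registers
semantics requested for increment_by_val above, written with the GCC/Clang
vector extension; the point is that a good lowering forms the splat directly
in a vector register, with no stack round-trip:

// Sketch only; mirrors increment_by_val from the note above.
#include <cstdint>

typedef int64_t v2i64 __attribute__((vector_size(16)));

v2i64 increment_by_val(v2i64 x, int64_t val) {
  v2i64 splat = {val, val}; // splat val into both doubleword lanes
  return x + splat;         // element-wise add, i.e. vaddudm
}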
+
+//===----------------------------------------------------------------------===//
+
+Opportunities to use instructions from PPCInstrVSX.td during code gen
+ - Conversion instructions (Sections 7.6.1.5 and 7.6.1.6 of ISA 2.07)
+ - Scalar comparisons (xscmpodp and xscmpudp)
+ - Min and max (xsmaxdp, xsmindp, xvmaxdp, xvmindp, xvmaxsp, xvminsp)
+
+Related to this: we currently do not generate the lxvw4x instruction for either
+v4f32 or v4i32, probably because adding a dag pattern to the recognizer requires
+a single target type. This should probably be addressed in the PPCISelDAGToDAG logic.
diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
index a7d48b3..e5d5ce2 100644
--- a/lib/Target/R600/AMDGPU.td
+++ b/lib/Target/R600/AMDGPU.td
@@ -103,6 +103,11 @@ def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
 "true",
 "Enable spilling of VGPRs to scratch memory">;
+def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
+ "SGPRInitBug",
+ "true",
+ "VI SGPR initialization bug requiring a fixed SGPR allocation size">;
+
 class SubtargetFeatureFetchLimit <string Value> :
 SubtargetFeature <"fetch"#Value,
 "TexVTXClauseSize",
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index 92bc314..d911014 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -105,8 +105,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
 SetupMachineFunction(MF);
- EmitFunctionHeader();
-
 MCContext &Context = getObjFileLowering().getContext();
 const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
@@ -129,7 +127,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
 HexLines.clear();
 DisasmLineMaxLen = 0;
- OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
 EmitFunctionBody();
 if (isVerbose()) {
@@ -339,6 +336,13 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
 ProgInfo.NumVGPR = MaxVGPR + 1;
 ProgInfo.NumSGPR = MaxSGPR + 1;
+ if (STM.hasSGPRInitBug()) {
+ if (ProgInfo.NumSGPR > AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG)
+ llvm_unreachable("Too many SGPRs used with the SGPR init bug");
+
+ ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+ }
+
 ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
 ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
 // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index b5ab703..7341cd9 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -95,7 +95,8 @@ private:
 SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
 SDValue &TFE) const;
 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &Offset) const;
+ SDValue &SOffset, SDValue &Offset, SDValue &GLC,
+ SDValue &SLC, SDValue &TFE) const;
 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
 SDValue &SOffset, SDValue &Offset,
 SDValue &SLC) const;
@@ -120,6 +121,11 @@ private:
 SDNode *SelectADD_SUB_I64(SDNode *N);
 SDNode *SelectDIV_SCALE(SDNode *N);
+ SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
+ uint32_t Offset, uint32_t Width);
+ SDNode *SelectS_BFEFromShifts(SDNode *N);
+ SDNode *SelectS_BFE(SDNode *N);
+
 // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc" }; @@ -519,21 +525,11 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { bool Signed = Opc == AMDGPUISD::BFE_I32; - // Transformation function, pack the offset and width of a BFE into - // the format expected by the S_BFE_I32 / S_BFE_U32. In the second - // source, bits [5:0] contain the offset and bits [22:16] the width. - uint32_t OffsetVal = Offset->getZExtValue(); uint32_t WidthVal = Width->getZExtValue(); - uint32_t PackedVal = OffsetVal | WidthVal << 16; - - SDValue PackedOffsetWidth = CurDAG->getTargetConstant(PackedVal, MVT::i32); - return CurDAG->getMachineNode(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, - SDLoc(N), - MVT::i32, - N->getOperand(0), - PackedOffsetWidth); + return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N), + N->getOperand(0), OffsetVal, WidthVal); } case AMDGPUISD::DIV_SCALE: { @@ -547,6 +543,14 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { } case ISD::ADDRSPACECAST: return SelectAddrSpaceCast(N); + case ISD::AND: + case ISD::SRL: + case ISD::SRA: + if (N->getValueType(0) != MVT::i32 || + Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) + break; + + return SelectS_BFE(N); } return SelectCode(N); @@ -966,8 +970,9 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, - SDValue &Offset) const { - SDValue Ptr, Offen, Idxen, Addr64, GLC, SLC, TFE; + SDValue &Offset, SDValue &GLC, + SDValue &SLC, SDValue &TFE) const { + SDValue Ptr, Offen, Idxen, Addr64; SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, GLC, SLC, TFE); @@ -991,8 +996,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &Offset, SDValue &SLC) const { SLC = CurDAG->getTargetConstant(0, MVT::i1); + SDValue GLC, TFE; - return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset); + return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE); } bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, @@ -1147,6 +1153,95 @@ SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode(); } +SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val, + uint32_t Offset, uint32_t Width) { + // Transformation function, pack the offset and width of a BFE into + // the format expected by the S_BFE_I32 / S_BFE_U32. In the second + // source, bits [5:0] contain the offset and bits [22:16] the width. + uint32_t PackedVal = Offset | (Width << 16); + SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, MVT::i32); + + return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst); +} + +SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) { + // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c) + // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c) + // Predicate: 0 < b <= c < 32 + + const SDValue &Shl = N->getOperand(0); + ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1)); + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); + + if (B && C) { + uint32_t BVal = B->getZExtValue(); + uint32_t CVal = C->getZExtValue(); + + if (0 < BVal && BVal <= CVal && CVal < 32) { + bool Signed = N->getOpcode() == ISD::SRA; + unsigned Opcode = Signed ? 
AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; + + return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), + CVal - BVal, 32 - CVal); + } + } + return SelectCode(N); +} + +SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) { + switch (N->getOpcode()) { + case ISD::AND: + if (N->getOperand(0).getOpcode() == ISD::SRL) { + // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)" + // Predicate: isMask(mask) + const SDValue &Srl = N->getOperand(0); + ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1)); + ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1)); + + if (Shift && Mask) { + uint32_t ShiftVal = Shift->getZExtValue(); + uint32_t MaskVal = Mask->getZExtValue(); + + if (isMask_32(MaskVal)) { + uint32_t WidthVal = countPopulation(MaskVal); + + return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0), + ShiftVal, WidthVal); + } + } + } + break; + case ISD::SRL: + if (N->getOperand(0).getOpcode() == ISD::AND) { + // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)" + // Predicate: isMask(mask >> b) + const SDValue &And = N->getOperand(0); + ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1)); + ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1)); + + if (Shift && Mask) { + uint32_t ShiftVal = Shift->getZExtValue(); + uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal; + + if (isMask_32(MaskVal)) { + uint32_t WidthVal = countPopulation(MaskVal); + + return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0), + ShiftVal, WidthVal); + } + } + } else if (N->getOperand(0).getOpcode() == ISD::SHL) + return SelectS_BFEFromShifts(N); + break; + case ISD::SRA: + if (N->getOperand(0).getOpcode() == ISD::SHL) + return SelectS_BFEFromShifts(N); + break; + } + + return SelectCode(N); +} + bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const { diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 4707279..62a33fa 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -885,9 +885,6 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return LowerIntrinsicIABS(Op, DAG); case AMDGPUIntrinsic::AMDGPU_lrp: return LowerIntrinsicLRP(Op, DAG); - case AMDGPUIntrinsic::AMDGPU_fract: - case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name. - return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); case AMDGPUIntrinsic::AMDGPU_clamp: case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name. 
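The SelectS_BFE / SelectS_BFEFromShifts rewrites above rest on two bit-level
identities. A minimal standalone C++ check (not part of this patch) of both,
using a reference bitfield extract over plain 32-bit values:

// Standalone sketch: check the identities used by SelectS_BFE and
// SelectS_BFEFromShifts on ordinary uint32_t arithmetic.
#include <cassert>
#include <cstdint>

// Reference semantics of S_BFE_U32: extract Width bits starting at Offset.
static uint32_t bfeU32(uint32_t Val, uint32_t Offset, uint32_t Width) {
  if (Width == 0)
    return 0;
  uint32_t Mask = (Width < 32) ? ((1u << Width) - 1) : ~0u;
  return (Val >> Offset) & Mask;
}

int main() {
  uint32_t A = 0xDEADBEEF;

  // "(a srl b) & mask" == BFE_U32(a, b, popcount(mask)) when mask is a
  // low-bit mask, i.e. isMask_32(mask) holds. Here popcount(0xFF) == 8.
  assert(((A >> 5) & 0xFF) == bfeU32(A, 5, 8));

  // "(a shl b) srl c" == BFE_U32(a, c - b, 32 - c) for 0 < b <= c < 32.
  uint32_t B = 3, C = 11;
  assert(((A << B) >> C) == bfeU32(A, C - B, 32 - C));

  return 0;
}

The side condition 0 < b <= c < 32 is what makes the shift pair a pure
extract; for b > c the result would need zeros shifted back into the low
bits, which a single BFE cannot produce.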
diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp index f4de2d6..f0f10ca 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.cpp +++ b/lib/Target/R600/AMDGPUInstrInfo.cpp @@ -31,7 +31,7 @@ using namespace llvm; void AMDGPUInstrInfo::anchor() {} AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &st) - : AMDGPUGenInstrInfo(-1,-1), RI(st), ST(st) { } + : AMDGPUGenInstrInfo(-1, -1), ST(st) {} const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const { return RI; @@ -152,26 +152,22 @@ bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const return true; } - -MachineInstr * -AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const { +MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + ArrayRef<unsigned> Ops, + int FrameIndex) const { // TODO: Implement this function return nullptr; } -MachineInstr* -AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr *LoadMI) const { +MachineInstr * +AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, + MachineInstr *LoadMI) const { // TODO: Implement this function return nullptr; } -bool -AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const { +bool AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, + ArrayRef<unsigned> Ops) const { // TODO: Implement this function return false; } @@ -360,8 +356,8 @@ static enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) { } int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const { - int MCOp = AMDGPU::getMCOpcode(Opcode, - AMDGPUSubtargetToSISubtarget(RI.ST.getGeneration())); + int MCOp = AMDGPU::getMCOpcode( + Opcode, AMDGPUSubtargetToSISubtarget(ST.getGeneration())); // -1 means that Opcode is already a native instruction. if (MCOp == -1) diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h index 202183c..07042b5 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.h +++ b/lib/Target/R600/AMDGPUInstrInfo.h @@ -85,14 +85,13 @@ public: const TargetRegisterInfo *TRI) const override; protected: - MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, int FrameIndex) const override; - MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, MachineInstr *LoadMI) const override; + public: /// \returns the smallest register index that will be accessed by an indirect /// read or write or -1 if indirect addressing is not used by this program. 
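The foldMemoryOperandImpl and canFoldMemoryOperand signatures above trade
const SmallVectorImpl<unsigned> & for ArrayRef<unsigned>. A minimal sketch
(not part of this patch; assumes a reasonably recent LLVM's ADT headers are
on the include path) of why ArrayRef is the more permissive parameter type:

// Sketch only: ArrayRef<unsigned> binds to any contiguous sequence,
// so callers no longer have to materialize a SmallVector first.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"

static unsigned countOps(llvm::ArrayRef<unsigned> Ops) {
  return static_cast<unsigned>(Ops.size()); // index or iterate like an array
}

int main() {
  llvm::SmallVector<unsigned, 4> V;
  V.push_back(1);
  V.push_back(2);
  unsigned A[] = {3, 4, 5};
  // Implicit conversions: from SmallVectorImpl, from a C array, and
  // from a braced initializer list.
  return countOps(V) + countOps(A) + countOps({6, 7});
}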
@@ -103,7 +102,7 @@ public: int getIndirectIndexEnd(const MachineFunction &MF) const; bool canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const override; + ArrayRef<unsigned> Ops) const override; bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, unsigned Reg, bool UnfoldLoad, bool UnfoldStore, SmallVectorImpl<MachineInstr *> &NewMIs) const override; diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 849b241..4d08201 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -578,22 +578,20 @@ class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat < // Bitfield extract patterns -/* - -XXX: The BFE pattern is not working correctly because the XForm is not being -applied. +def IMMZeroBasedBitfieldMask : PatLeaf <(imm), [{ + return isMask_32(N->getZExtValue()); +}]>; -def legalshift32 : ImmLeaf <i32, [{return Imm >=0 && Imm < 32;}]>; -def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}], - SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(countTrailingOnes(N->getZExtValue()), MVT::i32);}]>>; +def IMMPopCount : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), + MVT::i32); +}]>; -class BFEPattern <Instruction BFE> : Pat < - (and (srl i32:$x, legalshift32:$y), bfemask:$z), - (BFE $x, $y, $z) +class BFEPattern <Instruction BFE, Instruction MOV> : Pat < + (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)), + (BFE $src, $rshift, (MOV (i32 (IMMPopCount $mask)))) >; -*/ - // rotr pattern class ROTRPattern <Instruction BIT_ALIGN> : Pat < (rotr i32:$src0, i32:$src1), diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td index eee9c29..ab489cd 100644 --- a/lib/Target/R600/AMDGPUIntrinsics.td +++ b/lib/Target/R600/AMDGPUIntrinsics.td @@ -68,6 +68,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_barrier_local : Intrinsic<[], [], []>; def int_AMDGPU_barrier_global : Intrinsic<[], [], []>; } diff --git a/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/lib/Target/R600/AMDGPUPromoteAlloca.cpp index b81fef4..175dcd8 100644 --- a/lib/Target/R600/AMDGPUPromoteAlloca.cpp +++ b/lib/Target/R600/AMDGPUPromoteAlloca.cpp @@ -18,6 +18,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "amdgpu-promote-alloca" @@ -87,7 +88,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { continue; if (Use->getParent()->getParent() == &F) LocalMemAvailable -= - Mod->getDataLayout()->getTypeAllocSize(GVTy->getElementType()); + Mod->getDataLayout().getTypeAllocSize(GVTy->getElementType()); } } } @@ -276,8 +277,8 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) { // value from the reqd_work_group_size function attribute if it is // available. 
unsigned WorkGroupSize = 256; - int AllocaSize = WorkGroupSize * - Mod->getDataLayout()->getTypeAllocSize(AllocaTy); + int AllocaSize = + WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy); if (AllocaSize > LocalMemAvailable) { DEBUG(dbgs() << " Not enough local memory to promote alloca.\n"); @@ -294,9 +295,9 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) { DEBUG(dbgs() << "Promoting alloca to local memory\n"); LocalMemAvailable -= AllocaSize; + Type *GVTy = ArrayType::get(I.getAllocatedType(), 256); GlobalVariable *GV = new GlobalVariable( - *Mod, ArrayType::get(I.getAllocatedType(), 256), false, - GlobalValue::ExternalLinkage, 0, I.getName(), 0, + *Mod, GVTy, false, GlobalValue::ExternalLinkage, 0, I.getName(), 0, GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS); FunctionType *FTy = FunctionType::get( @@ -332,7 +333,7 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) { Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext()))); Indices.push_back(TID); - Value *Offset = Builder.CreateGEP(GV, Indices); + Value *Offset = Builder.CreateGEP(GVTy, GV, Indices); I.mutateType(Offset->getType()); I.replaceAllUsesWith(Offset); I.eraseFromParent(); diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp index 57b054b..3ca0eca 100644 --- a/lib/Target/R600/AMDGPURegisterInfo.cpp +++ b/lib/Target/R600/AMDGPURegisterInfo.cpp @@ -17,10 +17,7 @@ using namespace llvm; -AMDGPURegisterInfo::AMDGPURegisterInfo(const AMDGPUSubtarget &st) -: AMDGPUGenRegisterInfo(0), - ST(st) - { } +AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {} //===----------------------------------------------------------------------===// // Function handling callbacks - Functions are a seldom used feature of GPUS, so diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h index f27576a..cfd800b 100644 --- a/lib/Target/R600/AMDGPURegisterInfo.h +++ b/lib/Target/R600/AMDGPURegisterInfo.h @@ -30,9 +30,8 @@ class TargetInstrInfo; struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { static const MCPhysReg CalleeSavedReg; - const AMDGPUSubtarget &ST; - AMDGPURegisterInfo(const AMDGPUSubtarget &st); + AMDGPURegisterInfo(); BitVector getReservedRegs(const MachineFunction &MF) const override { assert(!"Unimplemented"); return BitVector(); diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp index 70c8525..0ead652 100644 --- a/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/lib/Target/R600/AMDGPUSubtarget.cpp @@ -70,7 +70,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS, CaymanISA(false), FlatAddressSpace(false), EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), - EnableVGPRSpilling(false), + EnableVGPRSpilling(false), SGPRInitBug(false), FrameLowering(TargetFrameLowering::StackGrowsUp, 64 * 16, // Maximum stack alignment (long16) 0), diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 1b0122c..403a3e4 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -44,6 +44,10 @@ public: VOLCANIC_ISLANDS, }; + enum { + FIXED_SGPR_COUNT_FOR_INIT_BUG = 80 + }; + private: std::string DevName; bool Is64bit; @@ -66,6 +70,7 @@ private: bool CFALUBug; int LocalMemorySize; bool EnableVGPRSpilling; + bool SGPRInitBug; AMDGPUFrameLowering FrameLowering; 
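For context on the AllocaSize computation in AMDGPUPromoteAlloca above: with no reqd_work_group_size attribute the pass assumes the worst-case work group of 256 work items, so each promoted alloca costs 256 per-lane copies of LDS. A hedged restatement of that budget check (fitsInLDS is an illustrative helper, not the pass's code):

    #include <cstdint>

    // One LDS copy of the alloca is needed per work item in the group.
    bool fitsInLDS(uint64_t PerItemBytes, int64_t LocalMemAvailable) {
      const uint64_t WorkGroupSize = 256; // worst case without reqd_work_group_size
      const uint64_t AllocaSize = WorkGroupSize * PerItemBytes;
      return LocalMemAvailable > 0 &&
             AllocaSize <= static_cast<uint64_t>(LocalMemAvailable);
    }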
std::unique_ptr<AMDGPUTargetLowering> TLInfo; @@ -206,6 +211,10 @@ public: return LocalMemorySize; } + bool hasSGPRInitBug() const { + return SGPRInitBug; + } + unsigned getAmdKernelCodeChipID() const; bool enableMachineScheduler() const override { diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index a862f3c..cb95835 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -71,10 +71,10 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT, TargetOptions Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OptLevel) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel), - DL(computeDataLayout(TT)), - TLOF(new TargetLoweringObjectFileELF()), - Subtarget(TT, CPU, FS, *this), IntrinsicInfo() { + : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM, + OptLevel), + TLOF(new TargetLoweringObjectFileELF()), Subtarget(TT, CPU, FS, *this), + IntrinsicInfo() { setRequiresStructuredCFG(true); initAsmInfo(); } @@ -118,7 +118,7 @@ public: ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const override { - const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); + const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) return createR600MachineScheduler(C); return nullptr; @@ -174,7 +174,7 @@ void AMDGPUPassConfig::addIRPasses() { } void AMDGPUPassConfig::addCodeGenPrepare() { - const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); + const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); if (ST.isPromoteAllocaEnabled()) { addPass(createAMDGPUPromoteAlloca(ST)); addPass(createSROAPass()); @@ -184,7 +184,7 @@ void AMDGPUPassConfig::addCodeGenPrepare() { bool AMDGPUPassConfig::addPreISel() { - const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); + const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); addPass(createFlattenCFGPass()); if (ST.IsIRStructurizerEnabled()) addPass(createStructurizeCFGPass()); @@ -211,7 +211,7 @@ void R600PassConfig::addPreRegAlloc() { } void R600PassConfig::addPreSched2() { - const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); + const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); addPass(createR600EmitClauseMarkers(), false); if (ST.isIfCvtEnabled()) addPass(&IfConverterID, false); @@ -251,15 +251,15 @@ bool GCNPassConfig::addInstSelector() { } void GCNPassConfig::addPreRegAlloc() { - const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); + const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) { - // Don't do this with no optimizations since it throws away debug info by - // merging nonadjacent loads. + // Don't do this with no optimizations since it throws away debug info by + // merging nonadjacent loads. - // This should be run after scheduling, but before register allocation. It - // also need extra copies to the address operand to be eliminated. - initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry()); - insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID); + // This should be run after scheduling, but before register allocation. It + // also need extra copies to the address operand to be eliminated. 
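The SGPRInitBug feature introduced above is meant to be read through the subtarget, matching this patch's broader removal of cached subtarget references (e.g. the RI.ST field). A sketch of a query site, assuming only the accessors shown in AMDGPUSubtarget.h (maxUsableSGPRs is an illustrative helper, not part of the patch):

    #include "AMDGPUSubtarget.h"
    #include "llvm/CodeGen/MachineFunction.h"
    using namespace llvm;

    // On parts with the SGPR init bug, clamp to the fixed safe count (80).
    static unsigned maxUsableSGPRs(const MachineFunction &MF, unsigned Default) {
      const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
      return ST.hasSGPRInitBug() ? AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG
                                 : Default;
    }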
+ initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry()); + insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID); } addPass(createSIShrinkInstructionsPass(), false); addPass(createSIFixSGPRLiveRangesPass(), false); diff --git a/lib/Target/R600/AMDGPUTargetMachine.h b/lib/Target/R600/AMDGPUTargetMachine.h index a691536..785c119 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.h +++ b/lib/Target/R600/AMDGPUTargetMachine.h @@ -30,7 +30,6 @@ namespace llvm { class AMDGPUTargetMachine : public LLVMTargetMachine { private: - const DataLayout DL; protected: TargetLoweringObjectFile *TLOF; @@ -42,12 +41,9 @@ public: StringRef CPU, TargetOptions Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); ~AMDGPUTargetMachine(); - // FIXME: This is currently broken, the DataLayout needs to move to - // the target machine. - const DataLayout *getDataLayout() const override { - return &DL; - } - const AMDGPUSubtarget *getSubtargetImpl() const override { + + const AMDGPUSubtarget *getSubtargetImpl() const { return &Subtarget; } + const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override { return &Subtarget; } const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override { diff --git a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp index 68f4600..96edc41 100644 --- a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" #include "llvm/Target/TargetLowering.h" @@ -36,13 +37,15 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, // TODO: Do we want runtime unrolling? for (const BasicBlock *BB : L->getBlocks()) { + const DataLayout &DL = BB->getModule()->getDataLayout(); for (const Instruction &I : *BB) { const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I); if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) continue; const Value *Ptr = GEP->getPointerOperand(); - const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr)); + const AllocaInst *Alloca = + dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL)); if (Alloca) { // We want to do whatever we can to limit the number of alloca // instructions that make it through to the code generator. 
allocas diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp index ee6e8ec..ee6551b 100644 --- a/lib/Target/R600/AMDILCFGStructurizer.cpp +++ b/lib/Target/R600/AMDILCFGStructurizer.cpp @@ -10,8 +10,8 @@ #include "AMDGPU.h" #include "AMDGPUInstrInfo.h" -#include "R600InstrInfo.h" #include "AMDGPUSubtarget.h" +#include "R600InstrInfo.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SmallVector.h" @@ -30,6 +30,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include <deque> using namespace llvm; @@ -165,6 +166,7 @@ public: TRI = &TII->getRegisterInfo(); DEBUG(MF.dump();); OrderedBlks.clear(); + Visited.clear(); FuncRep = &MF; MLI = &getAnalysis<MachineLoopInfo>(); DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI);); @@ -621,7 +623,7 @@ DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) { for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end(); ++It) { MachineInstr *instr = &(*It); - if (instr->getDebugLoc().isUnknown() == false) + if (!instr->getDebugLoc().isUnknown()) DL = instr->getDebugLoc(); } return DL; @@ -1075,21 +1077,19 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) { } int AMDGPUCFGStructurizer::loopendPatternMatch() { - std::vector<MachineLoop *> NestedLoops; - for (MachineLoopInfo::iterator It = MLI->begin(), E = MLI->end(); It != E; - ++It) - for (MachineLoop *ML : depth_first(*It)) - NestedLoops.push_back(ML); + std::deque<MachineLoop *> NestedLoops; + for (auto &It: *MLI) + for (MachineLoop *ML : depth_first(It)) + NestedLoops.push_front(ML); if (NestedLoops.size() == 0) return 0; - // Process nested loop outside->inside, so "continue" to a outside loop won't - // be mistaken as "break" of the current loop. + // Process nested loop outside->inside (we did push_front), + // so "continue" to a outside loop won't be mistaken as "break" + // of the current loop. int Num = 0; - for (std::vector<MachineLoop *>::reverse_iterator It = NestedLoops.rbegin(), - E = NestedLoops.rend(); It != E; ++It) { - MachineLoop *ExaminedLoop = *It; + for (MachineLoop *ExaminedLoop : NestedLoops) { if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop]) continue; DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump();); @@ -1611,7 +1611,7 @@ void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB, bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI); - if (UseContinueLogical == false) { + if (!UseContinueLogical) { int BranchOpcode = TrueBranch == ContMBB ? 
getBranchNzeroOpcode(OldOpcode) : getBranchZeroOpcode(OldOpcode); diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp index 3b4ba1a..49f0f23 100644 --- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp @@ -46,10 +46,9 @@ class AMDGPUAsmParser : public MCTargetAsmParser { /// } public: - AMDGPUAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, - const MCInstrInfo &_MII, - const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { + AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser, + const MCInstrInfo &MII, const MCTargetOptions &Options) + : MCTargetAsmParser(), STI(STI), Parser(Parser) { setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td index 9f9472c..5560146 100644 --- a/lib/Target/R600/EvergreenInstructions.td +++ b/lib/Target/R600/EvergreenInstructions.td @@ -287,9 +287,8 @@ def BFE_INT_eg : R600_3OP <0x5, "BFE_INT", VecALU >; -// XXX: This pattern is broken, disabling for now. See comment in -// AMDGPUInstructions.td for more info. -// def : BFEPattern <BFE_UINT_eg>; +def : BFEPattern <BFE_UINT_eg, MOV_IMM_I32>; + def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))], VecALU diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index b66ed10..d62fd3f 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -99,6 +99,12 @@ void AMDGPUInstPrinter::printDSOffset1(const MCInst *MI, unsigned OpNo, printU8ImmDecOperand(MI, OpNo, O); } +void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) + O << " gds"; +} + void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo, raw_ostream &O) { if (MI->getOperand(OpNo).getImm()) @@ -208,6 +214,16 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) { O << Type << '[' << RegIdx << ':' << (RegIdx + NumRegs - 1) << ']'; } +void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3) + O << "_e64 "; + else + O << "_e32 "; + + printOperand(MI, OpNo, O); +} + void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, raw_ostream &O) { int32_t SImm = static_cast<int32_t>(Imm); if (SImm >= -16 && SImm <= 64) { diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h index 1d43c7a..5289718 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h @@ -44,10 +44,12 @@ private: void printDSOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printDSOffset0(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printDSOffset1(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printGDS(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printGLC(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printSLC(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printTFE(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printRegOperand(unsigned RegNo, raw_ostream &O); + void printVOPDst(const MCInst *MI, unsigned OpNo, raw_ostream &O); void 
printImmediate32(uint32_t I, raw_ostream &O); void printImmediate64(uint64_t I, raw_ostream &O); void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp index 83403ba..fb2deef 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -17,6 +17,7 @@ #include "InstPrinter/AMDGPUInstPrinter.h" #include "SIDefines.h" #include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" @@ -72,50 +73,19 @@ static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T, return new AMDGPUInstPrinter(MAI, MII, MRI); } -static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx) { - if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) { - return createSIMCCodeEmitter(MCII, MRI, STI, Ctx); - } else { - return createR600MCCodeEmitter(MCII, MRI, STI); - } -} - -static MCStreamer *createMCStreamer(const Target &T, StringRef TT, - MCContext &Ctx, MCAsmBackend &MAB, - raw_ostream &_OS, MCCodeEmitter *_Emitter, - const MCSubtargetInfo &STI, bool RelaxAll) { - return createELFStreamer(Ctx, MAB, _OS, _Emitter, false); -} - extern "C" void LLVMInitializeR600TargetMC() { + for (Target *T : {&TheAMDGPUTarget, &TheGCNTarget}) { + RegisterMCAsmInfo<AMDGPUMCAsmInfo> X(*T); + + TargetRegistry::RegisterMCCodeGenInfo(*T, createAMDGPUMCCodeGenInfo); + TargetRegistry::RegisterMCInstrInfo(*T, createAMDGPUMCInstrInfo); + TargetRegistry::RegisterMCRegInfo(*T, createAMDGPUMCRegisterInfo); + TargetRegistry::RegisterMCSubtargetInfo(*T, createAMDGPUMCSubtargetInfo); + TargetRegistry::RegisterMCInstPrinter(*T, createAMDGPUMCInstPrinter); + TargetRegistry::RegisterMCAsmBackend(*T, createAMDGPUAsmBackend); + } - RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget); - RegisterMCAsmInfo<AMDGPUMCAsmInfo> Z(TheGCNTarget); - - TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheGCNTarget, createAMDGPUMCCodeGenInfo); - - TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheGCNTarget, createAMDGPUMCInstrInfo); - - TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheGCNTarget, createAMDGPUMCRegisterInfo); - - TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheGCNTarget, createAMDGPUMCSubtargetInfo); - - TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheGCNTarget, createAMDGPUMCInstPrinter); - - TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheGCNTarget, createAMDGPUMCCodeEmitter); - - TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend); - TargetRegistry::RegisterMCAsmBackend(TheGCNTarget, createAMDGPUAsmBackend); - - TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheGCNTarget, createMCStreamer); + TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, + createR600MCCodeEmitter); + 
TargetRegistry::RegisterMCCodeEmitter(TheGCNTarget, createSIMCCodeEmitter); } diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h index bc8cd53..23f0196 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -16,6 +16,7 @@ #ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCTARGETDESC_H #define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCTARGETDESC_H +#include "llvm/Support/DataTypes.h" #include "llvm/ADT/StringRef.h" namespace llvm { @@ -34,11 +35,10 @@ extern Target TheGCNTarget; MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI); + MCContext &Ctx); MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx); MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI, diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 8a555ff..fa25f59 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -81,8 +81,8 @@ enum FCInstr { }; MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI, + MCContext &Ctx) { return new R600MCCodeEmitter(MCII, MRI); } diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp index 7e23772..760aa37 100644 --- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp @@ -72,7 +72,6 @@ public: MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx) { return new SIMCCodeEmitter(MCII, MRI, Ctx); } diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td index fb5aa61..82c6d13 100644 --- a/lib/Target/R600/Processors.td +++ b/lib/Target/R600/Processors.td @@ -119,8 +119,12 @@ def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>; // Volcanic Islands //===----------------------------------------------------------------------===// -def : ProcessorModel<"tonga", SIQuarterSpeedModel, [FeatureVolcanicIslands]>; +def : ProcessorModel<"tonga", SIQuarterSpeedModel, + [FeatureVolcanicIslands, FeatureSGPRInitBug] +>; -def : ProcessorModel<"iceland", SIQuarterSpeedModel, [FeatureVolcanicIslands]>; +def : ProcessorModel<"iceland", SIQuarterSpeedModel, + [FeatureVolcanicIslands, FeatureSGPRInitBug] +>; def : ProcessorModel<"carrizo", SIQuarterSpeedModel, [FeatureVolcanicIslands]>; diff --git a/lib/Target/R600/R600ClauseMergePass.cpp b/lib/Target/R600/R600ClauseMergePass.cpp index f07be00..3cb9021 100644 --- a/lib/Target/R600/R600ClauseMergePass.cpp +++ b/lib/Target/R600/R600ClauseMergePass.cpp @@ -14,11 +14,11 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" #include "R600RegisterInfo.h" -#include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index c738611..a34e2dc 100644 
--- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -837,6 +837,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case Intrinsic::AMDGPU_rsq: // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior. return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1)); + + case AMDGPUIntrinsic::AMDGPU_fract: + case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name. + return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); } // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode()) break; @@ -1479,8 +1483,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const // Lower loads constant address space global variable loads if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && - isa<GlobalVariable>( - GetUnderlyingObject(LoadNode->getMemOperand()->getValue()))) { + isa<GlobalVariable>(GetUnderlyingObject( + LoadNode->getMemOperand()->getValue(), *getDataLayout()))) { SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL, getPointerTy(AMDGPUAS::PRIVATE_ADDRESS)); @@ -1867,7 +1871,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, SelectCC.getOperand(0), // LHS SelectCC.getOperand(1), // RHS DAG.getConstant(-1, MVT::i32), // True - DAG.getConstant(0, MVT::i32), // Flase + DAG.getConstant(0, MVT::i32), // False SelectCC.getOperand(4)); // CC break; diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 653fd0d..5f0bdf3 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -29,9 +29,7 @@ using namespace llvm; #include "AMDGPUGenDFAPacketizer.inc" R600InstrInfo::R600InstrInfo(const AMDGPUSubtarget &st) - : AMDGPUInstrInfo(st), - RI(st) - { } + : AMDGPUInstrInfo(st), RI() {} const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const { return RI; @@ -268,9 +266,8 @@ int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const { return getOperandIdx(Opcode, OpTable[SrcNum]); } -#define SRC_SEL_ROWS 11 int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const { - static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = { + static const unsigned SrcSelTable[][2] = { {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel}, {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel}, {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel}, @@ -284,14 +281,13 @@ int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const { {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W} }; - for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) { - if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) { - return getOperandIdx(Opcode, SrcSelTable[i][1]); + for (const auto &Row : SrcSelTable) { + if (getOperandIdx(Opcode, Row[0]) == (int)SrcIdx) { + return getOperandIdx(Opcode, Row[1]); } } return -1; } -#undef SRC_SEL_ROWS SmallVector<std::pair<MachineOperand *, int64_t>, 3> R600InstrInfo::getSrcs(MachineInstr *MI) const { diff --git a/lib/Target/R600/R600OptimizeVectorRegisters.cpp b/lib/Target/R600/R600OptimizeVectorRegisters.cpp index 742c0e0..0c06ccc 100644 --- a/lib/Target/R600/R600OptimizeVectorRegisters.cpp +++ b/lib/Target/R600/R600OptimizeVectorRegisters.cpp @@ -27,10 +27,9 @@ /// to reduce MOV count. 
//===----------------------------------------------------------------------===// -#include "llvm/Support/Debug.h" #include "AMDGPU.h" -#include "R600InstrInfo.h" #include "AMDGPUSubtarget.h" +#include "R600InstrInfo.h" #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -38,6 +37,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp index dc95675..fb0359c 100644 --- a/lib/Target/R600/R600RegisterInfo.cpp +++ b/lib/Target/R600/R600RegisterInfo.cpp @@ -20,14 +20,16 @@ using namespace llvm; -R600RegisterInfo::R600RegisterInfo(const AMDGPUSubtarget &st) -: AMDGPURegisterInfo(st) - { RCW.RegWeight = 0; RCW.WeightLimit = 0;} +R600RegisterInfo::R600RegisterInfo() : AMDGPURegisterInfo() { + RCW.RegWeight = 0; + RCW.WeightLimit = 0; +} BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(ST.getInstrInfo()); + const R600InstrInfo *TII = + static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo()); Reserved.set(AMDGPU::ZERO); Reserved.set(AMDGPU::HALF); diff --git a/lib/Target/R600/R600RegisterInfo.h b/lib/Target/R600/R600RegisterInfo.h index f1a8a41..9713e60 100644 --- a/lib/Target/R600/R600RegisterInfo.h +++ b/lib/Target/R600/R600RegisterInfo.h @@ -24,7 +24,7 @@ class AMDGPUSubtarget; struct R600RegisterInfo : public AMDGPURegisterInfo { RegClassWeight RCW; - R600RegisterInfo(const AMDGPUSubtarget &st); + R600RegisterInfo(); BitVector getReservedRegs(const MachineFunction &MF) const override; diff --git a/lib/Target/R600/SIFixSGPRLiveRanges.cpp b/lib/Target/R600/SIFixSGPRLiveRanges.cpp index f34c375..0c54446 100644 --- a/lib/Target/R600/SIFixSGPRLiveRanges.cpp +++ b/lib/Target/R600/SIFixSGPRLiveRanges.cpp @@ -54,6 +54,7 @@ #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/lib/Target/R600/SIFoldOperands.cpp b/lib/Target/R600/SIFoldOperands.cpp index ae4b05d..7ba5a6d 100644 --- a/lib/Target/R600/SIFoldOperands.cpp +++ b/lib/Target/R600/SIFoldOperands.cpp @@ -17,9 +17,10 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #define DEBUG_TYPE "si-fold-operands" diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 7d794b8..bd0c3c2 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -172,16 +172,12 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, setOperationAction(ISD::UDIV, MVT::i64, Expand); setOperationAction(ISD::UREM, MVT::i64, Expand); - // We only support LOAD/STORE and vector manipulation ops for vectors - // with > 4 elements. 
- MVT VecTypes[] = { - MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32 - }; - setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); setOperationAction(ISD::SELECT, MVT::i1, Promote); - for (MVT VT : VecTypes) { + // We only support LOAD/STORE and vector manipulation ops for vectors + // with > 4 elements. + for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32}) { for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) { switch(Op) { case ISD::LOAD: @@ -206,10 +202,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) { setOperationAction(ISD::FTRUNC, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); - setOperationAction(ISD::FFLOOR, MVT::f64, Legal); setOperationAction(ISD::FRINT, MVT::f64, Legal); } + setOperationAction(ISD::FFLOOR, MVT::f64, Legal); setOperationAction(ISD::FDIV, MVT::f32, Custom); setOperationAction(ISD::FDIV, MVT::f64, Custom); @@ -932,6 +928,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case AMDGPUIntrinsic::AMDGPU_fract: + case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name. + return DAG.getNode(ISD::FSUB, DL, VT, Op.getOperand(1), + DAG.getNode(ISD::FFLOOR, DL, VT, Op.getOperand(1))); + default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); } @@ -1346,6 +1348,35 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N, return SDValue(); } +/// \brief Return true if the given offset Size in bytes can be folded into +/// the immediate offsets of a memory instruction for the given address space. +static bool canFoldOffset(unsigned OffsetSize, unsigned AS, + const AMDGPUSubtarget &STI) { + switch (AS) { + case AMDGPUAS::GLOBAL_ADDRESS: { + // MUBUF instructions a 12-bit offset in bytes. + return isUInt<12>(OffsetSize); + } + case AMDGPUAS::CONSTANT_ADDRESS: { + // SMRD instructions have an 8-bit offset in dwords on SI and + // a 20-bit offset in bytes on VI. + if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + return isUInt<20>(OffsetSize); + else + return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4); + } + case AMDGPUAS::LOCAL_ADDRESS: + case AMDGPUAS::REGION_ADDRESS: { + // The single offset versions have a 16-bit offset in bytes. + return isUInt<16>(OffsetSize); + } + case AMDGPUAS::PRIVATE_ADDRESS: + // Indirect register addressing does not use any offsets. + default: + return 0; + } +} + // (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2) // This is a variant of @@ -1377,13 +1408,10 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N, if (!CAdd) return SDValue(); - const SIInstrInfo *TII = - static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); - // If the resulting offset is too large, we can't fold it into the addressing // mode offset. 
APInt Offset = CAdd->getAPIntValue() << CN1->getAPIntValue(); - if (!TII->canFoldOffset(Offset.getZExtValue(), AddrSpace)) + if (!canFoldOffset(Offset.getZExtValue(), AddrSpace, *Subtarget)) return SDValue(); SelectionDAG &DAG = DCI.DAG; @@ -1595,6 +1623,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, case AMDGPUISD::UMAX: case AMDGPUISD::UMIN: { if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG && + N->getValueType(0) != MVT::f64 && getTargetMachine().getOptLevel() > CodeGenOpt::None) return performMin3Max3Combine(N, DCI); break; diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index 50f20ac..90a37f1 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -259,7 +259,8 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB, return; } - if (TRI->ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >= + AMDGPUSubtarget::VOLCANIC_ISLANDS) { // Any occurence of consecutive VMEM or SMEM instructions forms a VMEM // or SMEM clause, respectively. // @@ -412,7 +413,8 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) { void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { - if (TRI->ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS) + if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() < + AMDGPUSubtarget::VOLCANIC_ISLANDS) return; // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG. diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index c90c741..4167590 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -83,6 +83,9 @@ class Enc64 { int Size = 8; } +class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">; +def VOPDstVCC : VOPDstOperand <VCCReg>; + let Uses = [EXEC] in { class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> : @@ -96,7 +99,7 @@ class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> : } class VOPCCommon <dag ins, string asm, list<dag> pattern> : - VOPAnyCommon <(outs VCCReg:$dst), ins, asm, pattern> { + VOPAnyCommon <(outs VOPDstVCC:$dst), ins, asm, pattern> { let DisableEncoding = "$dst"; let VOPC = 1; @@ -577,6 +580,12 @@ class DS <dag outs, dag ins, string asm, list<dag> pattern> : let DS = 1; let UseNamedOperandTable = 1; let DisableEncoding = "$m0"; + + // Most instruction load and store data, so set this as the default. + let mayLoad = 1; + let mayStore = 1; + + let hasSideEffects = 0; let SchedRW = [WriteLDS]; } diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 4f1e5ad..ba98ad7 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -28,7 +28,7 @@ using namespace llvm; SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st) - : AMDGPUInstrInfo(st), RI(st) {} + : AMDGPUInstrInfo(st), RI() {} //===----------------------------------------------------------------------===// // TargetInstrInfo callbacks @@ -120,12 +120,20 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, if (Load0->getOperand(0) != Load1->getOperand(0)) return false; + const ConstantSDNode *Load0Offset = + dyn_cast<ConstantSDNode>(Load0->getOperand(1)); + const ConstantSDNode *Load1Offset = + dyn_cast<ConstantSDNode>(Load1->getOperand(1)); + + if (!Load0Offset || !Load1Offset) + return false; + // Check chain. 
if (findChainOperand(Load0) != findChainOperand(Load1)) return false; - Offset0 = cast<ConstantSDNode>(Load0->getOperand(1))->getZExtValue(); - Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue(); + Offset0 = Load0Offset->getZExtValue(); + Offset1 = Load1Offset->getZExtValue(); return true; } @@ -418,7 +426,9 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } } -unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const { +unsigned SIInstrInfo::commuteOpcode(const MachineInstr &MI) const { + const unsigned Opcode = MI.getOpcode(); + int NewOpc; // Try to map original to commuted opcode @@ -583,10 +593,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB, unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z); unsigned InputPtrReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR); - static const unsigned TIDIGRegs[3] = { - TIDIGXReg, TIDIGYReg, TIDIGZReg - }; - for (unsigned Reg : TIDIGRegs) { + for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) { if (!Entry.isLiveIn(Reg)) Entry.addLiveIn(Reg); } @@ -720,6 +727,26 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { MI->eraseFromParent(); break; } + + case AMDGPU::V_CNDMASK_B64_PSEUDO: { + unsigned Dst = MI->getOperand(0).getReg(); + unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0); + unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1); + unsigned Src0 = MI->getOperand(1).getReg(); + unsigned Src1 = MI->getOperand(2).getReg(); + const MachineOperand &SrcCond = MI->getOperand(3); + + BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo) + .addReg(RI.getSubReg(Src0, AMDGPU::sub0)) + .addReg(RI.getSubReg(Src1, AMDGPU::sub0)) + .addOperand(SrcCond); + BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi) + .addReg(RI.getSubReg(Src0, AMDGPU::sub1)) + .addReg(RI.getSubReg(Src1, AMDGPU::sub1)) + .addOperand(SrcCond); + MI->eraseFromParent(); + break; + } } return true; } @@ -792,7 +819,7 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, } if (MI) - MI->setDesc(get(commuteOpcode(MI->getOpcode()))); + MI->setDesc(get(commuteOpcode(*MI))); return MI; } @@ -1172,32 +1199,6 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo, return RI.opCanUseInlineConstant(OpInfo.OperandType); } -bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) const { - switch (AS) { - case AMDGPUAS::GLOBAL_ADDRESS: { - // MUBUF instructions a 12-bit offset in bytes. - return isUInt<12>(OffsetSize); - } - case AMDGPUAS::CONSTANT_ADDRESS: { - // SMRD instructions have an 8-bit offset in dwords on SI and - // a 20-bit offset in bytes on VI. - if (RI.ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) - return isUInt<20>(OffsetSize); - else - return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4); - } - case AMDGPUAS::LOCAL_ADDRESS: - case AMDGPUAS::REGION_ADDRESS: { - // The single offset versions have a 16-bit offset in bytes. - return isUInt<16>(OffsetSize); - } - case AMDGPUAS::PRIVATE_ADDRESS: - // Indirect register addressing does not use any offsets. 
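The areLoadsFromSameBasePtr change above (in SIInstrInfo.cpp) replaces unchecked cast<ConstantSDNode> with dyn_cast plus a null check, so loads whose offset operand is not a constant now make the query return false instead of asserting. The same guard pattern, sketched on IR values (isFixedAlloca is an illustrative name):

    #include "llvm/IR/Instructions.h"
    #include "llvm/Support/Casting.h"

    // dyn_cast<> yields nullptr on a type mismatch, where cast<> would assert.
    static bool isFixedAlloca(const llvm::Value *V) {
      const auto *AI = llvm::dyn_cast<llvm::AllocaInst>(V);
      return AI && !AI->isArrayAllocation();
    }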
- default: - return 0; - } -} - bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const { int Op32 = AMDGPU::getVOPe32(Opcode); if (Op32 == -1) @@ -1405,6 +1406,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32; case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32; case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32; + case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64; case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32; case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32; case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32; @@ -1423,6 +1425,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64; case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; + case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64; } } @@ -1865,12 +1868,15 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { MachineInstr *Addr64 = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode)) .addOperand(*VData) - .addOperand(*SRsrc) .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. // This will be replaced later // with the new value of vaddr. + .addOperand(*SRsrc) .addOperand(*SOffset) - .addOperand(*Offset); + .addOperand(*Offset) + .addImm(0) // glc + .addImm(0) // slc + .addImm(0); // tfe MI->removeFromParent(); MI = Addr64; @@ -1914,14 +1920,20 @@ void SIInstrInfo::splitSMRD(MachineInstr *MI, // The SMRD has an 8-bit offset in dwords on SI and a 20-bit offset in bytes // on VI. + + bool IsKill = SBase->isKill(); if (OffOp) { - bool isVI = RI.ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS; + bool isVI = + MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >= + AMDGPUSubtarget::VOLCANIC_ISLANDS; unsigned OffScale = isVI ? 1 : 4; // Handle the _IMM variant unsigned LoOffset = OffOp->getImm() * OffScale; unsigned HiOffset = LoOffset + HalfSize; Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo) - .addOperand(*SBase) + // Use addReg instead of addOperand + // to make sure kill flag is cleared. + .addReg(SBase->getReg(), 0, SBase->getSubReg()) .addImm(LoOffset / OffScale); if (!isUInt<20>(HiOffset) || (!isVI && !isUInt<8>(HiOffset / OffScale))) { @@ -1930,25 +1942,28 @@ void SIInstrInfo::splitSMRD(MachineInstr *MI, BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR) .addImm(HiOffset); // The offset in register is in bytes. 
Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi) - .addOperand(*SBase) + .addReg(SBase->getReg(), getKillRegState(IsKill), + SBase->getSubReg()) .addReg(OffsetSGPR); } else { Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi) - .addOperand(*SBase) + .addReg(SBase->getReg(), getKillRegState(IsKill), + SBase->getSubReg()) .addImm(HiOffset / OffScale); } } else { // Handle the _SGPR variant MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff); Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo) - .addOperand(*SBase) + .addReg(SBase->getReg(), 0, SBase->getSubReg()) .addOperand(*SOff); unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR) .addOperand(*SOff) .addImm(HalfSize); Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp)) - .addOperand(*SBase) + .addReg(SBase->getReg(), getKillRegState(IsKill), + SBase->getSubReg()) .addReg(OffsetSGPR); } @@ -2003,7 +2018,8 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con // SMRD instructions take a dword offsets on SI and byte offset on VI // and MUBUF instructions always take a byte offset. ImmOffset = MI->getOperand(2).getImm(); - if (RI.ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) + if (MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <= + AMDGPUSubtarget::SEA_ISLANDS) ImmOffset <<= 2; RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); @@ -2043,13 +2059,15 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con .addImm(AMDGPU::sub3); MI->setDesc(get(NewOpcode)); if (MI->getOperand(2).isReg()) { - MI->getOperand(2).setReg(MI->getOperand(1).getReg()); + MI->getOperand(2).setReg(SRsrc); } else { - MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false); + MI->getOperand(2).ChangeToRegister(SRsrc, false); } - MI->getOperand(1).setReg(SRsrc); MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset)); + MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // glc + MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // slc + MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // tfe const TargetRegisterClass *NewDstRC = RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass); diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index 12dc3f3..a9aa99f 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -114,7 +114,7 @@ public: // register. If there is no hardware instruction that can store to \p // DstRC, then AMDGPU::COPY is returned. unsigned getMovOpcode(const TargetRegisterClass *DstRC) const; - unsigned commuteOpcode(unsigned Opcode) const; + unsigned commuteOpcode(const MachineInstr &MI) const; MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI = false) const override; @@ -218,10 +218,6 @@ public: bool isImmOperandLegal(const MachineInstr *MI, unsigned OpNo, const MachineOperand &MO) const; - /// \brief Return true if the given offset Size in bytes can be folded into - /// the immediate offsets of a memory instruction for the given address space. - bool canFoldOffset(unsigned OffsetSize, unsigned AS) const; - /// \brief Return true if this 64-bit VALU instruction has a 32-bit encoding. /// This function will return false if you pass it a 32-bit instruction. 
bool hasVALU32BitEncoding(unsigned Opcode) const; diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index e2747dc..d603ecb 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -264,6 +264,9 @@ def ds_offset0 : Operand<i8> { def ds_offset1 : Operand<i8> { let PrintMethod = "printDSOffset1"; } +def gds : Operand <i1> { + let PrintMethod = "printGDS"; +} def glc : Operand <i1> { let PrintMethod = "printGLC"; } @@ -284,6 +287,8 @@ def ClampMod : Operand <i1> { } // End OperandType = "OPERAND_IMMEDIATE" +def VOPDstS64 : VOPDstOperand <SReg_64>; + //===----------------------------------------------------------------------===// // Complex patterns //===----------------------------------------------------------------------===// @@ -292,7 +297,7 @@ def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">; def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">; def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">; -def MUBUFAddr64 : ComplexPattern<i64, 4, "SelectMUBUFAddr64">; +def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">; def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">; def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">; def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">; @@ -315,6 +320,7 @@ def SIOperand { def SRCMODS { int NONE = 0; + int NEG = 1; } def DSTCLAMP { @@ -516,7 +522,7 @@ multiclass SOP2_64_32 <sop2 op, string opName, list<dag> pattern> : SOP2_m < class SOPC_Helper <bits<7> op, RegisterOperand rc, ValueType vt, string opName, PatLeaf cond> : SOPC < op, (outs SCCReg:$dst), (ins rc:$src0, rc:$src1), - opName#" $dst, $src0, $src1", []>; + opName#" $src0, $src1", []>; class SOPC_32<bits<7> op, string opName, PatLeaf cond = COND_NULL> : SOPC_Helper<op, SSrc_32, i32, opName, cond>; @@ -637,9 +643,9 @@ class getNumSrcArgs<ValueType Src1, ValueType Src2> { // Returns the register class to use for the destination of VOP[123C] // instructions for the given VT. 
class getVALUDstForVT<ValueType VT> { - RegisterClass ret = !if(!eq(VT.Size, 32), VGPR_32, - !if(!eq(VT.Size, 64), VReg_64, - SReg_64)); // else VT == i1 + RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>, + !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>, + VOPDstOperand<SReg_64>)); // else VT == i1 } // Returns the register class to use for source 0 of VOP[12C] @@ -717,7 +723,7 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC, class getAsm32 <int NumSrcArgs> { string src1 = ", $src1"; string src2 = ", $src2"; - string ret = " $dst, $src0"# + string ret = "$dst, $src0"# !if(!eq(NumSrcArgs, 1), "", src1)# !if(!eq(NumSrcArgs, 3), src2, ""); } @@ -733,7 +739,7 @@ class getAsm64 <int NumSrcArgs, bit HasModifiers> { string ret = !if(!eq(HasModifiers, 0), getAsm32<NumSrcArgs>.ret, - " $dst, "#src0#src1#src2#"$clamp"#"$omod"); + "$dst, "#src0#src1#src2#"$clamp"#"$omod"); } @@ -745,7 +751,7 @@ class VOPProfile <list<ValueType> _ArgVT> { field ValueType Src0VT = ArgVT[1]; field ValueType Src1VT = ArgVT[2]; field ValueType Src2VT = ArgVT[3]; - field RegisterClass DstRC = getVALUDstForVT<DstVT>.ret; + field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret; field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret; field RegisterClass Src1RC32 = getVOPSrc1ForVT<Src1VT>.ret; field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret; @@ -761,7 +767,7 @@ class VOPProfile <list<ValueType> _ArgVT> { field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs, HasModifiers>.ret; - field string Asm32 = "_e32"#getAsm32<NumSrcArgs>.ret; + field string Asm32 = getAsm32<NumSrcArgs>.ret; field string Asm64 = getAsm64<NumSrcArgs, HasModifiers>.ret; } @@ -788,22 +794,27 @@ def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> { def VOP_I1_F32_I32 : VOPProfile <[i1, f32, i32, untyped]> { let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1); - let Asm64 = " $dst, $src0_modifiers, $src1"; + let Asm64 = "$dst, $src0_modifiers, $src1"; } def VOP_I1_F64_I32 : VOPProfile <[i1, f64, i32, untyped]> { let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1); - let Asm64 = " $dst, $src0_modifiers, $src1"; + let Asm64 = "$dst, $src0_modifiers, $src1"; } def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>; def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>; def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>; +def VOP_CNDMASK : VOPProfile <[i32, i32, i32, untyped]> { + let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VCCReg:$src2); + let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, SSrc_64:$src2); + let Asm64 = "$dst, $src0, $src1, $src2"; +} def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>; def VOP_MADK : VOPProfile <[f32, f32, f32, f32]> { field dag Ins = (ins VCSrc_32:$src0, VGPR_32:$vsrc1, u32imm:$src2); - field string Asm = " $dst, $src0, $vsrc1, $src2"; + field string Asm = "$dst, $src0, $vsrc1, $src2"; } def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>; def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>; @@ -835,23 +846,28 @@ class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : field bits<9> src0; } +class VOP1_Real_si <string opName, vop1 op, dag outs, dag ins, string asm> : + VOP1<op.SI, outs, ins, asm, []>, + SIMCInstr <opName#"_e32", SISubtarget.SI>; + +class VOP1_Real_vi <string opName, vop1 op, dag outs, dag ins, string asm> : + VOP1<op.VI, outs, ins, asm, []>, + SIMCInstr <opName#"_e32", SISubtarget.VI>; + multiclass 
VOP1_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern, string opName> { def "" : VOP1_Pseudo <outs, ins, pattern, opName>; - def _si : VOP1<op.SI, outs, ins, asm, []>, - SIMCInstr <opName#"_e32", SISubtarget.SI>; - def _vi : VOP1<op.VI, outs, ins, asm, []>, - SIMCInstr <opName#"_e32", SISubtarget.VI>; + def _si : VOP1_Real_si <opName, op, outs, ins, asm>; + + def _vi : VOP1_Real_vi <opName, op, outs, ins, asm>; } multiclass VOP1SI_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern, string opName> { def "" : VOP1_Pseudo <outs, ins, pattern, opName>; - def _si : VOP1<op.SI, outs, ins, asm, []>, - SIMCInstr <opName#"_e32", SISubtarget.SI>; - // No VI instruction. This class is for SI only. + def _si : VOP1_Real_si <opName, op, outs, ins, asm>; } class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : @@ -862,13 +878,20 @@ class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : let isCodeGenOnly = 1; } +class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> : + VOP2 <op.SI, outs, ins, opName#asm, []>, + SIMCInstr <opName#"_e32", SISubtarget.SI>; + +class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> : + VOP2 <op.SI, outs, ins, opName#asm, []>, + SIMCInstr <opName#"_e32", SISubtarget.VI>; + multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern, string opName, string revOp> { def "" : VOP2_Pseudo <outs, ins, pattern, opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>; - def _si : VOP2 <op.SI, outs, ins, opName#asm, []>, - SIMCInstr <opName#"_e32", SISubtarget.SI>; + def _si : VOP2_Real_si <opName, op, outs, ins, asm>; } multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern, @@ -876,10 +899,10 @@ multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern, def "" : VOP2_Pseudo <outs, ins, pattern, opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>; - def _si : VOP2 <op.SI, outs, ins, opName#asm, []>, - SIMCInstr <opName#"_e32", SISubtarget.SI>; - def _vi : VOP2 <op.VI, outs, ins, opName#asm, []>, - SIMCInstr <opName#"_e32", SISubtarget.VI>; + def _si : VOP2_Real_si <opName, op, outs, ins, asm>; + + def _vi : VOP2_Real_vi <opName, op, outs, ins, asm>; + } class VOP3DisableFields <bit HasSrc1, bit HasSrc2, bit HasModifiers> { @@ -1047,9 +1070,10 @@ multiclass VOP3b_3_m <vop op, dag outs, dag ins, string asm, multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm, list<dag> pattern, string opName, - bit HasMods, bit defExec> { + bit HasMods, bit defExec, string revOp> { - def "" : VOP3_Pseudo <outs, ins, pattern, opName>; + def "" : VOP3_Pseudo <outs, ins, pattern, opName>, + VOP2_REV<revOp#"_e64", !eq(revOp, opName)>; def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>, VOP3DisableFields<1, 0, HasMods> { @@ -1086,7 +1110,7 @@ multiclass VOP1_Helper <vop1 op, string opName, dag outs, defm _e32 : VOP1_m <op, outs, ins32, opName#asm32, pat32, opName>; - defm _e64 : VOP3_1_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName, HasMods>; + defm _e64 : VOP3_1_m <op, outs, ins64, opName#asm64, pat64, opName, HasMods>; } multiclass VOP1Inst <vop1 op, string opName, VOPProfile P, @@ -1121,7 +1145,7 @@ multiclass VOP2_Helper <vop2 op, string opName, dag outs, defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>; defm _e64 : VOP3_2_m <op, - outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods + outs, ins64, opName#asm64, pat64, opName, revOp, HasMods >; } @@ -1145,7 +1169,7 @@ multiclass VOP2InstSI <vop2 op, 
string opName, VOPProfile P, string revOp = opName> { defm _e32 : VOP2SI_m <op, P.Outs, P.Ins32, P.Asm32, [], opName, revOp>; - defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#"_e64"#P.Asm64, + defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#P.Asm64, !if(P.HasModifiers, [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, @@ -1163,7 +1187,7 @@ multiclass VOP2b_Helper <vop2 op, string opName, dag outs, defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>; defm _e64 : VOP3b_2_m <op, - outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods + outs, ins64, opName#asm64, pat64, opName, revOp, HasMods >; } @@ -1189,7 +1213,7 @@ multiclass VOP2_VI3_Helper <vop23 op, string opName, dag outs, string revOp, bit HasMods> { defm _e32 : VOP2SI_m <op, outs, ins32, asm32, pat32, opName, revOp>; - defm _e64 : VOP3_2_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName, + defm _e64 : VOP3_2_m <op, outs, ins64, opName#asm64, pat64, opName, revOp, HasMods>; } @@ -1235,28 +1259,30 @@ class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : } multiclass VOPC_m <vopc op, dag outs, dag ins, string asm, list<dag> pattern, - string opName, bit DefExec> { + string opName, bit DefExec, string revOpName = ""> { def "" : VOPC_Pseudo <outs, ins, pattern, opName>; def _si : VOPC<op.SI, ins, asm, []>, SIMCInstr <opName#"_e32", SISubtarget.SI> { let Defs = !if(DefExec, [EXEC], []); + let hasSideEffects = DefExec; } def _vi : VOPC<op.VI, ins, asm, []>, SIMCInstr <opName#"_e32", SISubtarget.VI> { let Defs = !if(DefExec, [EXEC], []); + let hasSideEffects = DefExec; } } multiclass VOPC_Helper <vopc op, string opName, dag ins32, string asm32, list<dag> pat32, dag out64, dag ins64, string asm64, list<dag> pat64, - bit HasMods, bit DefExec> { + bit HasMods, bit DefExec, string revOp> { defm _e32 : VOPC_m <op, (outs), ins32, opName#asm32, pat32, opName, DefExec>; - defm _e64 : VOP3_C_m <op, out64, ins64, opName#"_e64"#asm64, pat64, - opName, HasMods, DefExec>; + defm _e64 : VOP3_C_m <op, out64, ins64, opName#asm64, pat64, + opName, HasMods, DefExec, revOp>; } // Special case for class instructions which only have modifiers on @@ -1264,20 +1290,21 @@ multiclass VOPC_Helper <vopc op, string opName, multiclass VOPC_Class_Helper <vopc op, string opName, dag ins32, string asm32, list<dag> pat32, dag out64, dag ins64, string asm64, list<dag> pat64, - bit HasMods, bit DefExec> { + bit HasMods, bit DefExec, string revOp> { defm _e32 : VOPC_m <op, (outs), ins32, opName#asm32, pat32, opName, DefExec>; - defm _e64 : VOP3_C_m <op, out64, ins64, opName#"_e64"#asm64, pat64, - opName, HasMods, DefExec>, + defm _e64 : VOP3_C_m <op, out64, ins64, opName#asm64, pat64, + opName, HasMods, DefExec, revOp>, VOP3DisableModFields<1, 0, 0>; } multiclass VOPCInst <vopc op, string opName, VOPProfile P, PatLeaf cond = COND_NULL, + string revOp = opName, bit DefExec = 0> : VOPC_Helper < op, opName, P.Ins32, P.Asm32, [], - (outs SReg_64:$dst), P.Ins64, P.Asm64, + (outs VOPDstS64:$dst), P.Ins64, P.Asm64, !if(P.HasModifiers, [(set i1:$dst, (setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, @@ -1285,54 +1312,55 @@ multiclass VOPCInst <vopc op, string opName, (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), cond))], [(set i1:$dst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]), - P.HasModifiers, DefExec + P.HasModifiers, DefExec, revOp >; multiclass VOPCClassInst <vopc op, string opName, VOPProfile P, bit DefExec = 0> : VOPC_Class_Helper < op, opName, P.Ins32, 
  P.Ins32, P.Asm32, [],
-  (outs SReg_64:$dst), P.Ins64, P.Asm64,
+  (outs VOPDstS64:$dst), P.Ins64, P.Asm64,
  !if(P.HasModifiers,
      [(set i1:$dst,
          (AMDGPUfp_class (P.Src0VT (VOP3Mods0Clamp0OMod P.Src0VT:$src0,
                          i32:$src0_modifiers)), P.Src1VT:$src1))],
      [(set i1:$dst, (AMDGPUfp_class P.Src0VT:$src0, P.Src1VT:$src1))]),
-  P.HasModifiers, DefExec
+  P.HasModifiers, DefExec, opName
>;

-multiclass VOPC_F32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
-  VOPCInst <op, opName, VOP_F32_F32_F32, cond>;
+multiclass VOPC_F32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+  VOPCInst <op, opName, VOP_F32_F32_F32, cond, revOp>;

-multiclass VOPC_F64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
-  VOPCInst <op, opName, VOP_F64_F64_F64, cond>;
+multiclass VOPC_F64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+  VOPCInst <op, opName, VOP_F64_F64_F64, cond, revOp>;

-multiclass VOPC_I32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
-  VOPCInst <op, opName, VOP_I32_I32_I32, cond>;
+multiclass VOPC_I32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+  VOPCInst <op, opName, VOP_I32_I32_I32, cond, revOp>;

-multiclass VOPC_I64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
-  VOPCInst <op, opName, VOP_I64_I64_I64, cond>;
+multiclass VOPC_I64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+  VOPCInst <op, opName, VOP_I64_I64_I64, cond, revOp>;

multiclass VOPCX <vopc op, string opName, VOPProfile P,
-                  PatLeaf cond = COND_NULL>
-  : VOPCInst <op, opName, P, cond, 1>;
+                  PatLeaf cond = COND_NULL,
+                  string revOp = "">
+  : VOPCInst <op, opName, P, cond, revOp, 1>;

-multiclass VOPCX_F32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
-  VOPCX <op, opName, VOP_F32_F32_F32, cond>;
+multiclass VOPCX_F32 <vopc op, string opName, string revOp = opName> :
+  VOPCX <op, opName, VOP_F32_F32_F32, COND_NULL, revOp>;

-multiclass VOPCX_F64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
-  VOPCX <op, opName, VOP_F64_F64_F64, cond>;
+multiclass VOPCX_F64 <vopc op, string opName, string revOp = opName> :
+  VOPCX <op, opName, VOP_F64_F64_F64, COND_NULL, revOp>;

-multiclass VOPCX_I32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
-  VOPCX <op, opName, VOP_I32_I32_I32, cond>;
+multiclass VOPCX_I32 <vopc op, string opName, string revOp = opName> :
+  VOPCX <op, opName, VOP_I32_I32_I32, COND_NULL, revOp>;

-multiclass VOPCX_I64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
-  VOPCX <op, opName, VOP_I64_I64_I64, cond>;
+multiclass VOPCX_I64 <vopc op, string opName, string revOp = opName> :
+  VOPCX <op, opName, VOP_I64_I64_I64, COND_NULL, revOp>;

multiclass VOP3_Helper <vop3 op, string opName, dag outs, dag ins, string asm,
                        list<dag> pat, int NumSrcArgs, bit HasMods> : VOP3_m <
-  op, outs, ins, opName#asm, pat, opName, NumSrcArgs, HasMods
+  op, outs, ins, opName#" "#asm, pat, opName, NumSrcArgs, HasMods
>;

multiclass VOPC_CLASS_F32 <vopc op, string opName> :
@@ -1349,7 +1377,7 @@ multiclass VOPCX_CLASS_F64 <vopc op, string opName> :
multiclass VOP3Inst <vop3 op, string opName, VOPProfile P,
                     SDPatternOperator node = null_frag> : VOP3_Helper <
-  op, opName, P.Outs, P.Ins64, P.Asm64,
+  op, opName, (outs P.DstRC.RegClass:$dst), P.Ins64, P.Asm64,
  !if(!eq(P.NumSrcArgs, 3),
    !if(P.HasModifiers,
        [(set P.DstVT:$dst,
@@ -1381,7 +1409,7 @@ multiclass VOP3_VCC_Inst <vop3 op, string opName,
                          VOPProfile P,
                          SDPatternOperator node = null_frag> : VOP3_Helper <
  op, opName,
-  P.Outs,
+  (outs P.DstRC.RegClass:$dst),
  (ins InputModsNoDefault:$src0_modifiers, P.Src0RC64:$src0,
       InputModsNoDefault:$src1_modifiers, P.Src1RC64:$src1,
       InputModsNoDefault:$src2_modifiers, P.Src2RC64:$src2,
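The VOPC_* and VOPCX_* multiclasses above thread a revOp string through to VOP3_C_m, defaulting it to the instruction's own name, so that only order-sensitive compares (lt/gt, le/ge, nge/nle, ngt/nlt) have to spell out their swapped-operand counterpart. A hedged, self-contained sketch of that defaulting idiom, with hypothetical record names (it can be run through llvm-tblgen):

class RevPair <string opName, string revOp> {
  string Op = opName;     // this compare's mnemonic
  string RevOp = revOp;   // mnemonic after swapping src0 and src1
}
multiclass CmpSketch <string opName, string revOp = opName> {
  def "" : RevPair <opName, revOp>;
}
defm EQ_SKETCH : CmpSketch <"v_cmp_eq_f32">;                 // symmetric: its own reverse
defm LT_SKETCH : CmpSketch <"v_cmp_lt_f32", "v_cmp_gt_f32">; // asymmetric: reverse is gt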
@@ -1483,10 +1511,8 @@ class DS_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
  DSe_vi <op>,
  SIMCInstr <opName, SISubtarget.VI>;

-class DS_1A_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
-  DS <outs, ins, asm, []>,
-  DSe <op>,
-  SIMCInstr <opName, SISubtarget.SI> {
+class DS_Off16_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
+  DS_Real_si <op, opName, outs, ins, asm> {

  // Single load interprets the 2 i8imm operands as a single i16 offset.
  bits<16> offset;
@@ -1494,10 +1520,8 @@ class DS_1A_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
  let offset1 = offset{15-8};
}

-class DS_1A_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
-  DS <outs, ins, asm, []>,
-  DSe_vi <op>,
-  SIMCInstr <opName, SISubtarget.VI> {
+class DS_Off16_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
+  DS_Real_vi <op, opName, outs, ins, asm> {

  // Single load interprets the 2 i8imm operands as a single i16 offset.
  bits<16> offset;
@@ -1505,180 +1529,168 @@ class DS_1A_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
  let offset1 = offset{15-8};
}

-multiclass DS_1A_Load_m <bits<8> op, string opName, dag outs, dag ins, string asm,
-                         list<dag> pat> {
-  let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
-    def "" : DS_Pseudo <opName, outs, ins, pat>;
+multiclass DS_1A_RET <bits<8> op, string opName, RegisterClass rc,
+  dag outs = (outs rc:$vdst),
+  dag ins = (ins VGPR_32:$addr, ds_offset:$offset, gds:$gds, M0Reg:$m0),
+  string asm = opName#" $vdst, $addr"#"$offset$gds"> {

-    let data0 = 0, data1 = 0 in {
-      def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
-      def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
-    }
+  def "" : DS_Pseudo <opName, outs, ins, []>;
+
+  let data0 = 0, data1 = 0 in {
+    def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+    def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
  }
}

-multiclass DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass>
-  : DS_1A_Load_m <
-      op,
-      asm,
-      (outs regClass:$vdst),
-      (ins i1imm:$gds, VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0),
-      asm#" $vdst, $addr"#"$offset",
-      []>;
-
-multiclass DS_Load2_m <bits<8> op, string opName, dag outs, dag ins, string asm,
-                       list<dag> pat> {
-  let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
-    def "" : DS_Pseudo <opName, outs, ins, pat>;
-
-    let data0 = 0, data1 = 0 in {
-      def _si : DS_Real_si <op, opName, outs, ins, asm>;
-      def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
-    }
+multiclass DS_1A_Off8_RET <bits<8> op, string opName, RegisterClass rc,
+  dag outs = (outs rc:$vdst),
+  dag ins = (ins VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
+                 gds:$gds, M0Reg:$m0),
+  string asm = opName#" $vdst, $addr"#"$offset0"#"$offset1$gds"> {
+
+  def "" : DS_Pseudo <opName, outs, ins, []>;
+
+  let data0 = 0, data1 = 0 in {
+    def _si : DS_Real_si <op, opName, outs, ins, asm>;
+    def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
  }
}

-multiclass DS_Load2_Helper <bits<8> op, string asm, RegisterClass regClass>
-  : DS_Load2_m <
-      op,
-      asm,
-      (outs regClass:$vdst),
-      (ins i1imm:$gds, VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
-           M0Reg:$m0),
-      asm#" $vdst, $addr"#"$offset0"#"$offset1",
-      []>;
-
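Both DS_Off16_Real_* classes declare a single 16-bit offset and carve the two hardware byte operands out of it with TableGen bit slices. A minimal, self-contained sketch of the slicing, using hypothetical names:

class Off16Sketch {
  bits<16> offset;
  bits<8> offset0 = offset{7-0};   // low byte of the 16-bit offset
  bits<8> offset1 = offset{15-8};  // high byte
}
def OFF16_SKETCH : Off16Sketch {
  let offset = 0x1234;             // resolves offset0 = 0x34, offset1 = 0x12
}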
-multiclass DS_1A_Store_m <bits<8> op, string opName, dag outs, dag ins,
-                          string asm, list<dag> pat> {
-  let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
-    def "" : DS_Pseudo <opName, outs, ins, pat>;
-
-    let data1 = 0, vdst = 0 in {
-      def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
-      def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
-    }
+multiclass DS_1A1D_NORET <bits<8> op, string opName, RegisterClass rc,
+  dag outs = (outs),
+  dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds,
+                 M0Reg:$m0),
+  string asm = opName#" $addr, $data0"#"$offset$gds"> {
+
+  def "" : DS_Pseudo <opName, outs, ins, []>,
+           AtomicNoRet<opName, 0>;
+
+  let data1 = 0, vdst = 0 in {
+    def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+    def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
  }
}

-multiclass DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass>
-  : DS_1A_Store_m <
-      op,
-      asm,
-      (outs),
-      (ins i1imm:$gds, VGPR_32:$addr, regClass:$data0, ds_offset:$offset, M0Reg:$m0),
-      asm#" $addr, $data0"#"$offset",
-      []>;
-
-multiclass DS_Store_m <bits<8> op, string opName, dag outs, dag ins,
-                       string asm, list<dag> pat> {
-  let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
-    def "" : DS_Pseudo <opName, outs, ins, pat>;
-
-    let vdst = 0 in {
-      def _si : DS_Real_si <op, opName, outs, ins, asm>;
-      def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
-    }
+multiclass DS_1A1D_Off8_NORET <bits<8> op, string opName, RegisterClass rc,
+  dag outs = (outs),
+  dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
+                 ds_offset0:$offset0, ds_offset1:$offset1, gds:$gds, M0Reg:$m0),
+  string asm = opName#" $addr, $data0, $data1"#"$offset0"#"$offset1"#"$gds"> {
+
+  def "" : DS_Pseudo <opName, outs, ins, []>;
+
+  let vdst = 0 in {
+    def _si : DS_Real_si <op, opName, outs, ins, asm>;
+    def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
  }
}

-multiclass DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass>
-  : DS_Store_m <
-      op,
-      asm,
-      (outs),
-      (ins i1imm:$gds, VGPR_32:$addr, regClass:$data0, regClass:$data1,
-           ds_offset0:$offset0, ds_offset1:$offset1, M0Reg:$m0),
-      asm#" $addr, $data0, $data1"#"$offset0"#"$offset1",
-      []>;
-
-// 1 address, 1 data.
-multiclass DS_1A1D_RET_m <bits<8> op, string opName, dag outs, dag ins,
-                          string asm, list<dag> pat, string noRetOp> {
-  let mayLoad = 1, mayStore = 1,
-      hasPostISelHook = 1 // Adjusted to no return version.
-  in {
-    def "" : DS_Pseudo <opName, outs, ins, pat>,
-             AtomicNoRet<noRetOp, 1>;
-
-    let data1 = 0 in {
-      def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
-      def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
-    }
+multiclass DS_1A1D_RET <bits<8> op, string opName, RegisterClass rc,
+  string noRetOp = "",
+  dag outs = (outs rc:$vdst),
+  dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds,
+                 M0Reg:$m0),
+  string asm = opName#" $vdst, $addr, $data0"#"$offset$gds"> {
+
+  def "" : DS_Pseudo <opName, outs, ins, []>,
+           AtomicNoRet<noRetOp, 1>;
+
+  let data1 = 0 in {
+    def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+    def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
  }
}

-multiclass DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc,
-                        string noRetOp = ""> : DS_1A1D_RET_m <
-  op, asm,
-  (outs rc:$vdst),
-  (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
-  asm#" $vdst, $addr, $data0"#"$offset", [], noRetOp>;
-
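DS_1A1D_RET tags its pseudo with AtomicNoRet<noRetOp, 1> while the no-return multiclasses use AtomicNoRet<opName, 0>; both record the no-return mnemonic, which is what lets TableGen relate the returning and non-returning forms of one atomic (the removed *_m wrappers leaned on hasPostISelHook, "Adjusted to no return version", for the same downgrade). A hedged sketch of the pairing, with dummy names:

class AtomicNoRetSketch <string noRetOp, bit isRet> {
  string NoRetOp = noRetOp; // key shared by both variants of one atomic
  bit IsRet = isRet;        // 0 = no-return form, 1 = returning form
}
def ADD_U32_SKETCH     : AtomicNoRetSketch <"ds_add_u32", 0>;
def ADD_RTN_U32_SKETCH : AtomicNoRetSketch <"ds_add_u32", 1>;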
-// 1 address, 2 data.
-multiclass DS_1A2D_RET_m <bits<8> op, string opName, dag outs, dag ins,
-                          string asm, list<dag> pat, string noRetOp> {
-  let mayLoad = 1, mayStore = 1,
-      hasPostISelHook = 1 // Adjusted to no return version.
-  in {
-    def "" : DS_Pseudo <opName, outs, ins, pat>,
-             AtomicNoRet<noRetOp, 1>;
-
-    def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
-    def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
-  }
+multiclass DS_1A2D_RET_m <bits<8> op, string opName, RegisterClass rc,
+  string noRetOp = "", dag ins,
+  dag outs = (outs rc:$vdst),
+  string asm = opName#" $vdst, $addr, $data0, $data1"#"$offset"#"$gds"> {
+
+  def "" : DS_Pseudo <opName, outs, ins, []>,
+           AtomicNoRet<noRetOp, 1>;
+
+  def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+  def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}

multiclass DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc,
-                        string noRetOp = ""> : DS_1A2D_RET_m <
-  op, asm,
-  (outs rc:$vdst),
-  (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
-  asm#" $vdst, $addr, $data0, $data1"#"$offset",
-  [], noRetOp>;
-
-// 1 address, 2 data.
-multiclass DS_1A2D_NORET_m <bits<8> op, string opName, dag outs, dag ins,
-                            string asm, list<dag> pat, string noRetOp> {
-  let mayLoad = 1, mayStore = 1 in {
-    def "" : DS_Pseudo <opName, outs, ins, pat>,
-             AtomicNoRet<noRetOp, 0>;
+                        string noRetOp = "", RegisterClass src = rc> :
+  DS_1A2D_RET_m <op, asm, rc, noRetOp,
+                 (ins VGPR_32:$addr, src:$data0, src:$data1,
+                      ds_offset:$offset, gds:$gds, M0Reg:$m0)
+>;

-    let vdst = 0 in {
-      def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
-      def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
-    }
+multiclass DS_1A2D_NORET <bits<8> op, string opName, RegisterClass rc,
+  string noRetOp = opName,
+  dag outs = (outs),
+  dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
+                 ds_offset:$offset, gds:$gds, M0Reg:$m0),
+  string asm = opName#" $addr, $data0, $data1"#"$offset"#"$gds"> {
+
+  def "" : DS_Pseudo <opName, outs, ins, []>,
+           AtomicNoRet<noRetOp, 0>;
+
+  let vdst = 0 in {
+    def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+    def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
  }
}

-multiclass DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc,
-                          string noRetOp = asm> : DS_1A2D_NORET_m <
-  op, asm,
-  (outs),
-  (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
-  asm#" $addr, $data0, $data1"#"$offset",
-  [], noRetOp>;
+multiclass DS_0A_RET <bits<8> op, string opName,
+  dag outs = (outs VGPR_32:$vdst),
+  dag ins = (ins ds_offset:$offset, gds:$gds, M0Reg:$m0),
+  string asm = opName#" $vdst"#"$offset"#"$gds"> {

-// 1 address, 1 data.
-multiclass DS_1A1D_NORET_m <bits<8> op, string opName, dag outs, dag ins,
-                            string asm, list<dag> pat, string noRetOp> {
  let mayLoad = 1, mayStore = 1 in {
-    def "" : DS_Pseudo <opName, outs, ins, pat>,
-             AtomicNoRet<noRetOp, 0>;
+    def "" : DS_Pseudo <opName, outs, ins, []>;

-    let data1 = 0, vdst = 0 in {
-      def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
-      def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
-    }
-  }
+    let addr = 0, data0 = 0, data1 = 0 in {
+      def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+      def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+    } // end addr = 0, data0 = 0, data1 = 0
+  } // end mayLoad = 1, mayStore = 1
}

-multiclass DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc,
-                          string noRetOp = asm> : DS_1A1D_NORET_m <
-  op, asm,
-  (outs),
-  (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
-  asm#" $addr, $data0"#"$offset",
-  [], noRetOp>;
+multiclass DS_1A_RET_GDS <bits<8> op, string opName,
+  dag outs = (outs VGPR_32:$vdst),
+  dag ins = (ins VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0),
+  string asm = opName#" $vdst, $addr"#"$offset gds"> {
+
+  def "" : DS_Pseudo <opName, outs, ins, []>;
+
+  let data0 = 0, data1 = 0, gds = 1 in {
+    def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+    def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+  } // end data0 = 0, data1 = 0, gds = 1
+}
+
+multiclass DS_1A_GDS <bits<8> op, string opName,
+  dag outs = (outs),
+  dag ins = (ins VGPR_32:$addr, M0Reg:$m0),
+  string asm = opName#" $addr gds"> {
+
+  def "" : DS_Pseudo <opName, outs, ins, []>;
+
+  let vdst = 0, data0 = 0, data1 = 0, offset0 = 0, offset1 = 0, gds = 1 in {
+    def _si : DS_Real_si <op, opName, outs, ins, asm>;
+    def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
+  } // end vdst = 0, data0 = 0, data1 = 0, offset0 = 0, offset1 = 0, gds = 1
+}
+
+multiclass DS_1A <bits<8> op, string opName,
+  dag outs = (outs),
+  dag ins = (ins VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0, gds:$gds),
+  string asm = opName#" $addr"#"$offset"#"$gds"> {
+
+  let mayLoad = 1, mayStore = 1 in {
+    def "" : DS_Pseudo <opName, outs, ins, []>;
+
+    let vdst = 0, data0 = 0, data1 = 0 in {
+      def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+      def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+    } // let vdst = 0, data0 = 0, data1 = 0
+  } // end mayLoad = 1, mayStore = 1
+}
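DS_1A_RET_GDS and DS_1A_GDS above pin encoding fields with a surrounding let region, for example forcing gds = 1 so the GDS bit is baked into every _si/_vi def they emit. A self-contained sketch of the idiom, with a dummy encoding class:

class DSEncSketch {
  bits<1> gds;
  bits<8> data0;
  bits<8> data1;
}
let gds = 1, data0 = 0, data1 = 0 in {
  def GWS_SKETCH : DSEncSketch; // gds/data0/data1 become constants in the encoding
}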
//===----------------------------------------------------------------------===//
// MTBUF classes
//===----------------------------------------------------------------------===//
@@ -1861,14 +1873,14 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
    defm _ADDR64 : MUBUFAtomicAddr64_m <
      op, name#"_addr64", (outs),
      (ins rc:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
-           mbuf_offset:$offset, SCSrc_32:$soffset, slc:$slc),
+           SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc),
      name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#"$slc", [], 0
    >;

    defm _OFFSET : MUBUFAtomicOffset_m <
      op, name#"_offset", (outs),
-      (ins rc:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
-           SCSrc_32:$soffset, slc:$slc),
+      (ins rc:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset, mbuf_offset:$offset,
+           slc:$slc),
      name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", [], 0
    >;
  } // glc = 0
@@ -1880,7 +1892,7 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
    defm _RTN_ADDR64 : MUBUFAtomicAddr64_m <
      op, name#"_rtn_addr64", (outs rc:$vdata),
      (ins rc:$vdata_in, SReg_128:$srsrc, VReg_64:$vaddr,
-           mbuf_offset:$offset, SSrc_32:$soffset, slc:$slc),
+           SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc),
      name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#" glc"#"$slc",
      [(set vt:$vdata,
        (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset,
@@ -1889,8 +1901,8 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
    defm _RTN_OFFSET : MUBUFAtomicOffset_m <
      op, name#"_rtn_offset", (outs rc:$vdata),
-      (ins rc:$vdata_in, SReg_128:$srsrc, mbuf_offset:$offset,
-           SCSrc_32:$soffset, slc:$slc),
+      (ins rc:$vdata_in, SReg_128:$srsrc, SCSrc_32:$soffset,
+           mbuf_offset:$offset, slc:$slc),
      name#" $vdata, $srsrc, $soffset"#"$offset"#" glc $slc",
      [(set vt:$vdata,
        (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
@@ -1909,9 +1921,8 @@ multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
  let mayLoad = 1, mayStore = 0 in {
    let offen = 0, idxen = 0, vaddr = 0 in {
      defm _OFFSET : MUBUF_m <op, name#"_offset", (outs regClass:$vdata),
-                             (ins SReg_128:$srsrc,
-                             mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc,
-                             slc:$slc, tfe:$tfe),
+                             (ins SReg_128:$srsrc, SCSrc_32:$soffset,
+                             mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
                             name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
                             [(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc,
                                                       i32:$soffset, i16:$offset,
@@ -1920,7 +1931,7 @@ multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
    let offen = 1, idxen = 0 in {
      defm _OFFEN : MUBUF_m <op, name#"_offen", (outs regClass:$vdata),
-                            (ins SReg_128:$srsrc, VGPR_32:$vaddr,
+                            (ins VGPR_32:$vaddr, SReg_128:$srsrc,
                             SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc,
                             tfe:$tfe),
                            name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
@@ -1928,45 +1939,48 @@ multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
    let offen = 0, idxen = 1 in {
      defm _IDXEN : MUBUF_m <op, name#"_idxen", (outs regClass:$vdata),
-                            (ins SReg_128:$srsrc, VGPR_32:$vaddr,
-                            mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc,
+                            (ins VGPR_32:$vaddr, SReg_128:$srsrc,
+                            SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
                             slc:$slc, tfe:$tfe),
                            name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
    }

    let offen = 1, idxen = 1 in {
      defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs regClass:$vdata),
-                             (ins SReg_128:$srsrc, VReg_64:$vaddr,
-                             SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+                             (ins VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+                             mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
                             name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
    }

-    let offen = 0, idxen = 0, glc = 0, slc = 0, tfe = 0 in {
+    let offen = 0, idxen = 0 in {
      defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs regClass:$vdata),
-                                   (ins SReg_128:$srsrc, VReg_64:$vaddr,
-                                   SCSrc_32:$soffset, mbuf_offset:$offset),
-                                  name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset",
+                                   (ins VReg_64:$vaddr, SReg_128:$srsrc,
+                                   SCSrc_32:$soffset, mbuf_offset:$offset,
+                                   glc:$glc, slc:$slc, tfe:$tfe),
+                                  name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#
+                                  "$glc"#"$slc"#"$tfe",
                                   [(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc,
                                                             i64:$vaddr, i32:$soffset,
-                                                            i16:$offset)))]>;
+                                                            i16:$offset, i1:$glc, i1:$slc,
+                                                            i1:$tfe)))]>;
    }
  }
}
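The recurring MUBUF change above is an operand reordering: $vaddr now precedes $srsrc, and $soffset precedes $offset, in each (ins ...) list. Since the (ins ...) dag fixes the MachineInstr operand order, the selection patterns later in this patch have to list their operands in the new order too. A self-contained sketch with dummy operand records:

def ins;                  // dag operator, as declared in Target.td
class RCSketch {}         // stand-in for a register class
def VADDR_SK : RCSketch;
def SRSRC_SK : RCSketch;
class MUBUFSketch <dag i> {
  dag InOperandList = i;  // this dag order is the instruction's operand order
}
def BUF_SKETCH : MUBUFSketch <(ins VADDR_SK:$vaddr, SRSRC_SK:$srsrc)>;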

multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass,
-                               ValueType store_vt, SDPatternOperator st> {
+                               ValueType store_vt = i32, SDPatternOperator st = null_frag> {
  let mayLoad = 0, mayStore = 1 in {
    defm : MUBUF_m <op, name, (outs),
-                   (ins vdataClass:$vdata, SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset,
+                   (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
                    mbuf_offset:$offset, offen:$offen, idxen:$idxen, glc:$glc, slc:$slc, tfe:$tfe),
                   name#" $vdata, $vaddr, $srsrc, $soffset"#"$offen"#"$idxen"#"$offset"#
-                  "$glc"#"$slc"#"$tfe", []>;
+                  "$glc"#"$slc"#"$tfe", []>;

    let offen = 0, idxen = 0, vaddr = 0 in {
      defm _OFFSET : MUBUF_m <op, name#"_offset",(outs),
-                              (ins vdataClass:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
-                              SCSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe),
+                              (ins vdataClass:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset,
+                              mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
                             name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
                             [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
                                                   i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>;
@@ -1974,21 +1988,40 @@ multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass,
    let offen = 1, idxen = 0 in {
      defm _OFFEN : MUBUF_m <op, name#"_offen", (outs),
-                            (ins vdataClass:$vdata, SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset,
-                            mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+                            (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc,
+                            SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
+                            slc:$slc, tfe:$tfe),
                            name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#
                            "$glc"#"$slc"#"$tfe", []>;
    } // end offen = 1, idxen = 0

-    let offen = 0, idxen = 0, glc = 0, slc = 0, tfe = 0 in {
+    let offen = 0, idxen = 1 in {
+      defm _IDXEN : MUBUF_m <op, name#"_idxen", (outs),
+                             (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc,
+                             SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
+                             slc:$slc, tfe:$tfe),
+                            name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+    }
+
+    let offen = 1, idxen = 1 in {
+      defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs),
+                              (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+                              mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+                             name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+    }
+
+    let offen = 0, idxen = 0 in {
      defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs),
-                                    (ins vdataClass:$vdata, SReg_128:$srsrc,
-                                    VReg_64:$vaddr, SCSrc_32:$soffset,
-                                    mbuf_offset:$offset),
-                                   name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset",
+                                    (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc,
+                                    SCSrc_32:$soffset,
+                                    mbuf_offset:$offset, glc:$glc, slc:$slc,
+                                    tfe:$tfe),
+                                   name#" $vdata, $vaddr, $srsrc, $soffset addr64"#
+                                   "$offset"#"$glc"#"$slc"#"$tfe",
                                    [(st store_vt:$vdata,
                                      (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr,
-                                                  i32:$soffset, i16:$offset))]>;
+                                                  i32:$soffset, i16:$offset,
+                                                  i1:$glc, i1:$slc, i1:$tfe))]>;
    }
  } // End mayLoad = 0, mayStore = 1
}
@@ -2182,15 +2215,6 @@ def getVOPe32 : InstrMapping {
  let ValueCols = [["4"]];
}

-// Maps an original opcode to its commuted version
-def getCommuteRev : InstrMapping {
-  let FilterClass = "VOP2_REV";
-  let RowFields = ["RevOp"];
-  let ColFields = ["IsOrig"];
-  let KeyCol = ["1"];
-  let ValueCols = [["0"]];
-}
-
def getMaskedMIMGOp : InstrMapping {
  let FilterClass = "MIMG_Mask";
  let RowFields = ["Op"];
@@ -2208,6 +2232,33 @@ def getCommuteOrig : InstrMapping {
  let ValueCols = [["1"]];
}

+// Maps an original opcode to its commuted version
+def getCommuteRev : InstrMapping {
+  let FilterClass = "VOP2_REV";
+  let RowFields = ["RevOp"];
+  let ColFields = ["IsOrig"];
+  let KeyCol = ["1"];
+  let ValueCols = [["0"]];
+}
+
+def getCommuteCmpOrig : InstrMapping {
+  let FilterClass = "VOP2_REV";
+  let RowFields = ["RevOp"];
+  let ColFields = ["IsOrig"];
+  let KeyCol = ["0"];
+  let ValueCols = [["1"]];
+}
+
+// Maps an original opcode to its commuted version
+def getCommuteCmpRev : InstrMapping {
+  let FilterClass = "VOP2_REV";
+  let RowFields = ["RevOp"];
+  let ColFields = ["IsOrig"];
+  let KeyCol = ["1"];
+  let ValueCols = [["0"]];
+}
+
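Each InstrMapping def above asks TableGen's map-table backend to emit a lookup table, which one would expect to surface as functions along the lines of int getCommuteCmpOrig(uint16_t Opcode) and int getCommuteCmpRev(uint16_t Opcode) (the exact emission is the backend's business): rows are keyed by RevOp and the IsOrig column selects which variant comes back. A hedged sketch of how two records end up in one row, with dummy names:

class VOP2_REV_SKETCH <string revOp, bit isOrig> {
  string RevOp = revOp; // RowFields: both variants share this key
  bit IsOrig = isOrig;  // ColFields: original vs. commuted form
}
def CMP_LT_SKETCH : VOP2_REV_SKETCH <"v_cmp_gt_f32", 1>; // KeyCol "1": original
def CMP_GT_SKETCH : VOP2_REV_SKETCH <"v_cmp_gt_f32", 0>; // ValueCol "0": its commute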
def getMCOpcodeGen : InstrMapping {
  let FilterClass = "SIMCInstr";
  let RowFields = ["PseudoInstr"];
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 4f72e99..95b2470 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -28,6 +28,8 @@ def SendMsgImm : Operand<i32> {
def isGCN : Predicate<"Subtarget->getGeneration() "
                      ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
+def isSI : Predicate<"Subtarget->getGeneration() "
+                     "== AMDGPUSubtarget::SOUTHERN_ISLANDS">;
def isSICI : Predicate<
  "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
  "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
@@ -153,7 +155,9 @@ defm S_FLBIT_I32_B32 : SOP1_32 <sop1<0x15, 0x12>, "s_flbit_i32_b32",
>;
defm S_FLBIT_I32_B64 : SOP1_32_64 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>;
-defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32", []>;
+defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32",
+  [(set i32:$dst, (int_AMDGPU_flbit_i32 i32:$src0))]
+>;
defm S_FLBIT_I32_I64 : SOP1_32_64 <sop1<0x18, 0x15>, "s_flbit_i32_i64", []>;
defm S_SEXT_I32_I8 : SOP1_32 <sop1<0x19, 0x16>, "s_sext_i32_i8",
  [(set i32:$dst, (sext_inreg i32:$src0, i8))]
@@ -304,7 +308,8 @@ defm S_ASHR_I64 : SOP2_64_32 <sop2<0x23, 0x21>, "s_ashr_i64",
>;
} // End Defs = [SCC]

-defm S_BFM_B32 : SOP2_32 <sop2<0x24, 0x22>, "s_bfm_b32", []>;
+defm S_BFM_B32 : SOP2_32 <sop2<0x24, 0x22>, "s_bfm_b32",
+  [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))]>;
defm S_BFM_B64 : SOP2_64 <sop2<0x25, 0x23>, "s_bfm_b64", []>;
defm S_MUL_I32 : SOP2_32 <sop2<0x26, 0x24>, "s_mul_i32",
  [(set i32:$dst, (mul i32:$src0, i32:$src1))]
@@ -505,31 +510,30 @@ def S_TTRACEDATA : SOPP <0x00000016, (ins), "s_ttracedata"> {
// VOPC Instructions
//===----------------------------------------------------------------------===//

-let isCompare = 1 in {
+let isCompare = 1, isCommutable = 1 in {

defm V_CMP_F_F32 : VOPC_F32 <vopc<0x0, 0x40>, "v_cmp_f_f32">;
-defm V_CMP_LT_F32 : VOPC_F32 <vopc<0x1, 0x41>, "v_cmp_lt_f32", COND_OLT>;
+defm V_CMP_LT_F32 : VOPC_F32 <vopc<0x1, 0x41>, "v_cmp_lt_f32", COND_OLT, "v_cmp_gt_f32">;
defm V_CMP_EQ_F32 : VOPC_F32 <vopc<0x2, 0x42>, "v_cmp_eq_f32", COND_OEQ>;
-defm V_CMP_LE_F32 : VOPC_F32 <vopc<0x3, 0x43>, "v_cmp_le_f32", COND_OLE>;
+defm V_CMP_LE_F32 : VOPC_F32 <vopc<0x3, 0x43>, "v_cmp_le_f32", COND_OLE, "v_cmp_ge_f32">;
defm V_CMP_GT_F32 : VOPC_F32 <vopc<0x4, 0x44>, "v_cmp_gt_f32", COND_OGT>;
defm V_CMP_LG_F32 : VOPC_F32 <vopc<0x5, 0x45>, "v_cmp_lg_f32", COND_ONE>;
defm V_CMP_GE_F32 : VOPC_F32 <vopc<0x6, 0x46>, "v_cmp_ge_f32", COND_OGE>;
defm V_CMP_O_F32 : VOPC_F32 <vopc<0x7, 0x47>, "v_cmp_o_f32", COND_O>;
defm V_CMP_U_F32 : VOPC_F32 <vopc<0x8, 0x48>, "v_cmp_u_f32", COND_UO>;
-defm V_CMP_NGE_F32 : VOPC_F32 <vopc<0x9, 0x49>, "v_cmp_nge_f32", COND_ULT>;
+defm V_CMP_NGE_F32 : VOPC_F32 <vopc<0x9, 0x49>, "v_cmp_nge_f32", COND_ULT, "v_cmp_nle_f32">;
defm V_CMP_NLG_F32 : VOPC_F32 <vopc<0xa, 0x4a>, "v_cmp_nlg_f32", COND_UEQ>;
-defm V_CMP_NGT_F32 : VOPC_F32 <vopc<0xb, 0x4b>, "v_cmp_ngt_f32", COND_ULE>;
+defm V_CMP_NGT_F32 : VOPC_F32 <vopc<0xb, 0x4b>, "v_cmp_ngt_f32", COND_ULE, "v_cmp_nlt_f32">;
defm V_CMP_NLE_F32 : VOPC_F32 <vopc<0xc, 0x4c>, "v_cmp_nle_f32", COND_UGT>;
defm V_CMP_NEQ_F32 : VOPC_F32 <vopc<0xd, 0x4d>, "v_cmp_neq_f32", COND_UNE>;
defm V_CMP_NLT_F32 : VOPC_F32 <vopc<0xe, 0x4e>, "v_cmp_nlt_f32", COND_UGE>;
defm V_CMP_TRU_F32 : VOPC_F32 <vopc<0xf, 0x4f>, "v_cmp_tru_f32">;

-let hasSideEffects = 1 in {

defm V_CMPX_F_F32 : VOPCX_F32 <vopc<0x10, 0x50>, "v_cmpx_f_f32">;
-defm V_CMPX_LT_F32 : VOPCX_F32 <vopc<0x11, 0x51>, "v_cmpx_lt_f32">;
+defm V_CMPX_LT_F32 : VOPCX_F32 <vopc<0x11, 0x51>, "v_cmpx_lt_f32", "v_cmpx_gt_f32">;
defm V_CMPX_EQ_F32 : VOPCX_F32 <vopc<0x12, 0x52>, "v_cmpx_eq_f32">;
-defm V_CMPX_LE_F32 : VOPCX_F32 <vopc<0x13, 0x53>, "v_cmpx_le_f32">;
+defm V_CMPX_LE_F32 : VOPCX_F32 <vopc<0x13, 0x53>, "v_cmpx_le_f32", "v_cmpx_ge_f32">;
defm V_CMPX_GT_F32 : VOPCX_F32 <vopc<0x14, 0x54>, "v_cmpx_gt_f32">;
defm V_CMPX_LG_F32 : VOPCX_F32 <vopc<0x15, 0x55>, "v_cmpx_lg_f32">;
defm V_CMPX_GE_F32 : VOPCX_F32 <vopc<0x16, 0x56>, "v_cmpx_ge_f32">;
@@ -543,233 +547,207 @@ defm V_CMPX_NEQ_F32 : VOPCX_F32 <vopc<0x1d, 0x5d>, "v_cmpx_neq_f32">;
defm V_CMPX_NLT_F32 : VOPCX_F32 <vopc<0x1e, 0x5e>, "v_cmpx_nlt_f32">;
defm V_CMPX_TRU_F32 : VOPCX_F32 <vopc<0x1f, 0x5f>, "v_cmpx_tru_f32">;

-} // End hasSideEffects = 1

defm V_CMP_F_F64 : VOPC_F64 <vopc<0x20, 0x60>, "v_cmp_f_f64">;
-defm V_CMP_LT_F64 : VOPC_F64 <vopc<0x21, 0x61>, "v_cmp_lt_f64", COND_OLT>;
+defm V_CMP_LT_F64 : VOPC_F64 <vopc<0x21, 0x61>, "v_cmp_lt_f64", COND_OLT, "v_cmp_gt_f64">;
defm V_CMP_EQ_F64 : VOPC_F64 <vopc<0x22, 0x62>, "v_cmp_eq_f64", COND_OEQ>;
-defm V_CMP_LE_F64 : VOPC_F64 <vopc<0x23, 0x63>, "v_cmp_le_f64", COND_OLE>;
+defm V_CMP_LE_F64 : VOPC_F64 <vopc<0x23, 0x63>, "v_cmp_le_f64", COND_OLE, "v_cmp_ge_f64">;
defm V_CMP_GT_F64 : VOPC_F64 <vopc<0x24, 0x64>, "v_cmp_gt_f64", COND_OGT>;
defm V_CMP_LG_F64 : VOPC_F64 <vopc<0x25, 0x65>, "v_cmp_lg_f64", COND_ONE>;
defm V_CMP_GE_F64 : VOPC_F64 <vopc<0x26, 0x66>, "v_cmp_ge_f64", COND_OGE>;
defm V_CMP_O_F64 : VOPC_F64 <vopc<0x27, 0x67>, "v_cmp_o_f64", COND_O>;
defm V_CMP_U_F64 : VOPC_F64 <vopc<0x28, 0x68>, "v_cmp_u_f64", COND_UO>;
-defm V_CMP_NGE_F64 : VOPC_F64 <vopc<0x29, 0x69>, "v_cmp_nge_f64", COND_ULT>;
+defm V_CMP_NGE_F64 : VOPC_F64 <vopc<0x29, 0x69>, "v_cmp_nge_f64", COND_ULT, "v_cmp_nle_f64">;
defm V_CMP_NLG_F64 : VOPC_F64 <vopc<0x2a, 0x6a>, "v_cmp_nlg_f64", COND_UEQ>;
-defm V_CMP_NGT_F64 : VOPC_F64 <vopc<0x2b, 0x6b>, "v_cmp_ngt_f64", COND_ULE>;
+defm V_CMP_NGT_F64 : VOPC_F64 <vopc<0x2b, 0x6b>, "v_cmp_ngt_f64", COND_ULE, "v_cmp_nlt_f64">;
defm V_CMP_NLE_F64 : VOPC_F64 <vopc<0x2c, 0x6c>, "v_cmp_nle_f64", COND_UGT>;
defm V_CMP_NEQ_F64 : VOPC_F64 <vopc<0x2d, 0x6d>, "v_cmp_neq_f64", COND_UNE>;
defm V_CMP_NLT_F64 : VOPC_F64 <vopc<0x2e, 0x6e>, "v_cmp_nlt_f64", COND_UGE>;
defm V_CMP_TRU_F64 : VOPC_F64 <vopc<0x2f, 0x6f>, "v_cmp_tru_f64">;

-let hasSideEffects = 1 in {

defm V_CMPX_F_F64 : VOPCX_F64 <vopc<0x30, 0x70>, "v_cmpx_f_f64">;
-defm V_CMPX_LT_F64 : VOPCX_F64 <vopc<0x31, 0x71>, "v_cmpx_lt_f64">;
+defm V_CMPX_LT_F64 : VOPCX_F64 <vopc<0x31, 0x71>, "v_cmpx_lt_f64", "v_cmpx_gt_f64">;
defm V_CMPX_EQ_F64 : VOPCX_F64 <vopc<0x32, 0x72>, "v_cmpx_eq_f64">;
-defm V_CMPX_LE_F64 : VOPCX_F64 <vopc<0x33, 0x73>, "v_cmpx_le_f64">;
+defm V_CMPX_LE_F64 : VOPCX_F64 <vopc<0x33, 0x73>, "v_cmpx_le_f64", "v_cmpx_ge_f64">;
defm V_CMPX_GT_F64 : VOPCX_F64 <vopc<0x34, 0x74>, "v_cmpx_gt_f64">;
defm V_CMPX_LG_F64 : VOPCX_F64 <vopc<0x35, 0x75>, "v_cmpx_lg_f64">;
defm V_CMPX_GE_F64 : VOPCX_F64 <vopc<0x36, 0x76>, "v_cmpx_ge_f64">;
defm V_CMPX_O_F64 : VOPCX_F64 <vopc<0x37, 0x77>, "v_cmpx_o_f64">;
defm V_CMPX_U_F64 : VOPCX_F64 <vopc<0x38, 0x78>, "v_cmpx_u_f64">;
-defm V_CMPX_NGE_F64 : VOPCX_F64 <vopc<0x39, 0x79>, "v_cmpx_nge_f64">;
+defm V_CMPX_NGE_F64 : VOPCX_F64 <vopc<0x39, 0x79>, "v_cmpx_nge_f64", "v_cmpx_nle_f64">;
defm V_CMPX_NLG_F64 : VOPCX_F64 <vopc<0x3a, 0x7a>, "v_cmpx_nlg_f64">;
-defm V_CMPX_NGT_F64 : VOPCX_F64 <vopc<0x3b, 0x7b>, "v_cmpx_ngt_f64">;
+defm V_CMPX_NGT_F64 : VOPCX_F64 <vopc<0x3b, 0x7b>, "v_cmpx_ngt_f64", "v_cmpx_nlt_f64">;
defm V_CMPX_NLE_F64 : VOPCX_F64 <vopc<0x3c, 0x7c>, "v_cmpx_nle_f64">;
defm V_CMPX_NEQ_F64 : VOPCX_F64 <vopc<0x3d, 0x7d>, "v_cmpx_neq_f64">;
defm V_CMPX_NLT_F64 : VOPCX_F64 <vopc<0x3e, 0x7e>, "v_cmpx_nlt_f64">;
defm V_CMPX_TRU_F64 : VOPCX_F64 <vopc<0x3f, 0x7f>, "v_cmpx_tru_f64">;

-} // End hasSideEffects = 1

let SubtargetPredicate = isSICI in {

defm V_CMPS_F_F32 : VOPC_F32 <vopc<0x40>, "v_cmps_f_f32">;
-defm V_CMPS_LT_F32 : VOPC_F32 <vopc<0x41>, "v_cmps_lt_f32">;
+defm V_CMPS_LT_F32 : VOPC_F32 <vopc<0x41>, "v_cmps_lt_f32", COND_NULL, "v_cmps_gt_f32">;
defm V_CMPS_EQ_F32 : VOPC_F32 <vopc<0x42>, "v_cmps_eq_f32">;
-defm V_CMPS_LE_F32 : VOPC_F32 <vopc<0x43>, "v_cmps_le_f32">;
+defm V_CMPS_LE_F32 : VOPC_F32 <vopc<0x43>, "v_cmps_le_f32", COND_NULL, "v_cmps_ge_f32">;
defm V_CMPS_GT_F32 : VOPC_F32 <vopc<0x44>, "v_cmps_gt_f32">;
defm V_CMPS_LG_F32 : VOPC_F32 <vopc<0x45>, "v_cmps_lg_f32">;
defm V_CMPS_GE_F32 : VOPC_F32 <vopc<0x46>, "v_cmps_ge_f32">;
defm V_CMPS_O_F32 : VOPC_F32 <vopc<0x47>, "v_cmps_o_f32">;
defm V_CMPS_U_F32 : VOPC_F32 <vopc<0x48>, "v_cmps_u_f32">;
-defm V_CMPS_NGE_F32 : VOPC_F32 <vopc<0x49>, "v_cmps_nge_f32">;
+defm V_CMPS_NGE_F32 : VOPC_F32 <vopc<0x49>, "v_cmps_nge_f32", COND_NULL, "v_cmps_nle_f32">;
defm V_CMPS_NLG_F32 : VOPC_F32 <vopc<0x4a>, "v_cmps_nlg_f32">;
-defm V_CMPS_NGT_F32 : VOPC_F32 <vopc<0x4b>, "v_cmps_ngt_f32">;
+defm V_CMPS_NGT_F32 : VOPC_F32 <vopc<0x4b>, "v_cmps_ngt_f32", COND_NULL, "v_cmps_nlt_f32">;
defm V_CMPS_NLE_F32 : VOPC_F32 <vopc<0x4c>, "v_cmps_nle_f32">;
defm V_CMPS_NEQ_F32 : VOPC_F32 <vopc<0x4d>, "v_cmps_neq_f32">;
defm V_CMPS_NLT_F32 : VOPC_F32 <vopc<0x4e>, "v_cmps_nlt_f32">;
defm V_CMPS_TRU_F32 : VOPC_F32 <vopc<0x4f>, "v_cmps_tru_f32">;

-let hasSideEffects = 1 in {

defm V_CMPSX_F_F32 : VOPCX_F32 <vopc<0x50>, "v_cmpsx_f_f32">;
-defm V_CMPSX_LT_F32 : VOPCX_F32 <vopc<0x51>, "v_cmpsx_lt_f32">;
+defm V_CMPSX_LT_F32 : VOPCX_F32 <vopc<0x51>, "v_cmpsx_lt_f32", "v_cmpsx_gt_f32">;
defm V_CMPSX_EQ_F32 : VOPCX_F32 <vopc<0x52>, "v_cmpsx_eq_f32">;
-defm V_CMPSX_LE_F32 : VOPCX_F32 <vopc<0x53>, "v_cmpsx_le_f32">;
+defm V_CMPSX_LE_F32 : VOPCX_F32 <vopc<0x53>, "v_cmpsx_le_f32", "v_cmpsx_ge_f32">;
defm V_CMPSX_GT_F32 : VOPCX_F32 <vopc<0x54>, "v_cmpsx_gt_f32">;
defm V_CMPSX_LG_F32 : VOPCX_F32 <vopc<0x55>, "v_cmpsx_lg_f32">;
defm V_CMPSX_GE_F32 : VOPCX_F32 <vopc<0x56>, "v_cmpsx_ge_f32">;
defm V_CMPSX_O_F32 : VOPCX_F32 <vopc<0x57>, "v_cmpsx_o_f32">;
defm V_CMPSX_U_F32 : VOPCX_F32 <vopc<0x58>, "v_cmpsx_u_f32">;
-defm V_CMPSX_NGE_F32 : VOPCX_F32 <vopc<0x59>, "v_cmpsx_nge_f32">;
+defm V_CMPSX_NGE_F32 : VOPCX_F32 <vopc<0x59>, "v_cmpsx_nge_f32", "v_cmpsx_nle_f32">;
defm V_CMPSX_NLG_F32 : VOPCX_F32 <vopc<0x5a>, "v_cmpsx_nlg_f32">;
-defm V_CMPSX_NGT_F32 : VOPCX_F32 <vopc<0x5b>, "v_cmpsx_ngt_f32">;
+defm V_CMPSX_NGT_F32 : VOPCX_F32 <vopc<0x5b>, "v_cmpsx_ngt_f32", "v_cmpsx_nlt_f32">;
defm V_CMPSX_NLE_F32 : VOPCX_F32 <vopc<0x5c>, "v_cmpsx_nle_f32">;
defm V_CMPSX_NEQ_F32 : VOPCX_F32 <vopc<0x5d>, "v_cmpsx_neq_f32">;
defm V_CMPSX_NLT_F32 : VOPCX_F32 <vopc<0x5e>, "v_cmpsx_nlt_f32">;
defm V_CMPSX_TRU_F32 : VOPCX_F32 <vopc<0x5f>, "v_cmpsx_tru_f32">;

-} // End hasSideEffects = 1

defm V_CMPS_F_F64 : VOPC_F64 <vopc<0x60>, "v_cmps_f_f64">;
"v_cmps_lt_f64">; +defm V_CMPS_LT_F64 : VOPC_F64 <vopc<0x61>, "v_cmps_lt_f64", COND_NULL, "v_cmps_gt_f64">; defm V_CMPS_EQ_F64 : VOPC_F64 <vopc<0x62>, "v_cmps_eq_f64">; -defm V_CMPS_LE_F64 : VOPC_F64 <vopc<0x63>, "v_cmps_le_f64">; +defm V_CMPS_LE_F64 : VOPC_F64 <vopc<0x63>, "v_cmps_le_f64", COND_NULL, "v_cmps_ge_f64">; defm V_CMPS_GT_F64 : VOPC_F64 <vopc<0x64>, "v_cmps_gt_f64">; defm V_CMPS_LG_F64 : VOPC_F64 <vopc<0x65>, "v_cmps_lg_f64">; defm V_CMPS_GE_F64 : VOPC_F64 <vopc<0x66>, "v_cmps_ge_f64">; defm V_CMPS_O_F64 : VOPC_F64 <vopc<0x67>, "v_cmps_o_f64">; defm V_CMPS_U_F64 : VOPC_F64 <vopc<0x68>, "v_cmps_u_f64">; -defm V_CMPS_NGE_F64 : VOPC_F64 <vopc<0x69>, "v_cmps_nge_f64">; +defm V_CMPS_NGE_F64 : VOPC_F64 <vopc<0x69>, "v_cmps_nge_f64", COND_NULL, "v_cmps_nle_f64">; defm V_CMPS_NLG_F64 : VOPC_F64 <vopc<0x6a>, "v_cmps_nlg_f64">; -defm V_CMPS_NGT_F64 : VOPC_F64 <vopc<0x6b>, "v_cmps_ngt_f64">; +defm V_CMPS_NGT_F64 : VOPC_F64 <vopc<0x6b>, "v_cmps_ngt_f64", COND_NULL, "v_cmps_nlt_f64">; defm V_CMPS_NLE_F64 : VOPC_F64 <vopc<0x6c>, "v_cmps_nle_f64">; defm V_CMPS_NEQ_F64 : VOPC_F64 <vopc<0x6d>, "v_cmps_neq_f64">; defm V_CMPS_NLT_F64 : VOPC_F64 <vopc<0x6e>, "v_cmps_nlt_f64">; defm V_CMPS_TRU_F64 : VOPC_F64 <vopc<0x6f>, "v_cmps_tru_f64">; -let hasSideEffects = 1, Defs = [EXEC] in { - -defm V_CMPSX_F_F64 : VOPC_F64 <vopc<0x70>, "v_cmpsx_f_f64">; -defm V_CMPSX_LT_F64 : VOPC_F64 <vopc<0x71>, "v_cmpsx_lt_f64">; -defm V_CMPSX_EQ_F64 : VOPC_F64 <vopc<0x72>, "v_cmpsx_eq_f64">; -defm V_CMPSX_LE_F64 : VOPC_F64 <vopc<0x73>, "v_cmpsx_le_f64">; -defm V_CMPSX_GT_F64 : VOPC_F64 <vopc<0x74>, "v_cmpsx_gt_f64">; -defm V_CMPSX_LG_F64 : VOPC_F64 <vopc<0x75>, "v_cmpsx_lg_f64">; -defm V_CMPSX_GE_F64 : VOPC_F64 <vopc<0x76>, "v_cmpsx_ge_f64">; -defm V_CMPSX_O_F64 : VOPC_F64 <vopc<0x77>, "v_cmpsx_o_f64">; -defm V_CMPSX_U_F64 : VOPC_F64 <vopc<0x78>, "v_cmpsx_u_f64">; -defm V_CMPSX_NGE_F64 : VOPC_F64 <vopc<0x79>, "v_cmpsx_nge_f64">; -defm V_CMPSX_NLG_F64 : VOPC_F64 <vopc<0x7a>, "v_cmpsx_nlg_f64">; -defm V_CMPSX_NGT_F64 : VOPC_F64 <vopc<0x7b>, "v_cmpsx_ngt_f64">; -defm V_CMPSX_NLE_F64 : VOPC_F64 <vopc<0x7c>, "v_cmpsx_nle_f64">; -defm V_CMPSX_NEQ_F64 : VOPC_F64 <vopc<0x7d>, "v_cmpsx_neq_f64">; -defm V_CMPSX_NLT_F64 : VOPC_F64 <vopc<0x7e>, "v_cmpsx_nlt_f64">; -defm V_CMPSX_TRU_F64 : VOPC_F64 <vopc<0x7f>, "v_cmpsx_tru_f64">; - -} // End hasSideEffects = 1, Defs = [EXEC] + +defm V_CMPSX_F_F64 : VOPCX_F64 <vopc<0x70>, "v_cmpsx_f_f64">; +defm V_CMPSX_LT_F64 : VOPCX_F64 <vopc<0x71>, "v_cmpsx_lt_f64", "v_cmpsx_gt_f64">; +defm V_CMPSX_EQ_F64 : VOPCX_F64 <vopc<0x72>, "v_cmpsx_eq_f64">; +defm V_CMPSX_LE_F64 : VOPCX_F64 <vopc<0x73>, "v_cmpsx_le_f64", "v_cmpsx_ge_f64">; +defm V_CMPSX_GT_F64 : VOPCX_F64 <vopc<0x74>, "v_cmpsx_gt_f64">; +defm V_CMPSX_LG_F64 : VOPCX_F64 <vopc<0x75>, "v_cmpsx_lg_f64">; +defm V_CMPSX_GE_F64 : VOPCX_F64 <vopc<0x76>, "v_cmpsx_ge_f64">; +defm V_CMPSX_O_F64 : VOPCX_F64 <vopc<0x77>, "v_cmpsx_o_f64">; +defm V_CMPSX_U_F64 : VOPCX_F64 <vopc<0x78>, "v_cmpsx_u_f64">; +defm V_CMPSX_NGE_F64 : VOPCX_F64 <vopc<0x79>, "v_cmpsx_nge_f64", "v_cmpsx_nle_f64">; +defm V_CMPSX_NLG_F64 : VOPCX_F64 <vopc<0x7a>, "v_cmpsx_nlg_f64">; +defm V_CMPSX_NGT_F64 : VOPCX_F64 <vopc<0x7b>, "v_cmpsx_ngt_f64", "v_cmpsx_nlt_f64">; +defm V_CMPSX_NLE_F64 : VOPCX_F64 <vopc<0x7c>, "v_cmpsx_nle_f64">; +defm V_CMPSX_NEQ_F64 : VOPCX_F64 <vopc<0x7d>, "v_cmpsx_neq_f64">; +defm V_CMPSX_NLT_F64 : VOPCX_F64 <vopc<0x7e>, "v_cmpsx_nlt_f64">; +defm V_CMPSX_TRU_F64 : VOPCX_F64 <vopc<0x7f>, "v_cmpsx_tru_f64">; } // End SubtargetPredicate = isSICI defm V_CMP_F_I32 : 
defm V_CMP_F_I32 : VOPC_I32 <vopc<0x80, 0xc0>, "v_cmp_f_i32">;
-defm V_CMP_LT_I32 : VOPC_I32 <vopc<0x81, 0xc1>, "v_cmp_lt_i32", COND_SLT>;
+defm V_CMP_LT_I32 : VOPC_I32 <vopc<0x81, 0xc1>, "v_cmp_lt_i32", COND_SLT, "v_cmp_gt_i32">;
defm V_CMP_EQ_I32 : VOPC_I32 <vopc<0x82, 0xc2>, "v_cmp_eq_i32", COND_EQ>;
-defm V_CMP_LE_I32 : VOPC_I32 <vopc<0x83, 0xc3>, "v_cmp_le_i32", COND_SLE>;
+defm V_CMP_LE_I32 : VOPC_I32 <vopc<0x83, 0xc3>, "v_cmp_le_i32", COND_SLE, "v_cmp_ge_i32">;
defm V_CMP_GT_I32 : VOPC_I32 <vopc<0x84, 0xc4>, "v_cmp_gt_i32", COND_SGT>;
defm V_CMP_NE_I32 : VOPC_I32 <vopc<0x85, 0xc5>, "v_cmp_ne_i32", COND_NE>;
defm V_CMP_GE_I32 : VOPC_I32 <vopc<0x86, 0xc6>, "v_cmp_ge_i32", COND_SGE>;
defm V_CMP_T_I32 : VOPC_I32 <vopc<0x87, 0xc7>, "v_cmp_t_i32">;

-let hasSideEffects = 1 in {

defm V_CMPX_F_I32 : VOPCX_I32 <vopc<0x90, 0xd0>, "v_cmpx_f_i32">;
-defm V_CMPX_LT_I32 : VOPCX_I32 <vopc<0x91, 0xd1>, "v_cmpx_lt_i32">;
+defm V_CMPX_LT_I32 : VOPCX_I32 <vopc<0x91, 0xd1>, "v_cmpx_lt_i32", "v_cmpx_gt_i32">;
defm V_CMPX_EQ_I32 : VOPCX_I32 <vopc<0x92, 0xd2>, "v_cmpx_eq_i32">;
-defm V_CMPX_LE_I32 : VOPCX_I32 <vopc<0x93, 0xd3>, "v_cmpx_le_i32">;
+defm V_CMPX_LE_I32 : VOPCX_I32 <vopc<0x93, 0xd3>, "v_cmpx_le_i32", "v_cmpx_ge_i32">;
defm V_CMPX_GT_I32 : VOPCX_I32 <vopc<0x94, 0xd4>, "v_cmpx_gt_i32">;
defm V_CMPX_NE_I32 : VOPCX_I32 <vopc<0x95, 0xd5>, "v_cmpx_ne_i32">;
defm V_CMPX_GE_I32 : VOPCX_I32 <vopc<0x96, 0xd6>, "v_cmpx_ge_i32">;
defm V_CMPX_T_I32 : VOPCX_I32 <vopc<0x97, 0xd7>, "v_cmpx_t_i32">;

-} // End hasSideEffects = 1

defm V_CMP_F_I64 : VOPC_I64 <vopc<0xa0, 0xe0>, "v_cmp_f_i64">;
-defm V_CMP_LT_I64 : VOPC_I64 <vopc<0xa1, 0xe1>, "v_cmp_lt_i64", COND_SLT>;
+defm V_CMP_LT_I64 : VOPC_I64 <vopc<0xa1, 0xe1>, "v_cmp_lt_i64", COND_SLT, "v_cmp_gt_i64">;
defm V_CMP_EQ_I64 : VOPC_I64 <vopc<0xa2, 0xe2>, "v_cmp_eq_i64", COND_EQ>;
-defm V_CMP_LE_I64 : VOPC_I64 <vopc<0xa3, 0xe3>, "v_cmp_le_i64", COND_SLE>;
+defm V_CMP_LE_I64 : VOPC_I64 <vopc<0xa3, 0xe3>, "v_cmp_le_i64", COND_SLE, "v_cmp_ge_i64">;
defm V_CMP_GT_I64 : VOPC_I64 <vopc<0xa4, 0xe4>, "v_cmp_gt_i64", COND_SGT>;
defm V_CMP_NE_I64 : VOPC_I64 <vopc<0xa5, 0xe5>, "v_cmp_ne_i64", COND_NE>;
defm V_CMP_GE_I64 : VOPC_I64 <vopc<0xa6, 0xe6>, "v_cmp_ge_i64", COND_SGE>;
defm V_CMP_T_I64 : VOPC_I64 <vopc<0xa7, 0xe7>, "v_cmp_t_i64">;

-let hasSideEffects = 1 in {

defm V_CMPX_F_I64 : VOPCX_I64 <vopc<0xb0, 0xf0>, "v_cmpx_f_i64">;
-defm V_CMPX_LT_I64 : VOPCX_I64 <vopc<0xb1, 0xf1>, "v_cmpx_lt_i64">;
+defm V_CMPX_LT_I64 : VOPCX_I64 <vopc<0xb1, 0xf1>, "v_cmpx_lt_i64", "v_cmpx_gt_i64">;
defm V_CMPX_EQ_I64 : VOPCX_I64 <vopc<0xb2, 0xf2>, "v_cmpx_eq_i64">;
-defm V_CMPX_LE_I64 : VOPCX_I64 <vopc<0xb3, 0xf3>, "v_cmpx_le_i64">;
+defm V_CMPX_LE_I64 : VOPCX_I64 <vopc<0xb3, 0xf3>, "v_cmpx_le_i64", "v_cmpx_ge_i64">;
defm V_CMPX_GT_I64 : VOPCX_I64 <vopc<0xb4, 0xf4>, "v_cmpx_gt_i64">;
defm V_CMPX_NE_I64 : VOPCX_I64 <vopc<0xb5, 0xf5>, "v_cmpx_ne_i64">;
defm V_CMPX_GE_I64 : VOPCX_I64 <vopc<0xb6, 0xf6>, "v_cmpx_ge_i64">;
defm V_CMPX_T_I64 : VOPCX_I64 <vopc<0xb7, 0xf7>, "v_cmpx_t_i64">;

-} // End hasSideEffects = 1

defm V_CMP_F_U32 : VOPC_I32 <vopc<0xc0, 0xc8>, "v_cmp_f_u32">;
-defm V_CMP_LT_U32 : VOPC_I32 <vopc<0xc1, 0xc9>, "v_cmp_lt_u32", COND_ULT>;
+defm V_CMP_LT_U32 : VOPC_I32 <vopc<0xc1, 0xc9>, "v_cmp_lt_u32", COND_ULT, "v_cmp_gt_u32">;
defm V_CMP_EQ_U32 : VOPC_I32 <vopc<0xc2, 0xca>, "v_cmp_eq_u32", COND_EQ>;
-defm V_CMP_LE_U32 : VOPC_I32 <vopc<0xc3, 0xcb>, "v_cmp_le_u32", COND_ULE>;
+defm V_CMP_LE_U32 : VOPC_I32 <vopc<0xc3, 0xcb>, "v_cmp_le_u32", COND_ULE, "v_cmp_ge_u32">;
defm V_CMP_GT_U32 : VOPC_I32 <vopc<0xc4, 0xcc>, "v_cmp_gt_u32", COND_UGT>;
defm V_CMP_NE_U32 : VOPC_I32 <vopc<0xc5, 0xcd>, "v_cmp_ne_u32", COND_NE>;
defm V_CMP_GE_U32 : VOPC_I32 <vopc<0xc6, 0xce>, "v_cmp_ge_u32", COND_UGE>;
defm V_CMP_T_U32 : VOPC_I32 <vopc<0xc7, 0xcf>, "v_cmp_t_u32">;

-let hasSideEffects = 1 in {

defm V_CMPX_F_U32 : VOPCX_I32 <vopc<0xd0, 0xd8>, "v_cmpx_f_u32">;
-defm V_CMPX_LT_U32 : VOPCX_I32 <vopc<0xd1, 0xd9>, "v_cmpx_lt_u32">;
+defm V_CMPX_LT_U32 : VOPCX_I32 <vopc<0xd1, 0xd9>, "v_cmpx_lt_u32", "v_cmpx_gt_u32">;
defm V_CMPX_EQ_U32 : VOPCX_I32 <vopc<0xd2, 0xda>, "v_cmpx_eq_u32">;
-defm V_CMPX_LE_U32 : VOPCX_I32 <vopc<0xd3, 0xdb>, "v_cmpx_le_u32">;
+defm V_CMPX_LE_U32 : VOPCX_I32 <vopc<0xd3, 0xdb>, "v_cmpx_le_u32", "v_cmpx_ge_u32">;
defm V_CMPX_GT_U32 : VOPCX_I32 <vopc<0xd4, 0xdc>, "v_cmpx_gt_u32">;
defm V_CMPX_NE_U32 : VOPCX_I32 <vopc<0xd5, 0xdd>, "v_cmpx_ne_u32">;
defm V_CMPX_GE_U32 : VOPCX_I32 <vopc<0xd6, 0xde>, "v_cmpx_ge_u32">;
defm V_CMPX_T_U32 : VOPCX_I32 <vopc<0xd7, 0xdf>, "v_cmpx_t_u32">;

-} // End hasSideEffects = 1

defm V_CMP_F_U64 : VOPC_I64 <vopc<0xe0, 0xe8>, "v_cmp_f_u64">;
-defm V_CMP_LT_U64 : VOPC_I64 <vopc<0xe1, 0xe9>, "v_cmp_lt_u64", COND_ULT>;
+defm V_CMP_LT_U64 : VOPC_I64 <vopc<0xe1, 0xe9>, "v_cmp_lt_u64", COND_ULT, "v_cmp_gt_u64">;
defm V_CMP_EQ_U64 : VOPC_I64 <vopc<0xe2, 0xea>, "v_cmp_eq_u64", COND_EQ>;
-defm V_CMP_LE_U64 : VOPC_I64 <vopc<0xe3, 0xeb>, "v_cmp_le_u64", COND_ULE>;
+defm V_CMP_LE_U64 : VOPC_I64 <vopc<0xe3, 0xeb>, "v_cmp_le_u64", COND_ULE, "v_cmp_ge_u64">;
defm V_CMP_GT_U64 : VOPC_I64 <vopc<0xe4, 0xec>, "v_cmp_gt_u64", COND_UGT>;
defm V_CMP_NE_U64 : VOPC_I64 <vopc<0xe5, 0xed>, "v_cmp_ne_u64", COND_NE>;
defm V_CMP_GE_U64 : VOPC_I64 <vopc<0xe6, 0xee>, "v_cmp_ge_u64", COND_UGE>;
defm V_CMP_T_U64 : VOPC_I64 <vopc<0xe7, 0xef>, "v_cmp_t_u64">;

-let hasSideEffects = 1 in {
-
defm V_CMPX_F_U64 : VOPCX_I64 <vopc<0xf0, 0xf8>, "v_cmpx_f_u64">;
-defm V_CMPX_LT_U64 : VOPCX_I64 <vopc<0xf1, 0xf9>, "v_cmpx_lt_u64">;
+defm V_CMPX_LT_U64 : VOPCX_I64 <vopc<0xf1, 0xf9>, "v_cmpx_lt_u64", "v_cmpx_gt_u64">;
defm V_CMPX_EQ_U64 : VOPCX_I64 <vopc<0xf2, 0xfa>, "v_cmpx_eq_u64">;
-defm V_CMPX_LE_U64 : VOPCX_I64 <vopc<0xf3, 0xfb>, "v_cmpx_le_u64">;
+defm V_CMPX_LE_U64 : VOPCX_I64 <vopc<0xf3, 0xfb>, "v_cmpx_le_u64", "v_cmpx_ge_u64">;
defm V_CMPX_GT_U64 : VOPCX_I64 <vopc<0xf4, 0xfc>, "v_cmpx_gt_u64">;
defm V_CMPX_NE_U64 : VOPCX_I64 <vopc<0xf5, 0xfd>, "v_cmpx_ne_u64">;
defm V_CMPX_GE_U64 : VOPCX_I64 <vopc<0xf6, 0xfe>, "v_cmpx_ge_u64">;
defm V_CMPX_T_U64 : VOPCX_I64 <vopc<0xf7, 0xff>, "v_cmpx_t_u64">;

-} // End hasSideEffects = 1
+} // End isCompare = 1, isCommutable = 1

defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <vopc<0x88, 0x10>, "v_cmp_class_f32">;
-
-let hasSideEffects = 1 in {
defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <vopc<0x98, 0x11>, "v_cmpx_class_f32">;
-} // End hasSideEffects = 1
-
defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <vopc<0xa8, 0x12>, "v_cmp_class_f64">;
-
-let hasSideEffects = 1 in {
defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <vopc<0xb8, 0x13>, "v_cmpx_class_f64">;
-} // End hasSideEffects = 1
-
-} // End isCompare = 1

//===----------------------------------------------------------------------===//
// DS Instructions
//===----------------------------------------------------------------------===//
-
defm DS_ADD_U32 : DS_1A1D_NORET <0x0, "ds_add_u32", VGPR_32>;
defm DS_SUB_U32 : DS_1A1D_NORET <0x1, "ds_sub_u32", VGPR_32>;
defm DS_RSUB_U32 : DS_1A1D_NORET <0x2, "ds_rsub_u32", VGPR_32>;
@@ -782,12 +760,26 @@ defm DS_MAX_U32 : DS_1A1D_NORET <0x8, "ds_max_u32", VGPR_32>;
<0x9, "ds_and_b32", VGPR_32>; defm DS_OR_B32 : DS_1A1D_NORET <0xa, "ds_or_b32", VGPR_32>; defm DS_XOR_B32 : DS_1A1D_NORET <0xb, "ds_xor_b32", VGPR_32>; -defm DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "ds_mskor_b32", VGPR_32>; +defm DS_MSKOR_B32 : DS_1A2D_NORET <0xc, "ds_mskor_b32", VGPR_32>; +let mayLoad = 0 in { +defm DS_WRITE_B32 : DS_1A1D_NORET <0xd, "ds_write_b32", VGPR_32>; +defm DS_WRITE2_B32 : DS_1A1D_Off8_NORET <0xe, "ds_write2_b32", VGPR_32>; +defm DS_WRITE2ST64_B32 : DS_1A1D_Off8_NORET <0xf, "ds_write2st64_b32", VGPR_32>; +} defm DS_CMPST_B32 : DS_1A2D_NORET <0x10, "ds_cmpst_b32", VGPR_32>; defm DS_CMPST_F32 : DS_1A2D_NORET <0x11, "ds_cmpst_f32", VGPR_32>; -defm DS_MIN_F32 : DS_1A1D_NORET <0x12, "ds_min_f32", VGPR_32>; -defm DS_MAX_F32 : DS_1A1D_NORET <0x13, "ds_max_f32", VGPR_32>; - +defm DS_MIN_F32 : DS_1A2D_NORET <0x12, "ds_min_f32", VGPR_32>; +defm DS_MAX_F32 : DS_1A2D_NORET <0x13, "ds_max_f32", VGPR_32>; + +defm DS_GWS_INIT : DS_1A_GDS <0x19, "ds_gws_init">; +defm DS_GWS_SEMA_V : DS_1A_GDS <0x1a, "ds_gws_sema_v">; +defm DS_GWS_SEMA_BR : DS_1A_GDS <0x1b, "ds_gws_sema_br">; +defm DS_GWS_SEMA_P : DS_1A_GDS <0x1c, "ds_gws_sema_p">; +defm DS_GWS_BARRIER : DS_1A_GDS <0x1d, "ds_gws_barrier">; +let mayLoad = 0 in { +defm DS_WRITE_B8 : DS_1A1D_NORET <0x1e, "ds_write_b8", VGPR_32>; +defm DS_WRITE_B16 : DS_1A1D_NORET <0x1f, "ds_write_b16", VGPR_32>; +} defm DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "ds_add_rtn_u32", VGPR_32, "ds_add_u32">; defm DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">; defm DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">; @@ -800,20 +792,34 @@ defm DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "ds_max_rtn_u32", VGPR_32, "ds_max_u32" defm DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "ds_and_rtn_b32", VGPR_32, "ds_and_b32">; defm DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "ds_or_rtn_b32", VGPR_32, "ds_or_b32">; defm DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "ds_xor_rtn_b32", VGPR_32, "ds_xor_b32">; -defm DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">; +defm DS_MSKOR_RTN_B32 : DS_1A2D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">; defm DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "ds_wrxchg_rtn_b32", VGPR_32>; -//def DS_WRXCHG2_RTN_B32 : DS_2A0D_RET <0x2e, "ds_wrxchg2_rtn_b32", VGPR_32, "ds_wrxchg2_b32">; -//def DS_WRXCHG2ST64_RTN_B32 : DS_2A0D_RET <0x2f, "ds_wrxchg2_rtn_b32", VGPR_32, "ds_wrxchg2st64_b32">; +defm DS_WRXCHG2_RTN_B32 : DS_1A2D_RET < + 0x2e, "ds_wrxchg2_rtn_b32", VReg_64, "", VGPR_32 +>; +defm DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_RET < + 0x2f, "ds_wrxchg2st64_rtn_b32", VReg_64, "", VGPR_32 +>; defm DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmpst_b32">; defm DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">; -defm DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">; -defm DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">; - +defm DS_MIN_RTN_F32 : DS_1A2D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">; +defm DS_MAX_RTN_F32 : DS_1A2D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">; let SubtargetPredicate = isCI in { defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">; } // End isCI - - +defm DS_SWIZZLE_B32 : DS_1A_RET <0x35, "ds_swizzle_b32", VGPR_32>; +let mayStore = 0 in { +defm DS_READ_B32 : DS_1A_RET <0x36, "ds_read_b32", VGPR_32>; +defm DS_READ2_B32 : DS_1A_Off8_RET <0x37, "ds_read2_b32", VReg_64>; +defm DS_READ2ST64_B32 : DS_1A_Off8_RET <0x38, 
"ds_read2st64_b32", VReg_64>; +defm DS_READ_I8 : DS_1A_RET <0x39, "ds_read_i8", VGPR_32>; +defm DS_READ_U8 : DS_1A_RET <0x3a, "ds_read_u8", VGPR_32>; +defm DS_READ_I16 : DS_1A_RET <0x3b, "ds_read_i16", VGPR_32>; +defm DS_READ_U16 : DS_1A_RET <0x3c, "ds_read_u16", VGPR_32>; +} +defm DS_CONSUME : DS_0A_RET <0x3d, "ds_consume">; +defm DS_APPEND : DS_0A_RET <0x3e, "ds_append">; +defm DS_ORDERED_COUNT : DS_1A_RET_GDS <0x3f, "ds_ordered_count">; defm DS_ADD_U64 : DS_1A1D_NORET <0x40, "ds_add_u64", VReg_64>; defm DS_SUB_U64 : DS_1A1D_NORET <0x41, "ds_sub_u64", VReg_64>; defm DS_RSUB_U64 : DS_1A1D_NORET <0x42, "ds_rsub_u64", VReg_64>; @@ -826,7 +832,12 @@ defm DS_MAX_U64 : DS_1A1D_NORET <0x48, "ds_max_u64", VReg_64>; defm DS_AND_B64 : DS_1A1D_NORET <0x49, "ds_and_b64", VReg_64>; defm DS_OR_B64 : DS_1A1D_NORET <0x4a, "ds_or_b64", VReg_64>; defm DS_XOR_B64 : DS_1A1D_NORET <0x4b, "ds_xor_b64", VReg_64>; -defm DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "ds_mskor_b64", VReg_64>; +defm DS_MSKOR_B64 : DS_1A2D_NORET <0x4c, "ds_mskor_b64", VReg_64>; +let mayLoad = 0 in { +defm DS_WRITE_B64 : DS_1A1D_NORET <0x4d, "ds_write_b64", VReg_64>; +defm DS_WRITE2_B64 : DS_1A1D_Off8_NORET <0x4E, "ds_write2_b64", VReg_64>; +defm DS_WRITE2ST64_B64 : DS_1A1D_Off8_NORET <0x4f, "ds_write2st64_b64", VReg_64>; +} defm DS_CMPST_B64 : DS_1A2D_NORET <0x50, "ds_cmpst_b64", VReg_64>; defm DS_CMPST_F64 : DS_1A2D_NORET <0x51, "ds_cmpst_f64", VReg_64>; defm DS_MIN_F64 : DS_1A1D_NORET <0x52, "ds_min_f64", VReg_64>; @@ -844,57 +855,88 @@ defm DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "ds_max_rtn_u64", VReg_64, "ds_max_u64" defm DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "ds_and_rtn_b64", VReg_64, "ds_and_b64">; defm DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "ds_or_rtn_b64", VReg_64, "ds_or_b64">; defm DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "ds_xor_rtn_b64", VReg_64, "ds_xor_b64">; -defm DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">; +defm DS_MSKOR_RTN_B64 : DS_1A2D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">; defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "ds_wrxchg_rtn_b64", VReg_64, "ds_wrxchg_b64">; -//def DS_WRXCHG2_RTN_B64 : DS_2A0D_RET <0x6e, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2_b64">; -//def DS_WRXCHG2ST64_RTN_B64 : DS_2A0D_RET <0x6f, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2st64_b64">; +defm DS_WRXCHG2_RTN_B64 : DS_1A2D_RET <0x6e, "ds_wrxchg2_rtn_b64", VReg_128, "ds_wrxchg2_b64", VReg_64>; +defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_RET <0x6f, "ds_wrxchg2st64_rtn_b64", VReg_128, "ds_wrxchg2st64_b64", VReg_64>; defm DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">; defm DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">; defm DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "ds_min_rtn_f64", VReg_64, "ds_min_f64">; defm DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "ds_max_rtn_f64", VReg_64, "ds_max_f64">; +let mayStore = 0 in { +defm DS_READ_B64 : DS_1A_RET <0x76, "ds_read_b64", VReg_64>; +defm DS_READ2_B64 : DS_1A_Off8_RET <0x77, "ds_read2_b64", VReg_128>; +defm DS_READ2ST64_B64 : DS_1A_Off8_RET <0x78, "ds_read2st64_b64", VReg_128>; +} + +defm DS_ADD_SRC2_U32 : DS_1A <0x80, "ds_add_src2_u32">; +defm DS_SUB_SRC2_U32 : DS_1A <0x81, "ds_sub_src2_u32">; +defm DS_RSUB_SRC2_U32 : DS_1A <0x82, "ds_rsub_src2_u32">; +defm DS_INC_SRC2_U32 : DS_1A <0x83, "ds_inc_src2_u32">; +defm DS_DEC_SRC2_U32 : DS_1A <0x84, "ds_dec_src2_u32">; +defm DS_MIN_SRC2_I32 : DS_1A <0x85, "ds_min_src2_i32">; +defm DS_MAX_SRC2_I32 : DS_1A <0x86, "ds_max_src2_i32">; +defm DS_MIN_SRC2_U32 : DS_1A 
<0x87, "ds_min_src2_u32">; +defm DS_MAX_SRC2_U32 : DS_1A <0x88, "ds_max_src2_u32">; +defm DS_AND_SRC2_B32 : DS_1A <0x89, "ds_and_src_b32">; +defm DS_OR_SRC2_B32 : DS_1A <0x8a, "ds_or_src2_b32">; +defm DS_XOR_SRC2_B32 : DS_1A <0x8b, "ds_xor_src2_b32">; +defm DS_WRITE_SRC2_B32 : DS_1A <0x8c, "ds_write_src2_b32">; + +defm DS_MIN_SRC2_F32 : DS_1A <0x92, "ds_min_src2_f32">; +defm DS_MAX_SRC2_F32 : DS_1A <0x93, "ds_max_src2_f32">; + +defm DS_ADD_SRC2_U64 : DS_1A <0xc0, "ds_add_src2_u64">; +defm DS_SUB_SRC2_U64 : DS_1A <0xc1, "ds_sub_src2_u64">; +defm DS_RSUB_SRC2_U64 : DS_1A <0xc2, "ds_rsub_src2_u64">; +defm DS_INC_SRC2_U64 : DS_1A <0xc3, "ds_inc_src2_u64">; +defm DS_DEC_SRC2_U64 : DS_1A <0xc4, "ds_dec_src2_u64">; +defm DS_MIN_SRC2_I64 : DS_1A <0xc5, "ds_min_src2_i64">; +defm DS_MAX_SRC2_I64 : DS_1A <0xc6, "ds_max_src2_i64">; +defm DS_MIN_SRC2_U64 : DS_1A <0xc7, "ds_min_src2_u64">; +defm DS_MAX_SRC2_U64 : DS_1A <0xc8, "ds_max_src2_u64">; +defm DS_AND_SRC2_B64 : DS_1A <0xc9, "ds_and_src2_b64">; +defm DS_OR_SRC2_B64 : DS_1A <0xca, "ds_or_src2_b64">; +defm DS_XOR_SRC2_B64 : DS_1A <0xcb, "ds_xor_src2_b64">; +defm DS_WRITE_SRC2_B64 : DS_1A <0xcc, "ds_write_src2_b64">; + +defm DS_MIN_SRC2_F64 : DS_1A <0xd2, "ds_min_src2_f64">; +defm DS_MAX_SRC2_F64 : DS_1A <0xd3, "ds_max_src2_f64">; + //let SubtargetPredicate = isCI in { // DS_CONDXCHG32_RTN_B64 // DS_CONDXCHG32_RTN_B128 //} // End isCI -// TODO: _SRC2_* forms - -defm DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "ds_write_b32", VGPR_32>; -defm DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "ds_write_b8", VGPR_32>; -defm DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "ds_write_b16", VGPR_32>; -defm DS_WRITE_B64 : DS_Store_Helper <0x00000004d, "ds_write_b64", VReg_64>; - -defm DS_READ_B32 : DS_Load_Helper <0x00000036, "ds_read_b32", VGPR_32>; -defm DS_READ_I8 : DS_Load_Helper <0x00000039, "ds_read_i8", VGPR_32>; -defm DS_READ_U8 : DS_Load_Helper <0x0000003a, "ds_read_u8", VGPR_32>; -defm DS_READ_I16 : DS_Load_Helper <0x0000003b, "ds_read_i16", VGPR_32>; -defm DS_READ_U16 : DS_Load_Helper <0x0000003c, "ds_read_u16", VGPR_32>; -defm DS_READ_B64 : DS_Load_Helper <0x00000076, "ds_read_b64", VReg_64>; - -// 2 forms. 
-defm DS_WRITE2_B32 : DS_Store2_Helper <0x0000000E, "ds_write2_b32", VGPR_32>;
-defm DS_WRITE2ST64_B32 : DS_Store2_Helper <0x0000000F, "ds_write2st64_b32", VGPR_32>;
-defm DS_WRITE2_B64 : DS_Store2_Helper <0x0000004E, "ds_write2_b64", VReg_64>;
-defm DS_WRITE2ST64_B64 : DS_Store2_Helper <0x0000004F, "ds_write2st64_b64", VReg_64>;
-
-defm DS_READ2_B32 : DS_Load2_Helper <0x00000037, "ds_read2_b32", VReg_64>;
-defm DS_READ2ST64_B32 : DS_Load2_Helper <0x00000038, "ds_read2st64_b32", VReg_64>;
-defm DS_READ2_B64 : DS_Load2_Helper <0x00000075, "ds_read2_b64", VReg_128>;
-defm DS_READ2ST64_B64 : DS_Load2_Helper <0x00000076, "ds_read2st64_b64", VReg_128>;
-
//===----------------------------------------------------------------------===//
// MUBUF Instructions
//===----------------------------------------------------------------------===//

-//def BUFFER_LOAD_FORMAT_X : MUBUF_ <mubuf<0x00>, "buffer_load_format_x", []>;
-//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <mubuf<0x01>, "buffer_load_format_xy", []>;
-//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <mubuf<0x02>, "buffer_load_format_xyz", []>;
-defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <mubuf<0x03>, "buffer_load_format_xyzw", VReg_128>;
-//def BUFFER_STORE_FORMAT_X : MUBUF_ <mubuf<0x04>, "buffer_store_format_x", []>;
-//def BUFFER_STORE_FORMAT_XY : MUBUF_ <mubuf<0x05>, "buffer_store_format_xy", []>;
-//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <mubuf<0x06>, "buffer_store_format_xyz", []>;
-//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <mubuf<0x07>, "buffer_store_format_xyzw", []>;
+defm BUFFER_LOAD_FORMAT_X : MUBUF_Load_Helper <
+  mubuf<0x00>, "buffer_load_format_x", VGPR_32
+>;
+defm BUFFER_LOAD_FORMAT_XY : MUBUF_Load_Helper <
+  mubuf<0x01>, "buffer_load_format_xy", VReg_64
+>;
+defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Load_Helper <
+  mubuf<0x02>, "buffer_load_format_xyz", VReg_96
+>;
+defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <
+  mubuf<0x03>, "buffer_load_format_xyzw", VReg_128
+>;
+defm BUFFER_STORE_FORMAT_X : MUBUF_Store_Helper <
+  mubuf<0x04>, "buffer_store_format_x", VGPR_32
+>;
+defm BUFFER_STORE_FORMAT_XY : MUBUF_Store_Helper <
+  mubuf<0x05>, "buffer_store_format_xy", VReg_64
+>;
+defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Store_Helper <
+  mubuf<0x06>, "buffer_store_format_xyz", VReg_96
+>;
+defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Store_Helper <
+  mubuf<0x07>, "buffer_store_format_xyzw", VReg_128
+>;
defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <
  mubuf<0x08, 0x10>, "buffer_load_ubyte", VGPR_32, i32, az_extloadi8_global
>;
@@ -1418,13 +1460,17 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
// VOP2 Instructions
//===----------------------------------------------------------------------===//

-defm V_CNDMASK_B32_e64 : VOP3_m_nomods <vop3<0x100>, (outs VGPR_32:$dst),
-  (ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2),
-  "v_cndmask_b32_e64 $dst, $src0, $src1, $src2",
-  [(set i32:$dst, (select i1:$src2, i32:$src1, i32:$src0))],
-  "v_cndmask_b32_e64", 3
->;
+multiclass V_CNDMASK <vop2 op, string name> {
+  defm _e32 : VOP2_m <
+    op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins32, VOP_CNDMASK.Asm32, [],
+    name, name>;
+
+  defm _e64 : VOP3_m <
+    op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins64,
+    name#!cast<string>(VOP_CNDMASK.Asm64), [], name, 3>;
+}
+defm V_CNDMASK_B32 : V_CNDMASK<vop2<0x0>, "v_cndmask_b32">;

let isCommutable = 1 in {
defm V_ADD_F32 : VOP2Inst <vop2<0x3, 0x1>, "v_add_f32",
@@ -1568,8 +1614,8 @@ defm V_MAC_LEGACY_F32 : VOP2_VI3_Inst <vop23<0x6, 0x28e>, "v_mac_legacy_f32",
>;
} // End isCommutable = 1

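The new V_CNDMASK multiclass above produces both the VOP2 _e32 and the VOP3 _e64 encodings from a single defm, gluing the profile's Asm64 field onto the name with !cast<string> (presumably to force the field to string type at that point). A hedged, self-contained sketch of the one-defm, two-encodings shape, with dummy classes:

class EncSketch <string asm> {
  string AsmString = asm;
}
multiclass TwoEnc <string name> {
  def _e32 : EncSketch <name # "_e32 $dst, $src0, $src1">;
  def _e64 : EncSketch <name # "_e64 $dst, $src0, $src1, $src2">;
}
defm CNDMASK_SKETCH : TwoEnc <"v_cndmask_b32">; // defines CNDMASK_SKETCH_e32 / _e64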
AMDGPUbfm +defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32", + VOP_I32_I32_I32 >; defm V_BCNT_U32_B32 : VOP2_VI3_Inst <vop23<0x22, 0x28b>, "v_bcnt_u32_b32", VOP_I32_I32_I32 @@ -1638,14 +1684,12 @@ defm V_CUBEMA_F32 : VOP3Inst <vop3<0x147, 0x1c7>, "v_cubema_f32", VOP_F32_F32_F32_F32 >; -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { defm V_BFE_U32 : VOP3Inst <vop3<0x148, 0x1c8>, "v_bfe_u32", VOP_I32_I32_I32_I32, AMDGPUbfe_u32 >; defm V_BFE_I32 : VOP3Inst <vop3<0x149, 0x1c9>, "v_bfe_i32", VOP_I32_I32_I32_I32, AMDGPUbfe_i32 >; -} defm V_BFI_B32 : VOP3Inst <vop3<0x14a, 0x1ca>, "v_bfi_b32", VOP_I32_I32_I32_I32, AMDGPUbfi @@ -1833,6 +1877,11 @@ defm V_ASHRREV_I64 : VOP3Inst <vop3<0, 0x291>, "v_ashrrev_i64", //===----------------------------------------------------------------------===// let isCodeGenOnly = 1, isPseudo = 1 in { +// For use in patterns +def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$dst), + (ins VSrc_64:$src0, VSrc_64:$src1, SSrc_64:$src2), "", [] +>; + let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { // 64-bit vector move instruction. This is mainly used by the SIFoldOperands // pass to enable folding of inline immediates. @@ -2049,7 +2098,7 @@ def : Pat < /* int_SI_vs_load_input */ def : Pat< (SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr), - (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0) + (BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, 0, imm:$attr_offset, 0, 0, 0) >; /* int_SI_export */ @@ -2196,6 +2245,11 @@ def : Pat < (V_BCNT_U32_B32_e64 $popcnt, $val) >; +def : Pat < + (i32 (select i1:$src0, i32:$src1, i32:$src2)), + (V_CNDMASK_B32_e64 $src2, $src1, $src0) +>; + /********** ======================= **********/ /********** Image sampling patterns **********/ /********** ======================= **********/ @@ -2738,7 +2792,7 @@ def : Ext32Pat <anyext>; // Offset in an 32Bit VGPR def : Pat < (SIload_constant v4i32:$sbase, i32:$voff), - (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0, 0) + (BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, 0, 0, 0, 0, 0) >; // The multiplication scales from [0,1] to the unsigned integer range @@ -2781,7 +2835,7 @@ def : ROTRPattern <V_ALIGNBIT_B32>; class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat < (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))), - (inst (i1 0), $ptr, (as_i16imm $offset), (S_MOV_B32 -1)) + (inst $ptr, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1)) >; def : DSReadPat <DS_READ_I8, i32, sextloadi8_local>; @@ -2799,12 +2853,12 @@ def : DSReadPat <DS_READ_B64, v2i32, local_load_aligned8bytes>; def : Pat < (v2i32 (local_load (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))), - (DS_READ2_B32 (i1 0), $ptr, $offset0, $offset1, (S_MOV_B32 -1)) + (DS_READ2_B32 $ptr, $offset0, $offset1, (i1 0), (S_MOV_B32 -1)) >; class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat < (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)), - (inst (i1 0), $ptr, $value, (as_i16imm $offset), (S_MOV_B32 -1)) + (inst $ptr, $value, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1)) >; def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>; @@ -2819,14 +2873,14 @@ def : DSWritePat <DS_WRITE_B64, v2i32, local_store_aligned8bytes>; def : Pat < (local_store v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)), - (DS_WRITE2_B32 (i1 0), $ptr, (EXTRACT_SUBREG $value, sub0), - (EXTRACT_SUBREG $value, sub1), $offset0, $offset1, - (S_MOV_B32 -1)) + (DS_WRITE2_B32 $ptr, (EXTRACT_SUBREG $value, sub0), + (EXTRACT_SUBREG $value, sub1), $offset0, 
$offset1, + (i1 0), (S_MOV_B32 -1)) >; class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat < (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value), - (inst (i1 0), $ptr, $value, (as_i16imm $offset), (S_MOV_B32 -1)) + (inst $ptr, $value, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1)) >; // Special case of DSAtomicRetPat for add / sub 1 -> inc / dec @@ -2842,13 +2896,13 @@ class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat < class DSAtomicIncRetPat<DS inst, ValueType vt, Instruction LoadImm, PatFrag frag> : Pat < (frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)), - (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset), (S_MOV_B32 -1)) + (inst $ptr, (LoadImm (vt -1)), (as_i16imm $offset), (i1 0), (S_MOV_B32 -1)) >; class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat < (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap), - (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset), (S_MOV_B32 -1)) + (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1)) >; @@ -2898,8 +2952,9 @@ def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>; multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt, PatFrag constant_ld> { def : Pat < - (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset))), - (Instr_ADDR64 $srsrc, $vaddr, $soffset, $offset) + (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, + i16:$offset, i1:$glc, i1:$slc, i1:$tfe))), + (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe) >; } @@ -2916,7 +2971,7 @@ defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32, constant_load>; class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat < (vt (ld (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset))), - (Instr $srsrc, $vaddr, $soffset, $offset, 0, 0, 0) + (Instr $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) >; def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i32, sextloadi8_private>; @@ -2935,7 +2990,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe (vt (int_SI_buffer_load_dword v4i32:$rsrc, (i32 imm), i32:$soffset, imm:$offset, 0, 0, imm:$glc, imm:$slc, imm:$tfe)), - (offset $rsrc, (as_i16imm $offset), $soffset, (as_i1imm $glc), + (offset $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), (as_i1imm $tfe)) >; @@ -2943,7 +2998,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset, imm:$offset, 1, 0, imm:$glc, imm:$slc, imm:$tfe)), - (offen $rsrc, $vaddr, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), + (offen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), (as_i1imm $tfe)) >; @@ -2951,7 +3006,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset, imm:$offset, 0, 1, imm:$glc, imm:$slc, imm:$tfe)), - (idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc), + (idxen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), (as_i1imm $tfe)) >; @@ -2959,7 +3014,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe (vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset, imm:$offset, 1, 1, imm:$glc, imm:$slc, imm:$tfe)), - (bothen $rsrc, $vaddr, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), + (bothen $vaddr, 
$rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), (as_i1imm $tfe)) >; } @@ -2974,7 +3029,7 @@ defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_ class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat < (st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset)), - (Instr $value, $srsrc, $vaddr, $soffset, $offset, 0, 0, 0) + (Instr $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) >; def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>; @@ -3104,26 +3159,26 @@ multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, SI_INDIRECT_DST I // 1. Extract with offset def : Pat< - (vector_extract vt:$vec, (add i32:$idx, imm:$off)), - (eltvt (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, imm:$off)) + (eltvt (vector_extract vt:$vec, (add i32:$idx, imm:$off))), + (SI_INDIRECT_SRC $vec, $idx, imm:$off) >; // 2. Extract without offset def : Pat< - (vector_extract vt:$vec, i32:$idx), - (eltvt (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0)) + (eltvt (vector_extract vt:$vec, i32:$idx)), + (SI_INDIRECT_SRC $vec, $idx, 0) >; // 3. Insert with offset def : Pat< (vector_insert vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)), - (IndDst (IMPLICIT_DEF), $vec, $idx, imm:$off, $val) + (IndDst $vec, $idx, imm:$off, $val) >; // 4. Insert without offset def : Pat< (vector_insert vt:$vec, eltvt:$val, i32:$idx), - (IndDst (IMPLICIT_DEF), $vec, $idx, 0, $val) + (IndDst $vec, $idx, 0, $val) >; } @@ -3269,6 +3324,89 @@ def : Pat < (V_CNDMASK_B32_e64 $src0, $src1, $src2) >; +multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> { + def : Pat < + (vt (shl (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)), + (BFM $a, $b) + >; + + def : Pat < + (vt (add (vt (shl 1, vt:$a)), -1)), + (BFM $a, (MOV 0)) + >; +} + +defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>; +// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>; + +def : BFEPattern <V_BFE_U32, S_MOV_B32>; + +//===----------------------------------------------------------------------===// +// Fract Patterns +//===----------------------------------------------------------------------===// + +let Predicates = [isSI] in { + +// V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x)) is +// used instead. However, SI doesn't have V_FLOOR_F64, so the most efficient +// way to implement it is using V_FRACT_F64. +// The workaround for the V_FRACT bug is: +// fract(x) = isnan(x) ? 
x : min(V_FRACT(x), 0.99999999999999999) + +// Convert (x + (-floor(x)) to fract(x) +def : Pat < + (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)), + (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))), + (V_CNDMASK_B64_PSEUDO + $x, + (V_MIN_F64 + SRCMODS.NONE, + (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE), + SRCMODS.NONE, + (V_MOV_B64_PSEUDO 0x3fefffffffffffff), + DSTCLAMP.NONE, DSTOMOD.NONE), + (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/)) +>; + +// Convert floor(x) to (x - fract(x)) +def : Pat < + (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))), + (V_ADD_F64 + $mods, + $x, + SRCMODS.NEG, + (V_CNDMASK_B64_PSEUDO + $x, + (V_MIN_F64 + SRCMODS.NONE, + (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE), + SRCMODS.NONE, + (V_MOV_B64_PSEUDO 0x3fefffffffffffff), + DSTCLAMP.NONE, DSTOMOD.NONE), + (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/)), + DSTCLAMP.NONE, DSTOMOD.NONE) +>; + +} // End Predicates = [isSI] + +let Predicates = [isCI] in { + +// Convert (x - floor(x)) to fract(x) +def : Pat < + (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)), + (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))), + (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE) +>; + +// Convert (x + (-floor(x))) to fract(x) +def : Pat < + (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)), + (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))), + (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE) +>; + +} // End Predicates = [isCI] + //============================================================================// // Miscellaneous Optimization Patterns //============================================================================// diff --git a/lib/Target/R600/SILoadStoreOptimizer.cpp b/lib/Target/R600/SILoadStoreOptimizer.cpp index 46630d0..a927ad8 100644 --- a/lib/Target/R600/SILoadStoreOptimizer.cpp +++ b/lib/Target/R600/SILoadStoreOptimizer.cpp @@ -45,6 +45,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -249,10 +250,10 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair( DebugLoc DL = I->getDebugLoc(); MachineInstrBuilder Read2 = BuildMI(*MBB, I, DL, Read2Desc, DestReg) - .addImm(0) // gds .addOperand(*AddrReg) // addr .addImm(NewOffset0) // offset0 .addImm(NewOffset1) // offset1 + .addImm(0) // gds .addOperand(*M0Reg) // M0 .addMemOperand(*I->memoperands_begin()) .addMemOperand(*Paired->memoperands_begin()); @@ -332,12 +333,12 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair( MachineInstrBuilder Write2 = BuildMI(*MBB, I, DL, Write2Desc) - .addImm(0) // gds .addOperand(*Addr) // addr .addOperand(*Data0) // data0 .addOperand(*Data1) // data1 .addImm(NewOffset0) // offset0 .addImm(NewOffset1) // offset1 + .addImm(0) // gds .addOperand(*M0Reg) // m0 .addMemOperand(*I->memoperands_begin()) .addMemOperand(*Paired->memoperands_begin()); diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp index 9224e14..13a8974 100644 --- a/lib/Target/R600/SIRegisterInfo.cpp +++ b/lib/Target/R600/SIRegisterInfo.cpp @@ -14,7 +14,6 @@ #include "SIRegisterInfo.h" -#include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -25,9 +24,7 @@ using namespace llvm; -SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st) -: AMDGPURegisterInfo(st) - { } 
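A minimal scalar C++ model of the V_FRACT workaround described in the fract-pattern comment above, illustrative only and not part of the patch (hw_fract stands in for V_FRACT_F64, assumed to compute x - floor(x); the constant is the 0x3fefffffffffffff bit pattern used in the patterns, the largest double below 1.0):

#include <cmath>

static double si_fract_f64_workaround(double x) {
  double hw_fract = x - std::floor(x);          // stand-in for V_FRACT_F64
  double below_one = std::nextafter(1.0, 0.0);  // == bits 0x3fefffffffffffff
  return std::isnan(x) ? x : std::fmin(hw_fract, below_one);
}
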
+SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {} BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); @@ -47,14 +44,34 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(AMDGPU::VGPR255); Reserved.set(AMDGPU::VGPR254); + // Tonga and Iceland can only allocate a fixed number of SGPRs due + // to a hw bug. + if (MF.getSubtarget<AMDGPUSubtarget>().hasSGPRInitBug()) { + unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); + // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs). + // Assume XNACK_MASK is unused. + unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4; + + for (unsigned i = Limit; i < NumSGPRs; ++i) { + unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i); + MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true); + + for (; R.isValid(); ++R) + Reserved.set(*R); + } + } + return Reserved; } -unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const { +unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF, + unsigned Idx) const { + const AMDGPUSubtarget &STI = MF.getSubtarget<AMDGPUSubtarget>(); // FIXME: We should adjust the max number of waves based on LDS size. - unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU()); - unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU()); + unsigned SGPRLimit = getNumSGPRsAllowed(STI.getGeneration(), + STI.getMaxWavesPerCU()); + unsigned VGPRLimit = getNumVGPRsAllowed(STI.getMaxWavesPerCU()); for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I) { @@ -125,9 +142,10 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI, int64_t Offset, RegScavenger *RS) const { - const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo()); MachineBasicBlock *MBB = MI->getParent(); const MachineFunction *MF = MI->getParent()->getParent(); + const SIInstrInfo *TII = + static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo()); LLVMContext &Ctx = MF->getFunction()->getContext(); DebugLoc DL = MI->getDebugLoc(); bool IsLoad = TII->get(LoadStoreOp).mayLoad(); @@ -162,8 +180,8 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI, BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp)) .addReg(SubReg, getDefRegState(IsLoad)) .addReg(ScratchRsrcReg, getKillRegState(IsKill)) - .addImm(Offset) .addReg(SOffset) + .addImm(Offset) .addImm(0) // glc .addImm(0) // slc .addImm(0) // tfe @@ -178,7 +196,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB = MI->getParent(); SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); MachineFrameInfo *FrameInfo = MF->getFrameInfo(); - const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo()); + const SIInstrInfo *TII = + static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo()); DebugLoc DL = MI->getDebugLoc(); MachineOperand &FIOp = MI->getOperand(FIOperandNum); @@ -249,7 +268,22 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, .addReg(SubReg); } } - TII->insertNOPs(MI, 3); + + // TODO: only do this when it is needed + switch (MF->getSubtarget<AMDGPUSubtarget>().getGeneration()) { + case AMDGPUSubtarget::SOUTHERN_ISLANDS: + // "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI + TII->insertNOPs(MI, 3); + break; + case AMDGPUSubtarget::SEA_ISLANDS: + break; + default: // VOLCANIC_ISLANDS and later + // "VALU writes SGPR -> VMEM reads that 
SGPR" needs "S_NOP 4" on VI + // and later. This also applies to VALUs which write VCC, but we're + // unlikely to see VMEM use VCC. + TII->insertNOPs(MI, 4); + } + MI->eraseFromParent(); break; } @@ -494,14 +528,24 @@ unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const { } } -unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const { - switch(WaveCount) { - case 10: return 48; - case 9: return 56; - case 8: return 64; - case 7: return 72; - case 6: return 80; - case 5: return 96; - default: return 103; +unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen, + unsigned WaveCount) const { + if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + switch (WaveCount) { + case 10: return 80; + case 9: return 80; + case 8: return 96; + default: return 102; + } + } else { + switch(WaveCount) { + case 10: return 48; + case 9: return 56; + case 8: return 64; + case 7: return 72; + case 6: return 80; + case 5: return 96; + default: return 103; + } } } diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h index d908ffd..bfdb67c 100644 --- a/lib/Target/R600/SIRegisterInfo.h +++ b/lib/Target/R600/SIRegisterInfo.h @@ -17,17 +17,19 @@ #define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H #include "AMDGPURegisterInfo.h" +#include "AMDGPUSubtarget.h" #include "llvm/Support/Debug.h" namespace llvm { struct SIRegisterInfo : public AMDGPURegisterInfo { - SIRegisterInfo(const AMDGPUSubtarget &st); + SIRegisterInfo(); BitVector getReservedRegs(const MachineFunction &MF) const override; - unsigned getRegPressureSetLimit(unsigned Idx) const override; + unsigned getRegPressureSetLimit(const MachineFunction &MF, + unsigned Idx) const override; bool requiresRegisterScavenging(const MachineFunction &Fn) const override; @@ -111,7 +113,8 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { /// \brief Give the maximum number of SGPRs that can be used by \p WaveCount /// concurrent waves. 
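As a reading aid for the getNumSGPRsAllowed tables above: inverting the VOLCANIC_ISLANDS case gives the wave occupancy a given SGPR usage still permits. A sketch built only from the values in this patch, not an implementation used anywhere in LLVM:

// Illustrative inverse of the VI table above.
static unsigned maxWavesForSGPRUsageVI(unsigned NumSGPRs) {
  if (NumSGPRs <= 80)
    return 10; // 9 and 10 waves share the 80-SGPR limit
  if (NumSGPRs <= 96)
    return 8;
  return 7;    // the table's default case caps usage at 102 SGPRs
}
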
-  unsigned getNumSGPRsAllowed(unsigned WaveCount) const;
+  unsigned getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
+                              unsigned WaveCount) const;
 
   unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
                               const TargetRegisterClass *RC) const;
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 8b25e95..7bb5dc2 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -256,10 +256,3 @@ def VSrc_64 : RegImmOperand<VS_64>;
 
 def VCSrc_32 : RegInlineOperand<VS_32>;
 def VCSrc_64 : RegInlineOperand<VS_64>;
-
-//===----------------------------------------------------------------------===//
-// SGPR and VGPR register classes
-//===----------------------------------------------------------------------===//
-
-def VSrc_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128,
-  (add VReg_128, SReg_128)>;
diff --git a/lib/Target/R600/SIShrinkInstructions.cpp b/lib/Target/R600/SIShrinkInstructions.cpp
index 97bbd78..51e72cd 100644
--- a/lib/Target/R600/SIShrinkInstructions.cpp
+++ b/lib/Target/R600/SIShrinkInstructions.cpp
@@ -18,9 +18,10 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/IR/Constants.h"
-#include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
 
 #define DEBUG_TYPE "si-shrink-instructions"
@@ -88,6 +89,11 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
   const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
 
   // Can't shrink instruction with three operands.
+  // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
+  // a special case for it. It can only be shrunk if the third operand
+  // is vcc. We should handle this the same way we handle vopc, by adding
+  // a register allocation hint pre-regalloc and then do the shrinking
+  // post-regalloc.
   if (Src2)
     return false;
 
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 0fa56e6..282d923 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -1268,7 +1268,8 @@ int foo (void) {
 ..
 else if (strchr ("<>", *intel_parser.op_string)
 
-Those should be turned into a switch.
+Those should be turned into a switch. SimplifyLibCalls only gets the second
+case.
 
 //===---------------------------------------------------------------------===//
 
@@ -1843,44 +1844,6 @@ we remove checking in code like
 
 //===---------------------------------------------------------------------===//
 
-This code (from Benchmarks/Dhrystone/dry.c):
-
-define i32 @Func1(i32, i32) nounwind readnone optsize ssp {
-entry:
-  %sext = shl i32 %0, 24
-  %conv = ashr i32 %sext, 24
-  %sext6 = shl i32 %1, 24
-  %conv4 = ashr i32 %sext6, 24
-  %cmp = icmp eq i32 %conv, %conv4
-  %. = select i1 %cmp, i32 10000, i32 0
-  ret i32 %.
-}
-
-Should be simplified into something like:
-
-define i32 @Func1(i32, i32) nounwind readnone optsize ssp {
-entry:
-  %sext = shl i32 %0, 24
-  %conv = and i32 %sext, 0xFF000000
-  %sext6 = shl i32 %1, 24
-  %conv4 = and i32 %sext6, 0xFF000000
-  %cmp = icmp eq i32 %conv, %conv4
-  %. = select i1 %cmp, i32 10000, i32 0
-  ret i32 %.
-}
-
-and then to:
-
-define i32 @Func1(i32, i32) nounwind readnone optsize ssp {
-entry:
-  %conv = and i32 %0, 0xFF
-  %conv4 = and i32 %1, 0xFF
-  %cmp = icmp eq i32 %conv, %conv4
-  %. = select i1 %cmp, i32 10000, i32 0
-  ret i32 %.
-} -//===---------------------------------------------------------------------===// - clang -O3 currently compiles this code int g(unsigned int a) { diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp index 5128843..598856f 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp @@ -74,7 +74,6 @@ public: MCCodeEmitter *llvm::createSparcMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx) { return new SparcMCCodeEmitter(Ctx); } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h index f72c6c4..3a6f508 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h @@ -62,8 +62,8 @@ private: const VariantKind Kind; const MCExpr *Expr; - explicit SparcMCExpr(VariantKind _Kind, const MCExpr *_Expr) - : Kind(_Kind), Expr(_Expr) {} + explicit SparcMCExpr(VariantKind Kind, const MCExpr *Expr) + : Kind(Kind), Expr(Expr) {} public: /// @name Construction diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp index 3cc4314..630ed1b 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -122,25 +122,16 @@ static MCCodeGenInfo *createSparcV9MCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCStreamer *createMCStreamer(const Target &T, StringRef TT, - MCContext &Context, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, bool RelaxAll) { - MCStreamer *S = createELFStreamer(Context, MAB, OS, Emitter, RelaxAll); - new SparcTargetELFStreamer(*S); - return S; +static MCTargetStreamer * +createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { + return new SparcTargetELFStreamer(S); } -static MCStreamer * -createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useDwarfDirectory, - MCInstPrinter *InstPrint, MCCodeEmitter *CE, - MCAsmBackend *TAB, bool ShowInst) { - - MCStreamer *S = llvm::createAsmStreamer( - Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst); - new SparcTargetAsmStreamer(*S, OS); - return S; +static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool isVerboseAsm) { + return new SparcTargetAsmStreamer(S, OS); } static MCInstPrinter *createSparcMCInstPrinter(const Target &T, @@ -157,54 +148,37 @@ extern "C" void LLVMInitializeSparcTargetMC() { RegisterMCAsmInfoFn X(TheSparcTarget, createSparcMCAsmInfo); RegisterMCAsmInfoFn Y(TheSparcV9Target, createSparcV9MCAsmInfo); + for (Target *T : {&TheSparcTarget, &TheSparcV9Target}) { + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createSparcMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(*T, createSparcMCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, createSparcMCSubtargetInfo); + + // Register the MC Code Emitter. + TargetRegistry::RegisterMCCodeEmitter(*T, createSparcMCCodeEmitter); + + // Register the asm backend. + TargetRegistry::RegisterMCAsmBackend(*T, createSparcAsmBackend); + + // Register the object target streamer. 
+ TargetRegistry::RegisterObjectTargetStreamer(*T, + createObjectTargetStreamer); + + // Register the asm streamer. + TargetRegistry::RegisterAsmTargetStreamer(*T, createTargetAsmStreamer); + + // Register the MCInstPrinter + TargetRegistry::RegisterMCInstPrinter(*T, createSparcMCInstPrinter); + } + // Register the MC codegen info. TargetRegistry::RegisterMCCodeGenInfo(TheSparcTarget, createSparcMCCodeGenInfo); TargetRegistry::RegisterMCCodeGenInfo(TheSparcV9Target, createSparcV9MCCodeGenInfo); - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheSparcV9Target, createSparcMCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheSparcTarget, createSparcMCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheSparcV9Target, - createSparcMCRegisterInfo); - - // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(TheSparcTarget, - createSparcMCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheSparcV9Target, - createSparcMCSubtargetInfo); - - // Register the MC Code Emitter. - TargetRegistry::RegisterMCCodeEmitter(TheSparcTarget, - createSparcMCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheSparcV9Target, - createSparcMCCodeEmitter); - - //Register the asm backend. - TargetRegistry::RegisterMCAsmBackend(TheSparcTarget, - createSparcAsmBackend); - TargetRegistry::RegisterMCAsmBackend(TheSparcV9Target, - createSparcAsmBackend); - - // Register the object streamer. - TargetRegistry::RegisterMCObjectStreamer(TheSparcTarget, - createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheSparcV9Target, - createMCStreamer); - - // Register the asm streamer. - TargetRegistry::RegisterAsmStreamer(TheSparcTarget, - createMCAsmStreamer); - TargetRegistry::RegisterAsmStreamer(TheSparcV9Target, - createMCAsmStreamer); - - // Register the MCInstPrinter - TargetRegistry::RegisterMCInstPrinter(TheSparcTarget, - createSparcMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheSparcV9Target, - createSparcMCInstPrinter); } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h index c31943d..d2ec991 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h @@ -33,7 +33,6 @@ extern Target TheSparcV9Target; MCCodeEmitter *createSparcMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx); MCAsmBackend *createSparcAsmBackend(const Target &T, const MCRegisterInfo &MRI, diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index 9f03b04..1cf5ccf 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -50,7 +50,7 @@ public: /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, + unsigned ConstraintID, std::vector<SDValue> &OutOps) override; const char *getPassName() const override { @@ -195,12 +195,13 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) { /// inline asm expressions. 
bool SparcDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, + unsigned ConstraintID, std::vector<SDValue> &OutOps) { SDValue Op0, Op1; - switch (ConstraintCode) { + switch (ConstraintID) { default: return true; - case 'm': // memory + case InlineAsm::Constraint_i: + case InlineAsm::Constraint_m: // memory if (!SelectADDRrr(Op, Op0, Op1)) SelectADDRri(Op, Op0, Op1); break; diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 6774977..c8b0570 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -915,9 +915,10 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, // Add a register mask operand representing the call-preserved registers. const SparcRegisterInfo *TRI = Subtarget->getRegisterInfo(); - const uint32_t *Mask = ((hasReturnsTwice) - ? TRI->getRTCallPreservedMask(CallConv) - : TRI->getCallPreservedMask(CallConv)); + const uint32_t *Mask = + ((hasReturnsTwice) + ? TRI->getRTCallPreservedMask(CallConv) + : TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv)); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -1229,7 +1230,8 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, const SparcRegisterInfo *TRI = Subtarget->getRegisterInfo(); const uint32_t *Mask = ((hasReturnsTwice) ? TRI->getRTCallPreservedMask(CLI.CallConv) - : TRI->getCallPreservedMask(CLI.CallConv)); + : TRI->getCallPreservedMask(DAG.getMachineFunction(), + CLI.CallConv)); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -1904,8 +1906,8 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op, Ops.push_back(Callee); Ops.push_back(Symbol); Ops.push_back(DAG.getRegister(SP::O0, PtrVT)); - const uint32_t *Mask = - Subtarget->getRegisterInfo()->getCallPreservedMask(CallingConv::C); + const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask( + DAG.getMachineFunction(), CallingConv::C); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); Ops.push_back(InFlag); diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index 8b2e6bc..4b70f16 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -33,9 +33,8 @@ using namespace llvm; void SparcInstrInfo::anchor() {} SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST) - : SparcGenInstrInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP), - RI(ST), Subtarget(ST) { -} + : SparcGenInstrInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP), RI(), + Subtarget(ST) {} /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h index fe93ed7..6e08418 100644 --- a/lib/Target/Sparc/SparcInstrInfo.h +++ b/lib/Target/Sparc/SparcInstrInfo.h @@ -22,6 +22,8 @@ namespace llvm { +class SparcSubtarget; + /// SPII - This namespace holds all of the target specific flags that /// instruction info tracks. 
/// diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 3cca98f..9667bc0 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -34,17 +34,16 @@ static cl::opt<bool> ReserveAppRegisters("sparc-reserve-app-registers", cl::Hidden, cl::init(false), cl::desc("Reserve application registers (%g2-%g4)")); -SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st) - : SparcGenRegisterInfo(SP::O7), Subtarget(st) { -} +SparcRegisterInfo::SparcRegisterInfo() : SparcGenRegisterInfo(SP::O7) {} const MCPhysReg* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_SaveList; } -const uint32_t* -SparcRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { +const uint32_t * +SparcRegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { return CSR_RegMask; } @@ -55,6 +54,7 @@ SparcRegisterInfo::getRTCallPreservedMask(CallingConv::ID CC) const { BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); + const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>(); // FIXME: G1 reserved for now for large imm generation by frame code. Reserved.set(SP::G1); @@ -89,6 +89,7 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const { const TargetRegisterClass* SparcRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const { + const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>(); return Subtarget.is64Bit() ? &SP::I64RegsRegClass : &SP::IntRegsRegClass; } @@ -160,6 +161,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Addressable stack objects are accessed using neg. offsets from %fp MachineFunction &MF = *MI.getParent()->getParent(); + const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>(); int64_t Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + MI.getOperand(FIOperandNum + 1).getImm() + Subtarget.getStackPointerBias(); @@ -174,7 +176,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (!Subtarget.isV9() || !Subtarget.hasHardQuad()) { if (MI.getOpcode() == SP::STQFri) { - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); unsigned SrcReg = MI.getOperand(2).getReg(); unsigned SrcEvenReg = getSubReg(SrcReg, SP::sub_even64); unsigned SrcOddReg = getSubReg(SrcReg, SP::sub_odd64); @@ -186,7 +188,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(2).setReg(SrcOddReg); Offset += 8; } else if (MI.getOpcode() == SP::LDQFri) { - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); unsigned DestReg = MI.getOperand(0).getReg(); unsigned DestEvenReg = getSubReg(DestReg, SP::sub_even64); unsigned DestOddReg = getSubReg(DestReg, SP::sub_odd64); diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index 63567b0..764a894 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -20,20 +20,13 @@ #include "SparcGenRegisterInfo.inc" namespace llvm { - -class SparcSubtarget; -class TargetInstrInfo; -class Type; - struct SparcRegisterInfo : public SparcGenRegisterInfo { - SparcSubtarget &Subtarget; - - SparcRegisterInfo(SparcSubtarget &st); + SparcRegisterInfo(); /// Code Generation virtual methods... 
- const MCPhysReg * - getCalleeSavedRegs(const MachineFunction *MF =nullptr) const override; - const uint32_t* getCallPreservedMask(CallingConv::ID CC) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const override; const uint32_t* getRTCallPreservedMask(CallingConv::ID CC) const; diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 1c423dc..6979a17 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -56,12 +56,11 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool is64bit) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - TLOF(make_unique<SparcELFTargetObjectFile>()), - DL(computeDataLayout(is64bit)), - Subtarget(TT, CPU, FS, *this, is64bit) { + CodeGenOpt::Level OL, bool is64bit) + : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM, + CM, OL), + TLOF(make_unique<SparcELFTargetObjectFile>()), + Subtarget(TT, CPU, FS, *this, is64bit) { initAsmInfo(); } diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index 4f93980..30a8ebf 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -22,7 +22,6 @@ namespace llvm { class SparcTargetMachine : public LLVMTargetMachine { std::unique_ptr<TargetLoweringObjectFile> TLOF; - const DataLayout DL; SparcSubtarget Subtarget; public: SparcTargetMachine(const Target &T, StringRef TT, @@ -31,8 +30,9 @@ public: CodeGenOpt::Level OL, bool is64bit); ~SparcTargetMachine() override; - const DataLayout *getDataLayout() const override { return &DL; } - const SparcSubtarget *getSubtargetImpl() const override { return &Subtarget; } + const SparcSubtarget *getSubtargetImpl(const Function &) const override { + return &Subtarget; + } // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp index d9bb916..40dc48e 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -110,7 +110,6 @@ private: MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &MCSTI, MCContext &Ctx) { return new SystemZMCCodeEmitter(MCII, Ctx); } diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 6e82b6d..ffd05a9 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -181,14 +181,6 @@ static MCInstPrinter *createSystemZMCInstPrinter(const Target &T, return new SystemZInstPrinter(MAI, MII, MRI); } -static MCStreamer * -createSystemZMCObjectStreamer(const Target &T, StringRef TT, MCContext &Ctx, - MCAsmBackend &MAB, raw_ostream &OS, - MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, bool RelaxAll) { - return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll); -} - extern "C" void LLVMInitializeSystemZTargetMC() { // Register the MCAsmInfo. 
TargetRegistry::RegisterMCAsmInfo(TheSystemZTarget, @@ -221,8 +213,4 @@ extern "C" void LLVMInitializeSystemZTargetMC() { // Register the MCInstPrinter. TargetRegistry::RegisterMCInstPrinter(TheSystemZTarget, createSystemZMCInstPrinter); - - // Register the MCObjectStreamer; - TargetRegistry::RegisterMCObjectStreamer(TheSystemZTarget, - createSystemZMCObjectStreamer); } diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h index 5eb6526..962c950 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -71,7 +71,6 @@ inline unsigned getRegAsGRH32(unsigned Reg) { MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx); MCAsmBackend *createSystemZMCAsmBackend(const Target &T, diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index b8b0db9..a52aa25 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -328,7 +328,7 @@ public: // Override SelectionDAGISel. SDNode *Select(SDNode *Node) override; - bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, + bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) override; // Include the pieces autogenerated from the target description. @@ -1129,18 +1129,29 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { bool SystemZDAGToDAGISel:: SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, + unsigned ConstraintID, std::vector<SDValue> &OutOps) { - assert(ConstraintCode == 'm' && "Unexpected constraint code"); - // Accept addresses with short displacements, which are compatible - // with Q, R, S and T. But keep the index operand for future expansion. - SDValue Base, Disp, Index; - if (!selectBDXAddr(SystemZAddressingMode::FormBD, - SystemZAddressingMode::Disp12Only, - Op, Base, Disp, Index)) - return true; - OutOps.push_back(Base); - OutOps.push_back(Disp); - OutOps.push_back(Index); - return false; + switch(ConstraintID) { + default: + llvm_unreachable("Unexpected asm memory constraint"); + case InlineAsm::Constraint_i: + case InlineAsm::Constraint_m: + case InlineAsm::Constraint_Q: + case InlineAsm::Constraint_R: + case InlineAsm::Constraint_S: + case InlineAsm::Constraint_T: + // Accept addresses with short displacements, which are compatible + // with Q, R, S and T. But keep the index operand for future expansion. + SDValue Base, Disp, Index; + if (selectBDXAddr(SystemZAddressingMode::FormBD, + SystemZAddressingMode::Disp12Only, + Op, Base, Disp, Index)) { + OutOps.push_back(Base); + OutOps.push_back(Disp); + OutOps.push_back(Index); + return false; + } + break; + } + return true; } diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index e96398d..0ca8bcd 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -920,7 +920,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Add a register mask operand representing the call-preserved registers. 
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -1858,7 +1858,8 @@ SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(CallingConv::C); + const uint32_t *Mask = + TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index a2b10b0..23c62c9 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -233,6 +233,26 @@ public: std::string &Constraint, std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; + + unsigned getInlineAsmMemConstraint( + const std::string &ConstraintCode) const override { + if (ConstraintCode.size() == 1) { + switch(ConstraintCode[0]) { + default: + break; + case 'Q': + return InlineAsm::Constraint_Q; + case 'R': + return InlineAsm::Constraint_R; + case 'S': + return InlineAsm::Constraint_S; + case 'T': + return InlineAsm::Constraint_T; + } + } + return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); + } + MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const override; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 8488ec8..5128993 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -743,11 +743,10 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return nullptr; } -MachineInstr * -SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const { +MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + ArrayRef<unsigned> Ops, + int FrameIndex) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); unsigned Size = MFI->getObjectSize(FrameIndex); unsigned Opcode = MI->getOpcode(); @@ -862,9 +861,9 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, } MachineInstr * -SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { +SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, + MachineInstr *LoadMI) const { return nullptr; } diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index e711f89..b55810b 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -186,11 +186,11 @@ public: MachineBasicBlock::iterator &MBBI, LiveVariables *LV) const override; MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, + ArrayRef<unsigned> Ops, int FrameIndex) const override; - MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const override; + MachineInstr 
*foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, + MachineInstr *LoadMI) const override; bool expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const override; bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 64f5eeb..7cabea9 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -28,7 +28,8 @@ SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } const uint32_t * -SystemZRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { +SystemZRegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { return CSR_SystemZ_RegMask; } diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index 212fe91..a0db5a9 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -43,9 +43,9 @@ public: bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override { return true; } - const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF = nullptr) const - override; - const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const override; BitVector getReservedRegs(const MachineFunction &MF) const override; void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index 73198b1..86baccb 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -25,12 +25,12 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + // Make sure that global data has at least 16 bits of alignment by + // default, so that we can refer to it using LARL. We don't have any + // special requirements for stack variables though. + : LLVMTargetMachine(T, "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64", + TT, CPU, FS, Options, RM, CM, OL), TLOF(make_unique<TargetLoweringObjectFileELF>()), - // Make sure that global data has at least 16 bits of alignment by - // default, so that we can refer to it using LARL. We don't have any - // special requirements for stack variables though. - DL("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"), Subtarget(TT, CPU, FS, *this) { initAsmInfo(); } diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h index 52ccc5a..181b926 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -24,7 +24,6 @@ class TargetFrameLowering; class SystemZTargetMachine : public LLVMTargetMachine { std::unique_ptr<TargetLoweringObjectFile> TLOF; - const DataLayout DL; SystemZSubtarget Subtarget; public: @@ -34,9 +33,8 @@ public: CodeGenOpt::Level OL); ~SystemZTargetMachine() override; - // Override TargetMachine. 
- const DataLayout *getDataLayout() const override { return &DL; } - const SystemZSubtarget *getSubtargetImpl() const override { + const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; } + const SystemZSubtarget *getSubtargetImpl(const Function &) const override { return &Subtarget; } // Override LLVMTargetMachine diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp index 5b7953d..1b74e8c 100644 --- a/lib/Target/Target.cpp +++ b/lib/Target/Target.cpp @@ -34,7 +34,6 @@ inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfoImpl *P) { } void llvm::initializeTarget(PassRegistry &Registry) { - initializeDataLayoutPassPass(Registry); initializeTargetLibraryInfoWrapperPassPass(Registry); initializeTargetTransformInfoWrapperPassPass(Registry); } @@ -48,9 +47,6 @@ LLVMTargetDataRef LLVMCreateTargetData(const char *StringRep) { } void LLVMAddTargetData(LLVMTargetDataRef TD, LLVMPassManagerRef PM) { - // The DataLayoutPass must now be in sync with the module. Unfortunatelly we - // cannot enforce that from the C api. - unwrap(PM)->add(new DataLayoutPass()); } void LLVMAddTargetLibraryInfo(LLVMTargetLibraryInfoRef TLI, diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index faa6fbe..75100fb 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -343,3 +343,9 @@ const MCExpr *TargetLoweringObjectFile::getDebugThreadLocalSymbol(const MCSymbol // null return could mean 'no location' & we should just do that here. return MCSymbolRefExpr::Create(Sym, *Ctx); } + +void TargetLoweringObjectFile::getNameWithPrefix( + SmallVectorImpl<char> &OutName, const GlobalValue *GV, + bool CannotUsePrivateLabel, Mangler &Mang, const TargetMachine &TM) const { + Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel); +} diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 307e93c..dd07f81 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -22,6 +22,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/SectionKind.h" @@ -36,18 +37,20 @@ using namespace llvm; // TargetMachine Class // -TargetMachine::TargetMachine(const Target &T, +TargetMachine::TargetMachine(const Target &T, StringRef DataLayoutString, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options) - : TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS), - CodeGenInfo(nullptr), AsmInfo(nullptr), - RequireStructuredCFG(false), - Options(Options) { -} + : TheTarget(T), DL(DataLayoutString), TargetTriple(TT), TargetCPU(CPU), + TargetFS(FS), CodeGenInfo(nullptr), AsmInfo(nullptr), MRI(nullptr), + MII(nullptr), STI(nullptr), RequireStructuredCFG(false), + Options(Options) {} TargetMachine::~TargetMachine() { delete CodeGenInfo; delete AsmInfo; + delete MRI; + delete MII; + delete STI; } /// \brief Reset the target options based on the function's attributes. 
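The TargetMachine.cpp hunk above moves the DataLayout into the TargetMachine base class, built from the string each backend now passes up (see the Sparc and SystemZ constructor changes earlier in this patch). Assuming that API, layout queries no longer need a subclass override; a minimal sketch:

#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"

// Sketch only: relies on the base-class DataLayout this patch introduces.
static unsigned pointerSizeInBytes(const llvm::TargetMachine &TM) {
  return TM.getDataLayout()->getPointerSize(); // address space 0 by default
}
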
@@ -177,7 +180,7 @@ void TargetMachine::getNameWithPrefix(SmallVectorImpl<char> &Name, const TargetLoweringObjectFile *TLOF = getObjFileLowering(); const MCSection *TheSection = TLOF->SectionForGlobal(GV, GVKind, Mang, *this); bool CannotUsePrivateLabel = !canUsePrivateLabel(*AsmInfo, *TheSection); - Mang.getNameWithPrefix(Name, GV, CannotUsePrivateLabel); + TLOF->getNameWithPrefix(Name, GV, CannotUsePrivateLabel, Mang, *this); } MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV, Mangler &Mang) const { diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp index c7838a9..236cb1b 100644 --- a/lib/Target/TargetMachineC.cpp +++ b/lib/Target/TargetMachineC.cpp @@ -198,8 +198,7 @@ static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M, *ErrorMessage = strdup(error.c_str()); return true; } - Mod->setDataLayout(td); - pass.add(new DataLayoutPass()); + Mod->setDataLayout(*td); TargetMachine::CodeGenFileType ft; switch (codegen) { diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp index 10597a8..b2bb59e 100644 --- a/lib/Target/TargetSubtargetInfo.cpp +++ b/lib/Target/TargetSubtargetInfo.cpp @@ -23,22 +23,6 @@ TargetSubtargetInfo::TargetSubtargetInfo() {} TargetSubtargetInfo::~TargetSubtargetInfo() {} -// Temporary option to compare overall performance change when moving from the -// SD scheduler to the MachineScheduler pass pipeline. This is convenient for -// benchmarking during the transition from SD to MI scheduling. Once armv7 makes -// the switch, it should go away. The normal way to enable/disable the -// MachineScheduling pass itself is by using -enable-misched. For targets that -// already use MI sched (via MySubTarget::enableMachineScheduler()) -// -misched-bench=false negates the subtarget hook. 
-static cl::opt<bool> BenchMachineSched("misched-bench", cl::Hidden, - cl::desc("Migrate from the target's default SD scheduler to MI scheduler")); - -bool TargetSubtargetInfo::useMachineScheduler() const { - if (BenchMachineSched.getNumOccurrences()) - return BenchMachineSched; - return enableMachineScheduler(); -} - bool TargetSubtargetInfo::enableAtomicExpand() const { return true; } @@ -47,6 +31,10 @@ bool TargetSubtargetInfo::enableMachineScheduler() const { return false; } +bool TargetSubtargetInfo::enableJoinGlobalCopies() const { + return enableMachineScheduler(); +} + bool TargetSubtargetInfo::enableRALocalReassignment( CodeGenOpt::Level OptLevel) const { return true; diff --git a/lib/Target/X86/Android.mk b/lib/Target/X86/Android.mk index 08646d0..7194dd3 100644 --- a/lib/Target/X86/Android.mk +++ b/lib/Target/X86/Android.mk @@ -1,8 +1,10 @@ LOCAL_PATH := $(call my-dir) x86_codegen_TBLGEN_TABLES := \ + X86GenAsmMatcher.inc \ X86GenAsmWriter.inc \ X86GenAsmWriter1.inc \ + X86GenDisassemblerTables.inc \ X86GenRegisterInfo.inc \ X86GenInstrInfo.inc \ X86GenDAGISel.inc \ diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 0b6fb52..c24805a 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -11,6 +11,7 @@ #include "X86AsmInstrumentation.h" #include "X86AsmParserCommon.h" #include "X86Operand.h" +#include "X86ISelLowering.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" @@ -664,6 +665,7 @@ private: ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size); std::unique_ptr<X86Operand> ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size); + std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End); bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End); std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg, SMLoc Start, @@ -1407,6 +1409,35 @@ X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, /*Scale=*/1, Start, End, Size, Identifier, Info); } +//ParseRoundingModeOp - Parse AVX-512 rounding mode operand +std::unique_ptr<X86Operand> +X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) { + MCAsmParser &Parser = getParser(); + const AsmToken &Tok = Parser.getTok(); + consumeToken(); // Eat "{" + if (Tok.getIdentifier().startswith("r")){ + int rndMode = StringSwitch<int>(Tok.getIdentifier()) + .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT) + .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF) + .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF) + .Case("rz", X86::STATIC_ROUNDING::TO_ZERO) + .Default(-1); + if (-1 == rndMode) + return ErrorOperand(Tok.getLoc(), "Invalid rounding mode."); + Parser.Lex(); // Eat "r*" of r*-sae + if (!getLexer().is(AsmToken::Minus)) + return ErrorOperand(Tok.getLoc(), "Expected - at this point"); + Parser.Lex(); // Eat "-" + Parser.Lex(); // Eat the sae + if (!getLexer().is(AsmToken::RCurly)) + return ErrorOperand(Tok.getLoc(), "Expected } at this point"); + Parser.Lex(); // Eat "}" + const MCExpr *RndModeOp = + MCConstantExpr::Create(rndMode, Parser.getContext()); + return X86Operand::CreateImm(RndModeOp, Start, End); + } + return ErrorOperand(Tok.getLoc(), "unknown token in expression"); +} /// ParseIntelMemOperand - Parse intel style memory operand. 
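For context on ParseRoundingModeOp above: it accepts the AVX-512 static-rounding operand written as {rn-sae}, {rd-sae}, {ru-sae} or {rz-sae} (for example vaddpd zmm1, zmm2, zmm3, {rz-sae} in Intel syntax; that instruction is illustrative, not from this patch). A standalone C++ mirror of the string mapping; the numeric values are assumed to follow the EVEX rounding-control encoding and are not taken from this patch:

#include <string>

// rn/rd/ru/rz -> 0/1/2/3 (assumed EVEX.RC encoding); -1 flags an invalid mode.
static int parseRoundingMode(const std::string &Id) {
  if (Id == "rn") return 0; // round to nearest even (TO_NEAREST_INT)
  if (Id == "rd") return 1; // round down (TO_NEG_INF)
  if (Id == "ru") return 2; // round up (TO_POS_INF)
  if (Id == "rz") return 3; // round toward zero (TO_ZERO)
  return -1;
}
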
std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start, @@ -1656,6 +1687,11 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() { return ParseIntelMemOperand(Imm, Start, Size); } + // rounding mode token + if (STI.getFeatureBits() & X86::FeatureAVX512 && + getLexer().is(AsmToken::LCurly)) + return ParseRoundingModeOp(Start, End); + // Register. unsigned RegNo = 0; if (!ParseRegister(RegNo, Start, End)) { @@ -1708,6 +1744,12 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() { return nullptr; return X86Operand::CreateImm(Val, Start, End); } + case AsmToken::LCurly:{ + SMLoc Start = Parser.getTok().getLoc(), End; + if (STI.getFeatureBits() & X86::FeatureAVX512) + return ParseRoundingModeOp(Start, End); + return ErrorOperand(Start, "unknown token in expression"); + } } } diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h index d67e119..94dbedb 100644 --- a/lib/Target/X86/AsmParser/X86Operand.h +++ b/lib/Target/X86/AsmParser/X86Operand.h @@ -260,6 +260,9 @@ struct X86Operand : public MCParsedAsmOperand { return Kind == Memory && !getMemSegReg() && !getMemBaseReg() && !getMemIndexReg() && getMemScale() == 1; } + bool isAVX512RC() const{ + return isImm(); + } bool isAbsMem16() const { return isAbsMem() && Mem.ModeSize == 16; @@ -394,7 +397,10 @@ struct X86Operand : public MCParsedAsmOperand { RegNo = getGR32FromGR64(RegNo); Inst.addOperand(MCOperand::CreateReg(RegNo)); } - + void addAVX512RCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } void addImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); addExpr(Inst, getImm()); diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 99fb1ab..e8c5475 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -378,26 +378,28 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, unsigned NewOpc; switch (mcInst.getOpcode()) { default: llvm_unreachable("unexpected opcode"); - case X86::VCMPPDrmi: NewOpc = X86::VCMPPDrmi_alt; break; - case X86::VCMPPDrri: NewOpc = X86::VCMPPDrri_alt; break; - case X86::VCMPPSrmi: NewOpc = X86::VCMPPSrmi_alt; break; - case X86::VCMPPSrri: NewOpc = X86::VCMPPSrri_alt; break; - case X86::VCMPSDrm: NewOpc = X86::VCMPSDrm_alt; break; - case X86::VCMPSDrr: NewOpc = X86::VCMPSDrr_alt; break; - case X86::VCMPSSrm: NewOpc = X86::VCMPSSrm_alt; break; - case X86::VCMPSSrr: NewOpc = X86::VCMPSSrr_alt; break; - case X86::VCMPPDYrmi: NewOpc = X86::VCMPPDYrmi_alt; break; - case X86::VCMPPDYrri: NewOpc = X86::VCMPPDYrri_alt; break; - case X86::VCMPPSYrmi: NewOpc = X86::VCMPPSYrmi_alt; break; - case X86::VCMPPSYrri: NewOpc = X86::VCMPPSYrri_alt; break; - case X86::VCMPPDZrmi: NewOpc = X86::VCMPPDZrmi_alt; break; - case X86::VCMPPDZrri: NewOpc = X86::VCMPPDZrri_alt; break; - case X86::VCMPPSZrmi: NewOpc = X86::VCMPPSZrmi_alt; break; - case X86::VCMPPSZrri: NewOpc = X86::VCMPPSZrri_alt; break; - case X86::VCMPSDZrm: NewOpc = X86::VCMPSDZrmi_alt; break; - case X86::VCMPSDZrr: NewOpc = X86::VCMPSDZrri_alt; break; - case X86::VCMPSSZrm: NewOpc = X86::VCMPSSZrmi_alt; break; - case X86::VCMPSSZrr: NewOpc = X86::VCMPSSZrri_alt; break; + case X86::VCMPPDrmi: NewOpc = X86::VCMPPDrmi_alt; break; + case X86::VCMPPDrri: NewOpc = X86::VCMPPDrri_alt; break; + case X86::VCMPPSrmi: NewOpc = 
X86::VCMPPSrmi_alt; break; + case X86::VCMPPSrri: NewOpc = X86::VCMPPSrri_alt; break; + case X86::VCMPSDrm: NewOpc = X86::VCMPSDrm_alt; break; + case X86::VCMPSDrr: NewOpc = X86::VCMPSDrr_alt; break; + case X86::VCMPSSrm: NewOpc = X86::VCMPSSrm_alt; break; + case X86::VCMPSSrr: NewOpc = X86::VCMPSSrr_alt; break; + case X86::VCMPPDYrmi: NewOpc = X86::VCMPPDYrmi_alt; break; + case X86::VCMPPDYrri: NewOpc = X86::VCMPPDYrri_alt; break; + case X86::VCMPPSYrmi: NewOpc = X86::VCMPPSYrmi_alt; break; + case X86::VCMPPSYrri: NewOpc = X86::VCMPPSYrri_alt; break; + case X86::VCMPPDZrmi: NewOpc = X86::VCMPPDZrmi_alt; break; + case X86::VCMPPDZrri: NewOpc = X86::VCMPPDZrri_alt; break; + case X86::VCMPPDZrrib: NewOpc = X86::VCMPPDZrrib_alt; break; + case X86::VCMPPSZrmi: NewOpc = X86::VCMPPSZrmi_alt; break; + case X86::VCMPPSZrri: NewOpc = X86::VCMPPSZrri_alt; break; + case X86::VCMPPSZrrib: NewOpc = X86::VCMPPSZrrib_alt; break; + case X86::VCMPSDZrm: NewOpc = X86::VCMPSDZrmi_alt; break; + case X86::VCMPSDZrr: NewOpc = X86::VCMPSDZrri_alt; break; + case X86::VCMPSSZrm: NewOpc = X86::VCMPSSZrmi_alt; break; + case X86::VCMPSSZrr: NewOpc = X86::VCMPSSZrri_alt; break; } // Switch opcode to the one that doesn't get special printing. mcInst.setOpcode(NewOpc); diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp index 619a0d4..7c9e012 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -310,11 +310,8 @@ static bool isPrefixAtLocation(struct InternalInstruction* insn, uint8_t prefix, uint64_t location) { - if (insn->prefixPresent[prefix] == 1 && - insn->prefixLocations[prefix] == location) - return true; - else - return false; + return insn->prefixPresent[prefix] == 1 && + insn->prefixLocations[prefix] == location; } /* @@ -1458,6 +1455,8 @@ static int readModRM(struct InternalInstruction* insn) { case TYPE_VK1: \ case TYPE_VK8: \ case TYPE_VK16: \ + if (index > 7) \ + *valid = 0; \ return prefix##_K0 + index; \ case TYPE_MM64: \ return prefix##_MM0 + (index & 0x7); \ diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 70c6042..9e65050 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -485,18 +485,6 @@ struct OperandSpecifier { uint8_t type; }; -// Indicates where the opcode modifier (if any) is to be found. Extended -// opcodes with AddRegFrm have the opcode modifier in the ModR/M byte. -#define MODIFIER_TYPES \ - ENUM_ENTRY(MODIFIER_NONE) - -#define ENUM_ENTRY(n) n, -enum ModifierType { - MODIFIER_TYPES - MODIFIER_max -}; -#undef ENUM_ENTRY - static const unsigned X86_MAX_OPERANDS = 6; /// Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 719b761..a400d46 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -76,8 +76,8 @@ class X86AsmBackend : public MCAsmBackend { bool HasNopl; const uint64_t MaxNopLength; public: - X86AsmBackend(const Target &T, StringRef _CPU) - : MCAsmBackend(), CPU(_CPU), MaxNopLength(_CPU == "slm" ? 7 : 15) { + X86AsmBackend(const Target &T, StringRef CPU) + : MCAsmBackend(), CPU(CPU), MaxNopLength(CPU == "slm" ? 
7 : 15) { HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" && CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" && CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" && @@ -351,8 +351,8 @@ namespace { class ELFX86AsmBackend : public X86AsmBackend { public: uint8_t OSABI; - ELFX86AsmBackend(const Target &T, uint8_t _OSABI, StringRef CPU) - : X86AsmBackend(T, CPU), OSABI(_OSABI) {} + ELFX86AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) + : X86AsmBackend(T, CPU), OSABI(OSABI) {} }; class ELFX86_32AsmBackend : public ELFX86AsmBackend { diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index e8b0b4c..76a9d2b 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -38,231 +38,214 @@ X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI, X86ELFObjectWriter::~X86ELFObjectWriter() {} -unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, - const MCFixup &Fixup, - bool IsPCRel) const { - // determine the type of the relocation +enum X86_64RelType { RT64_64, RT64_32, RT64_32S, RT64_16, RT64_8 }; - MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant(); - unsigned Type; - if (getEMachine() == ELF::EM_X86_64) { - if (IsPCRel) { - switch ((unsigned)Fixup.getKind()) { - default: llvm_unreachable("invalid fixup kind!"); - - case FK_Data_8: Type = ELF::R_X86_64_PC64; break; - case FK_Data_4: Type = ELF::R_X86_64_PC32; break; - case FK_Data_2: Type = ELF::R_X86_64_PC16; break; - case FK_Data_1: Type = ELF::R_X86_64_PC8; break; +static X86_64RelType getType64(unsigned Kind, + MCSymbolRefExpr::VariantKind &Modifier, + bool &IsPCRel) { + switch (Kind) { + default: + llvm_unreachable("Unimplemented"); + case X86::reloc_global_offset_table8: + Modifier = MCSymbolRefExpr::VK_GOT; + IsPCRel = true; + return RT64_64; + case FK_Data_8: + return RT64_64; + case X86::reloc_signed_4byte: + if (Modifier == MCSymbolRefExpr::VK_None && !IsPCRel) + return RT64_32S; + return RT64_32; + case X86::reloc_global_offset_table: + Modifier = MCSymbolRefExpr::VK_GOT; + IsPCRel = true; + return RT64_32; + case FK_Data_4: + case FK_PCRel_4: + case X86::reloc_riprel_4byte: + case X86::reloc_riprel_4byte_movq_load: + return RT64_32; + case FK_Data_2: + return RT64_16; + case FK_PCRel_1: + case FK_Data_1: + return RT64_8; + } +} - case FK_PCRel_8: - assert(Modifier == MCSymbolRefExpr::VK_None); - Type = ELF::R_X86_64_PC64; - break; - case X86::reloc_signed_4byte: - case X86::reloc_riprel_4byte_movq_load: - case X86::reloc_riprel_4byte: - case FK_PCRel_4: - switch (Modifier) { - default: - llvm_unreachable("Unimplemented"); - case MCSymbolRefExpr::VK_None: - Type = ELF::R_X86_64_PC32; - break; - case MCSymbolRefExpr::VK_PLT: - Type = ELF::R_X86_64_PLT32; - break; - case MCSymbolRefExpr::VK_GOTPCREL: - Type = ELF::R_X86_64_GOTPCREL; - break; - case MCSymbolRefExpr::VK_GOTTPOFF: - Type = ELF::R_X86_64_GOTTPOFF; - break; - case MCSymbolRefExpr::VK_TLSGD: - Type = ELF::R_X86_64_TLSGD; - break; - case MCSymbolRefExpr::VK_TLSLD: - Type = ELF::R_X86_64_TLSLD; - break; - } - break; - case FK_PCRel_2: - assert(Modifier == MCSymbolRefExpr::VK_None); - Type = ELF::R_X86_64_PC16; - break; - case FK_PCRel_1: - assert(Modifier == MCSymbolRefExpr::VK_None); - Type = ELF::R_X86_64_PC8; - break; - } - } else { - switch ((unsigned)Fixup.getKind()) { - default: llvm_unreachable("invalid fixup kind!"); - case X86::reloc_global_offset_table8: - Type 
= ELF::R_X86_64_GOTPC64; - break; - case X86::reloc_global_offset_table: - Type = ELF::R_X86_64_GOTPC32; - break; - case FK_Data_8: - switch (Modifier) { - default: - llvm_unreachable("Unimplemented"); - case MCSymbolRefExpr::VK_None: - Type = ELF::R_X86_64_64; - break; - case MCSymbolRefExpr::VK_GOT: - Type = ELF::R_X86_64_GOT64; - break; - case MCSymbolRefExpr::VK_GOTOFF: - Type = ELF::R_X86_64_GOTOFF64; - break; - case MCSymbolRefExpr::VK_TPOFF: - Type = ELF::R_X86_64_TPOFF64; - break; - case MCSymbolRefExpr::VK_DTPOFF: - Type = ELF::R_X86_64_DTPOFF64; - break; - } - break; - case X86::reloc_signed_4byte: - switch (Modifier) { - default: - llvm_unreachable("Unimplemented"); - case MCSymbolRefExpr::VK_None: - Type = ELF::R_X86_64_32S; - break; - case MCSymbolRefExpr::VK_GOT: - Type = ELF::R_X86_64_GOT32; - break; - case MCSymbolRefExpr::VK_GOTPCREL: - Type = ELF::R_X86_64_GOTPCREL; - break; - case MCSymbolRefExpr::VK_TPOFF: - Type = ELF::R_X86_64_TPOFF32; - break; - case MCSymbolRefExpr::VK_DTPOFF: - Type = ELF::R_X86_64_DTPOFF32; - break; - } - break; - case FK_Data_4: - Type = ELF::R_X86_64_32; - break; - case FK_Data_2: Type = ELF::R_X86_64_16; break; - case FK_PCRel_1: - case FK_Data_1: Type = ELF::R_X86_64_8; break; - } +static unsigned getRelocType64(MCSymbolRefExpr::VariantKind Modifier, + X86_64RelType Type, bool IsPCRel) { + switch (Modifier) { + default: + llvm_unreachable("Unimplemented"); + case MCSymbolRefExpr::VK_None: + switch (Type) { + case RT64_64: + return IsPCRel ? ELF::R_X86_64_PC64 : ELF::R_X86_64_64; + case RT64_32: + return IsPCRel ? ELF::R_X86_64_PC32 : ELF::R_X86_64_32; + case RT64_32S: + return ELF::R_X86_64_32S; + case RT64_16: + return IsPCRel ? ELF::R_X86_64_PC16 : ELF::R_X86_64_16; + case RT64_8: + return IsPCRel ? ELF::R_X86_64_PC8 : ELF::R_X86_64_8; } - } else if (getEMachine() == ELF::EM_386) { - if (IsPCRel) { - switch ((unsigned)Fixup.getKind()) { - default: llvm_unreachable("invalid fixup kind!"); - - case X86::reloc_global_offset_table: - Type = ELF::R_386_GOTPC; - break; - - case FK_PCRel_1: - case FK_Data_1: - switch (Modifier) { - default: - llvm_unreachable("Unimplemented"); - case MCSymbolRefExpr::VK_None: - Type = ELF::R_386_PC8; - break; - } - break; - - case FK_PCRel_2: - case FK_Data_2: - switch (Modifier) { - default: - llvm_unreachable("Unimplemented"); - case MCSymbolRefExpr::VK_None: - Type = ELF::R_386_PC16; - break; - } - break; + case MCSymbolRefExpr::VK_GOT: + switch (Type) { + case RT64_64: + return IsPCRel ? ELF::R_X86_64_GOTPC64 : ELF::R_X86_64_GOT64; + case RT64_32: + return IsPCRel ? 
ELF::R_X86_64_GOTPC32 : ELF::R_X86_64_GOT32; + case RT64_32S: + case RT64_16: + case RT64_8: + llvm_unreachable("Unimplemented"); + } + case MCSymbolRefExpr::VK_GOTOFF: + assert(Type == RT64_64); + assert(!IsPCRel); + return ELF::R_X86_64_GOTOFF64; + case MCSymbolRefExpr::VK_TPOFF: + assert(!IsPCRel); + switch (Type) { + case RT64_64: + return ELF::R_X86_64_TPOFF64; + case RT64_32: + return ELF::R_X86_64_TPOFF32; + case RT64_32S: + case RT64_16: + case RT64_8: + llvm_unreachable("Unimplemented"); + } + case MCSymbolRefExpr::VK_DTPOFF: + assert(!IsPCRel); + switch (Type) { + case RT64_64: + return ELF::R_X86_64_DTPOFF64; + case RT64_32: + return ELF::R_X86_64_DTPOFF32; + case RT64_32S: + case RT64_16: + case RT64_8: + llvm_unreachable("Unimplemented"); + } + case MCSymbolRefExpr::VK_SIZE: + assert(!IsPCRel); + switch (Type) { + case RT64_64: + return ELF::R_X86_64_SIZE64; + case RT64_32: + return ELF::R_X86_64_SIZE32; + case RT64_32S: + case RT64_16: + case RT64_8: + llvm_unreachable("Unimplemented"); + } + case MCSymbolRefExpr::VK_TLSGD: + assert(Type == RT64_32); + return ELF::R_X86_64_TLSGD; + case MCSymbolRefExpr::VK_GOTTPOFF: + assert(Type == RT64_32); + return ELF::R_X86_64_GOTTPOFF; + case MCSymbolRefExpr::VK_TLSLD: + assert(Type == RT64_32); + return ELF::R_X86_64_TLSLD; + case MCSymbolRefExpr::VK_PLT: + assert(Type == RT64_32); + return ELF::R_X86_64_PLT32; + case MCSymbolRefExpr::VK_GOTPCREL: + assert(Type == RT64_32); + return ELF::R_X86_64_GOTPCREL; + } +} - case X86::reloc_signed_4byte: - case FK_PCRel_4: - case FK_Data_4: - switch (Modifier) { - default: - llvm_unreachable("Unimplemented"); - case MCSymbolRefExpr::VK_None: - Type = ELF::R_386_PC32; - break; - case MCSymbolRefExpr::VK_PLT: - Type = ELF::R_386_PLT32; - break; - } - break; - } - } else { - switch ((unsigned)Fixup.getKind()) { - default: llvm_unreachable("invalid fixup kind!"); +enum X86_32RelType { RT32_32, RT32_16, RT32_8 }; - case X86::reloc_global_offset_table: - Type = ELF::R_386_GOTPC; - break; +static X86_32RelType getType32(X86_64RelType T) { + switch (T) { + case RT64_64: + llvm_unreachable("Unimplemented"); + case RT64_32: + case RT64_32S: + return RT32_32; + case RT64_16: + return RT32_16; + case RT64_8: + return RT32_8; + } + llvm_unreachable("unexpected relocation type!"); +} - // FIXME: Should we avoid selecting reloc_signed_4byte in 32 bit mode - // instead? 
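
The rewritten GetRelocType above splits one sprawling switch into two stages: getType64 first folds every fixup kind down to a generic width class, then getRelocType64/getRelocType32 combine that class with the symbol modifier and the PC-relative bit. The payoff is that each absolute/PC-relative pair collapses to a single conditional. A minimal sketch of the shape of that second stage (enumerators here are placeholders, not psABI relocation numbers):

enum RelWidth { W64, W32, W16, W8 };
enum Reloc { R_64, R_PC64, R_32, R_PC32, R_16, R_PC16, R_8, R_PC8 };

// One row per width class; IsPCRel just selects the column.
static Reloc pickReloc(RelWidth W, bool IsPCRel) {
  switch (W) {
  case W64: return IsPCRel ? R_PC64 : R_64;
  case W32: return IsPCRel ? R_PC32 : R_32;
  case W16: return IsPCRel ? R_PC16 : R_16;
  case W8:  return IsPCRel ? R_PC8  : R_8;
  }
  return R_32; // unreachable; keeps -Wreturn-type quiet
}
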
- case X86::reloc_signed_4byte: - case FK_PCRel_4: - case FK_Data_4: - switch (Modifier) { - default: - llvm_unreachable("Unimplemented"); - case MCSymbolRefExpr::VK_None: - Type = ELF::R_386_32; - break; - case MCSymbolRefExpr::VK_GOT: - Type = ELF::R_386_GOT32; - break; - case MCSymbolRefExpr::VK_PLT: - Type = ELF::R_386_PLT32; - break; - case MCSymbolRefExpr::VK_GOTOFF: - Type = ELF::R_386_GOTOFF; - break; - case MCSymbolRefExpr::VK_TLSGD: - Type = ELF::R_386_TLS_GD; - break; - case MCSymbolRefExpr::VK_TPOFF: - Type = ELF::R_386_TLS_LE_32; - break; - case MCSymbolRefExpr::VK_INDNTPOFF: - Type = ELF::R_386_TLS_IE; - break; - case MCSymbolRefExpr::VK_NTPOFF: - Type = ELF::R_386_TLS_LE; - break; - case MCSymbolRefExpr::VK_GOTNTPOFF: - Type = ELF::R_386_TLS_GOTIE; - break; - case MCSymbolRefExpr::VK_TLSLDM: - Type = ELF::R_386_TLS_LDM; - break; - case MCSymbolRefExpr::VK_DTPOFF: - Type = ELF::R_386_TLS_LDO_32; - break; - case MCSymbolRefExpr::VK_GOTTPOFF: - Type = ELF::R_386_TLS_IE_32; - break; - } - break; - case FK_Data_2: Type = ELF::R_386_16; break; - case FK_PCRel_1: - case FK_Data_1: Type = ELF::R_386_8; break; - } +static unsigned getRelocType32(MCSymbolRefExpr::VariantKind Modifier, + X86_32RelType Type, bool IsPCRel) { + switch (Modifier) { + default: + llvm_unreachable("Unimplemented"); + case MCSymbolRefExpr::VK_None: + switch (Type) { + case RT32_32: + return IsPCRel ? ELF::R_386_PC32 : ELF::R_386_32; + case RT32_16: + return IsPCRel ? ELF::R_386_PC16 : ELF::R_386_16; + case RT32_8: + return IsPCRel ? ELF::R_386_PC8 : ELF::R_386_8; } - } else - llvm_unreachable("Unsupported ELF machine type."); + case MCSymbolRefExpr::VK_GOT: + assert(Type == RT32_32); + return IsPCRel ? ELF::R_386_GOTPC : ELF::R_386_GOT32; + case MCSymbolRefExpr::VK_GOTOFF: + assert(Type == RT32_32); + assert(!IsPCRel); + return ELF::R_386_GOTOFF; + case MCSymbolRefExpr::VK_TPOFF: + assert(Type == RT32_32); + assert(!IsPCRel); + return ELF::R_386_TLS_LE_32; + case MCSymbolRefExpr::VK_DTPOFF: + assert(Type == RT32_32); + assert(!IsPCRel); + return ELF::R_386_TLS_LDO_32; + case MCSymbolRefExpr::VK_TLSGD: + assert(Type == RT32_32); + assert(!IsPCRel); + return ELF::R_386_TLS_GD; + case MCSymbolRefExpr::VK_GOTTPOFF: + assert(Type == RT32_32); + assert(!IsPCRel); + return ELF::R_386_TLS_IE_32; + case MCSymbolRefExpr::VK_PLT: + assert(Type == RT32_32); + return ELF::R_386_PLT32; + case MCSymbolRefExpr::VK_INDNTPOFF: + assert(Type == RT32_32); + assert(!IsPCRel); + return ELF::R_386_TLS_IE; + case MCSymbolRefExpr::VK_NTPOFF: + assert(Type == RT32_32); + assert(!IsPCRel); + return ELF::R_386_TLS_LE; + case MCSymbolRefExpr::VK_GOTNTPOFF: + assert(Type == RT32_32); + assert(!IsPCRel); + return ELF::R_386_TLS_GOTIE; + case MCSymbolRefExpr::VK_TLSLDM: + assert(Type == RT32_32); + assert(!IsPCRel); + return ELF::R_386_TLS_LDM; + } +} + +unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const { + MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant(); + X86_64RelType Type = getType64(Fixup.getKind(), Modifier, IsPCRel); + if (getEMachine() == ELF::EM_X86_64) + return getRelocType64(Modifier, Type, IsPCRel); - return Type; + assert(getEMachine() == ELF::EM_386 && "Unsupported ELF machine type."); + return getRelocType32(Modifier, getType32(Type), IsPCRel); } MCObjectWriter *llvm::createX86ELFObjectWriter(raw_ostream &OS, diff --git a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp index 
b679316..10b83f4 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp @@ -36,7 +36,7 @@ public: MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName); // FIXME: check that the value is actually the same. - if (Sym->isVariable() == false) + if (!Sym->isVariable()) Sym->setVariableValue(MCConstantExpr::Create(SymAddr, Ctx)); const MCExpr *Expr = nullptr; diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 3ad8ab1..9b98a3e 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -168,10 +168,8 @@ public: } // end anonymous namespace - MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx) { return new X86MCCodeEmitter(MCII, Ctx); } diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 0e7b4e5..0946326 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -55,143 +55,6 @@ std::string X86_MC::ParseX86Triple(StringRef TT) { return FS; } -/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the -/// specified arguments. If we can't run cpuid on the host, return true. -bool X86_MC::GetCpuIDAndInfo(unsigned value, unsigned *rEAX, - unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { -#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) - #if defined(__GNUC__) - // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually. - asm ("movq\t%%rbx, %%rsi\n\t" - "cpuid\n\t" - "xchgq\t%%rbx, %%rsi\n\t" - : "=a" (*rEAX), - "=S" (*rEBX), - "=c" (*rECX), - "=d" (*rEDX) - : "a" (value)); - return false; - #elif defined(_MSC_VER) - int registers[4]; - __cpuid(registers, value); - *rEAX = registers[0]; - *rEBX = registers[1]; - *rECX = registers[2]; - *rEDX = registers[3]; - return false; - #else - return true; - #endif -#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86) - #if defined(__GNUC__) - asm ("movl\t%%ebx, %%esi\n\t" - "cpuid\n\t" - "xchgl\t%%ebx, %%esi\n\t" - : "=a" (*rEAX), - "=S" (*rEBX), - "=c" (*rECX), - "=d" (*rEDX) - : "a" (value)); - return false; - #elif defined(_MSC_VER) - __asm { - mov eax,value - cpuid - mov esi,rEAX - mov dword ptr [esi],eax - mov esi,rEBX - mov dword ptr [esi],ebx - mov esi,rECX - mov dword ptr [esi],ecx - mov esi,rEDX - mov dword ptr [esi],edx - } - return false; - #else - return true; - #endif -#else - return true; -#endif -} - -/// GetCpuIDAndInfoEx - Execute the specified cpuid with subleaf and return the -/// 4 values in the specified arguments. If we can't run cpuid on the host, -/// return true. -bool X86_MC::GetCpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX, - unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { -#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) - #if defined(__GNUC__) - // gcc desn't know cpuid would clobber ebx/rbx. Preseve it manually. 
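
The helpers being deleted here predate the MC-layer subtarget plumbing; their trick was to preserve rbx by hand because GCC's inline asm, at the time, was not told that cpuid clobbers ebx/rbx (and rbx can be the PIC register). For reference, the surviving technique reduced to a self-contained x86-64 GNU-toolchain sketch, mirroring the deleted code's return convention:

// Returns false on success, true if cpuid is unavailable.
// x86-64 with GCC/Clang only; other configurations report failure.
static bool getCpuid(unsigned value, unsigned *rEAX, unsigned *rEBX,
                     unsigned *rECX, unsigned *rEDX) {
#if defined(__x86_64__) && defined(__GNUC__)
  asm("movq\t%%rbx, %%rsi\n\t"  // stash rbx in rsi
      "cpuid\n\t"
      "xchgq\t%%rbx, %%rsi\n\t" // restore rbx, expose ebx via rsi
      : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
      : "a"(value));
  return false;
#else
  (void)value; (void)rEAX; (void)rEBX; (void)rECX; (void)rEDX;
  return true;
#endif
}
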
- asm ("movq\t%%rbx, %%rsi\n\t" - "cpuid\n\t" - "xchgq\t%%rbx, %%rsi\n\t" - : "=a" (*rEAX), - "=S" (*rEBX), - "=c" (*rECX), - "=d" (*rEDX) - : "a" (value), - "c" (subleaf)); - return false; - #elif defined(_MSC_VER) - int registers[4]; - __cpuidex(registers, value, subleaf); - *rEAX = registers[0]; - *rEBX = registers[1]; - *rECX = registers[2]; - *rEDX = registers[3]; - return false; - #else - return true; - #endif -#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86) - #if defined(__GNUC__) - asm ("movl\t%%ebx, %%esi\n\t" - "cpuid\n\t" - "xchgl\t%%ebx, %%esi\n\t" - : "=a" (*rEAX), - "=S" (*rEBX), - "=c" (*rECX), - "=d" (*rEDX) - : "a" (value), - "c" (subleaf)); - return false; - #elif defined(_MSC_VER) - __asm { - mov eax,value - mov ecx,subleaf - cpuid - mov esi,rEAX - mov dword ptr [esi],eax - mov esi,rEBX - mov dword ptr [esi],ebx - mov esi,rECX - mov dword ptr [esi],ecx - mov esi,rEDX - mov dword ptr [esi],edx - } - return false; - #else - return true; - #endif -#else - return true; -#endif -} - -void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family, - unsigned &Model) { - Family = (EAX >> 8) & 0xf; // Bits 8 - 11 - Model = (EAX >> 4) & 0xf; // Bits 4 - 7 - if (Family == 6 || Family == 0xf) { - if (Family == 0xf) - // Examine extended family ID if family ID is F. - Family += (EAX >> 20) & 0xff; // Bits 20 - 27 - // Examine extended model ID if family ID is 6 or F. - Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 - } -} - unsigned X86_MC::getDwarfRegFlavour(Triple TT, bool isEH) { if (TT.getArch() == Triple::x86_64) return DWARFFlavour::X86_64; @@ -344,24 +207,6 @@ static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCStreamer *createMCStreamer(const Target &T, StringRef TT, - MCContext &Ctx, MCAsmBackend &MAB, - raw_ostream &_OS, MCCodeEmitter *_Emitter, - const MCSubtargetInfo &STI, bool RelaxAll) { - Triple TheTriple(TT); - - switch (TheTriple.getObjectFormat()) { - default: llvm_unreachable("unsupported object format"); - case Triple::MachO: - return createMachOStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll); - case Triple::COFF: - assert(TheTriple.isOSWindows() && "only Windows COFF is supported"); - return createX86WinCOFFStreamer(Ctx, MAB, _Emitter, _OS, RelaxAll); - case Triple::ELF: - return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll); - } -} - static MCInstPrinter *createX86MCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, @@ -392,61 +237,42 @@ static MCInstrAnalysis *createX86MCInstrAnalysis(const MCInstrInfo *Info) { // Force static initialization. extern "C" void LLVMInitializeX86TargetMC() { - // Register the MC asm info. - RegisterMCAsmInfoFn A(TheX86_32Target, createX86MCAsmInfo); - RegisterMCAsmInfoFn B(TheX86_64Target, createX86MCAsmInfo); - - // Register the MC codegen info. - RegisterMCCodeGenInfoFn C(TheX86_32Target, createX86MCCodeGenInfo); - RegisterMCCodeGenInfoFn D(TheX86_64Target, createX86MCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo); - - // Register the MC subtarget info. 
- TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target, - X86_MC::createX86MCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target, - X86_MC::createX86MCSubtargetInfo); - - // Register the MC instruction analyzer. - TargetRegistry::RegisterMCInstrAnalysis(TheX86_32Target, - createX86MCInstrAnalysis); - TargetRegistry::RegisterMCInstrAnalysis(TheX86_64Target, - createX86MCInstrAnalysis); - - // Register the code emitter. - TargetRegistry::RegisterMCCodeEmitter(TheX86_32Target, - createX86MCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheX86_64Target, - createX86MCCodeEmitter); + for (Target *T : {&TheX86_32Target, &TheX86_64Target}) { + // Register the MC asm info. + RegisterMCAsmInfoFn X(*T, createX86MCAsmInfo); + + // Register the MC codegen info. + RegisterMCCodeGenInfoFn Y(*T, createX86MCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createX86MCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(*T, createX86MCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, + X86_MC::createX86MCSubtargetInfo); + + // Register the MC instruction analyzer. + TargetRegistry::RegisterMCInstrAnalysis(*T, createX86MCInstrAnalysis); + + // Register the code emitter. + TargetRegistry::RegisterMCCodeEmitter(*T, createX86MCCodeEmitter); + + // Register the object streamer. + TargetRegistry::RegisterCOFFStreamer(*T, createX86WinCOFFStreamer); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(*T, createX86MCInstPrinter); + + // Register the MC relocation info. + TargetRegistry::RegisterMCRelocationInfo(*T, createX86MCRelocationInfo); + } // Register the asm backend. TargetRegistry::RegisterMCAsmBackend(TheX86_32Target, createX86_32AsmBackend); TargetRegistry::RegisterMCAsmBackend(TheX86_64Target, createX86_64AsmBackend); - - // Register the object streamer. - TargetRegistry::RegisterMCObjectStreamer(TheX86_32Target, - createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheX86_64Target, - createMCStreamer); - - // Register the MCInstPrinter. - TargetRegistry::RegisterMCInstPrinter(TheX86_32Target, - createX86MCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheX86_64Target, - createX86MCInstPrinter); - - // Register the MC relocation info. - TargetRegistry::RegisterMCRelocationInfo(TheX86_32Target, - createX86MCRelocationInfo); - TargetRegistry::RegisterMCRelocationInfo(TheX86_64Target, - createX86MCRelocationInfo); } diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index d8320b9..6f50f11 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -53,18 +53,6 @@ namespace N86 { namespace X86_MC { std::string ParseX86Triple(StringRef TT); - /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in - /// the specified arguments. If we can't run cpuid on the host, return true. - bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX, - unsigned *rEBX, unsigned *rECX, unsigned *rEDX); - /// GetCpuIDAndInfoEx - Execute the specified cpuid with subleaf and return - /// the 4 values in the specified arguments. If we can't run cpuid on the - /// host, return true. 
- bool GetCpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX, - unsigned *rEBX, unsigned *rECX, unsigned *rEDX); - - void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model); - unsigned getDwarfRegFlavour(Triple TT, bool isEH); void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI); @@ -78,7 +66,6 @@ namespace X86_MC { MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx); MCAsmBackend *createX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI, @@ -86,12 +73,12 @@ MCAsmBackend *createX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI, MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -/// createX86WinCOFFStreamer - Construct an X86 Windows COFF machine code -/// streamer which will generate PE/COFF format object files. +/// Construct an X86 Windows COFF machine code streamer which will generate +/// PE/COFF format object files. /// /// Takes ownership of \p AB and \p CE. MCStreamer *createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, - MCCodeEmitter *CE, raw_ostream &OS, + raw_ostream &OS, MCCodeEmitter *CE, bool RelaxAll); /// createX86MachObjectWriter - Construct an X86 Mach-O object writer. diff --git a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp index 3b81d53..81749fc 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp @@ -38,7 +38,7 @@ public: MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName); // FIXME: check that the value is actually the same. - if (Sym->isVariable() == false) + if (!Sym->isVariable()) Sym->setVariableValue(MCConstantExpr::Create(SymAddr, Ctx)); const MCExpr *Expr = nullptr; @@ -93,7 +93,7 @@ public: RSymI->getName(RSymName); MCSymbol *RSym = Ctx.GetOrCreateSymbol(RSymName); - if (RSym->isVariable() == false) + if (!RSym->isVariable()) RSym->setVariableValue(MCConstantExpr::Create(RSymAddr, Ctx)); const MCExpr *RHS = MCSymbolRefExpr::Create(RSym, Ctx); diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp index 5f1596c..5690efe 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp @@ -48,13 +48,11 @@ void X86WinCOFFStreamer::FinishImpl() { } } -namespace llvm { -MCStreamer *createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, - MCCodeEmitter *CE, raw_ostream &OS, - bool RelaxAll) { +MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, + raw_ostream &OS, MCCodeEmitter *CE, + bool RelaxAll) { X86WinCOFFStreamer *S = new X86WinCOFFStreamer(C, AB, CE, OS); S->getAssembler().setRelaxAll(RelaxAll); return S; } -} diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 71329b0..e6896e8 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -93,36 +93,6 @@ The pattern isel got this one right. 
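
The README entry removed just below — lowering select_cc through "emulated conditional moves" — is the same cmp/and/andn/or idiom that X86FastEmitSSESelect uses later in this patch. In SSE1 intrinsic form the whole branchless select is four operations (a sketch, not the backend's lowering):

#include <xmmintrin.h>

// Branchless per-lane select: where a < b take x, else take y.
static __m128 select_lt(__m128 a, __m128 b, __m128 x, __m128 y) {
  __m128 m = _mm_cmplt_ps(a, b);          // all-ones lanes where a < b
  return _mm_or_ps(_mm_and_ps(m, x),      // x in selected lanes
                   _mm_andnot_ps(m, y));  // y in the rest (~m & y)
}
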
//===---------------------------------------------------------------------===// -SSE should implement 'select_cc' using 'emulated conditional moves' that use -pcmp/pand/pandn/por to do a selection instead of a conditional branch: - -double %X(double %Y, double %Z, double %A, double %B) { - %C = setlt double %A, %B - %z = fadd double %Z, 0.0 ;; select operand is not a load - %D = select bool %C, double %Y, double %z - ret double %D -} - -We currently emit: - -_X: - subl $12, %esp - xorpd %xmm0, %xmm0 - addsd 24(%esp), %xmm0 - movsd 32(%esp), %xmm1 - movsd 16(%esp), %xmm2 - ucomisd 40(%esp), %xmm1 - jb LBB_X_2 -LBB_X_1: - movsd %xmm0, %xmm2 -LBB_X_2: - movsd %xmm2, (%esp) - fldl (%esp) - addl $12, %esp - ret - -//===---------------------------------------------------------------------===// - Lower memcpy / memset to a series of SSE 128 bit move instructions when it's feasible. @@ -787,25 +757,6 @@ cheaper to do fld1 than load from a constant pool for example, so //===---------------------------------------------------------------------===// -The X86 backend should be able to if-convert SSE comparisons like "ucomisd" to -"cmpsd". For example, this code: - -double d1(double x) { return x == x ? x : x + x; } - -Compiles into: - -_d1: - ucomisd %xmm0, %xmm0 - jnp LBB1_2 - addsd %xmm0, %xmm0 - ret -LBB1_2: - ret - -Also, the 'ret's should be shared. This is PR6032. - -//===---------------------------------------------------------------------===// - These should compile into the same code (PR6214): Perhaps instcombine should canonicalize the former into the later? @@ -858,35 +809,6 @@ doing a shuffle from v[1] to v[0] then a float store. //===---------------------------------------------------------------------===// -On SSE4 machines, we compile this code: - -define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, - <2 x float> *%P) nounwind { - %Z = fadd <2 x float> %Q, %R - - store <2 x float> %Z, <2 x float> *%P - ret <2 x float> %Z -} - -into: - -_test2: ## @test2 -## BB#0: - insertps $0, %xmm2, %xmm2 - insertps $16, %xmm3, %xmm2 - insertps $0, %xmm0, %xmm3 - insertps $16, %xmm1, %xmm3 - addps %xmm2, %xmm3 - movq %xmm3, (%rdi) - movaps %xmm3, %xmm0 - pshufd $1, %xmm3, %xmm1 - ## kill: XMM1<def> XMM1<kill> - ret - -The insertps's of $0 are pointless complex copies. - -//===---------------------------------------------------------------------===// - [UNSAFE FP] void foo(double, double, double); diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index bb0b9ce..f6033a7 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -63,9 +63,6 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { OutStreamer.EndCOFFSymbolDef(); } - // Have common code print out the function header with linkage info etc. - EmitFunctionHeader(); - // Emit the rest of the function body. 
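
Further down, the X86FastISel hunk threads the IR load's alignment into opcode choice for 128-bit vector loads: 16-byte alignment permits MOVAPS/MOVAPD/MOVDQA, anything weaker falls back to the unaligned MOVUPS/MOVUPD/MOVDQU forms, and an alignment of 0 is first normalized to the type's ABI alignment so codegen never sees it. The decision, as a standalone sketch with placeholder opcode names:

enum Opc { MOVAPS, MOVUPS, VMOVAPS, VMOVUPS };

static Opc pickV4F32Load(unsigned Align, unsigned ABIAlign, bool HasAVX) {
  if (Align == 0)          // IR "no alignment": use the ABI default
    Align = ABIAlign;
  if (Align >= 16)         // safe to use the aligned encoding
    return HasAVX ? VMOVAPS : MOVAPS;
  return HasAVX ? VMOVUPS : MOVUPS;
}
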
EmitFunctionBody(); diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index a17f052..cba140f 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -84,7 +84,7 @@ private: bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT, DebugLoc DL); bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO, - unsigned &ResultReg); + unsigned &ResultReg, unsigned Alignment = 1); bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM, MachineMemOperand *MMO = nullptr, bool Aligned = false); @@ -327,7 +327,8 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) { /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV. /// Return true and the result register by reference if it is possible. bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, - MachineMemOperand *MMO, unsigned &ResultReg) { + MachineMemOperand *MMO, unsigned &ResultReg, + unsigned Alignment) { // Get opcode and regclass of the output for the given load instruction. unsigned Opc = 0; const TargetRegisterClass *RC = nullptr; @@ -372,6 +373,30 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, case MVT::f80: // No f80 support yet. return false; + case MVT::v4f32: + if (Alignment >= 16) + Opc = Subtarget->hasAVX() ? X86::VMOVAPSrm : X86::MOVAPSrm; + else + Opc = Subtarget->hasAVX() ? X86::VMOVUPSrm : X86::MOVUPSrm; + RC = &X86::VR128RegClass; + break; + case MVT::v2f64: + if (Alignment >= 16) + Opc = Subtarget->hasAVX() ? X86::VMOVAPDrm : X86::MOVAPDrm; + else + Opc = Subtarget->hasAVX() ? X86::VMOVUPDrm : X86::MOVUPDrm; + RC = &X86::VR128RegClass; + break; + case MVT::v4i32: + case MVT::v2i64: + case MVT::v8i16: + case MVT::v16i8: + if (Alignment >= 16) + Opc = Subtarget->hasAVX() ? X86::VMOVDQArm : X86::MOVDQArm; + else + Opc = Subtarget->hasAVX() ? X86::VMOVDQUrm : X86::MOVDQUrm; + RC = &X86::VR128RegClass; + break; } ResultReg = createResultReg(RC); @@ -1068,8 +1093,14 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) { if (!X86SelectAddress(Ptr, AM)) return false; + unsigned Alignment = LI->getAlignment(); + unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType()); + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = ABIAlignment; + unsigned ResultReg = 0; - if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg)) + if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg, + Alignment)) return false; updateValueMap(I, ResultReg); @@ -1094,20 +1125,30 @@ static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) { } } -/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS -/// of the comparison, return an opcode that works for the compare (e.g. -/// CMP32ri) otherwise return 0. +/// If we have a comparison with RHS as the RHS of the comparison, return an +/// opcode that works for the compare (e.g. CMP32ri) otherwise return 0. static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) { + int64_t Val = RHSC->getSExtValue(); switch (VT.getSimpleVT().SimpleTy) { // Otherwise, we can't fold the immediate into this comparison. 
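
The chooser rewritten below now prefers the sign-extended 8-bit immediate encodings: whenever the constant fits an imm8 field, CMP16ri8/CMP32ri8/CMP64ri8 are strictly shorter than the full-width forms, and 64-bit compares must additionally fit a sign-extended 32-bit field or give up on folding entirely. The fit tests are plain signed-range checks:

#include <cstdint>

static bool fitsInt8(int64_t V)  { return V >= INT8_MIN  && V <= INT8_MAX; }
static bool fitsInt32(int64_t V) { return V >= INT32_MIN && V <= INT32_MAX; }

// cmp eax, 100         -> imm8 form (fitsInt8(100))
// cmp eax, 1000000     -> imm32 form only
// cmp rax, 1LL << 32   -> no fold: !fitsInt32, the chooser returns 0
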
- default: return 0; - case MVT::i8: return X86::CMP8ri; - case MVT::i16: return X86::CMP16ri; - case MVT::i32: return X86::CMP32ri; + default: + return 0; + case MVT::i8: + return X86::CMP8ri; + case MVT::i16: + if (isInt<8>(Val)) + return X86::CMP16ri8; + return X86::CMP16ri; + case MVT::i32: + if (isInt<8>(Val)) + return X86::CMP32ri8; + return X86::CMP32ri; case MVT::i64: + if (isInt<8>(Val)) + return X86::CMP64ri8; // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext // field. - if ((int)RHSC->getSExtValue() == RHSC->getSExtValue()) + if (isInt<32>(Val)) return X86::CMP64ri32; return 0; } @@ -1810,11 +1851,11 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) { return true; } -/// \brief Emit SSE instructions to lower the select. +/// \brief Emit SSE or AVX instructions to lower the select. /// /// Try to use SSE1/SSE2 instructions to simulate a select without branches. /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary -/// SSE instructions are available. +/// SSE instructions are available. If AVX is available, try to use a VBLENDV. bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) { // Optimize conditions coming from a compare if both instructions are in the // same basic block (values defined in other basic blocks may not have @@ -1850,19 +1891,17 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) { if (NeedSwap) std::swap(CmpLHS, CmpRHS); - static unsigned OpcTable[2][2][4] = { - { { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr }, - { X86::VCMPSSrr, X86::VFsANDPSrr, X86::VFsANDNPSrr, X86::VFsORPSrr } }, - { { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr }, - { X86::VCMPSDrr, X86::VFsANDPDrr, X86::VFsANDNPDrr, X86::VFsORPDrr } } + // Choose the SSE instruction sequence based on data type (float or double). + static unsigned OpcTable[2][4] = { + { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr }, + { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr } }; - bool HasAVX = Subtarget->hasAVX(); unsigned *Opc = nullptr; switch (RetVT.SimpleTy) { default: return false; - case MVT::f32: Opc = &OpcTable[0][HasAVX][0]; break; - case MVT::f64: Opc = &OpcTable[1][HasAVX][0]; break; + case MVT::f32: Opc = &OpcTable[0][0]; break; + case MVT::f64: Opc = &OpcTable[1][0]; break; } const Value *LHS = I->getOperand(1); @@ -1884,14 +1923,33 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) { return false; const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); - unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill, - CmpRHSReg, CmpRHSIsKill, CC); - unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false, - LHSReg, LHSIsKill); - unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true, - RHSReg, RHSIsKill); - unsigned ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true, - AndReg, /*IsKill=*/true); + unsigned ResultReg; + + if (Subtarget->hasAVX()) { + // If we have AVX, create 1 blendv instead of 3 logic instructions. + // Blendv was introduced with SSE 4.1, but the 2 register form implicitly + // uses XMM0 as the selection register. That may need just as many + // instructions as the AND/ANDN/OR sequence due to register moves, so + // don't bother. + unsigned CmpOpcode = + (RetVT.SimpleTy == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr; + unsigned BlendOpcode = + (RetVT.SimpleTy == MVT::f32) ? 
X86::VBLENDVPSrr : X86::VBLENDVPDrr; + + unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill, + CmpRHSReg, CmpRHSIsKill, CC); + ResultReg = fastEmitInst_rrr(BlendOpcode, RC, RHSReg, RHSIsKill, + LHSReg, LHSIsKill, CmpReg, true); + } else { + unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill, + CmpRHSReg, CmpRHSIsKill, CC); + unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false, + LHSReg, LHSIsKill); + unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true, + RHSReg, RHSIsKill); + ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true, + AndReg, /*IsKill=*/true); + } updateValueMap(I, ResultReg); return true; } @@ -2015,38 +2073,30 @@ bool X86FastISel::X86SelectSIToFP(const Instruction *I) { if (OpReg == 0) return false; - bool HasAVX = Subtarget->hasAVX(); const TargetRegisterClass *RC = nullptr; unsigned Opcode; - if (I->getType()->isDoubleTy() && X86ScalarSSEf64) { + if (I->getType()->isDoubleTy()) { // sitofp int -> double - Opcode = HasAVX ? X86::VCVTSI2SDrr : X86::CVTSI2SDrr; + Opcode = X86::VCVTSI2SDrr; RC = &X86::FR64RegClass; - } else if (I->getType()->isFloatTy() && X86ScalarSSEf32) { + } else if (I->getType()->isFloatTy()) { // sitofp int -> float - Opcode = HasAVX ? X86::VCVTSI2SSrr : X86::CVTSI2SSrr; + Opcode = X86::VCVTSI2SSrr; RC = &X86::FR32RegClass; } else return false; + // The target-independent selection algorithm in FastISel already knows how + // to select a SINT_TO_FP if the target is SSE but not AVX. This code is only + // reachable if the subtarget has AVX. + assert(Subtarget->hasAVX() && "Expected a subtarget with AVX!"); - unsigned ImplicitDefReg = 0; - if (HasAVX) { - ImplicitDefReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg); - } - - const MCInstrDesc &II = TII.get(Opcode); - OpReg = constrainOperandRegClass(II, OpReg, (HasAVX ? 2 : 1)); - - unsigned ResultReg = createResultReg(RC); - MachineInstrBuilder MIB; - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg); - if (ImplicitDefReg) - MIB.addReg(ImplicitDefReg, RegState::Kill); - MIB.addReg(OpReg); + unsigned ImplicitDefReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg); + unsigned ResultReg = + fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false); updateValueMap(I, ResultReg); return true; } @@ -3053,7 +3103,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { // Add a register mask operand representing the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). - MIB.addRegMask(TRI.getCallPreservedMask(CC)); + MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); // Add an implicit use GOT pointer in EBX. 
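
Earlier in this hunk, X86FastEmitSSESelect folds the whole compare-and-mask dance into one instruction when AVX is available: VBLENDVPS/VBLENDVPD take the mask as an ordinary third register operand, sidestepping the implicit XMM0 use of the SSE4.1 two-operand form that the new comment warns about. The intrinsic-level equivalent of the rewrite, with the same semantics as the four-op version sketched above:

#include <smmintrin.h>

// Take x where a < b, else y; blendv selects on the mask's sign bits.
static __m128 select_lt_blend(__m128 a, __m128 b, __m128 x, __m128 y) {
  return _mm_blendv_ps(y, x, _mm_cmplt_ps(a, b));
}
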
if (Subtarget->isPICStyleGOT()) diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index c8e5f64..3b0bd03 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -32,10 +32,10 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/EdgeBundles.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/InlineAsm.h" #include "llvm/Support/Debug.h" @@ -300,7 +300,7 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { // function. If it is all integer, there is nothing for us to do! bool FPIsUsed = false; - assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!"); + static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!"); for (unsigned i = 0; i <= 6; ++i) if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) { FPIsUsed = true; @@ -438,7 +438,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { // Rewind to first instruction newly inserted. while (Start != BB.begin() && std::prev(Start) != PrevI) --Start; dbgs() << "Inserted instructions:\n\t"; - Start->print(dbgs(), &MF.getTarget()); + Start->print(dbgs()); while (++Start != std::next(I)) {} } dumpStack(); diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index cead099..1d2c73c 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -581,7 +581,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { bool Is64Bit = STI.is64Bit(); // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64(); - bool IsWin64 = STI.isTargetWin64(); + bool IsWin64 = STI.isCallingConvWin64(Fn->getCallingConv()); // Not necessarily synonymous with IsWin64. bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry(); diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 8d50ae1..fb12ce5 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -228,7 +228,7 @@ namespace { /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. 
bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, + unsigned ConstraintID, std::vector<SDValue> &OutOps) override; void EmitSpecialCodeForMain(); @@ -1004,6 +1004,15 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, switch (N.getOpcode()) { default: break; + case ISD::FRAME_ALLOC_RECOVER: { + if (!AM.hasSymbolicDisplacement()) + if (const auto *ESNode = dyn_cast<ExternalSymbolSDNode>(N.getOperand(0))) + if (ESNode->getOpcode() == ISD::TargetExternalSymbol) { + AM.ES = ESNode->getSymbol(); + return false; + } + break; + } case ISD::Constant: { uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue(); if (!FoldOffsetIntoAddress(Val, AM)) @@ -2805,14 +2814,14 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } bool X86DAGToDAGISel:: -SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, +SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { SDValue Op0, Op1, Op2, Op3, Op4; - switch (ConstraintCode) { - case 'o': // offsetable ?? - case 'v': // not offsetable ?? + switch (ConstraintID) { + case InlineAsm::Constraint_o: // offsetable ?? + case InlineAsm::Constraint_v: // not offsetable ?? default: return true; - case 'm': // memory + case InlineAsm::Constraint_m: // memory if (!SelectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4)) return true; break; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6866be7..8b92e70 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -25,7 +25,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/VariadicFunction.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -77,119 +76,6 @@ static cl::opt<int> ReciprocalEstimateRefinementSteps( static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1, SDValue V2); -static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal, - SelectionDAG &DAG, SDLoc dl, - unsigned vectorWidth) { - assert((vectorWidth == 128 || vectorWidth == 256) && - "Unsupported vector width"); - EVT VT = Vec.getValueType(); - EVT ElVT = VT.getVectorElementType(); - unsigned Factor = VT.getSizeInBits()/vectorWidth; - EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT, - VT.getVectorNumElements()/Factor); - - // Extract from UNDEF is UNDEF. - if (Vec.getOpcode() == ISD::UNDEF) - return DAG.getUNDEF(ResultVT); - - // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR - unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits(); - - // This is the index of the first element of the vectorWidth-bit chunk - // we want. - unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth) - * ElemsPerChunk); - - // If the input is a buildvector just emit a smaller one. - if (Vec.getOpcode() == ISD::BUILD_VECTOR) - return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT, - makeArrayRef(Vec->op_begin() + NormalizedIdxVal, - ElemsPerChunk)); - - SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx); -} - -/// Generate a DAG to grab 128-bits from a vector > 128 bits. This -/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128 -/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4 -/// instructions or a simple subregister reference. Idx is an index in the -/// 128 bits we want. 
It need not be aligned to a 128-bit boundary. That makes -/// lowering EXTRACT_VECTOR_ELT operations easier. -static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, - SelectionDAG &DAG, SDLoc dl) { - assert((Vec.getValueType().is256BitVector() || - Vec.getValueType().is512BitVector()) && "Unexpected vector size!"); - return ExtractSubVector(Vec, IdxVal, DAG, dl, 128); -} - -/// Generate a DAG to grab 256-bits from a 512-bit vector. -static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal, - SelectionDAG &DAG, SDLoc dl) { - assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!"); - return ExtractSubVector(Vec, IdxVal, DAG, dl, 256); -} - -static SDValue InsertSubVector(SDValue Result, SDValue Vec, - unsigned IdxVal, SelectionDAG &DAG, - SDLoc dl, unsigned vectorWidth) { - assert((vectorWidth == 128 || vectorWidth == 256) && - "Unsupported vector width"); - // Inserting UNDEF is Result - if (Vec.getOpcode() == ISD::UNDEF) - return Result; - EVT VT = Vec.getValueType(); - EVT ElVT = VT.getVectorElementType(); - EVT ResultVT = Result.getValueType(); - - // Insert the relevant vectorWidth bits. - unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits(); - - // This is the index of the first element of the vectorWidth-bit chunk - // we want. - unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth) - * ElemsPerChunk); - - SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal); - return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx); -} - -/// Generate a DAG to put 128-bits into a vector > 128 bits. This -/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or -/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a -/// simple superregister reference. Idx is an index in the 128 bits -/// we want. It need not be aligned to a 128-bit boundary. That makes -/// lowering INSERT_VECTOR_ELT operations easier. -static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, - SelectionDAG &DAG,SDLoc dl) { - assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!"); - return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128); -} - -static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, - SelectionDAG &DAG, SDLoc dl) { - assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!"); - return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256); -} - -/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128 -/// instructions. This is used because creating CONCAT_VECTOR nodes of -/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower -/// large BUILD_VECTORS. -static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT, - unsigned NumElems, SelectionDAG &DAG, - SDLoc dl) { - SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl); - return Insert128BitVector(V, V2, NumElems/2, DAG, dl); -} - -static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT, - unsigned NumElems, SelectionDAG &DAG, - SDLoc dl) { - SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl); - return Insert256BitVector(V, V2, NumElems/2, DAG, dl); -} - X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, const X86Subtarget &STI) : TargetLowering(TM), Subtarget(&STI) { @@ -871,35 +757,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // MMX-sized vectors (other than x86mmx) are expected to be expanded // into smaller operations. 
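
The hunks below are mechanical de-duplication: long runs of per-type setOperationAction calls become range-for loops over brace-initialized MVT lists, first for the MMX-sized types and again for the SSE4.1 rounding operations. The shape of the refactor as a toy model (names are stand-ins, not LLVM's API):

#include <cstdio>
#include <initializer_list>

enum Op { MULHS, AND_, OR_, XOR_ };
enum Ty { v8i8, v4i16, v2i32, v1i64 };

static void setAction(Op O, Ty T, const char *Action) {
  std::printf("op %d / type %d -> %s\n", O, T, Action);
}

int main() {
  for (Ty T : {v8i8, v4i16, v2i32, v1i64})   // one loop body replaces
    for (Op O : {MULHS, AND_, OR_, XOR_})    // sixteen pasted calls
      setAction(O, T, "Expand");
}
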
- setOperationAction(ISD::MULHS, MVT::v8i8, Expand); - setOperationAction(ISD::MULHS, MVT::v4i16, Expand); - setOperationAction(ISD::MULHS, MVT::v2i32, Expand); - setOperationAction(ISD::MULHS, MVT::v1i64, Expand); - setOperationAction(ISD::AND, MVT::v8i8, Expand); - setOperationAction(ISD::AND, MVT::v4i16, Expand); - setOperationAction(ISD::AND, MVT::v2i32, Expand); - setOperationAction(ISD::AND, MVT::v1i64, Expand); - setOperationAction(ISD::OR, MVT::v8i8, Expand); - setOperationAction(ISD::OR, MVT::v4i16, Expand); - setOperationAction(ISD::OR, MVT::v2i32, Expand); - setOperationAction(ISD::OR, MVT::v1i64, Expand); - setOperationAction(ISD::XOR, MVT::v8i8, Expand); - setOperationAction(ISD::XOR, MVT::v4i16, Expand); - setOperationAction(ISD::XOR, MVT::v2i32, Expand); - setOperationAction(ISD::XOR, MVT::v1i64, Expand); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Expand); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Expand); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Expand); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Expand); + for (MVT MMXTy : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64}) { + setOperationAction(ISD::MULHS, MMXTy, Expand); + setOperationAction(ISD::AND, MMXTy, Expand); + setOperationAction(ISD::OR, MMXTy, Expand); + setOperationAction(ISD::XOR, MMXTy, Expand); + setOperationAction(ISD::SCALAR_TO_VECTOR, MMXTy, Expand); + setOperationAction(ISD::SELECT, MMXTy, Expand); + setOperationAction(ISD::BITCAST, MMXTy, Expand); + } setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand); - setOperationAction(ISD::SELECT, MVT::v8i8, Expand); - setOperationAction(ISD::SELECT, MVT::v4i16, Expand); - setOperationAction(ISD::SELECT, MVT::v2i32, Expand); - setOperationAction(ISD::SELECT, MVT::v1i64, Expand); - setOperationAction(ISD::BITCAST, MVT::v8i8, Expand); - setOperationAction(ISD::BITCAST, MVT::v4i16, Expand); - setOperationAction(ISD::BITCAST, MVT::v2i32, Expand); - setOperationAction(ISD::BITCAST, MVT::v1i64, Expand); if (!TM.Options.UseSoftFloat && Subtarget->hasSSE1()) { addRegisterClass(MVT::v4f32, &X86::VR128RegClass); @@ -1065,27 +932,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } if (!TM.Options.UseSoftFloat && Subtarget->hasSSE41()) { - setOperationAction(ISD::FFLOOR, MVT::f32, Legal); - setOperationAction(ISD::FCEIL, MVT::f32, Legal); - setOperationAction(ISD::FTRUNC, MVT::f32, Legal); - setOperationAction(ISD::FRINT, MVT::f32, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); - setOperationAction(ISD::FFLOOR, MVT::f64, Legal); - setOperationAction(ISD::FCEIL, MVT::f64, Legal); - setOperationAction(ISD::FTRUNC, MVT::f64, Legal); - setOperationAction(ISD::FRINT, MVT::f64, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); - - setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); - setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); - setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); - setOperationAction(ISD::FRINT, MVT::v4f32, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); - setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); - setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); - setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); - setOperationAction(ISD::FRINT, MVT::v2f64, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); + for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) { + setOperationAction(ISD::FFLOOR, RoundedTy, Legal); + setOperationAction(ISD::FCEIL, RoundedTy, Legal); + setOperationAction(ISD::FTRUNC, 
RoundedTy, Legal); + setOperationAction(ISD::FRINT, RoundedTy, Legal); + setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal); + } // FIXME: Do we need to handle scalar-to-vector here? setOperationAction(ISD::MUL, MVT::v4i32, Legal); @@ -1474,7 +1327,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Legal); setOperationAction(ISD::SETCC, MVT::v16i1, Custom); @@ -1576,6 +1428,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SUB, MVT::v32i16, Legal); setOperationAction(ISD::SUB, MVT::v64i8, Legal); setOperationAction(ISD::MUL, MVT::v32i16, Legal); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom); for (int i = MVT::v32i8; i != MVT::v8i64; ++i) { const MVT VT = (MVT::SimpleValueType)i; @@ -1599,7 +1455,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SETCC, MVT::v4i1, Custom); setOperationAction(ISD::SETCC, MVT::v2i1, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Legal); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom); setOperationAction(ISD::AND, MVT::v8i32, Legal); setOperationAction(ISD::OR, MVT::v8i32, Legal); @@ -3189,7 +3048,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -3906,21 +3765,6 @@ static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, return true; } -/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming -/// the two vector operands have swapped position. 
-static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, - unsigned NumElems) { - for (unsigned i = 0; i != NumElems; ++i) { - int idx = Mask[i]; - if (idx < 0) - continue; - else if (idx < (int)NumElems) - Mask[i] = idx + NumElems; - else - Mask[i] = idx - NumElems; - } -} - /// isVEXTRACTIndex - Return true if the specified /// EXTRACT_SUBVECTOR operand specifies a vector extract that is /// suitable for instruction that extract 128 or 256 bit vectors @@ -4083,9 +3927,13 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops); } else if (VT.getScalarType() == MVT::i1) { - assert(VT.getVectorNumElements() <= 16 && "Unexpected vector type"); + + assert((Subtarget->hasBWI() || VT.getVectorNumElements() <= 16) + && "Unexpected vector type"); + assert((Subtarget->hasVLX() || VT.getVectorNumElements() >= 8) + && "Unexpected vector type"); SDValue Cst = DAG.getConstant(0, MVT::i1); - SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst); + SmallVector<SDValue, 64> Ops(VT.getVectorNumElements(), Cst); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } else llvm_unreachable("Unexpected vector type"); @@ -4093,6 +3941,162 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, return DAG.getNode(ISD::BITCAST, dl, VT, Vec); } +static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal, + SelectionDAG &DAG, SDLoc dl, + unsigned vectorWidth) { + assert((vectorWidth == 128 || vectorWidth == 256) && + "Unsupported vector width"); + EVT VT = Vec.getValueType(); + EVT ElVT = VT.getVectorElementType(); + unsigned Factor = VT.getSizeInBits()/vectorWidth; + EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT, + VT.getVectorNumElements()/Factor); + + // Extract from UNDEF is UNDEF. + if (Vec.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(ResultVT); + + // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR + unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits(); + + // This is the index of the first element of the vectorWidth-bit chunk + // we want. + unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth) + * ElemsPerChunk); + + // If the input is a buildvector just emit a smaller one. + if (Vec.getOpcode() == ISD::BUILD_VECTOR) + return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT, + makeArrayRef(Vec->op_begin() + NormalizedIdxVal, + ElemsPerChunk)); + + SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx); +} + +/// Generate a DAG to grab 128-bits from a vector > 128 bits. This +/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128 +/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4 +/// instructions or a simple subregister reference. Idx is an index in the +/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes +/// lowering EXTRACT_VECTOR_ELT operations easier. +static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, + SelectionDAG &DAG, SDLoc dl) { + assert((Vec.getValueType().is256BitVector() || + Vec.getValueType().is512BitVector()) && "Unexpected vector size!"); + return ExtractSubVector(Vec, IdxVal, DAG, dl, 128); +} + +/// Generate a DAG to grab 256-bits from a 512-bit vector. 
+static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal, + SelectionDAG &DAG, SDLoc dl) { + assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!"); + return ExtractSubVector(Vec, IdxVal, DAG, dl, 256); +} + +static SDValue InsertSubVector(SDValue Result, SDValue Vec, + unsigned IdxVal, SelectionDAG &DAG, + SDLoc dl, unsigned vectorWidth) { + assert((vectorWidth == 128 || vectorWidth == 256) && + "Unsupported vector width"); + // Inserting UNDEF is Result + if (Vec.getOpcode() == ISD::UNDEF) + return Result; + EVT VT = Vec.getValueType(); + EVT ElVT = VT.getVectorElementType(); + EVT ResultVT = Result.getValueType(); + + // Insert the relevant vectorWidth bits. + unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits(); + + // This is the index of the first element of the vectorWidth-bit chunk + // we want. + unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth) + * ElemsPerChunk); + + SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx); +} + +/// Generate a DAG to put 128-bits into a vector > 128 bits. This +/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or +/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a +/// simple superregister reference. Idx is an index in the 128 bits +/// we want. It need not be aligned to a 128-bit boundary. That makes +/// lowering INSERT_VECTOR_ELT operations easier. +static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, + SelectionDAG &DAG, SDLoc dl) { + assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!"); + + // For insertion into the zero index (low half) of a 256-bit vector, it is + // more efficient to generate a blend with immediate instead of an insert*128. + // We are still creating an INSERT_SUBVECTOR below with an undef node to + // extend the subvector to the size of the result vector. Make sure that + // we are not recursing on that node by checking for undef here. + if (IdxVal == 0 && Result.getValueType().is256BitVector() && + Result.getOpcode() != ISD::UNDEF) { + EVT ResultVT = Result.getValueType(); + SDValue ZeroIndex = DAG.getIntPtrConstant(0); + SDValue Undef = DAG.getUNDEF(ResultVT); + SDValue Vec256 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Undef, + Vec, ZeroIndex); + + // The blend instruction, and therefore its mask, depend on the data type. + MVT ScalarType = ResultVT.getScalarType().getSimpleVT(); + if (ScalarType.isFloatingPoint()) { + // Choose either vblendps (float) or vblendpd (double). + unsigned ScalarSize = ScalarType.getSizeInBits(); + assert((ScalarSize == 64 || ScalarSize == 32) && "Unknown float type"); + unsigned MaskVal = (ScalarSize == 64) ? 0x03 : 0x0f; + SDValue Mask = DAG.getConstant(MaskVal, MVT::i8); + return DAG.getNode(X86ISD::BLENDI, dl, ResultVT, Result, Vec256, Mask); + } + + const X86Subtarget &Subtarget = + static_cast<const X86Subtarget &>(DAG.getSubtarget()); + + // AVX2 is needed for 256-bit integer blend support. + // Integers must be cast to 32-bit because there is only vpblendd; + // vpblendw can't be used for this because it has a handicapped mask. + + // If we don't have AVX2, then cast to float. Using a wrong domain blend + // is still more efficient than using the wrong domain vinsertf128 that + // will be created by InsertSubVector(). + MVT CastVT = Subtarget.hasAVX2() ? 
MVT::v8i32 : MVT::v8f32; + + SDValue Mask = DAG.getConstant(0x0f, MVT::i8); + Vec256 = DAG.getNode(ISD::BITCAST, dl, CastVT, Vec256); + Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask); + return DAG.getNode(ISD::BITCAST, dl, ResultVT, Vec256); + } + + return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128); +} + +static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, + SelectionDAG &DAG, SDLoc dl) { + assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!"); + return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256); +} + +/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128 +/// instructions. This is used because creating CONCAT_VECTOR nodes of +/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower +/// large BUILD_VECTORS. +static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT, + unsigned NumElems, SelectionDAG &DAG, + SDLoc dl) { + SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl); + return Insert128BitVector(V, V2, NumElems/2, DAG, dl); +} + +static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT, + unsigned NumElems, SelectionDAG &DAG, + SDLoc dl) { + SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl); + return Insert256BitVector(V, V2, NumElems/2, DAG, dl); +} + /// getOnesVector - Returns a vector of specified type with all bits set. /// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with /// no AVX2 supprt, use two <4 x i32> inserted in a <8 x i32> appropriately. @@ -5567,8 +5571,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl); } - SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG); - if (Broadcast.getNode()) + if (SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG)) return Broadcast; unsigned EVTBits = ExtVT.getSizeInBits(); @@ -5635,12 +5638,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 || (ExtVT == MVT::i64 && Subtarget->is64Bit())) { - if (VT.is256BitVector() || VT.is512BitVector()) { + if (VT.is512BitVector()) { SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl); return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec, Item, DAG.getIntPtrConstant(0)); } - assert(VT.is128BitVector() && "Expected an SSE value type!"); + assert((VT.is128BitVector() || VT.is256BitVector()) && + "Expected an SSE value type!"); Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); @@ -5742,24 +5746,20 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { } // If element VT is < 32 bits, convert it to inserts into a zero vector. 
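The blend path above is worth restating: inserting a 128-bit subvector at index 0 of a 256-bit vector is the same as blending the low lanes, and the immediates 0x03 (for f64 elements) and 0x0f (for f32) are just one select bit per low-lane element. A minimal standalone sketch of that immediate math (illustrative only; lowHalfBlendImm is a made-up helper, not part of this patch):

#include <cassert>
#include <cstdio>

// One blend-mask bit per element taken from the second source; replacing
// the low 128 bits of a 256-bit vector selects the first 128/EltBits
// elements.
unsigned lowHalfBlendImm(unsigned EltBits) {
  unsigned EltsIn128 = 128 / EltBits;
  return (1u << EltsIn128) - 1;
}

int main() {
  assert(lowHalfBlendImm(64) == 0x03); // vblendpd: elements 0-1
  assert(lowHalfBlendImm(32) == 0x0f); // vblendps/vpblendd: elements 0-3
  std::printf("0x%02x 0x%02x\n", lowHalfBlendImm(64), lowHalfBlendImm(32));
}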
- if (EVTBits == 8 && NumElems == 16) { - SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, - Subtarget, *this); - if (V.getNode()) return V; - } + if (EVTBits == 8 && NumElems == 16) + if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, + Subtarget, *this)) + return V; - if (EVTBits == 16 && NumElems == 8) { - SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, - Subtarget, *this); - if (V.getNode()) return V; - } + if (EVTBits == 16 && NumElems == 8) + if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, + Subtarget, *this)) + return V; // If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS - if (EVTBits == 32 && NumElems == 4) { - SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget, *this); - if (V.getNode()) + if (EVTBits == 32 && NumElems == 4) + if (SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget, *this)) return V; - } // If element VT is == 32 bits, turn it into a number of shuffles. SmallVector<SDValue, 8> V(NumElems); @@ -5807,13 +5807,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { V[i] = Op.getOperand(i); // Check for elements which are consecutive loads. - SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false); - if (LD.getNode()) + if (SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false)) return LD; // Check for a build vector from mostly shuffle plus few inserting. - SDValue Sh = buildFromShuffleMostly(Op, DAG); - if (Sh.getNode()) + if (SDValue Sh = buildFromShuffleMostly(Op, DAG)) return Sh; // For SSE 4.1, use insertps to put the high elements into the low element. @@ -5893,8 +5891,64 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { return Concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl); } -static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { - MVT LLVM_ATTRIBUTE_UNUSED VT = Op.getSimpleValueType(); +static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, + const X86Subtarget *Subtarget, + SelectionDAG & DAG) { + SDLoc dl(Op); + MVT ResVT = Op.getSimpleValueType(); + unsigned NumOfOperands = Op.getNumOperands(); + + assert(isPowerOf2_32(NumOfOperands) && + "Unexpected number of operands in CONCAT_VECTORS"); + + if (NumOfOperands > 2) { + MVT HalfVT = MVT::getVectorVT(ResVT.getScalarType(), + ResVT.getVectorNumElements()/2); + SmallVector<SDValue, 2> Ops; + for (unsigned i = 0; i < NumOfOperands/2; i++) + Ops.push_back(Op.getOperand(i)); + SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops); + Ops.clear(); + for (unsigned i = NumOfOperands/2; i < NumOfOperands; i++) + Ops.push_back(Op.getOperand(i)); + SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); + } + + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + bool IsZeroV1 = ISD::isBuildVectorAllZeros(V1.getNode()); + bool IsZeroV2 = ISD::isBuildVectorAllZeros(V2.getNode()); + + if (IsZeroV1 && IsZeroV2) + return getZeroVector(ResVT, Subtarget, DAG, dl); + + SDValue ZeroIdx = DAG.getIntPtrConstant(0); + SDValue Undef = DAG.getUNDEF(ResVT); + unsigned NumElems = ResVT.getVectorNumElements(); + SDValue ShiftBits = DAG.getConstant(NumElems/2, MVT::i8); + + V2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, ZeroIdx); + V2 = DAG.getNode(X86ISD::VSHLI, dl, ResVT, V2, ShiftBits); + if (IsZeroV1) + return V2; + + V1 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx); + // Zero the upper bits of V1 + V1 
= DAG.getNode(X86ISD::VSHLI, dl, ResVT, V1, ShiftBits); + V1 = DAG.getNode(X86ISD::VSRLI, dl, ResVT, V1, ShiftBits); + if (IsZeroV2) + return V1; + return DAG.getNode(ISD::OR, dl, ResVT, V1, V2); +} + +static SDValue LowerCONCAT_VECTORS(SDValue Op, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + if (VT.getVectorElementType() == MVT::i1) + return LowerCONCAT_VECTORSvXi1(Op, Subtarget, DAG); + assert((VT.is256BitVector() && Op.getNumOperands() == 2) || (VT.is512BitVector() && (Op.getNumOperands() == 2 || Op.getNumOperands() == 4))); @@ -6935,8 +6989,8 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, "a sorted mask where the broadcast " "comes from V1."); - // Go up the chain of (vector) values to try and find a scalar load that - // we can combine with the broadcast. + // Go up the chain of (vector) values to find a scalar load that we can + // combine with the broadcast. for (;;) { switch (V.getOpcode()) { case ISD::CONCAT_VECTORS: { @@ -6973,12 +7027,12 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, (V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) { V = V.getOperand(BroadcastIdx); - // If the scalar isn't a load we can't broadcast from it in AVX1, only with - // AVX2. + // If the scalar isn't a load, we can't broadcast from it in AVX1. + // Only AVX2 has register broadcasts. if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V)) return SDValue(); } else if (BroadcastIdx != 0 || !Subtarget->hasAVX2()) { - // We can't broadcast from a vector register w/o AVX2, and we can only + // We can't broadcast from a vector register without AVX2, and we can only // broadcast from the zero-element of a vector register. return SDValue(); } @@ -7689,10 +7743,18 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, /// The exact breakdown of how to form these dword pairs and align them on the /// correct sides is really tricky. See the comments within the function for /// more of the details. +/// +/// This code also handles repeated 128-bit lanes of v8i16 shuffles, but each +/// lane must shuffle the *exact* same way. In fact, you must pass a v8 Mask to +/// this routine for it to work correctly. To shuffle a 256-bit or 512-bit i16 +/// vector, form the analogous 128-bit 8-element Mask. static SDValue lowerV8I16GeneralSingleInputVectorShuffle( - SDLoc DL, SDValue V, MutableArrayRef<int> Mask, + SDLoc DL, MVT VT, SDValue V, MutableArrayRef<int> Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - assert(V.getSimpleValueType() == MVT::v8i16 && "Bad input type!"); + assert(VT.getScalarType() == MVT::i16 && "Bad input type!"); + MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2); + + assert(Mask.size() == 8 && "Shuffle mask length doesn't match!"); MutableArrayRef<int> LoMask = Mask.slice(0, 4); MutableArrayRef<int> HiMask = Mask.slice(4, 4); @@ -7845,9 +7907,9 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( int PSHUFDMask[] = {0, 1, 2, 3}; PSHUFDMask[ADWord] = BDWord; PSHUFDMask[BDWord] = ADWord; - V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, - DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, - DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V), + V = DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, + DAG.getNode(ISD::BITCAST, DL, PSHUFDVT, V), getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG))); // Adjust the mask to match the new locations of A and B.
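Since AVX-512 mask vectors behave like bit-sets, the vXi1 CONCAT_VECTORS lowering above has a compact scalar analogue: clear the upper half of the low mask with a shift-left/shift-right pair, shift the high mask into position, and OR the halves together. A minimal sketch, assuming an 8+8 -> 16 bit split (kconcat is an illustrative name, not LLVM code):

#include <cassert>
#include <cstdint>

// Scalar analogue of LowerCONCAT_VECTORSvXi1: zero the upper bits of the
// low mask with a shl/shr pair, move the high mask into place with a shl,
// then OR the two halves.
uint16_t kconcat(uint16_t lo, uint16_t hi, unsigned halfBits) {
  uint16_t l = static_cast<uint16_t>(
      static_cast<uint16_t>(lo << halfBits) >> halfBits); // zero upper bits
  uint16_t h = static_cast<uint16_t>(hi << halfBits);     // shift into place
  return l | h;
}

int main() {
  // Two v8i1 masks concatenated into a v16i1 mask.
  assert(kconcat(0x00AB, 0x00CD, 8) == 0xCDAB);
}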
@@ -7859,8 +7921,8 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( // Recurse back into this routine to re-compute state now that this isn't // a 3 and 1 problem. - return DAG.getVectorShuffle(MVT::v8i16, DL, V, DAG.getUNDEF(MVT::v8i16), - Mask); + return lowerV8I16GeneralSingleInputVectorShuffle(DL, VT, V, Mask, Subtarget, + DAG); }; if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3)) return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4); @@ -8083,15 +8145,15 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( // Now enact all the shuffles we've computed to move the inputs into their // target half. if (!isNoopShuffleMask(PSHUFLMask)) - V = DAG.getNode(X86ISD::PSHUFLW, DL, MVT::v8i16, V, + V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V, getV4X86ShuffleImm8ForMask(PSHUFLMask, DAG)); if (!isNoopShuffleMask(PSHUFHMask)) - V = DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, V, + V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V, getV4X86ShuffleImm8ForMask(PSHUFHMask, DAG)); if (!isNoopShuffleMask(PSHUFDMask)) - V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, - DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, - DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V), + V = DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, + DAG.getNode(ISD::BITCAST, DL, PSHUFDVT, V), getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG))); // At this point, each half should contain all its inputs, and we can then @@ -8105,7 +8167,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( // Do a half shuffle for the low mask. if (!isNoopShuffleMask(LoMask)) - V = DAG.getNode(X86ISD::PSHUFLW, DL, MVT::v8i16, V, + V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V, getV4X86ShuffleImm8ForMask(LoMask, DAG)); // Do a half shuffle with the high mask after shifting its values down. @@ -8113,7 +8175,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( if (M >= 0) M -= 4; if (!isNoopShuffleMask(HiMask)) - V = DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, V, + V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V, getV4X86ShuffleImm8ForMask(HiMask, DAG)); return V; @@ -8232,8 +8294,8 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, Mask, Subtarget, DAG)) return Rotate; - return lowerV8I16GeneralSingleInputVectorShuffle(DL, V1, Mask, Subtarget, - DAG); + return lowerV8I16GeneralSingleInputVectorShuffle(DL, MVT::v8i16, V1, Mask, + Subtarget, DAG); } assert(std::any_of(Mask.begin(), Mask.end(), isV1) && @@ -8946,7 +9008,7 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(SDLoc DL, MVT VT, int LaneSize = Mask.size() / 2; // If there are only inputs from one 128-bit lane, splitting will in fact be - // less expensive. The flags track wether the given lane contains an element + // less expensive. The flags track whether the given lane contains an element // that crosses to another lane. bool LaneCrossing[2] = {false, false}; for (int i = 0, Size = Mask.size(); i < Size; ++i) @@ -8986,34 +9048,78 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) { + // TODO: If minimizing size and one of the inputs is a zero vector and the + // zero vector has only one use, we could use a VPERM2X128 to save the + // instruction bytes needed to explicitly generate the zero vector. + + // Blends are faster and handle all the non-lane-crossing cases.
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, VT, V1, V2, Mask, Subtarget, DAG)) return Blend; - MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), - VT.getVectorNumElements() / 2); - // Check for patterns which can be matched with a single insert of a 128-bit - // subvector. - if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1}) || - isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) { - SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1, - DAG.getIntPtrConstant(0)); - SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, - Mask[2] < 4 ? V1 : V2, DAG.getIntPtrConstant(0)); - return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV); - } - if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 6, 7})) { - SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1, - DAG.getIntPtrConstant(0)); - SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2, - DAG.getIntPtrConstant(2)); - return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV); + bool IsV1Zero = ISD::isBuildVectorAllZeros(V1.getNode()); + bool IsV2Zero = ISD::isBuildVectorAllZeros(V2.getNode()); + + // If either input operand is a zero vector, use VPERM2X128 because its mask + // allows us to replace the zero input with an implicit zero. + if (!IsV1Zero && !IsV2Zero) { + // Check for patterns which can be matched with a single insert of a 128-bit + // subvector. + bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1}); + if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) { + MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), + VT.getVectorNumElements() / 2); + SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1, + DAG.getIntPtrConstant(0)); + SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, + OnlyUsesV1 ? V1 : V2, DAG.getIntPtrConstant(0)); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV); + } + } + + // Otherwise form a 128-bit permutation. After accounting for undefs, + // convert the 64-bit shuffle mask selection values into 128-bit + // selection bits by dividing the indexes by 2 and shifting into positions + // defined by a vperm2*128 instruction's immediate control byte. + + // The immediate permute control byte looks like this: + // [1:0] - select 128 bits from sources for low half of destination + // [2] - ignore + // [3] - zero low half of destination + // [5:4] - select 128 bits from sources for high half of destination + // [6] - ignore + // [7] - zero high half of destination + + int MaskLO = Mask[0]; + if (MaskLO == SM_SentinelUndef) + MaskLO = Mask[1] == SM_SentinelUndef ? 0 : Mask[1]; + + int MaskHI = Mask[2]; + if (MaskHI == SM_SentinelUndef) + MaskHI = Mask[3] == SM_SentinelUndef ? 0 : Mask[3]; + + unsigned PermMask = MaskLO / 2 | (MaskHI / 2) << 4; + + // If either input is a zero vector, replace it with an undef input. + // Shuffle mask values < 4 are selecting elements of V1. + // Shuffle mask values >= 4 are selecting elements of V2. + // Adjust each half of the permute mask by clearing the half that was + // selecting the zero vector and setting the zero mask bit. + if (IsV1Zero) { + V1 = DAG.getUNDEF(VT); + if (MaskLO < 4) + PermMask = (PermMask & 0xf0) | 0x08; + if (MaskHI < 4) + PermMask = (PermMask & 0x0f) | 0x80; + } + if (IsV2Zero) { + V2 = DAG.getUNDEF(VT); + if (MaskLO >= 4) + PermMask = (PermMask & 0xf0) | 0x08; + if (MaskHI >= 4) + PermMask = (PermMask & 0x0f) | 0x80; } - // Otherwise form a 128-bit permutation. - // FIXME: Detect zero-vector inputs and use the VPERM2X128 to zero that half. 
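To make the control-byte layout described above concrete, here is a standalone sketch of the immediate construction, including the zero-input adjustment the new code performs (perm2x128Imm is a made-up helper; mask values are 64-bit element indices, so dividing by 2 yields a 128-bit lane selector):

#include <cassert>

// Sketch of the vperm2x128 immediate: bits [1:0] pick the source lane for
// the low half (0-1 from V1, 2-3 from V2), bits [5:4] likewise for the
// high half, and bits 3/7 force the corresponding half to zero.
unsigned perm2x128Imm(int maskLo, int maskHi, bool v1Zero, bool v2Zero) {
  unsigned imm = (maskLo / 2) | ((maskHi / 2) << 4);
  if (v1Zero) { // a half that read the zero vector V1 becomes a zeroed half
    if (maskLo < 4) imm = (imm & 0xf0) | 0x08;
    if (maskHi < 4) imm = (imm & 0x0f) | 0x80;
  }
  if (v2Zero) {
    if (maskLo >= 4) imm = (imm & 0xf0) | 0x08;
    if (maskHi >= 4) imm = (imm & 0x0f) | 0x80;
  }
  return imm;
}

int main() {
  // <V1.lo, V2.hi> with no zero inputs: lanes 0 and 3 -> 0x30.
  assert(perm2x128Imm(0, 6, false, false) == 0x30);
  // V2 is all zeroes and supplied the high half: zero it instead (bit 7).
  assert(perm2x128Imm(0, 6, false, true) == 0x80);
}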
- unsigned PermMask = Mask[0] / 2 | (Mask[2] / 2) << 4; return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2, DAG.getConstant(PermMask, MVT::i8)); } @@ -9326,6 +9432,15 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ArrayRef<int> Mask = SVOp->getMask(); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); + // If we have a single input to the zero element, insert that into V1 if we + // can do so cheaply. + int NumV2Elements = + std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 8; }); + if (NumV2Elements == 1 && Mask[0] >= 8) + if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( + DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) + return Insertion; + if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) return Blend; @@ -9557,6 +9672,15 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2, Mask, DAG); + SmallVector<int, 8> RepeatedMask; + if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) { + // As this is a single-input shuffle, the repeated mask should be + // a strictly valid v8i16 mask that we can pass through to the v8i16 + // lowering to handle even the v16 case. + return lowerV8I16GeneralSingleInputVectorShuffle( + DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG); + } + SDValue PSHUFBMask[32]; for (int i = 0; i < 16; ++i) { if (Mask[i] == -1) { @@ -10118,8 +10242,7 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { // Try to lower this to a blend-style vector shuffle. This can handle all // constant condition cases. - SDValue BlendOp = lowerVSELECTtoVectorShuffle(Op, Subtarget, DAG); - if (BlendOp.getNode()) + if (SDValue BlendOp = lowerVSELECTtoVectorShuffle(Op, Subtarget, DAG)) return BlendOp; // Variable blends are only legal from SSE4.1 onward. @@ -10421,17 +10544,31 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, // If the vector is wider than 128 bits, extract the 128-bit subvector, insert // into that, and then insert the subvector back into the result. if (VT.is256BitVector() || VT.is512BitVector()) { - // Get the desired 128-bit vector half. + // With a 256-bit vector, we can insert into the zero element efficiently + // using a blend if we have AVX or AVX2 and the right data type. + if (VT.is256BitVector() && IdxVal == 0) { + // TODO: It is worthwhile to cast integer to floating point and back + // and incur a domain crossing penalty if that's what we'll end up + // doing anyway after extracting to a 128-bit vector. + if ((Subtarget->hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) || + (Subtarget->hasAVX2() && EltVT == MVT::i32)) { + SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1); + N2 = DAG.getIntPtrConstant(1); + return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec, N2); + } + } + + // Get the desired 128-bit vector chunk. SDValue V = Extract128BitVector(N0, IdxVal, DAG, dl); - // Insert the element into the desired half. + // Insert the element into the desired chunk. 
unsigned NumEltsIn128 = 128 / EltVT.getSizeInBits(); unsigned IdxIn128 = IdxVal - (IdxVal / NumEltsIn128) * NumEltsIn128; V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1, DAG.getConstant(IdxIn128, MVT::i32)); - // Insert the changed part back to the 256-bit vector + // Insert the changed part back into the bigger vector return Insert128BitVector(N0, V, IdxVal, DAG, dl); } assert(VT.is128BitVector() && "Only 128-bit vector types should be left!"); @@ -10456,16 +10593,29 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, } if (EltVT == MVT::f32) { - // Bits [7:6] of the constant are the source select. This will always be - // zero here. The DAG Combiner may combine an extract_elt index into - // these - // bits. For example (insert (extract, 3), 2) could be matched by - // putting - // the '3' into bits [7:6] of X86ISD::INSERTPS. - // Bits [5:4] of the constant are the destination select. This is the - // value of the incoming immediate. - // Bits [3:0] of the constant are the zero mask. The DAG Combiner may + // Bits [7:6] of the constant are the source select. This will always be + // zero here. The DAG Combiner may combine an extract_elt index into + // these bits. For example (insert (extract, 3), 2) could be matched by + // putting the '3' into bits [7:6] of X86ISD::INSERTPS. + // Bits [5:4] of the constant are the destination select. This is the + // value of the incoming immediate. + // Bits [3:0] of the constant are the zero mask. The DAG Combiner may // combine either bitwise AND or insert of float 0.0 to set these bits. + + const Function *F = DAG.getMachineFunction().getFunction(); + bool MinSize = F->hasFnAttribute(Attribute::MinSize); + if (IdxVal == 0 && (!MinSize || !MayFoldLoad(N1))) { + // If this is an insertion of 32-bits into the low 32-bits of + // a vector, we prefer to generate a blend with immediate rather + // than an insertps. Blends are simpler operations in hardware and so + // will always have equal or better performance than insertps. + // But if optimizing for size and there's a load folding opportunity, + // generate insertps because blendps does not have a 32-bit memory + // operand form. + N2 = DAG.getIntPtrConstant(1); + N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); + return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1, N2); + } N2 = DAG.getIntPtrConstant(IdxVal << 4); // Create this as a scalar to vector.. 
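A standalone sketch of the INSERTPS immediate described in the reflowed comment above, showing why a plain insert of a scalar only needs IdxVal << 4 (insertpsImm is an illustrative helper, not part of this patch):

#include <cassert>

// Sketch of the INSERTPS immediate layout:
//   bits [7:6] source element select (zero for a scalar source)
//   bits [5:4] destination element select
//   bits [3:0] zero mask (clears the selected destination elements)
unsigned insertpsImm(unsigned srcElt, unsigned dstElt, unsigned zeroMask) {
  return (srcElt << 6) | (dstElt << 4) | (zeroMask & 0xf);
}

int main() {
  // Insert a scalar into element 2, matching N2 = IdxVal << 4 above.
  assert(insertpsImm(0, 2, 0) == (2u << 4));
  // (insert (extract x, 3), 2): the combiner can fold the '3' into [7:6].
  assert(insertpsImm(3, 2, 0) == 0xE0);
}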
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); @@ -10593,6 +10743,37 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget, if (OpVT.is512BitVector() && SubVecVT.is256BitVector()) return Insert256BitVector(Vec, SubVec, IdxVal, DAG, dl); + if (OpVT.getVectorElementType() == MVT::i1) { + if (IdxVal == 0 && Vec.getOpcode() == ISD::UNDEF) // the operation is legal + return Op; + SDValue ZeroIdx = DAG.getIntPtrConstant(0); + SDValue Undef = DAG.getUNDEF(OpVT); + unsigned NumElems = OpVT.getVectorNumElements(); + SDValue ShiftBits = DAG.getConstant(NumElems/2, MVT::i8); + + if (IdxVal == OpVT.getVectorNumElements() / 2) { + // Zero upper bits of the Vec + Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits); + Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits); + + SDValue Vec2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef, + SubVec, ZeroIdx); + Vec2 = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec2, ShiftBits); + return DAG.getNode(ISD::OR, dl, OpVT, Vec, Vec2); + } + if (IdxVal == 0) { + SDValue Vec2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef, + SubVec, ZeroIdx); + // Zero upper bits of the Vec2 + Vec2 = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec2, ShiftBits); + Vec2 = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec2, ShiftBits); + // Zero lower bits of the Vec + Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits); + Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits); + // Merge them together + return DAG.getNode(ISD::OR, dl, OpVT, Vec, Vec2); + } + } return SDValue(); } @@ -13149,9 +13330,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op1.getValueType(); SDValue CC; - // Lower fp selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops - // are available. Otherwise fp cmovs get lowered into a less efficient branch - // sequence later on. + // Lower FP selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops + // are available or VBLENDV if AVX is available. + // Otherwise FP cmovs get lowered into a less efficient branch sequence later. if (Cond.getOpcode() == ISD::SETCC && ((Subtarget->hasSSE2() && (VT == MVT::f32 || VT == MVT::f64)) || (Subtarget->hasSSE1() && VT == MVT::f32)) && @@ -13166,8 +13347,42 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { DAG.getConstant(SSECC, MVT::i8)); return DAG.getNode(X86ISD::SELECT, DL, VT, Cmp, Op1, Op2); } + SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1, DAG.getConstant(SSECC, MVT::i8)); + + // If we have AVX, we can use a variable vector select (VBLENDV) instead + // of 3 logic instructions for size savings and potentially speed. + // Unfortunately, there is no scalar form of VBLENDV. + + // If either operand is a constant, don't try this. We can expect to + // optimize away at least one of the logic instructions later in that + // case, so that sequence would be faster than a variable blend. + + // BLENDV was introduced with SSE 4.1, but the 2 register form implicitly + // uses XMM0 as the selection register. That may need just as many + // instructions as the AND/ANDN/OR sequence due to register moves, so + // don't bother. + + if (Subtarget->hasAVX() && + !isa<ConstantFPSDNode>(Op1) && !isa<ConstantFPSDNode>(Op2)) { + + // Convert to vectors, do a VSELECT, and convert back to scalar. + // All of the conversions should be optimized away. + + EVT VecVT = VT == MVT::f32 ? 
MVT::v4f32 : MVT::v2f64; + SDValue VOp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op1); + SDValue VOp2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op2); + SDValue VCmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Cmp); + + EVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64; + VCmp = DAG.getNode(ISD::BITCAST, DL, VCmpVT, VCmp); + + SDValue VSel = DAG.getNode(ISD::VSELECT, DL, VecVT, VCmp, VOp1, VOp2); + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, + VSel, DAG.getIntPtrConstant(0)); + } SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2); SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1); return DAG.getNode(X86ISD::FOR, DL, VT, AndN, And); @@ -14595,6 +14810,13 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. + case Intrinsic::x86_avx2_permd: + case Intrinsic::x86_avx2_permps: + // Operands intentionally swapped. Mask is last operand to intrinsic, + // but second operand for node/instruction. + return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(1)); + case Intrinsic::x86_avx512_mask_valign_q_512: case Intrinsic::x86_avx512_mask_valign_d_512: // Vector source operands are swapped. @@ -16039,21 +16261,19 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, SDLoc dl(Op); SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); - SDValue V; assert(VT.isVector() && "Custom lowering only for vector shifts!"); assert(Subtarget->hasSSE2() && "Only custom lower when we have SSE2!"); - V = LowerScalarImmediateShift(Op, DAG, Subtarget); - if (V.getNode()) + if (SDValue V = LowerScalarImmediateShift(Op, DAG, Subtarget)) return V; - V = LowerScalarVariableShift(Op, DAG, Subtarget); - if (V.getNode()) + if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget)) return V; if (Subtarget->hasAVX512() && (VT == MVT::v16i32 || VT == MVT::v8i64)) return Op; + // AVX2 has VPSLLV/VPSRAV/VPSRLV. if (Subtarget->hasInt256()) { if (Op.getOpcode() == ISD::SRL && @@ -16068,6 +16288,17 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, return Op; } + // 2i64 vector logical shifts can efficiently avoid scalarization - do the + // shifts per-lane and then shuffle the partial results back together. + if (VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) { + // Splat the shift amounts so the scalar shifts above will catch it. + SDValue Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {0, 0}); + SDValue Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {1, 1}); + SDValue R0 = DAG.getNode(Op->getOpcode(), dl, VT, R, Amt0); + SDValue R1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Amt1); + return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3}); + } + // If possible, lower this packed shift into a vector multiply instead of // expanding it into a sequence of scalar shifts. // Do this only if the vector shift count is a constant build_vector. @@ -16238,7 +16469,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, Amt = DAG.getNode(ISD::ANY_EXTEND, dl, NewVT, Amt); return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(Op.getOpcode(), dl, NewVT, R, Amt)); - } + } // Decompose 256-bit shifts into smaller 128-bit shifts. 
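The v2i64 shift path above avoids scalarization by doing the whole-vector shift once per splatted amount and then recombining lane 0 of the first result with lane 1 of the second (the {0, 3} shuffle of the concatenation). A scalar sketch of that recombination (illustrative only):

#include <cassert>
#include <cstdint>

// Scalar analogue of the v2i64 variable-shift lowering: shift the whole
// vector once per splatted amount, then keep lane 0 of the first partial
// result and lane 1 of the second.
void shl_v2i64(uint64_t r[2], const uint64_t amt[2]) {
  uint64_t r0[2] = {r[0] << amt[0], r[1] << amt[0]}; // splat amt[0]
  uint64_t r1[2] = {r[0] << amt[1], r[1] << amt[1]}; // splat amt[1]
  r[0] = r0[0]; // element 0 of the first result
  r[1] = r1[1]; // element 1 of the second result
}

int main() {
  uint64_t v[2] = {1, 1};
  uint64_t a[2] = {3, 7};
  shl_v2i64(v, a);
  assert(v[0] == 8 && v[1] == 128);
}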
if (VT.is256BitVector()) { @@ -16254,12 +16485,9 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, SDValue Amt1, Amt2; if (Amt.getOpcode() == ISD::BUILD_VECTOR) { // Constant shift amount - SmallVector<SDValue, 4> Amt1Csts; - SmallVector<SDValue, 4> Amt2Csts; - for (unsigned i = 0; i != NumElems/2; ++i) - Amt1Csts.push_back(Amt->getOperand(i)); - for (unsigned i = NumElems/2; i != NumElems; ++i) - Amt2Csts.push_back(Amt->getOperand(i)); + SmallVector<SDValue, 8> Ops(Amt->op_begin(), Amt->op_begin() + NumElems); + ArrayRef<SDValue> Amt1Csts = makeArrayRef(Ops).slice(0, NumElems / 2); + ArrayRef<SDValue> Amt2Csts = makeArrayRef(Ops).slice(NumElems / 2); Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Amt1Csts); Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Amt2Csts); @@ -16386,14 +16614,17 @@ bool X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { return needsCmpXchgNb(PTy->getElementType()); } -bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { +TargetLoweringBase::AtomicRMWExpansionKind +X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32; const Type *MemType = AI->getType(); // If the operand is too big, we must see if cmpxchg8/16b is available // and default to library calls otherwise. - if (MemType->getPrimitiveSizeInBits() > NativeWidth) - return needsCmpXchgNb(MemType); + if (MemType->getPrimitiveSizeInBits() > NativeWidth) { + return needsCmpXchgNb(MemType) ? AtomicRMWExpansionKind::CmpXChg + : AtomicRMWExpansionKind::None; + } AtomicRMWInst::BinOp Op = AI->getOperation(); switch (Op) { @@ -16403,13 +16634,14 @@ bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { case AtomicRMWInst::Add: case AtomicRMWInst::Sub: // It's better to use xadd, xsub or xchg for these in all cases. - return false; + return AtomicRMWExpansionKind::None; case AtomicRMWInst::Or: case AtomicRMWInst::And: case AtomicRMWInst::Xor: // If the atomicrmw's result isn't actually used, we can just add a "lock" // prefix to a normal instruction for these operations. - return !AI->use_empty(); + return !AI->use_empty() ? AtomicRMWExpansionKind::CmpXChg + : AtomicRMWExpansionKind::None; case AtomicRMWInst::Nand: case AtomicRMWInst::Max: case AtomicRMWInst::Min: @@ -16417,7 +16649,7 @@ bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { case AtomicRMWInst::UMin: // These always require a non-trivial set of data operations on x86. We must // use a cmpxchg loop. 
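For the operations that return AtomicRMWExpansionKind::CmpXChg here, the expansion is the classic compare-exchange retry loop. A hedged C++ analogue of that loop shape for nand (this shows the general idea, not the IR the pass actually emits):

#include <atomic>
#include <cassert>

// Compare-exchange loop of the kind the expansion requests for
// AtomicRMWInst::Nand: compute the new value from a loaded snapshot and
// retry until the CAS succeeds.
int atomic_nand(std::atomic<int> &a, int operand) {
  int expected = a.load();
  while (!a.compare_exchange_weak(expected, ~(expected & operand))) {
    // expected was refreshed with the current value; loop and retry.
  }
  return expected; // the old value, as atomicrmw returns
}

int main() {
  std::atomic<int> x{0b1100};
  int old = atomic_nand(x, 0b1010);
  assert(old == 0b1100 && x.load() == ~0b1000);
}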
- return true; + return AtomicRMWExpansionKind::CmpXChg; } } @@ -16874,7 +17106,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG); case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op,DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); - case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); + case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG); case ISD::VECTOR_SHUFFLE: return lowerVectorShuffle(Op, Subtarget, DAG); case ISD::VSELECT: return LowerVSELECT(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); @@ -17719,7 +17951,8 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr *MI, // 9 ) EFLAGS (implicit-def) assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!"); - assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands"); + static_assert(X86::AddrNumOperands == 5, + "VAARG_64 assumes 5 address operands"); unsigned DestReg = MI->getOperand(0).getReg(); MachineOperand &Base = MI->getOperand(1); @@ -18095,6 +18328,92 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, // fallthrough --> copy0MBB MachineBasicBlock *thisMBB = BB; MachineFunction *F = BB->getParent(); + + // We also lower double CMOVs: + // (CMOV (CMOV F, T, cc1), T, cc2) + // to two successive branches. For that, we look for another CMOV as the + // following instruction. + // + // Without this, we would add a PHI between the two jumps, which ends up + // creating a few copies all around. For instance, for + // + // (sitofp (zext (fcmp une))) + // + // we would generate: + // + // ucomiss %xmm1, %xmm0 + // movss <1.0f>, %xmm0 + // movaps %xmm0, %xmm1 + // jne .LBB5_2 + // xorps %xmm1, %xmm1 + // .LBB5_2: + // jp .LBB5_4 + // movaps %xmm1, %xmm0 + // .LBB5_4: + // retq + // + // because this custom-inserter would have generated: + // + // A + // | \ + // | B + // | / + // C + // | \ + // | D + // | / + // E + // + // A: X = ...; Y = ... + // B: empty + // C: Z = PHI [X, A], [Y, B] + // D: empty + // E: PHI [X, C], [Z, D] + // + // If we lower both CMOVs in a single step, we can instead generate: + // + // A + // | \ + // | C + // | /| + // |/ | + // | | + // | D + // | / + // E + // + // A: X = ...; Y = ... + // D: empty + // E: PHI [X, A], [X, C], [Y, D] + // + // Which, in our sitofp/fcmp example, gives us something like: + // + // ucomiss %xmm1, %xmm0 + // movss <1.0f>, %xmm0 + // jne .LBB5_4 + // jp .LBB5_4 + // xorps %xmm0, %xmm0 + // .LBB5_4: + // retq + // + MachineInstr *NextCMOV = nullptr; + MachineBasicBlock::iterator NextMIIt = + std::next(MachineBasicBlock::iterator(MI)); + if (NextMIIt != BB->end() && NextMIIt->getOpcode() == MI->getOpcode() && + NextMIIt->getOperand(2).getReg() == MI->getOperand(2).getReg() && + NextMIIt->getOperand(1).getReg() == MI->getOperand(0).getReg()) + NextCMOV = &*NextMIIt; + + MachineBasicBlock *jcc1MBB = nullptr; + + // If we have a double CMOV, we lower it to two successive branches to + // the same block. EFLAGS is used by both, so mark it as live in the second.
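At the DAG level the fused pattern is simply a nested select: the result is T whenever either condition holds. A scalar restatement (illustrative only), which is also the shape the and-of-setcc CMOV combine later in this patch produces after swapping the operands and inverting both conditions:

#include <cassert>

// (CMOV (CMOV F, T, cc1), T, cc2) selects T when either condition holds,
// so both branches can jump to the same join block.
int doubleCMOV(bool cc1, bool cc2, int T, int F) {
  return cc2 ? T : (cc1 ? T : F); // == (cc1 || cc2) ? T : F
}

int main() {
  for (int a = 0; a < 2; ++a)
    for (int b = 0; b < 2; ++b)
      assert(doubleCMOV(a, b, 1, 0) == ((a || b) ? 1 : 0));
}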
+ if (NextCMOV) { + jcc1MBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, jcc1MBB); + jcc1MBB->addLiveIn(X86::EFLAGS); + } + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, copy0MBB); @@ -18103,8 +18422,10 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, // If the EFLAGS register isn't dead in the terminator, then claim that it's // live into the sink and copy blocks. const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); - if (!MI->killsRegister(X86::EFLAGS) && - !checkAndUpdateEFLAGSKill(MI, BB, TRI)) { + + MachineInstr *LastEFLAGSUser = NextCMOV ? NextCMOV : MI; + if (!LastEFLAGSUser->killsRegister(X86::EFLAGS) && + !checkAndUpdateEFLAGSKill(LastEFLAGSUser, BB, TRI)) { copy0MBB->addLiveIn(X86::EFLAGS); sinkMBB->addLiveIn(X86::EFLAGS); } @@ -18115,7 +18436,19 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, sinkMBB->transferSuccessorsAndUpdatePHIs(BB); // Add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); + if (NextCMOV) { + // The fallthrough block may be jcc1MBB, if we have a double CMOV. + BB->addSuccessor(jcc1MBB); + + // In that case, jcc1MBB will itself fallthrough the copy0MBB, and + // jump to the sinkMBB. + jcc1MBB->addSuccessor(copy0MBB); + jcc1MBB->addSuccessor(sinkMBB); + } else { + BB->addSuccessor(copy0MBB); + } + + // The true block target of the first (or only) branch is always sinkMBB. BB->addSuccessor(sinkMBB); // Create the conditional branch instruction. @@ -18123,6 +18456,12 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB); + if (NextCMOV) { + unsigned Opc2 = X86::GetCondBranchFromCond( + (X86::CondCode)NextCMOV->getOperand(3).getImm()); + BuildMI(jcc1MBB, DL, TII->get(Opc2)).addMBB(sinkMBB); + } + // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB @@ -18131,10 +18470,22 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... - BuildMI(*sinkMBB, sinkMBB->begin(), DL, - TII->get(X86::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) - .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); + MachineInstrBuilder MIB = + BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI), + MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) + .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); + + // If we have a double CMOV, the second Jcc provides the same incoming + // value as the first Jcc (the True operand of the SELECT_CC/CMOV nodes). + if (NextCMOV) { + MIB.addReg(MI->getOperand(2).getReg()).addMBB(jcc1MBB); + // Copy the PHI result to the register defined by the second CMOV. + BuildMI(*sinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())), + DL, TII->get(TargetOpcode::COPY), NextCMOV->getOperand(0).getReg()) + .addReg(MI->getOperand(0).getReg()); + NextCMOV->eraseFromParent(); + } MI->eraseFromParent(); // The pseudo instruction is gone now. return sinkMBB; @@ -18218,7 +18569,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, // Calls into a routine in libgcc to allocate more space from the heap. 
const uint32_t *RegMask = - Subtarget->getRegisterInfo()->getCallPreservedMask(CallingConv::C); + Subtarget->getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C); if (IsLP64) { BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI) .addReg(sizeVReg); @@ -18303,7 +18654,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, // FIXME: The 32-bit calls have non-standard calling conventions. Use a // proper register mask. const uint32_t *RegMask = - Subtarget->getRegisterInfo()->getCallPreservedMask(CallingConv::C); + Subtarget->getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C); if (Subtarget->is64Bit()) { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(X86::MOV64rm), X86::RDI) @@ -19132,9 +19483,11 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask, // Note that even with AVX we prefer the PSHUFD form of shuffle for integer // vectors because it can have a load folded into it that UNPCK cannot. This // doesn't preclude something switching to the shorter encoding post-RA. - if (FloatDomain) { - if (Mask.equals(0, 0) || Mask.equals(1, 1)) { - bool Lo = Mask.equals(0, 0); + // + // FIXME: Should teach these routines about AVX vector widths. + if (FloatDomain && VT.getSizeInBits() == 128) { + if (Mask.equals({0, 0}) || Mask.equals({1, 1})) { + bool Lo = Mask.equals({0, 0}); unsigned Shuffle; MVT ShuffleVT; // Check if we have SSE3 which will let us use MOVDDUP. That instruction @@ -19163,8 +19516,8 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask, return true; } if (Subtarget->hasSSE3() && - (Mask.equals(0, 0, 2, 2) || Mask.equals(1, 1, 3, 3))) { - bool Lo = Mask.equals(0, 0, 2, 2); + (Mask.equals({0, 0, 2, 2}) || Mask.equals({1, 1, 3, 3}))) { + bool Lo = Mask.equals({0, 0, 2, 2}); unsigned Shuffle = Lo ? X86ISD::MOVSLDUP : X86ISD::MOVSHDUP; MVT ShuffleVT = MVT::v4f32; if (Depth == 1 && Root->getOpcode() == Shuffle) @@ -19177,8 +19530,8 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask, /*AddTo*/ true); return true; } - if (Mask.equals(0, 0, 1, 1) || Mask.equals(2, 2, 3, 3)) { - bool Lo = Mask.equals(0, 0, 1, 1); + if (Mask.equals({0, 0, 1, 1}) || Mask.equals({2, 2, 3, 3})) { + bool Lo = Mask.equals({0, 0, 1, 1}); unsigned Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH; MVT ShuffleVT = MVT::v4f32; if (Depth == 1 && Root->getOpcode() == Shuffle) @@ -19196,12 +19549,12 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask, // We always canonicalize the 8 x i16 and 16 x i8 shuffles into their UNPCK // variants as none of these have single-instruction variants that are // superior to the UNPCK formulation. - if (!FloatDomain && - (Mask.equals(0, 0, 1, 1, 2, 2, 3, 3) || - Mask.equals(4, 4, 5, 5, 6, 6, 7, 7) || - Mask.equals(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7) || - Mask.equals(8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, - 15))) { + if (!FloatDomain && VT.getSizeInBits() == 128 && + (Mask.equals({0, 0, 1, 1, 2, 2, 3, 3}) || + Mask.equals({4, 4, 5, 5, 6, 6, 7, 7}) || + Mask.equals({0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}) || + Mask.equals( + {8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15}))) { bool Lo = Mask[0] == 0; unsigned Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH; if (Depth == 1 && Root->getOpcode() == Shuffle) @@ -19237,9 +19590,9 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask, // in practice PSHUFB tends to be *very* fast so we're more aggressive. 
if ((Depth >= 3 || HasPSHUFB) && Subtarget->hasSSSE3()) { SmallVector<SDValue, 16> PSHUFBMask; - assert(Mask.size() <= 16 && "Can't shuffle elements smaller than bytes!"); - int Ratio = 16 / Mask.size(); - for (unsigned i = 0; i < 16; ++i) { + int NumBytes = VT.getSizeInBits() / 8; + int Ratio = NumBytes / Mask.size(); + for (int i = 0; i < NumBytes; ++i) { if (Mask[i / Ratio] == SM_SentinelUndef) { PSHUFBMask.push_back(DAG.getUNDEF(MVT::i8)); continue; @@ -19249,12 +19602,13 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask, : 255; PSHUFBMask.push_back(DAG.getConstant(M, MVT::i8)); } - Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Input); + MVT ByteVT = MVT::getVectorVT(MVT::i8, NumBytes); + Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Input); DCI.AddToWorklist(Op.getNode()); SDValue PSHUFBMaskOp = - DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, PSHUFBMask); + DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVT, PSHUFBMask); DCI.AddToWorklist(PSHUFBMaskOp.getNode()); - Op = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, Op, PSHUFBMaskOp); + Op = DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Op, PSHUFBMaskOp); DCI.AddToWorklist(Op.getNode()); DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op), /*AddTo*/ true); @@ -19312,10 +19666,6 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root, MVT VT = Op.getSimpleValueType(); if (!VT.isVector()) return false; // Bail if we hit a non-vector. - // FIXME: This routine should be taught about 256-bit shuffles, or a 256-bit - // version should be added. - if (VT.getSizeInBits() != 128) - return false; assert(Root.getSimpleValueType().isVector() && "Shuffles operate on vector types!"); @@ -19418,12 +19768,26 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root, /// This is a very minor wrapper around getTargetShuffleMask to easy forming v4 /// PSHUF-style masks that can be reused with such instructions. static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) { + MVT VT = N.getSimpleValueType(); SmallVector<int, 4> Mask; bool IsUnary; - bool HaveMask = getTargetShuffleMask(N.getNode(), N.getSimpleValueType(), Mask, IsUnary); + bool HaveMask = getTargetShuffleMask(N.getNode(), VT, Mask, IsUnary); (void)HaveMask; assert(HaveMask); + // If we have more than 128-bits, only the low 128-bits of shuffle mask + // matter. Check that the upper masks are repeats and remove them. + if (VT.getSizeInBits() > 128) { + int LaneElts = 128 / VT.getScalarSizeInBits(); +#ifndef NDEBUG + for (int i = 1, NumLanes = VT.getSizeInBits() / 128; i < NumLanes; ++i) + for (int j = 0; j < LaneElts; ++j) + assert(Mask[j] == Mask[i * LaneElts + j] - LaneElts && + "Mask doesn't repeat in high 128-bit lanes!"); +#endif + Mask.resize(LaneElts); + } + switch (N.getOpcode()) { case X86ISD::PSHUFD: return Mask; @@ -19496,7 +19860,8 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask, case X86ISD::UNPCKH: // For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword // shuffle into a preceding word shuffle. - if (V.getValueType() != MVT::v16i8 && V.getValueType() != MVT::v8i16) + if (V.getSimpleValueType().getScalarType() != MVT::i8 && + V.getSimpleValueType().getScalarType() != MVT::i16) return SDValue(); // Search for a half-shuffle which we can combine with. 
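The widened PSHUFB path above scales an element-level shuffle mask to a byte-level one: with Ratio = NumBytes / Mask.size(), byte i reads byte Mask[i / Ratio] * Ratio + i % Ratio (undef and zero sentinels aside). A standalone sketch of that expansion (toByteMask is an illustrative name, not an LLVM API):

#include <cassert>
#include <vector>

// Scale an element shuffle mask (here without undef/zero sentinels) up to
// a byte shuffle mask: each element index expands to a run of consecutive
// byte indices.
std::vector<int> toByteMask(const std::vector<int> &mask, int numBytes) {
  int ratio = numBytes / static_cast<int>(mask.size()); // bytes per element
  std::vector<int> byteMask(numBytes);
  for (int i = 0; i < numBytes; ++i)
    byteMask[i] = mask[i / ratio] * ratio + i % ratio;
  return byteMask;
}

int main() {
  // A v4i32 mask <2,3,0,1> on a 16-byte vector becomes a 16-entry byte mask.
  std::vector<int> bm = toByteMask({2, 3, 0, 1}, 16);
  assert(bm[0] == 8 && bm[3] == 11 && bm[12] == 4); // element 2 -> bytes 8..11
}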
@@ -19670,8 +20035,7 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, break; case X86ISD::PSHUFLW: case X86ISD::PSHUFHW: - assert(VT == MVT::v8i16); - (void)VT; + assert(VT.getScalarType() == MVT::i16 && "Bad word shuffle type!"); if (combineRedundantHalfShuffle(N, Mask, DAG, DCI)) return SDValue(); // We combined away this shuffle, so we're done. @@ -19679,17 +20043,18 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, // See if this reduces to a PSHUFD which is no more expensive and can // combine with more operations. Note that it has to at least flip the // dwords as otherwise it would have been removed as a no-op. - if (Mask[0] == 2 && Mask[1] == 3 && Mask[2] == 0 && Mask[3] == 1) { + if (makeArrayRef(Mask).equals({2, 3, 0, 1})) { int DMask[] = {0, 1, 2, 3}; int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2; DMask[DOffset + 0] = DOffset + 1; DMask[DOffset + 1] = DOffset + 0; - V = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V); + MVT DVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2); + V = DAG.getNode(ISD::BITCAST, DL, DVT, V); DCI.AddToWorklist(V.getNode()); - V = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V, + V = DAG.getNode(X86ISD::PSHUFD, DL, DVT, V, getV4X86ShuffleImm8ForMask(DMask, DAG)); DCI.AddToWorklist(V.getNode()); - return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V); + return DAG.getNode(ISD::BITCAST, DL, VT, V); } // Look for shuffle patterns which can be implemented as a single unpack. @@ -19717,18 +20082,14 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, int MappedMask[8]; for (int i = 0; i < 8; ++i) MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2; - const int UnpackLoMask[] = {0, 0, 1, 1, 2, 2, 3, 3}; - const int UnpackHiMask[] = {4, 4, 5, 5, 6, 6, 7, 7}; - if (std::equal(std::begin(MappedMask), std::end(MappedMask), - std::begin(UnpackLoMask)) || - std::equal(std::begin(MappedMask), std::end(MappedMask), - std::begin(UnpackHiMask))) { + if (makeArrayRef(MappedMask).equals({0, 0, 1, 1, 2, 2, 3, 3}) || + makeArrayRef(MappedMask).equals({4, 4, 5, 5, 6, 6, 7, 7})) { // We can replace all three shuffles with an unpack. - V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, D.getOperand(0)); + V = DAG.getNode(ISD::BITCAST, DL, VT, D.getOperand(0)); DCI.AddToWorklist(V.getNode()); return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL : X86ISD::UNPCKH, - DL, MVT::v8i16, V, V); + DL, VT, V, V); } } } @@ -19876,10 +20237,6 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, } } - // Only handle 128 wide vector from here on. - if (!VT.is128BitVector()) - return SDValue(); - // Combine a vector_shuffle that is equal to build_vector load1, load2, load3, // load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are // consecutive, non-overlapping, and in the right order. @@ -20987,6 +21344,49 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { return SDValue(); } +/// Check whether Cond is an AND/OR of SETCCs off of the same EFLAGS. 
+/// Match: +/// (X86or (X86setcc) (X86setcc)) +/// (X86cmp (and (X86setcc) (X86setcc)), 0) +static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0, + X86::CondCode &CC1, SDValue &Flags, + bool &isAnd) { + if (Cond->getOpcode() == X86ISD::CMP) { + ConstantSDNode *CondOp1C = dyn_cast<ConstantSDNode>(Cond->getOperand(1)); + if (!CondOp1C || !CondOp1C->isNullValue()) + return false; + + Cond = Cond->getOperand(0); + } + + isAnd = false; + + SDValue SetCC0, SetCC1; + switch (Cond->getOpcode()) { + default: return false; + case ISD::AND: + case X86ISD::AND: + isAnd = true; + // fallthru + case ISD::OR: + case X86ISD::OR: + SetCC0 = Cond->getOperand(0); + SetCC1 = Cond->getOperand(1); + break; + }; + + // Make sure we have SETCC nodes, using the same flags value. + if (SetCC0.getOpcode() != X86ISD::SETCC || + SetCC1.getOpcode() != X86ISD::SETCC || + SetCC0->getOperand(1) != SetCC1->getOperand(1)) + return false; + + CC0 = (X86::CondCode)SetCC0->getConstantOperandVal(0); + CC1 = (X86::CondCode)SetCC1->getConstantOperandVal(0); + Flags = SetCC0->getOperand(1); + return true; +} + /// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL] static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, @@ -21156,6 +21556,44 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, } } + // Fold and/or of setcc's to double CMOV: + // (CMOV F, T, ((cc1 | cc2) != 0)) -> (CMOV (CMOV F, T, cc1), T, cc2) + // (CMOV F, T, ((cc1 & cc2) != 0)) -> (CMOV (CMOV T, F, !cc1), F, !cc2) + // + // This combine lets us generate: + // cmovcc1 (jcc1 if we don't have CMOV) + // cmovcc2 (same) + // instead of: + // setcc1 + // setcc2 + // and/or + // cmovne (jne if we don't have CMOV) + // When we can't use the CMOV instruction, it might increase branch + // mispredicts. + // When we can use CMOV, or when there is no mispredict, this improves + // throughput and reduces register pressure. + // + if (CC == X86::COND_NE) { + SDValue Flags; + X86::CondCode CC0, CC1; + bool isAndSetCC; + if (checkBoolTestAndOrSetCCCombine(Cond, CC0, CC1, Flags, isAndSetCC)) { + if (isAndSetCC) { + std::swap(FalseOp, TrueOp); + CC0 = X86::GetOppositeBranchCondition(CC0); + CC1 = X86::GetOppositeBranchCondition(CC1); + } + + SDValue LOps[] = {FalseOp, TrueOp, DAG.getConstant(CC0, MVT::i8), + Flags}; + SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), LOps); + SDValue Ops[] = {LCMOV, TrueOp, DAG.getConstant(CC1, MVT::i8), Flags}; + SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(CMOV.getNode(), 1)); + return CMOV; + } + } + return SDValue(); } @@ -21166,24 +21604,16 @@ static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, default: return SDValue(); // SSE/AVX/AVX2 blend intrinsics. case Intrinsic::x86_avx2_pblendvb: - case Intrinsic::x86_avx2_pblendw: - case Intrinsic::x86_avx2_pblendd_128: - case Intrinsic::x86_avx2_pblendd_256: // Don't try to simplify this intrinsic if we don't have AVX2. if (!Subtarget->hasAVX2()) return SDValue(); // FALL-THROUGH - case Intrinsic::x86_avx_blend_pd_256: - case Intrinsic::x86_avx_blend_ps_256: case Intrinsic::x86_avx_blendv_pd_256: case Intrinsic::x86_avx_blendv_ps_256: // Don't try to simplify this intrinsic if we don't have AVX. 
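[Annotation, not part of the patch.] As a quick sanity check of the double-CMOV fold added above, the rewrite rests on two select identities (recall that X86ISD::CMOV F, T, cc picks T when cc holds). This exhaustive check over all condition values is illustrative only:

#include <cassert>

static int sel(bool C, int T, int F) { return C ? T : F; }

int main() {
  const int T = 1, F = 2;
  for (int cc1 = 0; cc1 < 2; ++cc1)
    for (int cc2 = 0; cc2 < 2; ++cc2) {
      // (CMOV F, T, cc1 | cc2) == (CMOV (CMOV F, T, cc1), T, cc2)
      assert(sel(cc1 | cc2, T, F) == sel(cc2, T, sel(cc1, T, F)));
      // (CMOV F, T, cc1 & cc2) == (CMOV (CMOV T, F, !cc1), F, !cc2)
      assert(sel(cc1 && cc2, T, F) == sel(!cc2, F, sel(!cc1, F, T)));
    }
  return 0;
}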
if (!Subtarget->hasAVX()) return SDValue(); // FALL-THROUGH - case Intrinsic::x86_sse41_pblendw: - case Intrinsic::x86_sse41_blendpd: - case Intrinsic::x86_sse41_blendps: case Intrinsic::x86_sse41_blendvps: case Intrinsic::x86_sse41_blendvpd: case Intrinsic::x86_sse41_pblendvb: { @@ -21640,7 +22070,7 @@ static SDValue VectorZextCombine(SDNode *N, SelectionDAG &DAG, // an and with a mask. // We'd like to try to combine that into a shuffle with zero // plus a bitcast, removing the and. - if (N0.getOpcode() != ISD::BITCAST || + if (N0.getOpcode() != ISD::BITCAST || N0.getOperand(0).getOpcode() != ISD::VECTOR_SHUFFLE) return SDValue(); @@ -21670,7 +22100,7 @@ static SDValue VectorZextCombine(SDNode *N, SelectionDAG &DAG, unsigned ResSize = N1.getValueType().getScalarSizeInBits(); // Make sure the splat matches the mask we expect - if (SplatBitSize > ResSize || + if (SplatBitSize > ResSize || (SplatValue + 1).exactLogBase2() != (int)SrcSize) return SDValue(); @@ -21724,12 +22154,10 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, if (DCI.isBeforeLegalizeOps()) return SDValue(); - SDValue Zext = VectorZextCombine(N, DAG, DCI, Subtarget); - if (Zext.getNode()) + if (SDValue Zext = VectorZextCombine(N, DAG, DCI, Subtarget)) return Zext; - SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget); - if (R.getNode()) + if (SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget)) return R; EVT VT = N->getValueType(0); @@ -22521,7 +22949,7 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) { // If A and B occur in reverse order in RHS, then "swap" them (which means // rewriting the mask). if (A != C) - CommuteVectorShuffleMask(RMask, NumElts); + ShuffleVectorSDNode::commuteMask(RMask); // At this point LHS and RHS are equivalent to // LHS = VECTOR_SHUFFLE A, B, LMask @@ -22630,7 +23058,7 @@ static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) { if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1))) if (C->getValueAPF().isPosZero()) return N->getOperand(1); - + return SDValue(); } @@ -22864,45 +23292,51 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG, if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB) if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0))) if (C->getAPIntValue() == 0 && LHS.hasOneUse()) { - SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N), - LHS.getValueType(), RHS, LHS.getOperand(1)); - return DAG.getSetCC(SDLoc(N), N->getValueType(0), - addV, DAG.getConstant(0, addV.getValueType()), CC); + SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N), LHS.getValueType(), RHS, + LHS.getOperand(1)); + return DAG.getSetCC(SDLoc(N), N->getValueType(0), addV, + DAG.getConstant(0, addV.getValueType()), CC); } if ((CC == ISD::SETNE || CC == ISD::SETEQ) && RHS.getOpcode() == ISD::SUB) if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS.getOperand(0))) if (C->getAPIntValue() == 0 && RHS.hasOneUse()) { - SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N), - RHS.getValueType(), LHS, RHS.getOperand(1)); - return DAG.getSetCC(SDLoc(N), N->getValueType(0), - addV, DAG.getConstant(0, addV.getValueType()), CC); + SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N), RHS.getValueType(), LHS, + RHS.getOperand(1)); + return DAG.getSetCC(SDLoc(N), N->getValueType(0), addV, + DAG.getConstant(0, addV.getValueType()), CC); } - if (VT.getScalarType() == MVT::i1) { - bool IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) && - (LHS.getOperand(0).getValueType().getScalarType() == MVT::i1); - bool IsVZero0 = 
ISD::isBuildVectorAllZeros(LHS.getNode()); - if (!IsSEXT0 && !IsVZero0) - return SDValue(); - bool IsSEXT1 = (RHS.getOpcode() == ISD::SIGN_EXTEND) && - (RHS.getOperand(0).getValueType().getScalarType() == MVT::i1); + if (VT.getScalarType() == MVT::i1 && + (CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) { + bool IsSEXT0 = + (LHS.getOpcode() == ISD::SIGN_EXTEND) && + (LHS.getOperand(0).getValueType().getScalarType() == MVT::i1); bool IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode()); - if (!IsSEXT1 && !IsVZero1) - return SDValue(); + if (!IsSEXT0 || !IsVZero1) { + // Swap the operands and update the condition code. + std::swap(LHS, RHS); + CC = ISD::getSetCCSwappedOperands(CC); + + IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) && + (LHS.getOperand(0).getValueType().getScalarType() == MVT::i1); + IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode()); + } if (IsSEXT0 && IsVZero1) { - assert(VT == LHS.getOperand(0).getValueType() && "Uexpected operand type"); - if (CC == ISD::SETEQ) + assert(VT == LHS.getOperand(0).getValueType() && + "Uexpected operand type"); + if (CC == ISD::SETGT) + return DAG.getConstant(0, VT); + if (CC == ISD::SETLE) + return DAG.getConstant(1, VT); + if (CC == ISD::SETEQ || CC == ISD::SETGE) return DAG.getNOT(DL, LHS.getOperand(0), VT); + + assert((CC == ISD::SETNE || CC == ISD::SETLT) && + "Unexpected condition code!"); return LHS.getOperand(0); } - if (IsSEXT1 && IsVZero0) { - assert(VT == RHS.getOperand(0).getValueType() && "Uexpected operand type"); - if (CC == ISD::SETEQ) - return DAG.getNOT(DL, RHS.getOperand(0), VT); - return RHS.getOperand(0); - } } return SDValue(); @@ -22940,7 +23374,7 @@ static SDValue PerformINSERTPSCombine(SDNode *N, SelectionDAG &DAG, // countS and just gets an f32 from that address. unsigned DestIndex = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue() >> 6; - + Ld = NarrowVectorLoadToElement(cast<LoadSDNode>(Ld), DestIndex, DAG); // Create this as a scalar to vector to match the instruction pattern. @@ -22964,7 +23398,7 @@ static SDValue PerformBLENDICombine(SDNode *N, SelectionDAG &DAG) { // pattern-matching possibilities related to scalar math ops in SSE/AVX. // x86InstrInfo knows how to commute this back after instruction selection // if it would help register allocation. - + // TODO: If optimizing for size or a processor that doesn't suffer from // partial register update stalls, this should be transformed into a MOVSD // instruction because a MOVSD is 1-2 bytes smaller than a BLENDPD. @@ -23503,27 +23937,23 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const { // X86 Inline Assembly Support //===----------------------------------------------------------------------===// -namespace { - // Helper to match a string separated by whitespace. - bool matchAsmImpl(StringRef s, ArrayRef<const StringRef *> args) { - s = s.substr(s.find_first_not_of(" \t")); // Skip leading whitespace. - - for (unsigned i = 0, e = args.size(); i != e; ++i) { - StringRef piece(*args[i]); - if (!s.startswith(piece)) // Check if the piece matches. - return false; +// Helper to match a string separated by whitespace. +static bool matchAsm(StringRef S, ArrayRef<const char *> Pieces) { + S = S.substr(S.find_first_not_of(" \t")); // Skip leading whitespace. - s = s.substr(piece.size()); - StringRef::size_type pos = s.find_first_not_of(" \t"); - if (pos == 0) // We matched a prefix. - return false; + for (StringRef Piece : Pieces) { + if (!S.startswith(Piece)) // Check if the piece matches. 
+ return false; - s = s.substr(pos); - } + S = S.substr(Piece.size()); + StringRef::size_type Pos = S.find_first_not_of(" \t"); + if (Pos == 0) // We matched a prefix. + return false; - return s.empty(); + S = S.substr(Pos); } - const VariadicFunction1<bool, StringRef, StringRef, matchAsmImpl> matchAsm={}; + + return S.empty(); } static bool clobbersFlagRegisters(const SmallVector<StringRef, 4> &AsmPieces) { @@ -23563,12 +23993,12 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { // ops instead of emitting the bswap asm. For now, we don't support 486 or // lower so don't worry about this. // bswap $0 - if (matchAsm(AsmPieces[0], "bswap", "$0") || - matchAsm(AsmPieces[0], "bswapl", "$0") || - matchAsm(AsmPieces[0], "bswapq", "$0") || - matchAsm(AsmPieces[0], "bswap", "${0:q}") || - matchAsm(AsmPieces[0], "bswapl", "${0:q}") || - matchAsm(AsmPieces[0], "bswapq", "${0:q}")) { + if (matchAsm(AsmPieces[0], {"bswap", "$0"}) || + matchAsm(AsmPieces[0], {"bswapl", "$0"}) || + matchAsm(AsmPieces[0], {"bswapq", "$0"}) || + matchAsm(AsmPieces[0], {"bswap", "${0:q}"}) || + matchAsm(AsmPieces[0], {"bswapl", "${0:q}"}) || + matchAsm(AsmPieces[0], {"bswapq", "${0:q}"})) { // No need to check constraints, nothing other than the equivalent of // "=r,0" would be valid here. return IntrinsicLowering::LowerToByteSwap(CI); @@ -23577,8 +24007,8 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { // rorw $$8, ${0:w} --> llvm.bswap.i16 if (CI->getType()->isIntegerTy(16) && IA->getConstraintString().compare(0, 5, "=r,0,") == 0 && - (matchAsm(AsmPieces[0], "rorw", "$$8,", "${0:w}") || - matchAsm(AsmPieces[0], "rolw", "$$8,", "${0:w}"))) { + (matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) || + matchAsm(AsmPieces[0], {"rolw", "$$8,", "${0:w}"}))) { AsmPieces.clear(); const std::string &ConstraintsStr = IA->getConstraintString(); SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); @@ -23590,9 +24020,9 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { case 3: if (CI->getType()->isIntegerTy(32) && IA->getConstraintString().compare(0, 5, "=r,0,") == 0 && - matchAsm(AsmPieces[0], "rorw", "$$8,", "${0:w}") && - matchAsm(AsmPieces[1], "rorl", "$$16,", "$0") && - matchAsm(AsmPieces[2], "rorw", "$$8,", "${0:w}")) { + matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) && + matchAsm(AsmPieces[1], {"rorl", "$$16,", "$0"}) && + matchAsm(AsmPieces[2], {"rorw", "$$8,", "${0:w}"})) { AsmPieces.clear(); const std::string &ConstraintsStr = IA->getConstraintString(); SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); @@ -23607,9 +24037,9 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64 - if (matchAsm(AsmPieces[0], "bswap", "%eax") && - matchAsm(AsmPieces[1], "bswap", "%edx") && - matchAsm(AsmPieces[2], "xchgl", "%eax,", "%edx")) + if (matchAsm(AsmPieces[0], {"bswap", "%eax"}) && + matchAsm(AsmPieces[1], {"bswap", "%edx"}) && + matchAsm(AsmPieces[2], {"xchgl", "%eax,", "%edx"})) return IntrinsicLowering::LowerToByteSwap(CI); } } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 4423015..dd20ec2 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -30,37 +30,37 @@ namespace llvm { // Start the numbering where the builtin ops leave off. 
FIRST_NUMBER = ISD::BUILTIN_OP_END, - /// BSF - Bit scan forward. - /// BSR - Bit scan reverse. + /// Bit scan forward. BSF, + /// Bit scan reverse. BSR, - /// SHLD, SHRD - Double shift instructions. These correspond to + /// Double shift instructions. These correspond to /// X86::SHLDxx and X86::SHRDxx instructions. SHLD, SHRD, - /// FAND - Bitwise logical AND of floating point values. This corresponds + /// Bitwise logical AND of floating point values. This corresponds /// to X86::ANDPS or X86::ANDPD. FAND, - /// FOR - Bitwise logical OR of floating point values. This corresponds + /// Bitwise logical OR of floating point values. This corresponds /// to X86::ORPS or X86::ORPD. FOR, - /// FXOR - Bitwise logical XOR of floating point values. This corresponds + /// Bitwise logical XOR of floating point values. This corresponds /// to X86::XORPS or X86::XORPD. FXOR, - /// FANDN - Bitwise logical ANDNOT of floating point values. This + /// Bitwise logical ANDNOT of floating point values. This /// corresponds to X86::ANDNPS or X86::ANDNPD. FANDN, - /// FSRL - Bitwise logical right shift of floating point values. These + /// Bitwise logical right shift of floating point values. This /// corresponds to X86::PSRLDQ. FSRL, - /// CALL - These operations represent an abstract X86 call + /// These operations represent an abstract X86 call /// instruction, which includes a bunch of information. In particular the /// operands of these node are: /// @@ -79,8 +79,7 @@ namespace llvm { /// CALL, - /// RDTSC_DAG - This operation implements the lowering for - /// readcyclecounter + /// This operation implements the lowering for readcyclecounter RDTSC_DAG, /// X86 Read Time-Stamp Counter and Processor ID. @@ -131,187 +130,186 @@ namespace llvm { /// 1 is the number of bytes of stack to pop. RET_FLAG, - /// REP_STOS - Repeat fill, corresponds to X86::REP_STOSx. + /// Repeat fill, corresponds to X86::REP_STOSx. REP_STOS, - /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx. + /// Repeat move, corresponds to X86::REP_MOVSx. REP_MOVS, - /// GlobalBaseReg - On Darwin, this node represents the result of the popl + /// On Darwin, this node represents the result of the popl /// at function entry, used for PIC code. GlobalBaseReg, - /// Wrapper - A wrapper node for TargetConstantPool, + /// A wrapper node for TargetConstantPool, /// TargetExternalSymbol, and TargetGlobalAddress. Wrapper, - /// WrapperRIP - Special wrapper used under X86-64 PIC mode for RIP + /// Special wrapper used under X86-64 PIC mode for RIP /// relative displacements. WrapperRIP, - /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector + /// Copies a 64-bit value from the low word of an XMM vector /// to an MMX vector. If you think this is too close to the previous /// mnemonic, so do I; blame Intel. MOVDQ2Q, - /// MMX_MOVD2W - Copies a 32-bit value from the low word of a MMX + /// Copies a 32-bit value from the low word of a MMX /// vector to a GPR. MMX_MOVD2W, - /// MMX_MOVW2D - Copies a GPR into the low 32-bit word of a MMX vector + /// Copies a GPR into the low 32-bit word of a MMX vector /// and zero out the high word. MMX_MOVW2D, - /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to + /// Extract an 8-bit value from a vector and zero extend it to /// i32, corresponds to X86::PEXTRB. PEXTRB, - /// PEXTRW - Extract a 16-bit value from a vector and zero extend it to + /// Extract a 16-bit value from a vector and zero extend it to /// i32, corresponds to X86::PEXTRW. 
PEXTRW, - /// INSERTPS - Insert any element of a 4 x float vector into any element + /// Insert any element of a 4 x float vector into any element /// of a destination 4 x floatvector. INSERTPS, - /// PINSRB - Insert the lower 8-bits of a 32-bit value to a vector, + /// Insert the lower 8-bits of a 32-bit value to a vector, /// corresponds to X86::PINSRB. PINSRB, - /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector, + /// Insert the lower 16-bits of a 32-bit value to a vector, /// corresponds to X86::PINSRW. PINSRW, MMX_PINSRW, - /// PSHUFB - Shuffle 16 8-bit values within a vector. + /// Shuffle 16 8-bit values within a vector. PSHUFB, - /// ANDNP - Bitwise Logical AND NOT of Packed FP values. + /// Bitwise Logical AND NOT of Packed FP values. ANDNP, - /// PSIGN - Copy integer sign. + /// Copy integer sign. PSIGN, - /// BLENDI - Blend where the selector is an immediate. + /// Blend where the selector is an immediate. BLENDI, - /// SHRUNKBLEND - Blend where the condition has been shrunk. + /// Blend where the condition has been shrunk. /// This is used to emphasize that the condition mask is /// no more valid for generic VSELECT optimizations. SHRUNKBLEND, - /// ADDSUB - Combined add and sub on an FP vector. + /// Combined add and sub on an FP vector. ADDSUB, - // FADD, FSUB, FMUL, FDIV, FMIN, FMAX - FP vector ops with rounding mode. + // FP vector ops with rounding mode. FADD_RND, FSUB_RND, FMUL_RND, FDIV_RND, - // SUBUS - Integer sub with unsigned saturation. + // Integer sub with unsigned saturation. SUBUS, - /// HADD - Integer horizontal add. + /// Integer horizontal add. HADD, - /// HSUB - Integer horizontal sub. + /// Integer horizontal sub. HSUB, - /// FHADD - Floating point horizontal add. + /// Floating point horizontal add. FHADD, - /// FHSUB - Floating point horizontal sub. + /// Floating point horizontal sub. FHSUB, - /// UMAX, UMIN - Unsigned integer max and min. + /// Unsigned integer max and min. UMAX, UMIN, - /// SMAX, SMIN - Signed integer max and min. + /// Signed integer max and min. SMAX, SMIN, - /// FMAX, FMIN - Floating point max and min. - /// + /// Floating point max and min. FMAX, FMIN, - /// FMAXC, FMINC - Commutative FMIN and FMAX. + /// Commutative FMIN and FMAX. FMAXC, FMINC, - /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal - /// approximation. Note that these typically require refinement + /// Floating point reciprocal-sqrt and reciprocal approximation. + /// Note that these typically require refinement /// in order to obtain suitable precision. FRSQRT, FRCP, - // TLSADDR - Thread Local Storage. + // Thread Local Storage. TLSADDR, - // TLSBASEADDR - Thread Local Storage. A call to get the start address + // Thread Local Storage. A call to get the start address // of the TLS block for the current module. TLSBASEADDR, - // TLSCALL - Thread Local Storage. When calling to an OS provided + // Thread Local Storage. When calling to an OS provided // thunk at the address from an earlier relocation. TLSCALL, - // EH_RETURN - Exception Handling helpers. + // Exception Handling helpers. EH_RETURN, - // EH_SJLJ_SETJMP - SjLj exception handling setjmp. + // SjLj exception handling setjmp. EH_SJLJ_SETJMP, - // EH_SJLJ_LONGJMP - SjLj exception handling longjmp. + // SjLj exception handling longjmp. EH_SJLJ_LONGJMP, - /// TC_RETURN - Tail call return. See X86TargetLowering::LowerCall for + /// Tail call return. See X86TargetLowering::LowerCall for /// the list of operands. 
TC_RETURN, - // VZEXT_MOVL - Vector move to low scalar and zero higher vector elements. + // Vector move to low scalar and zero higher vector elements. VZEXT_MOVL, - // VZEXT - Vector integer zero-extend. + // Vector integer zero-extend. VZEXT, - // VSEXT - Vector integer signed-extend. + // Vector integer signed-extend. VSEXT, - // VTRUNC - Vector integer truncate. + // Vector integer truncate. VTRUNC, - // VTRUNC - Vector integer truncate with mask. + // Vector integer truncate with mask. VTRUNCM, - // VFPEXT - Vector FP extend. + // Vector FP extend. VFPEXT, - // VFPROUND - Vector FP round. + // Vector FP round. VFPROUND, - // VSHL, VSRL - 128-bit vector logical left / right shift + // 128-bit vector logical left / right shift VSHLDQ, VSRLDQ, - // VSHL, VSRL, VSRA - Vector shift elements + // Vector shift elements VSHL, VSRL, VSRA, - // VSHLI, VSRLI, VSRAI - Vector shift elements by immediate + // Vector shift elements by immediate VSHLI, VSRLI, VSRAI, - // CMPP - Vector packed double/float comparison. + // Vector packed double/float comparison. CMPP, - // PCMP* - Vector integer comparisons. + // Vector integer comparisons. PCMPEQ, PCMPGT, - // PCMP*M - Vector integer comparisons, the result is in a mask vector. + // Vector integer comparisons, the result is in a mask vector. PCMPEQM, PCMPGTM, - /// CMPM, CMPMU - Vector comparison generating mask bits for fp and + /// Vector comparison generating mask bits for fp and /// integer signed and unsigned data types. CMPM, CMPMU, - // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results. + // Arithmetic operations with FLAGS results. ADD, SUB, ADC, SBB, SMUL, INC, DEC, OR, XOR, AND, - BEXTR, // BEXTR - Bit field extract + BEXTR, // Bit field extract UMUL, // LOW, HI, FLAGS = umul LHS, RHS @@ -322,16 +320,16 @@ namespace llvm { UDIVREM8_ZEXT_HREG, SDIVREM8_SEXT_HREG, - // MUL_IMM - X86 specific multiply by immediate. + // X86-specific multiply by immediate. MUL_IMM, - // PTEST - Vector bitwise comparisons. + // Vector bitwise comparisons. PTEST, - // TESTP - Vector packed fp sign bitwise comparisons. + // Vector packed fp sign bitwise comparisons. TESTP, - // TESTM, TESTNM - Vector "test" in AVX-512, the result is in a mask vector. + // Vector "test" in AVX-512, the result is in a mask vector. TESTM, TESTNM, @@ -697,6 +695,12 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; + unsigned getInlineAsmMemConstraint( + const std::string &ConstraintCode) const override { + // FIXME: Map different constraints differently. + return InlineAsm::Constraint_m; + } + /// Given a physical register constraint /// (e.g. {edx}), return the register number and the register class for the /// register. This should only be used for C_Register constraints. 
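[Annotation, not part of the patch.] A minimal self-contained sketch of the whitespace-separated matching that the rewritten matchAsm helper above performs for the bswap/rorw inline-asm patterns. matchPieces is an invented name and std::string_view stands in for LLVM's StringRef; the behavior mirrors the helper: each piece must match in order, pieces must be separated by whitespace (so a piece never matches a mere prefix of a token), and only whitespace may remain at the end.

#include <cassert>
#include <cstddef>
#include <initializer_list>
#include <string_view>

static bool matchPieces(std::string_view S,
                        std::initializer_list<std::string_view> Pieces) {
  std::size_t P = S.find_first_not_of(" \t"); // skip leading whitespace
  S = (P == std::string_view::npos) ? std::string_view() : S.substr(P);
  for (std::string_view Piece : Pieces) {
    if (S.substr(0, Piece.size()) != Piece)
      return false;               // piece does not match here
    S.remove_prefix(Piece.size());
    P = S.find_first_not_of(" \t");
    if (P == 0)
      return false;               // we only matched a prefix of a token
    S = (P == std::string_view::npos) ? std::string_view() : S.substr(P);
  }
  return S.empty();               // nothing but whitespace may remain
}

int main() {
  assert(matchPieces("  bswap $0", {"bswap", "$0"}));
  assert(!matchPieces("bswapl $0", {"bswap", "$0"})); // prefix guard fires
  assert(!matchPieces("bswap $0, %eax", {"bswap", "$0"}));
  return 0;
}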
On @@ -993,7 +997,8 @@ namespace llvm { bool shouldExpandAtomicLoadInIR(LoadInst *SI) const override; bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + TargetLoweringBase::AtomicRMWExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override; diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 4923bc5..509602f 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -74,6 +74,15 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc, !if (!eq (Size, 128), "v2i64", !if (!eq (Size, 256), "v4i64", VTName)), VTName)); + + PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # + !if (!eq (TypeVariantName, "i"), + !if (!eq (Size, 128), "v2i64", + !if (!eq (Size, 256), "v4i64", + !if (!eq (Size, 512), + !if (!eq (EltSize, 64), "v8i64", "v16i32"), + VTName))), VTName)); + PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT); // The corresponding float type, e.g. v16f32 for v16i32 @@ -107,6 +116,9 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc, // create the canonical constant zero node ImmAllZerosV. ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32"); dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV))); + + string ZSuffix = !if (!eq (Size, 128), "Z128", + !if (!eq (Size, 256), "Z256", "Z")); } def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">; @@ -1559,6 +1571,11 @@ multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC, (outs KRC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), !strconcat("vcmp", suffix, "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>; + def rrib_alt: AVX512PIi8<0xC2, MRMSrcReg, + (outs KRC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), + !strconcat("vcmp", suffix, + "\t{{sae}, $cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc, {sae}}"), + [], d>, EVEX_B; let mayLoad = 1 in def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem, (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), @@ -2047,6 +2064,8 @@ let Predicates = [HasVLX] in { (v8i1 (COPY_TO_REGCLASS VK4:$src, VK8))>; def : Pat<(v8i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))), (v8i1 (COPY_TO_REGCLASS VK2:$src, VK8))>; + def : Pat<(v4i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))), + (v4i1 (COPY_TO_REGCLASS VK2:$src, VK4))>; def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))), (v4i1 (COPY_TO_REGCLASS VK8:$src, VK4))>; def : Pat<(v2i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))), @@ -2062,177 +2081,193 @@ def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))), (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>, Requires<[HasAVX512, NoDQI]>; + +def : Pat<(v4i1 (X86vshli VK4:$src, (i8 imm:$imm))), + (v4i1 (COPY_TO_REGCLASS + (KSHIFTLWri (COPY_TO_REGCLASS VK4:$src, VK16), + (I8Imm $imm)), VK4))>, Requires<[HasAVX512]>; + +def : Pat<(v4i1 (X86vsrli VK4:$src, (i8 imm:$imm))), + (v4i1 (COPY_TO_REGCLASS + (KSHIFTRWri (COPY_TO_REGCLASS VK4:$src, VK16), + (I8Imm $imm)), VK4))>, Requires<[HasAVX512]>; + //===----------------------------------------------------------------------===// // AVX-512 - Aligned and unaligned load and store // -multiclass avx512_load<bits<8> opc, string OpcodeStr, PatFrag ld_frag, - RegisterClass KRC, RegisterClass RC, - ValueType vt, ValueType zvt, X86MemOperand memop, - Domain d, bit IsReMaterializable = 1> { -let hasSideEffects = 
0 in { - def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), + +multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, + PatFrag ld_frag, PatFrag mload, + bit IsReMaterializable = 1> { + let hasSideEffects = 0 in { + def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], - d>, EVEX; - def rrkz : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src), + _.ExeDomain>, EVEX; + def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), + (ins _.KRCWM:$mask, _.RC:$src), !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|", - "${dst} {${mask}} {z}, $src}"), [], d>, EVEX, EVEX_KZ; - } + "${dst} {${mask}} {z}, $src}"), [], _.ExeDomain>, + EVEX, EVEX_KZ; + let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable, SchedRW = [WriteLoad] in - def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins memop:$src), + def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (vt (bitconvert (ld_frag addr:$src))))], - d>, EVEX; - - let AddedComplexity = 20 in { - let Constraints = "$src0 = $dst", hasSideEffects = 0 in { - let hasSideEffects = 0 in - def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), - (ins RC:$src0, KRC:$mask, RC:$src1), - !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|", - "${dst} {${mask}}, $src1}"), - [(set RC:$dst, (vt (vselect KRC:$mask, - (vt RC:$src1), - (vt RC:$src0))))], - d>, EVEX, EVEX_K; + [(set _.RC:$dst, (_.VT (bitconvert (ld_frag addr:$src))))], + _.ExeDomain>, EVEX; + + let Constraints = "$src0 = $dst" in { + def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), + (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1), + !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|", + "${dst} {${mask}}, $src1}"), + [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask, + (_.VT _.RC:$src1), + (_.VT _.RC:$src0))))], _.ExeDomain>, + EVEX, EVEX_K; let mayLoad = 1, SchedRW = [WriteLoad] in - def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), - (ins RC:$src0, KRC:$mask, memop:$src1), + def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), + (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1), !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|", "${dst} {${mask}}, $src1}"), - [(set RC:$dst, (vt - (vselect KRC:$mask, - (vt (bitconvert (ld_frag addr:$src1))), - (vt RC:$src0))))], - d>, EVEX, EVEX_K; + [(set _.RC:$dst, (_.VT + (vselect _.KRCWM:$mask, + (_.VT (bitconvert (ld_frag addr:$src1))), + (_.VT _.RC:$src0))))], _.ExeDomain>, EVEX, EVEX_K; } let mayLoad = 1, SchedRW = [WriteLoad] in - def rmkz : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), - (ins KRC:$mask, memop:$src), - !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|", - "${dst} {${mask}} {z}, $src}"), - [(set RC:$dst, (vt - (vselect KRC:$mask, - (vt (bitconvert (ld_frag addr:$src))), - (vt (bitconvert (zvt immAllZerosV))))))], - d>, EVEX, EVEX_KZ; + def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), + (ins _.KRCWM:$mask, _.MemOp:$src), + OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"# + "${dst} {${mask}} {z}, $src}", + [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask, + (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))], + _.ExeDomain>, EVEX, EVEX_KZ; } + def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)), + (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>; + + def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)), + (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>; + + def : Pat<(_.VT (mload 
addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))), + (!cast<Instruction>(NAME#_.ZSuffix##rmk) _.RC:$src0, + _.KRCWM:$mask, addr:$ptr)>; } -multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, string ld_pat, - string elty, string elsz, string vsz512, - string vsz256, string vsz128, Domain d, - Predicate prd, bit IsReMaterializable = 1> { +multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr, + AVX512VLVectorVTInfo _, + Predicate prd, + bit IsReMaterializable = 1> { let Predicates = [prd] in - defm Z : avx512_load<opc, OpcodeStr, - !cast<PatFrag>(ld_pat##"v"##vsz512##elty##elsz), - !cast<RegisterClass>("VK"##vsz512##"WM"), VR512, - !cast<ValueType>("v"##vsz512##elty##elsz), v16i32, - !cast<X86MemOperand>(elty##"512mem"), d, - IsReMaterializable>, EVEX_V512; + defm Z : avx512_load<opc, OpcodeStr, _.info512, _.info512.AlignedLdFrag, + masked_load_aligned512, IsReMaterializable>, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_load<opc, OpcodeStr, - !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"), - "v"##vsz256##elty##elsz, "v4i64")), - !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X, - !cast<ValueType>("v"##vsz256##elty##elsz), v8i32, - !cast<X86MemOperand>(elty##"256mem"), d, - IsReMaterializable>, EVEX_V256; - - defm Z128 : avx512_load<opc, OpcodeStr, - !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"), - "v"##vsz128##elty##elsz, "v2i64")), - !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X, - !cast<ValueType>("v"##vsz128##elty##elsz), v4i32, - !cast<X86MemOperand>(elty##"128mem"), d, - IsReMaterializable>, EVEX_V128; + defm Z256 : avx512_load<opc, OpcodeStr, _.info256, _.info256.AlignedLdFrag, + masked_load_aligned256, IsReMaterializable>, EVEX_V256; + defm Z128 : avx512_load<opc, OpcodeStr, _.info128, _.info128.AlignedLdFrag, + masked_load_aligned128, IsReMaterializable>, EVEX_V128; } } +multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, + AVX512VLVectorVTInfo _, + Predicate prd, + bit IsReMaterializable = 1> { + let Predicates = [prd] in + defm Z : avx512_load<opc, OpcodeStr, _.info512, _.info512.LdFrag, + masked_load_unaligned, IsReMaterializable>, EVEX_V512; -multiclass avx512_store<bits<8> opc, string OpcodeStr, PatFrag st_frag, - ValueType OpVT, RegisterClass KRC, RegisterClass RC, - X86MemOperand memop, Domain d> { + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_load<opc, OpcodeStr, _.info256, _.info256.LdFrag, + masked_load_unaligned, IsReMaterializable>, EVEX_V256; + defm Z128 : avx512_load<opc, OpcodeStr, _.info128, _.info128.LdFrag, + masked_load_unaligned, IsReMaterializable>, EVEX_V128; + } +} + +multiclass avx512_store<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, + PatFrag st_frag, PatFrag mstore> { let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { - def rr_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst), (ins RC:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], d>, - EVEX; + def rr_alt : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src), + OpcodeStr # "\t{$src, $dst|$dst, $src}", [], + _.ExeDomain>, EVEX; let Constraints = "$src1 = $dst" in - def rrk_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst), - (ins RC:$src1, KRC:$mask, RC:$src2), - !strconcat(OpcodeStr, - "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [], d>, - EVEX, EVEX_K; - def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst), - (ins KRC:$mask, RC:$src), - !strconcat(OpcodeStr, - "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"), - [], d>, EVEX, EVEX_KZ; + def rrk_alt : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), + 
(ins _.RC:$src1, _.KRCWM:$mask, _.RC:$src2), + OpcodeStr # + "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}", + [], _.ExeDomain>, EVEX, EVEX_K; + def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), + (ins _.KRCWM:$mask, _.RC:$src), + OpcodeStr # + "\t{$src, ${dst} {${mask}} {z}|" # + "${dst} {${mask}} {z}, $src}", + [], _.ExeDomain>, EVEX, EVEX_KZ; } let mayStore = 1 in { - def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src), + def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(st_frag (OpVT RC:$src), addr:$dst)], d>, EVEX; + [(st_frag (_.VT _.RC:$src), addr:$dst)], _.ExeDomain>, EVEX; def mrk : AVX512PI<opc, MRMDestMem, (outs), - (ins memop:$dst, KRC:$mask, RC:$src), - !strconcat(OpcodeStr, - "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"), - [], d>, EVEX, EVEX_K; + (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src), + OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}", + [], _.ExeDomain>, EVEX, EVEX_K; } + + def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)), + (!cast<Instruction>(NAME#_.ZSuffix##mrk) addr:$ptr, + _.KRCWM:$mask, _.RC:$src)>; } -multiclass avx512_store_vl<bits<8> opc, string OpcodeStr, string st_pat, - string st_suff_512, string st_suff_256, - string st_suff_128, string elty, string elsz, - string vsz512, string vsz256, string vsz128, - Domain d, Predicate prd> { +multiclass avx512_store_vl< bits<8> opc, string OpcodeStr, + AVX512VLVectorVTInfo _, Predicate prd> { let Predicates = [prd] in - defm Z : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_512), - !cast<ValueType>("v"##vsz512##elty##elsz), - !cast<RegisterClass>("VK"##vsz512##"WM"), VR512, - !cast<X86MemOperand>(elty##"512mem"), d>, EVEX_V512; + defm Z : avx512_store<opc, OpcodeStr, _.info512, store, + masked_store_unaligned>, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_256), - !cast<ValueType>("v"##vsz256##elty##elsz), - !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X, - !cast<X86MemOperand>(elty##"256mem"), d>, EVEX_V256; - - defm Z128 : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_128), - !cast<ValueType>("v"##vsz128##elty##elsz), - !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X, - !cast<X86MemOperand>(elty##"128mem"), d>, EVEX_V128; + defm Z256 : avx512_store<opc, OpcodeStr, _.info256, store, + masked_store_unaligned>, EVEX_V256; + defm Z128 : avx512_store<opc, OpcodeStr, _.info128, store, + masked_store_unaligned>, EVEX_V128; } } -defm VMOVAPS : avx512_load_vl<0x28, "vmovaps", "alignedload", "f", "32", - "16", "8", "4", SSEPackedSingle, HasAVX512>, - avx512_store_vl<0x29, "vmovaps", "alignedstore", - "512", "256", "", "f", "32", "16", "8", "4", - SSEPackedSingle, HasAVX512>, - PS, EVEX_CD8<32, CD8VF>; +multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr, + AVX512VLVectorVTInfo _, Predicate prd> { + let Predicates = [prd] in + defm Z : avx512_store<opc, OpcodeStr, _.info512, alignedstore512, + masked_store_aligned512>, EVEX_V512; + + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_store<opc, OpcodeStr, _.info256, alignedstore256, + masked_store_aligned256>, EVEX_V256; + defm Z128 : avx512_store<opc, OpcodeStr, _.info128, alignedstore, + masked_store_aligned128>, EVEX_V128; + } +} -defm VMOVAPD : avx512_load_vl<0x28, "vmovapd", "alignedload", "f", "64", - "8", "4", "2", SSEPackedDouble, HasAVX512>, - avx512_store_vl<0x29, "vmovapd", "alignedstore", - "512", 
"256", "", "f", "64", "8", "4", "2", - SSEPackedDouble, HasAVX512>, - PD, VEX_W, EVEX_CD8<64, CD8VF>; - -defm VMOVUPS : avx512_load_vl<0x10, "vmovups", "load", "f", "32", - "16", "8", "4", SSEPackedSingle, HasAVX512>, - avx512_store_vl<0x11, "vmovups", "store", "", "", "", "f", "32", - "16", "8", "4", SSEPackedSingle, HasAVX512>, +defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info, + HasAVX512>, + avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, + HasAVX512>, PS, EVEX_CD8<32, CD8VF>; + +defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, + HasAVX512>, + avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info, + HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>; + +defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512>, + avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512>, PS, EVEX_CD8<32, CD8VF>; -defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", "load", "f", "64", - "8", "4", "2", SSEPackedDouble, HasAVX512, 0>, - avx512_store_vl<0x11, "vmovupd", "store", "", "", "", "f", "64", - "8", "4", "2", SSEPackedDouble, HasAVX512>, - PD, VEX_W, EVEX_CD8<64, CD8VF>; +defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, 0>, + avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512>, + PD, VEX_W, EVEX_CD8<64, CD8VF>; def: Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512 addr:$ptr, (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)), @@ -2276,6 +2311,7 @@ def: Pat<(int_x86_avx512_mask_store_pd_512 addr:$ptr, (v8f64 VR512:$src), (VMOVAPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src)>; +let Predicates = [HasAVX512, NoVLX] in { def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)), (VMOVUPSZmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), @@ -2285,73 +2321,36 @@ def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, undef)), (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; -def: Pat<(masked_store addr:$ptr, VK16WM:$mask, (v16f32 VR512:$src)), - (VMOVUPSZmrk addr:$ptr, VK16WM:$mask, VR512:$src)>; - -def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f64 VR512:$src)), - (VMOVUPDZmrk addr:$ptr, VK8WM:$mask, VR512:$src)>; - -def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, undef)), - (VMOVUPSZrmkz VK16WM:$mask, addr:$ptr)>; - -def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, - (bc_v16f32 (v16i32 immAllZerosV)))), - (VMOVUPSZrmkz VK16WM:$mask, addr:$ptr)>; - -def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, (v16f32 VR512:$src0))), - (VMOVUPSZrmk VR512:$src0, VK16WM:$mask, addr:$ptr)>; - -def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, undef)), - (VMOVUPDZrmkz VK8WM:$mask, addr:$ptr)>; - -def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, - (bc_v8f64 (v16i32 immAllZerosV)))), - (VMOVUPDZrmkz VK8WM:$mask, addr:$ptr)>; - -def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, (v8f64 VR512:$src0))), - (VMOVUPDZrmk VR512:$src0, VK8WM:$mask, addr:$ptr)>; - def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src0))), (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmk (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src0, sub_ymm), (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; +} -defm VMOVDQA32 : avx512_load_vl<0x6F, "vmovdqa32", "alignedload", "i", "32", - "16", "8", "4", SSEPackedInt, HasAVX512>, - avx512_store_vl<0x7F, "vmovdqa32", "alignedstore", - "512", "256", "", "i", "32", "16", "8", "4", - SSEPackedInt, HasAVX512>, - PD, EVEX_CD8<32, 
CD8VF>; - -defm VMOVDQA64 : avx512_load_vl<0x6F, "vmovdqa64", "alignedload", "i", "64", - "8", "4", "2", SSEPackedInt, HasAVX512>, - avx512_store_vl<0x7F, "vmovdqa64", "alignedstore", - "512", "256", "", "i", "64", "8", "4", "2", - SSEPackedInt, HasAVX512>, - PD, VEX_W, EVEX_CD8<64, CD8VF>; - -defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", "load", "i", "8", - "64", "32", "16", SSEPackedInt, HasBWI>, - avx512_store_vl<0x7F, "vmovdqu8", "store", "", "", "", - "i", "8", "64", "32", "16", SSEPackedInt, +defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, + HasAVX512>, + avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, + HasAVX512>, PD, EVEX_CD8<32, CD8VF>; + +defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, + HasAVX512>, + avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, + HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>; + +defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI>, + avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI>, XD, EVEX_CD8<8, CD8VF>; -defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", "load", "i", "16", - "32", "16", "8", SSEPackedInt, HasBWI>, - avx512_store_vl<0x7F, "vmovdqu16", "store", "", "", "", - "i", "16", "32", "16", "8", SSEPackedInt, +defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI>, + avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI>, XD, VEX_W, EVEX_CD8<16, CD8VF>; -defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", "load", "i", "32", - "16", "8", "4", SSEPackedInt, HasAVX512>, - avx512_store_vl<0x7F, "vmovdqu32", "store", "", "", "", - "i", "32", "16", "8", "4", SSEPackedInt, +defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512>, + avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512>, XS, EVEX_CD8<32, CD8VF>; -defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", "load", "i", "64", - "8", "4", "2", SSEPackedInt, HasAVX512>, - avx512_store_vl<0x7F, "vmovdqu64", "store", "", "", "", - "i", "64", "8", "4", "2", SSEPackedInt, +defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512>, + avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512>, XS, VEX_W, EVEX_CD8<64, CD8VF>; def: Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512 addr:$ptr, @@ -2389,37 +2388,8 @@ def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV), (v16i32 VR512:$src))), (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>; } - -def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, (v16i32 immAllZerosV))), - (VMOVDQU32Zrmkz VK16WM:$mask, addr:$ptr)>; - -def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, undef)), - (VMOVDQU32Zrmkz VK16WM:$mask, addr:$ptr)>; - -def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, (v16i32 VR512:$src0))), - (VMOVDQU32Zrmk VR512:$src0, VK16WM:$mask, addr:$ptr)>; - -def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, - (bc_v8i64 (v16i32 immAllZerosV)))), - (VMOVDQU64Zrmkz VK8WM:$mask, addr:$ptr)>; - -def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, undef)), - (VMOVDQU64Zrmkz VK8WM:$mask, addr:$ptr)>; - -def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, (v8i64 VR512:$src0))), - (VMOVDQU64Zrmk VR512:$src0, VK8WM:$mask, addr:$ptr)>; - -def: Pat<(masked_store addr:$ptr, VK16WM:$mask, (v16i32 VR512:$src)), - (VMOVDQU32Zmrk addr:$ptr, VK16WM:$mask, VR512:$src)>; - -def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i64 VR512:$src)), - (VMOVDQU64Zmrk addr:$ptr, VK8WM:$mask, VR512:$src)>; - -// SKX replacement -def: Pat<(masked_store addr:$ptr, 
VK8WM:$mask, (v8i32 VR256:$src)), - (VMOVDQU32Z256mrk addr:$ptr, VK8WM:$mask, VR256:$src)>; - -// KNL replacement +// NoVLX patterns +let Predicates = [HasAVX512, NoVLX] in { def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)), (VMOVDQU32Zmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), @@ -2428,7 +2398,7 @@ def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)), def: Pat<(v8i32 (masked_load addr:$ptr, VK8WM:$mask, undef)), (v8i32 (EXTRACT_SUBREG (v16i32 (VMOVDQU32Zrmkz (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; - +} // Move Int Doubleword to Packed Double Int // @@ -3243,28 +3213,95 @@ defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, //===----------------------------------------------------------------------===// // AVX-512 FP arithmetic //===----------------------------------------------------------------------===// +multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, + SDNode OpNode, SDNode VecNode, OpndItins itins, + bit IsCommutable> { -multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, - SizeItins itins> { - defm SSZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), OpNode, FR32X, - f32mem, itins.s, 0>, XS, EVEX_4V, VEX_LIG, - EVEX_CD8<32, CD8VT1>; - defm SDZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), OpNode, FR64X, - f64mem, itins.d, 0>, XD, VEX_W, EVEX_4V, VEX_LIG, - EVEX_CD8<64, CD8VT1>; + defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.RC:$src2), OpcodeStr, + "$src2, $src1", "$src1, $src2", + (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), + (i32 FROUND_CURRENT)), + "", itins.rr, IsCommutable>; + + defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, + "$src2, $src1", "$src1, $src2", + (VecNode (_.VT _.RC:$src1), + (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))), + (i32 FROUND_CURRENT)), + "", itins.rm, IsCommutable>; + let isCodeGenOnly = 1, isCommutable = IsCommutable, + Predicates = [HasAVX512] in { + def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), + (ins _.FRC:$src1, _.FRC:$src2), + OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))], + itins.rr>; + def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), + (ins _.FRC:$src1, _.ScalarMemOp:$src2), + OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set _.FRC:$dst, (OpNode _.FRC:$src1, + (_.ScalarLdFrag addr:$src2)))], itins.rr>; + } } -let isCommutable = 1 in { -defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>; -defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>; -defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>; -defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>; +multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, + SDNode VecNode, OpndItins itins, bit IsCommutable> { + + defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr, + "$rc, $src2, $src1", "$src1, $src2, $rc", + (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), + (i32 imm:$rc)), "", itins.rr, IsCommutable>, + EVEX_B, EVEX_RC; } -let isCommutable = 0 in { -defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>; -defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>; +multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, + SDNode VecNode, OpndItins 
itins, bit IsCommutable> { + + defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.RC:$src2), OpcodeStr, + "$src2, $src1", "$src1, $src2", + (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), + (i32 FROUND_NO_EXC)), "{sae}">, EVEX_B; } +multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode, + SDNode VecNode, + SizeItins itins, bit IsCommutable> { + defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode, + itins.s, IsCommutable>, + avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode, + itins.s, IsCommutable>, + XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; + defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode, + itins.d, IsCommutable>, + avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode, + itins.d, IsCommutable>, + XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; +} + +multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode, + SDNode VecNode, + SizeItins itins, bit IsCommutable> { + defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode, + itins.s, IsCommutable>, + avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, VecNode, + itins.s, IsCommutable>, + XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; + defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode, + itins.d, IsCommutable>, + avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, VecNode, + itins.d, IsCommutable>, + XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; +} +defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnd, SSE_ALU_ITINS_S, 1>; +defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnd, SSE_ALU_ITINS_S, 1>; +defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnd, SSE_ALU_ITINS_S, 0>; +defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnd, SSE_ALU_ITINS_S, 0>; +defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fminRnd, SSE_ALU_ITINS_S, 1>; +defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxRnd, SSE_ALU_ITINS_S, 1>; + multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> { defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), @@ -3411,15 +3448,27 @@ multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))), " ", SSE_INTSHIFT_ITINS_P.rr>, AVX512BIi8Base, EVEX_4V; + let mayLoad = 1 in defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", - (_.VT (OpNode (_.LdFrag addr:$src1), (i8 imm:$src2))), + (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), + (i8 imm:$src2))), " ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V; } +multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM, + string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { + let mayLoad = 1 in + defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), + (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr, + "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2", + (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2))), + " ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V, EVEX_B; +} + multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode, - ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> { + ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> { // src2 is always 128-bit defm rr : AVX512_maskable<opc, MRMSrcReg, 
_, (outs _.RC:$dst), (ins _.RC:$src1, VR128X:$src2), OpcodeStr, @@ -3430,46 +3479,95 @@ multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src1, i128mem:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2)))), - " ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase, EVEX_4V; + " ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase, + EVEX_4V; } multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, - ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> { - defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag, _>, EVEX_V512; + ValueType SrcVT, PatFrag bc_frag, + AVX512VLVectorVTInfo VTInfo, Predicate prd> { + let Predicates = [prd] in + defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag, + VTInfo.info512>, EVEX_V512, + EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ; + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag, + VTInfo.info256>, EVEX_V256, + EVEX_CD8<VTInfo.info256.EltSize, CD8VH>; + defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag, + VTInfo.info128>, EVEX_V128, + EVEX_CD8<VTInfo.info128.EltSize, CD8VF>; + } } -multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, string OpcodeStr, - SDNode OpNode> { +multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw, + string OpcodeStr, SDNode OpNode> { defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, v4i32, bc_v4i32, - v16i32_info>, EVEX_CD8<32, CD8VQ>; + avx512vl_i32_info, HasAVX512>; defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, v2i64, bc_v2i64, - v8i64_info>, EVEX_CD8<64, CD8VQ>, VEX_W; + avx512vl_i64_info, HasAVX512>, VEX_W; + defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, v8i16, bc_v8i16, + avx512vl_i16_info, HasBWI>; +} + +multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, + string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo> { + let Predicates = [HasAVX512] in + defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, + VTInfo.info512>, + avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, + VTInfo.info512>, EVEX_V512; + let Predicates = [HasAVX512, HasVLX] in { + defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, + VTInfo.info256>, + avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, + VTInfo.info256>, EVEX_V256; + defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, + VTInfo.info128>, + avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, + VTInfo.info128>, EVEX_V128; + } } -defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli, - v16i32_info>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli, - v8i64_info>, EVEX_V512, - EVEX_CD8<64, CD8VF>, VEX_W; +multiclass avx512_shift_rmi_w<bits<8> opcw, + Format ImmFormR, Format ImmFormM, + string OpcodeStr, SDNode OpNode> { + let Predicates = [HasBWI] in + defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, + v32i16_info>, EVEX_V512; + let Predicates = [HasVLX, HasBWI] in { + defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, + v16i16x_info>, EVEX_V256; + defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, + v8i16x_info>, EVEX_V128; + } +} -defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli, - v16i32_info>, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli, - 
v8i64_info>, EVEX_V512, - EVEX_CD8<64, CD8VF>, VEX_W; +multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq, + Format ImmFormR, Format ImmFormM, + string OpcodeStr, SDNode OpNode> { + defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode, + avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; + defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode, + avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W; +} -defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai, - v16i32_info>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai, - v8i64_info>, EVEX_V512, - EVEX_CD8<64, CD8VF>, VEX_W; +defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli>, + avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli>; -defm VPSLL : avx512_shift_types<0xF2, 0xF3, "vpsll", X86vshl>; -defm VPSRA : avx512_shift_types<0xE2, 0xE2, "vpsra", X86vsra>; -defm VPSRL : avx512_shift_types<0xD2, 0xD3, "vpsrl", X86vsrl>; +defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli>, + avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli>; + +defm VPSRA : avx512_shift_rmi_dq<0x72, 0x73, MRM4r, MRM4m, "vpsra", X86vsrai>, + avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>; + +defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", rotr>; +defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", rotl>; + +defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>; +defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>; +defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl>; //===-------------------------------------------------------------------===// // Variable Bit Shifts @@ -3481,29 +3579,71 @@ multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2))), " ", SSE_INTSHIFT_ITINS_P.rr>, AVX5128IBase, EVEX_4V; + let mayLoad = 1 in defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2))), - " ", SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V; + " ", SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V, + EVEX_CD8<_.EltSize, CD8VF>; } +multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + let mayLoad = 1 in + defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, + "${src2}"##_.BroadcastStr##", $src1", + "$src1, ${src2}"##_.BroadcastStr, + (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast + (_.ScalarLdFrag addr:$src2))))), + " ", SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_B, + EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; +} multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, AVX512VLVectorVTInfo _> { - defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512; + let Predicates = [HasAVX512] in + defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>, + avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512; + + let Predicates = [HasAVX512, HasVLX] in { + defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>, + avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256; + defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info128>, + avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128; + } } multiclass 
avx512_var_shift_types<bits<8> opc, string OpcodeStr, SDNode OpNode> { defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, - avx512vl_i32_info>, EVEX_CD8<32, CD8VQ>; + avx512vl_i32_info>; defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, - avx512vl_i64_info>, EVEX_CD8<64, CD8VQ>, VEX_W; + avx512vl_i64_info>, VEX_W; } -defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>; -defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>; -defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>; +multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr, + SDNode OpNode> { + let Predicates = [HasBWI] in + defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, v32i16_info>, + EVEX_V512, VEX_W; + let Predicates = [HasVLX, HasBWI] in { + + defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, v16i16x_info>, + EVEX_V256, VEX_W; + defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, v8i16x_info>, + EVEX_V128, VEX_W; + } +} + +defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>, + avx512_var_shift_w<0x12, "vpsllvw", shl>; +defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>, + avx512_var_shift_w<0x11, "vpsravw", sra>; +defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>, + avx512_var_shift_w<0x10, "vpsrlvw", srl>; +defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>; +defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>; //===----------------------------------------------------------------------===// // AVX-512 - MOVDDUP @@ -4919,81 +5059,74 @@ defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X, X86vsext //===----------------------------------------------------------------------===// // GATHER - SCATTER Operations -multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, - X86MemOperand memop, PatFrag GatherNode> { -let mayLoad = 1, hasTwoExplicitDefs = 1, +multiclass avx512_gather<bits<8> opc, string OpcodeStr, RegisterClass KRC, + RegisterClass RC, X86MemOperand memop> { +let mayLoad = 1, Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in - def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, _.KRCWM:$mask_wb), - (ins _.RC:$src1, _.KRCWM:$mask, memop:$src2), + def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst, KRC:$mask_wb), + (ins RC:$src1, KRC:$mask, memop:$src2), !strconcat(OpcodeStr, "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), - [(set _.RC:$dst, _.KRCWM:$mask_wb, - (_.VT (GatherNode (_.VT _.RC:$src1), _.KRCWM:$mask, - vectoraddr:$src2)))]>, EVEX, EVEX_K, - EVEX_CD8<_.EltSize, CD8VT1>; + []>, EVEX, EVEX_K; } let ExeDomain = SSEPackedDouble in { -defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", v8f64_info, vy64xmem, - mgatherv8i32>, EVEX_V512, VEX_W; -defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", v8f64_info, vz64mem, - mgatherv8i64>, EVEX_V512, VEX_W; +defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; } let ExeDomain = SSEPackedSingle in { -defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", v16f32_info, vz32mem, - mgatherv16i32>, EVEX_V512; -defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", v8f32x_info, vz64mem, - mgatherv8i64>, EVEX_V512; +defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; +defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; } -defm VPGATHERDQZ 
: avx512_gather<0x90, "vpgatherdq", v8i64_info, vy64xmem, - mgatherv8i32>, EVEX_V512, VEX_W; -defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", v16i32_info, vz32mem, - mgatherv16i32>, EVEX_V512; - -defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", v8i64_info, vz64mem, - mgatherv8i64>, EVEX_V512, VEX_W; -defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", v8i32x_info, vz64mem, - mgatherv8i64>, EVEX_V512; +defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; -multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, - X86MemOperand memop, PatFrag ScatterNode> { +defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512, vz64mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X, vz64mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; +multiclass avx512_scatter<bits<8> opc, string OpcodeStr, RegisterClass KRC, + RegisterClass RC, X86MemOperand memop> { let mayStore = 1, Constraints = "$mask = $mask_wb" in - - def mr : AVX5128I<opc, MRMDestMem, (outs _.KRCWM:$mask_wb), - (ins memop:$dst, _.KRCWM:$mask, _.RC:$src), + def mr : AVX5128I<opc, MRMDestMem, (outs KRC:$mask_wb), + (ins memop:$dst, KRC:$mask, RC:$src2), !strconcat(OpcodeStr, - "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"), - [(set _.KRCWM:$mask_wb, (ScatterNode (_.VT _.RC:$src), - _.KRCWM:$mask, vectoraddr:$dst))]>, - EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; + "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), + []>, EVEX, EVEX_K; } let ExeDomain = SSEPackedDouble in { -defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", v8f64_info, vy64xmem, - mscatterv8i32>, EVEX_V512, VEX_W; -defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", v8f64_info, vz64mem, - mscatterv8i64>, EVEX_V512, VEX_W; +defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512, vy64xmem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512, vz64mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; } let ExeDomain = SSEPackedSingle in { -defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", v16f32_info, vz32mem, - mscatterv16i32>, EVEX_V512; -defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", v8f32x_info, vz64mem, - mscatterv8i64>, EVEX_V512; +defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512, vz32mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; +defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X, vz64mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; } -defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", v8i64_info, vy64xmem, - mscatterv8i32>, EVEX_V512, VEX_W; -defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", v16i32_info, vz32mem, - mscatterv16i32>, EVEX_V512; +defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512, vy64xmem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512, vz32mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; -defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", v8i64_info, vz64mem, - mscatterv8i64>, EVEX_V512, VEX_W; -defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", v8i32x_info, vz64mem, - mscatterv8i64>, EVEX_V512; +defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VPSCATTERQDZ : avx512_scatter<0xA1, 
"vpscatterqd", VK8WM, VR256X, vz64mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; // prefetch multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index bf515a8..0bdabdf 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -282,6 +282,8 @@ def X86faddRnd : SDNode<"X86ISD::FADD_RND", SDTFPBinOpRound>; def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>; def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>; def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>; +def X86fmaxRnd : SDNode<"X86ISD::FMAX", SDTFPBinOpRound>; +def X86fminRnd : SDNode<"X86ISD::FMIN", SDTFPBinOpRound>; def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>; def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>; @@ -304,8 +306,6 @@ def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>; def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>; def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>; def X86RndScale : SDNode<"X86ISD::RNDSCALE", STDFp3SrcRm>; -def X86mgather : SDNode<"X86ISD::GATHER", SDTypeProfile<1, 3, - [SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>]>>; def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>, @@ -526,58 +526,6 @@ def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), return false; }]>; -def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_gather node:$src1, node:$src2, node:$src3) , [{ - //if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N)) - // return (Mgt->getIndex().getValueType() == MVT::v8i32 || - // Mgt->getBasePtr().getValueType() == MVT::v8i32); - //return false; - return N != 0; -}]>; - -def mgatherv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_gather node:$src1, node:$src2, node:$src3) , [{ - //if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N)) - // return (Mgt->getIndex().getValueType() == MVT::v8i64 || - // Mgt->getBasePtr().getValueType() == MVT::v8i64); - //return false; - return N != 0; -}]>; -def mgatherv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_gather node:$src1, node:$src2, node:$src3) , [{ - //if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N)) - // return (Mgt->getIndex().getValueType() == MVT::v16i32 || - // Mgt->getBasePtr().getValueType() == MVT::v16i32); - //return false; - return N != 0; -}]>; - -def mscatterv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_scatter node:$src1, node:$src2, node:$src3) , [{ - //if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N)) - // return (Sc->getIndex().getValueType() == MVT::v8i32 || - // Sc->getBasePtr().getValueType() == MVT::v8i32); - //return false; - return N != 0; -}]>; - -def mscatterv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_scatter node:$src1, node:$src2, node:$src3) , [{ - //if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N)) - // return (Sc->getIndex().getValueType() == MVT::v8i64 || - // Sc->getBasePtr().getValueType() == MVT::v8i64); - //return false; - return N != 0; -}]>; -def mscatterv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_scatter node:$src1, node:$src2, node:$src3) , [{ - //if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N)) - // return (Sc->getIndex().getValueType() == MVT::v16i32 || - // Sc->getBasePtr().getValueType() == MVT::v16i32); - //return false; - 
return N != 0; -}]>; - // 128-bit bitconvert pattern fragments def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>; def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>; @@ -681,3 +629,55 @@ def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec, return X86::isVINSERT256Index(N); }], INSERT_get_vinsert256_imm>; +def masked_load_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (masked_load node:$src1, node:$src2, node:$src3), [{ + if (dyn_cast<MaskedLoadSDNode>(N)) + return cast<MaskedLoadSDNode>(N)->getAlignment() >= 16; + return false; +}]>; + +def masked_load_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (masked_load node:$src1, node:$src2, node:$src3), [{ + if (dyn_cast<MaskedLoadSDNode>(N)) + return cast<MaskedLoadSDNode>(N)->getAlignment() >= 32; + return false; +}]>; + +def masked_load_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (masked_load node:$src1, node:$src2, node:$src3), [{ + if (dyn_cast<MaskedLoadSDNode>(N)) + return cast<MaskedLoadSDNode>(N)->getAlignment() >= 64; + return false; +}]>; + +def masked_load_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (masked_load node:$src1, node:$src2, node:$src3), [{ + return (dyn_cast<MaskedLoadSDNode>(N) != 0); +}]>; + +def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (masked_store node:$src1, node:$src2, node:$src3), [{ + if (dyn_cast<MaskedStoreSDNode>(N)) + return cast<MaskedStoreSDNode>(N)->getAlignment() >= 16; + return false; +}]>; + +def masked_store_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (masked_store node:$src1, node:$src2, node:$src3), [{ + if (dyn_cast<MaskedStoreSDNode>(N)) + return cast<MaskedStoreSDNode>(N)->getAlignment() >= 32; + return false; +}]>; + +def masked_store_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (masked_store node:$src1, node:$src2, node:$src3), [{ + if (dyn_cast<MaskedStoreSDNode>(N)) + return cast<MaskedStoreSDNode>(N)->getAlignment() >= 64; + return false; +}]>; + +def masked_store_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (masked_store node:$src1, node:$src2, node:$src3), [{ + return (dyn_cast<MaskedStoreSDNode>(N) != 0); +}]>; + diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index f5b9680..538ec1c 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -104,7 +104,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) : X86GenInstrInfo( (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32), (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)), - Subtarget(STI), RI(STI) { + Subtarget(STI), RI(STI.getTargetTriple()) { static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = { { X86::ADC32ri, X86::ADC32mi, 0 }, @@ -4573,9 +4573,7 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr *MI, return nullptr; // Check whether we can fold the def into SrcOperandId. 
- SmallVector<unsigned, 8> Ops; - Ops.push_back(SrcOperandId); - MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI); + MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandId, DefMI); if (FoldMI) { FoldAsLoadDefReg = 0; return FoldMI; @@ -4670,7 +4668,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { } static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, - const SmallVectorImpl<MachineOperand> &MOs, + ArrayRef<MachineOperand> MOs, MachineInstr *MI, const TargetInstrInfo &TII) { // Create the base instruction with the memory operand as the first part. @@ -4697,9 +4695,8 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, return MIB; } -static MachineInstr *FuseInst(MachineFunction &MF, - unsigned Opcode, unsigned OpNo, - const SmallVectorImpl<MachineOperand> &MOs, +static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode, + unsigned OpNo, ArrayRef<MachineOperand> MOs, MachineInstr *MI, const TargetInstrInfo &TII) { // Omit the implicit operands, something BuildMI can't do. MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), @@ -4723,7 +4720,7 @@ static MachineInstr *FuseInst(MachineFunction &MF, } static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, - const SmallVectorImpl<MachineOperand> &MOs, + ArrayRef<MachineOperand> MOs, MachineInstr *MI) { MachineFunction &MF = *MI->getParent()->getParent(); MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode)); @@ -4736,12 +4733,12 @@ static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, return MIB.addImm(0); } -MachineInstr* -X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, unsigned OpNum, - const SmallVectorImpl<MachineOperand> &MOs, - unsigned Size, unsigned Align, - bool AllowCommute) const { +MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + unsigned OpNum, + ArrayRef<MachineOperand> MOs, + unsigned Size, unsigned Align, + bool AllowCommute) const { const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr; bool isCallRegIndirect = Subtarget.callRegIndirect(); @@ -5104,10 +5101,10 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum, MI->addRegisterKilled(Reg, TRI, true); } -MachineInstr* -X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const { +MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + ArrayRef<unsigned> Ops, + int FrameIndex) const { // Check switch flag if (NoFusing) return nullptr; @@ -5145,10 +5142,9 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, } else if (Ops.size() != 1) return nullptr; - SmallVector<MachineOperand,4> MOs; - MOs.push_back(MachineOperand::CreateFI(FrameIndex)); - return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, - Size, Alignment, /*AllowCommute=*/true); + return foldMemoryOperandImpl(MF, MI, Ops[0], + MachineOperand::CreateFI(FrameIndex), Size, + Alignment, /*AllowCommute=*/true); } static bool isPartialRegisterLoad(const MachineInstr &LoadMI, @@ -5170,9 +5166,9 @@ static bool isPartialRegisterLoad(const MachineInstr &LoadMI, return false; } -MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, +MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, + ArrayRef<unsigned> Ops, 
MachineInstr *LoadMI) const { // If loading from a FrameIndex, fold directly from the FrameIndex. unsigned NumOps = LoadMI->getDesc().getNumOperands(); @@ -5295,8 +5291,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return nullptr; // Folding a normal load. Just copy the load's address operands. - for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i) - MOs.push_back(LoadMI->getOperand(i)); + MOs.append(LoadMI->operands_begin() + NumOps - X86::AddrNumOperands, + LoadMI->operands_begin() + NumOps); break; } } @@ -5304,9 +5300,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, /*Size=*/0, Alignment, /*AllowCommute=*/true); } - bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const { + ArrayRef<unsigned> Ops) const { // Check switch flag if (NoFusing) return 0; @@ -5559,7 +5554,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, } if (Load) BeforeOps.push_back(SDValue(Load, 0)); - std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps)); + BeforeOps.insert(BeforeOps.end(), AfterOps.begin(), AfterOps.end()); SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, BeforeOps); NewNodes.push_back(NewNode); diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 4d15467..0dd8101 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -305,23 +305,21 @@ public: /// folding and return true, otherwise it should return false. If it folds /// the instruction, it is likely that the MachineInstruction the iterator /// references has been changed. - MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, int FrameIndex) const override; /// foldMemoryOperand - Same as the previous version except it allows folding /// of any load and store from / to any address, not just from a specific /// stack slot. - MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const override; + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, + MachineInstr *LoadMI) const override; /// canFoldMemoryOperand - Returns true if the specified load / store is /// folding is possible. - bool canFoldMemoryOperand(const MachineInstr*, - const SmallVectorImpl<unsigned> &) const override; + bool canFoldMemoryOperand(const MachineInstr *, + ArrayRef<unsigned>) const override; /// unfoldMemoryOperand - Separate a single instruction which folded a load or /// a store or a load and a store into two or more instruction. 
If this is @@ -406,10 +404,9 @@ public: void breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum, const TargetRegisterInfo *TRI) const override; - MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned OpNum, - const SmallVectorImpl<MachineOperand> &MOs, + ArrayRef<MachineOperand> MOs, unsigned Size, unsigned Alignment, bool AllowCommute) const; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 9881caf..e9a0431 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -572,10 +572,13 @@ def X86GR32orGR64AsmOperand : AsmOperandClass { def GR32orGR64 : RegisterOperand<GR32> { let ParserMatchClass = X86GR32orGR64AsmOperand; } - +def AVX512RCOperand : AsmOperandClass { + let Name = "AVX512RC"; +} def AVX512RC : Operand<i32> { let PrintMethod = "printRoundingControl"; let OperandType = "OPERAND_IMMEDIATE"; + let ParserMatchClass = AVX512RCOperand; } // Sign-extended immediate classes. We don't need to define the full lattice @@ -713,9 +716,6 @@ def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr", def tls64baseaddr : ComplexPattern<i64, 5, "SelectTLSADDRAddr", [tglobaltlsaddr], []>; -def vectoraddr : ComplexPattern<iPTR, 5, "SelectAddr", [],[SDNPWantParent]>; -//def vectoraddr : ComplexPattern<iPTR, 5, "SelectVectorAddr", [],[SDNPWantParent]>; - //===----------------------------------------------------------------------===// // X86 Instruction Predicate Definitions. def HasCMov : Predicate<"Subtarget->hasCMov()">; @@ -855,11 +855,11 @@ def X86_COND_E_OR_NE : ImmLeaf<i8, [{ return (Imm == X86::COND_E) || (Imm == X86::COND_NE); }]>; -let FastIselShouldIgnore = 1 in { // FastIsel should ignore all simm8 instrs. - def i16immSExt8 : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>; - def i32immSExt8 : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>; - def i64immSExt8 : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>; -} + +def i16immSExt8 : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>; +def i32immSExt8 : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>; +def i64immSExt8 : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>; + def i64immSExt32 : ImmLeaf<i64, [{ return Imm == (int32_t)Imm; }]>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index d2929d2..ccdbf0e 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3567,7 +3567,7 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32, f32mem, ssmem, sse_load_f32, !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode, - itins, HasAVX, "SS">, XS, VEX_4V, VEX_LIG; + itins, UseAVX, "SS">, XS, VEX_4V, VEX_LIG; } multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, @@ -3579,7 +3579,7 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64, f64mem, sdmem, sse_load_f64, !cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd), - OpNode, itins, HasAVX, "SD">, XD, VEX_4V, VEX_LIG; + OpNode, itins, UseAVX, "SD">, XD, VEX_4V, VEX_LIG; } // Square root. 
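A side note on the ArrayRef migration running through the X86InstrInfo.cpp and X86InstrInfo.h hunks above: taking ArrayRef<unsigned> instead of const SmallVectorImpl<unsigned>& is what lets call sites such as optimizeLoadInstr drop their single-purpose temporary vectors, because an ArrayRef binds implicitly to one element, to a braced list, or to an existing SmallVector. A minimal standalone sketch of the idiom (sumOps is an illustrative stand-in, not a function from this patch; it assumes only the LLVM ADT headers):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include <cstdio>

// Consumer in the style of the new foldMemoryOperandImpl signature: it
// reads any contiguous sequence of operand indices without owning it.
static unsigned sumOps(llvm::ArrayRef<unsigned> Ops) {
  unsigned Sum = 0;
  for (unsigned Op : Ops)
    Sum += Op;
  return Sum;
}

int main() {
  unsigned SrcOperandId = 2;
  // Old style: build a SmallVector just to pass one index. Still compiles,
  // since SmallVectorImpl converts implicitly to ArrayRef.
  llvm::SmallVector<unsigned, 8> Tmp;
  Tmp.push_back(SrcOperandId);
  unsigned A = sumOps(Tmp);
  // New style: a single element or a braced list binds directly.
  unsigned B = sumOps(SrcOperandId);
  unsigned C = sumOps({0, 1, 2});
  std::printf("%u %u %u\n", A, B, C);
  return 0;
}

The same one-element conversion is why the FrameIndex overload above can pass MachineOperand::CreateFI(FrameIndex) straight through where it previously pushed it into a SmallVector<MachineOperand, 4>.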
@@ -4077,7 +4077,7 @@ defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128, // SSE2 - Packed Integer Logical Instructions //===---------------------------------------------------------------------===// -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX] in { defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, VR128, v8i16, v8i16, bc_v8i16, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; @@ -4123,7 +4123,7 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { } } // Predicates = [HasAVX] -let Predicates = [HasAVX2] in { +let Predicates = [HasAVX2, NoVLX] in { defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, VR256, v16i16, v8i16, bc_v8i16, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; @@ -5902,7 +5902,6 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, SDNode ExtO (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>; // On AVX2, we also support 256bit inputs. - // FIXME: remove these patterns when the old shuffle lowering goes away. def : Pat<(v16i16 (ExtOp (v32i8 VR256:$src))), (!cast<I>(OpcPrefix#BWYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>; def : Pat<(v8i32 (ExtOp (v32i8 VR256:$src))), @@ -6955,6 +6954,34 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, Sched<[itins.Sched.Folded, ReadAfterLd]>; } +/// SS41I_binop_rmi - SSE 4.1 binary operator with 8-bit immediate +multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT, RegisterClass RC, PatFrag memop_frag, + X86MemOperand x86memop, bit Is2Addr = 1, + OpndItins itins = DEFAULT_ITINS> { + let isCommutable = 1 in + def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, u8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))], + itins.rr>, Sched<[itins.Sched]>; + def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2, u8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set RC:$dst, + (OpVT (OpNode RC:$src1, + (bitconvert (memop_frag addr:$src2)), imm:$src3)))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; +} + let Predicates = [HasAVX] in { let isCommutable = 0 in { defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw, @@ -6963,26 +6990,24 @@ let Predicates = [HasAVX] in { } let ExeDomain = SSEPackedSingle in { - defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps, - VR128, loadv4f32, f128mem, 0, - DEFAULT_ITINS_FBLENDSCHED>, VEX_4V; - defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps", - int_x86_avx_blend_ps_256, VR256, loadv8f32, - f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>, - VEX_4V, VEX_L; + defm VBLENDPS : SS41I_binop_rmi<0x0C, "vblendps", X86Blendi, v4f32, + VR128, loadv4f32, f128mem, 0, + DEFAULT_ITINS_FBLENDSCHED>, VEX_4V; + defm VBLENDPSY : SS41I_binop_rmi<0x0C, "vblendps", X86Blendi, v8f32, + VR256, loadv8f32, f256mem, 0, + DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L; } let ExeDomain = SSEPackedDouble in { - defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd, - VR128, loadv2f64, f128mem, 0, - DEFAULT_ITINS_FBLENDSCHED>, VEX_4V; - defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd", - 
int_x86_avx_blend_pd_256,VR256, loadv4f64, - f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>, - VEX_4V, VEX_L; + defm VBLENDPD : SS41I_binop_rmi<0x0D, "vblendpd", X86Blendi, v2f64, + VR128, loadv2f64, f128mem, 0, + DEFAULT_ITINS_FBLENDSCHED>, VEX_4V; + defm VBLENDPDY : SS41I_binop_rmi<0x0D, "vblendpd", X86Blendi, v4f64, + VR256, loadv4f64, f256mem, 0, + DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L; } - defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw, - VR128, loadv2i64, i128mem, 0, - DEFAULT_ITINS_BLENDSCHED>, VEX_4V; + defm VPBLENDW : SS41I_binop_rmi<0x0E, "vpblendw", X86Blendi, v8i16, + VR128, loadv2i64, i128mem, 0, + DEFAULT_ITINS_BLENDSCHED>, VEX_4V; let ExeDomain = SSEPackedSingle in defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, @@ -7004,9 +7029,9 @@ let Predicates = [HasAVX2] in { VR256, loadv4i64, i256mem, 0, DEFAULT_ITINS_MPSADSCHED>, VEX_4V, VEX_L; } - defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw, - VR256, loadv4i64, i256mem, 0, - DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L; + defm VPBLENDWY : SS41I_binop_rmi<0x0E, "vpblendw", X86Blendi, v16i16, + VR256, loadv4i64, i256mem, 0, + DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L; } let Constraints = "$src1 = $dst" in { @@ -7016,16 +7041,16 @@ let Constraints = "$src1 = $dst" in { 1, SSE_MPSADBW_ITINS>; } let ExeDomain = SSEPackedSingle in - defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps, - VR128, memopv4f32, f128mem, - 1, SSE_INTALU_ITINS_FBLEND_P>; + defm BLENDPS : SS41I_binop_rmi<0x0C, "blendps", X86Blendi, v4f32, + VR128, memopv4f32, f128mem, + 1, SSE_INTALU_ITINS_FBLEND_P>; let ExeDomain = SSEPackedDouble in - defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd, - VR128, memopv2f64, f128mem, - 1, SSE_INTALU_ITINS_FBLEND_P>; - defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw, - VR128, memopv2i64, i128mem, - 1, SSE_INTALU_ITINS_BLEND_P>; + defm BLENDPD : SS41I_binop_rmi<0x0D, "blendpd", X86Blendi, v2f64, + VR128, memopv2f64, f128mem, + 1, SSE_INTALU_ITINS_FBLEND_P>; + defm PBLENDW : SS41I_binop_rmi<0x0E, "pblendw", X86Blendi, v8i16, + VR128, memopv2i64, i128mem, + 1, SSE_INTALU_ITINS_BLEND_P>; let ExeDomain = SSEPackedSingle in defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps, VR128, memopv4f32, f128mem, 1, @@ -7116,32 +7141,12 @@ let Predicates = [HasAVX] in { def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1), (v4f64 VR256:$src2))), (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>; - - def : Pat<(v8f32 (X86Blendi (v8f32 VR256:$src1), (v8f32 VR256:$src2), - (imm:$mask))), - (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$mask)>; - def : Pat<(v4f64 (X86Blendi (v4f64 VR256:$src1), (v4f64 VR256:$src2), - (imm:$mask))), - (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$mask)>; - - def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2), - (imm:$mask))), - (VPBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>; - def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2), - (imm:$mask))), - (VBLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>; - def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2), - (imm:$mask))), - (VBLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>; } let Predicates = [HasAVX2] in { def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1), (v32i8 VR256:$src2))), (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>; - def : Pat<(v16i16 (X86Blendi (v16i16 VR256:$src1), (v16i16 VR256:$src2), - (imm:$mask))), - 
(VPBLENDWYrri VR256:$src1, VR256:$src2, imm:$mask)>; } // Patterns @@ -7260,17 +7265,6 @@ let Predicates = [UseSSE41] in { def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1), (v2f64 VR128:$src2))), (BLENDVPDrr0 VR128:$src2, VR128:$src1)>; - - def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2), - (imm:$mask))), - (PBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>; - def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2), - (imm:$mask))), - (BLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>; - def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2), - (imm:$mask))), - (BLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>; - } let SchedRW = [WriteLoad] in { @@ -7840,9 +7834,9 @@ def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256, WriteFShuffle256>, VEX_L; let Predicates = [HasAVX2] in -def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem, - int_x86_avx2_vbroadcasti128, WriteLoad>, - VEX_L; +def VBROADCASTI128 : avx_broadcast_no_int<0x5A, "vbroadcasti128", VR256, + i128mem, v4i64, loadv2i64, + WriteLoad>, VEX_L; let Predicates = [HasAVX] in def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), @@ -8238,38 +8232,31 @@ let Predicates = [HasF16C] in { // AVX2 Instructions //===----------------------------------------------------------------------===// -/// AVX2_binop_rmi_int - AVX2 binary operator with 8-bit immediate -multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr, - Intrinsic IntId, RegisterClass RC, PatFrag memop_frag, - X86MemOperand x86memop> { +/// AVX2_binop_rmi - AVX2 binary operator with 8-bit immediate +multiclass AVX2_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT, RegisterClass RC, PatFrag memop_frag, + X86MemOperand x86memop> { let isCommutable = 1 in def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>, + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>, Sched<[WriteBlend]>, VEX_4V; def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, - (IntId RC:$src1, - (bitconvert (memop_frag addr:$src2)), imm:$src3))]>, + (OpVT (OpNode RC:$src1, + (bitconvert (memop_frag addr:$src2)), imm:$src3)))]>, Sched<[WriteBlendLd, ReadAfterLd]>, VEX_4V; } -defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128, - VR128, loadv2i64, i128mem>; -defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256, - VR256, loadv4i64, i256mem>, VEX_L; - -def : Pat<(v4i32 (X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), - imm:$mask)), - (VPBLENDDrri VR128:$src1, VR128:$src2, imm:$mask)>; -def : Pat<(v8i32 (X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), - imm:$mask)), - (VPBLENDDYrri VR256:$src1, VR256:$src2, imm:$mask)>; +defm VPBLENDD : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v4i32, + VR128, loadv2i64, i128mem>; +defm VPBLENDDY : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v8i32, + VR256, loadv4i64, i256mem>, VEX_L; //===----------------------------------------------------------------------===// // VPBROADCAST - Load from memory and broadcast to all elements of the @@ -8608,9 +8595,7 @@ def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), // def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst), 
(ins VR256:$src1, u8imm:$src2), - "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, - (int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>, + "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, Sched<[WriteShuffle256]>, VEX, VEX_L; let hasSideEffects = 0, mayStore = 1 in def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs), diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index e436811..42256b2 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -175,8 +175,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0), X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0), - X86_INTRINSIC_DATA(avx2_permd, INTR_TYPE_2OP, X86ISD::VPERMV, 0), - X86_INTRINSIC_DATA(avx2_permps, INTR_TYPE_2OP, X86ISD::VPERMV, 0), X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0), X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0), X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0), diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 6af59d4..cd3076d 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -77,8 +77,8 @@ namespace llvm { X86AsmPrinter::StackMapShadowTracker::startFunction(MachineFunction &F) { MF = &F; CodeEmitter.reset(TM.getTarget().createMCCodeEmitter( - *MF->getSubtarget().getInstrInfo(), *MF->getSubtarget().getRegisterInfo(), - MF->getSubtarget(), MF->getContext())); + *MF->getSubtarget().getInstrInfo(), + *MF->getSubtarget().getRegisterInfo(), MF->getContext())); } void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst, diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index cab7ce8..06545bc 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "X86RegisterInfo.h" +#include "X86FrameLowering.h" #include "X86InstrBuilder.h" #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" @@ -53,26 +54,26 @@ static cl::opt<bool> EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true), cl::desc("Enable use of a base pointer for complex stack frames")); -X86RegisterInfo::X86RegisterInfo(const X86Subtarget &STI) - : X86GenRegisterInfo( - (STI.is64Bit() ? X86::RIP : X86::EIP), - X86_MC::getDwarfRegFlavour(STI.getTargetTriple(), false), - X86_MC::getDwarfRegFlavour(STI.getTargetTriple(), true), - (STI.is64Bit() ? X86::RIP : X86::EIP)), - Subtarget(STI) { +X86RegisterInfo::X86RegisterInfo(const Triple &TT) + : X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP), + X86_MC::getDwarfRegFlavour(TT, false), + X86_MC::getDwarfRegFlavour(TT, true), + (TT.isArch64Bit() ? X86::RIP : X86::EIP)) { X86_MC::InitLLVM2SEHRegisterMapping(this); // Cache some information. - Is64Bit = Subtarget.is64Bit(); - IsWin64 = Subtarget.isTargetWin64(); + Is64Bit = TT.isArch64Bit(); + IsWin64 = Is64Bit && TT.isOSWindows(); // Use a callee-saved register as the base pointer. These registers must // not conflict with any ABI requirements. For example, in 32-bit mode PIC // requires GOT in the EBX register before function calls via PLT GOT pointer. 
if (Is64Bit) { SlotSize = 8; - bool Use64BitReg = - Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64(); + // This matches the simplified 32-bit pointer code in the data layout + // computation. + // FIXME: Should use the data layout? + bool Use64BitReg = TT.getEnvironment() != Triple::GNUX32; StackPtr = Use64BitReg ? X86::RSP : X86::ESP; FramePtr = Use64BitReg ? X86::RBP : X86::EBP; BasePtr = Use64BitReg ? X86::RBX : X86::EBX; @@ -120,8 +121,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx); } -const TargetRegisterClass* -X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{ +const TargetRegisterClass * +X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, + const MachineFunction &MF) const { // Don't allow super-classes of GR8_NOREX. This class is only used after // extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied // to the full GR8 register class in 64-bit mode, so we cannot allow the @@ -161,6 +163,7 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{ const TargetRegisterClass * X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const { + const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>(); switch (Kind) { default: llvm_unreachable("Unexpected Kind in getPointerRegClass!"); case 0: // Normal GPRs. @@ -172,9 +175,9 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, return &X86::GR64_NOSPRegClass; return &X86::GR32_NOSPRegClass; case 2: // Available for tailcall (not callee-saved GPRs). - if (Subtarget.isTargetWin64()) + if (IsWin64) return &X86::GR64_TCW64RegClass; - else if (Subtarget.is64Bit()) + else if (Is64Bit) return &X86::GR64_TCRegClass; const Function *F = MF.getFunction(); @@ -210,7 +213,7 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, case X86::GR64RegClassID: return 12 - FPDiff; case X86::VR128RegClassID: - return Subtarget.is64Bit() ? 10 : 4; + return Is64Bit ? 
10 : 4; case X86::VR64RegClassID: return 4; } @@ -218,8 +221,10 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, const MCPhysReg * X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>(); bool HasAVX = Subtarget.hasAVX(); bool HasAVX512 = Subtarget.hasAVX512(); + bool CallsEHReturn = MF->getMMI().callsEHReturn(); assert(MF && "MachineFunction required"); switch (MF->getFunction()->getCallingConv()) { @@ -253,11 +258,16 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (Is64Bit) return CSR_64_MostRegs_SaveList; break; + case CallingConv::X86_64_Win64: + return CSR_Win64_SaveList; + case CallingConv::X86_64_SysV: + if (CallsEHReturn) + return CSR_64EHRet_SaveList; + return CSR_64_SaveList; default: break; } - bool CallsEHReturn = MF->getMMI().callsEHReturn(); if (Is64Bit) { if (IsWin64) return CSR_Win64_SaveList; @@ -270,8 +280,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_32_SaveList; } -const uint32_t* -X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { +const uint32_t * +X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { + const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>(); bool HasAVX = Subtarget.hasAVX(); bool HasAVX512 = Subtarget.hasAVX512(); @@ -308,6 +320,10 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { break; default: break; + case CallingConv::X86_64_Win64: + return CSR_Win64_RegMask; + case CallingConv::X86_64_SysV: + return CSR_64_RegMask; } // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check @@ -349,7 +365,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { // Set the base-pointer register and its aliases as reserved if needed. if (hasBasePointer(MF)) { CallingConv::ID CC = MF.getFunction()->getCallingConv(); - const uint32_t* RegMask = getCallPreservedMask(CC); + const uint32_t *RegMask = getCallPreservedMask(MF, CC); if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister())) report_fatal_error( "Stack realignment in presence of dynamic allocas is not supported with" @@ -393,7 +409,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(*AI); } } - if (!Is64Bit || !Subtarget.hasAVX512()) { + if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) { for (unsigned n = 16; n != 32; ++n) { for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid(); ++AI) Reserved.set(*AI); @@ -486,6 +502,24 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, else BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr); + // FRAME_ALLOC uses a single offset, with no register. It only works in the + // simple FP case, and doesn't work with stack realignment. On 32-bit, the + // offset is from the traditional base pointer location. On 64-bit, the + // offset is from the SP at the end of the prologue, not the FP location. This + // matches the behavior of llvm.frameaddress. 
+ if (Opc == TargetOpcode::FRAME_ALLOC) { + MachineOperand &FI = MI.getOperand(FIOperandNum); + bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); + int Offset; + if (IsWinEH) + Offset = static_cast<const X86FrameLowering *>(TFI) + ->getFrameIndexOffsetFromSP(MF, FrameIndex); + else + Offset = TFI->getFrameIndexOffset(MF, FrameIndex); + FI.ChangeToImmediate(Offset); + return; + } + // For LEA64_32r when BasePtr is 32-bits (X32) we can use full-size 64-bit // register as source operand, semantic is the same and destination is // 32-bits. It saves one byte per lea in code since 0x67 prefix is avoided. @@ -537,8 +571,9 @@ unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const { return TFI->hasFP(MF) ? FramePtr : StackPtr; } -unsigned X86RegisterInfo::getPtrSizedFrameRegister( - const MachineFunction &MF) const { +unsigned +X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const { + const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>(); unsigned FrameReg = getFrameRegister(MF); if (Subtarget.isTarget64BitILP32()) FrameReg = getX86SubSuperRegister(FrameReg, MVT::i32, false); diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 406b1fc..74edab9 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -20,14 +20,7 @@ #include "X86GenRegisterInfo.inc" namespace llvm { - class Type; - class TargetInstrInfo; - class X86Subtarget; - class X86RegisterInfo final : public X86GenRegisterInfo { -public: - const X86Subtarget &Subtarget; - private: /// Is64Bit - Is the target 64-bits. /// @@ -55,7 +48,7 @@ private: unsigned BasePtr; public: - X86RegisterInfo(const X86Subtarget &STI); + X86RegisterInfo(const Triple &TT); // FIXME: This should be tablegen'd like getDwarfRegNum is int getSEHRegNum(unsigned i) const; @@ -76,8 +69,9 @@ public: getSubClassWithSubReg(const TargetRegisterClass *RC, unsigned Idx) const override; - const TargetRegisterClass* - getLargestLegalSuperClass(const TargetRegisterClass *RC) const override; + const TargetRegisterClass * + getLargestLegalSuperClass(const TargetRegisterClass *RC, + const MachineFunction &MF) const override; /// getPointerRegClass - Returns a TargetRegisterClass used for pointer /// values. @@ -98,7 +92,8 @@ public: /// callee-save registers on this target. const MCPhysReg * getCalleeSavedRegs(const MachineFunction* MF) const override; - const uint32_t *getCallPreservedMask(CallingConv::ID) const override; + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const override; const uint32_t *getNoPreservedMask() const; /// getReservedRegs - Returns a bitset indexed by physical register number diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td index 61c0600..677e824 100644 --- a/lib/Target/X86/X86SchedHaswell.td +++ b/lib/Target/X86/X86SchedHaswell.td @@ -2014,7 +2014,7 @@ def : InstRW<[WriteFMADDr], // 3p forms. "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)r(Y)?", // 3s forms. - "VF(N?)M(ADD|SUB)S(S|D)(r132|231|213)r", + "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)r", // 4s/4s_int forms. "VF(N?)M(ADD|SUB)S(S|D)4rr(_REV|_Int)?", // 4p forms. @@ -2031,7 +2031,7 @@ def : InstRW<[WriteFMADDm], // 3p forms. "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)m(Y)?", // 3s forms. - "VF(N?)M(ADD|SUB)S(S|D)(r132|231|213)m", + "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)m", // 4s/4s_int forms. "VF(N?)M(ADD|SUB)S(S|D)4(rm|mr)(_Int)?", // 4p forms. 
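The X86RegisterInfo.cpp hunks above decouple the register-info object from X86Subtarget: the constructor now takes only a Triple, and Is64Bit, IsWin64, and the stack/frame/base register choice are derived from it, with the GNUX32 environment forcing the 32-bit register forms despite a 64-bit architecture. A rough, self-contained sketch of that classification logic, assuming only the llvm::Triple API used in the hunk (the RegInfoBits struct is illustrative, not from the patch):

#include "llvm/ADT/Triple.h"
#include <cstdio>

// Illustrative mirror of the fields the patch now derives from the triple.
struct RegInfoBits {
  bool Is64Bit;
  bool IsWin64;
  bool Use64BitReg; // picks RSP/RBP/RBX vs. ESP/EBP/EBX when Is64Bit
};

static RegInfoBits classify(const llvm::Triple &TT) {
  RegInfoBits R;
  R.Is64Bit = TT.isArch64Bit();
  R.IsWin64 = R.Is64Bit && TT.isOSWindows();
  // x32 (the GNUX32 environment) keeps the 64-bit architecture but uses
  // 32-bit pointers, so the patch falls back to 32-bit registers there.
  R.Use64BitReg = R.Is64Bit && TT.getEnvironment() != llvm::Triple::GNUX32;
  return R;
}

int main() {
  const char *Triples[] = {"x86_64-pc-linux-gnu", "x86_64-pc-windows-msvc",
                           "x86_64-linux-gnux32", "i686-pc-linux-gnu"};
  for (const char *S : Triples) {
    RegInfoBits R = classify(llvm::Triple(S));
    std::printf("%-24s Is64Bit=%d IsWin64=%d Use64BitReg=%d\n", S, R.Is64Bit,
                R.IsWin64, R.Use64BitReg);
  }
  return 0;
}

A visible consequence in getPointerRegClass, getCalleeSavedRegs, and getCallPreservedMask is that anything genuinely feature-dependent (AVX, AVX512, per-function details) now fetches the subtarget on demand via MF.getSubtarget<X86Subtarget>() instead of caching it at construction.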
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index 7feabf6..ca8fc9c 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -62,8 +62,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, #ifndef NDEBUG // If the base register might conflict with our physical registers, bail out. - unsigned ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI, - X86::ECX, X86::EAX, X86::EDI}; + const unsigned ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI, + X86::ECX, X86::EAX, X86::EDI}; assert(!isBaseRegConflictPossible(DAG, ClobberSet)); #endif @@ -228,8 +228,8 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( return SDValue(); // If the base register might conflict with our physical registers, bail out. - unsigned ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI, - X86::ECX, X86::ESI, X86::EDI}; + const unsigned ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI, + X86::ECX, X86::ESI, X86::EDI}; if (isBaseRegConflictPossible(DAG, ClobberSet)) return SDValue(); diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 4bde053..43d3895 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -37,10 +37,10 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { return make_unique<TargetLoweringObjectFileMachO>(); } - if (TT.isOSLinux()) - return make_unique<X86LinuxTargetObjectFile>(); + if (TT.isOSLinux() || TT.isOSNaCl()) + return make_unique<X86LinuxNaClTargetObjectFile>(); if (TT.isOSBinFormatELF()) - return make_unique<TargetLoweringObjectFileELF>(); + return make_unique<X86ELFTargetObjectFile>(); if (TT.isKnownWindowsMSVCEnvironment()) return make_unique<X86WindowsTargetObjectFile>(); if (TT.isOSBinFormatCOFF()) @@ -94,9 +94,9 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + : LLVMTargetMachine(T, computeDataLayout(Triple(TT)), TT, CPU, FS, Options, + RM, CM, OL), TLOF(createTLOF(Triple(getTargetTriple()))), - DL(computeDataLayout(Triple(TT))), Subtarget(TT, CPU, FS, *this, Options.StackAlignmentOverride) { // default to hard float ABI if (Options.FloatABIType == FloatABI::Default) diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 283858d..c9833ed 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -24,8 +24,6 @@ class StringRef; class X86TargetMachine final : public LLVMTargetMachine { std::unique_ptr<TargetLoweringObjectFile> TLOF; - // Calculates type size & alignment - const DataLayout DL; X86Subtarget Subtarget; mutable StringMap<std::unique_ptr<X86Subtarget>> SubtargetMap; @@ -35,8 +33,6 @@ public: const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); ~X86TargetMachine() override; - const DataLayout *getDataLayout() const override { return &DL; } - const X86Subtarget *getSubtargetImpl() const override { return &Subtarget; } const X86Subtarget *getSubtargetImpl(const Function &F) const override; TargetIRAnalysis getTargetIRAnalysis() override; diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 1d1c32e..d65d3b0 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -15,17 +15,13 @@ #include 
"llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/Dwarf.h" #include "llvm/Target/TargetLowering.h" using namespace llvm; using namespace dwarf; -X86_64MachoTargetObjectFile::X86_64MachoTargetObjectFile() - : TargetLoweringObjectFileMachO() { - SupportIndirectSymViaGOTPCRel = true; -} - const MCExpr *X86_64MachoTargetObjectFile::getTTypeGlobalReference( const GlobalValue *GV, unsigned Encoding, Mangler &Mang, const TargetMachine &TM, MachineModuleInfo *MMI, @@ -52,28 +48,30 @@ MCSymbol *X86_64MachoTargetObjectFile::getCFIPersonalitySymbol( } const MCExpr *X86_64MachoTargetObjectFile::getIndirectSymViaGOTPCRel( - const MCSymbol *Sym, int64_t Offset) const { + const MCSymbol *Sym, const MCValue &MV, int64_t Offset, + MachineModuleInfo *MMI, MCStreamer &Streamer) const { // On Darwin/X86-64, we need to use foo@GOTPCREL+4 to access the got entry // from a data section. In case there's an additional offset, then use // foo@GOTPCREL+4+<offset>. + unsigned FinalOff = Offset+MV.getConstant()+4; const MCExpr *Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext()); - const MCExpr *Off = MCConstantExpr::Create(Offset+4, getContext()); + const MCExpr *Off = MCConstantExpr::Create(FinalOff, getContext()); return MCBinaryExpr::CreateAdd(Res, Off, getContext()); } +const MCExpr *X86ELFTargetObjectFile::getDebugThreadLocalSymbol( + const MCSymbol *Sym) const { + return MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_DTPOFF, getContext()); +} + void -X86LinuxTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { +X86LinuxNaClTargetObjectFile::Initialize(MCContext &Ctx, + const TargetMachine &TM) { TargetLoweringObjectFileELF::Initialize(Ctx, TM); InitializeELF(TM.Options.UseInitArray); } -const MCExpr * -X86LinuxTargetObjectFile::getDebugThreadLocalSymbol( - const MCSymbol *Sym) const { - return MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_DTPOFF, getContext()); -} - const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol( const ConstantExpr *CE, Mangler &Mang, const TargetMachine &TM) const { // We are looking for the difference of two symbols, need a subtraction @@ -97,14 +95,12 @@ const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol( SubRHS->getPointerAddressSpace() != 0) return nullptr; - // Both ptrtoint instructions must wrap global variables: + // Both ptrtoint instructions must wrap global objects: // - Only global variables are eligible for image relative relocations. - // - The subtrahend refers to the special symbol __ImageBase, a global. - const GlobalVariable *GVLHS = - dyn_cast<GlobalVariable>(SubLHS->getPointerOperand()); - const GlobalVariable *GVRHS = - dyn_cast<GlobalVariable>(SubRHS->getPointerOperand()); - if (!GVLHS || !GVRHS) + // - The subtrahend refers to the special symbol __ImageBase, a GlobalVariable. + const auto *GOLHS = dyn_cast<GlobalObject>(SubLHS->getPointerOperand()); + const auto *GVRHS = dyn_cast<GlobalVariable>(SubRHS->getPointerOperand()); + if (!GOLHS || !GVRHS) return nullptr; // We expect __ImageBase to be a global variable without a section, externally @@ -117,10 +113,10 @@ const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol( return nullptr; // An image-relative, thread-local, symbol makes no sense. 
-  if (GVLHS->isThreadLocal())
+  if (GOLHS->isThreadLocal())
     return nullptr;
 
-  return MCSymbolRefExpr::Create(TM.getSymbol(GVLHS, Mang),
+  return MCSymbolRefExpr::Create(TM.getSymbol(GOLHS, Mang),
                                  MCSymbolRefExpr::VK_COFF_IMGREL32,
                                  getContext());
 }
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index f745538..2e25fb2 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -19,8 +19,6 @@ namespace llvm {
   /// x86-64.
   class X86_64MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
   public:
-    X86_64MachoTargetObjectFile();
-
     const MCExpr *
     getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding,
                             Mangler &Mang, const TargetMachine &TM,
@@ -33,20 +31,25 @@ namespace llvm {
                               const TargetMachine &TM,
                               MachineModuleInfo *MMI) const override;
 
-    const MCExpr *
-    getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
-                              int64_t Offset) const override;
+    const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
+                                            const MCValue &MV, int64_t Offset,
+                                            MachineModuleInfo *MMI,
+                                            MCStreamer &Streamer) const override;
   };
 
-  /// X86LinuxTargetObjectFile - This implementation is used for linux x86
-  /// and x86-64.
-  class X86LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
-    void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
-
+  /// \brief This implementation is used for X86 ELF targets that don't
+  /// have a further specialization.
+  class X86ELFTargetObjectFile : public TargetLoweringObjectFileELF {
     /// \brief Describe a TLS variable address within debug info.
     const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
   };
 
+  /// X86LinuxNaClTargetObjectFile - This implementation is used for linux and
+  /// Native Client on x86 and x86-64.
+  class X86LinuxNaClTargetObjectFile : public X86ELFTargetObjectFile {
+    void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+  };
+
   /// \brief This implementation is used for Windows targets on x86 and x86-64.
   class X86WindowsTargetObjectFile : public TargetLoweringObjectFileCOFF {
     const MCExpr *
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index 4073549..d0a09b2 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -126,15 +126,11 @@ void XCoreTargetAsmStreamer::emitCCBottomFunction(StringRef Name) {
   }
 }
 
-static MCStreamer *
-createXCoreMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
-                         bool isVerboseAsm, bool useDwarfDirectory,
-                         MCInstPrinter *InstPrint, MCCodeEmitter *CE,
-                         MCAsmBackend *TAB, bool ShowInst) {
-  MCStreamer *S = llvm::createAsmStreamer(
-      Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
-  new XCoreTargetAsmStreamer(*S, OS);
-  return S;
+static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
+                                                 formatted_raw_ostream &OS,
+                                                 MCInstPrinter *InstPrint,
+                                                 bool isVerboseAsm) {
+  return new XCoreTargetAsmStreamer(S, OS);
 }
 
 // Force static initialization.
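The XCoreMCTargetDesc.cpp hunk above (together with the registration change in the next hunk below) reflects the new asm-streamer registry split: rather than wrapping llvm::createAsmStreamer and attaching the target streamer as a side effect, a backend now registers a small factory that builds only its MCTargetStreamer, and common code constructs the generic streamer. A sketch of the shape such a factory takes for a hypothetical backend (MyTargetAsmStreamer and TheMyTarget are placeholders, not symbols from this patch):

#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"

namespace {
// A do-nothing target streamer; a real one overrides the emit* hooks it
// needs, as XCoreTargetAsmStreamer does for its cc_top/cc_bottom directives.
class MyTargetAsmStreamer : public llvm::MCTargetStreamer {
public:
  MyTargetAsmStreamer(llvm::MCStreamer &S, llvm::formatted_raw_ostream &OS)
      : MCTargetStreamer(S) { (void)OS; }
};
} // end anonymous namespace

// Matches the factory signature the registry now expects: return only the
// target-streamer wrapper; the enclosing MCStreamer takes ownership.
static llvm::MCTargetStreamer *
createMyTargetAsmStreamer(llvm::MCStreamer &S, llvm::formatted_raw_ostream &OS,
                          llvm::MCInstPrinter * /*InstPrint*/,
                          bool /*isVerboseAsm*/) {
  return new MyTargetAsmStreamer(S, OS);
}

// In the target's LLVMInitialize*TargetMC(), mirroring the next hunk:
//   llvm::TargetRegistry::RegisterAsmTargetStreamer(TheMyTarget,
//                                                   createMyTargetAsmStreamer);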
@@ -160,5 +156,6 @@ extern "C" void LLVMInitializeXCoreTargetMC() { TargetRegistry::RegisterMCInstPrinter(TheXCoreTarget, createXCoreMCInstPrinter); - TargetRegistry::RegisterAsmStreamer(TheXCoreTarget, createXCoreMCAsmStreamer); + TargetRegistry::RegisterAsmTargetStreamer(TheXCoreTarget, + createTargetAsmStreamer); } diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h index 0ff5961..28e0275 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h @@ -14,6 +14,8 @@ #ifndef LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCTARGETDESC_H #define LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCTARGETDESC_H +#include "llvm/Support/DataTypes.h" + namespace llvm { class Target; diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index f79b78b..5c7ea5e 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -65,7 +65,7 @@ namespace { // Complex Pattern Selectors. bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset); - bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, + bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) override; const char *getPassName() const override { @@ -108,12 +108,12 @@ bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base, } bool XCoreDAGToDAGISel:: -SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, +SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { SDValue Reg; - switch (ConstraintCode) { + switch (ConstraintID) { default: return true; - case 'm': // Memory. + case InlineAsm::Constraint_m: // Memory. switch (Op.getOpcode()) { default: return true; case XCoreISD::CPRelativeWrapper: diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 213ae4a..b20fc01 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -177,6 +177,12 @@ namespace llvm { const std::string &Constraint, MVT VT) const override; + unsigned getInlineAsmMemConstraint( + const std::string &ConstraintCode) const override { + // FIXME: Map different constraints differently. + return InlineAsm::Constraint_m; + } + // Expand specifics SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const; SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG) const; diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 5c666ae..1d569e8 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -208,8 +208,8 @@ bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) { MF.getFunction()->needsUnwindTableEntry(); } -const MCPhysReg* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) - const { +const MCPhysReg * +XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { // The callee saved registers LR & FP are explicitly handled during // emitPrologue & emitEpilogue and related functions. static const MCPhysReg CalleeSavedRegs[] = { diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h index 5d7721c..010fccd 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.h +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -29,8 +29,7 @@ public: /// Code Generation virtual methods... 
- const MCPhysReg * - getCalleeSavedRegs(const MachineFunction *MF =nullptr) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; BitVector getReservedRegs(const MachineFunction &MF) const override; diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 7998fc1..228dc1c 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -27,9 +27,10 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + : LLVMTargetMachine( + T, "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32", + TT, CPU, FS, Options, RM, CM, OL), TLOF(make_unique<XCoreTargetObjectFile>()), - DL("e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32"), Subtarget(TT, CPU, FS, *this) { initAsmInfo(); } diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index c5df07c..0d324ab 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -21,7 +21,6 @@ namespace llvm { class XCoreTargetMachine : public LLVMTargetMachine { std::unique_ptr<TargetLoweringObjectFile> TLOF; - const DataLayout DL; // Calculates type size & alignment XCoreSubtarget Subtarget; public: XCoreTargetMachine(const Target &T, StringRef TT, @@ -30,8 +29,10 @@ public: CodeGenOpt::Level OL); ~XCoreTargetMachine() override; - const DataLayout *getDataLayout() const override { return &DL; } - const XCoreSubtarget *getSubtargetImpl() const override { return &Subtarget; } + const XCoreSubtarget *getSubtargetImpl() const { return &Subtarget; } + const XCoreSubtarget *getSubtargetImpl(const Function &) const override { + return &Subtarget; + } // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 7e48ce3..46480bd 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -69,16 +69,15 @@ namespace { bool runOnSCC(CallGraphSCC &SCC) override; static char ID; // Pass identification, replacement for typeid explicit ArgPromotion(unsigned maxElements = 3) - : CallGraphSCCPass(ID), DL(nullptr), maxElements(maxElements) { + : CallGraphSCCPass(ID), maxElements(maxElements) { initializeArgPromotionPass(*PassRegistry::getPassRegistry()); } /// A vector used to hold the indices of a single GEP instruction typedef std::vector<uint64_t> IndicesVector; - const DataLayout *DL; private: - bool isDenselyPacked(Type *type); + bool isDenselyPacked(Type *type, const DataLayout &DL); bool canPaddingBeAccessed(Argument *Arg); CallGraphNode *PromoteArguments(CallGraphNode *CGN); bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const; @@ -109,9 +108,6 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) { bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { bool Changed = false, LocalChange; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; - do { // Iterate until we stop promoting from this SCC. LocalChange = false; // Attempt to promote arguments from all functions in this SCC. @@ -128,7 +124,7 @@ bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { } /// \brief Checks if a type could have padding bytes. 
-bool ArgPromotion::isDenselyPacked(Type *type) { +bool ArgPromotion::isDenselyPacked(Type *type, const DataLayout &DL) { // There is no size information, so be conservative. if (!type->isSized()) @@ -136,7 +132,7 @@ bool ArgPromotion::isDenselyPacked(Type *type) { // If the alloc size is not equal to the storage size, then there are padding // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128. - if (!DL || DL->getTypeSizeInBits(type) != DL->getTypeAllocSizeInBits(type)) + if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type)) return false; if (!isa<CompositeType>(type)) @@ -144,19 +140,20 @@ bool ArgPromotion::isDenselyPacked(Type *type) { // For homogenous sequential types, check for padding within members. if (SequentialType *seqTy = dyn_cast<SequentialType>(type)) - return isa<PointerType>(seqTy) || isDenselyPacked(seqTy->getElementType()); + return isa<PointerType>(seqTy) || + isDenselyPacked(seqTy->getElementType(), DL); // Check for padding within and between elements of a struct. StructType *StructTy = cast<StructType>(type); - const StructLayout *Layout = DL->getStructLayout(StructTy); + const StructLayout *Layout = DL.getStructLayout(StructTy); uint64_t StartPos = 0; for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) { Type *ElTy = StructTy->getElementType(i); - if (!isDenselyPacked(ElTy)) + if (!isDenselyPacked(ElTy, DL)) return false; if (StartPos != Layout->getElementOffsetInBits(i)) return false; - StartPos += DL->getTypeAllocSizeInBits(ElTy); + StartPos += DL.getTypeAllocSizeInBits(ElTy); } return true; @@ -236,6 +233,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { // IR, while in the callee the classification is determined dynamically based // on the number of registers consumed so far. if (F->isVarArg()) return nullptr; + const DataLayout &DL = F->getParent()->getDataLayout(); // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. @@ -250,8 +248,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { // packed or if we can prove the padding bytes are never accessed. This does // not apply to inalloca. bool isSafeToPromote = - PtrArg->hasByValAttr() && - (isDenselyPacked(AgTy) || !canPaddingBeAccessed(PtrArg)); + PtrArg->hasByValAttr() && + (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg)); if (isSafeToPromote) { if (StructType *STy = dyn_cast<StructType>(AgTy)) { if (maxElements > 0 && STy->getNumElements() > maxElements) { @@ -310,9 +308,9 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { /// AllCallersPassInValidPointerForArgument - Return true if we can prove that /// all callees pass in a valid pointer for the specified function argument. -static bool AllCallersPassInValidPointerForArgument(Argument *Arg, - const DataLayout *DL) { +static bool AllCallersPassInValidPointerForArgument(Argument *Arg) { Function *Callee = Arg->getParent(); + const DataLayout &DL = Callee->getParent()->getDataLayout(); unsigned ArgNo = Arg->getArgNo(); @@ -430,7 +428,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, GEPIndicesSet ToPromote; // If the pointer is always valid, any load with first index 0 is valid. 
- if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg, DL)) + if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg)) SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); // First, iterate the entry block and mark loads of (geps of) arguments as @@ -586,7 +584,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, FunctionType *FTy = F->getFunctionType(); std::vector<Type*> Params; - typedef std::set<IndicesVector> ScalarizeTable; + typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable; // ScalarizedElements - If we are promoting a pointer that has elements // accessed out of it, keep track of which elements are accessed so that we @@ -623,8 +621,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Simple byval argument? Just add all the struct element types. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); StructType *STy = cast<StructType>(AgTy); - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) - Params.push_back(STy->getElementType(i)); + Params.insert(Params.end(), STy->element_begin(), STy->element_end()); ++NumByValArgsPromoted; } else if (!ArgsToPromote.count(I)) { // Unchanged argument @@ -647,7 +644,11 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, ScalarizeTable &ArgIndices = ScalarizedElements[I]; for (User *U : I->users()) { Instruction *UI = cast<Instruction>(U); - assert(isa<LoadInst>(UI) || isa<GetElementPtrInst>(UI)); + Type *SrcTy; + if (LoadInst *L = dyn_cast<LoadInst>(UI)) + SrcTy = L->getType(); + else + SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType(); IndicesVector Indices; Indices.reserve(UI->getNumOperands() - 1); // Since loads will only have a single operand, and GEPs only a single @@ -659,7 +660,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // GEPs with a single 0 index can be merged with direct loads if (Indices.size() == 1 && Indices.front() == 0) Indices.clear(); - ArgIndices.insert(Indices); + ArgIndices.insert(std::make_pair(SrcTy, Indices)); LoadInst *OrigLoad; if (LoadInst *L = dyn_cast<LoadInst>(UI)) OrigLoad = L; @@ -673,11 +674,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { // not allowed to dereference ->begin() if size() is 0 - Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), *SI)); + Params.push_back( + GetElementPtrInst::getIndexedType(I->getType(), SI->second)); assert(Params.back()); } - if (ArgIndices.size() == 1 && ArgIndices.begin()->empty()) + if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty()) ++NumArgumentsPromoted; else ++NumAggregatesPromoted; @@ -768,9 +770,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); - Value *Idx = GetElementPtrInst::Create(*AI, Idxs, - (*AI)->getName()+"."+utostr(i), - Call); + Value *Idx = GetElementPtrInst::Create( + STy, *AI, Idxs, (*AI)->getName() + "." + utostr(i), Call); // TODO: Tell AA about the new values? 
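//
// Both ArgumentPromotion hunks above serve one API migration: a GEP's source
// element type is now carried explicitly, so ScalarizeTable pairs each index
// vector with the Type* recovered from the load or GEP that produced it, and
// GetElementPtrInst::Create takes that type as a new first parameter instead
// of deriving it from the pointer operand. Shape of the change, with Ptr,
// Idxs and InsertPt standing in for the values used in this pass:
//
//   // Before: element type implied by Ptr's pointer type.
//   Value *V = GetElementPtrInst::Create(Ptr, Idxs, "name", InsertPt);
//
//   // After: element type passed explicitly, as in the calls above.
//   Value *V = GetElementPtrInst::Create(ElTy, Ptr, Idxs, "name", InsertPt);
//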
Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call)); } @@ -783,12 +784,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { Value *V = *AI; - LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, *SI)]; - if (!SI->empty()) { - Ops.reserve(SI->size()); + LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, SI->second)]; + if (!SI->second.empty()) { + Ops.reserve(SI->second.size()); Type *ElTy = V->getType(); - for (IndicesVector::const_iterator II = SI->begin(), - IE = SI->end(); II != IE; ++II) { + for (IndicesVector::const_iterator II = SI->second.begin(), + IE = SI->second.end(); + II != IE; ++II) { // Use i32 to index structs, and i64 for others (pointers/arrays). // This satisfies GEP constraints. Type *IdxTy = (ElTy->isStructTy() ? @@ -799,7 +801,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II); } // And create a GEP to extract those indices. - V = GetElementPtrInst::Create(V, Ops, V->getName()+".idx", Call); + V = GetElementPtrInst::Create(SI->first, V, Ops, + V->getName() + ".idx", Call); Ops.clear(); AA.copyValue(OrigLoad->getOperand(0), V); } @@ -903,10 +906,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); - Value *Idx = - GetElementPtrInst::Create(TheAlloca, Idxs, - TheAlloca->getName()+"."+Twine(i), - InsertPt); + Value *Idx = GetElementPtrInst::Create( + AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i), + InsertPt); I2->setName(I->getName()+"."+Twine(i)); new StoreInst(I2++, Idx, InsertPt); } @@ -939,7 +941,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, while (!I->use_empty()) { if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) { - assert(ArgIndices.begin()->empty() && + assert(ArgIndices.begin()->second.empty() && "Load element should sort to front!"); I2->setName(I->getName()+".val"); LI->replaceAllUsesWith(I2); @@ -961,7 +963,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, Function::arg_iterator TheArg = I2; for (ScalarizeTable::iterator It = ArgIndices.begin(); - *It != Operands; ++It, ++TheArg) { + It->second != Operands; ++It, ++TheArg) { assert(It != ArgIndices.end() && "GEP not handled??"); } diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index 0b6ade9..8ce7646 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -52,7 +52,6 @@ namespace { // alignment to a concrete value. unsigned getAlignment(GlobalVariable *GV) const; - const DataLayout *DL; }; } @@ -89,32 +88,22 @@ static bool IsBetterCanonical(const GlobalVariable &A, return A.hasUnnamedAddr(); } -bool ConstantMerge::hasKnownAlignment(GlobalVariable *GV) const { - return DL || GV->getAlignment() != 0; -} - unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const { unsigned Align = GV->getAlignment(); if (Align) return Align; - if (DL) - return DL->getPreferredAlignment(GV); - return 0; + return GV->getParent()->getDataLayout().getPreferredAlignment(GV); } bool ConstantMerge::runOnModule(Module &M) { - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; // Find all the globals that are marked "used". These cannot be merged. 
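//
// The deleted DataLayoutPass lookup above is one instance of a pattern this
// patch applies across the tree: instead of caching a nullable
// "const DataLayout *" and guarding every use, passes now take a reference
// straight from the IR. Before/after sketch, using only calls that appear in
// these hunks:
//
//   // Before: may be null, every caller must check.
//   DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
//   const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
//   if (!DL) return false;
//
//   // After: always available from the Module (or F->getParent()).
//   const DataLayout &DL = M.getDataLayout();
//   unsigned Align = DL.getPreferredAlignment(GV);
//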
SmallPtrSet<const GlobalValue*, 8> UsedGlobals; FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals); FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals); - - // Map unique <constants, has-unknown-alignment> pairs to globals. We don't - // want to merge globals of unknown alignment with those of explicit - // alignment. If we have DataLayout, we always know the alignment. - DenseMap<PointerIntPair<Constant*, 1, bool>, GlobalVariable*> CMap; + + // Map unique constants to globals. + DenseMap<Constant *, GlobalVariable *> CMap; // Replacements - This vector contains a list of replacements to perform. SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements; @@ -156,8 +145,7 @@ bool ConstantMerge::runOnModule(Module &M) { Constant *Init = GV->getInitializer(); // Check to see if the initializer is already known. - PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV)); - GlobalVariable *&Slot = CMap[Pair]; + GlobalVariable *&Slot = CMap[Init]; // If this is the first constant we find or if the old one is local, // replace with the current one. If the current is externally visible @@ -188,8 +176,7 @@ bool ConstantMerge::runOnModule(Module &M) { Constant *Init = GV->getInitializer(); // Check to see if the initializer is already known. - PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV)); - GlobalVariable *Slot = CMap[Pair]; + GlobalVariable *Slot = CMap[Init]; if (!Slot || Slot == GV) continue; diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index 0c844fe..ba04c80 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -24,6 +24,7 @@ #include "llvm/Transforms/Utils/CtorUtils.h" #include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/Pass.h" +#include <unordered_map> using namespace llvm; #define DEBUG_TYPE "globaldce" @@ -47,6 +48,7 @@ namespace { private: SmallPtrSet<GlobalValue*, 32> AliveGlobals; SmallPtrSet<Constant *, 8> SeenConstants; + std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; /// GlobalIsNeeded - mark the specific global value as needed, and /// recursively mark anything that it uses as also needed. @@ -78,6 +80,17 @@ bool GlobalDCE::runOnModule(Module &M) { // Remove empty functions from the global ctors list. Changed |= optimizeGlobalCtorsList(M, isEmptyFunction); + // Collect the set of members for each comdat. + for (Function &F : M) + if (Comdat *C = F.getComdat()) + ComdatMembers.insert(std::make_pair(C, &F)); + for (GlobalVariable &GV : M.globals()) + if (Comdat *C = GV.getComdat()) + ComdatMembers.insert(std::make_pair(C, &GV)); + for (GlobalAlias &GA : M.aliases()) + if (Comdat *C = GA.getComdat()) + ComdatMembers.insert(std::make_pair(C, &GA)); + // Loop over the module, adding globals which are obviously necessary. 
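//
// The three collection loops above build ComdatMembers once, so that
// GlobalIsNeeded (next hunk) can mark every member of a comdat with a single
// equal_range lookup instead of rescanning all functions, globals and
// aliases on each query. A self-contained sketch of the same
// collect-then-query pattern in plain C++ (names are illustrative only):
#include <string>
#include <unordered_map>
#include <vector>

int main() {
  // Key: comdat; value: member. Duplicate keys are the point of a multimap.
  std::unordered_multimap<std::string, std::string> ComdatMembers;
  ComdatMembers.insert({"C", "f1"});
  ComdatMembers.insert({"C", "gv1"});
  ComdatMembers.insert({"D", "f2"});

  // Marking comdat "C" now costs O(|members of C|), not O(|module|).
  std::vector<std::string> Needed;
  auto Range = ComdatMembers.equal_range("C");
  for (auto I = Range.first; I != Range.second; ++I)
    Needed.push_back(I->second); // visits f1 and gv1 only
}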
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { Changed |= RemoveUnusedGlobalValue(*I); @@ -177,6 +190,7 @@ bool GlobalDCE::runOnModule(Module &M) { // Make sure that all memory is released AliveGlobals.clear(); SeenConstants.clear(); + ComdatMembers.clear(); return Changed; } @@ -188,17 +202,9 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) { if (!AliveGlobals.insert(G).second) return; - Module *M = G->getParent(); if (Comdat *C = G->getComdat()) { - for (Function &F : *M) - if (F.getComdat() == C) - GlobalIsNeeded(&F); - for (GlobalVariable &GV : M->globals()) - if (GV.getComdat() == C) - GlobalIsNeeded(&GV); - for (GlobalAlias &GA : M->aliases()) - if (GA.getComdat() == C) - GlobalIsNeeded(&GA); + for (auto &&CM : make_range(ComdatMembers.equal_range(C))) + GlobalIsNeeded(CM.second); } if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) { diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 45e04f1..20b41fb 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" @@ -38,7 +39,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/CtorUtils.h" #include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -86,7 +86,6 @@ namespace { const GlobalStatus &GS); bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn); - const DataLayout *DL; TargetLibraryInfo *TLI; SmallSet<const Comdat *, 8> NotDiscardableComdats; }; @@ -269,7 +268,7 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, /// quick scan over the use list to clean up the easy and obvious cruft. This /// returns true if it made a change. static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, - const DataLayout *DL, + const DataLayout &DL, TargetLibraryInfo *TLI) { bool Changed = false; // Note that we need to use a weak value handle for the worklist items. When @@ -318,8 +317,8 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, // and will invalidate our notion of what Init is. Constant *SubInit = nullptr; if (!isa<ConstantExpr>(GEP->getOperand(0))) { - ConstantExpr *CE = - dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, DL, TLI)); + ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>( + ConstantFoldInstruction(GEP, DL, TLI)); if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); @@ -580,8 +579,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { Idxs.push_back(NullInt); for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) Idxs.push_back(GEPI->getOperand(i)); - NewPtr = GetElementPtrInst::Create(NewPtr, Idxs, - GEPI->getName()+"."+Twine(Val),GEPI); + NewPtr = GetElementPtrInst::Create( + NewPtr->getType()->getPointerElementType(), NewPtr, Idxs, + GEPI->getName() + "." + Twine(Val), GEPI); } } GEP->replaceAllUsesWith(NewPtr); @@ -739,7 +739,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { /// if the loaded value is dynamically null, then we know that they cannot be /// reachable with a null optimize away the load. 
static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, - const DataLayout *DL, + const DataLayout &DL, TargetLibraryInfo *TLI) { bool Changed = false; @@ -802,7 +802,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, /// ConstantPropUsersOf - Walk the use list of V, constant folding all of the /// instructions that are foldable. -static void ConstantPropUsersOf(Value *V, const DataLayout *DL, +static void ConstantPropUsersOf(Value *V, const DataLayout &DL, TargetLibraryInfo *TLI) { for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; ) if (Instruction *I = dyn_cast<Instruction>(*UI++)) @@ -822,12 +822,10 @@ static void ConstantPropUsersOf(Value *V, const DataLayout *DL, /// the specified malloc. Because it is always the result of the specified /// malloc, there is no reason to actually DO the malloc. Instead, turn the /// malloc into a global, and any loads of GV as uses of the new global. -static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, - CallInst *CI, - Type *AllocTy, - ConstantInt *NElements, - const DataLayout *DL, - TargetLibraryInfo *TLI) { +static GlobalVariable * +OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, + ConstantInt *NElements, const DataLayout &DL, + TargetLibraryInfo *TLI) { DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); Type *GlobalType; @@ -1167,7 +1165,8 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, InsertedScalarizedValues, PHIsToRewrite), LI->getName()+".f"+Twine(FieldNo), LI); - } else if (PHINode *PN = dyn_cast<PHINode>(V)) { + } else { + PHINode *PN = cast<PHINode>(V); // PN's type is pointer to struct. Make a new PHI of pointer to struct // field. @@ -1181,8 +1180,6 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, PN->getName()+".f"+Twine(FieldNo), PN); Result = NewPN; PHIsToRewrite.push_back(std::make_pair(PN, FieldNo)); - } else { - llvm_unreachable("Unknown usable value"); } return FieldVals[FieldNo] = Result; @@ -1224,7 +1221,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, GEPIdx.push_back(GEPI->getOperand(1)); GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end()); - Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx, + Value *NGEPI = GetElementPtrInst::Create(GEPI->getResultElementType(), NewPtr, GEPIdx, GEPI->getName(), GEPI); GEPI->replaceAllUsesWith(NGEPI); GEPI->eraseFromParent(); @@ -1271,7 +1268,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, /// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break /// it up into multiple allocations of arrays of the fields. 
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, - Value *NElems, const DataLayout *DL, + Value *NElems, const DataLayout &DL, const TargetLibraryInfo *TLI) { DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); Type *MAT = getMallocAllocatedType(CI, TLI); @@ -1301,10 +1298,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, GV->getThreadLocalMode()); FieldGlobals.push_back(NGV); - unsigned TypeSize = DL->getTypeAllocSize(FieldTy); + unsigned TypeSize = DL.getTypeAllocSize(FieldTy); if (StructType *ST = dyn_cast<StructType>(FieldTy)) - TypeSize = DL->getStructLayout(ST)->getSizeInBytes(); - Type *IntPtrTy = DL->getIntPtrType(CI->getType()); + TypeSize = DL.getStructLayout(ST)->getSizeInBytes(); + Type *IntPtrTy = DL.getIntPtrType(CI->getType()); Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, ConstantInt::get(IntPtrTy, TypeSize), NElems, nullptr, @@ -1459,16 +1456,12 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, /// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a /// pointer global variable with a single value stored it that is a malloc or /// cast of malloc. -static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, - CallInst *CI, +static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, Type *AllocTy, AtomicOrdering Ordering, Module::global_iterator &GVI, - const DataLayout *DL, + const DataLayout &DL, TargetLibraryInfo *TLI) { - if (!DL) - return false; - // If this is a malloc of an abstract type, don't touch it. if (!AllocTy->isSized()) return false; @@ -1504,7 +1497,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // Restrict this transformation to only working on small allocations // (2048 bytes currently), as we don't want to introduce a 16M global or // something. - if (NElements->getZExtValue() * DL->getTypeAllocSize(AllocTy) < 2048) { + if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) { GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI); return true; } @@ -1534,8 +1527,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // If this is a fixed size array, transform the Malloc to be an alloc of // structs. malloc [100 x struct],1 -> malloc struct, 100 if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) { - Type *IntPtrTy = DL->getIntPtrType(CI->getType()); - unsigned TypeSize = DL->getStructLayout(AllocSTy)->getSizeInBytes(); + Type *IntPtrTy = DL.getIntPtrType(CI->getType()); + unsigned TypeSize = DL.getStructLayout(AllocSTy)->getSizeInBytes(); Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, @@ -1563,7 +1556,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, AtomicOrdering Ordering, Module::global_iterator &GVI, - const DataLayout *DL, + const DataLayout &DL, TargetLibraryInfo *TLI) { // Ignore no-op GEPs and bitcasts. 
StoredOnceVal = StoredOnceVal->stripPointerCasts(); @@ -1733,6 +1726,7 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, Module::global_iterator &GVI, const GlobalStatus &GS) { + auto &DL = GV->getParent()->getDataLayout(); // If this is a first class global and has only one accessing function // and this function is main (which we know is not recursive), we replace // the global with a local alloca in this function. @@ -1804,12 +1798,10 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, ++NumMarked; return true; } else if (!GV->getInitializer()->getType()->isSingleValueType()) { - if (DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>()) { - const DataLayout &DL = DLP->getDataLayout(); - if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) { - GVI = FirstNewGV; // Don't skip the newly produced globals! - return true; - } + const DataLayout &DL = GV->getParent()->getDataLayout(); + if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) { + GVI = FirstNewGV; // Don't skip the newly produced globals! + return true; } } else if (GS.StoredType == GlobalStatus::StoredOnce) { // If the initial value for the global was an undef value, and if only @@ -1954,6 +1946,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { // Simplify the initializer. if (GV->hasInitializer()) if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) { + auto &DL = M.getDataLayout(); Constant *New = ConstantFoldConstantExpression(CE, DL, TLI); if (New && New != CE) GV->setInitializer(New); @@ -1971,9 +1964,8 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { static inline bool isSimpleEnoughValueToCommit(Constant *C, - SmallPtrSetImpl<Constant*> &SimpleConstants, - const DataLayout *DL); - + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL); /// isSimpleEnoughValueToCommit - Return true if the specified constant can be /// handled by the code generator. We don't want to generate something like: @@ -1983,9 +1975,10 @@ isSimpleEnoughValueToCommit(Constant *C, /// This function should be called if C was not found (but just got inserted) /// in SimpleConstants to avoid having to rescan the same constants all the /// time. -static bool isSimpleEnoughValueToCommitHelper(Constant *C, - SmallPtrSetImpl<Constant*> &SimpleConstants, - const DataLayout *DL) { +static bool +isSimpleEnoughValueToCommitHelper(Constant *C, + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL) { // Simple global addresses are supported, do not allow dllimport or // thread-local globals. if (auto *GV = dyn_cast<GlobalValue>(C)) @@ -2019,8 +2012,8 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C, case Instruction::PtrToInt: // int <=> ptr is fine if the int type is the same size as the // pointer type. - if (!DL || DL->getTypeSizeInBits(CE->getType()) != - DL->getTypeSizeInBits(CE->getOperand(0)->getType())) + if (DL.getTypeSizeInBits(CE->getType()) != + DL.getTypeSizeInBits(CE->getOperand(0)->getType())) return false; return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); @@ -2042,8 +2035,8 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C, static inline bool isSimpleEnoughValueToCommit(Constant *C, - SmallPtrSetImpl<Constant*> &SimpleConstants, - const DataLayout *DL) { + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL) { // If we already checked this constant, we win. 
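//
// The early-return just below leans on a standard idiom: set insert()
// returns an (iterator, bool) pair whose .second says whether the element
// was newly inserted, so a repeat query short-circuits -- memoizing the
// check and breaking cycles through self-referential constants. The same
// idiom in self-contained form (checkOnce is an illustrative stand-in, not
// LLVM API):
#include <cassert>
#include <set>

static bool checkOnce(std::set<int> &Seen, int C) {
  if (!Seen.insert(C).second)
    return true; // seen before: treat as already proven, don't rescan
  // ... the expensive structural check would run here ...
  return false;
}

int main() {
  std::set<int> Seen;
  assert(!checkOnce(Seen, 42)); // first visit does the real work
  assert(checkOnce(Seen, 42));  // second visit short-circuits
}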
if (!SimpleConstants.insert(C).second) return true; @@ -2174,8 +2167,8 @@ namespace { /// Once an evaluation call fails, the evaluation object should not be reused. class Evaluator { public: - Evaluator(const DataLayout *DL, const TargetLibraryInfo *TLI) - : DL(DL), TLI(TLI) { + Evaluator(const DataLayout &DL, const TargetLibraryInfo *TLI) + : DL(DL), TLI(TLI) { ValueStack.emplace_back(); } @@ -2249,7 +2242,7 @@ private: /// simple enough to live in a static initializer of a global. SmallPtrSet<Constant*, 8> SimpleConstants; - const DataLayout *DL; + const DataLayout &DL; const TargetLibraryInfo *TLI; }; @@ -2498,9 +2491,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, Value *Ptr = PtrArg->stripPointerCasts(); if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { Type *ElemTy = cast<PointerType>(GV->getType())->getElementType(); - if (DL && !Size->isAllOnesValue() && + if (!Size->isAllOnesValue() && Size->getValue().getLimitedValue() >= - DL->getTypeStoreSize(ElemTy)) { + DL.getTypeStoreSize(ElemTy)) { Invariants.insert(GV); DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV << "\n"); @@ -2689,7 +2682,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, /// EvaluateStaticConstructor - Evaluate static constructors in the function, if /// we can. Return true if we can, false otherwise. -static bool EvaluateStaticConstructor(Function *F, const DataLayout *DL, +static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, const TargetLibraryInfo *TLI) { // Call the function. Evaluator Eval(DL, TLI); @@ -3040,8 +3033,7 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { bool GlobalOpt::runOnModule(Module &M) { bool Changed = false; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; + auto &DL = M.getDataLayout(); TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); bool LocalChange = true; diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 305ad7a..3aa4ee5 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" @@ -29,7 +30,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -72,8 +72,8 @@ Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime) InlineLimit : Threshold), InsertLifetime(InsertLifetime) {} -/// getAnalysisUsage - For this class, we declare that we require and preserve -/// the call graph. If the derived class implements this method, it should +/// For this class, we declare that we require and preserve the call graph. +/// If the derived class implements this method, it should /// always explicitly call the implementation here. 
void Inliner::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AliasAnalysis>(); @@ -111,18 +111,17 @@ static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) { Caller->addFnAttr(Attribute::StackProtect); } -/// InlineCallIfPossible - If it is possible to inline the specified call site, +/// If it is possible to inline the specified call site, /// do so and update the CallGraph for this operation. /// /// This function also does some basic book-keeping to update the IR. The /// InlinedArrayAllocas map keeps track of any allocas that are already -/// available from other functions inlined into the caller. If we are able to +/// available from other functions inlined into the caller. If we are able to /// inline this call site we attempt to reuse already available allocas or add /// any new allocas to the set if not possible. static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, - int InlineHistory, bool InsertLifetime, - const DataLayout *DL) { + int InlineHistory, bool InsertLifetime) { Function *Callee = CS.getCalledFunction(); Function *Caller = CS.getCaller(); @@ -198,11 +197,6 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, unsigned Align1 = AI->getAlignment(), Align2 = AvailableAlloca->getAlignment(); - // If we don't have data layout information, and only one alloca is using - // the target default, then we can't safely merge them because we can't - // pick the greater alignment. - if (!DL && (!Align1 || !Align2) && Align1 != Align2) - continue; // The available alloca has to be in the right function, not in some other // function in this SCC. @@ -223,8 +217,8 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, if (Align1 != Align2) { if (!Align1 || !Align2) { - assert(DL && "DataLayout required to compare default alignments"); - unsigned TypeAlign = DL->getABITypeAlignment(AI->getAllocatedType()); + const DataLayout &DL = Caller->getParent()->getDataLayout(); + unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType()); Align1 = Align1 ? Align1 : TypeAlign; Align2 = Align2 ? Align2 : TypeAlign; @@ -300,8 +294,7 @@ static void emitAnalysis(CallSite CS, const Twine &Msg) { emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg); } -/// shouldInline - Return true if the inliner should attempt to inline -/// at the given CallSite. +/// Return true if the inliner should attempt to inline at the given CallSite. bool Inliner::shouldInline(CallSite CS) { InlineCost IC = getInlineCost(CS); @@ -415,7 +408,7 @@ bool Inliner::shouldInline(CallSite CS) { return true; } -/// InlineHistoryIncludes - Return true if the specified inline history ID +/// Return true if the specified inline history ID /// indicates an inline history that includes the specified function. static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) { @@ -432,8 +425,6 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, bool Inliner::runOnSCC(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); const TargetLibraryInfo *TLI = TLIP ? 
&TLIP->getTLI() : nullptr; AliasAnalysis *AA = &getAnalysis<AliasAnalysis>(); @@ -495,7 +486,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { InlinedArrayAllocasTy InlinedArrayAllocas; - InlineFunctionInfo InlineInfo(&CG, DL, AA, ACT); + InlineFunctionInfo InlineInfo(&CG, AA, ACT); // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. @@ -553,7 +544,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // Attempt to inline the function. if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas, - InlineHistoryID, InsertLifetime, DL)) { + InlineHistoryID, InsertLifetime)) { emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc, Twine(Callee->getName() + " will not be inlined into " + @@ -625,14 +616,13 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { return Changed; } -// doFinalization - Remove now-dead linkonce functions at the end of -// processing to avoid breaking the SCC traversal. +/// Remove now-dead linkonce functions at the end of +/// processing to avoid breaking the SCC traversal. bool Inliner::doFinalization(CallGraph &CG) { return removeDeadFunctions(CG); } -/// removeDeadFunctions - Remove dead functions that are not included in -/// DNR (Do Not Remove) list. +/// Remove dead functions that are not included in DNR (Do Not Remove) list. bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { SmallVector<CallGraphNode*, 16> FunctionsToRemove; diff --git a/lib/Transforms/IPO/LowerBitSets.cpp b/lib/Transforms/IPO/LowerBitSets.cpp index 0a22a80..fe00d92 100644 --- a/lib/Transforms/IPO/LowerBitSets.cpp +++ b/lib/Transforms/IPO/LowerBitSets.cpp @@ -16,6 +16,7 @@ #include "llvm/Transforms/IPO.h" #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Triple.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/GlobalVariable.h" @@ -31,10 +32,17 @@ using namespace llvm; #define DEBUG_TYPE "lowerbitsets" -STATISTIC(NumBitSetsCreated, "Number of bitsets created"); +STATISTIC(ByteArraySizeBits, "Byte array size in bits"); +STATISTIC(ByteArraySizeBytes, "Byte array size in bytes"); +STATISTIC(NumByteArraysCreated, "Number of byte arrays created"); STATISTIC(NumBitSetCallsLowered, "Number of bitset calls lowered"); STATISTIC(NumBitSetDisjointSets, "Number of disjoint sets of bitsets"); +static cl::opt<bool> AvoidReuse( + "lowerbitsets-avoid-reuse", + cl::desc("Try to avoid reuse of byte array addresses using aliases"), + cl::Hidden, cl::init(true)); + bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const { if (Offset < ByteOffset) return false; @@ -46,11 +54,11 @@ bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const { if (BitOffset >= BitSize) return false; - return (Bits[BitOffset / 8] >> (BitOffset % 8)) & 1; + return Bits.count(BitOffset); } bool BitSetInfo::containsValue( - const DataLayout *DL, + const DataLayout &DL, const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout, Value *V, uint64_t COffset) const { if (auto GV = dyn_cast<GlobalVariable>(V)) { @@ -61,8 +69,8 @@ bool BitSetInfo::containsValue( } if (auto GEP = dyn_cast<GEPOperator>(V)) { - APInt APOffset(DL->getPointerSizeInBits(0), 0); - bool Result = GEP->accumulateConstantOffset(*DL, APOffset); + APInt APOffset(DL.getPointerSizeInBits(0), 0); + bool Result = GEP->accumulateConstantOffset(DL, APOffset); if (!Result) return false; COffset += APOffset.getZExtValue(); @@ -101,18 +109,15 @@ BitSetInfo BitSetBuilder::build() { BSI.ByteOffset = Min; BSI.AlignLog2 = 0; 
- // FIXME: Can probably do something smarter if all offsets are 0. if (Mask != 0) BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined); // Build the compressed bitset while normalizing the offsets against the // computed alignment. BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1; - uint64_t ByteSize = (BSI.BitSize + 7) / 8; - BSI.Bits.resize(ByteSize); for (uint64_t Offset : Offsets) { Offset >>= BSI.AlignLog2; - BSI.Bits[Offset / 8] |= 1 << (Offset % 8); + BSI.Bits.insert(Offset); } return BSI; @@ -147,15 +152,47 @@ void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) { FragmentMap[ObjIndex] = FragmentIndex; } +void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits, + uint64_t BitSize, uint64_t &AllocByteOffset, + uint8_t &AllocMask) { + // Find the smallest current allocation. + unsigned Bit = 0; + for (unsigned I = 1; I != BitsPerByte; ++I) + if (BitAllocs[I] < BitAllocs[Bit]) + Bit = I; + + AllocByteOffset = BitAllocs[Bit]; + + // Add our size to it. + unsigned ReqSize = AllocByteOffset + BitSize; + BitAllocs[Bit] = ReqSize; + if (Bytes.size() < ReqSize) + Bytes.resize(ReqSize); + + // Set our bits. + AllocMask = 1 << Bit; + for (uint64_t B : Bits) + Bytes[AllocByteOffset + B] |= AllocMask; +} + namespace { +struct ByteArrayInfo { + std::set<uint64_t> Bits; + uint64_t BitSize; + GlobalVariable *ByteArray; + Constant *Mask; +}; + struct LowerBitSets : public ModulePass { static char ID; LowerBitSets() : ModulePass(ID) { initializeLowerBitSetsPass(*PassRegistry::getPassRegistry()); } - const DataLayout *DL; + Module *M; + + bool LinkerSubsectionsViaSymbols; IntegerType *Int1Ty; IntegerType *Int8Ty; IntegerType *Int32Ty; @@ -169,20 +206,23 @@ struct LowerBitSets : public ModulePass { // Mapping from bitset mdstrings to the call sites that test them. 
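//
// ByteArrayBuilder::allocate above packs up to eight bit sets into one
// shared byte array: each set is assigned the least-loaded of the eight bit
// positions ("planes") plus a byte offset, and membership later reduces to
// Bytes[Offset + B] & Mask. A self-contained miniature of the same greedy
// scheme (MiniByteArrayBuilder is illustrative, not the LLVM class):
#include <algorithm>
#include <cstdint>
#include <set>
#include <vector>

struct MiniByteArrayBuilder {
  std::vector<uint8_t> Bytes;
  uint64_t BitAllocs[8] = {}; // bytes already used in each bit plane

  void allocate(const std::set<uint64_t> &Bits, uint64_t BitSize,
                uint64_t &AllocByteOffset, uint8_t &AllocMask) {
    // Pick the least-loaded plane, append this set's byte range to it.
    unsigned Bit = std::min_element(BitAllocs, BitAllocs + 8) - BitAllocs;
    AllocByteOffset = BitAllocs[Bit];
    BitAllocs[Bit] = AllocByteOffset + BitSize;
    if (Bytes.size() < AllocByteOffset + BitSize)
      Bytes.resize(AllocByteOffset + BitSize);
    AllocMask = uint8_t(1) << Bit;
    for (uint64_t B : Bits)
      Bytes[AllocByteOffset + B] |= AllocMask;
  }
};

int main() {
  MiniByteArrayBuilder BAB;
  uint64_t Off1, Off2;
  uint8_t Mask1, Mask2;
  BAB.allocate({0, 2}, 3, Off1, Mask1); // plane 0: Mask1 == 0x01, Off1 == 0
  BAB.allocate({1}, 2, Off2, Mask2);    // plane 1: Mask2 == 0x02, Off2 == 0
  // Both sets share Bytes[0..1] and are told apart purely by their masks.
  return (BAB.Bytes[2] & Mask1) && (BAB.Bytes[1] & Mask2) ? 0 : 1;
}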
DenseMap<MDString *, std::vector<CallInst *>> BitSetTestCallSites; + std::vector<ByteArrayInfo> ByteArrayInfos; + BitSetInfo buildBitSet(MDString *BitSet, const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout); - Value *createBitSetTest(IRBuilder<> &B, const BitSetInfo &BSI, - GlobalVariable *BitSetGlobal, Value *BitOffset); + ByteArrayInfo *createByteArray(BitSetInfo &BSI); + void allocateByteArrays(); + Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI, + Value *BitOffset); Value * - lowerBitSetCall(CallInst *CI, const BitSetInfo &BSI, - GlobalVariable *BitSetGlobal, GlobalVariable *CombinedGlobal, + lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI, + GlobalVariable *CombinedGlobal, const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout); - void buildBitSetsFromGlobals(Module &M, - const std::vector<MDString *> &BitSets, + void buildBitSetsFromGlobals(const std::vector<MDString *> &BitSets, const std::vector<GlobalVariable *> &Globals); - bool buildBitSets(Module &M); - bool eraseBitSetMetadata(Module &M); + bool buildBitSets(); + bool eraseBitSetMetadata(); bool doInitialization(Module &M) override; bool runOnModule(Module &M) override; @@ -198,19 +238,21 @@ char LowerBitSets::ID = 0; ModulePass *llvm::createLowerBitSetsPass() { return new LowerBitSets; } -bool LowerBitSets::doInitialization(Module &M) { - DL = M.getDataLayout(); - if (!DL) - report_fatal_error("Data layout required"); +bool LowerBitSets::doInitialization(Module &Mod) { + M = &Mod; + const DataLayout &DL = Mod.getDataLayout(); - Int1Ty = Type::getInt1Ty(M.getContext()); - Int8Ty = Type::getInt8Ty(M.getContext()); - Int32Ty = Type::getInt32Ty(M.getContext()); + Triple TargetTriple(M->getTargetTriple()); + LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX(); + + Int1Ty = Type::getInt1Ty(M->getContext()); + Int8Ty = Type::getInt8Ty(M->getContext()); + Int32Ty = Type::getInt32Ty(M->getContext()); Int32PtrTy = PointerType::getUnqual(Int32Ty); - Int64Ty = Type::getInt64Ty(M.getContext()); - IntPtrTy = DL->getIntPtrType(M.getContext(), 0); + Int64Ty = Type::getInt64Ty(M->getContext()); + IntPtrTy = DL.getIntPtrType(M->getContext(), 0); - BitSetNM = M.getNamedMetadata("llvm.bitsets"); + BitSetNM = M->getNamedMetadata("llvm.bitsets"); BitSetTestCallSites.clear(); @@ -259,52 +301,128 @@ static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits, return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0)); } +ByteArrayInfo *LowerBitSets::createByteArray(BitSetInfo &BSI) { + // Create globals to stand in for byte arrays and masks. These never actually + // get initialized, we RAUW and erase them later in allocateByteArrays() once + // we know the offset and mask to use. 
+ auto ByteArrayGlobal = new GlobalVariable( + *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr); + auto MaskGlobal = new GlobalVariable( + *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr); + + ByteArrayInfos.emplace_back(); + ByteArrayInfo *BAI = &ByteArrayInfos.back(); + + BAI->Bits = BSI.Bits; + BAI->BitSize = BSI.BitSize; + BAI->ByteArray = ByteArrayGlobal; + BAI->Mask = ConstantExpr::getPtrToInt(MaskGlobal, Int8Ty); + return BAI; +} + +void LowerBitSets::allocateByteArrays() { + std::stable_sort(ByteArrayInfos.begin(), ByteArrayInfos.end(), + [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) { + return BAI1.BitSize > BAI2.BitSize; + }); + + std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size()); + + ByteArrayBuilder BAB; + for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) { + ByteArrayInfo *BAI = &ByteArrayInfos[I]; + + uint8_t Mask; + BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask); + + BAI->Mask->replaceAllUsesWith(ConstantInt::get(Int8Ty, Mask)); + cast<GlobalVariable>(BAI->Mask->getOperand(0))->eraseFromParent(); + } + + Constant *ByteArrayConst = ConstantDataArray::get(M->getContext(), BAB.Bytes); + auto ByteArray = + new GlobalVariable(*M, ByteArrayConst->getType(), /*isConstant=*/true, + GlobalValue::PrivateLinkage, ByteArrayConst); + + for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) { + ByteArrayInfo *BAI = &ByteArrayInfos[I]; + + Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0), + ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])}; + Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(ByteArray, Idxs); + + // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures + // that the pc-relative displacement is folded into the lea instead of the + // test instruction getting another displacement. + if (LinkerSubsectionsViaSymbols) { + BAI->ByteArray->replaceAllUsesWith(GEP); + } else { + GlobalAlias *Alias = GlobalAlias::create( + Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, M); + BAI->ByteArray->replaceAllUsesWith(Alias); + } + BAI->ByteArray->eraseFromParent(); + } + + ByteArraySizeBits = BAB.BitAllocs[0] + BAB.BitAllocs[1] + BAB.BitAllocs[2] + + BAB.BitAllocs[3] + BAB.BitAllocs[4] + BAB.BitAllocs[5] + + BAB.BitAllocs[6] + BAB.BitAllocs[7]; + ByteArraySizeBytes = BAB.Bytes.size(); +} + /// Build a test that bit BitOffset is set in BSI, where /// BitSetGlobal is a global containing the bits in BSI. -Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, const BitSetInfo &BSI, - GlobalVariable *BitSetGlobal, - Value *BitOffset) { - if (BSI.Bits.size() <= 8) { +Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, + ByteArrayInfo *&BAI, Value *BitOffset) { + if (BSI.BitSize <= 64) { // If the bit set is sufficiently small, we can avoid a load by bit testing // a constant. IntegerType *BitsTy; - if (BSI.Bits.size() <= 4) + if (BSI.BitSize <= 32) BitsTy = Int32Ty; else BitsTy = Int64Ty; uint64_t Bits = 0; - for (auto I = BSI.Bits.rbegin(), E = BSI.Bits.rend(); I != E; ++I) { - Bits <<= 8; - Bits |= *I; - } + for (auto Bit : BSI.Bits) + Bits |= uint64_t(1) << Bit; Constant *BitsConst = ConstantInt::get(BitsTy, Bits); return createMaskedBitTest(B, BitsConst, BitOffset); } else { - // TODO: We might want to use the memory variant of the bt instruction - // with the previously computed bit offset at -Os. This instruction does - // exactly what we want but has been benchmarked as being slower than open - // coding the load+bt. 
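//
// For sets of at most 64 bits the path above folds the whole bit set into an
// integer constant, so the membership test compiles to pure register
// arithmetic with no load. A standalone rendering of that encoding and of
// the shift-and-mask test the emitted IR performs:
#include <cassert>
#include <cstdint>
#include <set>

int main() {
  std::set<uint64_t> Offsets = {0, 2, 3}; // already alignment-normalized
  uint64_t Bits = 0;
  for (uint64_t Bit : Offsets)
    Bits |= uint64_t(1) << Bit;
  assert(Bits == 0xD); // 0b1101
  // Membership of offset N is one shift plus one mask.
  for (uint64_t N = 0; N != 4; ++N)
    assert(((Bits >> N) & 1) == (Offsets.count(N) ? 1u : 0u));
}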
- Value *BitSetGlobalOffset = - B.CreateLShr(BitOffset, ConstantInt::get(IntPtrTy, 5)); - Value *BitSetEntryAddr = B.CreateGEP( - ConstantExpr::getBitCast(BitSetGlobal, Int32PtrTy), BitSetGlobalOffset); - Value *BitSetEntry = B.CreateLoad(BitSetEntryAddr); - - return createMaskedBitTest(B, BitSetEntry, BitOffset); + if (!BAI) { + ++NumByteArraysCreated; + BAI = createByteArray(BSI); + } + + Constant *ByteArray = BAI->ByteArray; + if (!LinkerSubsectionsViaSymbols && AvoidReuse) { + // Each use of the byte array uses a different alias. This makes the + // backend less likely to reuse previously computed byte array addresses, + // improving the security of the CFI mechanism based on this pass. + ByteArray = GlobalAlias::create( + BAI->ByteArray->getType()->getElementType(), 0, + GlobalValue::PrivateLinkage, "bits_use", ByteArray, M); + } + + Value *ByteAddr = B.CreateGEP(ByteArray, BitOffset); + Value *Byte = B.CreateLoad(ByteAddr); + + Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask); + return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0)); } } /// Lower a llvm.bitset.test call to its implementation. Returns the value to /// replace the call with. Value *LowerBitSets::lowerBitSetCall( - CallInst *CI, const BitSetInfo &BSI, GlobalVariable *BitSetGlobal, + CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI, GlobalVariable *CombinedGlobal, const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) { Value *Ptr = CI->getArgOperand(0); + const DataLayout &DL = M->getDataLayout(); if (BSI.containsValue(DL, GlobalLayout, Ptr)) - return ConstantInt::getTrue(BitSetGlobal->getParent()->getContext()); + return ConstantInt::getTrue(CombinedGlobal->getParent()->getContext()); Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy); Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd( @@ -336,8 +454,8 @@ Value *LowerBitSets::lowerBitSetCall( Value *OffsetSHR = B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2)); Value *OffsetSHL = B.CreateShl( - PtrOffset, ConstantInt::get(IntPtrTy, DL->getPointerSizeInBits(0) - - BSI.AlignLog2)); + PtrOffset, + ConstantInt::get(IntPtrTy, DL.getPointerSizeInBits(0) - BSI.AlignLog2)); BitOffset = B.CreateOr(OffsetSHR, OffsetSHL); } @@ -353,7 +471,7 @@ Value *LowerBitSets::lowerBitSetCall( // Now that we know that the offset is in range and aligned, load the // appropriate bit from the bitset. - Value *Bit = createBitSetTest(ThenB, BSI, BitSetGlobal, BitOffset); + Value *Bit = createBitSetTest(ThenB, BSI, BAI, BitOffset); // The value we want is 0 if we came directly from the initial block // (having failed the range or alignment checks), or the loaded bit if @@ -368,14 +486,14 @@ Value *LowerBitSets::lowerBitSetCall( /// Given a disjoint set of bitsets and globals, layout the globals, build the /// bit sets and lower the llvm.bitset.test calls. void LowerBitSets::buildBitSetsFromGlobals( - Module &M, const std::vector<MDString *> &BitSets, const std::vector<GlobalVariable *> &Globals) { // Build a new global with the combined contents of the referenced globals. std::vector<Constant *> GlobalInits; + const DataLayout &DL = M->getDataLayout(); for (GlobalVariable *G : Globals) { GlobalInits.push_back(G->getInitializer()); - uint64_t InitSize = DL->getTypeAllocSize(G->getInitializer()->getType()); + uint64_t InitSize = DL.getTypeAllocSize(G->getInitializer()->getType()); // Compute the amount of padding required to align the next element to the // next power of 2. 
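//
// The OffsetSHR|OffsetSHL pair above is a rotate-right of the byte offset by
// AlignLog2: the alignment bits land at the top of the word, so the single
// unsigned "BitOffset < BitSize" range check that follows rejects misaligned
// pointers and out-of-range pointers at once. Standalone arithmetic
// demonstration, assuming 64-bit pointers and 0 < AlignLog2 < 64 so both
// shifts stay defined:
#include <cassert>
#include <cstdint>

static uint64_t rotateOffset(uint64_t PtrOffset, unsigned AlignLog2) {
  return (PtrOffset >> AlignLog2) | (PtrOffset << (64 - AlignLog2));
}

int main() {
  const unsigned AlignLog2 = 3; // bit set covers 8-byte-aligned addresses
  const uint64_t BitSize = 4;   // valid byte offsets: 0, 8, 16, 24
  assert(rotateOffset(16, AlignLog2) < BitSize);    // aligned, in range
  assert(!(rotateOffset(17, AlignLog2) < BitSize)); // misaligned: bits go high
  assert(!(rotateOffset(64, AlignLog2) < BitSize)); // aligned but past end
}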
@@ -391,13 +509,13 @@ void LowerBitSets::buildBitSetsFromGlobals( } if (!GlobalInits.empty()) GlobalInits.pop_back(); - Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits); + Constant *NewInit = ConstantStruct::getAnon(M->getContext(), GlobalInits); auto CombinedGlobal = - new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true, + new GlobalVariable(*M, NewInit->getType(), /*isConstant=*/true, GlobalValue::PrivateLinkage, NewInit); const StructLayout *CombinedGlobalLayout = - DL->getStructLayout(cast<StructType>(NewInit->getType())); + DL.getStructLayout(cast<StructType>(NewInit->getType())); // Compute the offsets of the original globals within the new global. DenseMap<GlobalVariable *, uint64_t> GlobalLayout; @@ -410,18 +528,12 @@ void LowerBitSets::buildBitSetsFromGlobals( // Build the bitset. BitSetInfo BSI = buildBitSet(BS, GlobalLayout); - // Create a global in which to store it. - ++NumBitSetsCreated; - Constant *BitsConst = ConstantDataArray::get(M.getContext(), BSI.Bits); - auto BitSetGlobal = new GlobalVariable( - M, BitsConst->getType(), /*isConstant=*/true, - GlobalValue::PrivateLinkage, BitsConst, BS->getString() + ".bits"); + ByteArrayInfo *BAI = 0; // Lower each call to llvm.bitset.test for this bitset. for (CallInst *CI : BitSetTestCallSites[BS]) { ++NumBitSetCallsLowered; - Value *Lowered = - lowerBitSetCall(CI, BSI, BitSetGlobal, CombinedGlobal, GlobalLayout); + Value *Lowered = lowerBitSetCall(CI, BSI, BAI, CombinedGlobal, GlobalLayout); CI->replaceAllUsesWith(Lowered); CI->eraseFromParent(); } @@ -436,20 +548,24 @@ void LowerBitSets::buildBitSetsFromGlobals( ConstantInt::get(Int32Ty, I * 2)}; Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr(CombinedGlobal, CombinedGlobalIdxs); - GlobalAlias *GAlias = GlobalAlias::create( - Globals[I]->getType()->getElementType(), - Globals[I]->getType()->getAddressSpace(), Globals[I]->getLinkage(), - "", CombinedGlobalElemPtr, &M); - GAlias->takeName(Globals[I]); - Globals[I]->replaceAllUsesWith(GAlias); + if (LinkerSubsectionsViaSymbols) { + Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr); + } else { + GlobalAlias *GAlias = GlobalAlias::create( + Globals[I]->getType()->getElementType(), + Globals[I]->getType()->getAddressSpace(), Globals[I]->getLinkage(), + "", CombinedGlobalElemPtr, M); + GAlias->takeName(Globals[I]); + Globals[I]->replaceAllUsesWith(GAlias); + } Globals[I]->eraseFromParent(); } } /// Lower all bit sets in this module. -bool LowerBitSets::buildBitSets(Module &M) { +bool LowerBitSets::buildBitSets() { Function *BitSetTestFunc = - M.getFunction(Intrinsic::getName(Intrinsic::bitset_test)); + M->getFunction(Intrinsic::getName(Intrinsic::bitset_test)); if (!BitSetTestFunc) return false; @@ -591,22 +707,24 @@ bool LowerBitSets::buildBitSets(Module &M) { }); // Build the bitsets from this disjoint set. 
- buildBitSetsFromGlobals(M, BitSets, OrderedGlobals); + buildBitSetsFromGlobals(BitSets, OrderedGlobals); } + allocateByteArrays(); + return true; } -bool LowerBitSets::eraseBitSetMetadata(Module &M) { +bool LowerBitSets::eraseBitSetMetadata() { if (!BitSetNM) return false; - M.eraseNamedMetadata(BitSetNM); + M->eraseNamedMetadata(BitSetNM); return true; } bool LowerBitSets::runOnModule(Module &M) { - bool Changed = buildBitSets(M); - Changed |= eraseBitSetMetadata(M); + bool Changed = buildBitSets(); + Changed |= eraseBitSetMetadata(); return Changed; } diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index b91ebf2..596674d 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -127,9 +127,8 @@ namespace { /// side of claiming that two functions are different). class FunctionComparator { public: - FunctionComparator(const DataLayout *DL, const Function *F1, - const Function *F2) - : FnL(F1), FnR(F2), DL(DL) {} + FunctionComparator(const Function *F1, const Function *F2) + : FnL(F1), FnR(F2) {} /// Test whether the two functions have equivalent behaviour. int compare(); @@ -292,8 +291,7 @@ private: /// Parts to be compared for each comparison stage, /// most significant stage first: /// 1. Address space. As numbers. - /// 2. Constant offset, (if "DataLayout *DL" field is not NULL, - /// using GEPOperator::accumulateConstantOffset method). + /// 2. Constant offset, (using GEPOperator::accumulateConstantOffset method). /// 3. Pointer operand type (using cmpType method). /// 4. Number of operands. /// 5. Compare operands, using cmpValues method. @@ -354,8 +352,6 @@ private: // The two functions undergoing comparison. const Function *FnL, *FnR; - const DataLayout *DL; - /// Assign serial numbers to values from left function, and values from /// right function. /// Explanation: @@ -394,14 +390,13 @@ private: class FunctionNode { AssertingVH<Function> F; - const DataLayout *DL; public: - FunctionNode(Function *F, const DataLayout *DL) : F(F), DL(DL) {} + FunctionNode(Function *F) : F(F) {} Function *getFunc() const { return F; } void release() { F = 0; } bool operator<(const FunctionNode &RHS) const { - return (FunctionComparator(DL, F, RHS.getFunc()).compare()) == -1; + return (FunctionComparator(F, RHS.getFunc()).compare()) == -1; } }; } @@ -620,10 +615,11 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { PointerType *PTyL = dyn_cast<PointerType>(TyL); PointerType *PTyR = dyn_cast<PointerType>(TyR); - if (DL) { - if (PTyL && PTyL->getAddressSpace() == 0) TyL = DL->getIntPtrType(TyL); - if (PTyR && PTyR->getAddressSpace() == 0) TyR = DL->getIntPtrType(TyR); - } + const DataLayout &DL = FnL->getParent()->getDataLayout(); + if (PTyL && PTyL->getAddressSpace() == 0) + TyL = DL.getIntPtrType(TyL); + if (PTyR && PTyR->getAddressSpace() == 0) + TyR = DL.getIntPtrType(TyR); if (TyL == TyR) return 0; @@ -855,13 +851,12 @@ int FunctionComparator::cmpGEPs(const GEPOperator *GEPL, // When we have target data, we can reduce the GEP down to the value in bytes // added to the address. 
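//
// With the DataLayout now always available, the hunk below unconditionally
// compares candidate GEPs by accumulated byte offset rather than by index
// lists. An illustrative consequence (types and offsets chosen for the
// example, not taken from this patch): for struct { int32_t a; int32_t b; },
// field b can be addressed as "gep %p, 0, 1" or as a raw 4-byte offset from
// a bitcast pointer; both accumulate the same constant offset, so the
// comparator treats them as equal. The arithmetic in standalone form:
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t FieldOffsets[] = {0, 4}; // i32 at byte 0, i32 at byte 4
  uint64_t OffsetL = FieldOffsets[1];     // struct-indexed access path
  uint64_t OffsetR = 4;                   // raw byte-offset access path
  assert(OffsetL == OffsetR);             // compared as bytes, paths differ
}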
- if (DL) { - unsigned BitWidth = DL->getPointerSizeInBits(ASL); - APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0); - if (GEPL->accumulateConstantOffset(*DL, OffsetL) && - GEPR->accumulateConstantOffset(*DL, OffsetR)) - return cmpAPInts(OffsetL, OffsetR); - } + const DataLayout &DL = FnL->getParent()->getDataLayout(); + unsigned BitWidth = DL.getPointerSizeInBits(ASL); + APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0); + if (GEPL->accumulateConstantOffset(DL, OffsetL) && + GEPR->accumulateConstantOffset(DL, OffsetR)) + return cmpAPInts(OffsetL, OffsetR); if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(), (uint64_t)GEPR->getPointerOperand()->getType())) @@ -1122,9 +1117,6 @@ private: /// to modify it. FnTreeType FnTree; - /// DataLayout for more accurate GEP comparisons. May be NULL. - const DataLayout *DL; - /// Whether or not the target supports global aliases. bool HasGlobalAliases; }; @@ -1152,8 +1144,8 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) { Function *F1 = cast<Function>(*I); Function *F2 = cast<Function>(*J); - int Res1 = FunctionComparator(DL, F1, F2).compare(); - int Res2 = FunctionComparator(DL, F2, F1).compare(); + int Res1 = FunctionComparator(F1, F2).compare(); + int Res2 = FunctionComparator(F2, F1).compare(); // If F1 <= F2, then F2 >= F1, otherwise report failure. if (Res1 != -Res2) { @@ -1174,8 +1166,8 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { continue; Function *F3 = cast<Function>(*K); - int Res3 = FunctionComparator(DL, F1, F3).compare(); - int Res4 = FunctionComparator(DL, F2, F3).compare(); + int Res3 = FunctionComparator(F1, F3).compare(); + int Res4 = FunctionComparator(F2, F3).compare(); bool Transitive = true; @@ -1212,8 +1204,6 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { bool MergeFunctions::runOnModule(Module &M) { bool Changed = false; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) @@ -1420,7 +1410,7 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { // that was already inserted. bool MergeFunctions::insert(Function *NewFunction) { std::pair<FnTreeType::iterator, bool> Result = - FnTree.insert(FunctionNode(NewFunction, DL)); + FnTree.insert(FunctionNode(NewFunction)); if (Result.second) { DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n'); @@ -1457,7 +1447,7 @@ bool MergeFunctions::insert(Function *NewFunction) { void MergeFunctions::remove(Function *F) { // We need to make sure we remove F, not a function "equal" to F per the // function equality comparator. 
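For readers skimming the FunctionNode change: the set ordering still comes from the three-way FunctionComparator, now constructed without a DataLayout argument. A self-contained sketch of that pattern (illustrative names, not the MergeFunctions code; LLVM's compare() returns exactly -1/0/1, so its operator< tests == -1 where this sketch tests < 0):

#include <set>
#include <string>

struct Node {
  std::string Key;
  // Stand-in for FunctionComparator::compare(): negative/zero/positive.
  static int threeWayCompare(const Node &L, const Node &R) {
    return L.Key.compare(R.Key);
  }
  // A three-way comparator induces the strict weak ordering std::set needs.
  bool operator<(const Node &RHS) const {
    return threeWayCompare(*this, RHS) < 0;
  }
};

std::set<Node> Tree; // nodes comparing equal (compare() == 0) are duplicates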
- FnTreeType::iterator found = FnTree.find(FunctionNode(F, DL));
+ FnTreeType::iterator found = FnTree.find(FunctionNode(F));
size_t Erased = 0;
if (found != FnTree.end() && found->getFunc() == F) {
Erased = 1;
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 9a75050..d28d563 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -77,6 +77,10 @@ static cl::opt<bool>
EnableMLSM("mlsm", cl::init(true), cl::Hidden,
cl::desc("Enable motion of merged load and store"));
+static cl::opt<bool> EnableLoopInterchange(
+ "enable-loopinterchange", cl::init(false), cl::Hidden,
+ cl::desc("Enable the new, experimental LoopInterchange Pass"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -93,7 +97,6 @@ PassManagerBuilder::PassManagerBuilder() {
DisableGVNLoadPRE = false;
VerifyInput = false;
VerifyOutput = false;
- StripDebug = false;
MergeFunctions = false;
}
@@ -239,6 +242,8 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
MPM.add(createLoopDeletionPass()); // Delete dead loops
+ if (EnableLoopInterchange)
+ MPM.add(createLoopInterchangePass()); // Interchange loops
if (!DisableUnrollLoops)
MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops
@@ -305,8 +310,7 @@ void PassManagerBuilder::populateModulePassManager(
// Re-rotate loops in all our loop nests. These may have fallen out of
// rotated form due to GVN or other transformations, and the vectorizer relies
// on the rotated form.
- if (ExtraVectorizerPasses)
- MPM.add(createLoopRotatePass());
+ MPM.add(createLoopRotatePass());
MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
// FIXME: Because of #pragma vectorize enable, the passes below are always
@@ -358,9 +362,20 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createCFGSimplificationPass());
MPM.add(createInstructionCombiningPass());
- if (!DisableUnrollLoops)
+ if (!DisableUnrollLoops) {
MPM.add(createLoopUnrollPass()); // Unroll small loops
+ // This is a barrier pass to avoid combining the LICM pass and the loop
+ // unroll pass within the same loop pass manager.
+ MPM.add(createInstructionSimplifierPass());
+
+ // Runtime unrolling will introduce a runtime check in the loop prologue.
+ // If the unrolled loop is an inner loop, then the prologue will be inside
+ // the outer loop. The LICM pass can promote the runtime check out if the
+ // checked value is loop invariant.
+ MPM.add(createLICMPass());
+ }
+
// After vectorization and unrolling, assume intrinsics may tell us more
// about pointer alignments.
MPM.add(createAlignmentFromAssumptionsPass());
@@ -454,6 +469,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// More loops are countable; try to optimize them.
PM.add(createIndVarSimplifyPass());
PM.add(createLoopDeletionPass());
+ if (EnableLoopInterchange)
+ PM.add(createLoopInterchangePass());
+
PM.add(createLoopVectorizePass(true, LoopVectorize));
// More scalar chains could be vectorized due to more alias information
@@ -473,10 +491,10 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
addExtensionsToPM(EP_Peephole, PM);
PM.add(createJumpThreadingPass());
+}
- // Lower bitset metadata to bitsets.
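The unroll-then-LICM comment earlier in this hunk is easier to see on a concrete loop nest. A hedged C++ illustration; the guard shown in the comment is a stand-in for whatever check runtime unrolling actually emits, not its literal output:

// After runtime unrolling of the inner loop, a prologue guard shaped like
// (n % 4 != 0) sits inside the outer loop even though it depends only on n.
// Because it is invariant in i, LICM can hoist it, which is why a LICM run
// follows the unroller in the pipeline above.
void scale(float *a, int rows, int n, float k) {
  for (int i = 0; i < rows; ++i) {
    // hypothetical unroll prologue: if (n % 4 != 0) { peel n % 4 iterations }
    for (int j = 0; j < n; ++j)
      a[i * n + j] *= k;
  }
}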
- PM.add(createLowerBitSetsPass());
-
+void PassManagerBuilder::addLateLTOOptimizationPasses(
+ legacy::PassManagerBase &PM) {
// Delete basic blocks, which optimization passes may have killed.
PM.add(createCFGSimplificationPass());
@@ -496,19 +514,19 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
if (VerifyInput)
PM.add(createVerifierPass());
- if (StripDebug)
- PM.add(createStripSymbolsPass(true));
+ if (OptLevel > 1)
+ addLTOOptimizationPasses(PM);
- if (VerifyInput)
- PM.add(createDebugInfoVerifierPass());
+ // Lower bit sets to globals. This pass supports Clang's control flow
+ // integrity mechanisms (-fsanitize=cfi*) and needs to run at link time if CFI
+ // is enabled. The pass does nothing if CFI is disabled.
+ PM.add(createLowerBitSetsPass());
if (OptLevel != 0)
- addLTOOptimizationPasses(PM);
+ addLateLTOOptimizationPasses(PM);
- if (VerifyOutput) {
+ if (VerifyOutput)
PM.add(createVerifierPass());
- PM.add(createDebugInfoVerifierPass());
- }
}
inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) {
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 752f79d..c608f84 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -891,7 +891,7 @@ static bool checkRippleForAdd(const APInt &Op0KnownZero,
/// This basically requires proving that the add in the original type would not
/// overflow to change the sign bit or have a carry out.
bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// There are different heuristics we can use for this. Here are some simple
// ones.
@@ -909,18 +909,18 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
//
// Since the carry into the most significant position is always equal to
// the carry out of the addition, there is no signed overflow.
- if (ComputeNumSignBits(LHS, 0, CxtI) > 1 &&
- ComputeNumSignBits(RHS, 0, CxtI) > 1)
+ if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 &&
+ ComputeNumSignBits(RHS, 0, &CxtI) > 1)
return true;
unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI);
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI);
// Addition of two 2's complement numbers having opposite signs will never
// overflow.
@@ -943,21 +943,21 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
/// overflow to change the sign bit or have a carry out.
/// TODO: Handle this for Vectors.
bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// If LHS and RHS each have at least two sign bits, the subtraction
// cannot overflow.
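The sign-bit arguments used by WillNotOverflowSignedAdd and WillNotOverflowSignedSub can be spot-checked with plain integer bounds. A tiny compile-time check for the i8 case (ordinary C++, independent of LLVM): two or more sign bits confine each operand to [-64, 63], and both the sum and the difference of such values stay inside [-128, 127].

#include <cstdint>

static_assert(-64 + -64 >= INT8_MIN, "most negative i8 sum still fits");
static_assert(63 + 63 <= INT8_MAX, "most positive i8 sum still fits");
static_assert(-64 - 63 >= INT8_MIN, "most negative i8 difference still fits");
static_assert(63 - -64 <= INT8_MAX, "most positive i8 difference still fits");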
- if (ComputeNumSignBits(LHS, 0, CxtI) > 1 &&
- ComputeNumSignBits(RHS, 0, CxtI) > 1)
+ if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 &&
+ ComputeNumSignBits(RHS, 0, &CxtI) > 1)
return true;
unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI);
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI);
// Subtraction of two 2's complement numbers having identical signs will
// never overflow.
@@ -972,12 +972,14 @@ bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
/// \brief Return true if we can prove that:
/// (sub LHS, RHS) === (sub nuw LHS, RHS)
bool InstCombiner::WillNotOverflowUnsignedSub(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// If the LHS is negative and the RHS is non-negative, no unsigned wrap.
bool LHSKnownNonNegative, LHSKnownNegative;
bool RHSKnownNonNegative, RHSKnownNegative;
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, /*Depth=*/0, CxtI);
- ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, /*Depth=*/0, CxtI);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, /*Depth=*/0,
+ &CxtI);
+ ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, /*Depth=*/0,
+ &CxtI);
if (LHSKnownNegative && RHSKnownNonNegative)
return true;
@@ -1046,15 +1048,15 @@ static Value *checkForNegativeOperand(BinaryOperator &I,
}
Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
- bool Changed = SimplifyAssociativeOrCommutative(I);
- Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
- if (Value *V = SimplifyVectorOp(I))
- return ReplaceInstUsesWith(I, V);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
- return ReplaceInstUsesWith(I, V);
+ if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
+ I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
+ return ReplaceInstUsesWith(I, V);
// (A*B)+(A*C) -> A*(B+C) etc
if (Value *V = SimplifyUsingDistributiveLaws(I))
@@ -1243,7 +1245,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, &I)) {
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
// Insert the new, smaller add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
CI, "addconv");
@@ -1256,10 +1258,11 @@
// Only do this if x/y have the same type, if at least one of them has a
// single use (so we don't increase the number of sexts), and if the
// integer add will not overflow.
- if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ if (LHSConv->getOperand(0)->getType() ==
+ RHSConv->getOperand(0)->getType() &&
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), &I)) {
+ RHSConv->getOperand(0), I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0), "addconv");
@@ -1307,7 +1310,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// TODO(jingyue): Consider WillNotOverflowSignedAdd and
// WillNotOverflowUnsignedAdd to reduce the number of invocations of
// computeKnownBits.
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, &I)) {
+ if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
@@ -1371,7 +1374,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, &I)) {
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
CI, "addconv");
@@ -1384,10 +1387,11 @@
// Only do this if x/y have the same type, if at least one of them has a
// single use (so we don't increase the number of int->fp conversions),
// and if the integer add will not overflow.
- if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ if (LHSConv->getOperand(0)->getType() ==
+ RHSConv->getOperand(0)->getType() &&
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), &I)) {
+ RHSConv->getOperand(0), I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0),"addconv");
@@ -1436,8 +1440,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
///
Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
Type *Ty) {
- assert(DL && "Must have target data info for this");
-
// If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
// this.
bool Swapped = false;
@@ -1662,26 +1664,24 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// Optimize differences of pointers into the same array into a size. Consider:
// &A[10] - &A[0]: we should compile this to "10".
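As a concrete reading of the comment above, this is the source-level shape the pointer-difference fold targets; with both geps sharing the base A, the optimizer can reduce the subtraction to the constant 10 (plain C++, illustrative):

#include <cstddef>

int A[16];

ptrdiff_t tenElements() {
  // Both operands index the same array, so no runtime pointer math is needed.
  return &A[10] - &A[0]; // foldable to 10
}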
- if (DL) { - Value *LHSOp, *RHSOp; - if (match(Op0, m_PtrToInt(m_Value(LHSOp))) && - match(Op1, m_PtrToInt(m_Value(RHSOp)))) - if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) - return ReplaceInstUsesWith(I, Res); - - // trunc(p)-trunc(q) -> trunc(p-q) - if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) && - match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) - if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) - return ReplaceInstUsesWith(I, Res); - } + Value *LHSOp, *RHSOp; + if (match(Op0, m_PtrToInt(m_Value(LHSOp))) && + match(Op1, m_PtrToInt(m_Value(RHSOp)))) + if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) + return ReplaceInstUsesWith(I, Res); + + // trunc(p)-trunc(q) -> trunc(p-q) + if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) && + match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) + if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) + return ReplaceInstUsesWith(I, Res); bool Changed = false; - if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, &I)) { + if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, I)) { Changed = true; I.setHasNoSignedWrap(true); } - if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, &I)) { + if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, I)) { Changed = true; I.setHasNoUnsignedWrap(true); } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 863eeaf..ee21c81 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -979,9 +979,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // Make a constant range that's the intersection of the two icmp ranges. // If the intersection is empty, we know that the result is false. 
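A rough model of the allowed-region intersection used just above, with plain half-open intervals standing in for LLVM's wrapped ConstantRange; the helper name and interval encoding are assumptions for illustration:

#include <cstdint>

// Each icmp against a constant admits an interval of satisfying values.
// If the intervals [LoA, HiA) and [LoB, HiB) do not intersect, the AND of
// the two comparisons can never be true.
bool andOfICmpsIsFalse(int64_t LoA, int64_t HiA, int64_t LoB, int64_t HiB) {
  int64_t Lo = LoA > LoB ? LoA : LoB; // intersect the two intervals
  int64_t Hi = HiA < HiB ? HiA : HiB;
  return Lo >= Hi;                    // empty set => fold to false
}
// Example: (x sgt 5) && (x slt 3) gives [6, MAX) and [MIN, 3), which is empty.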
ConstantRange LHSRange = - ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue()); + ConstantRange::makeAllowedICmpRegion(LHSCC, LHSCst->getValue()); ConstantRange RHSRange = - ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue()); + ConstantRange::makeAllowedICmpRegion(RHSCC, RHSCst->getValue()); if (LHSRange.intersectWith(RHSRange).isEmptySet()) return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); @@ -1709,15 +1709,17 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Value *Mask = nullptr; Value *Masked = nullptr; if (LAnd->getOperand(0) == RAnd->getOperand(0) && - isKnownToBeAPowerOfTwo(LAnd->getOperand(1), false, 0, AC, CxtI, DT) && - isKnownToBeAPowerOfTwo(RAnd->getOperand(1), false, 0, AC, CxtI, DT)) { + isKnownToBeAPowerOfTwo(LAnd->getOperand(1), DL, false, 0, AC, CxtI, + DT) && + isKnownToBeAPowerOfTwo(RAnd->getOperand(1), DL, false, 0, AC, CxtI, + DT)) { Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1)); Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask); } else if (LAnd->getOperand(1) == RAnd->getOperand(1) && - isKnownToBeAPowerOfTwo(LAnd->getOperand(0), false, 0, AC, CxtI, - DT) && - isKnownToBeAPowerOfTwo(RAnd->getOperand(0), false, 0, AC, CxtI, - DT)) { + isKnownToBeAPowerOfTwo(LAnd->getOperand(0), DL, false, 0, AC, + CxtI, DT) && + isKnownToBeAPowerOfTwo(RAnd->getOperand(0), DL, false, 0, AC, + CxtI, DT)) { Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0)); Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask); } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 05e7162..21243c2 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -15,7 +15,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/IR/CallSite.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Statepoint.h" @@ -61,8 +60,8 @@ static Type *reduceToSingleValueType(Type *T) { } Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { - unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, AC, MI, DT); - unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, AC, MI, DT); + unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, AC, DT); + unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, AC, DT); unsigned MinAlign = std::min(DstAlign, SrcAlign); unsigned CopyAlign = MI->getAlignment(); @@ -108,7 +107,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { if (StrippedDest != MI->getArgOperand(0)) { Type *SrcETy = cast<PointerType>(StrippedDest->getType()) ->getElementType(); - if (DL && SrcETy->isSized() && DL->getTypeStoreSize(SrcETy) == Size) { + if (SrcETy->isSized() && DL.getTypeStoreSize(SrcETy) == Size) { // The SrcETy might be something like {{{double}}} or [1 x double]. Rip // down through these levels if so. 
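The power-of-two requirement in the FoldOrOfICmps masking code earlier in this hunk supports a family of masked-compare identities. One member of that family, restated independently of LLVM and checked exhaustively over 8-bit values with a C++14 compile-time loop:

#include <cstdint>

// With P and Q powers of two, ((x & P) == 0) || ((x & Q) == 0) is exactly
// (x & (P | Q)) != (P | Q): the disjunction fails only when both bits are set.
constexpr bool holdsForAll(uint8_t P, uint8_t Q) {
  for (unsigned x = 0; x < 256; ++x) {
    bool lhs = ((x & P) == 0) || ((x & Q) == 0);
    bool rhs = (x & unsigned(P | Q)) != unsigned(P | Q);
    if (lhs != rhs)
      return false;
  }
  return true;
}
static_assert(holdsForAll(0x10, 0x02), "identity is exact for powers of two");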
SrcETy = reduceToSingleValueType(SrcETy); @@ -156,7 +155,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { } Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { - unsigned Alignment = getKnownAlignment(MI->getDest(), DL, AC, MI, DT); + unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, AC, DT); if (MI->getAlignment() < Alignment) { MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Alignment, false)); @@ -198,6 +197,71 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { return nullptr; } +/// The shuffle mask for a perm2*128 selects any two halves of two 256-bit +/// source vectors, unless a zero bit is set. If a zero bit is set, +/// then ignore that half of the mask and clear that half of the vector. +static Value *SimplifyX86vperm2(const IntrinsicInst &II, + InstCombiner::BuilderTy &Builder) { + if (auto CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) { + VectorType *VecTy = cast<VectorType>(II.getType()); + ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy); + + // The immediate permute control byte looks like this: + // [1:0] - select 128 bits from sources for low half of destination + // [2] - ignore + // [3] - zero low half of destination + // [5:4] - select 128 bits from sources for high half of destination + // [6] - ignore + // [7] - zero high half of destination + + uint8_t Imm = CInt->getZExtValue(); + + bool LowHalfZero = Imm & 0x08; + bool HighHalfZero = Imm & 0x80; + + // If both zero mask bits are set, this was just a weird way to + // generate a zero vector. + if (LowHalfZero && HighHalfZero) + return ZeroVector; + + // If 0 or 1 zero mask bits are set, this is a simple shuffle. + unsigned NumElts = VecTy->getNumElements(); + unsigned HalfSize = NumElts / 2; + SmallVector<int, 8> ShuffleMask(NumElts); + + // The high bit of the selection field chooses the 1st or 2nd operand. + bool LowInputSelect = Imm & 0x02; + bool HighInputSelect = Imm & 0x20; + + // The low bit of the selection field chooses the low or high half + // of the selected operand. + bool LowHalfSelect = Imm & 0x01; + bool HighHalfSelect = Imm & 0x10; + + // Determine which operand(s) are actually in use for this instruction. + Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0); + Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0); + + // If needed, replace operands based on zero mask. + V0 = LowHalfZero ? ZeroVector : V0; + V1 = HighHalfZero ? ZeroVector : V1; + + // Permute low half of result. + unsigned StartIndex = LowHalfSelect ? HalfSize : 0; + for (unsigned i = 0; i < HalfSize; ++i) + ShuffleMask[i] = StartIndex + i; + + // Permute high half of result. + StartIndex = HighHalfSelect ? HalfSize : 0; + StartIndex += NumElts; + for (unsigned i = 0; i < HalfSize; ++i) + ShuffleMask[i + HalfSize] = StartIndex + i; + + return Builder.CreateShuffleVector(V0, V1, ShuffleMask); + } + return nullptr; +} + /// visitCallInst - CallInst simplification. This mostly only handles folding /// of intrinsic instructions. For normal calls, it allows visitCallSite to do /// the heavy lifting. @@ -386,7 +450,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // can prove that it will never overflow. 
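The control-byte decoding in SimplifyX86vperm2 above, restated as a small standalone model over a 4 x i64 vector. This is plain C++ mirroring the comments in the function, not the LLVM implementation itself:

#include <array>
#include <cstdint>

using V4 = std::array<uint64_t, 4>;

// Bits [1:0] and [5:4] each pick one 128-bit half of one source; bits 3 and 7
// zero the corresponding half of the destination instead.
V4 vperm2(const V4 &A, const V4 &B, uint8_t Imm) {
  auto half = [&](unsigned Sel) { // Sel: 0=A.lo, 1=A.hi, 2=B.lo, 3=B.hi
    const V4 &Src = (Sel & 2) ? B : A;
    unsigned Base = (Sel & 1) ? 2 : 0;
    return std::array<uint64_t, 2>{Src[Base], Src[Base + 1]};
  };
  auto lo = (Imm & 0x08) ? std::array<uint64_t, 2>{0, 0} : half(Imm & 0x3);
  auto hi = (Imm & 0x80) ? std::array<uint64_t, 2>{0, 0}
                         : half((Imm >> 4) & 0x3);
  return {lo[0], lo[1], hi[0], hi[1]};
}
// Example: Imm = 0x31 selects A's high half for the low lanes and B's high
// half for the high lanes, matching the single-shuffle lowering above.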
if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) { Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - if (WillNotOverflowSignedAdd(LHS, RHS, II)) { + if (WillNotOverflowSignedAdd(LHS, RHS, *II)) { return CreateOverflowTuple(II, Builder->CreateNSWAdd(LHS, RHS), false); } } @@ -407,11 +471,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } } if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) { - if (WillNotOverflowSignedSub(LHS, RHS, II)) { + if (WillNotOverflowSignedSub(LHS, RHS, *II)) { return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false); } } else { - if (WillNotOverflowUnsignedSub(LHS, RHS, II)) { + if (WillNotOverflowUnsignedSub(LHS, RHS, *II)) { return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false); } } @@ -452,7 +516,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) { Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - if (WillNotOverflowSignedMul(LHS, RHS, II)) { + if (WillNotOverflowSignedMul(LHS, RHS, *II)) { return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false); } } @@ -544,7 +608,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: // Turn PPC lvx -> load if the pointer is known aligned. - if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >= + if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >= 16) { Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); @@ -561,7 +625,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: // Turn stvx -> store if the pointer is known aligned. - if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, AC, II, DT) >= + if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >= 16) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); @@ -578,7 +642,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } case Intrinsic::ppc_qpx_qvlfs: // Turn PPC QPX qvlfs -> load if the pointer is known aligned. - if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >= + if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >= 16) { Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); @@ -587,7 +651,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; case Intrinsic::ppc_qpx_qvlfd: // Turn PPC QPX qvlfd -> load if the pointer is known aligned. - if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, AC, II, DT) >= + if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, AC, DT) >= 32) { Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); @@ -596,7 +660,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; case Intrinsic::ppc_qpx_qvstfs: // Turn PPC QPX qvstfs -> store if the pointer is known aligned. - if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, AC, II, DT) >= + if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >= 16) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); @@ -606,7 +670,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; case Intrinsic::ppc_qpx_qvstfd: // Turn PPC QPX qvstfd -> store if the pointer is known aligned. 
- if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, AC, II, DT) >= + if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, AC, DT) >= 32) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); @@ -618,7 +682,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: // Turn X86 storeu -> store if the pointer is known aligned. - if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >= + if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >= 16) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(1)->getType()); @@ -735,9 +799,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { unsigned LowHalfElts = VWidth / 2; APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts)); APInt UndefElts(VWidth, 0); - if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), - InputDemandedElts, - UndefElts)) { + if (Value *TmpV = SimplifyDemandedVectorElts( + II->getArgOperand(0), InputDemandedElts, UndefElts)) { II->setArgOperand(0, TmpV); return II; } @@ -906,6 +969,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return ReplaceInstUsesWith(CI, Shuffle); } + case Intrinsic::x86_avx_vperm2f128_pd_256: + case Intrinsic::x86_avx_vperm2f128_ps_256: + case Intrinsic::x86_avx_vperm2f128_si_256: + case Intrinsic::x86_avx2_vperm2i128: + if (Value *V = SimplifyX86vperm2(*II, *Builder)) + return ReplaceInstUsesWith(*II, V); + break; + case Intrinsic::ppc_altivec_vperm: // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. // Note that ppc_altivec_vperm has a big-endian bias, so when creating @@ -945,12 +1016,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { unsigned Idx = cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue(); Idx &= 31; // Match the hardware behavior. - if (DL && DL->isLittleEndian()) + if (DL.isLittleEndian()) Idx = 31 - Idx; if (!ExtractedElts[Idx]) { - Value *Op0ToUse = (DL && DL->isLittleEndian()) ? Op1 : Op0; - Value *Op1ToUse = (DL && DL->isLittleEndian()) ? Op0 : Op1; + Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0; + Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1; ExtractedElts[Idx] = Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse, Builder->getInt32(Idx&15)); @@ -979,7 +1050,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::arm_neon_vst2lane: case Intrinsic::arm_neon_vst3lane: case Intrinsic::arm_neon_vst4lane: { - unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, AC, II, DT); + unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, II, AC, DT); unsigned AlignArg = II->getNumArgOperands() - 1; ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg)); if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) { @@ -1118,7 +1189,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { RHS->getType()->isPointerTy() && cast<Constant>(RHS)->isNullValue()) { LoadInst* LI = cast<LoadInst>(LHS); - if (isValidAssumeForContext(II, LI, DL, DT)) { + if (isValidAssumeForContext(II, LI, DT)) { MDNode *MD = MDNode::get(II->getContext(), None); LI->setMetadata(LLVMContext::MD_nonnull, MD); return EraseInstFromFunction(*II); @@ -1192,8 +1263,8 @@ Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) { /// isSafeToEliminateVarargsCast - If this cast does not affect the value /// passed through the varargs area, we can eliminate the use of the cast. 
static bool isSafeToEliminateVarargsCast(const CallSite CS, - const CastInst * const CI, - const DataLayout * const DL, + const DataLayout &DL, + const CastInst *const CI, const int ix) { if (!CI->isLosslessCast()) return false; @@ -1217,7 +1288,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS, Type* DstTy = cast<PointerType>(CI->getType())->getElementType(); if (!SrcTy->isSized() || !DstTy->isSized()) return false; - if (!DL || DL->getTypeAllocSize(SrcTy) != DL->getTypeAllocSize(DstTy)) + if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy)) return false; return true; } @@ -1226,7 +1297,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS, // Currently we're only working with the checking functions, memcpy_chk, // mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk, // strcat_chk and strncat_chk. -Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *DL) { +Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) { if (!CI->getCalledFunction()) return nullptr; auto InstCombineRAUW = [this](Instruction *From, Value *With) { @@ -1391,7 +1462,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(), E = CS.arg_end(); I != E; ++I, ++ix) { CastInst *CI = dyn_cast<CastInst>(*I); - if (CI && isSafeToEliminateVarargsCast(CS, CI, DL, ix)) { + if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) { *I = CI->getOperand(0); Changed = true; } @@ -1408,7 +1479,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { // this. None of these calls are seen as possibly dead so go ahead and // delete the instruction now. if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) { - Instruction *I = tryOptimizeCall(CI, DL); + Instruction *I = tryOptimizeCall(CI); // If we changed something return the result, etc. Otherwise let // the fallthrough check. if (I) return EraseInstFromFunction(*I); @@ -1487,7 +1558,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // // into: // call void @takes_i32_inalloca(i32* null) - if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca)) + // + // Similarly, avoid folding away bitcasts of byval calls. + if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) || + Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal)) return false; CallSite::arg_iterator AI = CS.arg_begin(); @@ -1512,12 +1586,12 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1, Attribute::ByVal)) { PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy); - if (!ParamPTy || !ParamPTy->getElementType()->isSized() || !DL) + if (!ParamPTy || !ParamPTy->getElementType()->isSized()) return false; Type *CurElTy = ActTy->getPointerElementType(); - if (DL->getTypeAllocSize(CurElTy) != - DL->getTypeAllocSize(ParamPTy->getElementType())) + if (DL.getTypeAllocSize(CurElTy) != + DL.getTypeAllocSize(ParamPTy->getElementType())) return false; } } diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 3e2b719..fe544c2 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -80,9 +80,6 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, /// try to eliminate the cast by moving the type information into the alloc. 
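For the alloca-promotion transform documented above, the core legality condition is a size modulus: the bytes allocated must divide evenly into elements of the cast-to type. A sketch with illustrative sizes rather than real DataLayout queries:

#include <cstdint>

// Minimal sketch, assuming byte sizes are already known. Promoting something
// like "bitcast (alloca [8 x i8]) to double*" into "alloca double" is only
// considered when the total allocation is a whole number of new elements.
bool canPromote(uint64_t AllocElTySize, uint64_t CastElTySize,
                uint64_t ArraySize) {
  if (AllocElTySize == 0 || CastElTySize == 0)
    return false;
  return (AllocElTySize * ArraySize) % CastElTySize == 0;
}
// canPromote(/*i8*/ 1, /*double*/ 8, /*[8 x i8]*/ 8) -> true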
Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI) { - // This requires DataLayout to get the alloca alignment and size information. - if (!DL) return nullptr; - PointerType *PTy = cast<PointerType>(CI.getType()); BuilderTy AllocaBuilder(*Builder); @@ -93,8 +90,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, Type *CastElTy = PTy->getElementType(); if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr; - unsigned AllocElTyAlign = DL->getABITypeAlignment(AllocElTy); - unsigned CastElTyAlign = DL->getABITypeAlignment(CastElTy); + unsigned AllocElTyAlign = DL.getABITypeAlignment(AllocElTy); + unsigned CastElTyAlign = DL.getABITypeAlignment(CastElTy); if (CastElTyAlign < AllocElTyAlign) return nullptr; // If the allocation has multiple uses, only promote it if we are strictly @@ -102,14 +99,14 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // same, we open the door to infinite loops of various kinds. if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr; - uint64_t AllocElTySize = DL->getTypeAllocSize(AllocElTy); - uint64_t CastElTySize = DL->getTypeAllocSize(CastElTy); + uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy); + uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy); if (CastElTySize == 0 || AllocElTySize == 0) return nullptr; // If the allocation has multiple uses, only promote it if we're not // shrinking the amount of memory being allocated. - uint64_t AllocElTyStoreSize = DL->getTypeStoreSize(AllocElTy); - uint64_t CastElTyStoreSize = DL->getTypeStoreSize(CastElTy); + uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy); + uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy); if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr; // See if we can satisfy the modulus by pulling a scale out of the array @@ -215,7 +212,8 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, PHINode *OPN = cast<PHINode>(I); PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues()); for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) { - Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned); + Value *V = + EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned); NPN->addIncoming(V, OPN->getIncomingBlock(i)); } Res = NPN; @@ -234,25 +232,22 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, /// This function is a wrapper around CastInst::isEliminableCastPair. It /// simply extracts arguments and returns what that function returns. static Instruction::CastOps -isEliminableCastPair( - const CastInst *CI, ///< The first cast instruction - unsigned opcode, ///< The opcode of the second cast instruction - Type *DstTy, ///< The target type for the second cast instruction - const DataLayout *DL ///< The target data for pointer size -) { - +isEliminableCastPair(const CastInst *CI, ///< First cast instruction + unsigned opcode, ///< Opcode for the second cast + Type *DstTy, ///< Target type for the second cast + const DataLayout &DL) { Type *SrcTy = CI->getOperand(0)->getType(); // A from above Type *MidTy = CI->getType(); // B from above // Get the opcodes of the two Cast instructions Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode()); Instruction::CastOps secondOp = Instruction::CastOps(opcode); - Type *SrcIntPtrTy = DL && SrcTy->isPtrOrPtrVectorTy() ? - DL->getIntPtrType(SrcTy) : nullptr; - Type *MidIntPtrTy = DL && MidTy->isPtrOrPtrVectorTy() ? 
- DL->getIntPtrType(MidTy) : nullptr; - Type *DstIntPtrTy = DL && DstTy->isPtrOrPtrVectorTy() ? - DL->getIntPtrType(DstTy) : nullptr; + Type *SrcIntPtrTy = + SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr; + Type *MidIntPtrTy = + MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr; + Type *DstIntPtrTy = + DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr; unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, DstTy, SrcIntPtrTy, MidIntPtrTy, DstIntPtrTy); @@ -298,7 +293,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { // eliminate it now. if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast if (Instruction::CastOps opc = - isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), DL)) { + isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), DL)) { // The first cast (CSrc) is eliminable so we need to fix up or replace // the second cast (CI). CSrc will then have a good chance of being dead. return CastInst::Create(opc, CSrc->getOperand(0), CI.getType()); @@ -314,8 +309,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { if (isa<PHINode>(Src)) { // We don't do this if this would create a PHI node with an illegal type if // it is currently legal. - if (!Src->getType()->isIntegerTy() || - !CI.getType()->isIntegerTy() || + if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() || ShouldChangeType(CI.getType(), Src->getType())) if (Instruction *NV = FoldOpIntoPhi(CI)) return NV; @@ -1419,18 +1413,15 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { // If the source integer type is not the intptr_t type for this target, do a // trunc or zext to the intptr_t type, then inttoptr of it. This allows the // cast to be exposed to other transforms. - - if (DL) { - unsigned AS = CI.getAddressSpace(); - if (CI.getOperand(0)->getType()->getScalarSizeInBits() != - DL->getPointerSizeInBits(AS)) { - Type *Ty = DL->getIntPtrType(CI.getContext(), AS); - if (CI.getType()->isVectorTy()) // Handle vectors of pointers. - Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); - - Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty); - return new IntToPtrInst(P, CI.getType()); - } + unsigned AS = CI.getAddressSpace(); + if (CI.getOperand(0)->getType()->getScalarSizeInBits() != + DL.getPointerSizeInBits(AS)) { + Type *Ty = DL.getIntPtrType(CI.getContext(), AS); + if (CI.getType()->isVectorTy()) // Handle vectors of pointers. + Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); + + Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty); + return new IntToPtrInst(P, CI.getType()); } if (Instruction *I = commonCastTransforms(CI)) @@ -1460,32 +1451,33 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { return &CI; } - if (!DL) - return commonCastTransforms(CI); - // If the GEP has a single use, and the base pointer is a bitcast, and the // GEP computes a constant offset, see if we can convert these three // instructions into fewer. This typically happens with unions and other // non-type-safe code. 
unsigned AS = GEP->getPointerAddressSpace(); - unsigned OffsetBits = DL->getPointerSizeInBits(AS); + unsigned OffsetBits = DL.getPointerSizeInBits(AS); APInt Offset(OffsetBits, 0); BitCastInst *BCI = dyn_cast<BitCastInst>(GEP->getOperand(0)); - if (GEP->hasOneUse() && - BCI && - GEP->accumulateConstantOffset(*DL, Offset)) { + if (GEP->hasOneUse() && BCI && GEP->accumulateConstantOffset(DL, Offset)) { + // FIXME: This is insufficiently tested - just a no-crash test + // (test/Transforms/InstCombine/2007-05-14-Crash.ll) + // // Get the base pointer input of the bitcast, and the type it points to. Value *OrigBase = BCI->getOperand(0); SmallVector<Value*, 8> NewIndices; - if (FindElementAtOffset(OrigBase->getType(), - Offset.getSExtValue(), + if (FindElementAtOffset(OrigBase->getType(), Offset.getSExtValue(), NewIndices)) { + // FIXME: This codepath is completely untested - could be unreachable + // for all I know. // If we were able to index down into an element, create the GEP // and bitcast the result. This eliminates one bitcast, potentially // two. - Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ? - Builder->CreateInBoundsGEP(OrigBase, NewIndices) : - Builder->CreateGEP(OrigBase, NewIndices); + Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() + ? Builder->CreateInBoundsGEP(OrigBase, NewIndices) + : Builder->CreateGEP( + OrigBase->getType()->getPointerElementType(), + OrigBase, NewIndices); NGEP->takeName(GEP); if (isa<BitCastInst>(CI)) @@ -1504,16 +1496,13 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast // to be exposed to other transforms. - if (!DL) - return commonPointerCastTransforms(CI); - Type *Ty = CI.getType(); unsigned AS = CI.getPointerAddressSpace(); - if (Ty->getScalarSizeInBits() == DL->getPointerSizeInBits(AS)) + if (Ty->getScalarSizeInBits() == DL.getPointerSizeInBits(AS)) return commonPointerCastTransforms(CI); - Type *PtrTy = DL->getIntPtrType(CI.getContext(), AS); + Type *PtrTy = DL.getIntPtrType(CI.getContext(), AS); if (Ty->isVectorTy()) // Handle vectors of pointers. PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements()); @@ -1597,8 +1586,8 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) { /// This returns false if the pattern can't be matched or true if it can, /// filling in Elements with the elements found here. static bool CollectInsertionElements(Value *V, unsigned Shift, - SmallVectorImpl<Value*> &Elements, - Type *VecEltTy, InstCombiner &IC) { + SmallVectorImpl<Value *> &Elements, + Type *VecEltTy, bool isBigEndian) { assert(isMultipleOfTypeSize(Shift, VecEltTy) && "Shift should be a multiple of the element type size"); @@ -1614,7 +1603,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, return true; unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy); - if (IC.getDataLayout()->isBigEndian()) + if (isBigEndian) ElementIndex = Elements.size() - ElementIndex - 1; // Fail if multiple elements are inserted into this slot. @@ -1634,7 +1623,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, // it to the right type so it gets properly inserted. if (NumElts == 1) return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy), - Shift, Elements, VecEltTy, IC); + Shift, Elements, VecEltTy, isBigEndian); // Okay, this is a constant that covers multiple elements. Slice it up into // pieces and insert each element-sized piece into the vector. 
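The big-endian index flip in CollectInsertionElements, modeled standalone. Bit sizes are plain integers here and the function is illustrative only; it mirrors the "Elements.size() - ElementIndex - 1" adjustment above:

// A fragment inserted at bit offset Shift lands in element Shift / EltBits on
// a little-endian target, but in the mirrored element on a big-endian one.
unsigned elementIndexFor(unsigned Shift, unsigned EltBits, unsigned NumElts,
                         bool IsBigEndian) {
  unsigned Index = Shift / EltBits;
  if (IsBigEndian)
    Index = NumElts - Index - 1;
  return Index;
}
// elementIndexFor(32, 32, 2, false) == 1, but == 0 on a big-endian target.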
@@ -1649,7 +1638,8 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(), ShiftI)); Piece = ConstantExpr::getTrunc(Piece, ElementIntTy); - if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy, IC)) + if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy, + isBigEndian)) return false; } return true; @@ -1662,28 +1652,28 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, switch (I->getOpcode()) { default: return false; // Unhandled case. case Instruction::BitCast: - return CollectInsertionElements(I->getOperand(0), Shift, - Elements, VecEltTy, IC); + return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, + isBigEndian); case Instruction::ZExt: if (!isMultipleOfTypeSize( I->getOperand(0)->getType()->getPrimitiveSizeInBits(), VecEltTy)) return false; - return CollectInsertionElements(I->getOperand(0), Shift, - Elements, VecEltTy, IC); + return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, + isBigEndian); case Instruction::Or: - return CollectInsertionElements(I->getOperand(0), Shift, - Elements, VecEltTy, IC) && - CollectInsertionElements(I->getOperand(1), Shift, - Elements, VecEltTy, IC); + return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, + isBigEndian) && + CollectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy, + isBigEndian); case Instruction::Shl: { // Must be shifting by a constant that is a multiple of the element size. ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); if (!CI) return false; Shift += CI->getZExtValue(); if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false; - return CollectInsertionElements(I->getOperand(0), Shift, - Elements, VecEltTy, IC); + return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, + isBigEndian); } } @@ -1706,15 +1696,13 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, /// Into two insertelements that do "buildvector{%inc, %inc5}". static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombiner &IC) { - // We need to know the target byte order to perform this optimization. - if (!IC.getDataLayout()) return nullptr; - VectorType *DestVecTy = cast<VectorType>(CI.getType()); Value *IntInput = CI.getOperand(0); SmallVector<Value*, 8> Elements(DestVecTy->getNumElements()); if (!CollectInsertionElements(IntInput, 0, Elements, - DestVecTy->getElementType(), IC)) + DestVecTy->getElementType(), + IC.getDataLayout().isBigEndian())) return nullptr; // If we succeeded, we know that all of the element are specified by Elements @@ -1734,10 +1722,8 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, /// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double /// bitcast. The various long double bitcasts can't get in here. -static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ - // We need to know the target byte order to perform this optimization. 
- if (!IC.getDataLayout()) return nullptr; - +static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI, InstCombiner &IC, + const DataLayout &DL) { Value *Src = CI.getOperand(0); Type *DestTy = CI.getType(); @@ -1760,7 +1746,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ } unsigned Elt = 0; - if (IC.getDataLayout()->isBigEndian()) + if (DL.isBigEndian()) Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1; return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); } @@ -1784,7 +1770,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ } unsigned Elt = ShAmt->getZExtValue() / DestWidth; - if (IC.getDataLayout()->isBigEndian()) + if (DL.isBigEndian()) Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt; return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); } @@ -1839,7 +1825,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // Try to optimize int -> float bitcasts. if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy)) - if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this)) + if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this, DL)) return I; if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index f48d89b..803b50a 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -229,10 +229,6 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero, Instruction *InstCombiner:: FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI, ConstantInt *AndCst) { - // We need TD information to know the pointer size unless this is inbounds. - if (!GEP->isInBounds() && !DL) - return nullptr; - Constant *Init = GV->getInitializer(); if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init)) return nullptr; @@ -303,7 +299,6 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // the array, this will fully represent all the comparison results. uint64_t MagicBitvector = 0; - // Scan the array and see if one of our patterns matches. Constant *CompareRHS = cast<Constant>(ICI.getOperand(1)); for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) { @@ -398,7 +393,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // index down like the GEP would do implicitly. We don't have to do this for // an inbounds GEP because the index can't be out of range. 
if (!GEP->isInBounds()) { - Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); + Type *IntPtrTy = DL.getIntPtrType(GEP->getType()); unsigned PtrSize = IntPtrTy->getIntegerBitWidth(); if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize) Idx = Builder->CreateTrunc(Idx, IntPtrTy); @@ -487,10 +482,8 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // - Default to i32 if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth()) Ty = Idx->getType(); - else if (DL) - Ty = DL->getSmallestLegalIntType(Init->getContext(), ArrayElementCount); - else if (ArrayElementCount <= 32) - Ty = Type::getInt32Ty(Init->getContext()); + else + Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount); if (Ty) { Value *V = Builder->CreateIntCast(Idx, Ty, false); @@ -514,8 +507,8 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, /// /// If we can't emit an optimized form for this expression, this returns null. /// -static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { - const DataLayout &DL = *IC.getDataLayout(); +static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC, + const DataLayout &DL) { gep_type_iterator GTI = gep_type_begin(GEP); // Check to see if this gep only has a single variable index. If so, and if @@ -628,12 +621,12 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, RHS = RHS->stripPointerCasts(); Value *PtrBase = GEPLHS->getOperand(0); - if (DL && PtrBase == RHS && GEPLHS->isInBounds()) { + if (PtrBase == RHS && GEPLHS->isInBounds()) { // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0). // This transformation (ignoring the base and scales) is valid because we // know pointers can't overflow since the gep is inbounds. See if we can // output an optimized form. - Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this); + Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this, DL); // If not, synthesize the offset the hard way. if (!Offset) @@ -661,11 +654,11 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, // If we're comparing GEPs with two base pointers that only differ in type // and both GEPs have only constant indices or just one use, then fold // the compare with the adjusted indices. - if (DL && GEPLHS->isInBounds() && GEPRHS->isInBounds() && + if (GEPLHS->isInBounds() && GEPRHS->isInBounds() && (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) && (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) && PtrBase->stripPointerCasts() == - GEPRHS->getOperand(0)->stripPointerCasts()) { + GEPRHS->getOperand(0)->stripPointerCasts()) { Value *LOffset = EmitGEPOffset(GEPLHS); Value *ROffset = EmitGEPOffset(GEPRHS); @@ -733,9 +726,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, // Only lower this if the icmp is the only user of the GEP or if we expect // the result to fold to a constant! - if (DL && - GEPsInBounds && - (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) && + if (GEPsInBounds && (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) && (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) { // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2) Value *L = EmitGEPOffset(GEPLHS); @@ -1928,8 +1919,8 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the // integer type is the same size as the pointer type. 
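The new (A << C) == (B << C) fold added in this hunk can be checked exhaustively at 8 bits. A C++14 compile-time verification of the mask identity (my own restatement, independent of LLVM):

#include <cstdint>

// Shifting left by C discards the top C bits, so the shifted values are equal
// exactly when A and B agree on the bits that survive: (A^B) & (~0U >> C) == 0.
constexpr bool shlCompareFoldHolds(unsigned C) {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      bool lhs = uint8_t(A << C) == uint8_t(B << C);
      bool rhs = ((A ^ B) & (0xFFu >> C)) == 0;
      if (lhs != rhs)
        return false;
    }
  return true;
}
static_assert(shlCompareFoldHolds(3), "mask keeps exactly the surviving bits");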
- if (DL && LHSCI->getOpcode() == Instruction::PtrToInt && - DL->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) { + if (LHSCI->getOpcode() == Instruction::PtrToInt && + DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) { Value *RHSOp = nullptr; if (PtrToIntOperator *RHSC = dyn_cast<PtrToIntOperator>(ICI.getOperand(1))) { Value *RHSCIOp = RHSC->getOperand(0); @@ -2660,8 +2651,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { unsigned BitWidth = 0; if (Ty->isIntOrIntVectorTy()) BitWidth = Ty->getScalarSizeInBits(); - else if (DL) // Pointers require DL info to get their size. - BitWidth = DL->getTypeSizeInBits(Ty->getScalarType()); + else // Get pointer size. + BitWidth = DL.getTypeSizeInBits(Ty->getScalarType()); bool isSignBit = false; @@ -2774,8 +2765,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Op0KnownZero, Op0KnownOne, 0)) return &I; if (SimplifyDemandedBits(I.getOperandUse(1), - APInt::getAllOnesValue(BitWidth), - Op1KnownZero, Op1KnownOne, 0)) + APInt::getAllOnesValue(BitWidth), Op1KnownZero, + Op1KnownOne, 0)) return &I; // Given the known and unknown bits, compute a range that the LHS could be @@ -3094,9 +3085,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } case Instruction::IntToPtr: // icmp pred inttoptr(X), null -> icmp pred X, 0 - if (RHSC->isNullValue() && DL && - DL->getIntPtrType(RHSC->getType()) == - LHSI->getOperand(0)->getType()) + if (RHSC->isNullValue() && + DL.getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType()) return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), Constant::getNullValue(LHSI->getOperand(0)->getType())); break; @@ -3428,7 +3418,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // if A is a power of 2. if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) && match(Op1, m_Zero()) && - isKnownToBeAPowerOfTwo(A, false, 0, AC, &I, DT) && I.isEquality()) + isKnownToBeAPowerOfTwo(A, DL, false, 0, AC, &I, DT) && I.isEquality()) return new ICmpInst(I.getInversePredicate(), Builder->CreateAnd(A, B), Op1); @@ -3563,6 +3553,21 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } } + // (A << C) == (B << C) --> ((A^B) & (~0U >> C)) == 0 + if (match(Op0, m_OneUse(m_Shl(m_Value(A), m_ConstantInt(Cst1)))) && + match(Op1, m_OneUse(m_Shl(m_Value(B), m_Specific(Cst1))))) { + unsigned TypeBits = Cst1->getBitWidth(); + unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits); + if (ShAmt < TypeBits && ShAmt != 0) { + Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted"); + APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt); + Value *And = Builder->CreateAnd(Xor, Builder->getInt(AndVal), + I.getName() + ".mask"); + return new ICmpInst(I.getPredicate(), And, + Constant::getNullValue(Cst1->getType())); + } + } + // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to // "icmp (and X, mask), cst" uint64_t ShAmt = 0; diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index 2fd5318..fb2321d 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -158,10 +158,10 @@ private: AssumptionCache *AC; TargetLibraryInfo *TLI; DominatorTree *DT; + const DataLayout &DL; // Optional analyses. When non-null, these can both be used to do better // combining and will be updated to reflect any changes. 
- const DataLayout *DL; LoopInfo *LI; bool MadeIRChange; @@ -169,7 +169,7 @@ private: public: InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder, bool MinimizeSize, AssumptionCache *AC, TargetLibraryInfo *TLI, - DominatorTree *DT, const DataLayout *DL, LoopInfo *LI) + DominatorTree *DT, const DataLayout &DL, LoopInfo *LI) : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize), AC(AC), TLI(TLI), DT(DT), DL(DL), LI(LI), MadeIRChange(false) {} @@ -180,7 +180,7 @@ public: AssumptionCache *getAssumptionCache() const { return AC; } - const DataLayout *getDataLayout() const { return DL; } + const DataLayout &getDataLayout() const { return DL; } DominatorTree *getDominatorTree() const { return DT; } @@ -330,17 +330,17 @@ private: Type *Ty); Instruction *visitCallSite(CallSite CS); - Instruction *tryOptimizeCall(CallInst *CI, const DataLayout *DL); + Instruction *tryOptimizeCall(CallInst *CI); bool transformConstExprCastCall(CallSite CS); Instruction *transformCallThroughTrampoline(CallSite CS, IntrinsicInst *Tramp); Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI, bool DoXform = true); Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI); - bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction *CxtI); - bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction *CxtI); - bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction *CxtI); - bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction *CxtI); + bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction &CxtI); + bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction &CxtI); + bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction &CxtI); + bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction &CxtI); Value *EmitGEPOffset(User *GEP); Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN); Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask); @@ -372,6 +372,10 @@ public: /// I to the worklist, replace all uses of I with the new value, then return /// I, so that the inst combiner will know that I was modified. Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) { + // If there are no uses to replace, then we return nullptr to indicate that + // no changes were made to the program. + if (I.use_empty()) return nullptr; + Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist. // If we are replacing the instruction with itself, this must be in a @@ -423,7 +427,7 @@ public: } void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, - unsigned Depth = 0, Instruction *CxtI = nullptr) const { + unsigned Depth, Instruction *CxtI) const { return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, AC, CxtI, DT); } @@ -468,7 +472,7 @@ private: /// bits. Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne, unsigned Depth, - Instruction *CxtI = nullptr); + Instruction *CxtI); bool SimplifyDemandedBits(Use &U, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne, unsigned Depth = 0); /// Helper routine of SimplifyDemandedUseBits. 
It tries to simplify demanded diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index b9eb986..6b0f268 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -164,62 +164,75 @@ isOnlyCopiedFromConstantGlobal(AllocaInst *AI, return nullptr; } -Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { - // Ensure that the alloca array size argument has type intptr_t, so that - // any casting is exposed early. - if (DL) { - Type *IntPtrTy = DL->getIntPtrType(AI.getType()); - if (AI.getArraySize()->getType() != IntPtrTy) { - Value *V = Builder->CreateIntCast(AI.getArraySize(), - IntPtrTy, false); - AI.setOperand(0, V); - return &AI; - } +static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) { + // Check for array size of 1 (scalar allocation). + if (!AI.isArrayAllocation()) { + // i32 1 is the canonical array size for scalar allocations. + if (AI.getArraySize()->getType()->isIntegerTy(32)) + return nullptr; + + // Canonicalize it. + Value *V = IC.Builder->getInt32(1); + AI.setOperand(0, V); + return &AI; } // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 - if (AI.isArrayAllocation()) { // Check C != 1 - if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { - Type *NewTy = - ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); - AllocaInst *New = Builder->CreateAlloca(NewTy, nullptr, AI.getName()); - New->setAlignment(AI.getAlignment()); - - // Scan to the end of the allocation instructions, to skip over a block of - // allocas if possible...also skip interleaved debug info - // - BasicBlock::iterator It = New; - while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It; - - // Now that I is pointing to the first non-allocation-inst in the block, - // insert our getelementptr instruction... - // - Type *IdxTy = DL - ? DL->getIntPtrType(AI.getType()) - : Type::getInt64Ty(AI.getContext()); - Value *NullIdx = Constant::getNullValue(IdxTy); - Value *Idx[2] = { NullIdx, NullIdx }; - Instruction *GEP = + if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { + Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); + AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName()); + New->setAlignment(AI.getAlignment()); + + // Scan to the end of the allocation instructions, to skip over a block of + // allocas if possible...also skip interleaved debug info + // + BasicBlock::iterator It = New; + while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) + ++It; + + // Now that I is pointing to the first non-allocation-inst in the block, + // insert our getelementptr instruction... + // + Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType()); + Value *NullIdx = Constant::getNullValue(IdxTy); + Value *Idx[2] = {NullIdx, NullIdx}; + Instruction *GEP = GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub"); - InsertNewInstBefore(GEP, *It); + IC.InsertNewInstBefore(GEP, *It); - // Now make everything use the getelementptr instead of the original - // allocation. - return ReplaceInstUsesWith(AI, GEP); - } else if (isa<UndefValue>(AI.getArraySize())) { - return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); - } + // Now make everything use the getelementptr instead of the original + // allocation. 
+ return IC.ReplaceInstUsesWith(AI, GEP); } - if (DL && AI.getAllocatedType()->isSized()) { + if (isa<UndefValue>(AI.getArraySize())) + return IC.ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); + + // Ensure that the alloca array size argument has type intptr_t, so that + // any casting is exposed early. + Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType()); + if (AI.getArraySize()->getType() != IntPtrTy) { + Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false); + AI.setOperand(0, V); + return &AI; + } + + return nullptr; +} + +Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { + if (auto *I = simplifyAllocaArraySize(*this, AI)) + return I; + + if (AI.getAllocatedType()->isSized()) { // If the alignment is 0 (unspecified), assign it the preferred alignment. if (AI.getAlignment() == 0) - AI.setAlignment(DL->getPrefTypeAlignment(AI.getAllocatedType())); + AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType())); // Move all alloca's of zero byte objects to the entry block and merge them // together. Note that we only do this for alloca's, because malloc should // allocate and return a unique pointer, even for a zero byte allocation. - if (DL->getTypeAllocSize(AI.getAllocatedType()) == 0) { + if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) { // For a zero sized alloca there is no point in doing an array allocation. // This is helpful if the array size is a complicated expression not used // elsewhere. @@ -237,7 +250,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // dominance as the array size was forced to a constant earlier already. AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst); if (!EntryAI || !EntryAI->getAllocatedType()->isSized() || - DL->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) { + DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) { AI.moveBefore(FirstInst); return &AI; } @@ -246,7 +259,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // assign it the preferred alignment. if (EntryAI->getAlignment() == 0) EntryAI->setAlignment( - DL->getPrefTypeAlignment(EntryAI->getAllocatedType())); + DL.getPrefTypeAlignment(EntryAI->getAllocatedType())); // Replace this zero-sized alloca with the one at the start of the entry // block after ensuring that the address will be aligned enough for both // types. @@ -270,7 +283,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { SmallVector<Instruction *, 4> ToDelete; if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { unsigned SourceAlign = getOrEnforceKnownAlignment( - Copy->getSource(), AI.getAlignment(), DL, AC, &AI, DT); + Copy->getSource(), AI.getAlignment(), DL, &AI, AC, DT); if (AI.getAlignment() <= SourceAlign) { DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); @@ -439,22 +452,22 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) { return nullptr; Type *Ty = LI.getType(); + const DataLayout &DL = IC.getDataLayout(); // Try to canonicalize loads which are only ever stored to operate over // integers instead of any other type. We only do this when the loaded type // is sized and has a size exactly the same as its store size and the store // size is a legal integer type. 
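// [Editor's sketch of the alloca rewrite above; the IR names and the 64-bit
// IdxTy are assumptions.] With a constant array size, simplifyAllocaArraySize
// turns
//   %buf = alloca i32, i64 4
// into an array allocation plus a zero GEP that takes over all uses:
//   %buf1     = alloca [4 x i32]
//   %buf1.sub = getelementptr inbounds [4 x i32]* %buf1, i64 0, i64 0
// while a scalar alloca is canonicalized to carry the i32 1 array size.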
- const DataLayout *DL = IC.getDataLayout(); - if (!Ty->isIntegerTy() && Ty->isSized() && DL && - DL->isLegalInteger(DL->getTypeStoreSizeInBits(Ty)) && - DL->getTypeStoreSizeInBits(Ty) == DL->getTypeSizeInBits(Ty)) { + if (!Ty->isIntegerTy() && Ty->isSized() && + DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) && + DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty)) { if (std::all_of(LI.user_begin(), LI.user_end(), [&LI](User *U) { auto *SI = dyn_cast<StoreInst>(U); return SI && SI->getPointerOperand() != &LI; })) { LoadInst *NewLoad = combineLoadToNewType( IC, LI, - Type::getIntNTy(LI.getContext(), DL->getTypeStoreSizeInBits(Ty))); + Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty))); // Replace all the stores with stores of the newly loaded value. for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) { auto *SI = cast<StoreInst>(*UI++); @@ -489,7 +502,7 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) { // // FIXME: This should probably live in ValueTracking (or similar). static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize, - const DataLayout *DL) { + const DataLayout &DL) { SmallPtrSet<Value *, 4> Visited; SmallVector<Value *, 4> Worklist(1, V); @@ -529,7 +542,7 @@ static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize, if (!CS) return false; - uint64_t TypeSize = DL->getTypeAllocSize(AI->getAllocatedType()); + uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType()); // Make sure that, even if the multiplication below would wrap as an // uint64_t, we still do the right thing. if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize)) @@ -541,7 +554,7 @@ static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize, if (!GV->hasDefinitiveInitializer() || !GV->isConstant()) return false; - uint64_t InitSize = DL->getTypeAllocSize(GV->getType()->getElementType()); + uint64_t InitSize = DL.getTypeAllocSize(GV->getType()->getElementType()); if (InitSize > MaxSize) return false; continue; @@ -570,8 +583,7 @@ static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize, // offsets those indices implied. static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI, Instruction *MemI, unsigned &Idx) { - const DataLayout *DL = IC.getDataLayout(); - if (GEPI->getNumOperands() < 2 || !DL) + if (GEPI->getNumOperands() < 2) return false; // Find the first non-zero index of a GEP. If all indices are zero, return @@ -603,7 +615,8 @@ static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI, GetElementPtrInst::getIndexedType(GEPI->getOperand(0)->getType(), Ops); if (!AllocTy || !AllocTy->isSized()) return false; - uint64_t TyAllocSize = DL->getTypeAllocSize(AllocTy); + const DataLayout &DL = IC.getDataLayout(); + uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy); // If there are more indices after the one we might replace with a zero, make // sure they're all non-negative. If any of them are negative, the overall @@ -665,18 +678,16 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { return Res; // Attempt to improve the alignment. - if (DL) { - unsigned KnownAlign = getOrEnforceKnownAlignment( - Op, DL->getPrefTypeAlignment(LI.getType()), DL, AC, &LI, DT); - unsigned LoadAlign = LI.getAlignment(); - unsigned EffectiveLoadAlign = LoadAlign != 0 ? 
LoadAlign :
-                                        DL->getABITypeAlignment(LI.getType());
-
-    if (KnownAlign > EffectiveLoadAlign)
-      LI.setAlignment(KnownAlign);
-    else if (LoadAlign == 0)
-      LI.setAlignment(EffectiveLoadAlign);
-  }
+  unsigned KnownAlign = getOrEnforceKnownAlignment(
+      Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, AC, DT);
+  unsigned LoadAlign = LI.getAlignment();
+  unsigned EffectiveLoadAlign =
+      LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
+
+  if (KnownAlign > EffectiveLoadAlign)
+    LI.setAlignment(KnownAlign);
+  else if (LoadAlign == 0)
+    LI.setAlignment(EffectiveLoadAlign);

  // Replace GEP indices if possible.
  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
@@ -738,8 +749,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
    // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
    unsigned Align = LI.getAlignment();
-    if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, DL) &&
-        isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, DL)) {
+    if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align) &&
+        isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align)) {
      LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                         SI->getOperand(1)->getName()+".val");
      LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
@@ -807,6 +818,30 @@ static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
  return false;
}

+static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
+  // FIXME: We could probably with some care handle both volatile and atomic
+  // stores here but it isn't clear that this is important.
+  if (!SI.isSimple())
+    return false;
+
+  Value *V = SI.getValueOperand();
+  Type *T = V->getType();
+
+  if (!T->isAggregateType())
+    return false;
+
+  if (StructType *ST = dyn_cast<StructType>(T)) {
+    // If the struct has only one element, we unpack it.
+    if (ST->getNumElements() == 1) {
+      V = IC.Builder->CreateExtractValue(V, 0);
+      combineStoreToNewValue(IC, SI, V);
+      return true;
+    }
+  }
+
+  return false;
+}
+
/// equivalentAddressValues - Test if A and B will obviously have the same
/// value. This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
@@ -845,18 +880,20 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
    return EraseInstFromFunction(SI);

  // Attempt to improve the alignment.
-  if (DL) {
-    unsigned KnownAlign = getOrEnforceKnownAlignment(
-        Ptr, DL->getPrefTypeAlignment(Val->getType()), DL, AC, &SI, DT);
-    unsigned StoreAlign = SI.getAlignment();
-    unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
-      DL->getABITypeAlignment(Val->getType());
-
-    if (KnownAlign > EffectiveStoreAlign)
-      SI.setAlignment(KnownAlign);
-    else if (StoreAlign == 0)
-      SI.setAlignment(EffectiveStoreAlign);
-  }
+  unsigned KnownAlign = getOrEnforceKnownAlignment(
+      Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, AC, DT);
+  unsigned StoreAlign = SI.getAlignment();
+  unsigned EffectiveStoreAlign =
+      StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
+
+  if (KnownAlign > EffectiveStoreAlign)
+    SI.setAlignment(KnownAlign);
+  else if (StoreAlign == 0)
+    SI.setAlignment(EffectiveStoreAlign);
+
+  // Try to canonicalize the stored type.
+  if (unpackStoreToAggregate(*this, SI))
+    return EraseInstFromFunction(SI);

  // Replace GEP indices if possible.
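// [Editor's illustration; the IR names are hypothetical.] The new
// unpackStoreToAggregate above fires only for a simple store of a
// one-element struct such as
//   store { float } %agg, { float }* %p
// The CreateExtractValue call peels out the payload, roughly
//   %elt = extractvalue { float } %agg, 0
// and combineStoreToNewValue is expected to emit the element-typed store
// through a suitably cast pointer, after which visitStoreInst erases the
// original aggregate store.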
if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) { diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index c48e3c9..35513f1 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -26,7 +26,7 @@ using namespace PatternMatch; /// where it is known to be non-zero. If this allows us to simplify the /// computation, do so and return the new operand, otherwise return null. static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC, - Instruction *CxtI) { + Instruction &CxtI) { // If V has multiple uses, then we would have to do more analysis to determine // if this is safe. For example, the use could be in dynamically unreached // code. @@ -47,8 +47,8 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC, // inexact. Similarly for <<. if (BinaryOperator *I = dyn_cast<BinaryOperator>(V)) if (I->isLogicalShift() && - isKnownToBeAPowerOfTwo(I->getOperand(0), false, 0, - IC.getAssumptionCache(), CxtI, + isKnownToBeAPowerOfTwo(I->getOperand(0), IC.getDataLayout(), false, 0, + IC.getAssumptionCache(), &CxtI, IC.getDominatorTree())) { // We know that this is an exact/nuw shift and that the input is a // non-zero context as well. @@ -126,7 +126,7 @@ static Constant *getLogBase2Vector(ConstantDataVector *CV) { /// \brief Return true if we can prove that: /// (mul LHS, RHS) === (mul nsw LHS, RHS) bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS, - Instruction *CxtI) { + Instruction &CxtI) { // Multiplying n * m significant bits yields a result of n + m significant // bits. If the total number of significant bits does not exceed the // result bit width (minus 1), there is no overflow. @@ -137,8 +137,8 @@ bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS, // Note that underestimating the number of sign bits gives a more // conservative answer. - unsigned SignBits = ComputeNumSignBits(LHS, 0, CxtI) + - ComputeNumSignBits(RHS, 0, CxtI); + unsigned SignBits = + ComputeNumSignBits(LHS, 0, &CxtI) + ComputeNumSignBits(RHS, 0, &CxtI); // First handle the easy case: if we have enough sign bits there's // definitely no overflow. @@ -157,8 +157,8 @@ bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS, // For simplicity we just check if at least one side is not negative. 
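// [Worked example, editor's addition.] For the easy case above: if both i32
// operands of a mul are sign extensions from i16, ComputeNumSignBits reports
// at least 17 for each side, so SignBits >= 34 > 32 + 1. The true product of
// two values with at most 16 significant bits fits in 32 bits, so
// WillNotOverflowSignedMul lets visitMul set the nsw flag.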
bool LHSNonNegative, LHSNegative; bool RHSNonNegative, RHSNegative; - ComputeSignBit(LHS, LHSNonNegative, LHSNegative, /*Depth=*/0, CxtI); - ComputeSignBit(RHS, RHSNonNegative, RHSNegative, /*Depth=*/0, CxtI); + ComputeSignBit(LHS, LHSNonNegative, LHSNegative, /*Depth=*/0, &CxtI); + ComputeSignBit(RHS, RHSNonNegative, RHSNegative, /*Depth=*/0, &CxtI); if (LHSNonNegative || RHSNonNegative) return true; } @@ -375,7 +375,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { } } - if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, &I)) { + if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, I)) { Changed = true; I.setHasNoSignedWrap(true); } @@ -422,7 +422,7 @@ static bool isFiniteNonZeroFp(Constant *C) { if (C->getType()->isVectorTy()) { for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E; ++I) { - ConstantFP *CFP = dyn_cast<ConstantFP>(C->getAggregateElement(I)); + ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(C->getAggregateElement(I)); if (!CFP || !CFP->getValueAPF().isFiniteNonZero()) return false; } @@ -437,7 +437,7 @@ static bool isNormalFp(Constant *C) { if (C->getType()->isVectorTy()) { for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E; ++I) { - ConstantFP *CFP = dyn_cast<ConstantFP>(C->getAggregateElement(I)); + ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(C->getAggregateElement(I)); if (!CFP || !CFP->getValueAPF().isNormal()) return false; } @@ -780,7 +780,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); // The RHS is known non-zero. - if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, &I)) { + if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I)) { I.setOperand(1, V); return &I; } @@ -1155,7 +1155,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { return BO; } - if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, AC, &I, DT)) { + if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, AC, &I, DT)) { // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) // Safe because the only negative value (1 << Y) can take on is // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have @@ -1338,7 +1338,7 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); // The RHS is known non-zero. 
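// [Editor's note on the dyn_cast_or_null changes above.] In
// isFiniteNonZeroFp and isNormalFp, Constant::getAggregateElement can return
// null for vector constants it cannot decompose (a ConstantExpr element, for
// instance), and a plain dyn_cast must not be handed a null pointer, so
// dyn_cast_or_null folds that case into the existing '!CFP' bail-out.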
- if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, &I)) { + if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I)) { I.setOperand(1, V); return &I; } @@ -1385,7 +1385,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { I.getType()); // X urem Y -> X and Y-1, where Y is a power of 2, - if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, AC, &I, DT)) { + if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, AC, &I, DT)) { Constant *N1 = Constant::getAllOnesValue(I.getType()); Value *Add = Builder->CreateAdd(Op1, N1); return BinaryOperator::CreateAnd(Op0, Add); diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 0e73db8..ca2caed 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -15,7 +15,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/IR/DataLayout.h" using namespace llvm; #define DEBUG_TYPE "instcombine" @@ -231,7 +230,8 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { Value *Base = FixedOperands[0]; GetElementPtrInst *NewGEP = - GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1)); + GetElementPtrInst::Create(FirstInst->getSourceElementType(), Base, + makeArrayRef(FixedOperands).slice(1)); if (AllInBounds) NewGEP->setIsInBounds(); NewGEP->setDebugLoc(FirstInst->getDebugLoc()); return NewGEP; @@ -891,8 +891,8 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { // it is only used by trunc or trunc(lshr) operations. If so, we split the // PHI into the various pieces being extracted. This sort of thing is // introduced when SROA promotes an aggregate to a single large integer type. - if (PN.getType()->isIntegerTy() && DL && - !DL->isLegalInteger(PN.getType()->getPrimitiveSizeInBits())) + if (PN.getType()->isIntegerTy() && + !DL.isLegalInteger(PN.getType()->getPrimitiveSizeInBits())) if (Instruction *Res = SliceUpIllegalIntegerPHI(PN)) return Res; diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index dd0e65f..b28611f 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -312,9 +312,9 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, /// SimplifyWithOpReplaced - See if V simplifies when its operand Op is /// replaced with RepOp. static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, - const DataLayout *TD, const TargetLibraryInfo *TLI, - DominatorTree *DT, AssumptionCache *AC) { + const DataLayout &DL, DominatorTree *DT, + AssumptionCache *AC) { // Trivial replacement. if (V == Op) return RepOp; @@ -326,18 +326,18 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, // If this is a binary operator, try to simplify it with the replaced op. if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) { if (B->getOperand(0) == Op) - return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD, TLI); + return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), DL, TLI); if (B->getOperand(1) == Op) - return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD, TLI); + return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, DL, TLI); } // Same for CmpInsts. 
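// [Editor's worked example; the IR values are hypothetical.] The icmp-select
// folds built on SimplifyWithOpReplaced behave like this:
//   %c = icmp eq i32 %x, 0
//   %t = sub i32 %y, %x
//   %r = select i1 %c, i32 %t, i32 %y
// Substituting 0 for %x inside the true arm simplifies 'sub i32 %y, 0' to
// %y, which equals the false arm, so %r can be replaced by %y outright.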
if (CmpInst *C = dyn_cast<CmpInst>(I)) { if (C->getOperand(0) == Op) - return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD, + return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), DL, TLI, DT, AC); if (C->getOperand(1) == Op) - return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD, + return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, DL, TLI, DT, AC); } @@ -361,14 +361,14 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, if (ConstOps.size() == I->getNumOperands()) { if (CmpInst *C = dyn_cast<CmpInst>(I)) return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0], - ConstOps[1], TD, TLI); + ConstOps[1], DL, TLI); if (LoadInst *LI = dyn_cast<LoadInst>(I)) if (!LI->isVolatile()) - return ConstantFoldLoadFromConstPtr(ConstOps[0], TD); + return ConstantFoldLoadFromConstPtr(ConstOps[0], DL); - return ConstantFoldInstOperands(I->getOpcode(), I->getType(), - ConstOps, TD, TLI); + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), ConstOps, + DL, TLI); } } @@ -635,25 +635,25 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, // arms of the select. See if substituting this value into the arm and // simplifying the result yields the same value as the other arm. if (Pred == ICmpInst::ICMP_EQ) { - if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) == + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) == TrueVal || - SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) == + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) == TrueVal) return ReplaceInstUsesWith(SI, FalseVal); - if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) == + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) == FalseVal || - SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) == + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) == FalseVal) return ReplaceInstUsesWith(SI, FalseVal); } else if (Pred == ICmpInst::ICMP_NE) { - if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) == + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) == FalseVal || - SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) == + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) == FalseVal) return ReplaceInstUsesWith(SI, TrueVal); - if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) == + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) == TrueVal || - SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) == + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) == TrueVal) return ReplaceInstUsesWith(SI, TrueVal); } @@ -927,7 +927,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return BinaryOperator::CreateAnd(NotCond, FalseVal); } if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) { - if (C->getZExtValue() == false) { + if (!C->getZExtValue()) { // Change: A = select B, C, false --> A = and B, C return BinaryOperator::CreateAnd(CondVal, TrueVal); } @@ -1203,37 +1203,41 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return NV; if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) { - // select(C, select(C, a, b), c) -> select(C, a, c) - if (TrueSI->getCondition() == CondVal) { - if (SI.getTrueValue() == TrueSI->getTrueValue()) - return nullptr; - SI.setOperand(1, TrueSI->getTrueValue()); - return &SI; - } - // select(C0, select(C1, 
a, b), b) -> select(C0&C1, a, b)
-    // We choose this as normal form to enable folding on the And and shortening
-    // paths for the values (this helps GetUnderlyingObjects() for example).
-    if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) {
-      Value *And = Builder->CreateAnd(CondVal, TrueSI->getCondition());
-      SI.setOperand(0, And);
-      SI.setOperand(1, TrueSI->getTrueValue());
-      return &SI;
+    if (TrueSI->getCondition()->getType() == CondVal->getType()) {
+      // select(C, select(C, a, b), c) -> select(C, a, c)
+      if (TrueSI->getCondition() == CondVal) {
+        if (SI.getTrueValue() == TrueSI->getTrueValue())
+          return nullptr;
+        SI.setOperand(1, TrueSI->getTrueValue());
+        return &SI;
+      }
+      // select(C0, select(C1, a, b), b) -> select(C0&C1, a, b)
+      // We choose this as normal form to enable folding on the And and shortening
+      // paths for the values (this helps GetUnderlyingObjects() for example).
+      if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) {
+        Value *And = Builder->CreateAnd(CondVal, TrueSI->getCondition());
+        SI.setOperand(0, And);
+        SI.setOperand(1, TrueSI->getTrueValue());
+        return &SI;
+      }
    }
  }

  if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) {
-    // select(C, a, select(C, b, c)) -> select(C, a, c)
-    if (FalseSI->getCondition() == CondVal) {
-      if (SI.getFalseValue() == FalseSI->getFalseValue())
-        return nullptr;
-      SI.setOperand(2, FalseSI->getFalseValue());
-      return &SI;
-    }
-    // select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b)
-    if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) {
-      Value *Or = Builder->CreateOr(CondVal, FalseSI->getCondition());
-      SI.setOperand(0, Or);
-      SI.setOperand(2, FalseSI->getFalseValue());
-      return &SI;
+    if (FalseSI->getCondition()->getType() == CondVal->getType()) {
+      // select(C, a, select(C, b, c)) -> select(C, a, c)
+      if (FalseSI->getCondition() == CondVal) {
+        if (SI.getFalseValue() == FalseSI->getFalseValue())
+          return nullptr;
+        SI.setOperand(2, FalseSI->getFalseValue());
+        return &SI;
+      }
+      // select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b)
+      if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) {
+        Value *Or = Builder->CreateOr(CondVal, FalseSI->getCondition());
+        SI.setOperand(0, Or);
+        SI.setOperand(2, FalseSI->getFalseValue());
+        return &SI;
+      }
    }
  }
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index b4976e0..a414ec6 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -187,7 +187,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
/// this function inserts the new computation that produces the shifted value.
static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
-                              InstCombiner &IC) {
+                              InstCombiner &IC, const DataLayout &DL) {
  // We can always evaluate constants shifted.
  if (Constant *C = dyn_cast<Constant>(V)) {
    if (isLeftShift)
@@ -196,8 +196,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
      V = IC.Builder->CreateLShr(C, NumBits);
    // If we got a constantexpr back, try to simplify it with DL info.
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
-      V = ConstantFoldConstantExpression(CE, IC.getDataLayout(),
-                                         IC.getTargetLibraryInfo());
+      V = ConstantFoldConstantExpression(CE, DL, IC.getTargetLibraryInfo());
    return V;
  }

@@ -210,8 +209,10 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
  case Instruction::Or:
  case Instruction::Xor:
    // Bitwise operators can all be arbitrarily evaluated shifted.
-    I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
-    I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+    I->setOperand(
+        0, GetShiftedValue(I->getOperand(0), NumBits, isLeftShift, IC, DL));
+    I->setOperand(
+        1, GetShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
    return I;

  case Instruction::Shl: {
@@ -297,8 +298,10 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
  }

  case Instruction::Select:
-    I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
-    I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
+    I->setOperand(
+        1, GetShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
+    I->setOperand(
+        2, GetShiftedValue(I->getOperand(2), NumBits, isLeftShift, IC, DL));
    return I;
  case Instruction::PHI: {
    // We can change a phi if we can change all operands.  Note that we never
@@ -306,8 +309,8 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
-      PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i),
-                                              NumBits, isLeftShift, IC));
+      PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i), NumBits,
+                                              isLeftShift, IC, DL));
    return PN;
  }
  }
@@ -337,8 +340,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
    DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
          " to eliminate shift:\n  IN: " << *Op0 << "\n  SH: " << I <<"\n");

-    return ReplaceInstUsesWith(I,
-                 GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this));
+    return ReplaceInstUsesWith(
+        I, GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this, DL));
  }

  // See if we can simplify any instructions used by the instruction whose sole
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index c5603aa..cd391d0 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"

@@ -70,8 +69,8 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
  APInt DemandedMask(APInt::getAllOnesValue(BitWidth));

-  Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
-                                     KnownZero, KnownOne, 0, &Inst);
+  Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne,
+                                     0, &Inst);
  if (!V) return false;
  if (V == &Inst) return true;
  ReplaceInstUsesWith(Inst, V);
@@ -84,9 +83,9 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
                                        APInt &KnownZero, APInt &KnownOne,
                                        unsigned Depth) {
-  Value *NewVal
= SimplifyDemandedUseBits(U.get(), DemandedMask, - KnownZero, KnownOne, Depth, - dyn_cast<Instruction>(U.getUser())); + Value *NewVal = + SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero, KnownOne, Depth, + dyn_cast<Instruction>(U.getUser())); if (!NewVal) return false; U = NewVal; return true; @@ -122,15 +121,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, assert(Depth <= 6 && "Limit Search Depth"); uint32_t BitWidth = DemandedMask.getBitWidth(); Type *VTy = V->getType(); - assert((DL || !VTy->isPointerTy()) && - "SimplifyDemandedBits needs to know bit widths!"); - assert((!DL || DL->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) && - (!VTy->isIntOrIntVectorTy() || - VTy->getScalarSizeInBits() == BitWidth) && - KnownZero.getBitWidth() == BitWidth && - KnownOne.getBitWidth() == BitWidth && - "Value *V, DemandedMask, KnownZero and KnownOne " - "must have same BitWidth"); + assert( + (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) && + KnownZero.getBitWidth() == BitWidth && + KnownOne.getBitWidth() == BitWidth && + "Value *V, DemandedMask, KnownZero and KnownOne " + "must have same BitWidth"); if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { // We know all of the bits for a constant! KnownOne = CI->getValue() & DemandedMask; @@ -174,9 +170,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // this instruction has a simpler value in that context. if (I->getOpcode() == Instruction::And) { // If either the LHS or the RHS are Zero, the result is zero. - computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1, + computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, CxtI); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1, + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, CxtI); // If all of the demanded bits are known 1 on one side, return the other. @@ -198,9 +194,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // only bits from X or Y are demanded. // If either the LHS or the RHS are One, the result is One. - computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1, + computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, CxtI); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1, + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, CxtI); // If all of the demanded bits are known zero on one side, return the @@ -225,9 +221,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // We can simplify (X^Y) -> X or Y in the user's context if we know that // only bits from X or Y are demanded. - computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1, + computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, CxtI); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1, + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, CxtI); // If all of the demanded bits are known zero on one side, return the @@ -256,10 +252,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; case Instruction::And: // If either the LHS or the RHS are Zero, the result is zero. 
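// [Worked example, editor's addition.] For 'and i8 %x, 15' in a context
// where DemandedMask is 0x0F, the constant operand is known-one in every
// demanded bit, so the "all demanded bits known 1 on one side" rule above
// returns %x for that use and the masking instruction becomes dead.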
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1) || + if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero, + RHSKnownOne, Depth + 1) || SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero, - LHSKnownZero, LHSKnownOne, Depth+1)) + LHSKnownZero, LHSKnownOne, Depth + 1)) return I; assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); @@ -294,10 +290,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; case Instruction::Or: // If either the LHS or the RHS are One, the result is One. - if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1) || + if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero, + RHSKnownOne, Depth + 1) || SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne, - LHSKnownZero, LHSKnownOne, Depth+1)) + LHSKnownZero, LHSKnownOne, Depth + 1)) return I; assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); @@ -336,10 +332,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownOne = RHSKnownOne | LHSKnownOne; break; case Instruction::Xor: { - if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - LHSKnownZero, LHSKnownOne, Depth+1)) + if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero, + RHSKnownOne, Depth + 1) || + SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, LHSKnownZero, + LHSKnownOne, Depth + 1)) return I; assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); @@ -423,10 +419,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; } case Instruction::Select: - if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, - LHSKnownZero, LHSKnownOne, Depth+1)) + if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero, + RHSKnownOne, Depth + 1) || + SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero, + LHSKnownOne, Depth + 1)) return I; assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); @@ -445,8 +441,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, DemandedMask = DemandedMask.zext(truncBf); KnownZero = KnownZero.zext(truncBf); KnownOne = KnownOne.zext(truncBf); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - KnownZero, KnownOne, Depth+1)) + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero, + KnownOne, Depth + 1)) return I; DemandedMask = DemandedMask.trunc(BitWidth); KnownZero = KnownZero.trunc(BitWidth); @@ -471,8 +467,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // Don't touch a vector-to-scalar bitcast. 
return nullptr; - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - KnownZero, KnownOne, Depth+1)) + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero, + KnownOne, Depth + 1)) return I; assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); break; @@ -483,8 +479,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, DemandedMask = DemandedMask.trunc(SrcBitWidth); KnownZero = KnownZero.trunc(SrcBitWidth); KnownOne = KnownOne.trunc(SrcBitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - KnownZero, KnownOne, Depth+1)) + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero, + KnownOne, Depth + 1)) return I; DemandedMask = DemandedMask.zext(BitWidth); KnownZero = KnownZero.zext(BitWidth); @@ -510,8 +506,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth); KnownZero = KnownZero.trunc(SrcBitWidth); KnownOne = KnownOne.trunc(SrcBitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, - KnownZero, KnownOne, Depth+1)) + if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, KnownZero, + KnownOne, Depth + 1)) return I; InputDemandedBits = InputDemandedBits.zext(BitWidth); KnownZero = KnownZero.zext(BitWidth); @@ -552,7 +548,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // Find information about known zero/one bits in the input. if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits, - LHSKnownZero, LHSKnownOne, Depth+1)) + LHSKnownZero, LHSKnownOne, Depth + 1)) return I; // If the RHS of the add has bits set that can't affect the input, reduce @@ -602,9 +598,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // significant bit and all those below it. 
APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps, - LHSKnownZero, LHSKnownOne, Depth+1) || + LHSKnownZero, LHSKnownOne, Depth + 1) || SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps, - LHSKnownZero, LHSKnownOne, Depth+1)) + LHSKnownZero, LHSKnownOne, Depth + 1)) return I; } } @@ -619,9 +615,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, uint32_t NLZ = DemandedMask.countLeadingZeros(); APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps, - LHSKnownZero, LHSKnownOne, Depth+1) || + LHSKnownZero, LHSKnownOne, Depth + 1) || SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps, - LHSKnownZero, LHSKnownOne, Depth+1)) + LHSKnownZero, LHSKnownOne, Depth + 1)) return I; } @@ -662,8 +658,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, else if (IOp->hasNoUnsignedWrap()) DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, - KnownZero, KnownOne, Depth+1)) + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, + KnownOne, Depth + 1)) return I; assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); KnownZero <<= ShiftAmt; @@ -686,8 +682,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (cast<LShrOperator>(I)->isExact()) DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, - KnownZero, KnownOne, Depth+1)) + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, + KnownOne, Depth + 1)) return I; assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); @@ -731,8 +727,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (cast<AShrOperator>(I)->isExact()) DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, - KnownZero, KnownOne, Depth+1)) + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, + KnownOne, Depth + 1)) return I; assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); // Compute the new bits that are at the top now. @@ -772,8 +768,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt LowBits = RA - 1; APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, - LHSKnownZero, LHSKnownOne, Depth+1)) + if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, LHSKnownZero, + LHSKnownOne, Depth + 1)) return I; // The low bits of LHS are unchanged by the srem. @@ -798,7 +794,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // remainder is zero. if (DemandedMask.isNegative() && KnownZero.isNonNegative()) { APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1, + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, CxtI); // If it's known zero, our sign bit is also zero. 
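// [Worked example, editor's addition.] For the add/sub handling above: if
// only the low byte of a 32-bit result is demanded (DemandedMask = 0xFF,
// NLZ = 24), DemandedFromOps is getLowBitsSet(32, 8) = 0xFF. Carries only
// propagate upward, so operand bits above bit 7 cannot influence the
// demanded result bits, and both operands are simplified against the
// narrower mask.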
if (LHSKnownZero.isNegative()) @@ -808,10 +804,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, case Instruction::URem: { APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); APInt AllOnes = APInt::getAllOnesValue(BitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, - KnownZero2, KnownOne2, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(1), AllOnes, - KnownZero2, KnownOne2, Depth+1)) + if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, KnownZero2, + KnownOne2, Depth + 1) || + SimplifyDemandedBits(I->getOperandUse(1), AllOnes, KnownZero2, + KnownOne2, Depth + 1)) return I; unsigned Leaders = KnownZero2.countLeadingOnes(); @@ -1051,7 +1047,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // Note that we can't propagate undef elt info, because we don't know // which elt is getting updated. TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, - UndefElts2, Depth+1); + UndefElts2, Depth + 1); if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } break; } @@ -1069,7 +1065,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt DemandedElts2 = DemandedElts; DemandedElts2.clearBit(IdxNo); TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2, - UndefElts, Depth+1); + UndefElts, Depth + 1); if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } // The inserted element is defined. @@ -1097,12 +1093,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt UndefElts4(LHSVWidth, 0); TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded, - UndefElts4, Depth+1); + UndefElts4, Depth + 1); if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } APInt UndefElts3(LHSVWidth, 0); TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded, - UndefElts3, Depth+1); + UndefElts3, Depth + 1); if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } bool NewUndefElts = false; @@ -1152,12 +1148,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } } - TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, - UndefElts, Depth+1); + TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, UndefElts, + Depth + 1); if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded, - UndefElts2, Depth+1); + UndefElts2, Depth + 1); if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; } // Output elements are undefined if both are undefined. @@ -1204,7 +1200,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // div/rem demand all inputs, because they don't want divide by zero. TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts, - UndefElts2, Depth+1); + UndefElts2, Depth + 1); if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; @@ -1238,11 +1234,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, case Instruction::Sub: case Instruction::Mul: // div/rem demand all inputs, because they don't want divide by zero. 
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, - UndefElts, Depth+1); + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts, + Depth + 1); if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts, - UndefElts2, Depth+1); + UndefElts2, Depth + 1); if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } // Output elements are undefined if both are undefined. Consider things @@ -1251,8 +1247,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, break; case Instruction::FPTrunc: case Instruction::FPExt: - TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, - UndefElts, Depth+1); + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts, + Depth + 1); if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } break; @@ -1273,10 +1269,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, case Intrinsic::x86_sse2_min_sd: case Intrinsic::x86_sse2_max_sd: TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, - UndefElts, Depth+1); + UndefElts, Depth + 1); if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; } TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts, - UndefElts2, Depth+1); + UndefElts2, Depth + 1); if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; } // If only the low elt is demanded and this is a scalarizable intrinsic, diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index e07efb5..b6beb65 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -202,8 +202,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { APInt UndefElts(VectorWidth, 0); APInt DemandedMask(VectorWidth, 0); DemandedMask.setBit(IndexVal); - if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), - DemandedMask, UndefElts)) { + if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask, + UndefElts)) { EI.setOperand(0, V); return &EI; } @@ -733,7 +733,8 @@ static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) { case Instruction::GetElementPtr: { Value *Ptr = NewOps[0]; ArrayRef<Value*> Idx = NewOps.slice(1); - GetElementPtrInst *GEP = GetElementPtrInst::Create(Ptr, Idx, "", I); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I); GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds()); return GEP; } diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 88fcd53..90551e4 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -57,6 +57,7 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> @@ -75,7 +76,7 @@ STATISTIC(NumFactor , "Number of factorizations"); STATISTIC(NumReassoc , "Number of reassociations"); Value *InstCombiner::EmitGEPOffset(User *GEP) { - return llvm::EmitGEPOffset(Builder, *getDataLayout(), GEP); + return llvm::EmitGEPOffset(Builder, DL, GEP); } /// ShouldChangeType - Return true if it is desirable to convert a computation @@ -84,13 +85,10 @@ Value 
*InstCombiner::EmitGEPOffset(User *GEP) { bool InstCombiner::ShouldChangeType(Type *From, Type *To) const { assert(From->isIntegerTy() && To->isIntegerTy()); - // If we don't have DL, we don't know if the source/dest are legal. - if (!DL) return false; - unsigned FromWidth = From->getPrimitiveSizeInBits(); unsigned ToWidth = To->getPrimitiveSizeInBits(); - bool FromLegal = DL->isLegalInteger(FromWidth); - bool ToLegal = DL->isLegalInteger(ToWidth); + bool FromLegal = DL.isLegalInteger(FromWidth); + bool ToLegal = DL.isLegalInteger(ToWidth); // If this is a legal integer from type, and the result would be an illegal // type, don't do the transformation. @@ -445,7 +443,7 @@ getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode, /// This tries to simplify binary operations by factorizing out common terms /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). static Value *tryFactorization(InstCombiner::BuilderTy *Builder, - const DataLayout *DL, BinaryOperator &I, + const DataLayout &DL, BinaryOperator &I, Instruction::BinaryOps InnerOpcode, Value *A, Value *B, Value *C, Value *D) { @@ -872,12 +870,9 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { /// will land us at the specified offset. If so, fill them into NewIndices and /// return the resultant element type, otherwise return null. Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset, - SmallVectorImpl<Value*> &NewIndices) { + SmallVectorImpl<Value *> &NewIndices) { assert(PtrTy->isPtrOrPtrVectorTy()); - if (!DL) - return nullptr; - Type *Ty = PtrTy->getPointerElementType(); if (!Ty->isSized()) return nullptr; @@ -885,9 +880,9 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset, // Start with the index over the outer type. Note that the type size // might be zero (even if the offset isn't zero) if the indexed type // is something like [0 x {int, int}] - Type *IntPtrTy = DL->getIntPtrType(PtrTy); + Type *IntPtrTy = DL.getIntPtrType(PtrTy); int64_t FirstIdx = 0; - if (int64_t TySize = DL->getTypeAllocSize(Ty)) { + if (int64_t TySize = DL.getTypeAllocSize(Ty)) { FirstIdx = Offset/TySize; Offset -= FirstIdx*TySize; @@ -905,11 +900,11 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset, // Index into the types. If we fail, set OrigBase to null. while (Offset) { // Indexing into tail padding between struct/array elements. - if (uint64_t(Offset*8) >= DL->getTypeSizeInBits(Ty)) + if (uint64_t(Offset * 8) >= DL.getTypeSizeInBits(Ty)) return nullptr; if (StructType *STy = dyn_cast<StructType>(Ty)) { - const StructLayout *SL = DL->getStructLayout(STy); + const StructLayout *SL = DL.getStructLayout(STy); assert(Offset < (int64_t)SL->getSizeInBytes() && "Offset must stay within the indexed type"); @@ -920,7 +915,7 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset, Offset -= SL->getElementOffset(Elt); Ty = STy->getElementType(Elt); } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) { - uint64_t EltSize = DL->getTypeAllocSize(AT->getElementType()); + uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType()); assert(EltSize && "Cannot index into a zero-sized array"); NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize)); Offset %= EltSize; @@ -1214,7 +1209,8 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { // It may not be safe to reorder shuffles and things like div, urem, etc. // because we may trap when executing those ops on unknown vector elements. // See PR20059. 
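// [Editor's sketch; the concrete types and layout are assumptions.]
// FindElementAtOffset above turns a byte offset into explicit GEP indices.
// For PtrTy = { i32, i32 }* and Offset = 4 under a typical 64-bit
// DataLayout, the outer stride is 8 bytes, so FirstIdx = 0; the StructLayout
// then places byte 4 in element 1, giving NewIndices = { i64 0, i32 1 } and
// a returned element type of i32.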
- if (!isSafeToSpeculativelyExecute(&Inst, DL)) return nullptr; + if (!isSafeToSpeculativelyExecute(&Inst)) + return nullptr; unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements(); Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1); @@ -1300,37 +1296,37 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Eliminate unneeded casts for indices, and replace indices which displace // by multiples of a zero size type with zero. - if (DL) { - bool MadeChange = false; - Type *IntPtrTy = DL->getIntPtrType(GEP.getPointerOperandType()); - - gep_type_iterator GTI = gep_type_begin(GEP); - for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); - I != E; ++I, ++GTI) { - // Skip indices into struct types. - SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI); - if (!SeqTy) continue; - - // If the element type has zero size then any index over it is equivalent - // to an index of zero, so replace it with zero if it is not zero already. - if (SeqTy->getElementType()->isSized() && - DL->getTypeAllocSize(SeqTy->getElementType()) == 0) - if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) { - *I = Constant::getNullValue(IntPtrTy); - MadeChange = true; - } + bool MadeChange = false; + Type *IntPtrTy = DL.getIntPtrType(GEP.getPointerOperandType()); + + gep_type_iterator GTI = gep_type_begin(GEP); + for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E; + ++I, ++GTI) { + // Skip indices into struct types. + SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI); + if (!SeqTy) + continue; - Type *IndexTy = (*I)->getType(); - if (IndexTy != IntPtrTy) { - // If we are using a wider index than needed for this platform, shrink - // it to what we need. If narrower, sign-extend it to what we need. - // This explicit cast can make subsequent optimizations more obvious. - *I = Builder->CreateIntCast(*I, IntPtrTy, true); + // If the element type has zero size then any index over it is equivalent + // to an index of zero, so replace it with zero if it is not zero already. + if (SeqTy->getElementType()->isSized() && + DL.getTypeAllocSize(SeqTy->getElementType()) == 0) + if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) { + *I = Constant::getNullValue(IntPtrTy); MadeChange = true; } + + Type *IndexTy = (*I)->getType(); + if (IndexTy != IntPtrTy) { + // If we are using a wider index than needed for this platform, shrink + // it to what we need. If narrower, sign-extend it to what we need. + // This explicit cast can make subsequent optimizations more obvious. + *I = Builder->CreateIntCast(*I, IntPtrTy, true); + MadeChange = true; } - if (MadeChange) return &GEP; } + if (MadeChange) + return &GEP; // Check to see if the inputs to the PHI node are getelementptr instructions. if (PHINode *PN = dyn_cast<PHINode>(PtrOp)) { @@ -1338,6 +1334,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (!Op1) return nullptr; + // Don't fold a GEP into itself through a PHI node. This can only happen + // through the back-edge of a loop. Folding a GEP into itself means that + // the value of the previous iteration needs to be stored in the meantime, + // thus requiring an additional register variable to be live, but not + // actually achieving anything (the GEP still needs to be executed once per + // loop iteration). 
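// [Editor's illustration; the IR is hypothetical.] The self-reference being
// rejected here typically comes from a pointer-increment loop:
//   loop:
//     %p      = phi i8* [ %base, %entry ], [ %p.next, %loop ]
//     %p.next = getelementptr inbounds i8* %p, i64 1
// Folding %p.next into the PHI's operand list would make the GEP one of its
// own inputs, so the checks below simply bail out.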
+ if (Op1 == &GEP) + return nullptr; + signed DI = -1; for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) { @@ -1345,6 +1350,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands()) return nullptr; + // As for Op1 above, don't try to fold a GEP into itself. + if (Op2 == &GEP) + return nullptr; + // Keep track of the type as we walk the GEP. Type *CurTy = Op1->getOperand(0)->getType()->getScalarType(); @@ -1481,19 +1490,22 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } if (!Indices.empty()) - return (GEP.isInBounds() && Src->isInBounds()) ? - GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices, - GEP.getName()) : - GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName()); + return GEP.isInBounds() && Src->isInBounds() + ? GetElementPtrInst::CreateInBounds( + Src->getSourceElementType(), Src->getOperand(0), Indices, + GEP.getName()) + : GetElementPtrInst::Create(Src->getSourceElementType(), + Src->getOperand(0), Indices, + GEP.getName()); } - if (DL && GEP.getNumIndices() == 1) { + if (GEP.getNumIndices() == 1) { unsigned AS = GEP.getPointerAddressSpace(); if (GEP.getOperand(1)->getType()->getScalarSizeInBits() == - DL->getPointerSizeInBits(AS)) { + DL.getPointerSizeInBits(AS)) { Type *PtrTy = GEP.getPointerOperandType(); Type *Ty = PtrTy->getPointerElementType(); - uint64_t TyAllocSize = DL->getTypeAllocSize(Ty); + uint64_t TyAllocSize = DL.getTypeAllocSize(Ty); bool Matched = false; uint64_t C; @@ -1562,8 +1574,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (CATy->getElementType() == StrippedPtrTy->getElementType()) { // -> GEP i8* X, ... SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end()); - GetElementPtrInst *Res = - GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName()); + GetElementPtrInst *Res = GetElementPtrInst::Create( + StrippedPtrTy->getElementType(), StrippedPtr, Idx, GEP.getName()); Res->setIsInBounds(GEP.isInBounds()); if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) return Res; @@ -1599,9 +1611,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // %0 = GEP [10 x i8] addrspace(1)* X, ... // addrspacecast i8 addrspace(1)* %0 to i8* SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end()); - Value *NewGEP = GEP.isInBounds() ? - Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) : - Builder->CreateGEP(StrippedPtr, Idx, GEP.getName()); + Value *NewGEP = + GEP.isInBounds() + ? Builder->CreateInBoundsGEP(StrippedPtr, Idx, + GEP.getName()) + : Builder->CreateGEP(StrippedPtrTy->getElementType(), + StrippedPtr, Idx, GEP.getName()); return new AddrSpaceCastInst(NewGEP, GEP.getType()); } } @@ -1612,14 +1627,16 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast Type *SrcElTy = StrippedPtrTy->getElementType(); Type *ResElTy = PtrOp->getType()->getPointerElementType(); - if (DL && SrcElTy->isArrayTy() && - DL->getTypeAllocSize(SrcElTy->getArrayElementType()) == - DL->getTypeAllocSize(ResElTy)) { - Type *IdxType = DL->getIntPtrType(GEP.getType()); + if (SrcElTy->isArrayTy() && + DL.getTypeAllocSize(SrcElTy->getArrayElementType()) == + DL.getTypeAllocSize(ResElTy)) { + Type *IdxType = DL.getIntPtrType(GEP.getType()); Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) }; - Value *NewGEP = GEP.isInBounds() ? 
- Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) : - Builder->CreateGEP(StrippedPtr, Idx, GEP.getName()); + Value *NewGEP = + GEP.isInBounds() + ? Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) + : Builder->CreateGEP(StrippedPtrTy->getElementType(), + StrippedPtr, Idx, GEP.getName()); // V and GEP are both pointer types --> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -1630,11 +1647,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // %V = mul i64 %N, 4 // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V // into: %t1 = getelementptr i32* %arr, i32 %N; bitcast - if (DL && ResElTy->isSized() && SrcElTy->isSized()) { + if (ResElTy->isSized() && SrcElTy->isSized()) { // Check that changing the type amounts to dividing the index by a scale // factor. - uint64_t ResSize = DL->getTypeAllocSize(ResElTy); - uint64_t SrcSize = DL->getTypeAllocSize(SrcElTy); + uint64_t ResSize = DL.getTypeAllocSize(ResElTy); + uint64_t SrcSize = DL.getTypeAllocSize(SrcElTy); if (ResSize && SrcSize % ResSize == 0) { Value *Idx = GEP.getOperand(1); unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); @@ -1642,7 +1659,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Earlier transforms ensure that the index has type IntPtrType, which // considerably simplifies the logic by eliminating implicit casts. - assert(Idx->getType() == DL->getIntPtrType(GEP.getType()) && + assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) && "Index not cast to pointer width?"); bool NSW; @@ -1650,9 +1667,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Successfully decomposed Idx as NewIdx * Scale, form a new GEP. // If the multiplication NewIdx * Scale may overflow then the new // GEP may not be "inbounds". - Value *NewGEP = GEP.isInBounds() && NSW ? - Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, GEP.getName()) : - Builder->CreateGEP(StrippedPtr, NewIdx, GEP.getName()); + Value *NewGEP = + GEP.isInBounds() && NSW + ? Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, + GEP.getName()) + : Builder->CreateGEP(StrippedPtrTy->getElementType(), + StrippedPtr, NewIdx, GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -1665,13 +1685,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp // (where tmp = 8*tmp2) into: // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast - if (DL && ResElTy->isSized() && SrcElTy->isSized() && - SrcElTy->isArrayTy()) { + if (ResElTy->isSized() && SrcElTy->isSized() && SrcElTy->isArrayTy()) { // Check that changing to the array element type amounts to dividing the // index by a scale factor. - uint64_t ResSize = DL->getTypeAllocSize(ResElTy); - uint64_t ArrayEltSize - = DL->getTypeAllocSize(SrcElTy->getArrayElementType()); + uint64_t ResSize = DL.getTypeAllocSize(ResElTy); + uint64_t ArrayEltSize = + DL.getTypeAllocSize(SrcElTy->getArrayElementType()); if (ResSize && ArrayEltSize % ResSize == 0) { Value *Idx = GEP.getOperand(1); unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); @@ -1679,7 +1698,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Earlier transforms ensure that the index has type IntPtrType, which // considerably simplifies the logic by eliminating implicit casts. 
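// Worked instance of the scale check above (types assumed): with
// SrcElTy = i32 and ResElTy = i8, SrcSize = 4 and ResSize = 1, so
// Scale = SrcSize / ResSize = 4; an index %V = mul i64 %N, 4 then
// decomposes as NewIdx * Scale with NewIdx = %N, giving
// "getelementptr i32* %arr, i32 %N" plus a bitcast, and the result stays
// inbounds only when the multiply is known not to wrap (NSW).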
- assert(Idx->getType() == DL->getIntPtrType(GEP.getType()) && + assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) && "Index not cast to pointer width?"); bool NSW; @@ -1688,13 +1707,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // If the multiplication NewIdx * Scale may overflow then the new // GEP may not be "inbounds". Value *Off[2] = { - Constant::getNullValue(DL->getIntPtrType(GEP.getType())), - NewIdx - }; + Constant::getNullValue(DL.getIntPtrType(GEP.getType())), + NewIdx}; Value *NewGEP = GEP.isInBounds() && NSW ? Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) : - Builder->CreateGEP(StrippedPtr, Off, GEP.getName()); + Builder->CreateGEP(SrcElTy, StrippedPtr, Off, GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEP.getType()); @@ -1704,9 +1722,6 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } } - if (!DL) - return nullptr; - // addrspacecast between types is canonicalized as a bitcast, then an // addrspacecast. To take advantage of the below bitcast + struct GEP, look // through the addrspacecast. @@ -1727,10 +1742,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) { Value *Operand = BCI->getOperand(0); PointerType *OpType = cast<PointerType>(Operand->getType()); - unsigned OffsetBits = DL->getPointerTypeSizeInBits(GEP.getType()); + unsigned OffsetBits = DL.getPointerTypeSizeInBits(GEP.getType()); APInt Offset(OffsetBits, 0); if (!isa<BitCastInst>(Operand) && - GEP.accumulateConstantOffset(*DL, Offset)) { + GEP.accumulateConstantOffset(DL, Offset)) { // If this GEP instruction doesn't move the pointer, just replace the GEP // with a bitcast of the real input to the dest type. @@ -1761,7 +1776,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) { Value *NGEP = GEP.isInBounds() ? Builder->CreateInBoundsGEP(Operand, NewIndices) : - Builder->CreateGEP(Operand, NewIndices); + Builder->CreateGEP(OpType->getElementType(), Operand, NewIndices); if (NGEP->getType() == GEP.getType()) return ReplaceInstUsesWith(GEP, NGEP); @@ -2012,6 +2027,15 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { return &BI; } + // If the condition is irrelevant, remove the use so that other + // transforms on the condition become more effective. + if (BI.isConditional() && + BI.getSuccessor(0) == BI.getSuccessor(1) && + !isa<UndefValue>(BI.getCondition())) { + BI.setCondition(UndefValue::get(BI.getCondition()->getType())); + return &BI; + } + // Canonicalize fcmp_one -> fcmp_oeq FCmpInst::Predicate FPred; Value *Y; if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)), @@ -2051,7 +2075,7 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { Value *Cond = SI.getCondition(); unsigned BitWidth = cast<IntegerType>(Cond->getType())->getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(Cond, KnownZero, KnownOne); + computeKnownBits(Cond, KnownZero, KnownOne, 0, &SI); unsigned LeadingKnownZeros = KnownZero.countLeadingOnes(); unsigned LeadingKnownOnes = KnownOne.countLeadingOnes(); @@ -2070,8 +2094,8 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { // x86 generates redundant zero-extension instructions if the operand is // truncated to i8 or i16.
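// Worked instance of the narrowing below (widths assumed): a switch on an
// i128 condition whose top 64 bits are known zero gives NewWidth = 64; on
// a target whose largest legal integer type is 64 bits the condition is
// truncated to i64, while a smaller NewWidth is left alone to avoid the
// redundant zero-extensions mentioned above.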
bool TruncCond = false; - if (DL && BitWidth > NewWidth && - NewWidth >= DL->getLargestLegalIntTypeSize()) { + if (NewWidth > 0 && BitWidth > NewWidth && + NewWidth >= DL.getLargestLegalIntTypeSize()) { TruncCond = true; IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth); Builder->SetInsertPoint(&SI); @@ -2632,7 +2656,7 @@ bool InstCombiner::run() { } // Instruction isn't dead, see if we can constant propagate it. - if (!I->use_empty() && isa<Constant>(I->getOperand(0))) + if (!I->use_empty() && isa<Constant>(I->getOperand(0))) { if (Constant *C = ConstantFoldInstruction(I, DL, TLI)) { DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); @@ -2643,6 +2667,7 @@ bool InstCombiner::run() { MadeIRChange = true; continue; } + } // See if we can trivially sink this instruction to a successor basic block. if (I->hasOneUse()) { @@ -2756,10 +2781,9 @@ bool InstCombiner::run() { /// many instructions are dead or constant). Additionally, if we find a branch /// whose condition is a known constant, we only visit the reachable successors. /// -static bool AddReachableCodeToWorklist(BasicBlock *BB, - SmallPtrSetImpl<BasicBlock*> &Visited, +static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL, + SmallPtrSetImpl<BasicBlock *> &Visited, InstCombineWorklist &ICWorklist, - const DataLayout *DL, const TargetLibraryInfo *TLI) { bool MadeIRChange = false; SmallVector<BasicBlock*, 256> Worklist; @@ -2797,23 +2821,22 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, continue; } - if (DL) { - // See if we can constant fold its operands. - for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); - i != e; ++i) { - ConstantExpr *CE = dyn_cast<ConstantExpr>(i); - if (CE == nullptr) continue; + // See if we can constant fold its operands. + for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); i != e; + ++i) { + ConstantExpr *CE = dyn_cast<ConstantExpr>(i); + if (CE == nullptr) + continue; - Constant*& FoldRes = FoldedConstants[CE]; - if (!FoldRes) - FoldRes = ConstantFoldConstantExpression(CE, DL, TLI); - if (!FoldRes) - FoldRes = CE; + Constant *&FoldRes = FoldedConstants[CE]; + if (!FoldRes) + FoldRes = ConstantFoldConstantExpression(CE, DL, TLI); + if (!FoldRes) + FoldRes = CE; - if (FoldRes != CE) { - *i = FoldRes; - MadeIRChange = true; - } + if (FoldRes != CE) { + *i = FoldRes; + MadeIRChange = true; } } @@ -2867,7 +2890,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, /// /// This also does basic constant propagation and other forward fixing to make /// the combiner itself run much faster. -static bool prepareICWorklistFromFunction(Function &F, const DataLayout *DL, +static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, TargetLibraryInfo *TLI, InstCombineWorklist &ICWorklist) { bool MadeIRChange = false; @@ -2877,7 +2900,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout *DL, // track of which blocks we visit. SmallPtrSet<BasicBlock *, 64> Visited; MadeIRChange |= - AddReachableCodeToWorklist(F.begin(), Visited, ICWorklist, DL, TLI); + AddReachableCodeToWorklist(F.begin(), DL, Visited, ICWorklist, TLI); // Do a quick scan over the function. If we find any blocks that are // unreachable, remove any instructions inside of them. 
This prevents @@ -2910,12 +2933,13 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout *DL, return MadeIRChange; } -static bool combineInstructionsOverFunction( - Function &F, InstCombineWorklist &Worklist, AssumptionCache &AC, - TargetLibraryInfo &TLI, DominatorTree &DT, const DataLayout *DL = nullptr, - LoopInfo *LI = nullptr) { +static bool +combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, + AssumptionCache &AC, TargetLibraryInfo &TLI, + DominatorTree &DT, LoopInfo *LI = nullptr) { // Minimizing size? bool MinimizeSize = F.hasFnAttribute(Attribute::MinSize); + auto &DL = F.getParent()->getDataLayout(); /// Builder - This is an IRBuilder that automatically inserts new /// instructions into the worklist when they are created. @@ -2950,15 +2974,13 @@ static bool combineInstructionsOverFunction( PreservedAnalyses InstCombinePass::run(Function &F, AnalysisManager<Function> *AM) { - auto *DL = F.getParent()->getDataLayout(); - auto &AC = AM->getResult<AssumptionAnalysis>(F); auto &DT = AM->getResult<DominatorTreeAnalysis>(F); auto &TLI = AM->getResult<TargetLibraryAnalysis>(F); auto *LI = AM->getCachedResult<LoopAnalysis>(F); - if (!combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, DL, LI)) + if (!combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI)) // No changes, all analyses are preserved. return PreservedAnalyses::all(); @@ -3007,12 +3029,10 @@ bool InstructionCombiningPass::runOnFunction(Function &F) { auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); // Optional analyses. - auto *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - auto *DL = DLP ? &DLP->getDataLayout() : nullptr; auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; - return combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, DL, LI); + return combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI); } char InstructionCombiningPass::ID = 0; diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 882aab0..978c857 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -24,6 +24,9 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" @@ -43,12 +46,14 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/SwapByteOrder.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/ASanStackFrameLayout.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" #include <algorithm> #include <string> #include <system_error> @@ -70,17 +75,15 @@ static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30; static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46; static const uint64_t kWindowsShadowOffset32 = 3ULL << 28; -static const size_t kMinStackMallocSize = 1 << 6; // 64B +static const size_t kMinStackMallocSize = 1 << 6; // 64B static const size_t kMaxStackMallocSize = 1 << 16; // 64K static const uintptr_t 
kCurrentStackFrameMagic = 0x41B58AB3; static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E; static const char *const kAsanModuleCtorName = "asan.module_ctor"; static const char *const kAsanModuleDtorName = "asan.module_dtor"; -static const uint64_t kAsanCtorAndDtorPriority = 1; +static const uint64_t kAsanCtorAndDtorPriority = 1; static const char *const kAsanReportErrorTemplate = "__asan_report_"; -static const char *const kAsanReportLoadN = "__asan_report_load_n"; -static const char *const kAsanReportStoreN = "__asan_report_store_n"; static const char *const kAsanRegisterGlobalsName = "__asan_register_globals"; static const char *const kAsanUnregisterGlobalsName = "__asan_unregister_globals"; @@ -90,7 +93,7 @@ static const char *const kAsanInitName = "__asan_init_v5"; static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp"; static const char *const kAsanPtrSub = "__sanitizer_ptr_sub"; static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return"; -static const int kMaxAsanStackMallocSizeClass = 10; +static const int kMaxAsanStackMallocSizeClass = 10; static const char *const kAsanStackMallocNameTemplate = "__asan_stack_malloc_"; static const char *const kAsanStackFreeNameTemplate = "__asan_stack_free_"; static const char *const kAsanGenPrefix = "__asan_gen_"; @@ -103,10 +106,6 @@ static const char *const kAsanUnpoisonStackMemoryName = static const char *const kAsanOptionDetectUAR = "__asan_option_detect_stack_use_after_return"; -#ifndef NDEBUG -static const int kAsanStackAfterReturnMagic = 0xf5; -#endif - // Access sizes are powers of two: 1, 2, 4, 8, 16. static const size_t kNumberOfAccessSizes = 5; @@ -120,84 +119,110 @@ static const unsigned kAsanAllocaPartialVal2 = 0x000000cbU; // This flag may need to be replaced with -f[no-]asan-reads. static cl::opt<bool> ClInstrumentReads("asan-instrument-reads", - cl::desc("instrument read instructions"), cl::Hidden, cl::init(true)); -static cl::opt<bool> ClInstrumentWrites("asan-instrument-writes", - cl::desc("instrument write instructions"), cl::Hidden, cl::init(true)); -static cl::opt<bool> ClInstrumentAtomics("asan-instrument-atomics", - cl::desc("instrument atomic instructions (rmw, cmpxchg)"), - cl::Hidden, cl::init(true)); -static cl::opt<bool> ClAlwaysSlowPath("asan-always-slow-path", - cl::desc("use instrumentation with slow path for all accesses"), - cl::Hidden, cl::init(false)); + cl::desc("instrument read instructions"), + cl::Hidden, cl::init(true)); +static cl::opt<bool> ClInstrumentWrites( + "asan-instrument-writes", cl::desc("instrument write instructions"), + cl::Hidden, cl::init(true)); +static cl::opt<bool> ClInstrumentAtomics( + "asan-instrument-atomics", + cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, + cl::init(true)); +static cl::opt<bool> ClAlwaysSlowPath( + "asan-always-slow-path", + cl::desc("use instrumentation with slow path for all accesses"), cl::Hidden, + cl::init(false)); // This flag limits the number of instructions to be instrumented // in any given BB. Normally, this should be set to unlimited (INT_MAX), // but due to http://llvm.org/bugs/show_bug.cgi?id=12652 we temporarily // set it to 10000.
-static cl::opt<int> ClMaxInsnsToInstrumentPerBB("asan-max-ins-per-bb", - cl::init(10000), - cl::desc("maximal number of instructions to instrument in any given BB"), - cl::Hidden); +static cl::opt<int> ClMaxInsnsToInstrumentPerBB( + "asan-max-ins-per-bb", cl::init(10000), + cl::desc("maximal number of instructions to instrument in any given BB"), + cl::Hidden); // This flag may need to be replaced with -f[no]asan-stack. -static cl::opt<bool> ClStack("asan-stack", - cl::desc("Handle stack memory"), cl::Hidden, cl::init(true)); +static cl::opt<bool> ClStack("asan-stack", cl::desc("Handle stack memory"), + cl::Hidden, cl::init(true)); static cl::opt<bool> ClUseAfterReturn("asan-use-after-return", - cl::desc("Check return-after-free"), cl::Hidden, cl::init(true)); + cl::desc("Check return-after-free"), + cl::Hidden, cl::init(true)); // This flag may need to be replaced with -f[no]asan-globals. static cl::opt<bool> ClGlobals("asan-globals", - cl::desc("Handle global objects"), cl::Hidden, cl::init(true)); + cl::desc("Handle global objects"), cl::Hidden, + cl::init(true)); static cl::opt<bool> ClInitializers("asan-initialization-order", - cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(true)); -static cl::opt<bool> ClInvalidPointerPairs("asan-detect-invalid-pointer-pair", - cl::desc("Instrument <, <=, >, >=, - with pointer operands"), - cl::Hidden, cl::init(false)); -static cl::opt<unsigned> ClRealignStack("asan-realign-stack", - cl::desc("Realign stack to the value of this flag (power of two)"), - cl::Hidden, cl::init(32)); + cl::desc("Handle C++ initializer order"), + cl::Hidden, cl::init(true)); +static cl::opt<bool> ClInvalidPointerPairs( + "asan-detect-invalid-pointer-pair", + cl::desc("Instrument <, <=, >, >=, - with pointer operands"), cl::Hidden, + cl::init(false)); +static cl::opt<unsigned> ClRealignStack( + "asan-realign-stack", + cl::desc("Realign stack to the value of this flag (power of two)"), + cl::Hidden, cl::init(32)); static cl::opt<int> ClInstrumentationWithCallsThreshold( "asan-instrumentation-with-call-threshold", - cl::desc("If the function being instrumented contains more than " - "this number of memory accesses, use callbacks instead of " - "inline checks (-1 means never use callbacks)."), - cl::Hidden, cl::init(7000)); + cl::desc( + "If the function being instrumented contains more than " + "this number of memory accesses, use callbacks instead of " + "inline checks (-1 means never use callbacks)."), + cl::Hidden, cl::init(7000)); static cl::opt<std::string> ClMemoryAccessCallbackPrefix( - "asan-memory-access-callback-prefix", - cl::desc("Prefix for memory access callbacks"), cl::Hidden, - cl::init("__asan_")); + "asan-memory-access-callback-prefix", + cl::desc("Prefix for memory access callbacks"), cl::Hidden, + cl::init("__asan_")); static cl::opt<bool> ClInstrumentAllocas("asan-instrument-allocas", - cl::desc("instrument dynamic allocas"), cl::Hidden, cl::init(false)); + cl::desc("instrument dynamic allocas"), + cl::Hidden, cl::init(false)); +static cl::opt<bool> ClSkipPromotableAllocas( + "asan-skip-promotable-allocas", + cl::desc("Do not instrument promotable allocas"), cl::Hidden, + cl::init(true)); // These flags allow to change the shadow mapping. // The shadow mapping looks like // Shadow = (Mem >> scale) + (1 << offset_log) static cl::opt<int> ClMappingScale("asan-mapping-scale", - cl::desc("scale of asan shadow mapping"), cl::Hidden, cl::init(0)); + cl::desc("scale of asan shadow mapping"), + cl::Hidden, cl::init(0)); // Optimization flags. 
Not user visible, used mostly for testing // and benchmarking the tool. -static cl::opt<bool> ClOpt("asan-opt", - cl::desc("Optimize instrumentation"), cl::Hidden, cl::init(true)); -static cl::opt<bool> ClOptSameTemp("asan-opt-same-temp", - cl::desc("Instrument the same temp just once"), cl::Hidden, - cl::init(true)); +static cl::opt<bool> ClOpt("asan-opt", cl::desc("Optimize instrumentation"), + cl::Hidden, cl::init(true)); +static cl::opt<bool> ClOptSameTemp( + "asan-opt-same-temp", cl::desc("Instrument the same temp just once"), + cl::Hidden, cl::init(true)); static cl::opt<bool> ClOptGlobals("asan-opt-globals", - cl::desc("Don't instrument scalar globals"), cl::Hidden, cl::init(true)); + cl::desc("Don't instrument scalar globals"), + cl::Hidden, cl::init(true)); +static cl::opt<bool> ClOptStack( + "asan-opt-stack", cl::desc("Don't instrument scalar stack variables"), + cl::Hidden, cl::init(false)); -static cl::opt<bool> ClCheckLifetime("asan-check-lifetime", - cl::desc("Use llvm.lifetime intrinsics to insert extra checks"), - cl::Hidden, cl::init(false)); +static cl::opt<bool> ClCheckLifetime( + "asan-check-lifetime", + cl::desc("Use llvm.lifetime intrinsics to insert extra checks"), cl::Hidden, + cl::init(false)); static cl::opt<bool> ClDynamicAllocaStack( "asan-stack-dynamic-alloca", cl::desc("Use dynamic alloca to represent stack variables"), cl::Hidden, cl::init(true)); +static cl::opt<uint32_t> ClForceExperiment( + "asan-force-experiment", + cl::desc("Force optimization experiment (for testing)"), cl::Hidden, + cl::init(0)); + // Debug flags. static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden, cl::init(0)); static cl::opt<int> ClDebugStack("asan-debug-stack", cl::desc("debug stack"), cl::Hidden, cl::init(0)); -static cl::opt<std::string> ClDebugFunc("asan-debug-func", - cl::Hidden, cl::desc("Debug func")); +static cl::opt<std::string> ClDebugFunc("asan-debug-func", cl::Hidden, + cl::desc("Debug func")); static cl::opt<int> ClDebugMin("asan-debug-min", cl::desc("Debug min inst"), cl::Hidden, cl::init(-1)); static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug max inst"), @@ -207,10 +232,10 @@ STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); STATISTIC(NumInstrumentedDynamicAllocas, "Number of instrumented dynamic allocas"); -STATISTIC(NumOptimizedAccessesToGlobalArray, - "Number of optimized accesses to global arrays"); STATISTIC(NumOptimizedAccessesToGlobalVar, "Number of optimized accesses to global vars"); +STATISTIC(NumOptimizedAccessesToStackVar, + "Number of optimized accesses to stack vars"); namespace { /// Frontend-provided metadata for source location. @@ -238,9 +263,7 @@ struct LocationMetadata { class GlobalsMetadata { public: struct Entry { - Entry() - : SourceLoc(), Name(), IsDynInit(false), - IsBlacklisted(false) {} + Entry() : SourceLoc(), Name(), IsDynInit(false), IsBlacklisted(false) {} LocationMetadata SourceLoc; StringRef Name; bool IsDynInit; @@ -249,19 +272,17 @@ class GlobalsMetadata { GlobalsMetadata() : inited_(false) {} - void init(Module& M) { + void init(Module &M) { assert(!inited_); inited_ = true; NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals"); - if (!Globals) - return; + if (!Globals) return; for (auto MDN : Globals->operands()) { // Metadata node contains the global and the fields of "Entry".
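// Illustrative shape of one such node (global, file and values invented;
// field order assumed from the reads below):
//
//   !llvm.asan.globals = !{!0}
//   !0 = !{[4 x i32]* @g, !1, !"g", i1 false, i1 false}
//   !1 = !{!"a.cc", i32 3, i32 5} ; filename, line, column
//
// i.e. the global itself, then SourceLoc, Name, IsDynInit, IsBlacklisted.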
assert(MDN->getNumOperands() == 5); auto *GV = mdconst::extract_or_null<GlobalVariable>(MDN->getOperand(0)); // The optimizer may optimize away a global entirely. - if (!GV) - continue; + if (!GV) continue; // We can already have an entry for GV if it was merged with another // global. Entry &E = Entries[GV]; @@ -286,7 +307,7 @@ class GlobalsMetadata { private: bool inited_; - DenseMap<GlobalVariable*, Entry> Entries; + DenseMap<GlobalVariable *, Entry> Entries; }; /// This struct defines the shadow mapping using the rule: @@ -371,17 +392,36 @@ struct AddressSanitizer : public FunctionPass { } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); } - void instrumentMop(Instruction *I, bool UseCalls); + uint64_t getAllocaSizeInBytes(AllocaInst *AI) const { + Type *Ty = AI->getAllocatedType(); + uint64_t SizeInBytes = + AI->getModule()->getDataLayout().getTypeAllocSize(Ty); + return SizeInBytes; + } + /// Check if we want (and can) handle this alloca. + bool isInterestingAlloca(AllocaInst &AI) const; + /// If it is an interesting memory access, return the PointerOperand + /// and set IsWrite/Alignment. Otherwise return nullptr. + Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite, + uint64_t *TypeSize, + unsigned *Alignment) const; + void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, Instruction *I, + bool UseCalls, const DataLayout &DL); void instrumentPointerComparisonOrSubtraction(Instruction *I); void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore, Value *Addr, uint32_t TypeSize, bool IsWrite, - Value *SizeArgument, bool UseCalls); + Value *SizeArgument, bool UseCalls, uint32_t Exp); + void instrumentUnusualSizeOrAlignment(Instruction *I, Value *Addr, + uint32_t TypeSize, bool IsWrite, + Value *SizeArgument, bool UseCalls, + uint32_t Exp); Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, Value *ShadowValue, uint32_t TypeSize); Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr, bool IsWrite, size_t AccessSizeIndex, - Value *SizeArgument); + Value *SizeArgument, uint32_t Exp); void instrumentMemIntrinsic(MemIntrinsic *MI); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); bool runOnFunction(Function &F) override; @@ -396,9 +436,10 @@ struct AddressSanitizer : public FunctionPass { bool LooksLikeCodeInBug11395(Instruction *I); bool GlobalIsLinkerInitialized(GlobalVariable *G); + bool isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis, Value *Addr, + uint64_t TypeSize) const; LLVMContext *C; - const DataLayout *DL; Triple TargetTriple; int LongSize; Type *IntptrTy; @@ -408,12 +449,12 @@ struct AddressSanitizer : public FunctionPass { Function *AsanInitFunction; Function *AsanHandleNoReturnFunc; Function *AsanPtrCmpFunction, *AsanPtrSubFunction; - // This array is indexed by AccessIsWrite and log2(AccessSize). - Function *AsanErrorCallback[2][kNumberOfAccessSizes]; - Function *AsanMemoryAccessCallback[2][kNumberOfAccessSizes]; - // This array is indexed by AccessIsWrite. - Function *AsanErrorCallbackSized[2], - *AsanMemoryAccessCallbackSized[2]; + // This array is indexed by AccessIsWrite, Experiment and log2(AccessSize). + Function *AsanErrorCallback[2][2][kNumberOfAccessSizes]; + Function *AsanMemoryAccessCallback[2][2][kNumberOfAccessSizes]; + // This array is indexed by AccessIsWrite and Experiment. 
+ Function *AsanErrorCallbackSized[2][2]; + Function *AsanMemoryAccessCallbackSized[2][2]; Function *AsanMemmove, *AsanMemcpy, *AsanMemset; InlineAsm *EmptyAsm; GlobalsMetadata GlobalsMD; @@ -426,9 +467,7 @@ class AddressSanitizerModule : public ModulePass { AddressSanitizerModule() : ModulePass(ID) {} bool runOnModule(Module &M) override; static char ID; // Pass identification, replacement for typeid - const char *getPassName() const override { - return "AddressSanitizerModule"; - } + const char *getPassName() const override { return "AddressSanitizerModule"; } private: void initializeCallbacks(Module &M); @@ -444,7 +483,6 @@ class AddressSanitizerModule : public ModulePass { GlobalsMetadata GlobalsMD; Type *IntptrTy; LLVMContext *C; - const DataLayout *DL; Triple TargetTriple; ShadowMapping Mapping; Function *AsanPoisonGlobals; @@ -471,12 +509,12 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { Type *IntptrPtrTy; ShadowMapping Mapping; - SmallVector<AllocaInst*, 16> AllocaVec; - SmallVector<Instruction*, 8> RetVec; + SmallVector<AllocaInst *, 16> AllocaVec; + SmallVector<Instruction *, 8> RetVec; unsigned StackAlignment; Function *AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1], - *AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1]; + *AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1]; Function *AsanPoisonStackMemoryFunc, *AsanUnpoisonStackMemoryFunc; // Stores a place and arguments of poisoning/unpoisoning call for alloca. @@ -497,33 +535,38 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { Value *LeftRzAddr; Value *RightRzAddr; bool Poison; - explicit DynamicAllocaCall(AllocaInst *AI, - Value *LeftRzAddr = nullptr, - Value *RightRzAddr = nullptr) - : AI(AI), LeftRzAddr(LeftRzAddr), RightRzAddr(RightRzAddr), Poison(true) - {} + explicit DynamicAllocaCall(AllocaInst *AI, Value *LeftRzAddr = nullptr, + Value *RightRzAddr = nullptr) + : AI(AI), + LeftRzAddr(LeftRzAddr), + RightRzAddr(RightRzAddr), + Poison(true) {} }; SmallVector<DynamicAllocaCall, 1> DynamicAllocaVec; // Maps Value to an AllocaInst from which the Value is originated. - typedef DenseMap<Value*, AllocaInst*> AllocaForValueMapTy; + typedef DenseMap<Value *, AllocaInst *> AllocaForValueMapTy; AllocaForValueMapTy AllocaForValue; bool HasNonEmptyInlineAsm; std::unique_ptr<CallInst> EmptyInlineAsm; FunctionStackPoisoner(Function &F, AddressSanitizer &ASan) - : F(F), ASan(ASan), DIB(*F.getParent(), /*AllowUnresolved*/ false), - C(ASan.C), IntptrTy(ASan.IntptrTy), - IntptrPtrTy(PointerType::get(IntptrTy, 0)), Mapping(ASan.Mapping), - StackAlignment(1 << Mapping.Scale), HasNonEmptyInlineAsm(false), + : F(F), + ASan(ASan), + DIB(*F.getParent(), /*AllowUnresolved*/ false), + C(ASan.C), + IntptrTy(ASan.IntptrTy), + IntptrPtrTy(PointerType::get(IntptrTy, 0)), + Mapping(ASan.Mapping), + StackAlignment(1 << Mapping.Scale), + HasNonEmptyInlineAsm(false), EmptyInlineAsm(CallInst::Create(ASan.EmptyAsm)) {} bool runOnFunction() { if (!ClStack) return false; // Collect alloca, ret, lifetime instructions etc. - for (BasicBlock *BB : depth_first(&F.getEntryBlock())) - visit(*BB); + for (BasicBlock *BB : depth_first(&F.getEntryBlock())) visit(*BB); if (AllocaVec.empty() && DynamicAllocaVec.empty()) return false; @@ -544,33 +587,31 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { // ----------------------- Visitors. /// \brief Collect all Ret instructions. 
- void visitReturnInst(ReturnInst &RI) { - RetVec.push_back(&RI); - } + void visitReturnInst(ReturnInst &RI) { RetVec.push_back(&RI); } // Unpoison dynamic allocas redzones. void unpoisonDynamicAlloca(DynamicAllocaCall &AllocaCall) { - if (!AllocaCall.Poison) - return; + if (!AllocaCall.Poison) return; for (auto Ret : RetVec) { IRBuilder<> IRBRet(Ret); PointerType *Int32PtrTy = PointerType::getUnqual(IRBRet.getInt32Ty()); Value *Zero = Constant::getNullValue(IRBRet.getInt32Ty()); Value *PartialRzAddr = IRBRet.CreateSub(AllocaCall.RightRzAddr, ConstantInt::get(IntptrTy, 4)); - IRBRet.CreateStore(Zero, IRBRet.CreateIntToPtr(AllocaCall.LeftRzAddr, - Int32PtrTy)); - IRBRet.CreateStore(Zero, IRBRet.CreateIntToPtr(PartialRzAddr, - Int32PtrTy)); - IRBRet.CreateStore(Zero, IRBRet.CreateIntToPtr(AllocaCall.RightRzAddr, - Int32PtrTy)); + IRBRet.CreateStore( + Zero, IRBRet.CreateIntToPtr(AllocaCall.LeftRzAddr, Int32PtrTy)); + IRBRet.CreateStore(Zero, + IRBRet.CreateIntToPtr(PartialRzAddr, Int32PtrTy)); + IRBRet.CreateStore( + Zero, IRBRet.CreateIntToPtr(AllocaCall.RightRzAddr, Int32PtrTy)); } } // Right shift for BigEndian and left shift for LittleEndian. Value *shiftAllocaMagic(Value *Val, IRBuilder<> &IRB, Value *Shift) { - return ASan.DL->isLittleEndian() ? IRB.CreateShl(Val, Shift) - : IRB.CreateLShr(Val, Shift); + auto &DL = F.getParent()->getDataLayout(); + return DL.isLittleEndian() ? IRB.CreateShl(Val, Shift) + : IRB.CreateLShr(Val, Shift); } // Compute PartialRzMagic for dynamic alloca call. Since we don't know the @@ -599,7 +640,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { /// \brief Collect Alloca instructions we want (and can) handle. void visitAllocaInst(AllocaInst &AI) { - if (!isInterestingAlloca(AI)) return; + if (!ASan.isInterestingAlloca(AI)) return; StackAlignment = std::max(StackAlignment, AI.getAlignment()); if (isDynamicAlloca(AI)) @@ -613,8 +654,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { void visitIntrinsicInst(IntrinsicInst &II) { if (!ClCheckLifetime) return; Intrinsic::ID ID = II.getIntrinsicID(); - if (ID != Intrinsic::lifetime_start && - ID != Intrinsic::lifetime_end) + if (ID != Intrinsic::lifetime_start && ID != Intrinsic::lifetime_end) return; // Found lifetime intrinsic, add ASan instrumentation if necessary. ConstantInt *Size = dyn_cast<ConstantInt>(II.getArgOperand(0)); @@ -644,8 +684,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { bool doesDominateAllExits(const Instruction *I) const { for (auto Ret : RetVec) { - if (!ASan.getDominatorTree().dominates(I, Ret)) - return false; + if (!ASan.getDominatorTree().dominates(I, Ret)) return false; } return true; } @@ -653,19 +692,6 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { bool isDynamicAlloca(AllocaInst &AI) const { return AI.isArrayAllocation() || !AI.isStaticAlloca(); } - - // Check if we want (and can) handle this alloca. - bool isInterestingAlloca(AllocaInst &AI) const { - return (AI.getAllocatedType()->isSized() && - // alloca() may be called with 0 size, ignore it. - getAllocaSizeInBytes(&AI) > 0); - } - - uint64_t getAllocaSizeInBytes(AllocaInst *AI) const { - Type *Ty = AI->getAllocatedType(); - uint64_t SizeInBytes = ASan.DL->getTypeAllocSize(Ty); - return SizeInBytes; - } /// Finds alloca where the value comes from. 
AllocaInst *findAllocaForValue(Value *V); void poisonRedZones(ArrayRef<uint8_t> ShadowBytes, IRBuilder<> &IRB, @@ -683,21 +709,25 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { } // namespace char AddressSanitizer::ID = 0; -INITIALIZE_PASS_BEGIN(AddressSanitizer, "asan", - "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", - false, false) +INITIALIZE_PASS_BEGIN( + AddressSanitizer, "asan", + "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, + false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(AddressSanitizer, "asan", - "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", - false, false) +INITIALIZE_PASS_END( + AddressSanitizer, "asan", + "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, + false) FunctionPass *llvm::createAddressSanitizerFunctionPass() { return new AddressSanitizer(); } char AddressSanitizerModule::ID = 0; -INITIALIZE_PASS(AddressSanitizerModule, "asan-module", +INITIALIZE_PASS( + AddressSanitizerModule, "asan-module", "AddressSanitizer: detects use-after-free and out-of-bounds bugs." - "ModulePass", false, false) + "ModulePass", + false, false) ModulePass *llvm::createAddressSanitizerModulePass() { return new AddressSanitizerModule(); } @@ -709,16 +739,15 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { } // \brief Create a constant for Str so that we can pass it to the run-time lib. -static GlobalVariable *createPrivateGlobalForString( - Module &M, StringRef Str, bool AllowMerging) { +static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str, + bool AllowMerging) { Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); // We use private linkage for module-local strings. If they can be merged // with another one, we set the unnamed_addr attribute. GlobalVariable *GV = new GlobalVariable(M, StrConst->getType(), true, GlobalValue::PrivateLinkage, StrConst, kAsanGenPrefix); - if (AllowMerging) - GV->setUnnamedAddr(true); + if (AllowMerging) GV->setUnnamedAddr(true); GV->setAlignment(1); // Strings may not be merged w/o setting align 1. return GV; } @@ -747,8 +776,7 @@ static bool GlobalWasGeneratedByAsan(GlobalVariable *G) { Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { // Shadow >> scale Shadow = IRB.CreateLShr(Shadow, Mapping.Scale); - if (Mapping.Offset == 0) - return Shadow; + if (Mapping.Offset == 0) return Shadow; // (Shadow >> scale) | offset if (Mapping.OrShadowOffset) return IRB.CreateOr(Shadow, ConstantInt::get(IntptrTy, Mapping.Offset)); @@ -775,38 +803,61 @@ void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { MI->eraseFromParent(); } -// If I is an interesting memory access, return the PointerOperand -// and set IsWrite/Alignment. Otherwise return nullptr. -static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite, - unsigned *Alignment) { +/// Check if we want (and can) handle this alloca. +bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) const { + return (AI.getAllocatedType()->isSized() && + // alloca() may be called with 0 size, ignore it. + getAllocaSizeInBytes(&AI) > 0 && + // We are only interested in allocas not promotable to registers. + // Promotable allocas are common under -O0. + (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI))); +} + +/// If I is an interesting memory access, return the PointerOperand +/// and set IsWrite/Alignment. Otherwise return nullptr. 
+Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I, + bool *IsWrite, + uint64_t *TypeSize, + unsigned *Alignment) const { // Skip memory accesses inserted by another instrumentation. - if (I->getMetadata("nosanitize")) - return nullptr; + if (I->getMetadata("nosanitize")) return nullptr; + + Value *PtrOperand = nullptr; + const DataLayout &DL = I->getModule()->getDataLayout(); if (LoadInst *LI = dyn_cast<LoadInst>(I)) { if (!ClInstrumentReads) return nullptr; *IsWrite = false; + *TypeSize = DL.getTypeStoreSizeInBits(LI->getType()); *Alignment = LI->getAlignment(); - return LI->getPointerOperand(); - } - if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + PtrOperand = LI->getPointerOperand(); + } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { if (!ClInstrumentWrites) return nullptr; *IsWrite = true; + *TypeSize = DL.getTypeStoreSizeInBits(SI->getValueOperand()->getType()); *Alignment = SI->getAlignment(); - return SI->getPointerOperand(); - } - if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) { + PtrOperand = SI->getPointerOperand(); + } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) { if (!ClInstrumentAtomics) return nullptr; *IsWrite = true; + *TypeSize = DL.getTypeStoreSizeInBits(RMW->getValOperand()->getType()); *Alignment = 0; - return RMW->getPointerOperand(); - } - if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) { + PtrOperand = RMW->getPointerOperand(); + } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) { if (!ClInstrumentAtomics) return nullptr; *IsWrite = true; + *TypeSize = DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType()); *Alignment = 0; - return XCHG->getPointerOperand(); + PtrOperand = XCHG->getPointerOperand(); } - return nullptr; + + // Treat memory accesses to promotable allocas as non-interesting since they + // will not cause memory violations. This greatly speeds up the instrumented + // executable at -O0. + if (ClSkipPromotableAllocas) + if (auto AI = dyn_cast_or_null<AllocaInst>(PtrOperand)) + return isInterestingAlloca(*AI) ? AI : nullptr; + + return PtrOperand; } static bool isPointerOperand(Value *V) { @@ -818,17 +869,15 @@ static bool isPointerOperand(Value *V) { // the frontend. static bool isInterestingPointerComparisonOrSubtraction(Instruction *I) { if (ICmpInst *Cmp = dyn_cast<ICmpInst>(I)) { - if (!Cmp->isRelational()) - return false; + if (!Cmp->isRelational()) return false; } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { - if (BO->getOpcode() != Instruction::Sub) - return false; + if (BO->getOpcode() != Instruction::Sub) return false; } else { return false; } if (!isPointerOperand(I->getOperand(0)) || !isPointerOperand(I->getOperand(1))) - return false; + return false; return true; } @@ -839,8 +888,8 @@ bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) { return G->hasInitializer() && !GlobalsMD.get(G).IsDynInit; } -void -AddressSanitizer::instrumentPointerComparisonOrSubtraction(Instruction *I) { +void AddressSanitizer::instrumentPointerComparisonOrSubtraction( + Instruction *I) { IRBuilder<> IRB(I); Function *F = isa<ICmpInst>(I) ? 
AsanPtrCmpFunction : AsanPtrSubFunction; Value *Param[2] = {I->getOperand(0), I->getOperand(1)}; @@ -851,38 +900,47 @@ AddressSanitizer::instrumentPointerComparisonOrSubtraction(Instruction *I) { IRB.CreateCall2(F, Param[0], Param[1]); } -void AddressSanitizer::instrumentMop(Instruction *I, bool UseCalls) { +void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, + Instruction *I, bool UseCalls, + const DataLayout &DL) { bool IsWrite = false; unsigned Alignment = 0; - Value *Addr = isInterestingMemoryAccess(I, &IsWrite, &Alignment); + uint64_t TypeSize = 0; + Value *Addr = isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment); assert(Addr); + + // Optimization experiments. + // The experiments can be used to evaluate potential optimizations that remove + // instrumentation (assess false negatives). Instead of completely removing + // some instrumentation, you set Exp to a non-zero value (mask of optimization + // experiments that want to remove instrumentation of this instruction). + // If Exp is non-zero, this pass will emit special calls into runtime + // (e.g. __asan_report_exp_load1 instead of __asan_report_load1). These calls + // make runtime terminate the program in a special way (with a different + // exit status). Then you run the new compiler on a buggy corpus, collect + // the special terminations (ideally, you don't see them at all -- no false + // negatives) and make the decision on the optimization. + uint32_t Exp = ClForceExperiment; + if (ClOpt && ClOptGlobals) { - if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) { - // If initialization order checking is disabled, a simple access to a - // dynamically initialized global is always valid. - if (!ClInitializers || GlobalIsLinkerInitialized(G)) { - NumOptimizedAccessesToGlobalVar++; - return; - } - } - ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr); - if (CE && CE->isGEPWithNoNotionalOverIndexing()) { - if (GlobalVariable *G = dyn_cast<GlobalVariable>(CE->getOperand(0))) { - if (CE->getOperand(1)->isNullValue() && GlobalIsLinkerInitialized(G)) { - NumOptimizedAccessesToGlobalArray++; - return; - } - } + // If initialization order checking is disabled, a simple access to a + // dynamically initialized global is always valid. + GlobalVariable *G = dyn_cast<GlobalVariable>(GetUnderlyingObject(Addr, DL)); + if (G != NULL && (!ClInitializers || GlobalIsLinkerInitialized(G)) && + isSafeAccess(ObjSizeVis, Addr, TypeSize)) { + NumOptimizedAccessesToGlobalVar++; + return; } } - Type *OrigPtrTy = Addr->getType(); - Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType(); - - assert(OrigTy->isSized()); - uint32_t TypeSize = DL->getTypeStoreSizeInBits(OrigTy); - - assert((TypeSize % 8) == 0); + if (ClOpt && ClOptStack) { + // A direct inbounds access to a stack variable is always valid. + if (isa<AllocaInst>(GetUnderlyingObject(Addr, DL)) && + isSafeAccess(ObjSizeVis, Addr, TypeSize)) { + NumOptimizedAccessesToStackVar++; + return; + } + } if (IsWrite) NumInstrumentedWrites++; @@ -895,23 +953,10 @@ void AddressSanitizer::instrumentMop(Instruction *I, bool UseCalls) { if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 || TypeSize == 128) && (Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8)) - return instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr, UseCalls); - // Instrument unusual size or unusual alignment. - // We can not do it with a single check, so we do 1-byte check for the first - // and the last bytes. 
We call __asan_report_*_n(addr, real_size) to be able - // to report the actual access size. - IRBuilder<> IRB(I); - Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8); - Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); - if (UseCalls) { - IRB.CreateCall2(AsanMemoryAccessCallbackSized[IsWrite], AddrLong, Size); - } else { - Value *LastByte = IRB.CreateIntToPtr( - IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)), - OrigPtrTy); - instrumentAddress(I, I, Addr, 8, IsWrite, Size, false); - instrumentAddress(I, I, LastByte, 8, IsWrite, Size, false); - } + return instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr, UseCalls, + Exp); + instrumentUnusualSizeOrAlignment(I, Addr, TypeSize, IsWrite, nullptr, + UseCalls, Exp); } // Validate the result of Module::getOrInsertFunction called for an interface @@ -921,17 +966,34 @@ void AddressSanitizer::instrumentMop(Instruction *I, bool UseCalls) { static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { if (isa<Function>(FuncOrBitcast)) return cast<Function>(FuncOrBitcast); FuncOrBitcast->dump(); - report_fatal_error("trying to redefine an AddressSanitizer " - "interface function"); + report_fatal_error( + "trying to redefine an AddressSanitizer " + "interface function"); } -Instruction *AddressSanitizer::generateCrashCode( - Instruction *InsertBefore, Value *Addr, - bool IsWrite, size_t AccessSizeIndex, Value *SizeArgument) { +Instruction *AddressSanitizer::generateCrashCode(Instruction *InsertBefore, + Value *Addr, bool IsWrite, + size_t AccessSizeIndex, + Value *SizeArgument, + uint32_t Exp) { IRBuilder<> IRB(InsertBefore); - CallInst *Call = SizeArgument - ? IRB.CreateCall2(AsanErrorCallbackSized[IsWrite], Addr, SizeArgument) - : IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], Addr); + Value *ExpVal = Exp == 0 ? nullptr : ConstantInt::get(IRB.getInt32Ty(), Exp); + CallInst *Call = nullptr; + if (SizeArgument) { + if (Exp == 0) + Call = IRB.CreateCall2(AsanErrorCallbackSized[IsWrite][0], Addr, + SizeArgument); + else + Call = IRB.CreateCall3(AsanErrorCallbackSized[IsWrite][1], Addr, + SizeArgument, ExpVal); + } else { + if (Exp == 0) + Call = + IRB.CreateCall(AsanErrorCallback[IsWrite][0][AccessSizeIndex], Addr); + else + Call = IRB.CreateCall2(AsanErrorCallback[IsWrite][1][AccessSizeIndex], + Addr, ExpVal); + } // We don't do Call->setDoesNotReturn() because the BB already has // UnreachableInst at the end. 
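// Dispatch summary for generateCrashCode above (runtime names assumed from
// the template constants and the callback initialization below):
// SizeArgument selects the sized "_n" reporters and Exp != 0 selects the
// "exp_" variants that carry the experiment mask as an extra i32, so a
// non-sized 8-byte write with Exp set emits a call such as
// __asan_report_exp_store8(addr, exp).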
@@ -941,19 +1003,19 @@ Instruction *AddressSanitizer::generateCrashCode( } Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, - Value *ShadowValue, - uint32_t TypeSize) { + Value *ShadowValue, + uint32_t TypeSize) { size_t Granularity = 1 << Mapping.Scale; // Addr & (Granularity - 1) - Value *LastAccessedByte = IRB.CreateAnd( - AddrLong, ConstantInt::get(IntptrTy, Granularity - 1)); + Value *LastAccessedByte = + IRB.CreateAnd(AddrLong, ConstantInt::get(IntptrTy, Granularity - 1)); // (Addr & (Granularity - 1)) + size - 1 if (TypeSize / 8 > 1) LastAccessedByte = IRB.CreateAdd( LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)); // (uint8_t) ((Addr & (Granularity-1)) + size - 1) - LastAccessedByte = IRB.CreateIntCast( - LastAccessedByte, ShadowValue->getType(), false); + LastAccessedByte = + IRB.CreateIntCast(LastAccessedByte, ShadowValue->getType(), false); // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue); } @@ -961,24 +1023,29 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, void AddressSanitizer::instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore, Value *Addr, uint32_t TypeSize, bool IsWrite, - Value *SizeArgument, bool UseCalls) { + Value *SizeArgument, bool UseCalls, + uint32_t Exp) { IRBuilder<> IRB(InsertBefore); Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); if (UseCalls) { - IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][AccessSizeIndex], - AddrLong); + if (Exp == 0) + IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][0][AccessSizeIndex], + AddrLong); + else + IRB.CreateCall2(AsanMemoryAccessCallback[IsWrite][1][AccessSizeIndex], + AddrLong, ConstantInt::get(IRB.getInt32Ty(), Exp)); return; } - Type *ShadowTy = IntegerType::get( - *C, std::max(8U, TypeSize >> Mapping.Scale)); + Type *ShadowTy = + IntegerType::get(*C, std::max(8U, TypeSize >> Mapping.Scale)); Type *ShadowPtrTy = PointerType::get(ShadowTy, 0); Value *ShadowPtr = memToShadow(AddrLong, IRB); Value *CmpVal = Constant::getNullValue(ShadowTy); - Value *ShadowValue = IRB.CreateLoad( - IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy)); + Value *ShadowValue = + IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy)); Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal); size_t Granularity = 1 << Mapping.Scale; @@ -987,9 +1054,8 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) { // We use branch weights for the slow path check, to indicate that the slow // path is rarely taken. This seems to be the case for SPEC benchmarks. 
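// Worked instance of the slow-path compare built above (offsets assumed):
// with the default Mapping.Scale of 3, Granularity is 8 and a shadow value
// k in 1..7 means "only the first k bytes of this granule are addressable".
// For a 4-byte access with Addr & 7 == 3, LastAccessedByte = 3 + 4 - 1 = 6,
// so the report fires when 6 >= k (e.g. k = 6) and is skipped when the
// granule is fully addressable, shadow 0 having already failed the
// fast-path ShadowValue != 0 test.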
- TerminatorInst *CheckTerm = - SplitBlockAndInsertIfThen(Cmp, InsertBefore, false, - MDBuilder(*C).createBranchWeights(1, 100000)); + TerminatorInst *CheckTerm = SplitBlockAndInsertIfThen( + Cmp, InsertBefore, false, MDBuilder(*C).createBranchWeights(1, 100000)); assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional()); BasicBlock *NextBB = CheckTerm->getSuccessor(0); IRB.SetInsertPoint(CheckTerm); @@ -1003,11 +1069,37 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, true); } - Instruction *Crash = generateCrashCode( - CrashTerm, AddrLong, IsWrite, AccessSizeIndex, SizeArgument); + Instruction *Crash = generateCrashCode(CrashTerm, AddrLong, IsWrite, + AccessSizeIndex, SizeArgument, Exp); Crash->setDebugLoc(OrigIns->getDebugLoc()); } +// Instrument unusual size or unusual alignment. +// We can not do it with a single check, so we do 1-byte check for the first +// and the last bytes. We call __asan_report_*_n(addr, real_size) to be able +// to report the actual access size. +void AddressSanitizer::instrumentUnusualSizeOrAlignment( + Instruction *I, Value *Addr, uint32_t TypeSize, bool IsWrite, + Value *SizeArgument, bool UseCalls, uint32_t Exp) { + IRBuilder<> IRB(I); + Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8); + Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); + if (UseCalls) { + if (Exp == 0) + IRB.CreateCall2(AsanMemoryAccessCallbackSized[IsWrite][0], AddrLong, + Size); + else + IRB.CreateCall3(AsanMemoryAccessCallbackSized[IsWrite][1], AddrLong, Size, + ConstantInt::get(IRB.getInt32Ty(), Exp)); + } else { + Value *LastByte = IRB.CreateIntToPtr( + IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)), + Addr->getType()); + instrumentAddress(I, I, Addr, 8, IsWrite, Size, false, Exp); + instrumentAddress(I, I, LastByte, 8, IsWrite, Size, false, Exp); + } +} + void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit, GlobalValue *ModuleName) { // Set up the arguments to our poison/unpoison functions. @@ -1029,12 +1121,11 @@ void AddressSanitizerModule::createInitializerPoisonCalls( ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); for (Use &OP : CA->operands()) { - if (isa<ConstantAggregateZero>(OP)) - continue; + if (isa<ConstantAggregateZero>(OP)) continue; ConstantStruct *CS = cast<ConstantStruct>(OP); // Must have a function or null ptr. - if (Function* F = dyn_cast<Function>(CS->getOperand(1))) { + if (Function *F = dyn_cast<Function>(CS->getOperand(1))) { if (F->getName() == kAsanModuleCtorName) continue; ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0)); // Don't instrument CTORs that will run before asan.module_ctor. @@ -1059,13 +1150,11 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { G->getLinkage() != GlobalVariable::PrivateLinkage && G->getLinkage() != GlobalVariable::InternalLinkage) return false; - if (G->hasComdat()) - return false; + if (G->hasComdat()) return false; // Two problems with thread-locals: // - The address of the main thread's copy can't be computed at link-time. // - Need to poison all copies, not just the main thread's one. - if (G->isThreadLocal()) - return false; + if (G->isThreadLocal()) return false; // For now, just ignore this Global if the alignment is large. 
if (G->getAlignment() > MinRedzoneSizeForGlobal()) return false; @@ -1076,10 +1165,8 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { StringRef ParsedSegment, ParsedSection; unsigned TAA = 0, StubSize = 0; bool TAAParsed; - std::string ErrorCode = - MCSectionMachO::ParseSectionSpecifier(Section, ParsedSegment, - ParsedSection, TAA, TAAParsed, - StubSize); + std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier( + Section, ParsedSegment, ParsedSection, TAA, TAAParsed, StubSize); if (!ErrorCode.empty()) { report_fatal_error("Invalid section specifier '" + ParsedSection + "': " + ErrorCode + "."); @@ -1140,12 +1227,11 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) { AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage); // Declare functions that register/unregister globals. AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction( - kAsanRegisterGlobalsName, IRB.getVoidTy(), - IntptrTy, IntptrTy, nullptr)); + kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); AsanRegisterGlobals->setLinkage(Function::ExternalLinkage); - AsanUnregisterGlobals = checkInterfaceFunction(M.getOrInsertFunction( - kAsanUnregisterGlobalsName, - IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); + AsanUnregisterGlobals = checkInterfaceFunction( + M.getOrInsertFunction(kAsanUnregisterGlobalsName, IRB.getVoidTy(), + IntptrTy, IntptrTy, nullptr)); AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage); } @@ -1158,8 +1244,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { SmallVector<GlobalVariable *, 16> GlobalsToChange; for (auto &G : M.globals()) { - if (ShouldInstrumentGlobal(&G)) - GlobalsToChange.push_back(&G); + if (ShouldInstrumentGlobal(&G)) GlobalsToChange.push_back(&G); } size_t n = GlobalsToChange.size(); @@ -1184,8 +1269,9 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { // We shouldn't merge same module names, as this string serves as unique // module ID in runtime. GlobalVariable *ModuleName = createPrivateGlobalForString( - M, M.getModuleIdentifier(), /*AllowMerging*/false); + M, M.getModuleIdentifier(), /*AllowMerging*/ false); + auto &DL = M.getDataLayout(); for (size_t i = 0; i < n; i++) { static const uint64_t kMaxGlobalRedzone = 1 << 18; GlobalVariable *G = GlobalsToChange[i]; @@ -1199,32 +1285,30 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { PointerType *PtrTy = cast<PointerType>(G->getType()); Type *Ty = PtrTy->getElementType(); - uint64_t SizeInBytes = DL->getTypeAllocSize(Ty); + uint64_t SizeInBytes = DL.getTypeAllocSize(Ty); uint64_t MinRZ = MinRedzoneSizeForGlobal(); // MinRZ <= RZ <= kMaxGlobalRedzone // and trying to make RZ to be ~ 1/4 of SizeInBytes. 
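// Worked instances of the sizing below (MinRZ = 32 assumed): a 4096-byte
// global gets RZ = max(32, min(1 << 18, (4096 / 32 / 4) * 32)) = 1024 and,
// being 32-aligned already, RightRedzoneSize stays 1024; a 20-byte global
// gets RZ = 32 and RightRedzoneSize = 32 + (32 - 20) = 44, so size plus
// redzone is 64, a multiple of MinRZ as the assert requires.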
- uint64_t RZ = std::max(MinRZ, - std::min(kMaxGlobalRedzone, - (SizeInBytes / MinRZ / 4) * MinRZ)); + uint64_t RZ = std::max( + MinRZ, std::min(kMaxGlobalRedzone, (SizeInBytes / MinRZ / 4) * MinRZ)); uint64_t RightRedzoneSize = RZ; // Round up to MinRZ - if (SizeInBytes % MinRZ) - RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ); + if (SizeInBytes % MinRZ) RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ); assert(((RightRedzoneSize + SizeInBytes) % MinRZ) == 0); Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); StructType *NewTy = StructType::get(Ty, RightRedZoneTy, nullptr); - Constant *NewInitializer = ConstantStruct::get( - NewTy, G->getInitializer(), - Constant::getNullValue(RightRedZoneTy), nullptr); + Constant *NewInitializer = + ConstantStruct::get(NewTy, G->getInitializer(), + Constant::getNullValue(RightRedZoneTy), nullptr); // Create a new global variable with enough space for a redzone. GlobalValue::LinkageTypes Linkage = G->getLinkage(); if (G->isConstant() && Linkage == GlobalValue::PrivateLinkage) Linkage = GlobalValue::InternalLinkage; - GlobalVariable *NewGlobal = new GlobalVariable( - M, NewTy, G->isConstant(), Linkage, - NewInitializer, "", G, G->getThreadLocalMode()); + GlobalVariable *NewGlobal = + new GlobalVariable(M, NewTy, G->isConstant(), Linkage, NewInitializer, + "", G, G->getThreadLocalMode()); NewGlobal->copyAttributesFrom(G); NewGlobal->setAlignment(MinRZ); @@ -1253,8 +1337,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { ConstantExpr::getPointerCast(ModuleName, IntptrTy), ConstantInt::get(IntptrTy, MD.IsDynInit), SourceLoc, nullptr); - if (ClInitializers && MD.IsDynInit) - HasDynamicallyInitializedGlobals = true; + if (ClInitializers && MD.IsDynInit) HasDynamicallyInitializedGlobals = true; DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n"); } @@ -1273,9 +1356,9 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { // We also need to unregister globals at the end, e.g. when a shared library // gets closed. 
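// The ctor/dtor pair is symmetric: the module constructor registers the
// instrumented globals with the runtime, and the destructor created below is,
// conceptually,
//   define internal void @asan.module_dtor() {
//     call void @__asan_unregister_globals(i64 %globals, i64 %n)
//     ret void
//   }
// so the globals are unregistered again when a shared library is dlclose()d.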
- Function *AsanDtorFunction = Function::Create( - FunctionType::get(Type::getVoidTy(*C), false), - GlobalValue::InternalLinkage, kAsanModuleDtorName, &M); + Function *AsanDtorFunction = + Function::Create(FunctionType::get(Type::getVoidTy(*C), false), + GlobalValue::InternalLinkage, kAsanModuleDtorName, &M); BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction); IRBuilder<> IRB_Dtor(ReturnInst::Create(*C, AsanDtorBB)); IRB_Dtor.CreateCall2(AsanUnregisterGlobals, @@ -1288,12 +1371,8 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { } bool AddressSanitizerModule::runOnModule(Module &M) { - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - if (!DLP) - return false; - DL = &DLP->getDataLayout(); C = &(M.getContext()); - int LongSize = DL->getPointerSizeInBits(); + int LongSize = M.getDataLayout().getPointerSizeInBits(); IntptrTy = Type::getIntNTy(*C, LongSize); TargetTriple = Triple(M.getTargetTriple()); Mapping = getShadowMapping(TargetTriple, LongSize); @@ -1305,8 +1384,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) { assert(CtorFunc); IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator()); - if (ClGlobals) - Changed |= InstrumentGlobals(IRB, M); + if (ClGlobals) Changed |= InstrumentGlobals(IRB, M); return Changed; } @@ -1314,33 +1392,34 @@ bool AddressSanitizerModule::runOnModule(Module &M) { void AddressSanitizer::initializeCallbacks(Module &M) { IRBuilder<> IRB(*C); // Create __asan_report* callbacks. - for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) { - for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; - AccessSizeIndex++) { - // IsWrite and TypeSize are encoded in the function name. - std::string Suffix = - (AccessIsWrite ? "store" : "load") + itostr(1 << AccessSizeIndex); - AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = - checkInterfaceFunction( - M.getOrInsertFunction(kAsanReportErrorTemplate + Suffix, - IRB.getVoidTy(), IntptrTy, nullptr)); - AsanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] = - checkInterfaceFunction( - M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + Suffix, - IRB.getVoidTy(), IntptrTy, nullptr)); + // IsWrite, TypeSize and Exp are encoded in the function name. + for (int Exp = 0; Exp < 2; Exp++) { + for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) { + const std::string TypeStr = AccessIsWrite ? "store" : "load"; + const std::string ExpStr = Exp ? "exp_" : ""; + const Type *ExpType = Exp ? 
Type::getInt32Ty(*C) : nullptr; + AsanErrorCallbackSized[AccessIsWrite][Exp] = + checkInterfaceFunction(M.getOrInsertFunction( + kAsanReportErrorTemplate + ExpStr + TypeStr + "_n", + IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr)); + AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] = + checkInterfaceFunction(M.getOrInsertFunction( + ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N", + IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr)); + for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; + AccessSizeIndex++) { + const std::string Suffix = TypeStr + itostr(1 << AccessSizeIndex); + AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] = + checkInterfaceFunction(M.getOrInsertFunction( + kAsanReportErrorTemplate + ExpStr + Suffix, IRB.getVoidTy(), + IntptrTy, ExpType, nullptr)); + AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] = + checkInterfaceFunction(M.getOrInsertFunction( + ClMemoryAccessCallbackPrefix + ExpStr + Suffix, IRB.getVoidTy(), + IntptrTy, ExpType, nullptr)); + } } } - AsanErrorCallbackSized[0] = checkInterfaceFunction(M.getOrInsertFunction( - kAsanReportLoadN, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); - AsanErrorCallbackSized[1] = checkInterfaceFunction(M.getOrInsertFunction( - kAsanReportStoreN, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); - - AsanMemoryAccessCallbackSized[0] = checkInterfaceFunction( - M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "loadN", - IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); - AsanMemoryAccessCallbackSized[1] = checkInterfaceFunction( - M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "storeN", - IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); AsanMemmove = checkInterfaceFunction(M.getOrInsertFunction( ClMemoryAccessCallbackPrefix + "memmove", IRB.getInt8PtrTy(), @@ -1368,21 +1447,17 @@ void AddressSanitizer::initializeCallbacks(Module &M) { // virtual bool AddressSanitizer::doInitialization(Module &M) { // Initialize the private fields. No one has accessed them before. - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - if (!DLP) - report_fatal_error("data layout missing"); - DL = &DLP->getDataLayout(); GlobalsMD.init(M); C = &(M.getContext()); - LongSize = DL->getPointerSizeInBits(); + LongSize = M.getDataLayout().getPointerSizeInBits(); IntptrTy = Type::getIntNTy(*C, LongSize); TargetTriple = Triple(M.getTargetTriple()); - AsanCtorFunction = Function::Create( - FunctionType::get(Type::getVoidTy(*C), false), - GlobalValue::InternalLinkage, kAsanModuleCtorName, &M); + AsanCtorFunction = + Function::Create(FunctionType::get(Type::getVoidTy(*C), false), + GlobalValue::InternalLinkage, kAsanModuleCtorName, &M); BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction); // call __asan_init in the module ctor. IRBuilder<> IRB(ReturnInst::Create(*C, AsanCtorBB)); @@ -1424,22 +1499,21 @@ bool AddressSanitizer::runOnFunction(Function &F) { // If needed, insert __asan_init before checking for SanitizeAddress attr. maybeInsertAsanInitAtFunctionEntry(F); - if (!F.hasFnAttribute(Attribute::SanitizeAddress)) - return false; + if (!F.hasFnAttribute(Attribute::SanitizeAddress)) return false; - if (!ClDebugFunc.empty() && ClDebugFunc != F.getName()) - return false; + if (!ClDebugFunc.empty() && ClDebugFunc != F.getName()) return false; // We want to instrument every address only once per basic block (unless there // are calls between uses). 
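// For example, in a block containing
//   a[i] = 1; f(); x = a[i];
// the second access to a[i] is checked again even though the address was
// already seen: the call to f() clears TempsToInstrument below, because
// after a call nothing cached about the address can be trusted.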
- SmallSet<Value*, 16> TempsToInstrument; - SmallVector<Instruction*, 16> ToInstrument; - SmallVector<Instruction*, 8> NoReturnCalls; - SmallVector<BasicBlock*, 16> AllBlocks; - SmallVector<Instruction*, 16> PointerComparisonsOrSubtracts; + SmallSet<Value *, 16> TempsToInstrument; + SmallVector<Instruction *, 16> ToInstrument; + SmallVector<Instruction *, 8> NoReturnCalls; + SmallVector<BasicBlock *, 16> AllBlocks; + SmallVector<Instruction *, 16> PointerComparisonsOrSubtracts; int NumAllocas = 0; bool IsWrite; unsigned Alignment; + uint64_t TypeSize; // Fill the set of memory operations to instrument. for (auto &BB : F) { @@ -1448,8 +1522,8 @@ bool AddressSanitizer::runOnFunction(Function &F) { int NumInsnsPerBB = 0; for (auto &Inst : BB) { if (LooksLikeCodeInBug11395(&Inst)) return false; - if (Value *Addr = - isInterestingMemoryAccess(&Inst, &IsWrite, &Alignment)) { + if (Value *Addr = isInterestingMemoryAccess(&Inst, &IsWrite, &TypeSize, + &Alignment)) { if (ClOpt && ClOptSameTemp) { if (!TempsToInstrument.insert(Addr).second) continue; // We've seen this temp in the current BB. @@ -1461,21 +1535,18 @@ bool AddressSanitizer::runOnFunction(Function &F) { } else if (isa<MemIntrinsic>(Inst)) { // ok, take it. } else { - if (isa<AllocaInst>(Inst)) - NumAllocas++; + if (isa<AllocaInst>(Inst)) NumAllocas++; CallSite CS(&Inst); if (CS) { // A call inside BB. TempsToInstrument.clear(); - if (CS.doesNotReturn()) - NoReturnCalls.push_back(CS.getInstruction()); + if (CS.doesNotReturn()) NoReturnCalls.push_back(CS.getInstruction()); } continue; } ToInstrument.push_back(&Inst); NumInsnsPerBB++; - if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB) - break; + if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB) break; } } @@ -1484,13 +1555,20 @@ bool AddressSanitizer::runOnFunction(Function &F) { ToInstrument.size() > (unsigned)ClInstrumentationWithCallsThreshold) UseCalls = true; + const TargetLibraryInfo *TLI = + &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + const DataLayout &DL = F.getParent()->getDataLayout(); + ObjectSizeOffsetVisitor ObjSizeVis(DL, TLI, F.getContext(), + /*RoundToAlign=*/true); + // Instrument. int NumInstrumented = 0; for (auto Inst : ToInstrument) { if (ClDebugMin < 0 || ClDebugMax < 0 || (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) { - if (isInterestingMemoryAccess(Inst, &IsWrite, &Alignment)) - instrumentMop(Inst, UseCalls); + if (isInterestingMemoryAccess(Inst, &IsWrite, &TypeSize, &Alignment)) + instrumentMop(ObjSizeVis, Inst, UseCalls, + F.getParent()->getDataLayout()); else instrumentMemIntrinsic(cast<MemIntrinsic>(Inst)); } @@ -1549,10 +1627,9 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) { IntptrTy, IntptrTy, nullptr)); } -void -FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes, - IRBuilder<> &IRB, Value *ShadowBase, - bool DoPoison) { +void FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes, + IRBuilder<> &IRB, Value *ShadowBase, + bool DoPoison) { size_t n = ShadowBytes.size(); size_t i = 0; // We need to (un)poison n bytes of stack shadow. 
Poison as many as we can @@ -1563,7 +1640,7 @@ FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes, for (; i + LargeStoreSizeInBytes - 1 < n; i += LargeStoreSizeInBytes) { uint64_t Val = 0; for (size_t j = 0; j < LargeStoreSizeInBytes; j++) { - if (ASan.DL->isLittleEndian()) + if (F.getParent()->getDataLayout().isLittleEndian()) Val |= (uint64_t)ShadowBytes[i + j] << (8 * j); else Val = (Val << 8) | ShadowBytes[i + j]; @@ -1582,9 +1659,8 @@ FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes, static int StackMallocSizeClass(uint64_t LocalStackSize) { assert(LocalStackSize <= kMaxStackMallocSize); uint64_t MaxSize = kMinStackMallocSize; - for (int i = 0; ; i++, MaxSize *= 2) - if (LocalStackSize <= MaxSize) - return i; + for (int i = 0;; i++, MaxSize *= 2) + if (LocalStackSize <= MaxSize) return i; llvm_unreachable("impossible LocalStackSize"); } @@ -1596,18 +1672,21 @@ static int StackMallocSizeClass(uint64_t LocalStackSize) { void FunctionStackPoisoner::SetShadowToStackAfterReturnInlined( IRBuilder<> &IRB, Value *ShadowBase, int Size) { assert(!(Size % 8)); - assert(kAsanStackAfterReturnMagic == 0xf5); + + // kAsanStackAfterReturnMagic is 0xf5. + const uint64_t kAsanStackAfterReturnMagic64 = 0xf5f5f5f5f5f5f5f5ULL; + for (int i = 0; i < Size; i += 8) { Value *p = IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i)); - IRB.CreateStore(ConstantInt::get(IRB.getInt64Ty(), 0xf5f5f5f5f5f5f5f5ULL), - IRB.CreateIntToPtr(p, IRB.getInt64Ty()->getPointerTo())); + IRB.CreateStore( + ConstantInt::get(IRB.getInt64Ty(), kAsanStackAfterReturnMagic64), + IRB.CreateIntToPtr(p, IRB.getInt64Ty()->getPointerTo())); } } static DebugLoc getFunctionEntryDebugLocation(Function &F) { for (const auto &Inst : F.getEntryBlock()) - if (!isa<AllocaInst>(Inst)) - return Inst.getDebugLoc(); + if (!isa<AllocaInst>(Inst)) return Inst.getDebugLoc(); return DebugLoc(); } @@ -1664,9 +1743,9 @@ void FunctionStackPoisoner::poisonStack() { SmallVector<ASanStackVariableDescription, 16> SVD; SVD.reserve(AllocaVec.size()); for (AllocaInst *AI : AllocaVec) { - ASanStackVariableDescription D = { AI->getName().data(), - getAllocaSizeInBytes(AI), - AI->getAlignment(), AI, 0}; + ASanStackVariableDescription D = {AI->getName().data(), + ASan.getAllocaSizeInBytes(AI), + AI->getAlignment(), AI, 0}; SVD.push_back(D); } // Minimal header size (left redzone) is 4 pointers, @@ -1757,19 +1836,19 @@ void FunctionStackPoisoner::poisonStack() { BasePlus0); // Write the frame description constant to redzone[1]. Value *BasePlus1 = IRB.CreateIntToPtr( - IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, ASan.LongSize/8)), - IntptrPtrTy); + IRB.CreateAdd(LocalStackBase, + ConstantInt::get(IntptrTy, ASan.LongSize / 8)), + IntptrPtrTy); GlobalVariable *StackDescriptionGlobal = createPrivateGlobalForString(*F.getParent(), L.DescriptionString, - /*AllowMerging*/true); - Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, - IntptrTy); + /*AllowMerging*/ true); + Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy); IRB.CreateStore(Description, BasePlus1); // Write the PC to redzone[2]. Value *BasePlus2 = IRB.CreateIntToPtr( - IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, - 2 * ASan.LongSize/8)), - IntptrPtrTy); + IRB.CreateAdd(LocalStackBase, + ConstantInt::get(IntptrTy, 2 * ASan.LongSize / 8)), + IntptrPtrTy); IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2); // Poison the stack redzones at the entry. 
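The size-class search in StackMallocSizeClass above is plain power-of-two bucketing. A minimal standalone sketch, assuming a hypothetical minimum class size of 64 bytes (the real kMinStackMallocSize is defined elsewhere in this file):

#include <cstdint>

// Hypothetical stand-in for kMinStackMallocSize; illustration only.
static const uint64_t kMinStackMallocSizeSketch = 64;

// Same loop as StackMallocSizeClass: class i covers frames up to kMin << i.
static int stackMallocSizeClassSketch(uint64_t LocalStackSize) {
  uint64_t MaxSize = kMinStackMallocSizeSketch;
  for (int i = 0;; i++, MaxSize *= 2)
    if (LocalStackSize <= MaxSize)
      return i;
}

// e.g. sizes 1..64 map to class 0, 65..128 to class 1, 129..256 to class 2.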
@@ -1830,8 +1909,7 @@ void FunctionStackPoisoner::poisonStack() { } // We are done. Remove the old unused alloca instructions. - for (auto AI : AllocaVec) - AI->eraseFromParent(); + for (auto AI : AllocaVec) AI->eraseFromParent(); } void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size, @@ -1839,9 +1917,9 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size, // For now just insert the call to ASan runtime. Value *AddrArg = IRB.CreatePointerCast(V, IntptrTy); Value *SizeArg = ConstantInt::get(IntptrTy, Size); - IRB.CreateCall2(DoPoison ? AsanPoisonStackMemoryFunc - : AsanUnpoisonStackMemoryFunc, - AddrArg, SizeArg); + IRB.CreateCall2( + DoPoison ? AsanPoisonStackMemoryFunc : AsanUnpoisonStackMemoryFunc, + AddrArg, SizeArg); } // Handling llvm.lifetime intrinsics for a given %alloca: @@ -1856,12 +1934,11 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size, AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) { if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) // We're intested only in allocas we can handle. - return isInterestingAlloca(*AI) ? AI : nullptr; + return ASan.isInterestingAlloca(*AI) ? AI : nullptr; // See if we've already calculated (or started to calculate) alloca for a // given value. AllocaForValueMapTy::iterator I = AllocaForValue.find(V); - if (I != AllocaForValue.end()) - return I->second; + if (I != AllocaForValue.end()) return I->second; // Store 0 while we're calculating alloca for value V to avoid // infinite recursion if the value references itself. AllocaForValue[V] = nullptr; @@ -1880,8 +1957,7 @@ AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) { Res = IncValueAI; } } - if (Res) - AllocaForValue[V] = Res; + if (Res) AllocaForValue[V] = Res; return Res; } @@ -1912,14 +1988,14 @@ Value *FunctionStackPoisoner::computePartialRzMagic(Value *PartialSize, Value *Shift = IRB.CreateAnd(PartialSize, IRB.getInt32(~7)); unsigned Val1Int = kAsanAllocaPartialVal1; unsigned Val2Int = kAsanAllocaPartialVal2; - if (!ASan.DL->isLittleEndian()) { + if (!F.getParent()->getDataLayout().isLittleEndian()) { Val1Int = sys::getSwappedBytes(Val1Int); Val2Int = sys::getSwappedBytes(Val2Int); } Value *Val1 = shiftAllocaMagic(IRB.getInt32(Val1Int), IRB, Shift); Value *PartialBits = IRB.CreateAnd(PartialSize, IRB.getInt32(7)); // For BigEndian get 0x000000YZ -> 0xYZ000000. - if (ASan.DL->isBigEndian()) + if (F.getParent()->getDataLayout().isBigEndian()) PartialBits = IRB.CreateShl(PartialBits, IRB.getInt32(24)); Value *Val2 = IRB.getInt32(Val2Int); Value *Cond = @@ -1953,7 +2029,8 @@ void FunctionStackPoisoner::handleDynamicAllocaCall( // redzones, and OldSize is number of allocated blocks with // ElementSize size, get allocated memory size in bytes by // OldSize * ElementSize. - unsigned ElementSize = ASan.DL->getTypeAllocSize(AI->getAllocatedType()); + unsigned ElementSize = + F.getParent()->getDataLayout().getTypeAllocSize(AI->getAllocatedType()); Value *OldSize = IRB.CreateMul(AI->getArraySize(), ConstantInt::get(IntptrTy, ElementSize)); @@ -2021,3 +2098,20 @@ void FunctionStackPoisoner::handleDynamicAllocaCall( AI->eraseFromParent(); NumInstrumentedDynamicAllocas++; } + +// isSafeAccess returns true if Addr is always inbounds with respect to its +// base object. For example, it is a field access or an array access with +// constant inbounds index. 
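// A worked instance of the three checks listed below: for
//   struct S { int a; int b; } *p;  p->b = 0;
// on a typical 32-bit-int target the visitor computes Size = 8 (sizeof(S))
// and Offset = 4, and the access needs TypeSize / 8 = 4 bytes; 4 >= 0,
// 8 >= 4 and 8 - 4 >= 4 all hold, so the access is provably in bounds.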
+bool AddressSanitizer::isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis, + Value *Addr, uint64_t TypeSize) const { + SizeOffsetType SizeOffset = ObjSizeVis.compute(Addr); + if (!ObjSizeVis.bothKnown(SizeOffset)) return false; + uint64_t Size = SizeOffset.first.getZExtValue(); + int64_t Offset = SizeOffset.second.getSExtValue(); + // Three checks are required to ensure safety: + // . Offset >= 0 (since the offset is given from the base ptr) + // . Size >= Offset (unsigned) + // . Size - Offset >= NeededSize (unsigned) + return Offset >= 0 && Size >= uint64_t(Offset) && + Size - uint64_t(Offset) >= TypeSize / 8; +} diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp index 2b5f39c..8113834 100644 --- a/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetFolder.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" @@ -24,7 +25,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" using namespace llvm; #define DEBUG_TYPE "bounds-checking" @@ -49,12 +49,10 @@ namespace { bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<DataLayoutPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); } private: - const DataLayout *DL; const TargetLibraryInfo *TLI; ObjectSizeOffsetEvaluator *ObjSizeEval; BuilderTy *Builder; @@ -63,7 +61,7 @@ namespace { BasicBlock *getTrapBB(); void emitBranchToTrap(Value *Cmp = nullptr); - bool instrument(Value *Ptr, Value *Val); + bool instrument(Value *Ptr, Value *Val, const DataLayout &DL); }; } @@ -125,8 +123,9 @@ void BoundsChecking::emitBranchToTrap(Value *Cmp) { /// result from the load or the value being stored. It is used to determine the /// size of memory block that is touched. /// Returns true if any change was made to the IR, false otherwise. 
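// Conceptually, each checked access below becomes
//   if (!(Offset >= 0 && Size >= Offset && Size - Offset >= NeededSize))
//     <branch to the shared trap block>;
// with ObjectSizeOffsetEvaluator supplying Size and Offset; when both are
// compile-time constants the comparison can fold away entirely.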
-bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) { - uint64_t NeededSize = DL->getTypeStoreSize(InstVal->getType()); +bool BoundsChecking::instrument(Value *Ptr, Value *InstVal, + const DataLayout &DL) { + uint64_t NeededSize = DL.getTypeStoreSize(InstVal->getType()); DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize) << " bytes\n"); @@ -141,7 +140,7 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) { Value *Offset = SizeOffset.second; ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size); - Type *IntTy = DL->getIntPtrType(Ptr->getType()); + Type *IntTy = DL.getIntPtrType(Ptr->getType()); Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize); // three checks are required to ensure safety: @@ -165,7 +164,7 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) { } bool BoundsChecking::runOnFunction(Function &F) { - DL = &getAnalysis<DataLayoutPass>().getDataLayout(); + const DataLayout &DL = F.getParent()->getDataLayout(); TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); TrapBB = nullptr; @@ -192,13 +191,16 @@ bool BoundsChecking::runOnFunction(Function &F) { Builder->SetInsertPoint(Inst); if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { - MadeChange |= instrument(LI->getPointerOperand(), LI); + MadeChange |= instrument(LI->getPointerOperand(), LI, DL); } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - MadeChange |= instrument(SI->getPointerOperand(), SI->getValueOperand()); + MadeChange |= + instrument(SI->getPointerOperand(), SI->getValueOperand(), DL); } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst)) { - MadeChange |= instrument(AI->getPointerOperand(),AI->getCompareOperand()); + MadeChange |= + instrument(AI->getPointerOperand(), AI->getCompareOperand(), DL); } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) { - MadeChange |= instrument(AI->getPointerOperand(), AI->getValOperand()); + MadeChange |= + instrument(AI->getPointerOperand(), AI->getValOperand(), DL); } else { llvm_unreachable("unknown Instruction type"); } diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 6adf0d2..b3925ee 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -217,7 +217,6 @@ class DataFlowSanitizer : public ModulePass { WK_Custom }; - const DataLayout *DL; Module *Mod; LLVMContext *Ctx; IntegerType *ShadowTy; @@ -422,16 +421,13 @@ bool DataFlowSanitizer::doInitialization(Module &M) { bool IsMIPS64 = TargetTriple.getArch() == llvm::Triple::mips64 || TargetTriple.getArch() == llvm::Triple::mips64el; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - if (!DLP) - report_fatal_error("data layout missing"); - DL = &DLP->getDataLayout(); + const DataLayout &DL = M.getDataLayout(); Mod = &M; Ctx = &M.getContext(); ShadowTy = IntegerType::get(*Ctx, ShadowWidth); ShadowPtrTy = PointerType::getUnqual(ShadowTy); - IntptrTy = DL->getIntPtrType(*Ctx); + IntptrTy = DL.getIntPtrType(*Ctx); ZeroShadow = ConstantInt::getSigned(ShadowTy, 0); ShadowPtrMul = ConstantInt::getSigned(IntptrTy, ShadowWidth / 8); if (IsX86_64) @@ -593,9 +589,6 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT, } bool DataFlowSanitizer::runOnModule(Module &M) { - if (!DL) - return false; - if (ABIList.isIn(M, "skip")) return false; @@ -1056,7 +1049,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, uint64_t ShadowAlign = Align 
* DFS.ShadowWidth / 8; SmallVector<Value *, 2> Objs; - GetUnderlyingObjects(Addr, Objs, DFS.DL); + GetUnderlyingObjects(Addr, Objs, Pos->getModule()->getDataLayout()); bool AllConstants = true; for (SmallVector<Value *, 2>::iterator i = Objs.begin(), e = Objs.end(); i != e; ++i) { @@ -1157,7 +1150,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, } void DFSanVisitor::visitLoadInst(LoadInst &LI) { - uint64_t Size = DFSF.DFS.DL->getTypeStoreSize(LI.getType()); + auto &DL = LI.getModule()->getDataLayout(); + uint64_t Size = DL.getTypeStoreSize(LI.getType()); if (Size == 0) { DFSF.setShadow(&LI, DFSF.DFS.ZeroShadow); return; @@ -1167,7 +1161,7 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) { if (ClPreserveAlignment) { Align = LI.getAlignment(); if (Align == 0) - Align = DFSF.DFS.DL->getABITypeAlignment(LI.getType()); + Align = DL.getABITypeAlignment(LI.getType()); } else { Align = 1; } @@ -1235,8 +1229,8 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align, } void DFSanVisitor::visitStoreInst(StoreInst &SI) { - uint64_t Size = - DFSF.DFS.DL->getTypeStoreSize(SI.getValueOperand()->getType()); + auto &DL = SI.getModule()->getDataLayout(); + uint64_t Size = DL.getTypeStoreSize(SI.getValueOperand()->getType()); if (Size == 0) return; @@ -1244,7 +1238,7 @@ void DFSanVisitor::visitStoreInst(StoreInst &SI) { if (ClPreserveAlignment) { Align = SI.getAlignment(); if (Align == 0) - Align = DFSF.DFS.DL->getABITypeAlignment(SI.getValueOperand()->getType()); + Align = DL.getABITypeAlignment(SI.getValueOperand()->getType()); } else { Align = 1; } diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index cb965fb..a793e69 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -47,6 +47,8 @@ using namespace llvm; static cl::opt<std::string> DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden, cl::ValueRequired); +static cl::opt<bool> DefaultExitBlockBeforeBody("gcov-exit-block-before-body", + cl::init(false), cl::Hidden); GCOVOptions GCOVOptions::getDefault() { GCOVOptions Options; @@ -55,6 +57,7 @@ GCOVOptions GCOVOptions::getDefault() { Options.UseCfgChecksum = false; Options.NoRedZone = false; Options.FunctionNamesInData = true; + Options.ExitBlockBeforeBody = DefaultExitBlockBeforeBody; if (DefaultGCOVVersion.size() != 4) { llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") + @@ -70,20 +73,10 @@ namespace { class GCOVProfiler : public ModulePass { public: static char ID; - GCOVProfiler() : ModulePass(ID), Options(GCOVOptions::getDefault()) { - init(); - } - GCOVProfiler(const GCOVOptions &Options) : ModulePass(ID), Options(Options){ + GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {} + GCOVProfiler(const GCOVOptions &Opts) : ModulePass(ID), Options(Opts) { assert((Options.EmitNotes || Options.EmitData) && "GCOVProfiler asked to do nothing?"); - init(); - } - const char *getPassName() const override { - return "GCOV Profiler"; - } - - private: - void init() { ReversedVersion[0] = Options.Version[3]; ReversedVersion[1] = Options.Version[2]; ReversedVersion[2] = Options.Version[1]; @@ -91,6 +84,11 @@ namespace { ReversedVersion[4] = '\0'; initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); } + const char *getPassName() const override { + return "GCOV Profiler"; + } + + private: bool runOnModule(Module &M) override; // Create the .gcno files for the Module based on 
DebugInfo. @@ -312,7 +310,7 @@ namespace { class GCOVFunction : public GCOVRecord { public: GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident, - bool UseCfgChecksum) + bool UseCfgChecksum, bool ExitBlockBeforeBody) : SP(SP), Ident(Ident), UseCfgChecksum(UseCfgChecksum), CfgChecksum(0), ReturnBlock(1, os) { this->os = os; @@ -322,11 +320,13 @@ namespace { uint32_t i = 0; for (auto &BB : *F) { - // Skip index 1 (0, 2, 3, 4, ...) because that's assigned to the - // ReturnBlock. - bool first = i == 0; - Blocks.insert(std::make_pair(&BB, GCOVBlock(i++ + !first, os))); + // Skip index 1 if it's assigned to the ReturnBlock. + if (i == 1 && ExitBlockBeforeBody) + ++i; + Blocks.insert(std::make_pair(&BB, GCOVBlock(i++, os))); } + if (!ExitBlockBeforeBody) + ReturnBlock.Number = i; std::string FunctionNameAndLine; raw_string_ostream FNLOS(FunctionNameAndLine); @@ -469,7 +469,7 @@ static bool functionHasLines(Function *F) { if (Loc.isUnknown()) continue; // Artificial lines such as calls to the global constructors. - if (Loc.getLine() == 0) continue; + if (Loc.getLine() == 0) continue; return true; } @@ -513,7 +513,8 @@ void GCOVProfiler::emitProfileNotes() { EntryBlock.splitBasicBlock(It); Funcs.push_back(make_unique<GCOVFunction>(SP, &out, FunctionIdent++, - Options.UseCfgChecksum)); + Options.UseCfgChecksum, + Options.ExitBlockBeforeBody)); GCOVFunction &Func = *Funcs.back(); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 4152679..c2aa1e2 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -274,7 +274,6 @@ class MemorySanitizer : public FunctionPass { MemorySanitizer(int TrackOrigins = 0) : FunctionPass(ID), TrackOrigins(std::max(TrackOrigins, (int)ClTrackOrigins)), - DL(nullptr), WarningFn(nullptr) {} const char *getPassName() const override { return "MemorySanitizer"; } bool runOnFunction(Function &F) override; @@ -287,7 +286,6 @@ class MemorySanitizer : public FunctionPass { /// \brief Track origins (allocation points) of uninitialized values. int TrackOrigins; - const DataLayout *DL; LLVMContext *C; Type *IntptrTy; Type *OriginTy; @@ -449,10 +447,7 @@ void MemorySanitizer::initializeCallbacks(Module &M) { /// /// inserts a call to __msan_init to the module's constructor list. bool MemorySanitizer::doInitialization(Module &M) { - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - if (!DLP) - report_fatal_error("data layout missing"); - DL = &DLP->getDataLayout(); + auto &DL = M.getDataLayout(); Triple TargetTriple(M.getTargetTriple()); switch (TargetTriple.getOS()) { @@ -604,7 +599,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) { - unsigned IntptrSize = MS.DL->getTypeStoreSize(MS.IntptrTy); + const DataLayout &DL = F.getParent()->getDataLayout(); + unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy); if (IntptrSize == kOriginSize) return Origin; assert(IntptrSize == kOriginSize * 2); Origin = IRB.CreateIntCast(Origin, MS.IntptrTy, /* isSigned */ false); @@ -614,8 +610,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// \brief Fill memory range with the given origin value. 
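// On a 64-bit target IntptrSize is 8 while kOriginSize is 4, so
// originToIntptr above widens the 32-bit origin id and (in the lines elided
// from this hunk) replicates it into both halves of an intptr-sized word,
// e.g. origin 0x0000AB12 becomes 0x0000AB120000AB12; paintOrigin can then
// fill most of the range with intptr-sized stores.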
void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr, unsigned Size, unsigned Alignment) { - unsigned IntptrAlignment = MS.DL->getABITypeAlignment(MS.IntptrTy); - unsigned IntptrSize = MS.DL->getTypeStoreSize(MS.IntptrTy); + const DataLayout &DL = F.getParent()->getDataLayout(); + unsigned IntptrAlignment = DL.getABITypeAlignment(MS.IntptrTy); + unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy); assert(IntptrAlignment >= kMinOriginAlignment); assert(IntptrSize >= kOriginSize); @@ -643,8 +640,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin, unsigned Alignment, bool AsCall) { + const DataLayout &DL = F.getParent()->getDataLayout(); unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment); - unsigned StoreSize = MS.DL->getTypeStoreSize(Shadow->getType()); + unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType()); if (isa<StructType>(Shadow->getType())) { paintOrigin(IRB, updateOrigin(Origin, IRB), getOriginPtr(Addr, IRB, Alignment), StoreSize, @@ -661,7 +659,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } unsigned TypeSizeInBits = - MS.DL->getTypeSizeInBits(ConvertedShadow->getType()); + DL.getTypeSizeInBits(ConvertedShadow->getType()); unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits); if (AsCall && SizeIndex < kNumberOfAccessSizes) { Value *Fn = MS.MaybeStoreOriginFn[SizeIndex]; @@ -731,8 +729,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { return; } - unsigned TypeSizeInBits = - MS.DL->getTypeSizeInBits(ConvertedShadow->getType()); + const DataLayout &DL = OrigIns->getModule()->getDataLayout(); + + unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType()); unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits); if (AsCall && SizeIndex < kNumberOfAccessSizes) { Value *Fn = MS.MaybeWarningFn[SizeIndex]; @@ -772,7 +771,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// \brief Add MemorySanitizer instrumentation to a function. bool runOnFunction() { MS.initializeCallbacks(*F.getParent()); - if (!MS.DL) return false; // In the presence of unreachable blocks, we may see Phi nodes with // incoming nodes from such blocks. Since InstVisitor skips unreachable @@ -828,8 +826,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // This may return weird-sized types like i1. 
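// e.g. the mapping below gives getShadowTy(i1) == i1,
// getShadowTy(float) == i32, getShadowTy(<4 x float>) == <4 x i32>, and a
// struct type is rebuilt member-by-member, so {i32, i8} keeps its shape
// rather than collapsing to one flat integer.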
if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy)) return IT; + const DataLayout &DL = F.getParent()->getDataLayout(); if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) { - uint32_t EltSize = MS.DL->getTypeSizeInBits(VT->getElementType()); + uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType()); return VectorType::get(IntegerType::get(*MS.C, EltSize), VT->getNumElements()); } @@ -845,7 +844,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n"); return Res; } - uint32_t TypeSize = MS.DL->getTypeSizeInBits(OrigTy); + uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy); return IntegerType::get(*MS.C, TypeSize); } @@ -1038,14 +1037,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Function *F = A->getParent(); IRBuilder<> EntryIRB(F->getEntryBlock().getFirstNonPHI()); unsigned ArgOffset = 0; + const DataLayout &DL = F->getParent()->getDataLayout(); for (auto &FArg : F->args()) { if (!FArg.getType()->isSized()) { DEBUG(dbgs() << "Arg is not sized\n"); continue; } - unsigned Size = FArg.hasByValAttr() - ? MS.DL->getTypeAllocSize(FArg.getType()->getPointerElementType()) - : MS.DL->getTypeAllocSize(FArg.getType()); + unsigned Size = + FArg.hasByValAttr() + ? DL.getTypeAllocSize(FArg.getType()->getPointerElementType()) + : DL.getTypeAllocSize(FArg.getType()); if (A == &FArg) { bool Overflow = ArgOffset + Size > kParamTLSSize; Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset); @@ -1056,7 +1057,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { unsigned ArgAlign = FArg.getParamAlignment(); if (ArgAlign == 0) { Type *EltType = A->getType()->getPointerElementType(); - ArgAlign = MS.DL->getABITypeAlignment(EltType); + ArgAlign = DL.getABITypeAlignment(EltType); } if (Overflow) { // ParamTLS overflow. 
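The loop above lays each argument's shadow out at increasing offsets in a fixed-size thread-local buffer and simply stops writing once the buffer would overflow. A minimal sketch of that layout rule, assuming a hypothetical 800-byte buffer (the real kParamTLSSize is defined elsewhere in this file):

#include <vector>

// Hypothetical stand-in for kParamTLSSize; illustration only.
static const unsigned kParamTLSSizeSketch = 800;

// Returns each argument's shadow offset in the parameter TLS buffer,
// or -1 once that argument's shadow no longer fits.
static std::vector<int> paramShadowOffsetsSketch(
    const std::vector<unsigned> &ArgSizes) {
  std::vector<int> Offsets;
  unsigned ArgOffset = 0;
  for (unsigned Size : ArgSizes) {
    bool Overflow = ArgOffset + Size > kParamTLSSizeSketch;
    Offsets.push_back(Overflow ? -1 : (int)ArgOffset);
    ArgOffset += Size; // The real pass additionally aligns this step.
  }
  return Offsets;
}

// e.g. sizes {8, 8, 792} give offsets {0, 8, -1}: the third shadow is dropped.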
@@ -2427,10 +2428,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { DEBUG(dbgs() << " Arg#" << i << ": " << *A << " Shadow: " << *ArgShadow << "\n"); bool ArgIsInitialized = false; + const DataLayout &DL = F.getParent()->getDataLayout(); if (CS.paramHasAttr(i + 1, Attribute::ByVal)) { assert(A->getType()->isPointerTy() && "ByVal argument is not a pointer!"); - Size = MS.DL->getTypeAllocSize(A->getType()->getPointerElementType()); + Size = DL.getTypeAllocSize(A->getType()->getPointerElementType()); if (ArgOffset + Size > kParamTLSSize) break; unsigned ParamAlignment = CS.getParamAlignment(i + 1); unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment); @@ -2438,7 +2440,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB), Size, Alignment); } else { - Size = MS.DL->getTypeAllocSize(A->getType()); + Size = DL.getTypeAllocSize(A->getType()); if (ArgOffset + Size > kParamTLSSize) break; Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase, kShadowTLSAlignment); @@ -2531,7 +2533,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { setShadow(&I, getCleanShadow(&I)); setOrigin(&I, getCleanOrigin()); IRBuilder<> IRB(I.getNextNode()); - uint64_t Size = MS.DL->getTypeAllocSize(I.getAllocatedType()); + const DataLayout &DL = F.getParent()->getDataLayout(); + uint64_t Size = DL.getTypeAllocSize(I.getAllocatedType()); if (PoisonStack && ClPoisonStackWithCall) { IRB.CreateCall2(MS.MsanPoisonStackFn, IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), @@ -2723,6 +2726,7 @@ struct VarArgAMD64Helper : public VarArgHelper { unsigned GpOffset = 0; unsigned FpOffset = AMD64GpEndOffset; unsigned OverflowOffset = AMD64FpEndOffset; + const DataLayout &DL = F.getParent()->getDataLayout(); for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end(); ArgIt != End; ++ArgIt) { Value *A = *ArgIt; @@ -2732,7 +2736,7 @@ struct VarArgAMD64Helper : public VarArgHelper { // ByVal arguments always go to the overflow area. 
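// This mirrors the AMD64 vararg save-area layout: shadow for
// general-purpose register args goes in [0, AMD64GpEndOffset), SSE register
// args in the FP area after it, and anything classified AK_Memory
// (including every byval aggregate) is appended at OverflowOffset, rounded
// up to 8 bytes per argument.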
assert(A->getType()->isPointerTy()); Type *RealTy = A->getType()->getPointerElementType(); - uint64_t ArgSize = MS.DL->getTypeAllocSize(RealTy); + uint64_t ArgSize = DL.getTypeAllocSize(RealTy); Value *Base = getShadowPtrForVAArgument(RealTy, IRB, OverflowOffset); OverflowOffset += RoundUpToAlignment(ArgSize, 8); IRB.CreateMemCpy(Base, MSV.getShadowPtr(A, IRB.getInt8Ty(), IRB), @@ -2754,7 +2758,7 @@ struct VarArgAMD64Helper : public VarArgHelper { FpOffset += 16; break; case AK_Memory: - uint64_t ArgSize = MS.DL->getTypeAllocSize(A->getType()); + uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset); OverflowOffset += RoundUpToAlignment(ArgSize, 8); } @@ -2862,11 +2866,12 @@ struct VarArgMIPS64Helper : public VarArgHelper { void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override { unsigned VAArgOffset = 0; + const DataLayout &DL = F.getParent()->getDataLayout(); for (CallSite::arg_iterator ArgIt = CS.arg_begin() + 1, End = CS.arg_end(); ArgIt != End; ++ArgIt) { Value *A = *ArgIt; Value *Base; - uint64_t ArgSize = MS.DL->getTypeAllocSize(A->getType()); + uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); #if defined(__MIPSEB__) || defined(MIPSEB) // Adjusting the shadow for argument with size < 8 to match the placement // of bits in big endian system diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 8c56e87..289675e 100644 --- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -59,6 +59,7 @@ static const char *const kSanCovWithCheckName = "__sanitizer_cov_with_check"; static const char *const kSanCovIndirCallName = "__sanitizer_cov_indir_call16"; static const char *const kSanCovTraceEnter = "__sanitizer_cov_trace_func_enter"; static const char *const kSanCovTraceBB = "__sanitizer_cov_trace_basic_block"; +static const char *const kSanCovTraceCmp = "__sanitizer_cov_trace_cmp"; static const char *const kSanCovModuleCtorName = "sancov.module_ctor"; static const uint64_t kSanCtorAndDtorPriority = 2; @@ -72,7 +73,7 @@ static cl::opt<unsigned> ClCoverageBlockThreshold( "sanitizer-coverage-block-threshold", cl::desc("Use a callback with a guard check inside it if there are" " more than this number of blocks."), - cl::Hidden, cl::init(1000)); + cl::Hidden, cl::init(500)); static cl::opt<bool> ClExperimentalTracing("sanitizer-coverage-experimental-tracing", @@ -80,6 +81,22 @@ static cl::opt<bool> "callbacks at every basic block"), cl::Hidden, cl::init(false)); +static cl::opt<bool> + ClExperimentalCMPTracing("sanitizer-coverage-experimental-trace-compares", + cl::desc("Experimental tracing of CMP and similar " + "instructions"), + cl::Hidden, cl::init(false)); + +// Experimental 8-bit counters used as an additional search heuristic during +// coverage-guided fuzzing. +// The counters are not thread-friendly: +// - contention on these counters may cause significant slowdown; +// - the counter updates are racy and the results may be inaccurate. +// They are also inaccurate due to 8-bit integer overflow. 
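// When enabled, each instrumented block effectively executes
//   counter[BlockId] = counter[BlockId] + 1;
// as a plain non-atomic i8 load/add/store (see InjectCoverageAtBlock below),
// which is exactly why racing threads can lose increments and why 256
// executions wrap a counter back to zero.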
+static cl::opt<bool> ClUse8bitCounters("sanitizer-coverage-8bit-counters", + cl::desc("Experimental 8-bit counters"), + cl::Hidden, cl::init(false)); + namespace { class SanitizerCoverageModule : public ModulePass { @@ -94,26 +111,29 @@ class SanitizerCoverageModule : public ModulePass { return "SanitizerCoverageModule"; } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<DataLayoutPass>(); - } - private: void InjectCoverageForIndirectCalls(Function &F, ArrayRef<Instruction *> IndirCalls); - bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks, - ArrayRef<Instruction *> IndirCalls); + void InjectTraceForCmp(Function &F, ArrayRef<Instruction *> CmpTraceTargets); + bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks); + void SetNoSanitizeMetada(Instruction *I); void InjectCoverageAtBlock(Function &F, BasicBlock &BB, bool UseCalls); + unsigned NumberOfInstrumentedBlocks() { + return SanCovFunction->getNumUses() + SanCovWithCheckFunction->getNumUses(); + } Function *SanCovFunction; Function *SanCovWithCheckFunction; Function *SanCovIndirCallFunction; Function *SanCovModuleInit; Function *SanCovTraceEnter, *SanCovTraceBB; + Function *SanCovTraceCmpFunction; InlineAsm *EmptyAsm; - Type *IntptrTy; + Type *IntptrTy, *Int64Ty; LLVMContext *C; + const DataLayout *DL; GlobalVariable *GuardArray; + GlobalVariable *EightBitCounterArray; int CoverageLevel; }; @@ -133,12 +153,13 @@ static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { bool SanitizerCoverageModule::runOnModule(Module &M) { if (!CoverageLevel) return false; C = &(M.getContext()); - DataLayoutPass *DLP = &getAnalysis<DataLayoutPass>(); - IntptrTy = Type::getIntNTy(*C, DLP->getDataLayout().getPointerSizeInBits()); + DL = &M.getDataLayout(); + IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits()); Type *VoidTy = Type::getVoidTy(*C); IRBuilder<> IRB(*C); Type *Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty()); Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); + Int64Ty = IRB.getInt64Ty(); Function *CtorFunc = Function::Create(FunctionType::get(VoidTy, false), @@ -152,9 +173,12 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { M.getOrInsertFunction(kSanCovWithCheckName, VoidTy, Int32PtrTy, nullptr)); SanCovIndirCallFunction = checkInterfaceFunction(M.getOrInsertFunction( kSanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr)); - SanCovModuleInit = checkInterfaceFunction( - M.getOrInsertFunction(kSanCovModuleInitName, Type::getVoidTy(*C), - Int32PtrTy, IntptrTy, Int8PtrTy, nullptr)); + SanCovTraceCmpFunction = checkInterfaceFunction(M.getOrInsertFunction( + kSanCovTraceCmp, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr)); + + SanCovModuleInit = checkInterfaceFunction(M.getOrInsertFunction( + kSanCovModuleInitName, Type::getVoidTy(*C), Int32PtrTy, IntptrTy, + Int8PtrTy, Int8PtrTy, nullptr)); SanCovModuleInit->setLinkage(Function::ExternalLinkage); // We insert an empty inline asm after cov callbacks to avoid callback merge. EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), @@ -171,26 +195,49 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { // At this point we create a dummy array of guards because we don't // know how many elements we will need. 
Type *Int32Ty = IRB.getInt32Ty(); + Type *Int8Ty = IRB.getInt8Ty(); + GuardArray = new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage, nullptr, "__sancov_gen_cov_tmp"); + if (ClUse8bitCounters) + EightBitCounterArray = + new GlobalVariable(M, Int8Ty, false, GlobalVariable::ExternalLinkage, + nullptr, "__sancov_gen_cov_tmp"); for (auto &F : M) runOnFunction(F); + auto N = NumberOfInstrumentedBlocks(); + // Now we know how many elements we need. Create an array of guards // with one extra element at the beginning for the size. - Type *Int32ArrayNTy = - ArrayType::get(Int32Ty, SanCovFunction->getNumUses() + 1); + Type *Int32ArrayNTy = ArrayType::get(Int32Ty, N + 1); GlobalVariable *RealGuardArray = new GlobalVariable( M, Int32ArrayNTy, false, GlobalValue::PrivateLinkage, Constant::getNullValue(Int32ArrayNTy), "__sancov_gen_cov"); + // Replace the dummy array with the real one. GuardArray->replaceAllUsesWith( IRB.CreatePointerCast(RealGuardArray, Int32PtrTy)); GuardArray->eraseFromParent(); + GlobalVariable *RealEightBitCounterArray; + if (ClUse8bitCounters) { + // Make sure the array is 16-aligned. + static const int kCounterAlignment = 16; + Type *Int8ArrayNTy = + ArrayType::get(Int8Ty, RoundUpToAlignment(N, kCounterAlignment)); + RealEightBitCounterArray = new GlobalVariable( + M, Int8ArrayNTy, false, GlobalValue::PrivateLinkage, + Constant::getNullValue(Int8ArrayNTy), "__sancov_gen_cov_counter"); + RealEightBitCounterArray->setAlignment(kCounterAlignment); + EightBitCounterArray->replaceAllUsesWith( + IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy)); + EightBitCounterArray->eraseFromParent(); + } + // Create variable for module (compilation unit) name Constant *ModNameStrConst = ConstantDataArray::getString(M.getContext(), M.getName(), true); @@ -200,10 +247,13 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { // Call __sanitizer_cov_module_init IRB.SetInsertPoint(CtorFunc->getEntryBlock().getTerminator()); - IRB.CreateCall3(SanCovModuleInit, - IRB.CreatePointerCast(RealGuardArray, Int32PtrTy), - ConstantInt::get(IntptrTy, SanCovFunction->getNumUses()), - IRB.CreatePointerCast(ModuleName, Int8PtrTy)); + IRB.CreateCall4( + SanCovModuleInit, IRB.CreatePointerCast(RealGuardArray, Int32PtrTy), + ConstantInt::get(IntptrTy, N), + ClUse8bitCounters + ? 
IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy) + : Constant::getNullValue(Int8PtrTy), + IRB.CreatePointerCast(ModuleName, Int8PtrTy)); return true; } @@ -215,23 +265,28 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) { SplitAllCriticalEdges(F); SmallVector<Instruction*, 8> IndirCalls; SmallVector<BasicBlock*, 16> AllBlocks; + SmallVector<Instruction*, 8> CmpTraceTargets; for (auto &BB : F) { AllBlocks.push_back(&BB); - if (CoverageLevel >= 4) - for (auto &Inst : BB) { + for (auto &Inst : BB) { + if (CoverageLevel >= 4) { CallSite CS(&Inst); if (CS && !CS.getCalledFunction()) IndirCalls.push_back(&Inst); } + if (ClExperimentalCMPTracing) + if (isa<ICmpInst>(&Inst)) + CmpTraceTargets.push_back(&Inst); + } } - InjectCoverage(F, AllBlocks, IndirCalls); + InjectCoverage(F, AllBlocks); + InjectCoverageForIndirectCalls(F, IndirCalls); + InjectTraceForCmp(F, CmpTraceTargets); return true; } -bool -SanitizerCoverageModule::InjectCoverage(Function &F, - ArrayRef<BasicBlock *> AllBlocks, - ArrayRef<Instruction *> IndirCalls) { +bool SanitizerCoverageModule::InjectCoverage(Function &F, + ArrayRef<BasicBlock *> AllBlocks) { if (!CoverageLevel) return false; if (CoverageLevel == 1) { @@ -241,7 +296,6 @@ SanitizerCoverageModule::InjectCoverage(Function &F, InjectCoverageAtBlock(F, *BB, ClCoverageBlockThreshold < AllBlocks.size()); } - InjectCoverageForIndirectCalls(F, IndirCalls); return true; } @@ -273,6 +327,32 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls( } } +void SanitizerCoverageModule::InjectTraceForCmp( + Function &F, ArrayRef<Instruction *> CmpTraceTargets) { + if (!ClExperimentalCMPTracing) return; + for (auto I : CmpTraceTargets) { + if (ICmpInst *ICMP = dyn_cast<ICmpInst>(I)) { + IRBuilder<> IRB(ICMP); + Value *A0 = ICMP->getOperand(0); + Value *A1 = ICMP->getOperand(1); + if (!A0->getType()->isIntegerTy()) continue; + uint64_t TypeSize = DL->getTypeStoreSizeInBits(A0->getType()); + // __sanitizer_cov_indir_call((type_size << 32) | predicate, A0, A1); + IRB.CreateCall3( + SanCovTraceCmpFunction, + ConstantInt::get(Int64Ty, (TypeSize << 32) | ICMP->getPredicate()), + IRB.CreateIntCast(A0, Int64Ty, true), + IRB.CreateIntCast(A1, Int64Ty, true)); + } + } +} + +void SanitizerCoverageModule::SetNoSanitizeMetada(Instruction *I) { + I->setMetadata( + I->getParent()->getParent()->getParent()->getMDKindID("nosanitize"), + MDNode::get(*C, None)); +} + void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, bool UseCalls) { BasicBlock::iterator IP = BB.getFirstInsertionPt(), BE = BB.end(); @@ -286,14 +366,15 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, } bool IsEntryBB = &BB == &F.getEntryBlock(); - DebugLoc EntryLoc = - IsEntryBB ? IP->getDebugLoc().getFnDebugLoc(*C) : IP->getDebugLoc(); + DebugLoc EntryLoc = IsEntryBB && !IP->getDebugLoc().isUnknown() + ? 
IP->getDebugLoc().getFnDebugLoc(*C) + : IP->getDebugLoc(); IRBuilder<> IRB(IP); IRB.SetCurrentDebugLocation(EntryLoc); SmallVector<Value *, 1> Indices; Value *GuardP = IRB.CreateAdd( IRB.CreatePointerCast(GuardArray, IntptrTy), - ConstantInt::get(IntptrTy, (1 + SanCovFunction->getNumUses()) * 4)); + ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4)); Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy); if (UseCalls) { @@ -302,8 +383,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, LoadInst *Load = IRB.CreateLoad(GuardP); Load->setAtomic(Monotonic); Load->setAlignment(4); - Load->setMetadata(F.getParent()->getMDKindID("nosanitize"), - MDNode::get(*C, None)); + SetNoSanitizeMetada(Load); Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load); Instruction *Ins = SplitBlockAndInsertIfThen( Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); @@ -314,6 +394,19 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, IRB.CreateCall(EmptyAsm); // Avoids callback merge. } + if(ClUse8bitCounters) { + IRB.SetInsertPoint(IP); + Value *P = IRB.CreateAdd( + IRB.CreatePointerCast(EightBitCounterArray, IntptrTy), + ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1)); + P = IRB.CreateIntToPtr(P, IRB.getInt8PtrTy()); + LoadInst *LI = IRB.CreateLoad(P); + Value *Inc = IRB.CreateAdd(LI, ConstantInt::get(IRB.getInt8Ty(), 1)); + StoreInst *SI = IRB.CreateStore(Inc, P); + SetNoSanitizeMetada(LI); + SetNoSanitizeMetada(SI); + } + if (ClExperimentalTracing) { // Experimental support for tracing. // Insert a callback with the same guard variable as used for coverage. diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index e4a4911..c3ba722 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -19,14 +19,14 @@ // The rest is handled by the run-time library. //===----------------------------------------------------------------------===// -#include "llvm/Analysis/CaptureTracking.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -76,7 +76,7 @@ namespace { /// ThreadSanitizer: instrument the code in module to find races. 
struct ThreadSanitizer : public FunctionPass { - ThreadSanitizer() : FunctionPass(ID), DL(nullptr) {} + ThreadSanitizer() : FunctionPass(ID) {} const char *getPassName() const override; bool runOnFunction(Function &F) override; bool doInitialization(Module &M) override; @@ -84,15 +84,15 @@ struct ThreadSanitizer : public FunctionPass { private: void initializeCallbacks(Module &M); - bool instrumentLoadOrStore(Instruction *I); - bool instrumentAtomic(Instruction *I); + bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL); + bool instrumentAtomic(Instruction *I, const DataLayout &DL); bool instrumentMemIntrinsic(Instruction *I); - void chooseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local, - SmallVectorImpl<Instruction*> &All); + void chooseInstructionsToInstrument(SmallVectorImpl<Instruction *> &Local, + SmallVectorImpl<Instruction *> &All, + const DataLayout &DL); bool addrPointsToConstantData(Value *Addr); - int getMemoryAccessFuncIndex(Value *Addr); + int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL); - const DataLayout *DL; Type *IntptrTy; IntegerType *OrdTy; // Callbacks to run-time library are computed in doInitialization. @@ -230,10 +230,7 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { } bool ThreadSanitizer::doInitialization(Module &M) { - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - if (!DLP) - report_fatal_error("data layout missing"); - DL = &DLP->getDataLayout(); + const DataLayout &DL = M.getDataLayout(); // Always insert a call to __tsan_init into the module's CTORs. IRBuilder<> IRB(M.getContext()); @@ -285,8 +282,8 @@ bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) { // 'Local' is a vector of insns within the same BB (no calls between). // 'All' is a vector of insns that will be instrumented. void ThreadSanitizer::chooseInstructionsToInstrument( - SmallVectorImpl<Instruction*> &Local, - SmallVectorImpl<Instruction*> &All) { + SmallVectorImpl<Instruction *> &Local, SmallVectorImpl<Instruction *> &All, + const DataLayout &DL) { SmallSet<Value*, 8> WriteTargets; // Iterate from the end. for (SmallVectorImpl<Instruction*>::reverse_iterator It = Local.rbegin(), @@ -310,7 +307,7 @@ void ThreadSanitizer::chooseInstructionsToInstrument( Value *Addr = isa<StoreInst>(*I) ? cast<StoreInst>(I)->getPointerOperand() : cast<LoadInst>(I)->getPointerOperand(); - if (isa<AllocaInst>(GetUnderlyingObject(Addr, nullptr)) && + if (isa<AllocaInst>(GetUnderlyingObject(Addr, DL)) && !PointerMayBeCaptured(Addr, true, true)) { // The variable is addressable but not captured, so it cannot be // referenced from a different thread and participate in a data race @@ -338,7 +335,6 @@ static bool isAtomic(Instruction *I) { } bool ThreadSanitizer::runOnFunction(Function &F) { - if (!DL) return false; initializeCallbacks(*F.getParent()); SmallVector<Instruction*, 8> RetVec; SmallVector<Instruction*, 8> AllLoadsAndStores; @@ -348,6 +344,7 @@ bool ThreadSanitizer::runOnFunction(Function &F) { bool Res = false; bool HasCalls = false; bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeThread); + const DataLayout &DL = F.getParent()->getDataLayout(); // Traverse all instructions, collect loads/stores/returns, check for calls. 
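// For example, a store to a local variable whose address never escapes is
// dropped by chooseInstructionsToInstrument above: GetUnderlyingObject finds
// the alloca, PointerMayBeCaptured returns false, and a value no other
// thread can reach cannot participate in a race.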
for (auto &BB : F) { @@ -362,10 +359,11 @@ bool ThreadSanitizer::runOnFunction(Function &F) { if (isa<MemIntrinsic>(Inst)) MemIntrinCalls.push_back(&Inst); HasCalls = true; - chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); + chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores, + DL); } } - chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); + chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores, DL); } // We have collected all loads and stores. @@ -375,14 +373,14 @@ bool ThreadSanitizer::runOnFunction(Function &F) { // Instrument memory accesses only if we want to report bugs in the function. if (ClInstrumentMemoryAccesses && SanitizeFunction) for (auto Inst : AllLoadsAndStores) { - Res |= instrumentLoadOrStore(Inst); + Res |= instrumentLoadOrStore(Inst, DL); } // Instrument atomic memory accesses in any case (they can be used to // implement synchronization). if (ClInstrumentAtomics) for (auto Inst : AtomicAccesses) { - Res |= instrumentAtomic(Inst); + Res |= instrumentAtomic(Inst, DL); } if (ClInstrumentMemIntrinsics && SanitizeFunction) @@ -406,13 +404,14 @@ bool ThreadSanitizer::runOnFunction(Function &F) { return Res; } -bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) { +bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I, + const DataLayout &DL) { IRBuilder<> IRB(I); bool IsWrite = isa<StoreInst>(*I); Value *Addr = IsWrite ? cast<StoreInst>(I)->getPointerOperand() : cast<LoadInst>(I)->getPointerOperand(); - int Idx = getMemoryAccessFuncIndex(Addr); + int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; if (IsWrite && isVtableAccess(I)) { @@ -443,7 +442,7 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) { ? cast<StoreInst>(I)->getAlignment() : cast<LoadInst>(I)->getAlignment(); Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType(); - const uint32_t TypeSize = DL->getTypeStoreSizeInBits(OrigTy); + const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy); Value *OnAccessFunc = nullptr; if (Alignment == 0 || Alignment >= 8 || (Alignment % (TypeSize / 8)) == 0) OnAccessFunc = IsWrite ? 
TsanWrite[Idx] : TsanRead[Idx]; @@ -504,11 +503,11 @@ bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) { // The following page contains more background information: // http://www.hpl.hp.com/personal/Hans_Boehm/c++mm/ -bool ThreadSanitizer::instrumentAtomic(Instruction *I) { +bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { IRBuilder<> IRB(I); if (LoadInst *LI = dyn_cast<LoadInst>(I)) { Value *Addr = LI->getPointerOperand(); - int Idx = getMemoryAccessFuncIndex(Addr); + int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; const size_t ByteSize = 1 << Idx; @@ -522,7 +521,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) { } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { Value *Addr = SI->getPointerOperand(); - int Idx = getMemoryAccessFuncIndex(Addr); + int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; const size_t ByteSize = 1 << Idx; @@ -536,7 +535,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) { ReplaceInstWithInst(I, C); } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) { Value *Addr = RMWI->getPointerOperand(); - int Idx = getMemoryAccessFuncIndex(Addr); + int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx]; @@ -553,7 +552,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) { ReplaceInstWithInst(I, C); } else if (AtomicCmpXchgInst *CASI = dyn_cast<AtomicCmpXchgInst>(I)) { Value *Addr = CASI->getPointerOperand(); - int Idx = getMemoryAccessFuncIndex(Addr); + int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; const size_t ByteSize = 1 << Idx; @@ -583,11 +582,12 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) { return true; } -int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr) { +int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr, + const DataLayout &DL) { Type *OrigPtrTy = Addr->getType(); Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType(); assert(OrigTy->isSized()); - uint32_t TypeSize = DL->getTypeStoreSizeInBits(OrigTy); + uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy); if (TypeSize != 8 && TypeSize != 16 && TypeSize != 32 && TypeSize != 64 && TypeSize != 128) { NumAccessesWithBadSize++; diff --git a/lib/Transforms/ObjCARC/ARCInstKind.cpp b/lib/Transforms/ObjCARC/ARCInstKind.cpp index f1e9dce..72df9ab 100644 --- a/lib/Transforms/ObjCARC/ARCInstKind.cpp +++ b/lib/Transforms/ObjCARC/ARCInstKind.cpp @@ -168,6 +168,60 @@ ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) { return ARCInstKind::CallOrUser; } +// A whitelist of intrinsics that we know do not use objc pointers or decrement +// ref counts. +static bool isInertIntrinsic(unsigned ID) { + // TODO: Make this into a covered switch. 
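(The body of isInertIntrinsic, a listing of the inert intrinsics, follows.) One note on the tsan hunk that just ended above: getMemoryAccessFuncIndex accepts only power-of-two access sizes from 8 to 128 bits, and its callers compute ByteSize == 1 << Idx, so Idx is simply log2 of the byte size. A hypothetical standalone version of that mapping (the returned slot selects among the runtime callbacks, e.g. __tsan_read4/__tsan_write4 for the 4-byte case):

    // Derived from the callers shown above (ByteSize == 1 << Idx); the real
    // function computes the size via DL.getTypeStoreSizeInBits(OrigTy).
    static int accessSizeIndex(uint32_t TypeSizeInBits) {
      switch (TypeSizeInBits) {
      case 8:   return 0;  // 1-byte accesses
      case 16:  return 1;  // 2-byte
      case 32:  return 2;  // 4-byte
      case 64:  return 3;  // 8-byte
      case 128: return 4;  // 16-byte
      default:  return -1; // bad size: caller bumps NumAccessesWithBadSize
                           // and skips instrumentation
      }
    }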
+ switch (ID) { + case Intrinsic::returnaddress: + case Intrinsic::frameaddress: + case Intrinsic::stacksave: + case Intrinsic::stackrestore: + case Intrinsic::vastart: + case Intrinsic::vacopy: + case Intrinsic::vaend: + case Intrinsic::objectsize: + case Intrinsic::prefetch: + case Intrinsic::stackprotector: + case Intrinsic::eh_return_i32: + case Intrinsic::eh_return_i64: + case Intrinsic::eh_typeid_for: + case Intrinsic::eh_dwarf_cfa: + case Intrinsic::eh_sjlj_lsda: + case Intrinsic::eh_sjlj_functioncontext: + case Intrinsic::init_trampoline: + case Intrinsic::adjust_trampoline: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + // Don't let dbg info affect our results. + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + // Short cut: Some intrinsics obviously don't use ObjC pointers. + return true; + default: + return false; + } +} + +// A whitelist of intrinsics that we know do not use objc pointers or decrement +// ref counts. +static bool isUseOnlyIntrinsic(unsigned ID) { + // We are conservative and even though intrinsics are unlikely to touch + // reference counts, we white list them for safety. + // + // TODO: Expand this into a covered switch. There is a lot more here. + switch (ID) { + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::memset: + return true; + default: + return false; + } +} + /// \brief Determine what kind of construct V is. ARCInstKind llvm::objcarc::GetARCInstKind(const Value *V) { if (const Instruction *I = dyn_cast<Instruction>(V)) { @@ -180,49 +234,23 @@ ARCInstKind llvm::objcarc::GetARCInstKind(const Value *V) { switch (I->getOpcode()) { case Instruction::Call: { const CallInst *CI = cast<CallInst>(I); - // Check for calls to special functions. + // See if we have a function that we know something about. if (const Function *F = CI->getCalledFunction()) { ARCInstKind Class = GetFunctionClass(F); if (Class != ARCInstKind::CallOrUser) return Class; - - // None of the intrinsic functions do objc_release. For intrinsics, the - // only question is whether or not they may be users. - switch (F->getIntrinsicID()) { - case Intrinsic::returnaddress: - case Intrinsic::frameaddress: - case Intrinsic::stacksave: - case Intrinsic::stackrestore: - case Intrinsic::vastart: - case Intrinsic::vacopy: - case Intrinsic::vaend: - case Intrinsic::objectsize: - case Intrinsic::prefetch: - case Intrinsic::stackprotector: - case Intrinsic::eh_return_i32: - case Intrinsic::eh_return_i64: - case Intrinsic::eh_typeid_for: - case Intrinsic::eh_dwarf_cfa: - case Intrinsic::eh_sjlj_lsda: - case Intrinsic::eh_sjlj_functioncontext: - case Intrinsic::init_trampoline: - case Intrinsic::adjust_trampoline: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - // Don't let dbg info affect our results. - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - // Short cut: Some intrinsics obviously don't use ObjC pointers. + unsigned ID = F->getIntrinsicID(); + if (isInertIntrinsic(ID)) return ARCInstKind::None; - default: - break; - } + if (isUseOnlyIntrinsic(ID)) + return ARCInstKind::User; } + + // Otherwise, be conservative. return GetCallSiteClass(CI); } case Instruction::Invoke: + // Otherwise, be conservative. 
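Concretely, with the two whitelists above in place (MemCpyCall and DbgValCall are hypothetical stand-ins for calls to llvm.memcpy and llvm.dbg.value, not names from this commit):

    // memcpy reads and writes through its pointer arguments but can never
    // decrement a reference count, so it is merely a user:
    ARCInstKind K1 = GetARCInstKind(MemCpyCall);  // ARCInstKind::User

    // dbg.value neither uses ObjC pointers nor touches ref counts:
    ARCInstKind K2 = GetARCInstKind(DbgValCall);  // ARCInstKind::None

Anything not caught by GetFunctionClass or either whitelist gets the conservative GetCallSiteClass answer, and invokes, as the next line shows, are always classified that way.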
return GetCallSiteClass(cast<InvokeInst>(I)); case Instruction::BitCast: case Instruction::GetElementPtr: diff --git a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h index e286dbc..87de33b 100644 --- a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h +++ b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h @@ -27,22 +27,22 @@ namespace llvm { namespace objcarc { +enum class ARCRuntimeEntryPointKind { + AutoreleaseRV, + Release, + Retain, + RetainBlock, + Autorelease, + StoreStrong, + RetainRV, + RetainAutorelease, + RetainAutoreleaseRV, +}; + /// Declarations for ObjC runtime functions and constants. These are initialized /// lazily to avoid cluttering up the Module with unused declarations. class ARCRuntimeEntryPoints { public: - enum EntryPointType { - EPT_AutoreleaseRV, - EPT_Release, - EPT_Retain, - EPT_RetainBlock, - EPT_Autorelease, - EPT_StoreStrong, - EPT_RetainRV, - EPT_RetainAutorelease, - EPT_RetainAutoreleaseRV - }; - ARCRuntimeEntryPoints() : TheModule(nullptr), AutoreleaseRV(nullptr), Release(nullptr), @@ -56,7 +56,7 @@ public: ~ARCRuntimeEntryPoints() { } - void Initialize(Module *M) { + void init(Module *M) { TheModule = M; AutoreleaseRV = nullptr; Release = nullptr; @@ -69,30 +69,30 @@ public: RetainAutoreleaseRV = nullptr; } - Constant *get(const EntryPointType entry) { + Constant *get(ARCRuntimeEntryPointKind kind) { assert(TheModule != nullptr && "Not initialized."); - switch (entry) { - case EPT_AutoreleaseRV: + switch (kind) { + case ARCRuntimeEntryPointKind::AutoreleaseRV: return getI8XRetI8XEntryPoint(AutoreleaseRV, "objc_autoreleaseReturnValue", true); - case EPT_Release: + case ARCRuntimeEntryPointKind::Release: return getVoidRetI8XEntryPoint(Release, "objc_release"); - case EPT_Retain: + case ARCRuntimeEntryPointKind::Retain: return getI8XRetI8XEntryPoint(Retain, "objc_retain", true); - case EPT_RetainBlock: + case ARCRuntimeEntryPointKind::RetainBlock: return getI8XRetI8XEntryPoint(RetainBlock, "objc_retainBlock", false); - case EPT_Autorelease: + case ARCRuntimeEntryPointKind::Autorelease: return getI8XRetI8XEntryPoint(Autorelease, "objc_autorelease", true); - case EPT_StoreStrong: + case ARCRuntimeEntryPointKind::StoreStrong: return getI8XRetI8XXI8XEntryPoint(StoreStrong, "objc_storeStrong"); - case EPT_RetainRV: + case ARCRuntimeEntryPointKind::RetainRV: return getI8XRetI8XEntryPoint(RetainRV, "objc_retainAutoreleasedReturnValue", true); - case EPT_RetainAutorelease: + case ARCRuntimeEntryPointKind::RetainAutorelease: return getI8XRetI8XEntryPoint(RetainAutorelease, "objc_retainAutorelease", true); - case EPT_RetainAutoreleaseRV: + case ARCRuntimeEntryPointKind::RetainAutoreleaseRV: return getI8XRetI8XEntryPoint(RetainAutoreleaseRV, "objc_retainAutoreleaseReturnValue", true); } diff --git a/lib/Transforms/ObjCARC/Android.mk b/lib/Transforms/ObjCARC/Android.mk index 97c5a9d..e120fbe 100644 --- a/lib/Transforms/ObjCARC/Android.mk +++ b/lib/Transforms/ObjCARC/Android.mk @@ -9,6 +9,7 @@ transforms_objcarc_SRC_FILES := \ ObjCARC.cpp \ ObjCARCExpand.cpp \ ObjCARCOpts.cpp \ + PtrState.cpp \ ProvenanceAnalysis.cpp \ ProvenanceAnalysisEvaluator.cpp diff --git a/lib/Transforms/ObjCARC/BlotMapVector.h b/lib/Transforms/ObjCARC/BlotMapVector.h new file mode 100644 index 0000000..d6439b6 --- /dev/null +++ b/lib/Transforms/ObjCARC/BlotMapVector.h @@ -0,0 +1,108 @@ +//===- BlotMapVector.h - A MapVector with the blot operation -*- C++ -*----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the 
University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include <vector> +#include <algorithm> + +namespace llvm { +/// \brief An associative container with fast insertion-order (deterministic) +/// iteration over its elements. Plus the special blot operation. +template <class KeyT, class ValueT> class BlotMapVector { + /// Map keys to indices in Vector. + typedef DenseMap<KeyT, size_t> MapTy; + MapTy Map; + + typedef std::vector<std::pair<KeyT, ValueT>> VectorTy; + /// Keys and values. + VectorTy Vector; + +public: + typedef typename VectorTy::iterator iterator; + typedef typename VectorTy::const_iterator const_iterator; + iterator begin() { return Vector.begin(); } + iterator end() { return Vector.end(); } + const_iterator begin() const { return Vector.begin(); } + const_iterator end() const { return Vector.end(); } + +#ifdef XDEBUG + ~BlotMapVector() { + assert(Vector.size() >= Map.size()); // May differ due to blotting. + for (typename MapTy::const_iterator I = Map.begin(), E = Map.end(); I != E; + ++I) { + assert(I->second < Vector.size()); + assert(Vector[I->second].first == I->first); + } + for (typename VectorTy::const_iterator I = Vector.begin(), E = Vector.end(); + I != E; ++I) + assert(!I->first || (Map.count(I->first) && + Map[I->first] == size_t(I - Vector.begin()))); + } +#endif + + ValueT &operator[](const KeyT &Arg) { + std::pair<typename MapTy::iterator, bool> Pair = + Map.insert(std::make_pair(Arg, size_t(0))); + if (Pair.second) { + size_t Num = Vector.size(); + Pair.first->second = Num; + Vector.push_back(std::make_pair(Arg, ValueT())); + return Vector[Num].second; + } + return Vector[Pair.first->second].second; + } + + std::pair<iterator, bool> insert(const std::pair<KeyT, ValueT> &InsertPair) { + std::pair<typename MapTy::iterator, bool> Pair = + Map.insert(std::make_pair(InsertPair.first, size_t(0))); + if (Pair.second) { + size_t Num = Vector.size(); + Pair.first->second = Num; + Vector.push_back(InsertPair); + return std::make_pair(Vector.begin() + Num, true); + } + return std::make_pair(Vector.begin() + Pair.first->second, false); + } + + iterator find(const KeyT &Key) { + typename MapTy::iterator It = Map.find(Key); + if (It == Map.end()) + return Vector.end(); + return Vector.begin() + It->second; + } + + const_iterator find(const KeyT &Key) const { + typename MapTy::const_iterator It = Map.find(Key); + if (It == Map.end()) + return Vector.end(); + return Vector.begin() + It->second; + } + + /// This is similar to erase, but instead of removing the element from the + /// vector, it just zeros out the key in the vector. This leaves iterators + /// intact, but clients must be prepared for zeroed-out keys when iterating. 
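A usage sketch before the implementation of blot() below (a hypothetical caller; A and B stand for any two live keys):

    #include "BlotMapVector.h"
    #include "llvm/IR/Value.h"

    void example(const llvm::Value *A, const llvm::Value *B) {
      llvm::BlotMapVector<const llvm::Value *, unsigned> MV;
      MV[A] = 1;   // stored at vector index 0
      MV[B] = 2;   // stored at vector index 1
      MV.blot(A);  // map entry erased; vector slot 0 survives with a
                   // nulled-out key, so B's iteration position is stable
      for (auto &P : MV)
        if (P.first)      // clients skip blotted (null-key) entries
          (void)P.second; // only B's mapping is still visible
    }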
+ void blot(const KeyT &Key) { + typename MapTy::iterator It = Map.find(Key); + if (It == Map.end()) + return; + Vector[It->second].first = KeyT(); + Map.erase(It); + } + + void clear() { + Map.clear(); + Vector.clear(); + } + + bool empty() const { + assert(Map.empty() == Vector.empty()); + return Map.empty(); + } +}; +} // diff --git a/lib/Transforms/ObjCARC/CMakeLists.txt b/lib/Transforms/ObjCARC/CMakeLists.txt index 2adea88..fbcae29 100644 --- a/lib/Transforms/ObjCARC/CMakeLists.txt +++ b/lib/Transforms/ObjCARC/CMakeLists.txt @@ -9,6 +9,7 @@ add_llvm_library(LLVMObjCARCOpts DependencyAnalysis.cpp ProvenanceAnalysis.cpp ProvenanceAnalysisEvaluator.cpp + PtrState.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp index 4985d0e..b197c97 100644 --- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp +++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp @@ -53,10 +53,12 @@ bool llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr, if (AliasAnalysis::onlyReadsMemory(MRB)) return false; if (AliasAnalysis::onlyAccessesArgPointees(MRB)) { + const DataLayout &DL = Inst->getModule()->getDataLayout(); for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; ++I) { const Value *Op = *I; - if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) + if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && + PA.related(Ptr, Op, DL)) return true; } return false; @@ -87,6 +89,8 @@ bool llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr, if (Class == ARCInstKind::Call) return false; + const DataLayout &DL = Inst->getModule()->getDataLayout(); + // Consider various instructions which may have pointer arguments which are // not "uses". if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Inst)) { @@ -100,24 +104,26 @@ bool llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr, for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(), OE = CS.arg_end(); OI != OE; ++OI) { const Value *Op = *OI; - if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) + if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && + PA.related(Ptr, Op, DL)) return true; } return false; } else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { // Special-case stores, because we don't care about the stored value, just // the store address. - const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand()); + const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand(), DL); // If we can't tell what the underlying object was, assume there is a // dependence. - return IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Op, Ptr); + return IsPotentialRetainableObjPtr(Op, *PA.getAA()) && + PA.related(Op, Ptr, DL); } // Check each operand for a match. 
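The loop below performs that operand-by-operand check. For the store special-case just above, note what GetUnderlyingObjCPtr adds over plain GetUnderlyingObject (its DataLayout-taking definition appears in the ObjCARC.h hunk further down): it also strips ObjC forwarding calls. An illustration, wrapped in a hypothetical helper so the snippet stands alone:

    // For IR along the lines of:
    //   %obj  = alloca i8
    //   %kept = call i8* @objc_retain(i8* %obj)  ; forwards its argument
    //   %gep  = getelementptr i8, i8* %kept, i64 1
    // GetUnderlyingObject(%gep, DL) stops at %kept, a call result, while
    // GetUnderlyingObjCPtr keeps stripping and reaches the alloca %obj.
    const Value *underlyingExample(const Value *GEP, const DataLayout &DL) {
      return GetUnderlyingObjCPtr(GEP, DL); // yields %obj in the IR above
    }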
for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end(); OI != OE; ++OI) { const Value *Op = *OI; - if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) + if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op, DL)) return true; } return false; diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h index df29f05..7595e2d 100644 --- a/lib/Transforms/ObjCARC/ObjCARC.h +++ b/lib/Transforms/ObjCARC/ObjCARC.h @@ -24,6 +24,7 @@ #define LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARC_H #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Optional.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ValueTracking.h" @@ -72,9 +73,10 @@ static inline bool ModuleHasARC(const Module &M) { /// \brief This is a wrapper around getUnderlyingObject which also knows how to /// look through objc_retain and objc_autorelease calls, which we know to return /// their argument verbatim. -static inline const Value *GetUnderlyingObjCPtr(const Value *V) { +static inline const Value *GetUnderlyingObjCPtr(const Value *V, + const DataLayout &DL) { for (;;) { - V = GetUnderlyingObject(V); + V = GetUnderlyingObject(V, DL); if (!IsForwarding(GetBasicARCInstKind(V))) break; V = cast<CallInst>(V)->getArgOperand(0); @@ -257,6 +259,55 @@ static inline bool IsObjCIdentifiedObject(const Value *V) { return false; } +enum class ARCMDKindID { + ImpreciseRelease, + CopyOnEscape, + NoObjCARCExceptions, +}; + +/// A cache of MDKinds used by various ARC optimizations. +class ARCMDKindCache { + Module *M; + + /// The Metadata Kind for clang.imprecise_release metadata. + llvm::Optional<unsigned> ImpreciseReleaseMDKind; + + /// The Metadata Kind for clang.arc.copy_on_escape metadata. + llvm::Optional<unsigned> CopyOnEscapeMDKind; + + /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata. 
+ llvm::Optional<unsigned> NoObjCARCExceptionsMDKind; + +public: + void init(Module *Mod) { + M = Mod; + ImpreciseReleaseMDKind = NoneType::None; + CopyOnEscapeMDKind = NoneType::None; + NoObjCARCExceptionsMDKind = NoneType::None; + } + + unsigned get(ARCMDKindID ID) { + switch (ID) { + case ARCMDKindID::ImpreciseRelease: + if (!ImpreciseReleaseMDKind) + ImpreciseReleaseMDKind = + M->getContext().getMDKindID("clang.imprecise_release"); + return *ImpreciseReleaseMDKind; + case ARCMDKindID::CopyOnEscape: + if (!CopyOnEscapeMDKind) + CopyOnEscapeMDKind = + M->getContext().getMDKindID("clang.arc.copy_on_escape"); + return *CopyOnEscapeMDKind; + case ARCMDKindID::NoObjCARCExceptions: + if (!NoObjCARCExceptionsMDKind) + NoObjCARCExceptionsMDKind = + M->getContext().getMDKindID("clang.arc.no_objc_arc_exceptions"); + return *NoObjCARCExceptionsMDKind; + } + llvm_unreachable("Covered switch isn't covered?!"); + } +}; + } // end namespace objcarc } // end namespace llvm diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp index be291a0..b1515e3 100644 --- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp @@ -46,6 +46,11 @@ ImmutablePass *llvm::createObjCARCAliasAnalysisPass() { return new ObjCARCAliasAnalysis(); } +bool ObjCARCAliasAnalysis::doInitialization(Module &M) { + InitializeAliasAnalysis(this, &M.getDataLayout()); + return true; +} + void ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -69,8 +74,8 @@ ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) { // If that failed, climb to the underlying object, including climbing through // ObjC-specific no-ops, and try making an imprecise alias query. - const Value *UA = GetUnderlyingObjCPtr(SA); - const Value *UB = GetUnderlyingObjCPtr(SB); + const Value *UA = GetUnderlyingObjCPtr(SA, *DL); + const Value *UB = GetUnderlyingObjCPtr(SB, *DL); if (UA != SA || UB != SB) { Result = AliasAnalysis::alias(Location(UA), Location(UB)); // We can't use MustAlias or PartialAlias results here because @@ -99,7 +104,7 @@ ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc, // If that failed, climb to the underlying object, including climbing through // ObjC-specific no-ops, and try making an imprecise alias query. - const Value *U = GetUnderlyingObjCPtr(S); + const Value *U = GetUnderlyingObjCPtr(S, *DL); if (U != S) return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal); diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h index 3fcea4e..3c5a021 100644 --- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h +++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h @@ -44,9 +44,7 @@ namespace objcarc { } private: - void initializePass() override { - InitializeAliasAnalysis(this); - } + bool doInitialization(Module &M) override; /// This method is used when a pass implements an analysis interface through /// multiple inheritance. 
If needed, it should override this to adjust the diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp index 6473d3a..2a3139f 100644 --- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::objcarc; @@ -134,7 +135,7 @@ bool ObjCARCContract::optimizeRetainCall(Function &F, Instruction *Retain) { // We do not have to worry about tail calls/does not throw since // retain/retainRV have the same properties. - Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_RetainRV); + Constant *Decl = EP.get(ARCRuntimeEntryPointKind::RetainRV); cast<CallInst>(Retain)->setCalledFunction(Decl); DEBUG(dbgs() << "New: " << *Retain << "\n"); @@ -181,8 +182,8 @@ bool ObjCARCContract::contractAutorelease( " Retain: " << *Retain << "\n"); Constant *Decl = EP.get(Class == ARCInstKind::AutoreleaseRV - ? ARCRuntimeEntryPoints::EPT_RetainAutoreleaseRV - : ARCRuntimeEntryPoints::EPT_RetainAutorelease); + ? ARCRuntimeEntryPointKind::RetainAutoreleaseRV + : ARCRuntimeEntryPointKind::RetainAutorelease); Retain->setCalledFunction(Decl); DEBUG(dbgs() << " New RetainAutorelease: " << *Retain << "\n"); @@ -380,7 +381,7 @@ void ObjCARCContract::tryToContractReleaseIntoStoreStrong(Instruction *Release, Args[0] = new BitCastInst(Args[0], I8XX, "", Store); if (Args[1]->getType() != I8X) Args[1] = new BitCastInst(Args[1], I8X, "", Store); - Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_StoreStrong); + Constant *Decl = EP.get(ARCRuntimeEntryPointKind::StoreStrong); CallInst *StoreStrong = CallInst::Create(Decl, Args, "", Store); StoreStrong->setDoesNotThrow(); StoreStrong->setDebugLoc(Store->getDebugLoc()); @@ -647,7 +648,7 @@ bool ObjCARCContract::doInitialization(Module &M) { if (!Run) return false; - EP.Initialize(&M); + EP.init(&M); // Initialize RetainRVMarker. RetainRVMarker = nullptr; diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index f55b77f..4d75658 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -26,9 +26,11 @@ #include "ObjCARC.h" #include "ARCRuntimeEntryPoints.h" +#include "BlotMapVector.h" #include "DependencyAnalysis.h" #include "ObjCARCAliasAnalysis.h" #include "ProvenanceAnalysis.h" +#include "PtrState.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" @@ -45,102 +47,6 @@ using namespace llvm::objcarc; #define DEBUG_TYPE "objc-arc-opts" -/// \defgroup MiscUtils Miscellaneous utilities that are not ARC specific. -/// @{ - -namespace { - /// \brief An associative container with fast insertion-order (deterministic) - /// iteration over its elements. Plus the special blot operation. - template<class KeyT, class ValueT> - class MapVector { - /// Map keys to indices in Vector. - typedef DenseMap<KeyT, size_t> MapTy; - MapTy Map; - - typedef std::vector<std::pair<KeyT, ValueT> > VectorTy; - /// Keys and values. 
- VectorTy Vector; - - public: - typedef typename VectorTy::iterator iterator; - typedef typename VectorTy::const_iterator const_iterator; - iterator begin() { return Vector.begin(); } - iterator end() { return Vector.end(); } - const_iterator begin() const { return Vector.begin(); } - const_iterator end() const { return Vector.end(); } - -#ifdef XDEBUG - ~MapVector() { - assert(Vector.size() >= Map.size()); // May differ due to blotting. - for (typename MapTy::const_iterator I = Map.begin(), E = Map.end(); - I != E; ++I) { - assert(I->second < Vector.size()); - assert(Vector[I->second].first == I->first); - } - for (typename VectorTy::const_iterator I = Vector.begin(), - E = Vector.end(); I != E; ++I) - assert(!I->first || - (Map.count(I->first) && - Map[I->first] == size_t(I - Vector.begin()))); - } -#endif - - ValueT &operator[](const KeyT &Arg) { - std::pair<typename MapTy::iterator, bool> Pair = - Map.insert(std::make_pair(Arg, size_t(0))); - if (Pair.second) { - size_t Num = Vector.size(); - Pair.first->second = Num; - Vector.push_back(std::make_pair(Arg, ValueT())); - return Vector[Num].second; - } - return Vector[Pair.first->second].second; - } - - std::pair<iterator, bool> - insert(const std::pair<KeyT, ValueT> &InsertPair) { - std::pair<typename MapTy::iterator, bool> Pair = - Map.insert(std::make_pair(InsertPair.first, size_t(0))); - if (Pair.second) { - size_t Num = Vector.size(); - Pair.first->second = Num; - Vector.push_back(InsertPair); - return std::make_pair(Vector.begin() + Num, true); - } - return std::make_pair(Vector.begin() + Pair.first->second, false); - } - - iterator find(const KeyT &Key) { - typename MapTy::iterator It = Map.find(Key); - if (It == Map.end()) return Vector.end(); - return Vector.begin() + It->second; - } - - const_iterator find(const KeyT &Key) const { - typename MapTy::const_iterator It = Map.find(Key); - if (It == Map.end()) return Vector.end(); - return Vector.begin() + It->second; - } - - /// This is similar to erase, but instead of removing the element from the - /// vector, it just zeros out the key in the vector. This leaves iterators - /// intact, but clients must be prepared for zeroed-out keys when iterating. - void blot(const KeyT &Key) { - typename MapTy::iterator It = Map.find(Key); - if (It == Map.end()) return; - Vector[It->second].first = KeyT(); - Map.erase(It); - } - - void clear() { - Map.clear(); - Vector.clear(); - } - }; -} - -/// @} -/// /// \defgroup ARCUtilities Utility declarations/definitions specific to ARC. /// @{ @@ -177,13 +83,14 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { /// This is a wrapper around getUnderlyingObjCPtr along the lines of /// GetUnderlyingObjects except that it returns early when it sees the first /// alloca. -static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V) { +static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V, + const DataLayout &DL) { SmallPtrSet<const Value *, 4> Visited; SmallVector<const Value *, 4> Worklist; Worklist.push_back(V); do { const Value *P = Worklist.pop_back_val(); - P = GetUnderlyingObjCPtr(P); + P = GetUnderlyingObjCPtr(P, DL); if (isa<AllocaInst>(P)) return true; @@ -270,293 +177,6 @@ STATISTIC(NumReleasesAfterOpt, #endif namespace { - /// \enum Sequence - /// - /// \brief A sequence of states that a pointer may go through in which an - /// objc_retain and objc_release are actually needed. - enum Sequence { - S_None, - S_Retain, ///< objc_retain(x). 
- S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement. - S_Use, ///< any use of x. - S_Stop, ///< like S_Release, but code motion is stopped. - S_Release, ///< objc_release(x). - S_MovableRelease ///< objc_release(x), !clang.imprecise_release. - }; - - raw_ostream &operator<<(raw_ostream &OS, const Sequence S) - LLVM_ATTRIBUTE_UNUSED; - raw_ostream &operator<<(raw_ostream &OS, const Sequence S) { - switch (S) { - case S_None: - return OS << "S_None"; - case S_Retain: - return OS << "S_Retain"; - case S_CanRelease: - return OS << "S_CanRelease"; - case S_Use: - return OS << "S_Use"; - case S_Release: - return OS << "S_Release"; - case S_MovableRelease: - return OS << "S_MovableRelease"; - case S_Stop: - return OS << "S_Stop"; - } - llvm_unreachable("Unknown sequence type."); - } -} - -static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) { - // The easy cases. - if (A == B) - return A; - if (A == S_None || B == S_None) - return S_None; - - if (A > B) std::swap(A, B); - if (TopDown) { - // Choose the side which is further along in the sequence. - if ((A == S_Retain || A == S_CanRelease) && - (B == S_CanRelease || B == S_Use)) - return B; - } else { - // Choose the side which is further along in the sequence. - if ((A == S_Use || A == S_CanRelease) && - (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease)) - return A; - // If both sides are releases, choose the more conservative one. - if (A == S_Stop && (B == S_Release || B == S_MovableRelease)) - return A; - if (A == S_Release && B == S_MovableRelease) - return A; - } - - return S_None; -} - -namespace { - /// \brief Unidirectional information about either a - /// retain-decrement-use-release sequence or release-use-decrement-retain - /// reverse sequence. - struct RRInfo { - /// After an objc_retain, the reference count of the referenced - /// object is known to be positive. Similarly, before an objc_release, the - /// reference count of the referenced object is known to be positive. If - /// there are retain-release pairs in code regions where the retain count - /// is known to be positive, they can be eliminated, regardless of any side - /// effects between them. - /// - /// Also, a retain+release pair nested within another retain+release - /// pair all on the known same pointer value can be eliminated, regardless - /// of any intervening side effects. - /// - /// KnownSafe is true when either of these conditions is satisfied. - bool KnownSafe; - - /// True of the objc_release calls are all marked with the "tail" keyword. - bool IsTailCallRelease; - - /// If the Calls are objc_release calls and they all have a - /// clang.imprecise_release tag, this is the metadata tag. - MDNode *ReleaseMetadata; - - /// For a top-down sequence, the set of objc_retains or - /// objc_retainBlocks. For bottom-up, the set of objc_releases. - SmallPtrSet<Instruction *, 2> Calls; - - /// The set of optimal insert positions for moving calls in the opposite - /// sequence. - SmallPtrSet<Instruction *, 2> ReverseInsertPts; - - /// If this is true, we cannot perform code motion but can still remove - /// retain/release pairs. - bool CFGHazardAfflicted; - - RRInfo() : - KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(nullptr), - CFGHazardAfflicted(false) {} - - void clear(); - - /// Conservatively merge the two RRInfo. Returns true if a partial merge has - /// occurred, false otherwise. 
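The Merge declaration this comment describes follows below. First, worked examples of the MergeSeqs lattice defined above; they hold for the code as written here, all of which this commit relocates into the new PtrState.{h,cpp} added to the build files earlier in the diff:

    // Given the enum order S_None < S_Retain < S_CanRelease < S_Use
    // < S_Stop < S_Release < S_MovableRelease:
    assert(MergeSeqs(S_Retain, S_Retain,  /*TopDown=*/true)  == S_Retain);
    // Top-down, the merge picks whichever side is further along:
    assert(MergeSeqs(S_Retain, S_Use,     /*TopDown=*/true)  == S_Use);
    // Bottom-up the sequence runs in reverse (release ... use ... retain),
    // so S_Use is the side further along and wins here:
    assert(MergeSeqs(S_Use,    S_Release, /*TopDown=*/false) == S_Use);
    // States with no sensible combination collapse to S_None:
    assert(MergeSeqs(S_Retain, S_Release, /*TopDown=*/false) == S_None);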
- bool Merge(const RRInfo &Other); - - }; -} - -void RRInfo::clear() { - KnownSafe = false; - IsTailCallRelease = false; - ReleaseMetadata = nullptr; - Calls.clear(); - ReverseInsertPts.clear(); - CFGHazardAfflicted = false; -} - -bool RRInfo::Merge(const RRInfo &Other) { - // Conservatively merge the ReleaseMetadata information. - if (ReleaseMetadata != Other.ReleaseMetadata) - ReleaseMetadata = nullptr; - - // Conservatively merge the boolean state. - KnownSafe &= Other.KnownSafe; - IsTailCallRelease &= Other.IsTailCallRelease; - CFGHazardAfflicted |= Other.CFGHazardAfflicted; - - // Merge the call sets. - Calls.insert(Other.Calls.begin(), Other.Calls.end()); - - // Merge the insert point sets. If there are any differences, - // that makes this a partial merge. - bool Partial = ReverseInsertPts.size() != Other.ReverseInsertPts.size(); - for (Instruction *Inst : Other.ReverseInsertPts) - Partial |= ReverseInsertPts.insert(Inst).second; - return Partial; -} - -namespace { - /// \brief This class summarizes several per-pointer runtime properties which - /// are propogated through the flow graph. - class PtrState { - /// True if the reference count is known to be incremented. - bool KnownPositiveRefCount; - - /// True if we've seen an opportunity for partial RR elimination, such as - /// pushing calls into a CFG triangle or into one side of a CFG diamond. - bool Partial; - - /// The current position in the sequence. - unsigned char Seq : 8; - - /// Unidirectional information about the current sequence. - RRInfo RRI; - - public: - PtrState() : KnownPositiveRefCount(false), Partial(false), - Seq(S_None) {} - - - bool IsKnownSafe() const { - return RRI.KnownSafe; - } - - void SetKnownSafe(const bool NewValue) { - RRI.KnownSafe = NewValue; - } - - bool IsTailCallRelease() const { - return RRI.IsTailCallRelease; - } - - void SetTailCallRelease(const bool NewValue) { - RRI.IsTailCallRelease = NewValue; - } - - bool IsTrackingImpreciseReleases() const { - return RRI.ReleaseMetadata != nullptr; - } - - const MDNode *GetReleaseMetadata() const { - return RRI.ReleaseMetadata; - } - - void SetReleaseMetadata(MDNode *NewValue) { - RRI.ReleaseMetadata = NewValue; - } - - bool IsCFGHazardAfflicted() const { - return RRI.CFGHazardAfflicted; - } - - void SetCFGHazardAfflicted(const bool NewValue) { - RRI.CFGHazardAfflicted = NewValue; - } - - void SetKnownPositiveRefCount() { - DEBUG(dbgs() << "Setting Known Positive.\n"); - KnownPositiveRefCount = true; - } - - void ClearKnownPositiveRefCount() { - DEBUG(dbgs() << "Clearing Known Positive.\n"); - KnownPositiveRefCount = false; - } - - bool HasKnownPositiveRefCount() const { - return KnownPositiveRefCount; - } - - void SetSeq(Sequence NewSeq) { - DEBUG(dbgs() << "Old: " << Seq << "; New: " << NewSeq << "\n"); - Seq = NewSeq; - } - - Sequence GetSeq() const { - return static_cast<Sequence>(Seq); - } - - void ClearSequenceProgress() { - ResetSequenceProgress(S_None); - } - - void ResetSequenceProgress(Sequence NewSeq) { - DEBUG(dbgs() << "Resetting sequence progress.\n"); - SetSeq(NewSeq); - Partial = false; - RRI.clear(); - } - - void Merge(const PtrState &Other, bool TopDown); - - void InsertCall(Instruction *I) { - RRI.Calls.insert(I); - } - - void InsertReverseInsertPt(Instruction *I) { - RRI.ReverseInsertPts.insert(I); - } - - void ClearReverseInsertPts() { - RRI.ReverseInsertPts.clear(); - } - - bool HasReverseInsertPts() const { - return !RRI.ReverseInsertPts.empty(); - } - - const RRInfo &GetRRInfo() const { - return RRI; - } - }; -} - -void 
-PtrState::Merge(const PtrState &Other, bool TopDown) { - Seq = MergeSeqs(GetSeq(), Other.GetSeq(), TopDown); - KnownPositiveRefCount &= Other.KnownPositiveRefCount; - - // If we're not in a sequence (anymore), drop all associated state. - if (Seq == S_None) { - Partial = false; - RRI.clear(); - } else if (Partial || Other.Partial) { - // If we're doing a merge on a path that's previously seen a partial - // merge, conservatively drop the sequence, to avoid doing partial - // RR elimination. If the branch predicates for the two merge differ, - // mixing them is unsafe. - ClearSequenceProgress(); - } else { - // Otherwise merge the other PtrState's RRInfo into our RRInfo. At this - // point, we know that currently we are not partial. Stash whether or not - // the merge operation caused us to undergo a partial merging of reverse - // insertion points. - Partial = RRI.Merge(Other.RRI); - } -} - -namespace { /// \brief Per-BasicBlock state. class BBState { /// The number of unique control paths from the entry which can reach this @@ -566,20 +186,18 @@ namespace { /// The number of unique control paths to exits from this block. unsigned BottomUpPathCount; - /// A type for PerPtrTopDown and PerPtrBottomUp. - typedef MapVector<const Value *, PtrState> MapTy; - /// The top-down traversal uses this to record information known about a /// pointer at the bottom of each block. - MapTy PerPtrTopDown; + BlotMapVector<const Value *, TopDownPtrState> PerPtrTopDown; /// The bottom-up traversal uses this to record information known about a /// pointer at the top of each block. - MapTy PerPtrBottomUp; + BlotMapVector<const Value *, BottomUpPtrState> PerPtrBottomUp; /// Effective predecessors of the current block ignoring ignorable edges and /// ignored backedges. SmallVector<BasicBlock *, 2> Preds; + /// Effective successors of the current block ignoring ignorable edges and /// ignored backedges. 
SmallVector<BasicBlock *, 2> Succs; @@ -589,26 +207,38 @@ namespace { BBState() : TopDownPathCount(0), BottomUpPathCount(0) { } - typedef MapTy::iterator ptr_iterator; - typedef MapTy::const_iterator ptr_const_iterator; + typedef decltype(PerPtrTopDown)::iterator top_down_ptr_iterator; + typedef decltype(PerPtrTopDown)::const_iterator const_top_down_ptr_iterator; - ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); } - ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); } - ptr_const_iterator top_down_ptr_begin() const { + top_down_ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); } + top_down_ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); } + const_top_down_ptr_iterator top_down_ptr_begin() const { return PerPtrTopDown.begin(); } - ptr_const_iterator top_down_ptr_end() const { + const_top_down_ptr_iterator top_down_ptr_end() const { return PerPtrTopDown.end(); } + bool hasTopDownPtrs() const { + return !PerPtrTopDown.empty(); + } + + typedef decltype(PerPtrBottomUp)::iterator bottom_up_ptr_iterator; + typedef decltype( + PerPtrBottomUp)::const_iterator const_bottom_up_ptr_iterator; - ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); } - ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); } - ptr_const_iterator bottom_up_ptr_begin() const { + bottom_up_ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); } - ptr_const_iterator bottom_up_ptr_end() const { + bottom_up_ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); } + const_bottom_up_ptr_iterator bottom_up_ptr_begin() const { + return PerPtrBottomUp.begin(); + } + const_bottom_up_ptr_iterator bottom_up_ptr_end() const { return PerPtrBottomUp.end(); } + bool hasBottomUpPtrs() const { + return !PerPtrBottomUp.empty(); + } /// Mark this block as being an entry block, which has one path from the /// entry by definition. @@ -621,20 +251,20 @@ namespace { /// Attempt to find the PtrState object describing the top down state for /// pointer Arg. Return a new initialized PtrState describing the top down /// state for Arg if we do not find one. - PtrState &getPtrTopDownState(const Value *Arg) { + TopDownPtrState &getPtrTopDownState(const Value *Arg) { return PerPtrTopDown[Arg]; } /// Attempt to find the PtrState object describing the bottom up state for /// pointer Arg. Return a new initialized PtrState describing the bottom up /// state for Arg if we do not find one. - PtrState &getPtrBottomUpState(const Value *Arg) { + BottomUpPtrState &getPtrBottomUpState(const Value *Arg) { return PerPtrBottomUp[Arg]; } /// Attempt to find the PtrState object describing the bottom up state for /// pointer Arg. - ptr_iterator findPtrBottomUpState(const Value *Arg) { + bottom_up_ptr_iterator findPtrBottomUpState(const Value *Arg) { return PerPtrBottomUp.find(Arg); } @@ -685,6 +315,11 @@ namespace { const unsigned BBState::OverflowOccurredValue = 0xffffffff; } +namespace llvm { +raw_ostream &operator<<(raw_ostream &OS, + BBState &BBState) LLVM_ATTRIBUTE_UNUSED; +} + void BBState::InitFromPred(const BBState &Other) { PerPtrTopDown = Other.PerPtrTopDown; TopDownPathCount = Other.TopDownPathCount; @@ -724,19 +359,18 @@ void BBState::MergePred(const BBState &Other) { // For each entry in the other set, if our set has an entry with the same key, // merge the entries. Otherwise, copy the entry and merge it with an empty // entry. 
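The two loops below realize that contract for the top-down map, and MergeSucc repeats it bottom-up. The important property, given PtrState::Merge above: merging any state against a default-constructed one collapses it, so a pointer tracked on only one side conservatively loses all sequence progress. A sketch (assuming TopDownPtrState inherits these accessors from the PtrState shown above, as the new PtrState.h arranges):

    TopDownPtrState S;
    S.SetSeq(S_Retain);
    S.SetKnownPositiveRefCount();
    S.Merge(TopDownPtrState(), /*TopDown=*/true);
    // MergeSeqs(S_Retain, S_None, true) == S_None, and the known-positive
    // bit is ANDed with false, so the state is fully reset:
    assert(S.GetSeq() == S_None && !S.HasKnownPositiveRefCount());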
- for (ptr_const_iterator MI = Other.top_down_ptr_begin(), - ME = Other.top_down_ptr_end(); MI != ME; ++MI) { - std::pair<ptr_iterator, bool> Pair = PerPtrTopDown.insert(*MI); - Pair.first->second.Merge(Pair.second ? PtrState() : MI->second, + for (auto MI = Other.top_down_ptr_begin(), ME = Other.top_down_ptr_end(); + MI != ME; ++MI) { + auto Pair = PerPtrTopDown.insert(*MI); + Pair.first->second.Merge(Pair.second ? TopDownPtrState() : MI->second, /*TopDown=*/true); } // For each entry in our set, if the other set doesn't have an entry with the // same key, force it to merge with an empty entry. - for (ptr_iterator MI = top_down_ptr_begin(), - ME = top_down_ptr_end(); MI != ME; ++MI) + for (auto MI = top_down_ptr_begin(), ME = top_down_ptr_end(); MI != ME; ++MI) if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end()) - MI->second.Merge(PtrState(), /*TopDown=*/true); + MI->second.Merge(TopDownPtrState(), /*TopDown=*/true); } /// The bottom-up traversal uses this to merge information about successors to @@ -768,304 +402,80 @@ void BBState::MergeSucc(const BBState &Other) { // For each entry in the other set, if our set has an entry with the // same key, merge the entries. Otherwise, copy the entry and merge // it with an empty entry. - for (ptr_const_iterator MI = Other.bottom_up_ptr_begin(), - ME = Other.bottom_up_ptr_end(); MI != ME; ++MI) { - std::pair<ptr_iterator, bool> Pair = PerPtrBottomUp.insert(*MI); - Pair.first->second.Merge(Pair.second ? PtrState() : MI->second, + for (auto MI = Other.bottom_up_ptr_begin(), ME = Other.bottom_up_ptr_end(); + MI != ME; ++MI) { + auto Pair = PerPtrBottomUp.insert(*MI); + Pair.first->second.Merge(Pair.second ? BottomUpPtrState() : MI->second, /*TopDown=*/false); } // For each entry in our set, if the other set doesn't have an entry // with the same key, force it to merge with an empty entry. - for (ptr_iterator MI = bottom_up_ptr_begin(), - ME = bottom_up_ptr_end(); MI != ME; ++MI) + for (auto MI = bottom_up_ptr_begin(), ME = bottom_up_ptr_end(); MI != ME; + ++MI) if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end()) - MI->second.Merge(PtrState(), /*TopDown=*/false); + MI->second.Merge(BottomUpPtrState(), /*TopDown=*/false); } -// Only enable ARC Annotations if we are building a debug version of -// libObjCARCOpts. -#ifndef NDEBUG -#define ARC_ANNOTATIONS -#endif - -// Define some macros along the lines of DEBUG and some helper functions to make -// it cleaner to create annotations in the source code and to no-op when not -// building in debug mode. -#ifdef ARC_ANNOTATIONS - -#include "llvm/Support/CommandLine.h" - -/// Enable/disable ARC sequence annotations. -static cl::opt<bool> -EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false), - cl::desc("Enable emission of arc data flow analysis " - "annotations")); -static cl::opt<bool> -DisableCheckForCFGHazards("disable-objc-arc-checkforcfghazards", cl::init(false), - cl::desc("Disable check for cfg hazards when " - "annotating")); -static cl::opt<std::string> -ARCAnnotationTargetIdentifier("objc-arc-annotation-target-identifier", - cl::init(""), - cl::desc("filter out all data flow annotations " - "but those that apply to the given " - "target llvm identifier.")); - -/// This function appends a unique ARCAnnotationProvenanceSourceMDKind id to an -/// instruction so that we can track backwards when post processing via the llvm -/// arc annotation processor tool. 
If the function is an -static MDString *AppendMDNodeToSourcePtr(unsigned NodeId, - Value *Ptr) { - MDString *Hash = nullptr; - - // If pointer is a result of an instruction and it does not have a source - // MDNode it, attach a new MDNode onto it. If pointer is a result of - // an instruction and does have a source MDNode attached to it, return a - // reference to said Node. Otherwise just return 0. - if (Instruction *Inst = dyn_cast<Instruction>(Ptr)) { - MDNode *Node; - if (!(Node = Inst->getMetadata(NodeId))) { - // We do not have any node. Generate and attatch the hash MDString to the - // instruction. - - // We just use an MDString to ensure that this metadata gets written out - // of line at the module level and to provide a very simple format - // encoding the information herein. Both of these makes it simpler to - // parse the annotations by a simple external program. - std::string Str; - raw_string_ostream os(Str); - os << "(" << Inst->getParent()->getParent()->getName() << ",%" - << Inst->getName() << ")"; - - Hash = MDString::get(Inst->getContext(), os.str()); - Inst->setMetadata(NodeId, MDNode::get(Inst->getContext(),Hash)); - } else { - // We have a node. Grab its hash and return it. - assert(Node->getNumOperands() == 1 && - "An ARCAnnotationProvenanceSourceMDKind can only have 1 operand."); - Hash = cast<MDString>(Node->getOperand(0)); +raw_ostream &llvm::operator<<(raw_ostream &OS, BBState &BBInfo) { + // Dump the pointers we are tracking. + OS << " TopDown State:\n"; + if (!BBInfo.hasTopDownPtrs()) { + DEBUG(llvm::dbgs() << " NONE!\n"); + } else { + for (auto I = BBInfo.top_down_ptr_begin(), E = BBInfo.top_down_ptr_end(); + I != E; ++I) { + const PtrState &P = I->second; + OS << " Ptr: " << *I->first + << "\n KnownSafe: " << (P.IsKnownSafe()?"true":"false") + << "\n ImpreciseRelease: " + << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n" + << " HasCFGHazards: " + << (P.IsCFGHazardAfflicted()?"true":"false") << "\n" + << " KnownPositive: " + << (P.HasKnownPositiveRefCount()?"true":"false") << "\n" + << " Seq: " + << P.GetSeq() << "\n"; } - } else if (Argument *Arg = dyn_cast<Argument>(Ptr)) { - std::string str; - raw_string_ostream os(str); - os << "(" << Arg->getParent()->getName() << ",%" << Arg->getName() - << ")"; - Hash = MDString::get(Arg->getContext(), os.str()); - } - - return Hash; -} - -static std::string SequenceToString(Sequence A) { - std::string str; - raw_string_ostream os(str); - os << A; - return os.str(); -} - -/// Helper function to change a Sequence into a String object using our overload -/// for raw_ostream so we only have printing code in one location. -static MDString *SequenceToMDString(LLVMContext &Context, - Sequence A) { - return MDString::get(Context, SequenceToString(A)); -} - -/// A simple function to generate a MDNode which describes the change in state -/// for Value *Ptr caused by Instruction *Inst. -static void AppendMDNodeToInstForPtr(unsigned NodeId, - Instruction *Inst, - Value *Ptr, - MDString *PtrSourceMDNodeID, - Sequence OldSeq, - Sequence NewSeq) { - MDNode *Node = nullptr; - Metadata *tmp[3] = {PtrSourceMDNodeID, - SequenceToMDString(Inst->getContext(), OldSeq), - SequenceToMDString(Inst->getContext(), NewSeq)}; - Node = MDNode::get(Inst->getContext(), tmp); - - Inst->setMetadata(NodeId, Node); -} - -/// Add to the beginning of the basic block llvm.ptr.annotations which show the -/// state of a pointer at the entrance to a basic block. 
-static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB, - Value *Ptr, Sequence Seq) { - // If we have a target identifier, make sure that we match it before - // continuing. - if(!ARCAnnotationTargetIdentifier.empty() && - !Ptr->getName().equals(ARCAnnotationTargetIdentifier)) - return; - - Module *M = BB->getParent()->getParent(); - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *I8XX = PointerType::getUnqual(I8X); - Type *Params[] = {I8XX, I8XX}; - FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), Params, - /*isVarArg=*/false); - Constant *Callee = M->getOrInsertFunction(Name, FTy); - - IRBuilder<> Builder(BB, BB->getFirstInsertionPt()); - - Value *PtrName; - StringRef Tmp = Ptr->getName(); - if (nullptr == (PtrName = M->getGlobalVariable(Tmp, true))) { - Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp, - Tmp + "_STR"); - PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, - cast<Constant>(ActualPtrName), Tmp); - } - - Value *S; - std::string SeqStr = SequenceToString(Seq); - if (nullptr == (S = M->getGlobalVariable(SeqStr, true))) { - Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr, - SeqStr + "_STR"); - S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, - cast<Constant>(ActualPtrName), SeqStr); - } - - Builder.CreateCall2(Callee, PtrName, S); -} - -/// Add to the end of the basic block llvm.ptr.annotations which show the state -/// of the pointer at the bottom of the basic block. -static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB, - Value *Ptr, Sequence Seq) { - // If we have a target identifier, make sure that we match it before emitting - // an annotation. - if(!ARCAnnotationTargetIdentifier.empty() && - !Ptr->getName().equals(ARCAnnotationTargetIdentifier)) - return; - - Module *M = BB->getParent()->getParent(); - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *I8XX = PointerType::getUnqual(I8X); - Type *Params[] = {I8XX, I8XX}; - FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), Params, - /*isVarArg=*/false); - Constant *Callee = M->getOrInsertFunction(Name, FTy); - - IRBuilder<> Builder(BB, std::prev(BB->end())); - - Value *PtrName; - StringRef Tmp = Ptr->getName(); - if (nullptr == (PtrName = M->getGlobalVariable(Tmp, true))) { - Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp, - Tmp + "_STR"); - PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, - cast<Constant>(ActualPtrName), Tmp); } - Value *S; - std::string SeqStr = SequenceToString(Seq); - if (nullptr == (S = M->getGlobalVariable(SeqStr, true))) { - Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr, - SeqStr + "_STR"); - S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, - cast<Constant>(ActualPtrName), SeqStr); + OS << " BottomUp State:\n"; + if (!BBInfo.hasBottomUpPtrs()) { + DEBUG(llvm::dbgs() << " NONE!\n"); + } else { + for (auto I = BBInfo.bottom_up_ptr_begin(), E = BBInfo.bottom_up_ptr_end(); + I != E; ++I) { + const PtrState &P = I->second; + OS << " Ptr: " << *I->first + << "\n KnownSafe: " << (P.IsKnownSafe()?"true":"false") + << "\n ImpreciseRelease: " + << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n" + << " HasCFGHazards: " + << (P.IsCFGHazardAfflicted()?"true":"false") << "\n" + << " KnownPositive: " + << (P.HasKnownPositiveRefCount()?"true":"false") << "\n" + << " Seq: " + << P.GetSeq() << "\n"; + } } 
- Builder.CreateCall2(Callee, PtrName, S); -} -/// Adds a source annotation to pointer and a state change annotation to Inst -/// referencing the source annotation and the old/new state of pointer. -static void GenerateARCAnnotation(unsigned InstMDId, - unsigned PtrMDId, - Instruction *Inst, - Value *Ptr, - Sequence OldSeq, - Sequence NewSeq) { - if (EnableARCAnnotations) { - // If we have a target identifier, make sure that we match it before - // emitting an annotation. - if(!ARCAnnotationTargetIdentifier.empty() && - !Ptr->getName().equals(ARCAnnotationTargetIdentifier)) - return; - - // First generate the source annotation on our pointer. This will return an - // MDString* if Ptr actually comes from an instruction implying we can put - // in a source annotation. If AppendMDNodeToSourcePtr returns 0 (i.e. NULL), - // then we know that our pointer is from an Argument so we put a reference - // to the argument number. - // - // The point of this is to make it easy for the - // llvm-arc-annotation-processor tool to cross reference where the source - // pointer is in the LLVM IR since the LLVM IR parser does not submit such - // information via debug info for backends to use (since why would anyone - // need such a thing from LLVM IR besides in non-standard cases - // [i.e. this]). - MDString *SourcePtrMDNode = - AppendMDNodeToSourcePtr(PtrMDId, Ptr); - AppendMDNodeToInstForPtr(InstMDId, Inst, Ptr, SourcePtrMDNode, OldSeq, - NewSeq); - } + return OS; } -// The actual interface for accessing the above functionality is defined via -// some simple macros which are defined below. We do this so that the user does -// not need to pass in what metadata id is needed resulting in cleaner code and -// additionally since it provides an easy way to conditionally no-op all -// annotation support in a non-debug build. - -/// Use this macro to annotate a sequence state change when processing -/// instructions bottom up, -#define ANNOTATE_BOTTOMUP(inst, ptr, old, new) \ - GenerateARCAnnotation(ARCAnnotationBottomUpMDKind, \ - ARCAnnotationProvenanceSourceMDKind, (inst), \ - const_cast<Value*>(ptr), (old), (new)) -/// Use this macro to annotate a sequence state change when processing -/// instructions top down. -#define ANNOTATE_TOPDOWN(inst, ptr, old, new) \ - GenerateARCAnnotation(ARCAnnotationTopDownMDKind, \ - ARCAnnotationProvenanceSourceMDKind, (inst), \ - const_cast<Value*>(ptr), (old), (new)) - -#define ANNOTATE_BB(_states, _bb, _name, _type, _direction) \ - do { \ - if (EnableARCAnnotations) { \ - for(BBState::ptr_const_iterator I = (_states)._direction##_ptr_begin(), \ - E = (_states)._direction##_ptr_end(); I != E; ++I) { \ - Value *Ptr = const_cast<Value*>(I->first); \ - Sequence Seq = I->second.GetSeq(); \ - GenerateARCBB ## _type ## Annotation(_name, (_bb), Ptr, Seq); \ - } \ - } \ - } while (0) - -#define ANNOTATE_BOTTOMUP_BBSTART(_states, _basicblock) \ - ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbstart", \ - Entrance, bottom_up) -#define ANNOTATE_BOTTOMUP_BBEND(_states, _basicblock) \ - ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbend", \ - Terminator, bottom_up) -#define ANNOTATE_TOPDOWN_BBSTART(_states, _basicblock) \ - ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbstart", \ - Entrance, top_down) -#define ANNOTATE_TOPDOWN_BBEND(_states, _basicblock) \ - ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbend", \ - Terminator, top_down) - -#else // !ARC_ANNOTATION -// If annotations are off, noop. 
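(The matching no-op stubs of those macros are deleted next.) With the operator<< assembled above, dumping the per-block dataflow state becomes a one-liner; a hypothetical call site, with BBStates being the usual DenseMap<const BasicBlock *, BBState>:

    for (auto &KV : BBStates)
      DEBUG(llvm::dbgs() << "BBState for " << KV.first->getName() << ":\n"
                         << KV.second);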
-#define ANNOTATE_BOTTOMUP(inst, ptr, old, new) -#define ANNOTATE_TOPDOWN(inst, ptr, old, new) -#define ANNOTATE_BOTTOMUP_BBSTART(states, basicblock) -#define ANNOTATE_BOTTOMUP_BBEND(states, basicblock) -#define ANNOTATE_TOPDOWN_BBSTART(states, basicblock) -#define ANNOTATE_TOPDOWN_BBEND(states, basicblock) -#endif // !ARC_ANNOTATION - namespace { + /// \brief The main ARC optimization pass. class ObjCARCOpt : public FunctionPass { bool Changed; ProvenanceAnalysis PA; + + /// A cache of references to runtime entry point constants. ARCRuntimeEntryPoints EP; + /// A cache of MDKinds that can be passed into other functions to propagate + /// MDKind identifiers. + ARCMDKindCache MDKindCache; + // This is used to track if a pointer is stored into an alloca. DenseSet<const Value *> MultiOwnersSet; @@ -1076,24 +486,6 @@ namespace { /// is in fact used in the current function. unsigned UsedInThisFunction; - /// The Metadata Kind for clang.imprecise_release metadata. - unsigned ImpreciseReleaseMDKind; - - /// The Metadata Kind for clang.arc.copy_on_escape metadata. - unsigned CopyOnEscapeMDKind; - - /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata. - unsigned NoObjCARCExceptionsMDKind; - -#ifdef ARC_ANNOTATIONS - /// The Metadata Kind for llvm.arc.annotation.bottomup metadata. - unsigned ARCAnnotationBottomUpMDKind; - /// The Metadata Kind for llvm.arc.annotation.topdown metadata. - unsigned ARCAnnotationTopDownMDKind; - /// The Metadata Kind for llvm.arc.annotation.provenancesource metadata. - unsigned ARCAnnotationProvenanceSourceMDKind; -#endif // ARC_ANNOATIONS - bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV); void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, ARCInstKind &Class); @@ -1102,47 +494,41 @@ namespace { void CheckForCFGHazards(const BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates, BBState &MyStates) const; - bool VisitInstructionBottomUp(Instruction *Inst, - BasicBlock *BB, - MapVector<Value *, RRInfo> &Retains, + bool VisitInstructionBottomUp(Instruction *Inst, BasicBlock *BB, + BlotMapVector<Value *, RRInfo> &Retains, BBState &MyStates); bool VisitBottomUp(BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains); + BlotMapVector<Value *, RRInfo> &Retains); bool VisitInstructionTopDown(Instruction *Inst, DenseMap<Value *, RRInfo> &Releases, BBState &MyStates); bool VisitTopDown(BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates, DenseMap<Value *, RRInfo> &Releases); - bool Visit(Function &F, - DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains, + bool Visit(Function &F, DenseMap<const BasicBlock *, BBState> &BBStates, + BlotMapVector<Value *, RRInfo> &Retains, DenseMap<Value *, RRInfo> &Releases); void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove, - MapVector<Value *, RRInfo> &Retains, + BlotMapVector<Value *, RRInfo> &Retains, DenseMap<Value *, RRInfo> &Releases, - SmallVectorImpl<Instruction *> &DeadInsts, - Module *M); - - bool ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases, - Module *M, - SmallVectorImpl<Instruction *> &NewRetains, - SmallVectorImpl<Instruction *> &NewReleases, - SmallVectorImpl<Instruction *> &DeadInsts, - RRInfo &RetainsToMove, - RRInfo &ReleasesToMove, - Value *Arg, - bool KnownSafe, - bool &AnyPairsCompletelyEliminated); + SmallVectorImpl<Instruction *> &DeadInsts, Module 
*M); + + bool + PairUpRetainsAndReleases(DenseMap<const BasicBlock *, BBState> &BBStates, + BlotMapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases, Module *M, + SmallVectorImpl<Instruction *> &NewRetains, + SmallVectorImpl<Instruction *> &NewReleases, + SmallVectorImpl<Instruction *> &DeadInsts, + RRInfo &RetainsToMove, RRInfo &ReleasesToMove, + Value *Arg, bool KnownSafe, + bool &AnyPairsCompletelyEliminated); bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases, - Module *M); + BlotMapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases, Module *M); void OptimizeWeakCalls(Function &F); @@ -1238,7 +624,7 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { "objc_retain since the operand is not a return value.\n" "Old = " << *RetainRV << "\n"); - Constant *NewDecl = EP.get(ARCRuntimeEntryPoints::EPT_Retain); + Constant *NewDecl = EP.get(ARCRuntimeEntryPointKind::Retain); cast<CallInst>(RetainRV)->setCalledFunction(NewDecl); DEBUG(dbgs() << "New = " << *RetainRV << "\n"); @@ -1274,7 +660,7 @@ void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, "Old = " << *AutoreleaseRV << "\n"); CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV); - Constant *NewDecl = EP.get(ARCRuntimeEntryPoints::EPT_Autorelease); + Constant *NewDecl = EP.get(ARCRuntimeEntryPointKind::Autorelease); AutoreleaseRVCI->setCalledFunction(NewDecl); AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease. Class = ARCInstKind::Autorelease; @@ -1380,10 +766,11 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { // Create the declaration lazily. LLVMContext &C = Inst->getContext(); - Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Release); + Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Release); CallInst *NewCall = CallInst::Create(Decl, Call->getArgOperand(0), "", Call); - NewCall->setMetadata(ImpreciseReleaseMDKind, MDNode::get(C, None)); + NewCall->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease), + MDNode::get(C, None)); DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) " "since x is otherwise unused.\nOld: " << *Call << "\nNew: " @@ -1547,7 +934,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { /// no CFG hazards by checking the states of various bottom up pointers. static void CheckForUseCFGHazard(const Sequence SuccSSeq, const bool SuccSRRIKnownSafe, - PtrState &S, + TopDownPtrState &S, bool &SomeSuccHasSame, bool &AllSuccsHaveSame, bool &NotAllSeqEqualButKnownSafe, @@ -1585,7 +972,7 @@ static void CheckForUseCFGHazard(const Sequence SuccSSeq, /// pointers. static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq, const bool SuccSRRIKnownSafe, - PtrState &S, + TopDownPtrState &S, bool &SomeSuccHasSame, bool &AllSuccsHaveSame, bool &NotAllSeqEqualButKnownSafe) { @@ -1618,9 +1005,9 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, BBState &MyStates) const { // If any top-down local-use or possible-dec has a succ which is earlier in // the sequence, forget it. - for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(), - E = MyStates.top_down_ptr_end(); I != E; ++I) { - PtrState &S = I->second; + for (auto I = MyStates.top_down_ptr_begin(), E = MyStates.top_down_ptr_end(); + I != E; ++I) { + TopDownPtrState &S = I->second; const Sequence Seq = I->second.GetSeq(); // We only care about S_Retain, S_CanRelease, and S_Use. 
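Aside on the signature changes above: the Retains map moves from MapVector<Value *, RRInfo> to BlotMapVector<Value *, RRInfo>. The payoff is visible further down in PerformCodePlacement, where entries are skipped with "if (!V) continue; // blotted". A minimal sketch of the blotting idea, assuming only what that usage implies (LLVM's actual BlotMapVector.h may differ in detail; the class name here is mine):

#include <cstddef>
#include <map>
#include <utility>
#include <vector>

// Order-preserving map with "blotting": erasing nulls out the entry in place
// instead of removing it, so indices and iterators held by an in-flight
// traversal stay valid. Callers must skip null keys when iterating.
template <typename KeyT, typename ValueT> class BlotMapVectorSketch {
  std::map<KeyT, std::size_t> Map;             // key -> index into Vector
  std::vector<std::pair<KeyT, ValueT>> Vector; // insertion-ordered payload

public:
  ValueT &operator[](const KeyT &Key) {
    auto Pair = Map.insert(std::make_pair(Key, std::size_t(0)));
    if (Pair.second) { // first time we see Key: append a fresh slot
      Pair.first->second = Vector.size();
      Vector.push_back(std::make_pair(Key, ValueT()));
    }
    return Vector[Pair.first->second].second;
  }

  // "Blot" the entry: zero the key (e.g. a null Value *) but keep the slot.
  void blot(const KeyT &Key) {
    auto It = Map.find(Key);
    if (It == Map.end())
      return;
    Vector[It->second].first = KeyT();
    Map.erase(It);
  }

  typename std::vector<std::pair<KeyT, ValueT>>::const_iterator begin() const {
    return Vector.begin();
  }
  typename std::vector<std::pair<KeyT, ValueT>>::const_iterator end() const {
    return Vector.end();
  }
};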
@@ -1646,7 +1033,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, const DenseMap<const BasicBlock *, BBState>::iterator BBI = BBStates.find(*SI); assert(BBI != BBStates.end()); - const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); + const BottomUpPtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); const Sequence SuccSSeq = SuccS.GetSeq(); // If bottom up, the pointer is in an S_None state, clear the sequence @@ -1705,44 +1092,21 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, } } -bool -ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, - BasicBlock *BB, - MapVector<Value *, RRInfo> &Retains, - BBState &MyStates) { +bool ObjCARCOpt::VisitInstructionBottomUp( + Instruction *Inst, BasicBlock *BB, BlotMapVector<Value *, RRInfo> &Retains, + BBState &MyStates) { bool NestingDetected = false; ARCInstKind Class = GetARCInstKind(Inst); const Value *Arg = nullptr; - DEBUG(dbgs() << "Class: " << Class << "\n"); + DEBUG(dbgs() << " Class: " << Class << "\n"); switch (Class) { case ARCInstKind::Release: { Arg = GetArgRCIdentityRoot(Inst); - PtrState &S = MyStates.getPtrBottomUpState(Arg); - - // If we see two releases in a row on the same pointer. If so, make - // a note, and we'll cicle back to revisit it after we've - // hopefully eliminated the second release, which may allow us to - // eliminate the first release too. - // Theoretically we could implement removal of nested retain+release - // pairs by making PtrState hold a stack of states, but this is - // simple and avoids adding overhead for the non-nested case. - if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) { - DEBUG(dbgs() << "Found nested releases (i.e. a release pair)\n"); - NestingDetected = true; - } - - MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); - Sequence NewSeq = ReleaseMetadata ? S_MovableRelease : S_Release; - ANNOTATE_BOTTOMUP(Inst, Arg, S.GetSeq(), NewSeq); - S.ResetSequenceProgress(NewSeq); - S.SetReleaseMetadata(ReleaseMetadata); - S.SetKnownSafe(S.HasKnownPositiveRefCount()); - S.SetTailCallRelease(cast<CallInst>(Inst)->isTailCall()); - S.InsertCall(Inst); - S.SetKnownPositiveRefCount(); + BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg); + NestingDetected |= S.InitBottomUp(MDKindCache, Inst); break; } case ARCInstKind::RetainBlock: @@ -1753,35 +1117,16 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, case ARCInstKind::Retain: case ARCInstKind::RetainRV: { Arg = GetArgRCIdentityRoot(Inst); - - PtrState &S = MyStates.getPtrBottomUpState(Arg); - S.SetKnownPositiveRefCount(); - - Sequence OldSeq = S.GetSeq(); - switch (OldSeq) { - case S_Stop: - case S_Release: - case S_MovableRelease: - case S_Use: - // If OldSeq is not S_Use or OldSeq is S_Use and we are tracking an - // imprecise release, clear our reverse insertion points. - if (OldSeq != S_Use || S.IsTrackingImpreciseReleases()) - S.ClearReverseInsertPts(); - // FALL THROUGH - case S_CanRelease: - // Don't do retain+release tracking for ARCInstKind::RetainRV, - // because it's - // better to let it remain as the first instruction after a call. - if (Class != ARCInstKind::RetainRV) + BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg); + if (S.MatchWithRetain()) { + // Don't do retain+release tracking for ARCInstKind::RetainRV, because + // it's better to let it remain as the first instruction after a call. 
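+      // (The runtime recognizes objc_retainAutoreleasedReturnValue by its
+      // position immediately after the call it pairs with, so it must not
+      // be moved or folded into a retain/release pair here.)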
+ if (Class != ARCInstKind::RetainRV) { + DEBUG(llvm::dbgs() << " Matching with: " << *Inst << "\n"); Retains[Inst] = S.GetRRInfo(); + } S.ClearSequenceProgress(); - break; - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); } - ANNOTATE_BOTTOMUP(Inst, Arg, OldSeq, S.GetSeq()); // A retain moving bottom up can be a use. break; } @@ -1807,9 +1152,10 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, // in the presence of allocas we only unconditionally remove pointers if // both our retain and our release are KnownSafe. if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - if (AreAnyUnderlyingObjectsAnAlloca(SI->getPointerOperand())) { - BBState::ptr_iterator I = MyStates.findPtrBottomUpState( - GetRCIdentityRoot(SI->getValueOperand())); + const DataLayout &DL = BB->getModule()->getDataLayout(); + if (AreAnyUnderlyingObjectsAnAlloca(SI->getPointerOperand(), DL)) { + auto I = MyStates.findPtrBottomUpState( + GetRCIdentityRoot(SI->getValueOperand())); if (I != MyStates.bottom_up_ptr_end()) MultiOwnersSet.insert(I->first); } @@ -1821,90 +1167,26 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, // Consider any other possible effects of this instruction on each // pointer being tracked. - for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(), - ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) { + for (auto MI = MyStates.bottom_up_ptr_begin(), + ME = MyStates.bottom_up_ptr_end(); + MI != ME; ++MI) { const Value *Ptr = MI->first; if (Ptr == Arg) continue; // Handled above. - PtrState &S = MI->second; - Sequence Seq = S.GetSeq(); + BottomUpPtrState &S = MI->second; - // Check for possible releases. - if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr - << "\n"); - S.ClearKnownPositiveRefCount(); - switch (Seq) { - case S_Use: - S.SetSeq(S_CanRelease); - ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S.GetSeq()); - continue; - case S_CanRelease: - case S_Release: - case S_MovableRelease: - case S_Stop: - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - } + if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class)) + continue; - // Check for possible direct uses. - switch (Seq) { - case S_Release: - case S_MovableRelease: - if (CanUse(Inst, Ptr, PA, Class)) { - DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr - << "\n"); - assert(!S.HasReverseInsertPts()); - // If this is an invoke instruction, we're scanning it as part of - // one of its successor blocks, since we can't insert code after it - // in its own block, and we don't want to split critical edges. - if (isa<InvokeInst>(Inst)) - S.InsertReverseInsertPt(BB->getFirstInsertionPt()); - else - S.InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst))); - S.SetSeq(S_Use); - ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use); - } else if (Seq == S_Release && IsUser(Class)) { - DEBUG(dbgs() << "PreciseReleaseUse: Seq: " << Seq << "; " << *Ptr - << "\n"); - // Non-movable releases depend on any possible objc pointer use. - S.SetSeq(S_Stop); - ANNOTATE_BOTTOMUP(Inst, Ptr, S_Release, S_Stop); - assert(!S.HasReverseInsertPts()); - // As above; handle invoke specially. 
- if (isa<InvokeInst>(Inst)) - S.InsertReverseInsertPt(BB->getFirstInsertionPt()); - else - S.InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst))); - } - break; - case S_Stop: - if (CanUse(Inst, Ptr, PA, Class)) { - DEBUG(dbgs() << "PreciseStopUse: Seq: " << Seq << "; " << *Ptr - << "\n"); - S.SetSeq(S_Use); - ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use); - } - break; - case S_CanRelease: - case S_Use: - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } + S.HandlePotentialUse(BB, Inst, Ptr, PA, Class); } return NestingDetected; } -bool -ObjCARCOpt::VisitBottomUp(BasicBlock *BB, - DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains) { +bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB, + DenseMap<const BasicBlock *, BBState> &BBStates, + BlotMapVector<Value *, RRInfo> &Retains) { DEBUG(dbgs() << "\n== ObjCARCOpt::VisitBottomUp ==\n"); @@ -1929,9 +1211,8 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, } } - // If ARC Annotations are enabled, output the current state of pointers at the - // bottom of the basic block. - ANNOTATE_BOTTOMUP_BBEND(MyStates, BB); + DEBUG(llvm::dbgs() << "Before:\n" << BBStates[BB] << "\n" + << "Performing Dataflow:\n"); // Visit all the instructions, bottom-up. for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) { @@ -1941,7 +1222,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, if (isa<InvokeInst>(Inst)) continue; - DEBUG(dbgs() << "Visiting " << *Inst << "\n"); + DEBUG(dbgs() << " Visiting " << *Inst << "\n"); NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates); } @@ -1956,9 +1237,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates); } - // If ARC Annotations are enabled, output the current state of pointers at the - // top of the basic block. - ANNOTATE_BOTTOMUP_BBSTART(MyStates, BB); + DEBUG(llvm::dbgs() << "\nFinal State:\n" << BBStates[BB] << "\n"); return NestingDetected; } @@ -1971,144 +1250,63 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, ARCInstKind Class = GetARCInstKind(Inst); const Value *Arg = nullptr; + DEBUG(llvm::dbgs() << " Class: " << Class << "\n"); + switch (Class) { case ARCInstKind::RetainBlock: // In OptimizeIndividualCalls, we have strength reduced all optimizable // objc_retainBlocks to objc_retains. Thus at this point any - // objc_retainBlocks that we see are not optimizable. + // objc_retainBlocks that we see are not optimizable. We need to break since + // a retain can be a potential use. break; case ARCInstKind::Retain: case ARCInstKind::RetainRV: { Arg = GetArgRCIdentityRoot(Inst); - - PtrState &S = MyStates.getPtrTopDownState(Arg); - - // Don't do retain+release tracking for ARCInstKind::RetainRV, because - // it's - // better to let it remain as the first instruction after a call. - if (Class != ARCInstKind::RetainRV) { - // If we see two retains in a row on the same pointer. If so, make - // a note, and we'll cicle back to revisit it after we've - // hopefully eliminated the second retain, which may allow us to - // eliminate the first retain too. - // Theoretically we could implement removal of nested retain+release - // pairs by making PtrState hold a stack of states, but this is - // simple and avoids adding overhead for the non-nested case. 
- if (S.GetSeq() == S_Retain) - NestingDetected = true; - - ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_Retain); - S.ResetSequenceProgress(S_Retain); - S.SetKnownSafe(S.HasKnownPositiveRefCount()); - S.InsertCall(Inst); - } - - S.SetKnownPositiveRefCount(); - + TopDownPtrState &S = MyStates.getPtrTopDownState(Arg); + NestingDetected |= S.InitTopDown(Class, Inst); // A retain can be a potential use; procede to the generic checking // code below. break; } case ARCInstKind::Release: { Arg = GetArgRCIdentityRoot(Inst); - - PtrState &S = MyStates.getPtrTopDownState(Arg); - S.ClearKnownPositiveRefCount(); - - Sequence OldSeq = S.GetSeq(); - - MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); - - switch (OldSeq) { - case S_Retain: - case S_CanRelease: - if (OldSeq == S_Retain || ReleaseMetadata != nullptr) - S.ClearReverseInsertPts(); - // FALL THROUGH - case S_Use: - S.SetReleaseMetadata(ReleaseMetadata); - S.SetTailCallRelease(cast<CallInst>(Inst)->isTailCall()); + TopDownPtrState &S = MyStates.getPtrTopDownState(Arg); + // Try to form a tentative pair in between this release instruction and the + // top down pointers that we are tracking. + if (S.MatchWithRelease(MDKindCache, Inst)) { + // If we succeed, copy S's RRInfo into the Release -> {Retain Set + // Map}. Then we clear S. + DEBUG(llvm::dbgs() << " Matching with: " << *Inst << "\n"); Releases[Inst] = S.GetRRInfo(); - ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_None); S.ClearSequenceProgress(); - break; - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); } break; } case ARCInstKind::AutoreleasepoolPop: // Conservatively, clear MyStates for all known pointers. MyStates.clearTopDownPointers(); - return NestingDetected; + return false; case ARCInstKind::AutoreleasepoolPush: case ARCInstKind::None: - // These are irrelevant. - return NestingDetected; + // These can not be uses of + return false; default: break; } // Consider any other possible effects of this instruction on each // pointer being tracked. - for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(), - ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) { + for (auto MI = MyStates.top_down_ptr_begin(), + ME = MyStates.top_down_ptr_end(); + MI != ME; ++MI) { const Value *Ptr = MI->first; if (Ptr == Arg) continue; // Handled above. - PtrState &S = MI->second; - Sequence Seq = S.GetSeq(); - - // Check for possible releases. - if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr - << "\n"); - S.ClearKnownPositiveRefCount(); - switch (Seq) { - case S_Retain: - S.SetSeq(S_CanRelease); - ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_CanRelease); - assert(!S.HasReverseInsertPts()); - S.InsertReverseInsertPt(Inst); - - // One call can't cause a transition from S_Retain to S_CanRelease - // and S_CanRelease to S_Use. If we've made the first transition, - // we're done. - continue; - case S_Use: - case S_CanRelease: - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); - } - } + TopDownPtrState &S = MI->second; + if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class)) + continue; - // Check for possible direct uses. 
- switch (Seq) { - case S_CanRelease: - if (CanUse(Inst, Ptr, PA, Class)) { - DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr - << "\n"); - S.SetSeq(S_Use); - ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_Use); - } - break; - case S_Retain: - case S_Use: - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); - } + S.HandlePotentialUse(Inst, Ptr, PA, Class); } return NestingDetected; @@ -2140,27 +1338,22 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, } } - // If ARC Annotations are enabled, output the current state of pointers at the - // top of the basic block. - ANNOTATE_TOPDOWN_BBSTART(MyStates, BB); + DEBUG(llvm::dbgs() << "Before:\n" << BBStates[BB] << "\n" + << "Performing Dataflow:\n"); // Visit all the instructions, top-down. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { Instruction *Inst = I; - DEBUG(dbgs() << "Visiting " << *Inst << "\n"); + DEBUG(dbgs() << " Visiting " << *Inst << "\n"); NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates); } - // If ARC Annotations are enabled, output the current state of pointers at the - // bottom of the basic block. - ANNOTATE_TOPDOWN_BBEND(MyStates, BB); - -#ifdef ARC_ANNOTATIONS - if (!(EnableARCAnnotations && DisableCheckForCFGHazards)) -#endif + DEBUG(llvm::dbgs() << "\nState Before Checking for CFG Hazards:\n" + << BBStates[BB] << "\n\n"); CheckForCFGHazards(BB, BBStates, MyStates); + DEBUG(llvm::dbgs() << "Final State:\n" << BBStates[BB] << "\n"); return NestingDetected; } @@ -2246,11 +1439,10 @@ ComputePostOrders(Function &F, } // Visit the function both top-down and bottom-up. -bool -ObjCARCOpt::Visit(Function &F, - DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases) { +bool ObjCARCOpt::Visit(Function &F, + DenseMap<const BasicBlock *, BBState> &BBStates, + BlotMapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases) { // Use reverse-postorder traversals, because we magically know that loops // will be well behaved, i.e. they won't repeatedly call retain on a single @@ -2260,7 +1452,7 @@ ObjCARCOpt::Visit(Function &F, SmallVector<BasicBlock *, 16> PostOrder; SmallVector<BasicBlock *, 16> ReverseCFGPostOrder; ComputePostOrders(F, PostOrder, ReverseCFGPostOrder, - NoObjCARCExceptionsMDKind, + MDKindCache.get(ARCMDKindID::NoObjCARCExceptions), BBStates); // Use reverse-postorder on the reverse CFG for bottom-up. @@ -2281,10 +1473,9 @@ ObjCARCOpt::Visit(Function &F, } /// Move the calls in RetainsToMove and ReleasesToMove. -void ObjCARCOpt::MoveCalls(Value *Arg, - RRInfo &RetainsToMove, +void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove, - MapVector<Value *, RRInfo> &Retains, + BlotMapVector<Value *, RRInfo> &Retains, DenseMap<Value *, RRInfo> &Releases, SmallVectorImpl<Instruction *> &DeadInsts, Module *M) { @@ -2297,7 +1488,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg, for (Instruction *InsertPt : ReleasesToMove.ReverseInsertPts) { Value *MyArg = ArgTy == ParamTy ? 
Arg : new BitCastInst(Arg, ParamTy, "", InsertPt); - Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Retain); + Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain); CallInst *Call = CallInst::Create(Decl, MyArg, "", InsertPt); Call->setDoesNotThrow(); Call->setTailCall(); @@ -2308,11 +1499,11 @@ void ObjCARCOpt::MoveCalls(Value *Arg, for (Instruction *InsertPt : RetainsToMove.ReverseInsertPts) { Value *MyArg = ArgTy == ParamTy ? Arg : new BitCastInst(Arg, ParamTy, "", InsertPt); - Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Release); + Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Release); CallInst *Call = CallInst::Create(Decl, MyArg, "", InsertPt); // Attach a clang.imprecise_release metadata tag, if appropriate. if (MDNode *M = ReleasesToMove.ReleaseMetadata) - Call->setMetadata(ImpreciseReleaseMDKind, M); + Call->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease), M); Call->setDoesNotThrow(); if (ReleasesToMove.IsTailCallRelease) Call->setTailCall(); @@ -2335,20 +1526,15 @@ void ObjCARCOpt::MoveCalls(Value *Arg, } -bool -ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> - &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases, - Module *M, - SmallVectorImpl<Instruction *> &NewRetains, - SmallVectorImpl<Instruction *> &NewReleases, - SmallVectorImpl<Instruction *> &DeadInsts, - RRInfo &RetainsToMove, - RRInfo &ReleasesToMove, - Value *Arg, - bool KnownSafe, - bool &AnyPairsCompletelyEliminated) { +bool ObjCARCOpt::PairUpRetainsAndReleases( + DenseMap<const BasicBlock *, BBState> &BBStates, + BlotMapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases, Module *M, + SmallVectorImpl<Instruction *> &NewRetains, + SmallVectorImpl<Instruction *> &NewReleases, + SmallVectorImpl<Instruction *> &DeadInsts, RRInfo &RetainsToMove, + RRInfo &ReleasesToMove, Value *Arg, bool KnownSafe, + bool &AnyPairsCompletelyEliminated) { // If a pair happens in a region where it is known that the reference count // is already incremented, we can similarly ignore possible decrements unless // we are dealing with a retainable object with multiple provenance sources. 
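Note on the hunks below: besides renaming ConnectTDBUTraversals to PairUpRetainsAndReleases, they tighten the safety predicate. Previously a pair could be removed when it was known safe in only one direction, provided the pointer had a single provenance source; now safety is required in both directions. Schematically, with the booleans named as in the surrounding code:

// Sketch of the predicate change only; not part of the commit.
static bool UnconditionallySafeOld(bool KnownSafeTD, bool KnownSafeBU,
                                   bool MultipleOwners) {
  // Old rule: one-directional safety sufficed for single-owner pointers.
  return (KnownSafeTD && KnownSafeBU) ||
         ((KnownSafeTD || KnownSafeBU) && !MultipleOwners);
}
static bool UnconditionallySafeNew(bool KnownSafeTD, bool KnownSafeBU) {
  // New rule: the pair must be known safe both top-down and bottom-up.
  return KnownSafeTD && KnownSafeBU;
}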
@@ -2369,15 +1555,14 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> for (SmallVectorImpl<Instruction *>::const_iterator NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) { Instruction *NewRetain = *NI; - MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain); + auto It = Retains.find(NewRetain); assert(It != Retains.end()); const RRInfo &NewRetainRRI = It->second; KnownSafeTD &= NewRetainRRI.KnownSafe; MultipleOwners = MultipleOwners || MultiOwnersSet.count(GetArgRCIdentityRoot(NewRetain)); for (Instruction *NewRetainRelease : NewRetainRRI.Calls) { - DenseMap<Value *, RRInfo>::const_iterator Jt = - Releases.find(NewRetainRelease); + auto Jt = Releases.find(NewRetainRelease); if (Jt == Releases.end()) return false; const RRInfo &NewRetainReleaseRRI = Jt->second; @@ -2446,15 +1631,13 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> for (SmallVectorImpl<Instruction *>::const_iterator NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) { Instruction *NewRelease = *NI; - DenseMap<Value *, RRInfo>::const_iterator It = - Releases.find(NewRelease); + auto It = Releases.find(NewRelease); assert(It != Releases.end()); const RRInfo &NewReleaseRRI = It->second; KnownSafeBU &= NewReleaseRRI.KnownSafe; CFGHazardAfflicted |= NewReleaseRRI.CFGHazardAfflicted; for (Instruction *NewReleaseRetain : NewReleaseRRI.Calls) { - MapVector<Value *, RRInfo>::const_iterator Jt = - Retains.find(NewReleaseRetain); + auto Jt = Retains.find(NewReleaseRetain); if (Jt == Retains.end()) return false; const RRInfo &NewReleaseRetainRRI = Jt->second; @@ -2506,11 +1689,8 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> if (NewRetains.empty()) break; } - // If the pointer is known incremented in 1 direction and we do not have - // MultipleOwners, we can safely remove the retain/releases. Otherwise we need - // to be known safe in both directions. - bool UnconditionallySafe = (KnownSafeTD && KnownSafeBU) || - ((KnownSafeTD || KnownSafeBU) && !MultipleOwners); + // We can only remove pointers if we are known safe in both directions. + bool UnconditionallySafe = KnownSafeTD && KnownSafeBU; if (UnconditionallySafe) { RetainsToMove.ReverseInsertPts.clear(); ReleasesToMove.ReverseInsertPts.clear(); @@ -2540,12 +1720,6 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> if (OldDelta != 0) return false; -#ifdef ARC_ANNOTATIONS - // Do not move calls if ARC annotations are requested. - if (EnableARCAnnotations) - return false; -#endif // ARC_ANNOTATIONS - Changed = true; assert(OldCount != 0 && "Unreachable code?"); NumRRs += OldCount - NewCount; @@ -2558,12 +1732,10 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> /// Identify pairings between the retains and releases, and delete and/or move /// them. -bool -ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> - &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases, - Module *M) { +bool ObjCARCOpt::PerformCodePlacement( + DenseMap<const BasicBlock *, BBState> &BBStates, + BlotMapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases, Module *M) { DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n"); bool AnyPairsCompletelyEliminated = false; @@ -2574,8 +1746,9 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> SmallVector<Instruction *, 8> DeadInsts; // Visit each retain. 
- for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(), - E = Retains.end(); I != E; ++I) { + for (BlotMapVector<Value *, RRInfo>::const_iterator I = Retains.begin(), + E = Retains.end(); + I != E; ++I) { Value *V = I->first; if (!V) continue; // blotted @@ -2602,11 +1775,10 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> // Connect the dots between the top-down-collected RetainsToMove and // bottom-up-collected ReleasesToMove to form sets of related calls. NewRetains.push_back(Retain); - bool PerformMoveCalls = - ConnectTDBUTraversals(BBStates, Retains, Releases, M, NewRetains, - NewReleases, DeadInsts, RetainsToMove, - ReleasesToMove, Arg, KnownSafe, - AnyPairsCompletelyEliminated); + bool PerformMoveCalls = PairUpRetainsAndReleases( + BBStates, Retains, Releases, M, NewRetains, NewReleases, DeadInsts, + RetainsToMove, ReleasesToMove, Arg, KnownSafe, + AnyPairsCompletelyEliminated); if (PerformMoveCalls) { // Ok, everything checks out and we're all set. Let's move/delete some @@ -2678,7 +1850,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) { Changed = true; // If the load has a builtin retain, insert a plain retain for it. if (Class == ARCInstKind::LoadWeakRetained) { - Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Retain); + Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain); CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call); CI->setTailCall(); } @@ -2707,7 +1879,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) { Changed = true; // If the load has a builtin retain, insert a plain retain for it. if (Class == ARCInstKind::LoadWeakRetained) { - Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Retain); + Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain); CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call); CI->setTailCall(); } @@ -2795,7 +1967,7 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) { // map stays valid when we get around to rewriting code and calls get // replaced by arguments. DenseMap<Value *, RRInfo> Releases; - MapVector<Value *, RRInfo> Retains; + BlotMapVector<Value *, RRInfo> Retains; // This is used during the traversal of the function to track the // states for each identified object at each block. @@ -2828,8 +2000,7 @@ HasSafePathToPredecessorCall(const Value *Arg, Instruction *Retain, if (DepInsts.size() != 1) return false; - CallInst *Call = - dyn_cast_or_null<CallInst>(*DepInsts.begin()); + auto *Call = dyn_cast_or_null<CallInst>(*DepInsts.begin()); // Check that the pointer is the return value of the call. if (!Call || Arg != Call) @@ -2857,8 +2028,7 @@ FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB, if (DepInsts.size() != 1) return nullptr; - CallInst *Retain = - dyn_cast_or_null<CallInst>(*DepInsts.begin()); + auto *Retain = dyn_cast_or_null<CallInst>(*DepInsts.begin()); // Check that we found a retain with the same argument. if (!Retain || !IsRetain(GetBasicARCInstKind(Retain)) || @@ -2883,8 +2053,7 @@ FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB, if (DepInsts.size() != 1) return nullptr; - CallInst *Autorelease = - dyn_cast_or_null<CallInst>(*DepInsts.begin()); + auto *Autorelease = dyn_cast_or_null<CallInst>(*DepInsts.begin()); if (!Autorelease) return nullptr; ARCInstKind AutoreleaseClass = GetBasicARCInstKind(Autorelease); @@ -2999,28 +2168,13 @@ bool ObjCARCOpt::doInitialization(Module &M) { if (!Run) return false; - // Identify the imprecise release metadata kind. 
- ImpreciseReleaseMDKind = - M.getContext().getMDKindID("clang.imprecise_release"); - CopyOnEscapeMDKind = - M.getContext().getMDKindID("clang.arc.copy_on_escape"); - NoObjCARCExceptionsMDKind = - M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions"); -#ifdef ARC_ANNOTATIONS - ARCAnnotationBottomUpMDKind = - M.getContext().getMDKindID("llvm.arc.annotation.bottomup"); - ARCAnnotationTopDownMDKind = - M.getContext().getMDKindID("llvm.arc.annotation.topdown"); - ARCAnnotationProvenanceSourceMDKind = - M.getContext().getMDKindID("llvm.arc.annotation.provenancesource"); -#endif // ARC_ANNOTATIONS - // Intuitively, objc_retain and others are nocapture, however in practice // they are not, because they return their argument value. And objc_release // calls finalizers which can have arbitrary side effects. + MDKindCache.init(&M); // Initialize our runtime entry point cache. - EP.Initialize(&M); + EP.init(&M); return false; } diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp index 410abfc..15ad8dc 100644 --- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp +++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp @@ -32,20 +32,22 @@ using namespace llvm::objcarc; bool ProvenanceAnalysis::relatedSelect(const SelectInst *A, const Value *B) { + const DataLayout &DL = A->getModule()->getDataLayout(); // If the values are Selects with the same condition, we can do a more precise // check: just check for relations between the values on corresponding arms. if (const SelectInst *SB = dyn_cast<SelectInst>(B)) if (A->getCondition() == SB->getCondition()) - return related(A->getTrueValue(), SB->getTrueValue()) || - related(A->getFalseValue(), SB->getFalseValue()); + return related(A->getTrueValue(), SB->getTrueValue(), DL) || + related(A->getFalseValue(), SB->getFalseValue(), DL); // Check both arms of the Select node individually. - return related(A->getTrueValue(), B) || - related(A->getFalseValue(), B); + return related(A->getTrueValue(), B, DL) || + related(A->getFalseValue(), B, DL); } bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) { + const DataLayout &DL = A->getModule()->getDataLayout(); // If the values are PHIs in the same block, we can do a more precise as well // as efficient check: just check for relations between the values on // corresponding edges. @@ -53,7 +55,7 @@ bool ProvenanceAnalysis::relatedPHI(const PHINode *A, if (PNB->getParent() == A->getParent()) { for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) if (related(A->getIncomingValue(i), - PNB->getIncomingValueForBlock(A->getIncomingBlock(i)))) + PNB->getIncomingValueForBlock(A->getIncomingBlock(i)), DL)) return true; return false; } @@ -62,7 +64,7 @@ bool ProvenanceAnalysis::relatedPHI(const PHINode *A, SmallPtrSet<const Value *, 4> UniqueSrc; for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) { const Value *PV1 = A->getIncomingValue(i); - if (UniqueSrc.insert(PV1).second && related(PV1, B)) + if (UniqueSrc.insert(PV1).second && related(PV1, B, DL)) return true; } @@ -103,11 +105,11 @@ static bool IsStoredObjCPointer(const Value *P) { return false; } -bool ProvenanceAnalysis::relatedCheck(const Value *A, - const Value *B) { +bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B, + const DataLayout &DL) { // Skip past provenance pass-throughs. - A = GetUnderlyingObjCPtr(A); - B = GetUnderlyingObjCPtr(B); + A = GetUnderlyingObjCPtr(A, DL); + B = GetUnderlyingObjCPtr(B, DL); // Quick check. 
if (A == B) @@ -159,8 +161,8 @@ bool ProvenanceAnalysis::relatedCheck(const Value *A, return true; } -bool ProvenanceAnalysis::related(const Value *A, - const Value *B) { +bool ProvenanceAnalysis::related(const Value *A, const Value *B, + const DataLayout &DL) { // Begin by inserting a conservative value into the map. If the insertion // fails, we have the answer already. If it succeeds, leave it there until we // compute the real answer to guard against recursive queries. @@ -170,7 +172,7 @@ bool ProvenanceAnalysis::related(const Value *A, if (!Pair.second) return Pair.first->second; - bool Result = relatedCheck(A, B); + bool Result = relatedCheck(A, B, DL); CachedResults[ValuePairTy(A, B)] = Result; return Result; } diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h index 4b5f4d8..0ac41d3 100644 --- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h +++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h @@ -30,6 +30,7 @@ namespace llvm { class Value; class AliasAnalysis; + class DataLayout; class PHINode; class SelectInst; } @@ -53,7 +54,7 @@ class ProvenanceAnalysis { typedef DenseMap<ValuePairTy, bool> CachedResultsTy; CachedResultsTy CachedResults; - bool relatedCheck(const Value *A, const Value *B); + bool relatedCheck(const Value *A, const Value *B, const DataLayout &DL); bool relatedSelect(const SelectInst *A, const Value *B); bool relatedPHI(const PHINode *A, const Value *B); @@ -67,7 +68,7 @@ public: AliasAnalysis *getAA() const { return AA; } - bool related(const Value *A, const Value *B); + bool related(const Value *A, const Value *B, const DataLayout &DL); void clear() { CachedResults.clear(); diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp index d836632..0be75af 100644 --- a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp +++ b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/Passes.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -65,6 +66,7 @@ bool PAEval::runOnFunction(Function &F) { ProvenanceAnalysis PA; PA.setAA(&getAnalysis<AliasAnalysis>()); + const DataLayout &DL = F.getParent()->getDataLayout(); for (Value *V1 : Values) { StringRef NameV1 = getName(V1); @@ -73,7 +75,7 @@ bool PAEval::runOnFunction(Function &F) { if (NameV1 >= NameV2) continue; errs() << NameV1 << " and " << NameV2; - if (PA.related(V1, V2)) + if (PA.related(V1, V2, DL)) errs() << " are related.\n"; else errs() << " are not related.\n"; diff --git a/lib/Transforms/ObjCARC/PtrState.cpp b/lib/Transforms/ObjCARC/PtrState.cpp new file mode 100644 index 0000000..ae20e7e --- /dev/null +++ b/lib/Transforms/ObjCARC/PtrState.cpp @@ -0,0 +1,404 @@ +//===--- PtrState.cpp -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "PtrState.h" +#include "DependencyAnalysis.h" +#include "ObjCARC.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::objcarc; + +#define DEBUG_TYPE "objc-arc-ptr-state" + +//===----------------------------------------------------------------------===// +// Utility +//===----------------------------------------------------------------------===// + +raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS, const Sequence S) { + switch (S) { + case S_None: + return OS << "S_None"; + case S_Retain: + return OS << "S_Retain"; + case S_CanRelease: + return OS << "S_CanRelease"; + case S_Use: + return OS << "S_Use"; + case S_Release: + return OS << "S_Release"; + case S_MovableRelease: + return OS << "S_MovableRelease"; + case S_Stop: + return OS << "S_Stop"; + } + llvm_unreachable("Unknown sequence type."); +} + +//===----------------------------------------------------------------------===// +// Sequence +//===----------------------------------------------------------------------===// + +static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) { + // The easy cases. + if (A == B) + return A; + if (A == S_None || B == S_None) + return S_None; + + if (A > B) + std::swap(A, B); + if (TopDown) { + // Choose the side which is further along in the sequence. + if ((A == S_Retain || A == S_CanRelease) && + (B == S_CanRelease || B == S_Use)) + return B; + } else { + // Choose the side which is further along in the sequence. + if ((A == S_Use || A == S_CanRelease) && + (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease)) + return A; + // If both sides are releases, choose the more conservative one. + if (A == S_Stop && (B == S_Release || B == S_MovableRelease)) + return A; + if (A == S_Release && B == S_MovableRelease) + return A; + } + + return S_None; +} + +//===----------------------------------------------------------------------===// +// RRInfo +//===----------------------------------------------------------------------===// + +void RRInfo::clear() { + KnownSafe = false; + IsTailCallRelease = false; + ReleaseMetadata = nullptr; + Calls.clear(); + ReverseInsertPts.clear(); + CFGHazardAfflicted = false; +} + +bool RRInfo::Merge(const RRInfo &Other) { + // Conservatively merge the ReleaseMetadata information. + if (ReleaseMetadata != Other.ReleaseMetadata) + ReleaseMetadata = nullptr; + + // Conservatively merge the boolean state. + KnownSafe &= Other.KnownSafe; + IsTailCallRelease &= Other.IsTailCallRelease; + CFGHazardAfflicted |= Other.CFGHazardAfflicted; + + // Merge the call sets. + Calls.insert(Other.Calls.begin(), Other.Calls.end()); + + // Merge the insert point sets. If there are any differences, + // that makes this a partial merge. 
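+  // (Either direction counts: a size mismatch means this side already holds
+  // an insert point the other side lacks, and insert(...).second is true
+  // whenever the other side contributes a point this side did not have.)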
+ bool Partial = ReverseInsertPts.size() != Other.ReverseInsertPts.size(); + for (Instruction *Inst : Other.ReverseInsertPts) + Partial |= ReverseInsertPts.insert(Inst).second; + return Partial; +} + +//===----------------------------------------------------------------------===// +// PtrState +//===----------------------------------------------------------------------===// + +void PtrState::SetKnownPositiveRefCount() { + DEBUG(dbgs() << " Setting Known Positive.\n"); + KnownPositiveRefCount = true; +} + +void PtrState::ClearKnownPositiveRefCount() { + DEBUG(dbgs() << " Clearing Known Positive.\n"); + KnownPositiveRefCount = false; +} + +void PtrState::SetSeq(Sequence NewSeq) { + DEBUG(dbgs() << " Old: " << GetSeq() << "; New: " << NewSeq << "\n"); + Seq = NewSeq; +} + +void PtrState::ResetSequenceProgress(Sequence NewSeq) { + DEBUG(dbgs() << " Resetting sequence progress.\n"); + SetSeq(NewSeq); + Partial = false; + RRI.clear(); +} + +void PtrState::Merge(const PtrState &Other, bool TopDown) { + Seq = MergeSeqs(GetSeq(), Other.GetSeq(), TopDown); + KnownPositiveRefCount &= Other.KnownPositiveRefCount; + + // If we're not in a sequence (anymore), drop all associated state. + if (Seq == S_None) { + Partial = false; + RRI.clear(); + } else if (Partial || Other.Partial) { + // If we're doing a merge on a path that's previously seen a partial + // merge, conservatively drop the sequence, to avoid doing partial + // RR elimination. If the branch predicates for the two merge differ, + // mixing them is unsafe. + ClearSequenceProgress(); + } else { + // Otherwise merge the other PtrState's RRInfo into our RRInfo. At this + // point, we know that currently we are not partial. Stash whether or not + // the merge operation caused us to undergo a partial merging of reverse + // insertion points. + Partial = RRI.Merge(Other.RRI); + } +} + +//===----------------------------------------------------------------------===// +// BottomUpPtrState +//===----------------------------------------------------------------------===// + +bool BottomUpPtrState::InitBottomUp(ARCMDKindCache &Cache, Instruction *I) { + // If we see two releases in a row on the same pointer. If so, make + // a note, and we'll cicle back to revisit it after we've + // hopefully eliminated the second release, which may allow us to + // eliminate the first release too. + // Theoretically we could implement removal of nested retain+release + // pairs by making PtrState hold a stack of states, but this is + // simple and avoids adding overhead for the non-nested case. + bool NestingDetected = false; + if (GetSeq() == S_Release || GetSeq() == S_MovableRelease) { + DEBUG(dbgs() << " Found nested releases (i.e. a release pair)\n"); + NestingDetected = true; + } + + MDNode *ReleaseMetadata = + I->getMetadata(Cache.get(ARCMDKindID::ImpreciseRelease)); + Sequence NewSeq = ReleaseMetadata ? S_MovableRelease : S_Release; + ResetSequenceProgress(NewSeq); + SetReleaseMetadata(ReleaseMetadata); + SetKnownSafe(HasKnownPositiveRefCount()); + SetTailCallRelease(cast<CallInst>(I)->isTailCall()); + InsertCall(I); + SetKnownPositiveRefCount(); + return NestingDetected; +} + +bool BottomUpPtrState::MatchWithRetain() { + SetKnownPositiveRefCount(); + + Sequence OldSeq = GetSeq(); + switch (OldSeq) { + case S_Stop: + case S_Release: + case S_MovableRelease: + case S_Use: + // If OldSeq is not S_Use or OldSeq is S_Use and we are tracking an + // imprecise release, clear our reverse insertion points. 
+ if (OldSeq != S_Use || IsTrackingImpreciseReleases()) + ClearReverseInsertPts(); + // FALL THROUGH + case S_CanRelease: + return true; + case S_None: + return false; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + llvm_unreachable("Sequence unknown enum value"); +} + +bool BottomUpPtrState::HandlePotentialAlterRefCount(Instruction *Inst, + const Value *Ptr, + ProvenanceAnalysis &PA, + ARCInstKind Class) { + Sequence S = GetSeq(); + + // Check for possible releases. + if (!CanAlterRefCount(Inst, Ptr, PA, Class)) + return false; + + DEBUG(dbgs() << " CanAlterRefCount: Seq: " << S << "; " << *Ptr + << "\n"); + switch (S) { + case S_Use: + SetSeq(S_CanRelease); + return true; + case S_CanRelease: + case S_Release: + case S_MovableRelease: + case S_Stop: + case S_None: + return false; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + llvm_unreachable("Sequence unknown enum value"); +} + +void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst, + const Value *Ptr, + ProvenanceAnalysis &PA, + ARCInstKind Class) { + // Check for possible direct uses. + switch (GetSeq()) { + case S_Release: + case S_MovableRelease: + if (CanUse(Inst, Ptr, PA, Class)) { + DEBUG(dbgs() << " CanUse: Seq: " << GetSeq() << "; " << *Ptr + << "\n"); + assert(!HasReverseInsertPts()); + // If this is an invoke instruction, we're scanning it as part of + // one of its successor blocks, since we can't insert code after it + // in its own block, and we don't want to split critical edges. + if (isa<InvokeInst>(Inst)) + InsertReverseInsertPt(BB->getFirstInsertionPt()); + else + InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst))); + SetSeq(S_Use); + } else if (Seq == S_Release && IsUser(Class)) { + DEBUG(dbgs() << " PreciseReleaseUse: Seq: " << GetSeq() << "; " + << *Ptr << "\n"); + // Non-movable releases depend on any possible objc pointer use. + SetSeq(S_Stop); + assert(!HasReverseInsertPts()); + // As above; handle invoke specially. + if (isa<InvokeInst>(Inst)) + InsertReverseInsertPt(BB->getFirstInsertionPt()); + else + InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst))); + } + break; + case S_Stop: + if (CanUse(Inst, Ptr, PA, Class)) { + DEBUG(dbgs() << " PreciseStopUse: Seq: " << GetSeq() << "; " + << *Ptr << "\n"); + SetSeq(S_Use); + } + break; + case S_CanRelease: + case S_Use: + case S_None: + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } +} + +//===----------------------------------------------------------------------===// +// TopDownPtrState +//===----------------------------------------------------------------------===// + +bool TopDownPtrState::InitTopDown(ARCInstKind Kind, Instruction *I) { + bool NestingDetected = false; + // Don't do retain+release tracking for ARCInstKind::RetainRV, because + // it's + // better to let it remain as the first instruction after a call. + if (Kind != ARCInstKind::RetainRV) { + // If we see two retains in a row on the same pointer. If so, make + // a note, and we'll cicle back to revisit it after we've + // hopefully eliminated the second retain, which may allow us to + // eliminate the first retain too. + // Theoretically we could implement removal of nested retain+release + // pairs by making PtrState hold a stack of states, but this is + // simple and avoids adding overhead for the non-nested case. 
+ if (GetSeq() == S_Retain) + NestingDetected = true; + + ResetSequenceProgress(S_Retain); + SetKnownSafe(HasKnownPositiveRefCount()); + InsertCall(I); + } + + SetKnownPositiveRefCount(); + return NestingDetected; +} + +bool TopDownPtrState::MatchWithRelease(ARCMDKindCache &Cache, + Instruction *Release) { + ClearKnownPositiveRefCount(); + + Sequence OldSeq = GetSeq(); + + MDNode *ReleaseMetadata = + Release->getMetadata(Cache.get(ARCMDKindID::ImpreciseRelease)); + + switch (OldSeq) { + case S_Retain: + case S_CanRelease: + if (OldSeq == S_Retain || ReleaseMetadata != nullptr) + ClearReverseInsertPts(); + // FALL THROUGH + case S_Use: + SetReleaseMetadata(ReleaseMetadata); + SetTailCallRelease(cast<CallInst>(Release)->isTailCall()); + return true; + case S_None: + return false; + case S_Stop: + case S_Release: + case S_MovableRelease: + llvm_unreachable("top-down pointer in bottom up state!"); + } + llvm_unreachable("Sequence unknown enum value"); +} + +bool TopDownPtrState::HandlePotentialAlterRefCount(Instruction *Inst, + const Value *Ptr, + ProvenanceAnalysis &PA, + ARCInstKind Class) { + // Check for possible releases. + if (!CanAlterRefCount(Inst, Ptr, PA, Class)) + return false; + + DEBUG(dbgs() << " CanAlterRefCount: Seq: " << GetSeq() << "; " << *Ptr + << "\n"); + ClearKnownPositiveRefCount(); + switch (GetSeq()) { + case S_Retain: + SetSeq(S_CanRelease); + assert(!HasReverseInsertPts()); + InsertReverseInsertPt(Inst); + + // One call can't cause a transition from S_Retain to S_CanRelease + // and S_CanRelease to S_Use. If we've made the first transition, + // we're done. + return true; + case S_Use: + case S_CanRelease: + case S_None: + return false; + case S_Stop: + case S_Release: + case S_MovableRelease: + llvm_unreachable("top-down pointer in release state!"); + } + llvm_unreachable("covered switch is not covered!?"); +} + +void TopDownPtrState::HandlePotentialUse(Instruction *Inst, const Value *Ptr, + ProvenanceAnalysis &PA, + ARCInstKind Class) { + // Check for possible direct uses. + switch (GetSeq()) { + case S_CanRelease: + if (!CanUse(Inst, Ptr, PA, Class)) + return; + DEBUG(dbgs() << " CanUse: Seq: " << GetSeq() << "; " << *Ptr + << "\n"); + SetSeq(S_Use); + return; + case S_Retain: + case S_Use: + case S_None: + return; + case S_Stop: + case S_Release: + case S_MovableRelease: + llvm_unreachable("top-down pointer in release state!"); + } +} diff --git a/lib/Transforms/ObjCARC/PtrState.h b/lib/Transforms/ObjCARC/PtrState.h new file mode 100644 index 0000000..e45e1ea --- /dev/null +++ b/lib/Transforms/ObjCARC/PtrState.h @@ -0,0 +1,210 @@ +//===--- PtrState.h - ARC State for a Ptr -------------------*- C++ -*-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains declarations for the ARC state associated with a ptr. It +// is only used by the ARC Sequence Dataflow computation. By separating this +// from the actual dataflow, it is easier to consider the mechanics of the ARC +// optimization separate from the actual predicates being used. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_PTRSTATE_H
+#define LLVM_LIB_TRANSFORMS_OBJCARC_PTRSTATE_H
+
+#include "ARCInstKind.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+namespace objcarc {
+
+class ARCMDKindCache;
+class ProvenanceAnalysis;
+
+/// \enum Sequence
+///
+/// \brief A sequence of states that a pointer may go through in which an
+/// objc_retain and objc_release are actually needed.
+enum Sequence {
+  S_None,
+  S_Retain,        ///< objc_retain(x).
+  S_CanRelease,    ///< foo(x) -- x could possibly see a ref count decrement.
+  S_Use,           ///< any use of x.
+  S_Stop,          ///< like S_Release, but code motion is stopped.
+  S_Release,       ///< objc_release(x).
+  S_MovableRelease ///< objc_release(x), !clang.imprecise_release.
+};
+
+raw_ostream &operator<<(raw_ostream &OS,
+                        const Sequence S) LLVM_ATTRIBUTE_UNUSED;
+
+/// \brief Unidirectional information about either a
+/// retain-decrement-use-release sequence or release-use-decrement-retain
+/// reverse sequence.
+struct RRInfo {
+  /// After an objc_retain, the reference count of the referenced
+  /// object is known to be positive. Similarly, before an objc_release, the
+  /// reference count of the referenced object is known to be positive. If
+  /// there are retain-release pairs in code regions where the retain count
+  /// is known to be positive, they can be eliminated, regardless of any side
+  /// effects between them.
+  ///
+  /// Also, a retain+release pair nested within another retain+release
+  /// pair all on the known same pointer value can be eliminated, regardless
+  /// of any intervening side effects.
+  ///
+  /// KnownSafe is true when either of these conditions is satisfied.
+  bool KnownSafe;
+
+  /// True if the objc_release calls are all marked with the "tail" keyword.
+  bool IsTailCallRelease;
+
+  /// If the Calls are objc_release calls and they all have a
+  /// clang.imprecise_release tag, this is the metadata tag.
+  MDNode *ReleaseMetadata;
+
+  /// For a top-down sequence, the set of objc_retains or
+  /// objc_retainBlocks. For bottom-up, the set of objc_releases.
+  SmallPtrSet<Instruction *, 2> Calls;
+
+  /// The set of optimal insert positions for moving calls in the opposite
+  /// sequence.
+  SmallPtrSet<Instruction *, 2> ReverseInsertPts;
+
+  /// If this is true, we cannot perform code motion but can still remove
+  /// retain/release pairs.
+  bool CFGHazardAfflicted;
+
+  RRInfo()
+      : KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(nullptr),
+        CFGHazardAfflicted(false) {}
+
+  void clear();
+
+  /// Conservatively merge the two RRInfo. Returns true if a partial merge has
+  /// occurred, false otherwise.
+  bool Merge(const RRInfo &Other);
+};
+
+/// \brief This class summarizes several per-pointer runtime properties which
+/// are propagated through the flow graph.
+class PtrState {
+protected:
+  /// True if the reference count is known to be incremented.
+  bool KnownPositiveRefCount;
+
+  /// True if we've seen an opportunity for partial RR elimination, such as
+  /// pushing calls into a CFG triangle or into one side of a CFG diamond.
+  bool Partial;
+
+  /// The current position in the sequence.
+  unsigned char Seq : 8;
+
+  /// Unidirectional information about the current sequence.
+  RRInfo RRI;
+
+  PtrState() : KnownPositiveRefCount(false), Partial(false), Seq(S_None) {}
+
+public:
+  bool IsKnownSafe() const { return RRI.KnownSafe; }
+
+  void SetKnownSafe(const bool NewValue) { RRI.KnownSafe = NewValue; }
+
+  bool IsTailCallRelease() const { return RRI.IsTailCallRelease; }
+
+  void SetTailCallRelease(const bool NewValue) {
+    RRI.IsTailCallRelease = NewValue;
+  }
+
+  bool IsTrackingImpreciseReleases() const {
+    return RRI.ReleaseMetadata != nullptr;
+  }
+
+  const MDNode *GetReleaseMetadata() const { return RRI.ReleaseMetadata; }
+
+  void SetReleaseMetadata(MDNode *NewValue) { RRI.ReleaseMetadata = NewValue; }
+
+  bool IsCFGHazardAfflicted() const { return RRI.CFGHazardAfflicted; }
+
+  void SetCFGHazardAfflicted(const bool NewValue) {
+    RRI.CFGHazardAfflicted = NewValue;
+  }
+
+  void SetKnownPositiveRefCount();
+  void ClearKnownPositiveRefCount();
+
+  bool HasKnownPositiveRefCount() const { return KnownPositiveRefCount; }
+
+  void SetSeq(Sequence NewSeq);
+
+  Sequence GetSeq() const { return static_cast<Sequence>(Seq); }
+
+  void ClearSequenceProgress() { ResetSequenceProgress(S_None); }
+
+  void ResetSequenceProgress(Sequence NewSeq);
+  void Merge(const PtrState &Other, bool TopDown);
+
+  void InsertCall(Instruction *I) { RRI.Calls.insert(I); }
+
+  void InsertReverseInsertPt(Instruction *I) { RRI.ReverseInsertPts.insert(I); }
+
+  void ClearReverseInsertPts() { RRI.ReverseInsertPts.clear(); }
+
+  bool HasReverseInsertPts() const { return !RRI.ReverseInsertPts.empty(); }
+
+  const RRInfo &GetRRInfo() const { return RRI; }
+};
+
+struct BottomUpPtrState : PtrState {
+  BottomUpPtrState() : PtrState() {}
+
+  /// (Re-)Initialize this bottom up pointer returning true if we detected a
+  /// pointer with nested releases.
+  bool InitBottomUp(ARCMDKindCache &Cache, Instruction *I);
+
+  /// Return true if this set of releases can be paired with a retain. Modifies
+  /// state appropriately to reflect that the matching occurred if it is
+  /// successful.
+  ///
+  /// It is assumed that one has already checked that the RCIdentity of the
+  /// retain and the RCIdentity of this ptr state are the same.
+  bool MatchWithRetain();
+
+  void HandlePotentialUse(BasicBlock *BB, Instruction *Inst, const Value *Ptr,
+                          ProvenanceAnalysis &PA, ARCInstKind Class);
+  bool HandlePotentialAlterRefCount(Instruction *Inst, const Value *Ptr,
+                                    ProvenanceAnalysis &PA, ARCInstKind Class);
+};
+
+struct TopDownPtrState : PtrState {
+  TopDownPtrState() : PtrState() {}
+
+  /// (Re-)Initialize this top down pointer returning true if we detected a
+  /// pointer with nested retains.
+  bool InitTopDown(ARCInstKind Kind, Instruction *I);
+
+  /// Return true if this set of retains can be paired with the given
+  /// release. Modifies state appropriately to reflect that the matching
+  /// occurred.
+ bool MatchWithRelease(ARCMDKindCache &Cache, Instruction *Release); + + void HandlePotentialUse(Instruction *Inst, const Value *Ptr, + ProvenanceAnalysis &PA, ARCInstKind Class); + + bool HandlePotentialAlterRefCount(Instruction *Inst, const Value *Ptr, + ProvenanceAnalysis &PA, ARCInstKind Class); +}; + +} // end namespace objcarc +} // end namespace llvm + +#endif diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index 5c74885..5aa2b97 100644 --- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -23,15 +23,15 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -71,7 +71,6 @@ struct AlignmentFromAssumptions : public FunctionPass { ScalarEvolution *SE; DominatorTree *DT; - const DataLayout *DL; bool extractAlignmentInfo(CallInst *I, Value *&AAPtr, const SCEV *&AlignSCEV, const SCEV *&OffSCEV); @@ -123,7 +122,7 @@ static unsigned getNewAlignmentDiff(const SCEV *DiffSCEV, // If the displacement is not an exact multiple, but the remainder is a // constant, then return this remainder (but only if it is a power of 2). - uint64_t DiffUnitsAbs = abs64(DiffUnits); + uint64_t DiffUnitsAbs = std::abs(DiffUnits); if (isPowerOf2_64(DiffUnitsAbs)) return (unsigned) DiffUnitsAbs; } @@ -316,7 +315,7 @@ bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) { continue; if (Instruction *K = dyn_cast<Instruction>(J)) - if (isValidAssumeForContext(ACall, K, DL, DT)) + if (isValidAssumeForContext(ACall, K, DT)) WorkList.push_back(K); } @@ -400,7 +399,7 @@ bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) { Visited.insert(J); for (User *UJ : J->users()) { Instruction *K = cast<Instruction>(UJ); - if (!Visited.count(K) && isValidAssumeForContext(ACall, K, DL, DT)) + if (!Visited.count(K) && isValidAssumeForContext(ACall, K, DT)) WorkList.push_back(K); } } @@ -413,8 +412,6 @@ bool AlignmentFromAssumptions::runOnFunction(Function &F) { auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); SE = &getAnalysis<ScalarEvolution>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? 
&DLP->getDataLayout() : nullptr; NewDestAlignments.clear(); NewSrcAlignments.clear(); diff --git a/lib/Transforms/Scalar/Android.mk b/lib/Transforms/Scalar/Android.mk index ed803cd..cf30f39 100644 --- a/lib/Transforms/Scalar/Android.mk +++ b/lib/Transforms/Scalar/Android.mk @@ -20,6 +20,7 @@ transforms_scalar_SRC_FILES := \ LoopDeletion.cpp \ LoopIdiomRecognize.cpp \ LoopInstSimplify.cpp \ + LoopInterchange.cpp \ LoopRerollPass.cpp \ LoopRotation.cpp \ LoopStrengthReduce.cpp \ diff --git a/lib/Transforms/Scalar/BDCE.cpp b/lib/Transforms/Scalar/BDCE.cpp index c7bd79d..09c605e 100644 --- a/lib/Transforms/Scalar/BDCE.cpp +++ b/lib/Transforms/Scalar/BDCE.cpp @@ -64,7 +64,6 @@ struct BDCE : public FunctionPass { APInt &KnownZero2, APInt &KnownOne2); AssumptionCache *AC; - const DataLayout *DL; DominatorTree *DT; }; } @@ -95,20 +94,21 @@ void BDCE::determineLiveOperandBits(const Instruction *UserI, // however, want to do this twice, so we cache the result in APInts that live // in the caller. For the two-relevant-operands case, both operand values are // provided here. - auto ComputeKnownBits = [&](unsigned BitWidth, const Value *V1, - const Value *V2) { - KnownZero = APInt(BitWidth, 0); - KnownOne = APInt(BitWidth, 0); - computeKnownBits(const_cast<Value*>(V1), KnownZero, KnownOne, DL, 0, AC, - UserI, DT); - - if (V2) { - KnownZero2 = APInt(BitWidth, 0); - KnownOne2 = APInt(BitWidth, 0); - computeKnownBits(const_cast<Value*>(V2), KnownZero2, KnownOne2, DL, 0, AC, - UserI, DT); - } - }; + auto ComputeKnownBits = + [&](unsigned BitWidth, const Value *V1, const Value *V2) { + const DataLayout &DL = I->getModule()->getDataLayout(); + KnownZero = APInt(BitWidth, 0); + KnownOne = APInt(BitWidth, 0); + computeKnownBits(const_cast<Value *>(V1), KnownZero, KnownOne, DL, 0, + AC, UserI, DT); + + if (V2) { + KnownZero2 = APInt(BitWidth, 0); + KnownOne2 = APInt(BitWidth, 0); + computeKnownBits(const_cast<Value *>(V2), KnownZero2, KnownOne2, DL, + 0, AC, UserI, DT); + } + }; switch (UserI->getOpcode()) { default: break; @@ -263,7 +263,6 @@ bool BDCE::runOnFunction(Function& F) { return false; AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - DL = F.getParent()->getDataLayout(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); DenseMap<Instruction *, APInt> AliveBits; diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index d297eb1..d12fdb7 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -18,6 +18,7 @@ add_llvm_library(LLVMScalarOpts LoopDeletion.cpp LoopIdiomRecognize.cpp LoopInstSimplify.cpp + LoopInterchange.cpp LoopRerollPass.cpp LoopRotation.cpp LoopStrengthReduce.cpp diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp index e3aab4b..4288742 100644 --- a/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -43,6 +43,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include <tuple> using namespace llvm; diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index 29d4e05..c974ebb 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -22,7 +22,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/IR/Constant.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/InstIterator.h" #include 
"llvm/IR/Instruction.h" #include "llvm/Pass.h" @@ -68,8 +67,7 @@ bool ConstantPropagation::runOnFunction(Function &F) { WorkList.insert(&*i); } bool Changed = false; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; + const DataLayout &DL = F.getParent()->getDataLayout(); TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 5a3b5cf..912d527 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -19,6 +19,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -126,8 +127,9 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) { Changed = true; } - // FIXME: Provide DL, TLI, DT, AT to SimplifyInstruction. - if (Value *V = SimplifyInstruction(P)) { + // FIXME: Provide TLI, DT, AT to SimplifyInstruction. + const DataLayout &DL = BB->getModule()->getDataLayout(); + if (Value *V = SimplifyInstruction(P, DL)) { P->replaceAllUsesWith(V); P->eraseFromParent(); Changed = true; diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index c2ce1d5..cb8981b 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -33,7 +34,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -78,7 +79,8 @@ namespace { bool HandleFree(CallInst *F); bool handleEndBlock(BasicBlock &BB); void RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, - SmallSetVector<Value*, 16> &DeadStackObjects); + SmallSetVector<Value *, 16> &DeadStackObjects, + const DataLayout &DL); void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -194,18 +196,12 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) { /// describe the memory operations for this instruction. static AliasAnalysis::Location getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { - const DataLayout *DL = AA.getDataLayout(); if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) return AA.getLocation(SI); if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) { // memcpy/memmove/memset. AliasAnalysis::Location Loc = AA.getLocationForDest(MI); - // If we don't have target data around, an unknown size in Location means - // that we should use the size of the pointee type. This isn't valid for - // memset/memcpy, which writes more than an i8. - if (Loc.Size == AliasAnalysis::UnknownSize && DL == nullptr) - return AliasAnalysis::Location(); return Loc; } @@ -215,11 +211,6 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { switch (II->getIntrinsicID()) { default: return AliasAnalysis::Location(); // Unhandled intrinsic. 
case Intrinsic::init_trampoline: - // If we don't have target data around, an unknown size in Location means - // that we should use the size of the pointee type. This isn't valid for - // init.trampoline, which writes more than an i8. - if (!DL) return AliasAnalysis::Location(); - // FIXME: We don't know the size of the trampoline, so we can't really // handle it here. return AliasAnalysis::Location(II->getArgOperand(0)); @@ -321,9 +312,10 @@ static Value *getStoredPointerOperand(Instruction *I) { return CS.getArgument(0); } -static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) { +static uint64_t getPointerSize(const Value *V, const DataLayout &DL, + const TargetLibraryInfo *TLI) { uint64_t Size; - if (getObjectSize(V, Size, AA.getDataLayout(), AA.getTargetLibraryInfo())) + if (getObjectSize(V, Size, DL, TLI)) return Size; return AliasAnalysis::UnknownSize; } @@ -343,10 +335,9 @@ namespace { /// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, const AliasAnalysis::Location &Earlier, - AliasAnalysis &AA, - int64_t &EarlierOff, - int64_t &LaterOff) { - const DataLayout *DL = AA.getDataLayout(); + const DataLayout &DL, + const TargetLibraryInfo *TLI, + int64_t &EarlierOff, int64_t &LaterOff) { const Value *P1 = Earlier.Ptr->stripPointerCasts(); const Value *P2 = Later.Ptr->stripPointerCasts(); @@ -367,7 +358,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // Otherwise, we have to have size information, and the later store has to be // larger than the earlier one. if (Later.Size == AliasAnalysis::UnknownSize || - Earlier.Size == AliasAnalysis::UnknownSize || DL == nullptr) + Earlier.Size == AliasAnalysis::UnknownSize) return OverwriteUnknown; // Check to see if the later store is to the entire object (either a global, @@ -382,7 +373,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, return OverwriteUnknown; // If the "Later" store is to a recognizable object, get its size. - uint64_t ObjectSize = getPointerSize(UO2, AA); + uint64_t ObjectSize = getPointerSize(UO2, DL, TLI); if (ObjectSize != AliasAnalysis::UnknownSize) if (ObjectSize == Later.Size && ObjectSize >= Earlier.Size) return OverwriteComplete; @@ -560,8 +551,10 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { if (isRemovable(DepWrite) && !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) { int64_t InstWriteOffset, DepWriteOffset; - OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA, - DepWriteOffset, InstWriteOffset); + const DataLayout &DL = BB.getModule()->getDataLayout(); + OverwriteResult OR = + isOverwrite(Loc, DepLoc, DL, AA->getTargetLibraryInfo(), + DepWriteOffset, InstWriteOffset); if (OR == OverwriteComplete) { DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DepWrite << "\n KILLER: " << *Inst << '\n'); @@ -655,6 +648,7 @@ bool DSE::HandleFree(CallInst *F) { AliasAnalysis::Location Loc = AliasAnalysis::Location(F->getOperand(0)); SmallVector<BasicBlock *, 16> Blocks; Blocks.push_back(F->getParent()); + const DataLayout &DL = F->getModule()->getDataLayout(); while (!Blocks.empty()) { BasicBlock *BB = Blocks.pop_back_val(); @@ -668,7 +662,7 @@ bool DSE::HandleFree(CallInst *F) { break; Value *DepPointer = - GetUnderlyingObject(getStoredPointerOperand(Dependency)); + GetUnderlyingObject(getStoredPointerOperand(Dependency), DL); // Check for aliasing. 
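// [Editor's aside: illustrative sketch, not part of the commit] The
// recurring migration throughout these hunks: DataLayout now lives on the
// Module and is always present, so the nullable DataLayoutPass lookups
// disappear. Hypothetical helper, assuming llvm/IR/Module.h and
// llvm/IR/DataLayout.h are included:
static void sketchDataLayoutAccess(llvm::Function &F, llvm::BasicBlock &BB,
                                   llvm::Instruction *I) {
  const llvm::DataLayout &FromF = F.getParent()->getDataLayout();   // Function
  const llvm::DataLayout &FromBB = BB.getModule()->getDataLayout(); // BasicBlock
  const llvm::DataLayout &FromI = I->getModule()->getDataLayout();  // Instruction
  (void)FromF; (void)FromBB; (void)FromI; // no null checks required any more
}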
if (!AA->isMustAlias(F->getArgOperand(0), DepPointer)) @@ -728,6 +722,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (AI->hasByValOrInAllocaAttr()) DeadStackObjects.insert(AI); + const DataLayout &DL = BB.getModule()->getDataLayout(); + // Scan the basic block backwards for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){ --BBI; @@ -736,7 +732,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) { // See through pointer-to-pointer bitcasts SmallVector<Value *, 4> Pointers; - GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers); + GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers, DL); // Stores to stack values are valid candidates for removal. bool AllDead = true; @@ -799,8 +795,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // the call is live. DeadStackObjects.remove_if([&](Value *I) { // See if the call site touches the value. - AliasAnalysis::ModRefResult A = - AA->getModRefInfo(CS, I, getPointerSize(I, *AA)); + AliasAnalysis::ModRefResult A = AA->getModRefInfo( + CS, I, getPointerSize(I, DL, AA->getTargetLibraryInfo())); return A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref; }); @@ -835,7 +831,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // Remove any allocas from the DeadPointer set that are loaded, as this // makes any stores above the access live. - RemoveAccessedObjects(LoadedLoc, DeadStackObjects); + RemoveAccessedObjects(LoadedLoc, DeadStackObjects, DL); // If all of the allocas were clobbered by the access then we're not going // to find anything else to process. @@ -850,8 +846,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { /// of the stack objects in the DeadStackObjects set. If so, they become live /// because the location is being loaded. void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, - SmallSetVector<Value*, 16> &DeadStackObjects) { - const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr); + SmallSetVector<Value *, 16> &DeadStackObjects, + const DataLayout &DL) { + const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL); // A constant can't be in the dead pointer set. if (isa<Constant>(UnderlyingPointer)) @@ -867,7 +864,8 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, // Remove objects that could alias LoadedLoc. DeadStackObjects.remove_if([&](Value *I) { // See if the loaded location could alias the stack location. 
- AliasAnalysis::Location StackLoc(I, getPointerSize(I, *AA)); + AliasAnalysis::Location StackLoc( + I, getPointerSize(I, DL, AA->getTargetLibraryInfo())); return !AA->isNoAlias(StackLoc, LoadedLoc); }); } diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 9309623..d5b9e03 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" @@ -27,7 +28,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include <deque> @@ -263,7 +264,6 @@ namespace { class EarlyCSE { public: Function &F; - const DataLayout *DL; const TargetLibraryInfo &TLI; const TargetTransformInfo &TTI; DominatorTree &DT; @@ -308,11 +308,10 @@ public: unsigned CurrentGeneration; /// \brief Set up the EarlyCSE runner for a particular function. - EarlyCSE(Function &F, const DataLayout *DL, const TargetLibraryInfo &TLI, + EarlyCSE(Function &F, const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI, DominatorTree &DT, AssumptionCache &AC) - : F(F), DL(DL), TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) { - } + : F(F), TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {} bool run(); @@ -469,6 +468,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { Instruction *LastStore = nullptr; bool Changed = false; + const DataLayout &DL = BB->getModule()->getDataLayout(); // See if any instructions in the block can be eliminated. If so, do it. If // not, add them to AvailableValues. @@ -685,14 +685,12 @@ bool EarlyCSE::run() { PreservedAnalyses EarlyCSEPass::run(Function &F, AnalysisManager<Function> *AM) { - const DataLayout *DL = F.getParent()->getDataLayout(); - auto &TLI = AM->getResult<TargetLibraryAnalysis>(F); auto &TTI = AM->getResult<TargetIRAnalysis>(F); auto &DT = AM->getResult<DominatorTreeAnalysis>(F); auto &AC = AM->getResult<AssumptionAnalysis>(F); - EarlyCSE CSE(F, DL, TLI, TTI, DT, AC); + EarlyCSE CSE(F, TLI, TTI, DT, AC); if (!CSE.run()) return PreservedAnalyses::all(); @@ -724,14 +722,12 @@ public: if (skipOptnoneFunction(F)) return false; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - auto *DL = DLP ? 
&DLP->getDataLayout() : nullptr; auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - EarlyCSE CSE(F, DL, TLI, TTI, DT, AC); + EarlyCSE CSE(F, TLI, TTI, DT, AC); return CSE.run(); } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 73a1f25..c73e60f 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -33,6 +33,7 @@ #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" @@ -45,7 +46,7 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -584,14 +585,13 @@ namespace { /// Emit code into this block to adjust the value defined here to the /// specified type. This handles various coercion cases. - Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const; + Value *MaterializeAdjustedValue(LoadInst *LI, GVN &gvn) const; }; class GVN : public FunctionPass { bool NoLoads; MemoryDependenceAnalysis *MD; DominatorTree *DT; - const DataLayout *DL; const TargetLibraryInfo *TLI; AssumptionCache *AC; SetVector<BasicBlock *> DeadBlocks; @@ -630,7 +630,6 @@ namespace { InstrsToErase.push_back(I); } - const DataLayout *getDataLayout() const { return DL; } DominatorTree &getDominatorTree() const { return *DT; } AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); } MemoryDependenceAnalysis &getMemDep() const { return *MD; } @@ -956,8 +955,9 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, return -1; int64_t StoreOffset = 0, LoadOffset = 0; - Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr,StoreOffset,&DL); - Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, &DL); + Value *StoreBase = + GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL); + Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL); if (StoreBase != LoadBase) return -1; @@ -1021,13 +1021,13 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, /// This function is called when we have a /// memdep query of a load that ends up being a clobbering store. static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, - StoreInst *DepSI, - const DataLayout &DL) { + StoreInst *DepSI) { // Cannot handle reading from store of first-class aggregate yet. if (DepSI->getValueOperand()->getType()->isStructTy() || DepSI->getValueOperand()->getType()->isArrayTy()) return -1; + const DataLayout &DL = DepSI->getModule()->getDataLayout(); Value *StorePtr = DepSI->getPointerOperand(); uint64_t StoreSize =DL.getTypeSizeInBits(DepSI->getValueOperand()->getType()); return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, @@ -1052,11 +1052,11 @@ static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, // then we should widen it! 
int64_t LoadOffs = 0; const Value *LoadBase = - GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, &DL); + GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL); unsigned LoadSize = DL.getTypeStoreSize(LoadTy); - unsigned Size = MemoryDependenceAnalysis:: - getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, DL); + unsigned Size = MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize( + LoadBase, LoadOffs, LoadSize, DepLI); if (Size == 0) return -1; return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, DL); @@ -1086,7 +1086,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, Constant *Src = dyn_cast<Constant>(MTI->getSource()); if (!Src) return -1; - GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &DL)); + GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL)); if (!GV || !GV->isConstant()) return -1; // See if the access is within the bounds of the transfer. @@ -1104,7 +1104,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); - if (ConstantFoldLoadFromConstPtr(Src, &DL)) + if (ConstantFoldLoadFromConstPtr(Src, DL)) return Offset; return -1; } @@ -1157,7 +1157,7 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy, Instruction *InsertPt, GVN &gvn) { - const DataLayout &DL = *gvn.getDataLayout(); + const DataLayout &DL = SrcVal->getModule()->getDataLayout(); // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to // widen SrcVal out to a larger load. unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType()); @@ -1265,7 +1265,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); - return ConstantFoldLoadFromConstPtr(Src, &DL); + return ConstantFoldLoadFromConstPtr(Src, DL); } @@ -1281,7 +1281,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB, LI->getParent())) { assert(!ValuesPerBlock[0].isUndefValue() && "Dead BB dominate this block"); - return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn); + return ValuesPerBlock[0].MaterializeAdjustedValue(LI, gvn); } // Otherwise, we have to construct SSA form. @@ -1289,8 +1289,6 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, SSAUpdater SSAUpdate(&NewPHIs); SSAUpdate.Initialize(LI->getType(), LI->getName()); - Type *LoadTy = LI->getType(); - for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { const AvailableValueInBlock &AV = ValuesPerBlock[i]; BasicBlock *BB = AV.BB; @@ -1298,7 +1296,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, if (SSAUpdate.HasValueForBlock(BB)) continue; - SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn)); + SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LI, gvn)); } // Perform PHI construction. 
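// [Editor's aside: illustrative sketch, not part of the commit] The GVN
// hunks above converge on two idioms: analysis helpers take the DataLayout
// by reference instead of by nullable pointer (so call sites pass DL, not
// &DL), and static helpers recover it from an Instruction they already
// hold rather than threading a parameter through. `widenedStoreSize` is a
// hypothetical name:
static uint64_t widenedStoreSize(llvm::LoadInst *LI) {
  const llvm::DataLayout &DL = LI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(LI->getType()); // bytes the load touches
}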
@@ -1326,16 +1324,16 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, return V; } -Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const { +Value *AvailableValueInBlock::MaterializeAdjustedValue(LoadInst *LI, + GVN &gvn) const { Value *Res; + Type *LoadTy = LI->getType(); + const DataLayout &DL = LI->getModule()->getDataLayout(); if (isSimpleValue()) { Res = getSimpleValue(); if (Res->getType() != LoadTy) { - const DataLayout *DL = gvn.getDataLayout(); - assert(DL && "Need target data to handle type mismatch case"); - Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), - *DL); - + Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), DL); + DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " << *getSimpleValue() << '\n' << *Res << '\n' << "\n\n\n"); @@ -1353,10 +1351,8 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c << *Res << '\n' << "\n\n\n"); } } else if (isMemIntrinValue()) { - const DataLayout *DL = gvn.getDataLayout(); - assert(DL && "Need target data to handle type mismatch case"); - Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, - LoadTy, BB->getTerminator(), *DL); + Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy, + BB->getTerminator(), DL); DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset << " " << *getMemIntrinValue() << '\n' << *Res << '\n' << "\n\n\n"); @@ -1383,6 +1379,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, // dependencies that produce an unknown value for the load (such as a call // that could potentially clobber the load). unsigned NumDeps = Deps.size(); + const DataLayout &DL = LI->getModule()->getDataLayout(); for (unsigned i = 0, e = NumDeps; i != e; ++i) { BasicBlock *DepBB = Deps[i].getBB(); MemDepResult DepInfo = Deps[i].getResult(); @@ -1409,9 +1406,9 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, // read by the load, we can extract the bits we need for the load from the // stored value. if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) { - if (DL && Address) { - int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address, - DepSI, *DL); + if (Address) { + int Offset = + AnalyzeLoadFromClobberingStore(LI->getType(), Address, DepSI); if (Offset != -1) { ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, DepSI->getValueOperand(), @@ -1428,9 +1425,9 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) { // If this is a clobber and L is the first instruction in its block, then // we have the first instruction in the entry block. - if (DepLI != LI && Address && DL) { - int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), Address, - DepLI, *DL); + if (DepLI != LI && Address) { + int Offset = + AnalyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL); if (Offset != -1) { ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI, @@ -1443,9 +1440,9 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, // If the clobbering value is a memset/memcpy/memmove, see if we can // forward a value on from it. 
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) { - if (DL && Address) { + if (Address) { int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address, - DepMI, *DL); + DepMI, DL); if (Offset != -1) { ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI, Offset)); @@ -1484,8 +1481,8 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, if (S->getValueOperand()->getType() != LI->getType()) { // If the stored value is larger or equal to the loaded value, we can // reuse it. - if (!DL || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(), - LI->getType(), *DL)) { + if (!CanCoerceMustAliasedValueToLoad(S->getValueOperand(), + LI->getType(), DL)) { UnavailableBlocks.push_back(DepBB); continue; } @@ -1501,7 +1498,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, if (LD->getType() != LI->getType()) { // If the stored value is larger or equal to the loaded value, we can // reuse it. - if (!DL || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*DL)) { + if (!CanCoerceMustAliasedValueToLoad(LD, LI->getType(), DL)) { UnavailableBlocks.push_back(DepBB); continue; } @@ -1613,6 +1610,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // Check if the load can safely be moved to all the unavailable predecessors. bool CanDoPRE = true; + const DataLayout &DL = LI->getModule()->getDataLayout(); SmallVector<Instruction*, 8> NewInsts; for (auto &PredLoad : PredLoads) { BasicBlock *UnavailablePred = PredLoad.first; @@ -1833,10 +1831,11 @@ bool GVN::processLoad(LoadInst *L) { // ... to a pointer that has been loaded from before... MemDepResult Dep = MD->getDependency(L); + const DataLayout &DL = L->getModule()->getDataLayout(); // If we have a clobber and target data is around, see if this is a clobber // that we can fix up through code synthesis. - if (Dep.isClobber() && DL) { + if (Dep.isClobber()) { // Check to see if we have something like this: // store i32 123, i32* %P // %A = bitcast i32* %P to i8* @@ -1849,12 +1848,11 @@ bool GVN::processLoad(LoadInst *L) { // access code. Value *AvailVal = nullptr; if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) { - int Offset = AnalyzeLoadFromClobberingStore(L->getType(), - L->getPointerOperand(), - DepSI, *DL); + int Offset = AnalyzeLoadFromClobberingStore( + L->getType(), L->getPointerOperand(), DepSI); if (Offset != -1) AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset, - L->getType(), L, *DL); + L->getType(), L, DL); } // Check to see if we have something like this: @@ -1867,9 +1865,8 @@ bool GVN::processLoad(LoadInst *L) { if (DepLI == L) return false; - int Offset = AnalyzeLoadFromClobberingLoad(L->getType(), - L->getPointerOperand(), - DepLI, *DL); + int Offset = AnalyzeLoadFromClobberingLoad( + L->getType(), L->getPointerOperand(), DepLI, DL); if (Offset != -1) AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this); } @@ -1877,11 +1874,10 @@ bool GVN::processLoad(LoadInst *L) { // If the clobbering value is a memset/memcpy/memmove, see if we can forward // a value on from it. 
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) { - int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), - L->getPointerOperand(), - DepMI, *DL); + int Offset = AnalyzeLoadFromClobberingMemInst( + L->getType(), L->getPointerOperand(), DepMI, DL); if (Offset != -1) - AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *DL); + AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, DL); } if (AvailVal) { @@ -1932,17 +1928,13 @@ bool GVN::processLoad(LoadInst *L) { // actually have the same type. See if we know how to reuse the stored // value (depending on its type). if (StoredVal->getType() != L->getType()) { - if (DL) { - StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), - L, *DL); - if (!StoredVal) - return false; - - DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal - << '\n' << *L << "\n\n\n"); - } - else + StoredVal = + CoerceAvailableValueToLoadType(StoredVal, L->getType(), L, DL); + if (!StoredVal) return false; + + DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal + << '\n' << *L << "\n\n\n"); } // Remove it! @@ -1961,17 +1953,12 @@ bool GVN::processLoad(LoadInst *L) { // the same type. See if we know how to reuse the previously loaded value // (depending on its type). if (DepLI->getType() != L->getType()) { - if (DL) { - AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), - L, *DL); - if (!AvailableVal) - return false; - - DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal - << "\n" << *L << "\n\n\n"); - } - else + AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L, DL); + if (!AvailableVal) return false; + + DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal + << "\n" << *L << "\n\n\n"); } // Remove it! @@ -2239,6 +2226,7 @@ bool GVN::processInstruction(Instruction *I) { // to value numbering it. Value numbering often exposes redundancies, for // example if it determines that %y is equal to %x then the instruction // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. + const DataLayout &DL = I->getModule()->getDataLayout(); if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) { I->replaceAllUsesWith(V); if (MD && V->getType()->getScalarType()->isPointerTy()) @@ -2357,8 +2345,6 @@ bool GVN::runOnFunction(Function& F) { if (!NoLoads) MD = &getAnalysis<MemoryDependenceAnalysis>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? 
&DLP->getDataLayout() : nullptr; AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>()); diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index f99ebbc..51e8041 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -31,6 +31,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -44,7 +45,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" @@ -73,7 +73,6 @@ namespace { LoopInfo *LI; ScalarEvolution *SE; DominatorTree *DT; - const DataLayout *DL; TargetLibraryInfo *TLI; const TargetTransformInfo *TTI; @@ -82,8 +81,8 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - IndVarSimplify() : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), - DL(nullptr), Changed(false) { + IndVarSimplify() + : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), Changed(false) { initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry()); } @@ -663,14 +662,14 @@ namespace { /// extended by this sign or zero extend operation. This is used to determine /// the final width of the IV before actually widening it. static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, - const DataLayout *DL, const TargetTransformInfo *TTI) { + const TargetTransformInfo *TTI) { bool IsSigned = Cast->getOpcode() == Instruction::SExt; if (!IsSigned && Cast->getOpcode() != Instruction::ZExt) return; Type *Ty = Cast->getType(); uint64_t Width = SE->getTypeSizeInBits(Ty); - if (DL && !DL->isLegalInteger(Width)) + if (!Cast->getModule()->getDataLayout().isLegalInteger(Width)) return; // Cast is either an sext or zext up to this point. @@ -1201,7 +1200,6 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { namespace { class IndVarSimplifyVisitor : public IVVisitor { ScalarEvolution *SE; - const DataLayout *DL; const TargetTransformInfo *TTI; PHINode *IVPhi; @@ -1209,9 +1207,9 @@ namespace { WideIVInfo WI; IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, - const DataLayout *DL, const TargetTransformInfo *TTI, + const TargetTransformInfo *TTI, const DominatorTree *DTree) - : SE(SCEV), DL(DL), TTI(TTI), IVPhi(IV) { + : SE(SCEV), TTI(TTI), IVPhi(IV) { DT = DTree; WI.NarrowIV = IVPhi; if (ReduceLiveIVs) @@ -1219,9 +1217,7 @@ namespace { } // Implement the interface used by simplifyUsersOfIV. - void visitCast(CastInst *Cast) override { - visitIVCast(Cast, WI, SE, DL, TTI); - } + void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } }; } @@ -1255,7 +1251,7 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L, PHINode *CurrIV = LoopPhis.pop_back_val(); // Information about sign/zero extensions of CurrIV. 
- IndVarSimplifyVisitor Visitor(CurrIV, SE, DL, TTI, DT); + IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &Visitor); @@ -1521,9 +1517,8 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride. /// This is difficult in general for SCEV because of potential overflow. But we /// could at least handle constant BECounts. -static PHINode * -FindLoopCounter(Loop *L, const SCEV *BECount, - ScalarEvolution *SE, DominatorTree *DT, const DataLayout *DL) { +static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount, + ScalarEvolution *SE, DominatorTree *DT) { uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType()); Value *Cond = @@ -1552,7 +1547,8 @@ FindLoopCounter(Loop *L, const SCEV *BECount, // AR may be wider than BECount. With eq/ne tests overflow is immaterial. // AR may not be a narrower type, or we may never exit. uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType()); - if (PhiWidth < BCWidth || (DL && !DL->isLegalInteger(PhiWidth))) + if (PhiWidth < BCWidth || + !L->getHeader()->getModule()->getDataLayout().isLegalInteger(PhiWidth)) continue; const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)); @@ -1705,51 +1701,15 @@ LinearFunctionTestReplace(Loop *L, // compare against the post-incremented value, otherwise we must compare // against the preincremented value. if (L->getExitingBlock() == L->getLoopLatch()) { + // Add one to the "backedge-taken" count to get the trip count. + // This addition may overflow, which is valid as long as the comparison is + // truncated to BackedgeTakenCount->getType(). + IVCount = SE->getAddExpr(BackedgeTakenCount, + SE->getConstant(BackedgeTakenCount->getType(), 1)); // The BackedgeTaken expression contains the number of times that the // backedge branches to the loop header. This is one less than the // number of times the loop executes, so use the incremented indvar. - llvm::Value *IncrementedIndvar = - IndVar->getIncomingValueForBlock(L->getExitingBlock()); - const auto *IncrementedIndvarSCEV = - cast<SCEVAddRecExpr>(SE->getSCEV(IncrementedIndvar)); - // It is unsafe to use the incremented indvar if it has a wrapping flag, we - // don't want to compare against a poison value. Check the SCEV that - // corresponds to the incremented indvar, the SCEVExpander will only insert - // flags in the IR if the SCEV originally had wrapping flags. - // FIXME: In theory, SCEV could drop flags even though they exist in IR. - // A more robust solution would involve getting a new expression for - // CmpIndVar by applying non-NSW/NUW AddExprs. - auto WrappingFlags = - ScalarEvolution::setFlags(SCEV::FlagNUW, SCEV::FlagNSW); - const SCEV *IVInit = IncrementedIndvarSCEV->getStart(); - if (SE->getTypeSizeInBits(IVInit->getType()) > - SE->getTypeSizeInBits(IVCount->getType())) - IVInit = SE->getTruncateExpr(IVInit, IVCount->getType()); - unsigned BitWidth = SE->getTypeSizeInBits(IVCount->getType()); - Type *WideTy = IntegerType::get(SE->getContext(), BitWidth + 1); - // Check if InitIV + BECount+1 requires sign/zero extension. - // If not, clear the corresponding flag from WrappingFlags because it is not - // necessary for those flags in the IncrementedIndvarSCEV expression. 
- if (SE->getSignExtendExpr(SE->getAddExpr(IVInit, BackedgeTakenCount), - WideTy) == - SE->getAddExpr(SE->getSignExtendExpr(IVInit, WideTy), - SE->getSignExtendExpr(BackedgeTakenCount, WideTy))) - WrappingFlags = ScalarEvolution::clearFlags(WrappingFlags, SCEV::FlagNSW); - if (SE->getZeroExtendExpr(SE->getAddExpr(IVInit, BackedgeTakenCount), - WideTy) == - SE->getAddExpr(SE->getZeroExtendExpr(IVInit, WideTy), - SE->getZeroExtendExpr(BackedgeTakenCount, WideTy))) - WrappingFlags = ScalarEvolution::clearFlags(WrappingFlags, SCEV::FlagNUW); - if (!ScalarEvolution::maskFlags(IncrementedIndvarSCEV->getNoWrapFlags(), - WrappingFlags)) { - // Add one to the "backedge-taken" count to get the trip count. - // This addition may overflow, which is valid as long as the comparison is - // truncated to BackedgeTakenCount->getType(). - IVCount = - SE->getAddExpr(BackedgeTakenCount, - SE->getConstant(BackedgeTakenCount->getType(), 1)); - CmpIndVar = IncrementedIndvar; - } + CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock()); } Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE); @@ -1932,12 +1892,11 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); SE = &getAnalysis<ScalarEvolution>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); TLI = TLIP ? &TLIP->getTLI() : nullptr; auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr; + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); DeadInsts.clear(); Changed = false; @@ -1949,7 +1908,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); // Create a rewriter object which we'll use to transform the code with. - SCEVExpander Rewriter(*SE, "indvars"); + SCEVExpander Rewriter(*SE, DL, "indvars"); #ifndef NDEBUG Rewriter.setDebugType(DEBUG_TYPE); #endif @@ -1978,7 +1937,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // If we have a trip count expression, rewrite the loop's exit condition // using it. We can currently only handle loops with a single exit. if (canExpandBackedgeTakenCount(L, SE) && needsLFTR(L, DT)) { - PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, DL); + PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT); if (IndVar) { // Check preconditions for proper SCEVExpander operation. SCEV does not // express SCEVExpander's dependencies, such as LoopSimplify. 
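// [Editor's aside: illustrative sketch, not part of the commit] With the
// DataLayout member gone from IndVarSimplify, visitIVCast above asks the
// module directly whether the widened IV type is legal; `isLegalIVWidth`
// is a hypothetical helper name:
static bool isLegalIVWidth(llvm::CastInst *Cast, llvm::ScalarEvolution *SE) {
  uint64_t Width = SE->getTypeSizeInBits(Cast->getType());
  return Cast->getModule()->getDataLayout().isLegalInteger(Width);
}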
Instead any diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index 8559e63..cbdacad 100644 --- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -42,7 +42,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Optional.h" - #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" @@ -51,27 +50,23 @@ #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" - #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" #include "llvm/IR/Verifier.h" - +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" - +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" #include "llvm/Transforms/Utils/UnrollLoop.h" - -#include "llvm/Pass.h" - #include <array> using namespace llvm; @@ -82,6 +77,9 @@ static cl::opt<unsigned> LoopSizeCutoff("irce-loop-size-cutoff", cl::Hidden, static cl::opt<bool> PrintChangedLoops("irce-print-changed-loops", cl::Hidden, cl::init(false)); +static cl::opt<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden, + cl::init(false)); + static cl::opt<int> MaxExitProbReciprocal("irce-max-exit-prob-reciprocal", cl::Hidden, cl::init(10)); @@ -96,23 +94,41 @@ namespace { /// /// and /// -/// 2. a condition that is provably true for some range of values taken by the -/// containing loop's induction variable. -/// -/// Currently all inductive range checks are branches conditional on an -/// expression of the form +/// 2. a condition that is provably true for some contiguous range of values +/// taken by the containing loop's induction variable. /// -/// 0 <= (Offset + Scale * I) < Length -/// -/// where `I' is the canonical induction variable of a loop to which Offset and -/// Scale are loop invariant, and Length is >= 0. Currently the 'false' branch -/// is considered cold, looking at profiling data to verify that is a TODO. - class InductiveRangeCheck { + // Classifies a range check + enum RangeCheckKind : unsigned { + // Range check of the form "0 <= I". + RANGE_CHECK_LOWER = 1, + + // Range check of the form "I < L" where L is known positive. + RANGE_CHECK_UPPER = 2, + + // The logical and of the RANGE_CHECK_LOWER and RANGE_CHECK_UPPER + // conditions. + RANGE_CHECK_BOTH = RANGE_CHECK_LOWER | RANGE_CHECK_UPPER, + + // Unrecognized range check condition. 
+ RANGE_CHECK_UNKNOWN = (unsigned)-1 + }; + + static const char *rangeCheckKindToStr(RangeCheckKind); + const SCEV *Offset; const SCEV *Scale; Value *Length; BranchInst *Branch; + RangeCheckKind Kind; + + static RangeCheckKind parseRangeCheckICmp(Loop *L, ICmpInst *ICI, + ScalarEvolution &SE, Value *&Index, + Value *&Length); + + static InductiveRangeCheck::RangeCheckKind + parseRangeCheck(Loop *L, ScalarEvolution &SE, Value *Condition, + const SCEV *&Index, Value *&UpperLimit); InductiveRangeCheck() : Offset(nullptr), Scale(nullptr), Length(nullptr), Branch(nullptr) { } @@ -124,13 +140,17 @@ public: void print(raw_ostream &OS) const { OS << "InductiveRangeCheck:\n"; + OS << " Kind: " << rangeCheckKindToStr(Kind) << "\n"; OS << " Offset: "; Offset->print(OS); OS << " Scale: "; Scale->print(OS); OS << " Length: "; - Length->print(OS); - OS << " Branch: "; + if (Length) + Length->print(OS); + else + OS << "(null)"; + OS << "\n Branch: "; getBranch()->print(OS); OS << "\n"; } @@ -207,160 +227,156 @@ char InductiveRangeCheckElimination::ID = 0; INITIALIZE_PASS(InductiveRangeCheckElimination, "irce", "Inductive range check elimination", false, false) -static bool IsLowerBoundCheck(Value *Check, Value *&IndexV) { - using namespace llvm::PatternMatch; +const char *InductiveRangeCheck::rangeCheckKindToStr( + InductiveRangeCheck::RangeCheckKind RCK) { + switch (RCK) { + case InductiveRangeCheck::RANGE_CHECK_UNKNOWN: + return "RANGE_CHECK_UNKNOWN"; - ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; - Value *LHS = nullptr, *RHS = nullptr; + case InductiveRangeCheck::RANGE_CHECK_UPPER: + return "RANGE_CHECK_UPPER"; - if (!match(Check, m_ICmp(Pred, m_Value(LHS), m_Value(RHS)))) - return false; + case InductiveRangeCheck::RANGE_CHECK_LOWER: + return "RANGE_CHECK_LOWER"; + + case InductiveRangeCheck::RANGE_CHECK_BOTH: + return "RANGE_CHECK_BOTH"; + } + + llvm_unreachable("unknown range check type!"); +} + +/// Parse a single ICmp instruction, `ICI`, into a range check. If `ICI` +/// cannot +/// be interpreted as a range check, return `RANGE_CHECK_UNKNOWN` and set +/// `Index` and `Length` to `nullptr`. Otherwise set `Index` to the value +/// being +/// range checked, and set `Length` to the upper limit `Index` is being range +/// checked with if (and only if) the range check type is stronger or equal to +/// RANGE_CHECK_UPPER. 
+/// +InductiveRangeCheck::RangeCheckKind +InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI, + ScalarEvolution &SE, Value *&Index, + Value *&Length) { + + auto IsNonNegativeAndNotLoopVarying = [&SE, L](Value *V) { + const SCEV *S = SE.getSCEV(V); + if (isa<SCEVCouldNotCompute>(S)) + return false; + + return SE.getLoopDisposition(S, L) == ScalarEvolution::LoopInvariant && + SE.isKnownNonNegative(S); + }; + + using namespace llvm::PatternMatch; + + ICmpInst::Predicate Pred = ICI->getPredicate(); + Value *LHS = ICI->getOperand(0); + Value *RHS = ICI->getOperand(1); switch (Pred) { default: - return false; + return RANGE_CHECK_UNKNOWN; case ICmpInst::ICMP_SLE: std::swap(LHS, RHS); // fallthrough case ICmpInst::ICMP_SGE: - if (!match(RHS, m_ConstantInt<0>())) - return false; - IndexV = LHS; - return true; + if (match(RHS, m_ConstantInt<0>())) { + Index = LHS; + return RANGE_CHECK_LOWER; + } + return RANGE_CHECK_UNKNOWN; case ICmpInst::ICMP_SLT: std::swap(LHS, RHS); // fallthrough case ICmpInst::ICMP_SGT: - if (!match(RHS, m_ConstantInt<-1>())) - return false; - IndexV = LHS; - return true; - } -} - -static bool IsUpperBoundCheck(Value *Check, Value *Index, Value *&UpperLimit) { - using namespace llvm::PatternMatch; - - ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; - Value *LHS = nullptr, *RHS = nullptr; - - if (!match(Check, m_ICmp(Pred, m_Value(LHS), m_Value(RHS)))) - return false; + if (match(RHS, m_ConstantInt<-1>())) { + Index = LHS; + return RANGE_CHECK_LOWER; + } - switch (Pred) { - default: - return false; + if (IsNonNegativeAndNotLoopVarying(LHS)) { + Index = RHS; + Length = LHS; + return RANGE_CHECK_UPPER; + } + return RANGE_CHECK_UNKNOWN; - case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_ULT: std::swap(LHS, RHS); // fallthrough - case ICmpInst::ICMP_SLT: - if (LHS != Index) - return false; - UpperLimit = RHS; - return true; - case ICmpInst::ICMP_UGT: - std::swap(LHS, RHS); - // fallthrough - case ICmpInst::ICMP_ULT: - if (LHS != Index) - return false; - UpperLimit = RHS; - return true; + if (IsNonNegativeAndNotLoopVarying(LHS)) { + Index = RHS; + Length = LHS; + return RANGE_CHECK_BOTH; + } + return RANGE_CHECK_UNKNOWN; } + + llvm_unreachable("default clause returns!"); } -/// Split a condition into something semantically equivalent to (0 <= I < -/// Limit), both comparisons signed and Len loop invariant on L and positive. -/// On success, return true and set Index to I and UpperLimit to Limit. Return -/// false on failure (we may still write to UpperLimit and Index on failure). -/// It does not try to interpret I as a loop index. -/// -static bool SplitRangeCheckCondition(Loop *L, ScalarEvolution &SE, +/// Parses an arbitrary condition into a range check. `Length` is set only if +/// the range check is recognized to be `RANGE_CHECK_UPPER` or stronger. +InductiveRangeCheck::RangeCheckKind +InductiveRangeCheck::parseRangeCheck(Loop *L, ScalarEvolution &SE, Value *Condition, const SCEV *&Index, - Value *&UpperLimit) { - - // TODO: currently this catches some silly cases like comparing "%idx slt 1". - // Our transformations are still correct, but less likely to be profitable in - // those cases. We have to come up with some heuristics that pick out the - // range checks that are more profitable to clone a loop for. This function - // in general can be made more robust. 
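// [Editor's aside: summary, not part of the commit] The condition shapes
// the new parseRangeCheckICmp recognizes, with I the checked index and L a
// loop-invariant value SCEV proves non-negative:
//
//   I sge 0, I sgt -1         -> RANGE_CHECK_LOWER   (0 <= I)
//   I slt L                   -> RANGE_CHECK_UPPER   (I <  L)
//   I ult L                   -> RANGE_CHECK_BOTH    (an unsigned compare
//                                against non-negative L implies 0 <= I too)
//   anything else             -> RANGE_CHECK_UNKNOWN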
- + Value *&Length) { using namespace llvm::PatternMatch; Value *A = nullptr; Value *B = nullptr; - ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; - - // In these early checks we assume that the matched UpperLimit is positive. - // We'll verify that fact later, before returning true. if (match(Condition, m_And(m_Value(A), m_Value(B)))) { - Value *IndexV = nullptr; - Value *ExpectedUpperBoundCheck = nullptr; + Value *IndexA = nullptr, *IndexB = nullptr; + Value *LengthA = nullptr, *LengthB = nullptr; + ICmpInst *ICmpA = dyn_cast<ICmpInst>(A), *ICmpB = dyn_cast<ICmpInst>(B); - if (IsLowerBoundCheck(A, IndexV)) - ExpectedUpperBoundCheck = B; - else if (IsLowerBoundCheck(B, IndexV)) - ExpectedUpperBoundCheck = A; - else - return false; + if (!ICmpA || !ICmpB) + return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; - if (!IsUpperBoundCheck(ExpectedUpperBoundCheck, IndexV, UpperLimit)) - return false; + auto RCKindA = parseRangeCheckICmp(L, ICmpA, SE, IndexA, LengthA); + auto RCKindB = parseRangeCheckICmp(L, ICmpB, SE, IndexB, LengthB); - Index = SE.getSCEV(IndexV); + if (RCKindA == InductiveRangeCheck::RANGE_CHECK_UNKNOWN || + RCKindB == InductiveRangeCheck::RANGE_CHECK_UNKNOWN) + return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; - if (isa<SCEVCouldNotCompute>(Index)) - return false; + if (IndexA != IndexB) + return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; - } else if (match(Condition, m_ICmp(Pred, m_Value(A), m_Value(B)))) { - switch (Pred) { - default: - return false; + if (LengthA != nullptr && LengthB != nullptr && LengthA != LengthB) + return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; - case ICmpInst::ICMP_SGT: - std::swap(A, B); - // fall through - case ICmpInst::ICMP_SLT: - UpperLimit = B; - Index = SE.getSCEV(A); - if (isa<SCEVCouldNotCompute>(Index) || !SE.isKnownNonNegative(Index)) - return false; - break; + Index = SE.getSCEV(IndexA); + if (isa<SCEVCouldNotCompute>(Index)) + return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; - case ICmpInst::ICMP_UGT: - std::swap(A, B); - // fall through - case ICmpInst::ICMP_ULT: - UpperLimit = B; - Index = SE.getSCEV(A); - if (isa<SCEVCouldNotCompute>(Index)) - return false; - break; - } - } else { - return false; + Length = LengthA == nullptr ? 
LengthB : LengthA; + + return (InductiveRangeCheck::RangeCheckKind)(RCKindA | RCKindB); } - const SCEV *UpperLimitSCEV = SE.getSCEV(UpperLimit); - if (isa<SCEVCouldNotCompute>(UpperLimitSCEV) || - !SE.isKnownNonNegative(UpperLimitSCEV)) - return false; + if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) { + Value *IndexVal = nullptr; - if (SE.getLoopDisposition(UpperLimitSCEV, L) != - ScalarEvolution::LoopInvariant) { - DEBUG(dbgs() << " in function: " << L->getHeader()->getParent()->getName() - << " "; - dbgs() << " UpperLimit is not loop invariant: " - << UpperLimit->getName() << "\n";); - return false; + auto RCKind = parseRangeCheckICmp(L, ICI, SE, IndexVal, Length); + + if (RCKind == InductiveRangeCheck::RANGE_CHECK_UNKNOWN) + return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; + + Index = SE.getSCEV(IndexVal); + if (isa<SCEVCouldNotCompute>(Index)) + return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; + + return RCKind; } - return true; + return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; } @@ -380,10 +396,15 @@ InductiveRangeCheck::create(InductiveRangeCheck::AllocatorTy &A, BranchInst *BI, Value *Length = nullptr; const SCEV *IndexSCEV = nullptr; - if (!SplitRangeCheckCondition(L, SE, BI->getCondition(), IndexSCEV, Length)) + auto RCKind = InductiveRangeCheck::parseRangeCheck(L, SE, BI->getCondition(), + IndexSCEV, Length); + + if (RCKind == InductiveRangeCheck::RANGE_CHECK_UNKNOWN) return nullptr; - assert(IndexSCEV && Length && "contract with SplitRangeCheckCondition!"); + assert(IndexSCEV && "contract with SplitRangeCheckCondition!"); + assert((!(RCKind & InductiveRangeCheck::RANGE_CHECK_UPPER) || Length) && + "contract with SplitRangeCheckCondition!"); const SCEVAddRecExpr *IndexAddRec = dyn_cast<SCEVAddRecExpr>(IndexSCEV); bool IsAffineIndex = @@ -397,6 +418,7 @@ InductiveRangeCheck::create(InductiveRangeCheck::AllocatorTy &A, BranchInst *BI, IRC->Offset = IndexAddRec->getStart(); IRC->Scale = IndexAddRec->getStepRecurrence(SE); IRC->Branch = BI; + IRC->Kind = RCKind; return IRC; } @@ -685,30 +707,40 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP } } - auto IsInductionVar = [&SE](const SCEVAddRecExpr *AR, bool &IsIncreasing) { - if (!AR->isAffine()) - return false; + auto HasNoSignedWrap = [&](const SCEVAddRecExpr *AR) { + if (AR->getNoWrapFlags(SCEV::FlagNSW)) + return true; IntegerType *Ty = cast<IntegerType>(AR->getType()); IntegerType *WideTy = IntegerType::get(Ty->getContext(), Ty->getBitWidth() * 2); - // Currently we only work with induction variables that have been proved to - // not wrap. This restriction can potentially be lifted in the future. - const SCEVAddRecExpr *ExtendAfterOp = dyn_cast<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy)); - if (!ExtendAfterOp) - return false; + if (ExtendAfterOp) { + const SCEV *ExtendedStart = SE.getSignExtendExpr(AR->getStart(), WideTy); + const SCEV *ExtendedStep = + SE.getSignExtendExpr(AR->getStepRecurrence(SE), WideTy); - const SCEV *ExtendedStart = SE.getSignExtendExpr(AR->getStart(), WideTy); - const SCEV *ExtendedStep = - SE.getSignExtendExpr(AR->getStepRecurrence(SE), WideTy); + bool NoSignedWrap = ExtendAfterOp->getStart() == ExtendedStart && + ExtendAfterOp->getStepRecurrence(SE) == ExtendedStep; + + if (NoSignedWrap) + return true; + } + + // We may have proved this when computing the sign extension above. 
+ return AR->getNoWrapFlags(SCEV::FlagNSW) != SCEV::FlagAnyWrap; + }; + + auto IsInductionVar = [&](const SCEVAddRecExpr *AR, bool &IsIncreasing) { + if (!AR->isAffine()) + return false; - bool NoSignedWrap = ExtendAfterOp->getStart() == ExtendedStart && - ExtendAfterOp->getStepRecurrence(SE) == ExtendedStep; + // Currently we only work with induction variables that have been proved to + // not wrap. This restriction can potentially be lifted in the future. - if (!NoSignedWrap) + if (!HasNoSignedWrap(AR)) return false; if (const SCEVConstant *StepExpr = @@ -791,9 +823,10 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP "loop variant exit count doesn't make sense!"); assert(!L.contains(LatchExit) && "expected an exit block!"); - - Value *IndVarStartV = SCEVExpander(SE, "irce").expandCodeFor( - IndVarStart, IndVarTy, &*Preheader->rbegin()); + const DataLayout &DL = Preheader->getModule()->getDataLayout(); + Value *IndVarStartV = + SCEVExpander(SE, DL, "irce") + .expandCodeFor(IndVarStart, IndVarTy, &*Preheader->rbegin()); IndVarStartV->setName("indvar.start"); LoopStructure Result; @@ -831,12 +864,35 @@ LoopConstrainer::calculateSubRanges() const { const SCEV *End = SE.getSCEV(MainLoopStructure.LoopExitAt); bool Increasing = MainLoopStructure.IndVarIncreasing; + // We compute `Smallest` and `Greatest` such that [Smallest, Greatest) is the // range of values the induction variable takes. - const SCEV *Smallest = - Increasing ? Start : SE.getAddExpr(End, SE.getSCEV(One)); - const SCEV *Greatest = - Increasing ? End : SE.getAddExpr(Start, SE.getSCEV(One)); + + const SCEV *Smallest = nullptr, *Greatest = nullptr; + + if (Increasing) { + Smallest = Start; + Greatest = End; + } else { + // These two computations may sign-overflow. Here is why that is okay: + // + // We know that the induction variable does not sign-overflow on any + // iteration except the last one, and it starts at `Start` and ends at + // `End`, decrementing by one every time. + // + // * if `Smallest` sign-overflows we know `End` is `INT_SMAX`. Since the + // induction variable is decreasing we know that the smallest value + // the loop body is actually executed with is `INT_SMIN` == `Smallest`. + // + // * if `Greatest` sign-overflows, we know it can only be `INT_SMIN`. In + // that case, `Clamp` will always return `Smallest` and + // [`Result.LowLimit`, `Result.HighLimit`) = [`Smallest`, `Smallest`) + // will be an empty range. Returning an empty range is always safe. + // + + Smallest = SE.getAddExpr(End, SE.getSCEV(One)); + Greatest = SE.getAddExpr(Start, SE.getSCEV(One)); + } auto Clamp = [this, Smallest, Greatest](const SCEV *S) { return SE.getSMaxExpr(Smallest, SE.getSMinExpr(Greatest, S)); @@ -1132,7 +1188,7 @@ bool LoopConstrainer::run() { IntegerType *IVTy = cast<IntegerType>(MainLoopStructure.IndVarNext->getType()); - SCEVExpander Expander(SE, "irce"); + SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "irce"); Instruction *InsertPt = OriginalPreheader->getTerminator(); // It would have been better to make `PreLoop' and `PostLoop' @@ -1293,8 +1349,19 @@ InductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE, const SCEV *M = SE.getMinusSCEV(C, A); const SCEV *Begin = SE.getNegativeSCEV(M); - const SCEV *End = SE.getMinusSCEV(SE.getSCEV(getLength()), M); + const SCEV *UpperLimit = nullptr; + + // We strengthen "0 <= I" to "0 <= I < INT_SMAX" and "I < L" to "0 <= I < L". + // We can potentially do much better here.
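// [Editor's aside: illustrative sketch, not part of the commit]
// SCEVExpander's constructor gains a DataLayout parameter in this rebase,
// visible in both the IRCE and IndVarSimplify hunks:
//
//   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
//   SCEVExpander Expander(SE, DL, "irce"); // was SCEVExpander(SE, "irce")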
@@ -1344,12 +1411,18 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) { if (RangeChecks.empty()) return false; - DEBUG(dbgs() << "irce: looking at loop "; L->print(dbgs()); - dbgs() << "irce: loop has " << RangeChecks.size() - << " inductive range checks: \n"; - for (InductiveRangeCheck *IRC : RangeChecks) - IRC->print(dbgs()); - ); + auto PrintRecognizedRangeChecks = [&](raw_ostream &OS) { + OS << "irce: looking at loop "; L->print(OS); + OS << "irce: loop has " << RangeChecks.size() + << " inductive range checks: \n"; + for (InductiveRangeCheck *IRC : RangeChecks) + IRC->print(OS); + }; + + DEBUG(PrintRecognizedRangeChecks(dbgs())); + + if (PrintRangeChecks) + PrintRecognizedRangeChecks(errs()); const char *FailureReason = nullptr; Optional<LoopStructure> MaybeLoopStructure = diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 8b54abd..83ac915 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" @@ -32,7 +33,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -78,7 +78,6 @@ namespace { /// revectored to the false side of the second if. /// class JumpThreading : public FunctionPass { - const DataLayout *DL; TargetLibraryInfo *TLI; LazyValueInfo *LVI; #ifdef NDEBUG @@ -159,8 +158,6 @@ bool JumpThreading::runOnFunction(Function &F) { return false; DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); LVI = &getAnalysis<LazyValueInfo>(); @@ -505,6 +502,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, assert(Preference == WantInteger && "Compares only produce integers"); PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0)); if (PN && PN->getParent() == BB) { + const DataLayout &DL = PN->getModule()->getDataLayout(); // We can do this simplification if any comparisons fold to true or false. // See if any do. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { @@ -709,7 +707,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // Run constant folding to see if we can reduce the condition to a simple // constant.
if (Instruction *I = dyn_cast<Instruction>(Condition)) { - Value *SimpleVal = ConstantFoldInstruction(I, DL, TLI); + Value *SimpleVal = + ConstantFoldInstruction(I, BB->getModule()->getDataLayout(), TLI); if (SimpleVal) { I->replaceAllUsesWith(SimpleVal); I->eraseFromParent(); @@ -1521,7 +1520,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // At this point, the IR is fully up to date and consistent. Do a quick scan // over the new instructions and zap any that are constants or dead. This // frequently happens because of phi translation. - SimplifyInstructionsInBlock(NewBB, DL, TLI); + SimplifyInstructionsInBlock(NewBB, TLI); // Threaded an edge! ++NumThreads; @@ -1586,7 +1585,6 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, BasicBlock::iterator BI = BB->begin(); for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); - // Clone the non-phi instructions of BB into PredBB, keeping track of the // mapping and using it to remap operands in the cloned instructions. for (; BI != BB->end(); ++BI) { @@ -1603,7 +1601,8 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, // If this instruction can be simplified after the operands are updated, // just use the simplified value instead. This frequently happens due to // phi translation. - if (Value *IV = SimplifyInstruction(New, DL)) { + if (Value *IV = + SimplifyInstruction(New, BB->getModule()->getDataLayout())) { delete New; ValueMapping[BI] = IV; } else { diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 14af38b..1333b02 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -38,6 +38,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -52,7 +53,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -76,21 +76,21 @@ static bool isNotUsedInLoop(Instruction &I, Loop *CurLoop); static bool hoist(Instruction &I, BasicBlock *Preheader); static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST ); -static bool isGuaranteedToExecute(Instruction &Inst, DominatorTree *DT, - Loop *CurLoop, LICMSafetyInfo * SafetyInfo); -static bool isSafeToExecuteUnconditionally(Instruction &Inst,DominatorTree *DT, - const DataLayout *DL, Loop *CurLoop, - LICMSafetyInfo * SafetyInfo); +static bool isGuaranteedToExecute(Instruction &Inst, DominatorTree *DT, + Loop *CurLoop, LICMSafetyInfo *SafetyInfo); +static bool isSafeToExecuteUnconditionally(Instruction &Inst, DominatorTree *DT, + Loop *CurLoop, + LICMSafetyInfo *SafetyInfo); static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, const AAMDNodes &AAInfo, AliasSetTracker *CurAST); static Instruction *CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, LoopInfo *LI); -static bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, - DominatorTree *DT, const DataLayout *DL, - Loop *CurLoop, AliasSetTracker *CurAST, - LICMSafetyInfo * SafetyInfo); +static bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, + DominatorTree *DT, Loop 
*CurLoop, + AliasSetTracker *CurAST, + LICMSafetyInfo *SafetyInfo); namespace { struct LICM : public LoopPass { @@ -130,7 +130,6 @@ namespace { LoopInfo *LI; // Current LoopInfo DominatorTree *DT; // Dominator Tree for the current Loop. - const DataLayout *DL; // DataLayout for constant folding. TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding. // State that is updated as we process loops. @@ -181,8 +180,6 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { AA = &getAnalysis<AliasAnalysis>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); @@ -235,10 +232,10 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // instructions, we perform another pass to hoist them out of the loop. // if (L->hasDedicatedExits()) - Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, DL, TLI, - CurLoop, CurAST, &SafetyInfo); + Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, CurLoop, + CurAST, &SafetyInfo); if (Preheader) - Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, DL, TLI, + Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, CurLoop, CurAST, &SafetyInfo); // Now that all loop invariants have been removed from the loop, promote any @@ -291,10 +288,9 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { /// first order w.r.t the DominatorTree. This allows us to visit uses before /// definitions, allowing us to sink a loop body in one pass without iteration. /// -bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, - DominatorTree *DT, const DataLayout *DL, - TargetLibraryInfo *TLI, Loop *CurLoop, - AliasSetTracker *CurAST, LICMSafetyInfo * SafetyInfo) { +bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, + DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, + AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && @@ -311,8 +307,8 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // We are processing blocks in reverse dfo, so process children first. const std::vector<DomTreeNode*> &Children = N->getChildren(); for (unsigned i = 0, e = Children.size(); i != e; ++i) - Changed |= sinkRegion(Children[i], AA, LI, DT, DL, TLI, CurLoop, - CurAST, SafetyInfo); + Changed |= + sinkRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); // Only need to process the contents of this block if it is not part of a // subloop (which would already have been processed). if (inSubLoop(BB,CurLoop,LI)) return Changed; @@ -336,8 +332,8 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // outside of the loop. In this case, it doesn't even matter if the // operands of the instruction are loop invariant. // - if (isNotUsedInLoop(I, CurLoop) && - canSinkOrHoistInst(I, AA, DT, DL, CurLoop, CurAST, SafetyInfo)) { + if (isNotUsedInLoop(I, CurLoop) && + canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo)) { ++II; Changed |= sink(I, LI, DT, CurLoop, CurAST); } @@ -350,10 +346,9 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, /// order w.r.t the DominatorTree. This allows us to visit definitions before /// uses, allowing us to hoist a loop body in one pass without iteration. 
/// -bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, - DominatorTree *DT, const DataLayout *DL, - TargetLibraryInfo *TLI, Loop *CurLoop, - AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) { +bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, + DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, + AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && CurAST != nullptr && @@ -372,7 +367,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // Try constant folding this instruction. If all the operands are // constants, it is technically hoistable, but it would be better to just // fold it. - if (Constant *C = ConstantFoldInstruction(&I, DL, TLI)) { + if (Constant *C = ConstantFoldInstruction( + &I, I.getModule()->getDataLayout(), TLI)) { DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); CurAST->copyValue(&I, C); CurAST->deleteValue(&I); @@ -385,16 +381,16 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // if all of the operands of the instruction are loop invariant and if it // is safe to hoist the instruction. // - if (CurLoop->hasLoopInvariantOperands(&I) && - canSinkOrHoistInst(I, AA, DT, DL, CurLoop, CurAST, SafetyInfo) && - isSafeToExecuteUnconditionally(I, DT, DL, CurLoop, SafetyInfo)) + if (CurLoop->hasLoopInvariantOperands(&I) && + canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo) && + isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo)) Changed |= hoist(I, CurLoop->getLoopPreheader()); } const std::vector<DomTreeNode*> &Children = N->getChildren(); for (unsigned i = 0, e = Children.size(); i != e; ++i) - Changed |= hoistRegion(Children[i], AA, LI, DT, DL, TLI, CurLoop, - CurAST, SafetyInfo); + Changed |= + hoistRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); return Changed; } @@ -424,10 +420,9 @@ void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) { /// canSinkOrHoistInst - Return true if the hoister and sinker can handle this /// instruction. /// -bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, - DominatorTree *DT, const DataLayout *DL, - Loop *CurLoop, AliasSetTracker *CurAST, - LICMSafetyInfo * SafetyInfo) { +bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT, + Loop *CurLoop, AliasSetTracker *CurAST, + LICMSafetyInfo *SafetyInfo) { // Loads have extra constraints we have to verify before we can hoist them. if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { if (!LI->isUnordered()) @@ -487,7 +482,7 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, !isa<InsertValueInst>(I)) return false; - return isSafeToExecuteUnconditionally(I, DT, DL, CurLoop, SafetyInfo); + return isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo); } /// Returns true if a PHINode is a trivially replaceable with an @@ -643,10 +638,10 @@ static bool hoist(Instruction &I, BasicBlock *Preheader) { /// or if it is a trapping instruction and is guaranteed to execute. /// static bool isSafeToExecuteUnconditionally(Instruction &Inst, DominatorTree *DT, - const DataLayout *DL, Loop *CurLoop, - LICMSafetyInfo * SafetyInfo) { + Loop *CurLoop, + LICMSafetyInfo *SafetyInfo) { // If it is not a trapping instruction, it is always safe to hoist. 
- if (isSafeToSpeculativelyExecute(&Inst, DL)) + if (isSafeToSpeculativelyExecute(&Inst)) return true; return isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo); diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp index 11e4d76..1f33f72 100644 --- a/lib/Transforms/Scalar/LoadCombine.cpp +++ b/lib/Transforms/Scalar/LoadCombine.cpp @@ -12,17 +12,17 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar.h" - #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/TargetFolder.h" -#include "llvm/Pass.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -52,13 +52,10 @@ struct LoadPOPPair { class LoadCombine : public BasicBlockPass { LLVMContext *C; - const DataLayout *DL; AliasAnalysis *AA; public: - LoadCombine() - : BasicBlockPass(ID), - C(nullptr), DL(nullptr), AA(nullptr) { + LoadCombine() : BasicBlockPass(ID), C(nullptr), AA(nullptr) { initializeSROAPass(*PassRegistry::getPassRegistry()); } @@ -85,12 +82,6 @@ private: bool LoadCombine::doInitialization(Function &F) { DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n"); C = &F.getContext(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - if (!DLP) { - DEBUG(dbgs() << " Skipping LoadCombine -- no target data!\n"); - return false; - } - DL = &DLP->getDataLayout(); return true; } @@ -100,9 +91,10 @@ PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) { POP.Offset = 0; while (isa<BitCastInst>(POP.Pointer) || isa<GetElementPtrInst>(POP.Pointer)) { if (auto *GEP = dyn_cast<GetElementPtrInst>(POP.Pointer)) { - unsigned BitWidth = DL->getPointerTypeSizeInBits(GEP->getType()); + auto &DL = LI.getModule()->getDataLayout(); + unsigned BitWidth = DL.getPointerTypeSizeInBits(GEP->getType()); APInt Offset(BitWidth, 0); - if (GEP->accumulateConstantOffset(*DL, Offset)) + if (GEP->accumulateConstantOffset(DL, Offset)) POP.Offset += Offset.getZExtValue(); else // Can't handle GEPs with variable indices. @@ -145,7 +137,8 @@ bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) { if (PrevOffset == -1ull) { BaseLoad = L.Load; PrevOffset = L.POP.Offset; - PrevSize = DL->getTypeStoreSize(L.Load->getType()); + PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize( + L.Load->getType()); AggregateLoads.push_back(L); continue; } @@ -164,7 +157,8 @@ bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) { // FIXME: We may want to handle this case. 
continue; PrevOffset = L.POP.Offset; - PrevSize = DL->getTypeStoreSize(L.Load->getType()); + PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize( + L.Load->getType()); AggregateLoads.push_back(L); } if (combineLoads(AggregateLoads)) @@ -215,7 +209,8 @@ bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) { for (const auto &L : Loads) { Builder->SetInsertPoint(L.Load); Value *V = Builder->CreateExtractInteger( - *DL, NewLoad, cast<IntegerType>(L.Load->getType()), + L.Load->getModule()->getDataLayout(), NewLoad, + cast<IntegerType>(L.Load->getType()), L.POP.Offset - Loads[0].POP.Offset, "combine.extract"); L.Load->replaceAllUsesWith(V); } @@ -225,13 +220,13 @@ bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) { } bool LoadCombine::runOnBasicBlock(BasicBlock &BB) { - if (skipOptnoneFunction(BB) || !DL) + if (skipOptnoneFunction(BB)) return false; AA = &getAnalysis<AliasAnalysis>(); - IRBuilder<true, TargetFolder> - TheBuilder(BB.getContext(), TargetFolder(DL)); + IRBuilder<true, TargetFolder> TheBuilder( + BB.getContext(), TargetFolder(BB.getModule()->getDataLayout())); Builder = &TheBuilder; DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> LoadMap; diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 243c624..7bc2917 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -47,6 +47,7 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" @@ -56,7 +57,6 @@ #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -130,7 +130,6 @@ namespace { class LoopIdiomRecognize : public LoopPass { Loop *CurLoop; - const DataLayout *DL; DominatorTree *DT; ScalarEvolution *SE; TargetLibraryInfo *TLI; @@ -139,7 +138,10 @@ namespace { static char ID; explicit LoopIdiomRecognize() : LoopPass(ID) { initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); - DL = nullptr; DT = nullptr; SE = nullptr; TLI = nullptr; TTI = nullptr; + DT = nullptr; + SE = nullptr; + TLI = nullptr; + TTI = nullptr; } bool runOnLoop(Loop *L, LPPassManager &LPM) override; @@ -179,14 +181,6 @@ namespace { AU.addRequired<TargetTransformInfoWrapperPass>(); } - const DataLayout *getDataLayout() { - if (DL) - return DL; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; - return DL; - } - DominatorTree *getDominatorTree() { return DT ? DT : (DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree()); @@ -625,10 +619,6 @@ bool LoopIdiomRecognize::runOnCountableLoop() { if (BECst->getValue()->getValue() == 0) return false; - // We require target data for now. - if (!getDataLayout()) - return false; - // set DT (void)getDominatorTree(); @@ -742,7 +732,8 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { Value *StorePtr = SI->getPointerOperand(); // Reject stores that are so large that they overflow an unsigned. 
- uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType()); + auto &DL = CurLoop->getHeader()->getModule()->getDataLayout(); + uint64_t SizeInBits = DL.getTypeSizeInBits(StoredVal->getType()); if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) return false; @@ -917,7 +908,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // but it can be turned into memset_pattern if the target supports it. Value *SplatValue = isBytewiseValue(StoredVal); Constant *PatternValue = nullptr; - + auto &DL = CurLoop->getHeader()->getModule()->getDataLayout(); unsigned DestAS = DestPtr->getType()->getPointerAddressSpace(); // If we're allowed to form a memset, and the stored value would be acceptable @@ -928,9 +919,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, CurLoop->isLoopInvariant(SplatValue)) { // Keep and use SplatValue. PatternValue = nullptr; - } else if (DestAS == 0 && - TLI->has(LibFunc::memset_pattern16) && - (PatternValue = getMemSetPatternValue(StoredVal, *DL))) { + } else if (DestAS == 0 && TLI->has(LibFunc::memset_pattern16) && + (PatternValue = getMemSetPatternValue(StoredVal, DL))) { // Don't create memset_pattern16s with address spaces. // It looks like we can use PatternValue! SplatValue = nullptr; @@ -945,7 +935,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // header. This allows us to insert code for it in the preheader. BasicBlock *Preheader = CurLoop->getLoopPreheader(); IRBuilder<> Builder(Preheader->getTerminator()); - SCEVExpander Expander(*SE, "loop-idiom"); + SCEVExpander Expander(*SE, DL, "loop-idiom"); Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS); @@ -1005,7 +995,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // Otherwise we should form a memset_pattern16. PatternValue is known to be // an constant array of 16-bytes. Plop the value into a mergable global. GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true, - GlobalValue::InternalLinkage, + GlobalValue::PrivateLinkage, PatternValue, ".memset_pattern"); GV->setUnnamedAddr(true); // Ok to merge these. GV->setAlignment(16); @@ -1042,7 +1032,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // header. This allows us to insert code for it in the preheader. BasicBlock *Preheader = CurLoop->getLoopPreheader(); IRBuilder<> Builder(Preheader->getTerminator()); - SCEVExpander Expander(*SE, "loop-idiom"); + const DataLayout &DL = Preheader->getModule()->getDataLayout(); + SCEVExpander Expander(*SE, DL, "loop-idiom"); // Okay, we have a strided store "p[i]" of a loaded value. We can turn // this into a memcpy in the loop preheader now if we want. However, this diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 6dc600e..e125026 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -77,8 +77,6 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? 
&DLP->getDataLayout() : nullptr; const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache( @@ -110,6 +108,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { WorklistItem Item = VisitStack.pop_back_val(); BasicBlock *BB = Item.getPointer(); bool IsSubloopHeader = Item.getInt(); + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); // Simplify instructions in the current basic block. for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp new file mode 100644 index 0000000..f7626c5 --- /dev/null +++ b/lib/Transforms/Scalar/LoopInterchange.cpp @@ -0,0 +1,1154 @@ +//===- LoopInterchange.cpp - Loop interchange pass------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements the loop interchange transform. It interchanges loops +// to provide more cache-friendly memory access patterns. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" +using namespace llvm; + +#define DEBUG_TYPE "loop-interchange" + +namespace { + +typedef SmallVector<Loop *, 8> LoopVector; + +// TODO: Check if we can use a sparse matrix here. +typedef std::vector<std::vector<char>> CharMatrix; + +// Maximum number of dependencies that can be handled in the dependency matrix. +static const unsigned MaxMemInstrCount = 100; + +// Maximum loop depth supported. +static const unsigned MaxLoopNestDepth = 10;
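The direction-vector encoding used throughout this file is easiest to see with a small, self-contained sketch (a hypothetical example, not from the patch): each row of the matrix is one dependence, and each column is one loop level, outermost first.

#include <vector>

using CharMatrix = std::vector<std::vector<char>>;

// For a 2-deep nest `for (i) for (j) A[i][j] = A[i-1][j] + 1;` the store
// depends on the load at distance (1, 0), giving one matrix row:
//   '<' : carried by the outer loop (positive distance)
//   '=' : same iteration of the inner loop
// ('>' marks a negative distance, 'S' a scalar dependence, 'I' independence,
// and '*' an unknown direction, which the pass treats conservatively.)
CharMatrix exampleDepMatrix() { return {{'<', '='}}; }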
+ +struct LoopInterchange; + +#ifdef DUMP_DEP_MATRICIES +void printDepMatrix(CharMatrix &DepMatrix) { + for (auto I = DepMatrix.begin(), E = DepMatrix.end(); I != E; ++I) { + std::vector<char> Vec = *I; + for (auto II = Vec.begin(), EE = Vec.end(); II != EE; ++II) + DEBUG(dbgs() << *II << " "); + DEBUG(dbgs() << "\n"); + } +} +#endif + +bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, + DependenceAnalysis *DA) { + typedef SmallVector<Value *, 16> ValueVector; + ValueVector MemInstr; + + if (Level > MaxLoopNestDepth) { + DEBUG(dbgs() << "Cannot handle loops of depth greater than " + << MaxLoopNestDepth << "\n"); + return false; + } + + // For each block. + for (Loop::block_iterator BB = L->block_begin(), BE = L->block_end(); + BB != BE; ++BB) { + // Scan the BB and collect legal loads and stores. + for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; + ++I) { + Instruction *Ins = dyn_cast<Instruction>(I); + if (!Ins) + return false; + LoadInst *Ld = dyn_cast<LoadInst>(I); + StoreInst *St = dyn_cast<StoreInst>(I); + if (!St && !Ld) + continue; + if (Ld && !Ld->isSimple()) + return false; + if (St && !St->isSimple()) + return false; + MemInstr.push_back(I); + } + } + + DEBUG(dbgs() << "Found " << MemInstr.size() + << " Loads and Stores to analyze\n"); + + ValueVector::iterator I, IE, J, JE; + + for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) { + for (J = I, JE = MemInstr.end(); J != JE; ++J) { + std::vector<char> Dep; + Instruction *Src = dyn_cast<Instruction>(*I); + Instruction *Des = dyn_cast<Instruction>(*J); + if (Src == Des) + continue; + if (isa<LoadInst>(Src) && isa<LoadInst>(Des)) + continue; + if (auto D = DA->depends(Src, Des, true)) { + DEBUG(dbgs() << "Found Dependency between Src=" << Src << " Des=" << Des + << "\n"); + if (D->isFlow()) { + // TODO: Handle Flow dependence. Check if it is sufficient to populate + // the Dependence Matrix with the direction reversed. + DEBUG(dbgs() << "Flow dependence not handled"); + return false; + } + if (D->isAnti()) { + DEBUG(dbgs() << "Found Anti dependence \n"); + unsigned Levels = D->getLevels(); + char Direction; + for (unsigned II = 1; II <= Levels; ++II) { + const SCEV *Distance = D->getDistance(II); + const SCEVConstant *SCEVConst = + dyn_cast_or_null<SCEVConstant>(Distance); + if (SCEVConst) { + const ConstantInt *CI = SCEVConst->getValue(); + if (CI->isNegative()) + Direction = '<'; + else if (CI->isZero()) + Direction = '='; + else + Direction = '>'; + Dep.push_back(Direction); + } else if (D->isScalar(II)) { + Direction = 'S'; + Dep.push_back(Direction); + } else { + unsigned Dir = D->getDirection(II); + if (Dir == Dependence::DVEntry::LT || + Dir == Dependence::DVEntry::LE) + Direction = '<'; + else if (Dir == Dependence::DVEntry::GT || + Dir == Dependence::DVEntry::GE) + Direction = '>'; + else if (Dir == Dependence::DVEntry::EQ) + Direction = '='; + else + Direction = '*'; + Dep.push_back(Direction); + } + } + while (Dep.size() != Level) { + Dep.push_back('I'); + } + + DepMatrix.push_back(Dep); + if (DepMatrix.size() > MaxMemInstrCount) { + DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount + << " dependencies inside loop\n"); + return false; + } + } + } + } + } + + // We don't have a DepMatrix to check legality; return false. + if (DepMatrix.size() == 0) + return false; + return true; +} + +// A loop is moved from index 'from' to an index 'to'. Update the Dependence +// matrix by exchanging the two columns.
+void interChangeDepedencies(CharMatrix &DepMatrix, unsigned FromIndx, + unsigned ToIndx) { + unsigned numRows = DepMatrix.size(); + for (unsigned i = 0; i < numRows; ++i) { + char TmpVal = DepMatrix[i][ToIndx]; + DepMatrix[i][ToIndx] = DepMatrix[i][FromIndx]; + DepMatrix[i][FromIndx] = TmpVal; + } +} + +// Checks if the outermost non-'=', 'S' or 'I' dependence in the dependence +// matrix is '>'. +bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row, + unsigned Column) { + for (unsigned i = 0; i <= Column; ++i) { + if (DepMatrix[Row][i] == '<') + return false; + if (DepMatrix[Row][i] == '>') + return true; + } + // All dependencies were '=', 'S' or 'I'. + return false; +} + +// Checks if no dependence exists in the dependency matrix in Row before Column. +bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row, + unsigned Column) { + for (unsigned i = 0; i < Column; ++i) { + if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' && + DepMatrix[Row][i] != 'I') + return false; + } + return true; +} + +bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row, + unsigned OuterLoopId, char InnerDep, char OuterDep) { + + if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId)) + return false; + + if (InnerDep == OuterDep) + return true; + + // It is legal to interchange if and only if after interchange no row has a + // '>' direction as the leftmost non-'='. + + if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I') + return true; + + if (InnerDep == '<') + return true; + + if (InnerDep == '>') { + // If OuterLoopId represents the outermost loop then interchanging will + // make the first dependency '>'. + if (OuterLoopId == 0) + return false; + + // If all dependencies before OuterLoopId were '=', 'S' or 'I', then + // interchanging would make this row's outermost non-'=' dependency '>', + // which is illegal; it is only legal when an earlier '<' stays outermost. + if (!containsNoDependence(DepMatrix, Row, OuterLoopId)) + return true; + } + + return false; +} + +// Checks if it is legal to interchange 2 loops. +// [Theorem] A permutation of the loops in a perfect nest is legal if and only +// if the direction matrix, after the same permutation is applied to its +// columns, has no ">" direction as the leftmost non-"=" direction in any row. +bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, + unsigned OuterLoopId) { + + unsigned NumRows = DepMatrix.size(); + // For each row check if it is valid to interchange. + for (unsigned Row = 0; Row < NumRows; ++Row) { + char InnerDep = DepMatrix[Row][InnerLoopId]; + char OuterDep = DepMatrix[Row][OuterLoopId]; + if (InnerDep == '*' || OuterDep == '*') + return false; + else if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, + OuterDep)) + return false; + } + return true; +}
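A standalone restatement of that theorem (an illustrative sketch, independent of the helpers above, and ignoring the conservatively rejected '*' entries) makes the column-swap test easy to check by hand:

#include <utility>
#include <vector>

using CharMatrix = std::vector<std::vector<char>>;

// Legal iff, after swapping the two columns, no row has '>' as its leftmost
// entry outside {'=', 'S', 'I'}.
static bool legalAfterSwap(const CharMatrix &M, unsigned A, unsigned B) {
  for (auto Row : M) { // copy each row so we can swap in place
    std::swap(Row[A], Row[B]);
    for (char D : Row) {
      if (D == '=' || D == 'S' || D == 'I')
        continue;   // neutral entries; keep scanning
      if (D == '>')
        return false; // the permuted nest would violate this dependence
      break;          // leftmost direction is '<'; this row is satisfied
    }
  }
  return true;
}

// legalAfterSwap({{'<', '>'}}, 0, 1) == false : the swap turns the row into
// ('>', '<'), whose leftmost direction is '>'.
// legalAfterSwap({{'=', '<'}}, 0, 1) == true  : the row becomes ('<', '=').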
+static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) { + + DEBUG(dbgs() << "populateWorklist called\n"); + LoopVector LoopList; + Loop *CurrentLoop = &L; + std::vector<Loop *> vec = CurrentLoop->getSubLoopsVector(); + while (vec.size() != 0) { + // The current loop has multiple subloops in it, hence it is not tightly + // nested. + // Discard all loops above it added into Worklist. + if (vec.size() != 1) { + LoopList.clear(); + return; + } + LoopList.push_back(CurrentLoop); + CurrentLoop = *(vec.begin()); + vec = CurrentLoop->getSubLoopsVector(); + } + LoopList.push_back(CurrentLoop); + V.push_back(LoopList); +} + +static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) { + PHINode *InnerIndexVar = L->getCanonicalInductionVariable(); + if (InnerIndexVar) + return InnerIndexVar; + if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr) + return nullptr; + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { + PHINode *PhiVar = cast<PHINode>(I); + Type *PhiTy = PhiVar->getType(); + if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && + !PhiTy->isPointerTy()) + return nullptr; + const SCEVAddRecExpr *AddRec = + dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar)); + if (!AddRec || !AddRec->isAffine()) + continue; + const SCEV *Step = AddRec->getStepRecurrence(*SE); + const SCEVConstant *C = dyn_cast<SCEVConstant>(Step); + if (!C) + continue; + // Found the induction variable. + // FIXME: Handle loops with more than one induction variable. Note that, + // currently, legality makes sure we have only one induction variable. + return PhiVar; + } + return nullptr; +} + +/// LoopInterchangeLegality checks if it is legal to interchange the loop. +class LoopInterchangeLegality { +public: + LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE, + LoopInterchange *Pass) + : OuterLoop(Outer), InnerLoop(Inner), SE(SE), CurrentPass(Pass) {} + + /// Check if the loops can be interchanged. + bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, + CharMatrix &DepMatrix); + /// Check if the loop structure is understood. We do not handle triangular + /// loops for now. + bool isLoopStructureUnderstood(PHINode *InnerInductionVar); + + bool currentLimitations(); + +private: + bool tightlyNested(Loop *Outer, Loop *Inner); + + Loop *OuterLoop; + Loop *InnerLoop; + + /// SCEV analysis. + ScalarEvolution *SE; + LoopInterchange *CurrentPass; +}; + +/// LoopInterchangeProfitability checks if it is profitable to interchange the +/// loop. +class LoopInterchangeProfitability { +public: + LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE) + : OuterLoop(Outer), InnerLoop(Inner), SE(SE) {} + + /// Check if the loop interchange is profitable. + bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId, + CharMatrix &DepMatrix); + +private: + int getInstrOrderCost(); + + Loop *OuterLoop; + Loop *InnerLoop; + + /// SCEV analysis. + ScalarEvolution *SE; +}; + +/// LoopInterchangeTransform interchanges the loop. +class LoopInterchangeTransform { +public: + LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE, + LoopInfo *LI, DominatorTree *DT, + LoopInterchange *Pass, BasicBlock *LoopNestExit) + : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), + LoopExit(LoopNestExit) {} + + /// Interchange OuterLoop and InnerLoop. + bool transform(); + void restructureLoops(Loop *InnerLoop, Loop *OuterLoop); + void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop); + +private: + void splitInnerLoopLatch(Instruction *); + void splitOuterLoopLatch(); + void splitInnerLoopHeader(); + bool adjustLoopLinks(); + void adjustLoopPreheaders(); + void adjustOuterLoopPreheader(); + void adjustInnerLoopPreheader(); + bool adjustLoopBranches(); + + Loop *OuterLoop; + Loop *InnerLoop; + + /// SCEV analysis. + ScalarEvolution *SE; + LoopInfo *LI; + DominatorTree *DT; + BasicBlock *LoopExit; +};
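The payoff these three phases are after can be seen in a plain C++ nest (an illustrative example; the array shape is arbitrary): with the loops in the wrong order, each inner iteration strides a whole row, and interchanging them makes the inner subscript the fast-varying one.

enum { N = 1024 };
static int A[N][N]; // row-major, as in C++

// Before interchange: the inner IV indexes the slow (row) dimension, so each
// access is N elements away from the previous one -- poor cache behavior.
int sumColumnMajor() {
  int Sum = 0;
  for (int J = 0; J < N; ++J)
    for (int I = 0; I < N; ++I)
      Sum += A[I][J];
  return Sum;
}

// After interchange: the same result, but A[I][J] is now accessed with unit
// stride in the inner loop.
int sumRowMajor() {
  int Sum = 0;
  for (int I = 0; I < N; ++I)
    for (int J = 0; J < N; ++J)
      Sum += A[I][J];
  return Sum;
}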
+ +// Main LoopInterchange pass. +struct LoopInterchange : public FunctionPass { + static char ID; + ScalarEvolution *SE; + LoopInfo *LI; + DependenceAnalysis *DA; + DominatorTree *DT; + LoopInterchange() + : FunctionPass(ID), SE(nullptr), LI(nullptr), DA(nullptr), DT(nullptr) { + initializeLoopInterchangePass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<ScalarEvolution>(); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addRequired<DependenceAnalysis>(); + AU.addRequiredID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + } + + bool runOnFunction(Function &F) override { + SE = &getAnalysis<ScalarEvolution>(); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + DA = &getAnalysis<DependenceAnalysis>(); + auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + DT = DTWP ? &DTWP->getDomTree() : nullptr; + // Build up a worklist of loop pairs to analyze. + SmallVector<LoopVector, 8> Worklist; + + for (Loop *L : *LI) + populateWorklist(*L, Worklist); + + DEBUG(dbgs() << "Worklist size = " << Worklist.size() << "\n"); + bool Changed = false; + while (!Worklist.empty()) { + LoopVector LoopList = Worklist.pop_back_val(); + Changed |= processLoopList(LoopList); + } + return Changed; + } + + bool isComputableLoopNest(LoopVector LoopList) { + for (auto I = LoopList.begin(), E = LoopList.end(); I != E; ++I) { + Loop *L = *I; + const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L); + if (ExitCountOuter == SE->getCouldNotCompute()) { + DEBUG(dbgs() << "Couldn't compute Backedge count\n"); + return false; + } + if (L->getNumBackEdges() != 1) { + DEBUG(dbgs() << "NumBackEdges is not equal to 1\n"); + return false; + } + if (!L->getExitingBlock()) { + DEBUG(dbgs() << "Loop doesn't have a unique exit block\n"); + return false; + } + } + return true; + } + + unsigned selectLoopForInterchange(LoopVector LoopList) { + // TODO: Add a better heuristic to select the loop to be interchanged based + // on the dependence matrix. Currently we select the innermost loop. + return LoopList.size() - 1; + } + + bool processLoopList(LoopVector LoopList) { + bool Changed = false; + bool containsLCSSAPHI = false; + CharMatrix DependencyMatrix; + if (LoopList.size() < 2) { + DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n"); + return false; + } + if (!isComputableLoopNest(LoopList)) { + DEBUG(dbgs() << "Not a valid loop candidate for interchange\n"); + return false; + } + Loop *OuterMostLoop = *(LoopList.begin()); + + DEBUG(dbgs() << "Processing LoopList of size = " << LoopList.size() + << "\n"); + + if (!populateDependencyMatrix(DependencyMatrix, LoopList.size(), + OuterMostLoop, DA)) { + DEBUG(dbgs() << "Populating Dependency matrix failed\n"); + return false; + } +#ifdef DUMP_DEP_MATRICIES + DEBUG(dbgs() << "Dependence before interchange \n"); + printDepMatrix(DependencyMatrix); +#endif + + BasicBlock *OuterMostLoopLatch = OuterMostLoop->getLoopLatch(); + BranchInst *OuterMostLoopLatchBI = + dyn_cast<BranchInst>(OuterMostLoopLatch->getTerminator()); + if (!OuterMostLoopLatchBI) + return false; + + // Since we currently do not handle LCSSA PHIs, any failure in the loop + // condition will now branch to LoopNestExit. + // TODO: This should be removed once we handle LCSSA PHI nodes. + + // Get the outermost loop exit.
+ BasicBlock *LoopNestExit; + if (OuterMostLoopLatchBI->getSuccessor(0) == OuterMostLoop->getHeader()) + LoopNestExit = OuterMostLoopLatchBI->getSuccessor(1); + else + LoopNestExit = OuterMostLoopLatchBI->getSuccessor(0); + + for (auto I = LoopList.begin(), E = LoopList.end(); I != E; ++I) { + Loop *L = *I; + BasicBlock *Latch = L->getLoopLatch(); + BasicBlock *Header = L->getHeader(); + if (Latch && Latch != Header && isa<PHINode>(Latch->begin())) { + containsLCSSAPHI = true; + break; + } + } + + // TODO: Currently LCSSA PHIs are not handled. Handle them by splitting the + // loop latch and adjusting loop links accordingly. + if (containsLCSSAPHI) + return false; + + unsigned SelecLoopId = selectLoopForInterchange(LoopList); + // Move the selected loop outwards to the best possible position. + for (unsigned i = SelecLoopId; i > 0; i--) { + bool Interchanged = + processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix); + if (!Interchanged) + return Changed; + // The loops were interchanged; reflect that in LoopList. + std::swap(LoopList[i - 1], LoopList[i]); + + // Update the DependencyMatrix. + interChangeDepedencies(DependencyMatrix, i, i - 1); + +#ifdef DUMP_DEP_MATRICIES + DEBUG(dbgs() << "Dependence after interchange \n"); + printDepMatrix(DependencyMatrix); +#endif + Changed |= Interchanged; + } + return Changed; + } + + bool processLoop(LoopVector LoopList, unsigned InnerLoopId, + unsigned OuterLoopId, BasicBlock *LoopNestExit, + std::vector<std::vector<char>> &DependencyMatrix) { + + DEBUG(dbgs() << "Processing Inner Loop Id = " << InnerLoopId + << " and OuterLoopId = " << OuterLoopId << "\n"); + Loop *InnerLoop = LoopList[InnerLoopId]; + Loop *OuterLoop = LoopList[OuterLoopId]; + + LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, this); + if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) { + DEBUG(dbgs() << "Not interchanging Loops. Cannot prove legality\n"); + return false; + } + DEBUG(dbgs() << "Loops are legal to interchange\n"); + LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE); + if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) { + DEBUG(dbgs() << "Interchanging Loops not profitable\n"); + return false; + } + + LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, this, + LoopNestExit); + LIT.transform(); + DEBUG(dbgs() << "Loops interchanged\n"); + return true; + } +}; + +} // end of namespace + +static bool containsUnsafeInstructions(BasicBlock *BB) { + for (auto I = BB->begin(), E = BB->end(); I != E; ++I) { + if (I->mayHaveSideEffects() || I->mayReadFromMemory()) + return true; + } + return false; +}
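At the source level, the distinction the tightly-nested check (below) draws looks like the following illustrative C++, where the bounds are arbitrary:

enum { M = 64 };

void tight(int N, int A[][M]) {
  for (int I = 0; I < N; ++I)     // outer header falls straight into the
    for (int J = 0; J < M; ++J)   // inner loop: a perfect, interchangeable nest
      A[I][J] = 0;
}

void notTight(int N, int A[][M], int B[]) {
  for (int I = 0; I < N; ++I) {
    B[I] = I;                     // extra work between the two headers makes
    for (int J = 0; J < M; ++J)   // the nest imperfect, so it is rejected
      A[I][J] += B[I];
  }
}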
+bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) { + BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); + BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); + BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); + + DEBUG(dbgs() << "Checking if Loops are Tightly Nested\n"); + + // A perfectly nested loop will not have any branch in between the outer and + // inner block, i.e. the outer header will branch to either the inner + // preheader or the outer loop latch. + BranchInst *outerLoopHeaderBI = + dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); + if (!outerLoopHeaderBI) + return false; + unsigned num = outerLoopHeaderBI->getNumSuccessors(); + for (unsigned i = 0; i < num; i++) { + if (outerLoopHeaderBI->getSuccessor(i) != InnerLoopPreHeader && + outerLoopHeaderBI->getSuccessor(i) != OuterLoopLatch) + return false; + } + + DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch \n"); + // There is no basic block in between now; make sure the outer header and the + // outer loop latch don't contain any unsafe instructions. + if (containsUnsafeInstructions(OuterLoopHeader) || + containsUnsafeInstructions(OuterLoopLatch)) + return false; + + DEBUG(dbgs() << "Loops are perfectly nested \n"); + // We have a perfect loop nest. + return true; +} + +static unsigned getPHICount(BasicBlock *BB) { + unsigned PhiCount = 0; + for (auto I = BB->begin(); isa<PHINode>(I); ++I) + PhiCount++; + return PhiCount; +} + +bool LoopInterchangeLegality::isLoopStructureUnderstood( + PHINode *InnerInduction) { + + unsigned Num = InnerInduction->getNumOperands(); + BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader(); + for (unsigned i = 0; i < Num; ++i) { + Value *Val = InnerInduction->getOperand(i); + if (isa<Constant>(Val)) + continue; + Instruction *I = dyn_cast<Instruction>(Val); + if (!I) + return false; + // TODO: Handle triangular loops. + // e.g. for(int i=0;i<N;i++) + // for(int j=i;j<N;j++) + unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i); + if (InnerInduction->getIncomingBlock(IncomBlockIndx) == + InnerLoopPreheader && + !OuterLoop->isLoopInvariant(I)) { + return false; + } + } + return true; +}
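The triangular case that isLoopStructureUnderstood turns away is worth spelling out (an illustrative C++ sketch): the inner loop's starting value is the outer IV itself, so it is not invariant in the outer loop, and a verbatim interchange would visit a different iteration space.

enum { K = 64 };

void triangular(int A[K][K]) {
  for (int I = 0; I < K; ++I)
    for (int J = I; J < K; ++J) // lower bound depends on the outer IV
      A[I][J] = 0;
}

Swapping the loop headers as-is would run J over [I, K) with I fixed by the inner loop, a different (and wrong) set of (I, J) pairs. A legal interchange would need the bounds rewritten (J over [0, K), I over [0, J]), which this pass does not attempt.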
+// This function indicates the current limitations in the transform as a result +// of which we do not proceed. +bool LoopInterchangeLegality::currentLimitations() { + + BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); + BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); + BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); + BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); + BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); + + PHINode *InnerInductionVar; + PHINode *OuterInductionVar; + + // We currently handle only 1 induction variable inside the loop. We also do + // not handle reductions as of now. + if (getPHICount(InnerLoopHeader) > 1) + return true; + + if (getPHICount(OuterLoopHeader) > 1) + return true; + + InnerInductionVar = getInductionVariable(InnerLoop, SE); + OuterInductionVar = getInductionVariable(OuterLoop, SE); + + if (!OuterInductionVar || !InnerInductionVar) { + DEBUG(dbgs() << "Induction variable not found\n"); + return true; + } + + // TODO: Triangular loops are not handled for now. + if (!isLoopStructureUnderstood(InnerInductionVar)) { + DEBUG(dbgs() << "Loop structure not understood by pass\n"); + return true; + } + + // TODO: Loops with LCSSA PHIs are currently not handled. + if (isa<PHINode>(OuterLoopLatch->begin())) { + DEBUG(dbgs() << "Found an LCSSA PHI in outer loop latch\n"); + return true; + } + if (InnerLoopLatch != InnerLoopHeader && + isa<PHINode>(InnerLoopLatch->begin())) { + DEBUG(dbgs() << "Found an LCSSA PHI in inner loop latch\n"); + return true; + } + + // TODO: Current limitation: Since we split the inner loop latch at the point + // where the induction variable is incremented (induction.next), we cannot + // have more than 1 user of induction.next since it would result in broken + // code after the split. + // e.g. + // for(i=0;i<N;i++) { + // for(j = 0;j<M;j++) { + // A[j+1][i+2] = A[j][i]+k; + // } + // } + bool FoundInduction = false; + Instruction *InnerIndexVarInc = nullptr; + if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader) + InnerIndexVarInc = + dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1)); + else + InnerIndexVarInc = + dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0)); + + if (!InnerIndexVarInc) + return true; + + // Since we split the inner loop latch on this induction variable, make sure + // we do not have any instruction between the induction variable and the + // branch instruction. + + for (auto I = InnerLoopLatch->rbegin(), E = InnerLoopLatch->rend(); + I != E && !FoundInduction; ++I) { + if (isa<BranchInst>(*I) || isa<CmpInst>(*I) || isa<TruncInst>(*I)) + continue; + const Instruction &Ins = *I; + // We found an instruction. If it is not the induction variable then it is + // not safe to split this loop latch. + if (!Ins.isIdenticalTo(InnerIndexVarInc)) + return true; + else + FoundInduction = true; + } + // The loop latch ended and we didn't find the induction variable; report it + // as a current limitation. + if (!FoundInduction) + return true; + + return false; +} + +bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, + unsigned OuterLoopId, + CharMatrix &DepMatrix) { + + if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) { + DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId + << " and OuterLoopId = " << OuterLoopId + << " due to dependence\n"); + return false; + } + + // Create unique preheaders if we do not already have them. + BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); + BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); + + // Create a unique outer preheader - + // 1) If the OuterLoop preheader is not present. + // 2) If the OuterLoop preheader is the same as the OuterLoop header. + // 3) If the OuterLoop preheader is the same as the header of the previous + // loop. + // 4) If the OuterLoop preheader is the entry node. + if (!OuterLoopPreHeader || OuterLoopPreHeader == OuterLoop->getHeader() || + isa<PHINode>(OuterLoopPreHeader->begin()) || + !OuterLoopPreHeader->getUniquePredecessor()) { + OuterLoopPreHeader = InsertPreheaderForLoop(OuterLoop, CurrentPass); + } + + if (!InnerLoopPreHeader || InnerLoopPreHeader == InnerLoop->getHeader() || + InnerLoopPreHeader == OuterLoop->getHeader()) { + InnerLoopPreHeader = InsertPreheaderForLoop(InnerLoop, CurrentPass); + } + + // Check if the loops are tightly nested. + if (!tightlyNested(OuterLoop, InnerLoop)) { + DEBUG(dbgs() << "Loops not tightly nested\n"); + return false; + } + + // TODO: The loops could not be interchanged due to current limitations in + // the transform module.
+ if (currentLimitations()) { + DEBUG(dbgs() << "Not legal because of current transform limitation\n"); + return false; + } + + return true; +} + +int LoopInterchangeProfitability::getInstrOrderCost() { + unsigned GoodOrder, BadOrder; + BadOrder = GoodOrder = 0; + for (auto BI = InnerLoop->block_begin(), BE = InnerLoop->block_end(); + BI != BE; ++BI) { + for (auto I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I) { + const Instruction &Ins = *I; + if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) { + unsigned NumOp = GEP->getNumOperands(); + bool FoundInnerInduction = false; + bool FoundOuterInduction = false; + for (unsigned i = 0; i < NumOp; ++i) { + const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i)); + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal); + if (!AR) + continue; + + // If we find the inner induction after an outer induction e.g. + // for(int i=0;i<N;i++) + // for(int j=0;j<N;j++) + // A[i][j] = A[i-1][j-1]+k; + // then it is a good order. + if (AR->getLoop() == InnerLoop) { + // We found an InnerLoop induction after OuterLoop induction. It is + // a good order. + FoundInnerInduction = true; + if (FoundOuterInduction) { + GoodOrder++; + break; + } + } + // If we find the outer induction after an inner induction e.g. + // for(int i=0;i<N;i++) + // for(int j=0;j<N;j++) + // A[j][i] = A[j-1][i-1]+k; + // then it is a bad order. + if (AR->getLoop() == OuterLoop) { + // We found an OuterLoop induction after InnerLoop induction. It is + // a bad order. + FoundOuterInduction = true; + if (FoundInnerInduction) { + BadOrder++; + break; + } + } + } + } + } + } + return GoodOrder - BadOrder; +} + +static bool isProfitabileForVectorization(unsigned InnerLoopId, + unsigned OuterLoopId, + CharMatrix &DepMatrix) { + // TODO: Improve this heuristic to catch more cases. + // If the inner loop is loop independent or doesn't carry any dependency it + // is profitable to move it to the outer position. + unsigned Row = DepMatrix.size(); + for (unsigned i = 0; i < Row; ++i) { + if (DepMatrix[i][InnerLoopId] != 'S' && DepMatrix[i][InnerLoopId] != 'I') + return false; + // TODO: We need to improve this heuristic. + if (DepMatrix[i][OuterLoopId] != '=') + return false; + } + // If the outer loop carries a dependence and the inner loop is loop + // independent then it is profitable to interchange to enable parallelism. + return true; +}
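Concretely, the "good" and "bad" subscript orders that getInstrOrderCost counts look like this in source form (an illustrative sketch mirroring the comments above; the fixed array dimension and the assumption N <= 128 are mine):

enum { Dim = 128 };

void orders(int N, int A[Dim][Dim], int k) {
  // Good order: the outer IV drives the slow dimension and the inner IV the
  // fast one, so consecutive inner iterations touch adjacent memory.
  for (int i = 1; i < N; ++i)
    for (int j = 1; j < N; ++j)
      A[i][j] = A[i - 1][j - 1] + k;

  // Bad order: the inner IV drives the slow dimension; every inner iteration
  // strides a whole row, which is what makes interchange profitable here.
  for (int i = 1; i < N; ++i)
    for (int j = 1; j < N; ++j)
      A[j][i] = A[j - 1][i - 1] + k;
}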
+bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, + unsigned OuterLoopId, + CharMatrix &DepMatrix) { + + // TODO: Add better profitability checks. + // e.g. + // 1) Construct a dependency matrix and move the one with no loop-carried + // dep inside to enable vectorization. + + // This is a rough cost estimation algorithm. It counts the good and bad + // orders of induction variables in the instructions and allows reordering + // if the number of bad orders is more than the number of good ones. + int Cost = 0; + Cost += getInstrOrderCost(); + DEBUG(dbgs() << "Cost = " << Cost << "\n"); + if (Cost < 0) + return true; + + // It is not profitable as per the current cache profitability model. But + // check if we can move this loop outside to improve parallelism. + bool ImprovesPar = + isProfitabileForVectorization(InnerLoopId, OuterLoopId, DepMatrix); + return ImprovesPar; +} + +void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop, + Loop *InnerLoop) { + for (Loop::iterator I = OuterLoop->begin(), E = OuterLoop->end(); I != E; + ++I) { + if (*I == InnerLoop) { + OuterLoop->removeChildLoop(I); + return; + } + } + assert(false && "Couldn't find loop"); +} + +void LoopInterchangeTransform::restructureLoops(Loop *InnerLoop, + Loop *OuterLoop) { + Loop *OuterLoopParent = OuterLoop->getParentLoop(); + if (OuterLoopParent) { + // Remove the loop from its parent loop. + removeChildLoop(OuterLoopParent, OuterLoop); + removeChildLoop(OuterLoop, InnerLoop); + OuterLoopParent->addChildLoop(InnerLoop); + } else { + removeChildLoop(OuterLoop, InnerLoop); + LI->changeTopLevelLoop(OuterLoop, InnerLoop); + } + + for (Loop::iterator I = InnerLoop->begin(), E = InnerLoop->end(); I != E; ++I) + OuterLoop->addChildLoop(InnerLoop->removeChildLoop(I)); + + InnerLoop->addChildLoop(OuterLoop); +} + +bool LoopInterchangeTransform::transform() { + + DEBUG(dbgs() << "transform\n"); + bool Transformed = false; + Instruction *InnerIndexVar; + + if (InnerLoop->getSubLoops().size() == 0) { + BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); + DEBUG(dbgs() << "Calling Split Inner Loop\n"); + PHINode *InductionPHI = getInductionVariable(InnerLoop, SE); + if (!InductionPHI) { + DEBUG(dbgs() << "Failed to find the point to split loop latch \n"); + return false; + } + + if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader) + InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1)); + else + InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0)); + + // + // Split at the place where the induction variable is + // incremented/decremented. + // TODO: This splitting logic may not always work. Fix this. + splitInnerLoopLatch(InnerIndexVar); + DEBUG(dbgs() << "splitInnerLoopLatch Done\n"); + + // Split the inner loop's PHI nodes out into a separate basic block. + splitInnerLoopHeader(); + DEBUG(dbgs() << "splitInnerLoopHeader Done\n"); + } + + Transformed |= adjustLoopLinks(); + if (!Transformed) { + DEBUG(dbgs() << "adjustLoopLinks Failed\n"); + return false; + } + + restructureLoops(InnerLoop, OuterLoop); + return true; +} + +void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) { + BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); + BasicBlock *InnerLoopLatchPred = InnerLoopLatch; + InnerLoopLatch = SplitBlock(InnerLoopLatchPred, Inc, DT, LI); +} + +void LoopInterchangeTransform::splitOuterLoopLatch() { + BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); + BasicBlock *OuterLatchLcssaPhiBlock = OuterLoopLatch; + OuterLoopLatch = SplitBlock(OuterLatchLcssaPhiBlock, + OuterLoopLatch->getFirstNonPHI(), DT, LI); +} + +void LoopInterchangeTransform::splitInnerLoopHeader() { + + // Split the inner loop header out.
+ BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); + SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI); + + DEBUG(dbgs() << "Output of splitInnerLoopHeader InnerLoopHeaderSucc & " + "InnerLoopHeader \n"); +} + +/// \brief Move all instructions except the terminator from FromBB right before +/// InsertBefore. +static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) { + auto &ToList = InsertBefore->getParent()->getInstList(); + auto &FromList = FromBB->getInstList(); + + ToList.splice(InsertBefore, FromList, FromList.begin(), + FromBB->getTerminator()); +} + +void LoopInterchangeTransform::adjustOuterLoopPreheader() { + BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); + BasicBlock *InnerPreHeader = InnerLoop->getLoopPreheader(); + + moveBBContents(OuterLoopPreHeader, InnerPreHeader->getTerminator()); +} + +void LoopInterchangeTransform::adjustInnerLoopPreheader() { + BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); + BasicBlock *OuterHeader = OuterLoop->getHeader(); + + moveBBContents(InnerLoopPreHeader, OuterHeader->getTerminator()); +}
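moveBBContents relies on intrusive-list splicing, which relinks the instructions without copying them. The same half-open-range semantics can be demonstrated with std::list (a standalone sketch; the value 99 below stands in for the block terminator that stays behind):

#include <cassert>
#include <iterator>
#include <list>

void spliceDemo() {
  std::list<int> From = {1, 2, 3, 99}; // 99 plays the terminator
  std::list<int> To = {42};

  // Move [begin, last) -- everything except the "terminator" -- in front of
  // To's first element, mirroring the splice call in moveBBContents.
  To.splice(To.begin(), From, From.begin(), std::prev(From.end()));

  assert((From == std::list<int>{99}));
  assert((To == std::list<int>{1, 2, 3, 42}));
}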
+ + // -------------Adjust loop latches----------- + if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader) + InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1); + else + InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0); + + NumSucc = InnerLoopLatchPredecessorBI->getNumSuccessors(); + for (unsigned i = 0; i < NumSucc; ++i) { + if (InnerLoopLatchPredecessorBI->getSuccessor(i) == InnerLoopLatch) + InnerLoopLatchPredecessorBI->setSuccessor(i, InnerLoopLatchSuccessor); + } + + if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader) + OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1); + else + OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0); + + if (InnerLoopLatchBI->getSuccessor(1) == InnerLoopLatchSuccessor) + InnerLoopLatchBI->setSuccessor(1, OuterLoopLatchSuccessor); + else + InnerLoopLatchBI->setSuccessor(0, OuterLoopLatchSuccessor); + + if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopLatchSuccessor) { + OuterLoopLatchBI->setSuccessor(0, InnerLoopLatch); + } else { + OuterLoopLatchBI->setSuccessor(1, InnerLoopLatch); + } + + return true; +} +void LoopInterchangeTransform::adjustLoopPreheaders() { + + // We have interchanged the preheaders, so we need to interchange their + // contents as well, because the contents of the inner preheader were + // previously executed inside the outer loop. + BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); + BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); + BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); + BranchInst *InnerTermBI = + cast<BranchInst>(InnerLoopPreHeader->getTerminator()); + + BasicBlock *HeaderSplit = + SplitBlock(OuterLoopHeader, OuterLoopHeader->getTerminator(), DT, LI); + Instruction *InsPoint = HeaderSplit->getFirstNonPHI(); + // These instructions should now be executed inside the loop, so move them + // into a new block after the outer header. + moveBBContents(InnerLoopPreHeader, InsPoint); + // These instructions were not previously executed inside the loop, so move + // them to the old inner loop preheader. + moveBBContents(OuterLoopPreHeader, InnerTermBI); +} + +bool LoopInterchangeTransform::adjustLoopLinks() { + + // Adjust all branches in the inner and outer loop.
+ bool Changed = adjustLoopBranches(); + if (Changed) + adjustLoopPreheaders(); + return Changed; +} + +char LoopInterchange::ID = 0; +INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange", + "Interchanges loops for cache reuse", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(DependenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) + +INITIALIZE_PASS_END(LoopInterchange, "loop-interchange", + "Interchanges loops for cache reuse", false, false) + +Pass *llvm::createLoopInterchangePass() { return new LoopInterchange(); } diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp index fdf7e3b..ed103e6 100644 --- a/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" @@ -30,7 +31,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -160,7 +160,6 @@ namespace { AliasAnalysis *AA; LoopInfo *LI; ScalarEvolution *SE; - const DataLayout *DL; TargetLibraryInfo *TLI; DominatorTree *DT; @@ -367,10 +366,8 @@ namespace { struct DAGRootTracker { DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV, ScalarEvolution *SE, AliasAnalysis *AA, - TargetLibraryInfo *TLI, const DataLayout *DL) - : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), - DL(DL), IV(IV) { - } + TargetLibraryInfo *TLI) + : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), IV(IV) {} /// Stage 1: Find all the DAG roots for the induction variable. bool findRoots(); @@ -416,7 +413,6 @@ namespace { ScalarEvolution *SE; AliasAnalysis *AA; TargetLibraryInfo *TLI; - const DataLayout *DL; // The loop induction variable. Instruction *IV; @@ -1131,7 +1127,7 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) { // needed because otherwise isSafeToSpeculativelyExecute returns // false on PHI nodes. if (!isa<PHINode>(I) && !isSimpleLoadStore(I) && - !isSafeToSpeculativelyExecute(I, DL)) + !isSafeToSpeculativelyExecute(I)) // Intervening instructions cause side effects. FutureSideEffects = true; } @@ -1161,11 +1157,10 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) { // side effects, and this instruction might also, then we can't reorder // them, and this matching fails. As an exception, we allow the alias // set tracker to handle regular (simple) load/store dependencies. - if (FutureSideEffects && - ((!isSimpleLoadStore(BaseInst) && - !isSafeToSpeculativelyExecute(BaseInst, DL)) || - (!isSimpleLoadStore(RootInst) && - !isSafeToSpeculativelyExecute(RootInst, DL)))) { + if (FutureSideEffects && ((!isSimpleLoadStore(BaseInst) && + !isSafeToSpeculativelyExecute(BaseInst)) || + (!isSimpleLoadStore(RootInst) && + !isSafeToSpeculativelyExecute(RootInst)))) { DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst << " vs. 
" << *RootInst << " (side effects prevent reordering)\n"); @@ -1272,6 +1267,7 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) { ++J; } + const DataLayout &DL = Header->getModule()->getDataLayout(); // We need to create a new induction variable for each different BaseInst. for (auto &DRS : RootSets) { @@ -1284,7 +1280,7 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) { SE->getConstant(RealIVSCEV->getType(), 1), L, SCEV::FlagAnyWrap)); { // Limit the lifetime of SCEVExpander. - SCEVExpander Expander(*SE, "reroll"); + SCEVExpander Expander(*SE, DL, "reroll"); Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin()); for (auto &KV : Uses) { @@ -1324,7 +1320,7 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) { } } - SimplifyInstructionsInBlock(Header, DL, TLI); + SimplifyInstructionsInBlock(Header, TLI); DeleteDeadPHIs(Header, TLI); } @@ -1448,7 +1444,7 @@ void LoopReroll::ReductionTracker::replaceSelected() { bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount, ReductionTracker &Reductions) { - DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DL); + DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI); if (!DAGRoots.findRoots()) return false; @@ -1477,8 +1473,6 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); SE = &getAnalysis<ScalarEvolution>(); TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); BasicBlock *Header = L->getHeader(); diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 4d12349..a675e12 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -24,8 +24,10 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -412,6 +414,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { for (; PHINode *PN = dyn_cast<PHINode>(I); ++I) ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader); + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + // For the rest of the instructions, either hoist to the OrigPreheader if // possible or create a clone in the OldPreHeader if not. TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator(); @@ -442,8 +446,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // With the operands remapped, see if the instruction constant folds or is // otherwise simplifyable. This commonly occurs because the entry from PHI // nodes allows icmps and other instructions to fold. - // FIXME: Provide DL, TLI, DT, AC to SimplifyInstruction. - Value *V = SimplifyInstruction(C); + // FIXME: Provide TLI, DT, AC to SimplifyInstruction. + Value *V = SimplifyInstruction(C, DL); if (V && LI->replacementPreservesLCSSAForm(C, V)) { // If so, then delete the temporary instruction and stick the folded value // in the map. 
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 318065e..8445d5f 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -68,6 +68,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -3825,7 +3826,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { if (C->getValue()->isNegative() != (NewF.BaseOffset < 0) && (C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale)) - .ule(abs64(NewF.BaseOffset))) + .ule(std::abs(NewF.BaseOffset))) continue; // OK, looks good. @@ -3856,7 +3857,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { J != JE; ++J) if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J)) if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt( - abs64(NewF.BaseOffset)) && + std::abs(NewF.BaseOffset)) && (C->getValue()->getValue() + NewF.BaseOffset).countTrailingZeros() >= countTrailingZeros<uint64_t>(NewF.BaseOffset)) @@ -4823,7 +4824,8 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, // we can remove them after we are done working. SmallVector<WeakVH, 16> DeadInsts; - SCEVExpander Rewriter(SE, "lsr"); + SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), + "lsr"); #ifndef NDEBUG Rewriter.setDebugType(DEBUG_TYPE); #endif @@ -5093,7 +5095,8 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { Changed |= DeleteDeadPHIs(L->getHeader()); if (EnablePhiElim && L->isLoopSimplifyForm()) { SmallVector<WeakVH, 16> DeadInsts; - SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), "lsr"); + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), DL, "lsr"); #ifndef NDEBUG Rewriter.setDebugType(DEBUG_TYPE); #endif diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 924be16..600cbde 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -23,14 +24,13 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/UnrollLoop.h" -#include "llvm/IR/InstVisitor.h" -#include "llvm/Analysis/InstructionSimplify.h" #include <climits> using namespace llvm; @@ -259,6 +259,7 @@ static bool isLoadFromConstantInitializer(Value *V) { return false; } +namespace { struct FindConstantPointers { bool LoadCanBeConstantFolded; bool IndexIsConstant; @@ -356,11 +357,12 @@ class UnrollAnalyzer : public InstVisitor<UnrollAnalyzer, bool> { if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) RHS = SimpleRHS; Value *SimpleV = nullptr; + const DataLayout &DL = I.getModule()->getDataLayout(); if (auto FI = dyn_cast<FPMathOperator>(&I)) SimpleV = - SimplifyFPBinOp(I.getOpcode(), 
LHS, RHS, FI->getFastMathFlags()); + SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL); + else - SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS); + SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL); if (SimpleV && CountedInstructions.insert(&I).second) NumberOfOptimizedInstructions += TTI.getUserCost(&I); @@ -540,6 +542,7 @@ public: return NumberOfOptimizedInstructions; } }; +} // namespace // Complete loop unrolling can make some loads constant, and we need to know if // that would expose any further optimization opportunities. @@ -619,6 +622,11 @@ static bool HasUnrollDisablePragma(const Loop *L) { return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable"); } +// Returns true if the loop has a runtime unroll(disable) pragma. +static bool HasRuntimeUnrollDisablePragma(const Loop *L) { + return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable"); +} + // If loop has an unroll_count pragma return the (necessarily // positive) value from the pragma. Otherwise return 0. static unsigned UnrollCountPragmaValue(const Loop *L) { @@ -807,6 +815,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // Reduce count based on the type of unrolling and the threshold values. unsigned OriginalCount = Count; bool AllowRuntime = UserRuntime ? CurrentRuntime : UP.Runtime; + if (HasRuntimeUnrollDisablePragma(L)) { + AllowRuntime = false; + } if (Unrolling == Partial) { bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial; if (!AllowPartial && !CountSetExplicitly) { diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 987dc96..988d2af 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -42,6 +42,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -1082,6 +1083,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, /// pass. /// void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); while (!Worklist.empty()) { Instruction *I = Worklist.back(); Worklist.pop_back(); @@ -1104,7 +1106,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { // See if instruction simplification can hack this up. This is common for // things like "select false, X, Y" after unswitching made the condition be // 'false'. TODO: update the domtree properly so we can pass it here.
- if (Value *V = SimplifyInstruction(I)) + if (Value *V = SimplifyInstruction(I, DL)) if (LI->replacementPreservesLCSSAForm(I, V)) { ReplaceUsesOfWith(I, V, Worklist, L, LPM); continue; diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 006b885..2b5a078 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" @@ -28,7 +29,6 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" #include <list> using namespace llvm; @@ -41,7 +41,8 @@ STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); STATISTIC(NumCpyToSet, "Number of memcpys converted to memset"); static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, - bool &VariableIdxFound, const DataLayout &TD){ + bool &VariableIdxFound, + const DataLayout &DL) { // Skip over the first indices. gep_type_iterator GTI = gep_type_begin(GEP); for (unsigned i = 1; i != Idx; ++i, ++GTI) @@ -57,13 +58,13 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, // Handle struct indices, which add their field offset to the pointer. if (StructType *STy = dyn_cast<StructType>(*GTI)) { - Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); + Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); continue; } // Otherwise, we have a sequential type like an array or vector. Multiply // the index by the ElementSize. - uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); + uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()); Offset += Size*OpC->getSExtValue(); } @@ -74,7 +75,7 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, /// constant offset, and return that constant offset. For example, Ptr1 might /// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8. static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, - const DataLayout &TD) { + const DataLayout &DL) { Ptr1 = Ptr1->stripPointerCasts(); Ptr2 = Ptr2->stripPointerCasts(); @@ -92,12 +93,12 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, // If one pointer is a GEP and the other isn't, then see if the GEP is a // constant offset from the base, as in "P" and "gep P, 1". 
if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) { - Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD); + Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, DL); return !VariableIdxFound; } if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) { - Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD); + Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, DL); return !VariableIdxFound; } @@ -115,8 +116,8 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx)) break; - int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD); - int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD); + int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, DL); + int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, DL); if (VariableIdxFound) return false; Offset = Offset2-Offset1; @@ -150,12 +151,11 @@ struct MemsetRange { /// TheStores - The actual stores that make up this range. SmallVector<Instruction*, 16> TheStores; - bool isProfitableToUseMemset(const DataLayout &TD) const; - + bool isProfitableToUseMemset(const DataLayout &DL) const; }; } // end anon namespace -bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const { +bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const { // If we found more than 4 stores to merge or 16 bytes, use memset. if (TheStores.size() >= 4 || End-Start >= 16) return true; @@ -183,7 +183,7 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const { // size. If so, check to see whether we will end up actually reducing the // number of stores used. unsigned Bytes = unsigned(End-Start); - unsigned MaxIntSize = TD.getLargestLegalIntTypeSize(); + unsigned MaxIntSize = DL.getLargestLegalIntTypeSize(); if (MaxIntSize == 0) MaxIntSize = 1; unsigned NumPointerStores = Bytes / MaxIntSize; @@ -314,14 +314,12 @@ namespace { class MemCpyOpt : public FunctionPass { MemoryDependenceAnalysis *MD; TargetLibraryInfo *TLI; - const DataLayout *DL; public: static char ID; // Pass identification, replacement for typeid MemCpyOpt() : FunctionPass(ID) { initializeMemCpyOptPass(*PassRegistry::getPassRegistry()); MD = nullptr; TLI = nullptr; - DL = nullptr; } bool runOnFunction(Function &F) override; @@ -377,13 +375,13 @@ INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization", /// attempts to merge them together into a memcpy/memset. Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, Value *StartPtr, Value *ByteVal) { - if (!DL) return nullptr; + const DataLayout &DL = StartInst->getModule()->getDataLayout(); // Okay, so we now have a single store that can be splatable. Scan to find // all subsequent stores of the same value to offset from the same pointer. // Join these together into ranges, so we can decide whether contiguous blocks // are stored. - MemsetRanges Ranges(*DL); + MemsetRanges Ranges(DL); BasicBlock::iterator BI = StartInst; for (++BI; !isa<TerminatorInst>(BI); ++BI) { @@ -406,8 +404,8 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, // Check to see if this store is to a constant offset from the start ptr. 
int64_t Offset; - if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), - Offset, *DL)) + if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, + DL)) break; Ranges.addStore(Offset, NextStore); @@ -420,7 +418,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, // Check to see if this store is to a constant offset from the start ptr. int64_t Offset; - if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *DL)) + if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, DL)) break; Ranges.addMemSet(Offset, MSI); @@ -452,7 +450,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, if (Range.TheStores.size() == 1) continue; // If it is profitable to lower this range to memset, do so now. - if (!Range.isProfitableToUseMemset(*DL)) + if (!Range.isProfitableToUseMemset(DL)) continue; // Otherwise, we do want to transform this! Create a new memset. @@ -464,7 +462,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, if (Alignment == 0) { Type *EltType = cast<PointerType>(StartPtr->getType())->getElementType(); - Alignment = DL->getABITypeAlignment(EltType); + Alignment = DL.getABITypeAlignment(EltType); } AMemSet = @@ -494,8 +492,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (!SI->isSimple()) return false; - - if (!DL) return false; + const DataLayout &DL = SI->getModule()->getDataLayout(); // Detect cases where we're performing call slot forwarding, but // happen to be using a load-store pair to implement it, rather than @@ -525,16 +522,16 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (C) { unsigned storeAlign = SI->getAlignment(); if (!storeAlign) - storeAlign = DL->getABITypeAlignment(SI->getOperand(0)->getType()); + storeAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType()); unsigned loadAlign = LI->getAlignment(); if (!loadAlign) - loadAlign = DL->getABITypeAlignment(LI->getType()); + loadAlign = DL.getABITypeAlignment(LI->getType()); - bool changed = performCallSlotOptzn(LI, - SI->getPointerOperand()->stripPointerCasts(), - LI->getPointerOperand()->stripPointerCasts(), - DL->getTypeStoreSize(SI->getOperand(0)->getType()), - std::min(storeAlign, loadAlign), C); + bool changed = performCallSlotOptzn( + LI, SI->getPointerOperand()->stripPointerCasts(), + LI->getPointerOperand()->stripPointerCasts(), + DL.getTypeStoreSize(SI->getOperand(0)->getType()), + std::min(storeAlign, loadAlign), C); if (changed) { MD->removeInstruction(SI); SI->eraseFromParent(); @@ -606,15 +603,13 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, if (!srcAlloca) return false; - // Check that all of src is copied to dest. 
- if (!DL) return false; - ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize()); if (!srcArraySize) return false; - uint64_t srcSize = DL->getTypeAllocSize(srcAlloca->getAllocatedType()) * - srcArraySize->getZExtValue(); + const DataLayout &DL = cpy->getModule()->getDataLayout(); + uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) * + srcArraySize->getZExtValue(); if (cpyLen < srcSize) return false; @@ -628,8 +623,8 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, if (!destArraySize) return false; - uint64_t destSize = DL->getTypeAllocSize(A->getAllocatedType()) * - destArraySize->getZExtValue(); + uint64_t destSize = DL.getTypeAllocSize(A->getAllocatedType()) * + destArraySize->getZExtValue(); if (destSize < srcSize) return false; @@ -648,7 +643,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, return false; } - uint64_t destSize = DL->getTypeAllocSize(StructTy); + uint64_t destSize = DL.getTypeAllocSize(StructTy); if (destSize < srcSize) return false; } @@ -659,7 +654,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, // Check that dest points to memory that is at least as aligned as src. unsigned srcAlign = srcAlloca->getAlignment(); if (!srcAlign) - srcAlign = DL->getABITypeAlignment(srcAlloca->getAllocatedType()); + srcAlign = DL.getABITypeAlignment(srcAlloca->getAllocatedType()); bool isDestSufficientlyAligned = srcAlign <= cpyAlign; // If dest is not aligned enough and we can't increase its alignment then // bail out. @@ -959,12 +954,11 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { /// processByValArgument - This is called on every byval argument in call sites. bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { - if (!DL) return false; - + const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout(); // Find out what feeds this byval argument. Value *ByValArg = CS.getArgument(ArgNo); Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType(); - uint64_t ByValSize = DL->getTypeAllocSize(ByValTy); + uint64_t ByValSize = DL.getTypeAllocSize(ByValTy); MemDepResult DepInfo = MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize), true, CS.getInstruction(), @@ -997,8 +991,8 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { *CS->getParent()->getParent()); DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); if (MDep->getAlignment() < ByValAlign && - getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &AC, - CS.getInstruction(), &DT) < ByValAlign) + getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, + CS.getInstruction(), &AC, &DT) < ByValAlign) return false; // Verify that the copied-from memory doesn't change in between the memcpy and @@ -1077,8 +1071,6 @@ bool MemCpyOpt::runOnFunction(Function &F) { bool MadeChange = false; MD = &getAnalysis<MemoryDependenceAnalysis>(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? 
&DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); // If we don't have at least memset and memcpy, there is little point of doing diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp index 8fad63f..73f4296 100644 --- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -81,12 +81,13 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include <vector> diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 98016b4..307cc73 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -321,10 +321,8 @@ unsigned Reassociate::getRank(Value *V) { // If this is a not or neg instruction, do not count it for rank. This // assures us that X and ~X will have the same rank. - Type *Ty = V->getType(); - if ((!Ty->isIntegerTy() && !Ty->isFloatingPointTy()) || - (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) && - !BinaryOperator::isFNeg(I))) + if (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) && + !BinaryOperator::isFNeg(I)) ++Rank; DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank << "\n"); @@ -351,7 +349,7 @@ void Reassociate::canonicalizeOperands(Instruction *I) { static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp) { - if (S1->getType()->isIntegerTy()) + if (S1->getType()->isIntOrIntVectorTy()) return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore); else { BinaryOperator *Res = @@ -363,7 +361,7 @@ static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name, static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp) { - if (S1->getType()->isIntegerTy()) + if (S1->getType()->isIntOrIntVectorTy()) return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore); else { BinaryOperator *Res = @@ -375,7 +373,7 @@ static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name, static BinaryOperator *CreateNeg(Value *S1, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp) { - if (S1->getType()->isIntegerTy()) + if (S1->getType()->isIntOrIntVectorTy()) return BinaryOperator::CreateNeg(S1, Name, InsertBefore); else { BinaryOperator *Res = BinaryOperator::CreateFNeg(S1, Name, InsertBefore); @@ -388,8 +386,8 @@ static BinaryOperator *CreateNeg(Value *S1, const Twine &Name, /// static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) { Type *Ty = Neg->getType(); - Constant *NegOne = Ty->isIntegerTy() ? ConstantInt::getAllOnesValue(Ty) - : ConstantFP::get(Ty, -1.0); + Constant *NegOne = Ty->isIntOrIntVectorTy() ? + ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0); BinaryOperator *Res = CreateMul(Neg->getOperand(1), NegOne, "", Neg, Neg); Neg->setOperand(1, Constant::getNullValue(Ty)); // Drop use of op. 
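The isIntOrIntVectorTy() changes in this file let the reassociation helpers handle vector operands: each helper now picks an integer or floating-point opcode for scalars and vectors alike. A small sketch of the shared constant selection, with makeNegOne as a hypothetical name; this is the multiplier LowerNegateToMultiply uses to turn a negation into a multiply, and for a type like <4 x i32> the all-ones value is a splat of i32 -1:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// Build the -1 multiplier for a negation, valid for scalar and vector types
// after this change: all-ones for (vectors of) integers, a splat of -1.0 for
// (vectors of) floats.
static Constant *makeNegOne(Type *Ty) {
  return Ty->isIntOrIntVectorTy() ? ConstantInt::getAllOnesValue(Ty)
                                  : ConstantFP::get(Ty, -1.0);
}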
@@ -872,7 +870,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, Constant *Undef = UndefValue::get(I->getType()); NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode), Undef, Undef, "", I); - if (NewOp->getType()->isFloatingPointTy()) + if (NewOp->getType()->isFPOrFPVectorTy()) NewOp->setFastMathFlags(I->getFastMathFlags()); } else { NewOp = NodesToRewrite.pop_back_val(); @@ -1520,8 +1518,8 @@ Value *Reassociate::OptimizeAdd(Instruction *I, // Insert a new multiply. Type *Ty = TheOp->getType(); - Constant *C = Ty->isIntegerTy() ? ConstantInt::get(Ty, NumFound) - : ConstantFP::get(Ty, NumFound); + Constant *C = Ty->isIntOrIntVectorTy() ? + ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound); Instruction *Mul = CreateMul(TheOp, C, "factor", I, I); // Now that we have inserted a multiply, optimize it. This allows us to @@ -1661,7 +1659,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I, // from an expression will drop a use of maxocc, and this can cause // RemoveFactorFromExpression on successive values to behave differently. Instruction *DummyInst = - I->getType()->isIntegerTy() + I->getType()->isIntOrIntVectorTy() ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal) : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal); @@ -1792,7 +1790,7 @@ static Value *buildMultiplyTree(IRBuilder<> &Builder, Value *LHS = Ops.pop_back_val(); do { - if (LHS->getType()->isIntegerTy()) + if (LHS->getType()->isIntOrIntVectorTy()) LHS = Builder.CreateMul(LHS, Ops.pop_back_val()); else LHS = Builder.CreateFMul(LHS, Ops.pop_back_val()); @@ -2090,8 +2088,9 @@ void Reassociate::OptimizeInst(Instruction *I) { if (I->isCommutative()) canonicalizeOperands(I); - // Don't optimize vector instructions. - if (I->getType()->isVectorTy()) + // TODO: We should optimize vector Xor instructions, but they are + // currently unsupported. + if (I->getType()->isVectorTy() && I->getOpcode() == Instruction::Xor) return; // Don't optimize floating point instructions that don't have unsafe algebra. @@ -2170,9 +2169,6 @@ void Reassociate::OptimizeInst(Instruction *I) { } void Reassociate::ReassociateExpression(BinaryOperator *I) { - assert(!I->getType()->isVectorTy() && - "Reassociation of vector instructions is not supported."); - // First, walk the expression tree, linearizing the tree, collecting the // operand information. SmallVector<RepeatedValue, 8> Tree; diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index ca9ab54..f5d21ff 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -548,9 +548,6 @@ public: } PhiState(Value *b) : status(Base), base(b) {} PhiState() : status(Unknown), base(nullptr) {} - PhiState(const PhiState &other) : status(other.status), base(other.base) { - assert(status != Base || base); - } Status getStatus() const { return status; } Value *getBase() const { return base; } @@ -684,12 +681,19 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, states[def] = PhiState(); // Recursively fill in all phis & selects reachable from the initial one // for which we don't already know a definite base value for - // PERF: Yes, this is as horribly inefficient as it looks. + // TODO: This should be rewritten with a worklist bool done = false; while (!done) { done = true; + // Since we're adding elements to 'states' as we run, we can't keep + // iterators into the set. 
+ SmallVector<Value*, 16> Keys; + Keys.reserve(states.size()); for (auto Pair : states) { - Value *v = Pair.first; + Value *V = Pair.first; + Keys.push_back(V); + } + for (Value *v : Keys) { assert(!isKnownBaseResult(v) && "why did it get added?"); if (PHINode *phi = dyn_cast<PHINode>(v)) { assert(phi->getNumIncomingValues() > 0 && @@ -730,10 +734,12 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, // have reached conflict state. The current version seems too conservative. bool progress = true; - size_t oldSize = 0; while (progress) { - oldSize = states.size(); +#ifndef NDEBUG + size_t oldSize = states.size(); +#endif progress = false; + // We're only changing keys in this loop, thus safe to keep iterators for (auto Pair : states) { MeetPhiStates calculateMeet(states); Value *v = Pair.first; @@ -768,46 +774,58 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, } // Insert Phis for all conflicts + // We want to keep naming deterministic in the loop that follows, so + // sort the keys before iteration. This is useful in allowing us to + // write stable tests. Note that there is no invalidation issue here. + SmallVector<Value*, 16> Keys; + Keys.reserve(states.size()); for (auto Pair : states) { - Instruction *v = cast<Instruction>(Pair.first); - PhiState state = Pair.second; + Value *V = Pair.first; + Keys.push_back(V); + } + std::sort(Keys.begin(), Keys.end(), order_by_name); + // TODO: adjust naming patterns to avoid this order of iteration dependency + for (Value *V : Keys) { + Instruction *v = cast<Instruction>(V); + PhiState state = states[V]; assert(!isKnownBaseResult(v) && "why did it get added?"); assert(!state.isUnknown() && "Optimistic algorithm didn't complete!"); - if (state.isConflict()) { - if (isa<PHINode>(v)) { - int num_preds = - std::distance(pred_begin(v->getParent()), pred_end(v->getParent())); - assert(num_preds > 0 && "how did we reach here"); - PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v); - NewInsertedDefs.insert(phi); - // Add metadata marking this as a base value - auto *const_1 = ConstantInt::get( - Type::getInt32Ty( - v->getParent()->getParent()->getParent()->getContext()), - 1); - auto MDConst = ConstantAsMetadata::get(const_1); - MDNode *md = MDNode::get( - v->getParent()->getParent()->getParent()->getContext(), MDConst); - phi->setMetadata("is_base_value", md); - states[v] = PhiState(PhiState::Conflict, phi); - } else if (SelectInst *sel = dyn_cast<SelectInst>(v)) { - // The undef will be replaced later - UndefValue *undef = UndefValue::get(sel->getType()); - SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef, - undef, "base_select", sel); - NewInsertedDefs.insert(basesel); - // Add metadata marking this as a base value - auto *const_1 = ConstantInt::get( - Type::getInt32Ty( - v->getParent()->getParent()->getParent()->getContext()), - 1); - auto MDConst = ConstantAsMetadata::get(const_1); - MDNode *md = MDNode::get( - v->getParent()->getParent()->getParent()->getContext(), MDConst); - basesel->setMetadata("is_base_value", md); - states[v] = PhiState(PhiState::Conflict, basesel); - } else - llvm_unreachable("unknown conflict type"); + if (!state.isConflict()) + continue; + + if (isa<PHINode>(v)) { + int num_preds = + std::distance(pred_begin(v->getParent()), pred_end(v->getParent())); + assert(num_preds > 0 && "how did we reach here"); + PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v); + NewInsertedDefs.insert(phi); + // Add metadata marking this as a base 
value + auto *const_1 = ConstantInt::get( + Type::getInt32Ty( + v->getParent()->getParent()->getParent()->getContext()), + 1); + auto MDConst = ConstantAsMetadata::get(const_1); + MDNode *md = MDNode::get( + v->getParent()->getParent()->getParent()->getContext(), MDConst); + phi->setMetadata("is_base_value", md); + states[v] = PhiState(PhiState::Conflict, phi); + } else { + SelectInst *sel = cast<SelectInst>(v); + // The undef will be replaced later + UndefValue *undef = UndefValue::get(sel->getType()); + SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef, + undef, "base_select", sel); + NewInsertedDefs.insert(basesel); + // Add metadata marking this as a base value + auto *const_1 = ConstantInt::get( + Type::getInt32Ty( + v->getParent()->getParent()->getParent()->getContext()), + 1); + auto MDConst = ConstantAsMetadata::get(const_1); + MDNode *md = MDNode::get( + v->getParent()->getParent()->getParent()->getContext(), MDConst); + basesel->setMetadata("is_base_value", md); + states[v] = PhiState(PhiState::Conflict, basesel); } } @@ -818,97 +836,98 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, assert(!isKnownBaseResult(v) && "why did it get added?"); assert(!state.isUnknown() && "Optimistic algorithm didn't complete!"); - if (state.isConflict()) { - if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) { - PHINode *phi = cast<PHINode>(v); - unsigned NumPHIValues = phi->getNumIncomingValues(); - for (unsigned i = 0; i < NumPHIValues; i++) { - Value *InVal = phi->getIncomingValue(i); - BasicBlock *InBB = phi->getIncomingBlock(i); - - // If we've already seen InBB, add the same incoming value - // we added for it earlier. The IR verifier requires phi - // nodes with multiple entries from the same basic block - // to have the same incoming value for each of those - // entries. If we don't do this check here and basephi - // has a different type than base, we'll end up adding two - // bitcasts (and hence two distinct values) as incoming - // values for the same basic block. - - int blockIndex = basephi->getBasicBlockIndex(InBB); - if (blockIndex != -1) { - Value *oldBase = basephi->getIncomingValue(blockIndex); - basephi->addIncoming(oldBase, InBB); + if (!state.isConflict()) + continue; + + if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) { + PHINode *phi = cast<PHINode>(v); + unsigned NumPHIValues = phi->getNumIncomingValues(); + for (unsigned i = 0; i < NumPHIValues; i++) { + Value *InVal = phi->getIncomingValue(i); + BasicBlock *InBB = phi->getIncomingBlock(i); + + // If we've already seen InBB, add the same incoming value + // we added for it earlier. The IR verifier requires phi + // nodes with multiple entries from the same basic block + // to have the same incoming value for each of those + // entries. If we don't do this check here and basephi + // has a different type than base, we'll end up adding two + // bitcasts (and hence two distinct values) as incoming + // values for the same basic block. + + int blockIndex = basephi->getBasicBlockIndex(InBB); + if (blockIndex != -1) { + Value *oldBase = basephi->getIncomingValue(blockIndex); + basephi->addIncoming(oldBase, InBB); #ifndef NDEBUG - Value *base = findBaseOrBDV(InVal, cache); - if (!isKnownBaseResult(base)) { - // Either conflict or base. - assert(states.count(base)); - base = states[base].getBase(); - assert(base != nullptr && "unknown PhiState!"); - assert(NewInsertedDefs.count(base) && - "should have already added this in a prev. 
iteration!"); - } - - // In essense this assert states: the only way two - // values incoming from the same basic block may be - // different is by being different bitcasts of the same - // value. A cleanup that remains TODO is changing - // findBaseOrBDV to return an llvm::Value of the correct - // type (and still remain pure). This will remove the - // need to add bitcasts. - assert(base->stripPointerCasts() == oldBase->stripPointerCasts() && - "sanity -- findBaseOrBDV should be pure!"); -#endif - continue; - } - - // Find either the defining value for the PHI or the normal base for - // a non-phi node Value *base = findBaseOrBDV(InVal, cache); if (!isKnownBaseResult(base)) { // Either conflict or base. assert(states.count(base)); base = states[base].getBase(); assert(base != nullptr && "unknown PhiState!"); + assert(NewInsertedDefs.count(base) && + "should have already added this in a prev. iteration!"); } - assert(base && "can't be null"); - // Must use original input BB since base may not be Instruction - // The cast is needed since base traversal may strip away bitcasts - if (base->getType() != basephi->getType()) { - base = new BitCastInst(base, basephi->getType(), "cast", - InBB->getTerminator()); - NewInsertedDefs.insert(base); - } - basephi->addIncoming(base, InBB); + + // In essense this assert states: the only way two + // values incoming from the same basic block may be + // different is by being different bitcasts of the same + // value. A cleanup that remains TODO is changing + // findBaseOrBDV to return an llvm::Value of the correct + // type (and still remain pure). This will remove the + // need to add bitcasts. + assert(base->stripPointerCasts() == oldBase->stripPointerCasts() && + "sanity -- findBaseOrBDV should be pure!"); +#endif + continue; } - assert(basephi->getNumIncomingValues() == NumPHIValues); - } else if (SelectInst *basesel = dyn_cast<SelectInst>(state.getBase())) { - SelectInst *sel = cast<SelectInst>(v); - // Operand 1 & 2 are true, false path respectively. TODO: refactor to - // something more safe and less hacky. - for (int i = 1; i <= 2; i++) { - Value *InVal = sel->getOperand(i); - // Find either the defining value for the PHI or the normal base for - // a non-phi node - Value *base = findBaseOrBDV(InVal, cache); - if (!isKnownBaseResult(base)) { - // Either conflict or base. - assert(states.count(base)); - base = states[base].getBase(); - assert(base != nullptr && "unknown PhiState!"); - } - assert(base && "can't be null"); - // Must use original input BB since base may not be Instruction - // The cast is needed since base traversal may strip away bitcasts - if (base->getType() != basesel->getType()) { - base = new BitCastInst(base, basesel->getType(), "cast", basesel); - NewInsertedDefs.insert(base); - } - basesel->setOperand(i, base); + + // Find either the defining value for the PHI or the normal base for + // a non-phi node + Value *base = findBaseOrBDV(InVal, cache); + if (!isKnownBaseResult(base)) { + // Either conflict or base. 
+ assert(states.count(base)); + base = states[base].getBase(); + assert(base != nullptr && "unknown PhiState!"); } - } else - llvm_unreachable("unexpected conflict type"); + assert(base && "can't be null"); + // Must use original input BB since base may not be Instruction + // The cast is needed since base traversal may strip away bitcasts + if (base->getType() != basephi->getType()) { + base = new BitCastInst(base, basephi->getType(), "cast", + InBB->getTerminator()); + NewInsertedDefs.insert(base); + } + basephi->addIncoming(base, InBB); + } + assert(basephi->getNumIncomingValues() == NumPHIValues); + } else { + SelectInst *basesel = cast<SelectInst>(state.getBase()); + SelectInst *sel = cast<SelectInst>(v); + // Operand 1 & 2 are true, false path respectively. TODO: refactor to + // something more safe and less hacky. + for (int i = 1; i <= 2; i++) { + Value *InVal = sel->getOperand(i); + // Find either the defining value for the PHI or the normal base for + // a non-phi node + Value *base = findBaseOrBDV(InVal, cache); + if (!isKnownBaseResult(base)) { + // Either conflict or base. + assert(states.count(base)); + base = states[base].getBase(); + assert(base != nullptr && "unknown PhiState!"); + } + assert(base && "can't be null"); + // Must use original input BB since base may not be Instruction + // The cast is needed since base traversal may strip away bitcasts + if (base->getType() != basesel->getType()) { + base = new BitCastInst(base, basesel->getType(), "cast", basesel); + NewInsertedDefs.insert(base); + } + basesel->setOperand(i, base); + } } } @@ -964,7 +983,13 @@ static void findBasePointers(const StatepointLiveSetTy &live, DenseMap<llvm::Value *, llvm::Value *> &PointerToBase, DominatorTree *DT, DefiningValueMapTy &DVCache, DenseSet<llvm::Value *> &NewInsertedDefs) { - for (Value *ptr : live) { + // For the naming of values inserted to be deterministic - which makes for + // much cleaner and more stable tests - we need to assign an order to the + // live values. DenseSets do not provide a deterministic order across runs. + SmallVector<Value*, 64> Temp; + Temp.insert(Temp.end(), live.begin(), live.end()); + std::sort(Temp.begin(), Temp.end(), order_by_name); + for (Value *ptr : Temp) { Value *base = findBasePointer(ptr, DVCache, NewInsertedDefs); assert(base && "failed to find base pointer"); PointerToBase[ptr] = base; @@ -993,10 +1018,19 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache, findBasePointers(result.liveset, PointerToBase, &DT, DVCache, NewInsertedDefs); if (PrintBasePointers) { + // Note: Need to print these in a stable order since this is checked in + // some tests. errs() << "Base Pairs (w/o Relocation):\n"; + SmallVector<Value*, 64> Temp; + Temp.reserve(PointerToBase.size()); for (auto Pair : PointerToBase) { - errs() << " derived %" << Pair.first->getName() << " base %" - << Pair.second->getName() << "\n"; + Temp.push_back(Pair.first); + } + std::sort(Temp.begin(), Temp.end(), order_by_name); + for (Value *Ptr : Temp) { + Value *Base = PointerToBase[Ptr]; + errs() << " derived %" << Ptr->getName() << " base %" + << Base->getName() << "\n"; } } @@ -1131,11 +1165,11 @@ static AttributeSet legalizeCallAttributes(AttributeSet AS) { /// statepointToken - statepoint instruction to which relocates should be /// bound. /// Builder - Llvm IR builder to be used to construct new calls. 
-void CreateGCRelocates(ArrayRef<llvm::Value *> liveVariables, - const int liveStart, - ArrayRef<llvm::Value *> basePtrs, - Instruction *statepointToken, IRBuilder<> Builder) { - +static void CreateGCRelocates(ArrayRef<llvm::Value *> liveVariables, + const int liveStart, + ArrayRef<llvm::Value *> basePtrs, + Instruction *statepointToken, + IRBuilder<> Builder) { SmallVector<Instruction *, 64> NewDefs; NewDefs.reserve(liveVariables.size()); @@ -1559,8 +1593,18 @@ static void relocationViaAlloca( // store must be inserted after load, otherwise store will be in alloca's // use list and an extra load will be inserted before it StoreInst *store = new StoreInst(def, alloca); - if (isa<Instruction>(def)) { - store->insertAfter(cast<Instruction>(def)); + if (Instruction *inst = dyn_cast<Instruction>(def)) { + if (InvokeInst *invoke = dyn_cast<InvokeInst>(inst)) { + // InvokeInst is a TerminatorInst so the store needs to be inserted + // into its normal destination block. + BasicBlock *normalDest = invoke->getNormalDest(); + store->insertBefore(normalDest->getFirstNonPHI()); + } else { + assert(!inst->isTerminator() && + "The only TerminatorInst that can produce a value is " + "InvokeInst which is handled above."); + store->insertAfter(inst); + } } else { assert((isa<Argument>(def) || isa<GlobalVariable>(def) || (isa<Constant>(def) && cast<Constant>(def)->isNullValue())) && diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 05b9608..875a007 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -35,7 +36,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> @@ -154,7 +154,7 @@ namespace { /// Constant Propagation. /// class SCCPSolver : public InstVisitor<SCCPSolver> { - const DataLayout *DL; + const DataLayout &DL; const TargetLibraryInfo *TLI; SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable. DenseMap<Value*, LatticeVal> ValueState; // The state each value is in. @@ -206,8 +206,8 @@ class SCCPSolver : public InstVisitor<SCCPSolver> { typedef std::pair<BasicBlock*, BasicBlock*> Edge; DenseSet<Edge> KnownFeasibleEdges; public: - SCCPSolver(const DataLayout *DL, const TargetLibraryInfo *tli) - : DL(DL), TLI(tli) {} + SCCPSolver(const DataLayout &DL, const TargetLibraryInfo *tli) + : DL(DL), TLI(tli) {} /// MarkBlockExecutable - This method can be used by clients to mark all of /// the blocks that are known to be intrinsically live in the processed unit. @@ -1561,8 +1561,7 @@ bool SCCP::runOnFunction(Function &F) { return false; DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n"); - const DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ?
&DLP->getDataLayout() : nullptr; + const DataLayout &DL = F.getParent()->getDataLayout(); const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); SCCPSolver Solver(DL, TLI); @@ -1691,8 +1690,7 @@ static bool AddressIsTaken(const GlobalValue *GV) { } bool IPSCCP::runOnModule(Module &M) { - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; + const DataLayout &DL = M.getDataLayout(); const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); SCCPSolver Solver(DL, TLI); diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index f69c750..06b000f 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -247,7 +247,7 @@ public: /// hold. void insert(ArrayRef<Slice> NewSlices) { int OldSize = Slices.size(); - std::move(NewSlices.begin(), NewSlices.end(), std::back_inserter(Slices)); + Slices.append(NewSlices.begin(), NewSlices.end()); auto SliceI = Slices.begin() + OldSize; std::sort(SliceI, Slices.end()); std::inplace_merge(Slices.begin(), SliceI, Slices.end()); @@ -701,6 +701,7 @@ private: // by writing out the code here where we have tho underlying allocation // size readily available. APInt GEPOffset = Offset; + const DataLayout &DL = GEPI.getModule()->getDataLayout(); for (gep_type_iterator GTI = gep_type_begin(GEPI), GTE = gep_type_end(GEPI); GTI != GTE; ++GTI) { @@ -750,6 +751,7 @@ private: if (!IsOffsetKnown) return PI.setAborted(&LI); + const DataLayout &DL = LI.getModule()->getDataLayout(); uint64_t Size = DL.getTypeStoreSize(LI.getType()); return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile()); } @@ -761,6 +763,7 @@ private: if (!IsOffsetKnown) return PI.setAborted(&SI); + const DataLayout &DL = SI.getModule()->getDataLayout(); uint64_t Size = DL.getTypeStoreSize(ValOp->getType()); // If this memory access can be shown to *statically* extend outside the @@ -898,6 +901,7 @@ private: SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses; Visited.insert(Root); Uses.push_back(std::make_pair(cast<Instruction>(*U), Root)); + const DataLayout &DL = Root->getModule()->getDataLayout(); // If there are no loads or stores, the access is dead. We mark that as // a size zero access. Size = 0; @@ -1194,7 +1198,6 @@ class SROA : public FunctionPass { const bool RequiresDomTree; LLVMContext *C; - const DataLayout *DL; DominatorTree *DT; AssumptionCache *AC; @@ -1243,7 +1246,7 @@ class SROA : public FunctionPass { public: SROA(bool RequiresDomTree = true) : FunctionPass(ID), RequiresDomTree(RequiresDomTree), C(nullptr), - DL(nullptr), DT(nullptr) { + DT(nullptr) { initializeSROAPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; @@ -1349,7 +1352,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B, /// /// FIXME: This should be hoisted into a generic utility, likely in /// Transforms/Util/Local.h -static bool isSafePHIToSpeculate(PHINode &PN, const DataLayout *DL = nullptr) { +static bool isSafePHIToSpeculate(PHINode &PN) { // For now, we can only do this promotion if the load is in the same block // as the PHI, and if there are no stores between the phi and load. // TODO: Allow recursive phi users. 
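The SCCP and SROA hunks around here retire the DataLayoutPass lookup entirely. A side-by-side sketch of the two idioms; typeSizeInBits is an illustrative helper, not part of the patch:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// Old pattern: the layout could be missing, so every client had to guard.
//   DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
//   const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
//   if (!DL) return false; // pass silently gives up
//
// New pattern: every Module owns a DataLayout, so a plain reference works
// and the null checks disappear.
static uint64_t typeSizeInBits(const Function &F, Type *Ty) {
  const DataLayout &DL = F.getParent()->getDataLayout();
  return DL.getTypeSizeInBits(Ty);
}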
@@ -1381,6 +1384,8 @@ static bool isSafePHIToSpeculate(PHINode &PN, const DataLayout *DL = nullptr) { if (!HaveLoad) return false; + const DataLayout &DL = PN.getModule()->getDataLayout(); + // We can only transform this if it is safe to push the loads into the // predecessor blocks. The only thing to watch out for is that we can't put // a possibly trapping load in the predecessor if it is a critical edge. @@ -1403,7 +1408,7 @@ static bool isSafePHIToSpeculate(PHINode &PN, const DataLayout *DL = nullptr) { // is already a load in the block, then we can move the load to the pred // block. if (InVal->isDereferenceablePointer(DL) || - isSafeToLoadUnconditionally(InVal, TI, MaxAlign, DL)) + isSafeToLoadUnconditionally(InVal, TI, MaxAlign)) continue; return false; @@ -1468,10 +1473,10 @@ static void speculatePHINodeLoads(PHINode &PN) { /// /// We can do this to a select if its only uses are loads and if the operand /// to the select can be loaded unconditionally. -static bool isSafeSelectToSpeculate(SelectInst &SI, - const DataLayout *DL = nullptr) { +static bool isSafeSelectToSpeculate(SelectInst &SI) { Value *TValue = SI.getTrueValue(); Value *FValue = SI.getFalseValue(); + const DataLayout &DL = SI.getModule()->getDataLayout(); bool TDerefable = TValue->isDereferenceablePointer(DL); bool FDerefable = FValue->isDereferenceablePointer(DL); @@ -1484,10 +1489,10 @@ static bool isSafeSelectToSpeculate(SelectInst &SI, // absolutely (e.g. allocas) or at this point because we can see other // accesses to it. if (!TDerefable && - !isSafeToLoadUnconditionally(TValue, LI, LI->getAlignment(), DL)) + !isSafeToLoadUnconditionally(TValue, LI, LI->getAlignment())) return false; if (!FDerefable && - !isSafeToLoadUnconditionally(FValue, LI, LI->getAlignment(), DL)) + !isSafeToLoadUnconditionally(FValue, LI, LI->getAlignment())) return false; } @@ -3699,6 +3704,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { // them to the alloca slices. 
SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap; std::vector<LoadInst *> SplitLoads; + const DataLayout &DL = AI.getModule()->getDataLayout(); for (LoadInst *LI : Loads) { SplitLoads.clear(); @@ -3724,10 +3730,10 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8); auto *PartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace()); LoadInst *PLoad = IRB.CreateAlignedLoad( - getAdjustedPtr(IRB, *DL, BasePtr, - APInt(DL->getPointerSizeInBits(), PartOffset), + getAdjustedPtr(IRB, DL, BasePtr, + APInt(DL.getPointerSizeInBits(), PartOffset), PartPtrTy, BasePtr->getName() + "."), - getAdjustedAlignment(LI, PartOffset, *DL), /*IsVolatile*/ false, + getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); // Append this load onto the list of split loads so we can find it later @@ -3777,10 +3783,10 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { PLoad->getType()->getPointerTo(SI->getPointerAddressSpace()); StoreInst *PStore = IRB.CreateAlignedStore( - PLoad, getAdjustedPtr(IRB, *DL, StoreBasePtr, - APInt(DL->getPointerSizeInBits(), PartOffset), + PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, + APInt(DL.getPointerSizeInBits(), PartOffset), PartPtrTy, StoreBasePtr->getName() + "."), - getAdjustedAlignment(SI, PartOffset, *DL), /*IsVolatile*/ false); + getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); (void)PStore; DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n"); } @@ -3857,20 +3863,20 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { } else { IRB.SetInsertPoint(BasicBlock::iterator(LI)); PLoad = IRB.CreateAlignedLoad( - getAdjustedPtr(IRB, *DL, LoadBasePtr, - APInt(DL->getPointerSizeInBits(), PartOffset), + getAdjustedPtr(IRB, DL, LoadBasePtr, + APInt(DL.getPointerSizeInBits(), PartOffset), PartPtrTy, LoadBasePtr->getName() + "."), - getAdjustedAlignment(LI, PartOffset, *DL), /*IsVolatile*/ false, + getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); } // And store this partition. IRB.SetInsertPoint(BasicBlock::iterator(SI)); StoreInst *PStore = IRB.CreateAlignedStore( - PLoad, getAdjustedPtr(IRB, *DL, StoreBasePtr, - APInt(DL->getPointerSizeInBits(), PartOffset), + PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, + APInt(DL.getPointerSizeInBits(), PartOffset), PartPtrTy, StoreBasePtr->getName() + "."), - getAdjustedAlignment(SI, PartOffset, *DL), /*IsVolatile*/ false); + getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); // Now build a new slice for the alloca. NewSlices.push_back( @@ -3970,25 +3976,26 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, // won't always succeed, in which case we fall back to a legal integer type // or an i8 array of an appropriate size. 
   Type *SliceTy = nullptr;
+  const DataLayout &DL = AI.getModule()->getDataLayout();
   if (Type *CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset()))
-    if (DL->getTypeAllocSize(CommonUseTy) >= P.size())
+    if (DL.getTypeAllocSize(CommonUseTy) >= P.size())
       SliceTy = CommonUseTy;
   if (!SliceTy)
-    if (Type *TypePartitionTy = getTypePartition(*DL, AI.getAllocatedType(),
+    if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
                                                  P.beginOffset(), P.size()))
       SliceTy = TypePartitionTy;
   if ((!SliceTy || (SliceTy->isArrayTy() &&
                     SliceTy->getArrayElementType()->isIntegerTy())) &&
-      DL->isLegalInteger(P.size() * 8))
+      DL.isLegalInteger(P.size() * 8))
     SliceTy = Type::getIntNTy(*C, P.size() * 8);
   if (!SliceTy)
     SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
-  assert(DL->getTypeAllocSize(SliceTy) >= P.size());
+  assert(DL.getTypeAllocSize(SliceTy) >= P.size());
 
-  bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, *DL);
+  bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);
 
   VectorType *VecTy =
-      IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, *DL);
+      IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL);
   if (VecTy)
     SliceTy = VecTy;
@@ -4010,12 +4017,12 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
       // The minimum alignment which users can rely on when the explicit
       // alignment is omitted or zero is that required by the ABI for this
       // type.
-      Alignment = DL->getABITypeAlignment(AI.getAllocatedType());
+      Alignment = DL.getABITypeAlignment(AI.getAllocatedType());
     }
     Alignment = MinAlign(Alignment, P.beginOffset());
     // If we will get at least this much alignment from the type alone, leave
     // the alloca's alignment unconstrained.
-    if (Alignment <= DL->getABITypeAlignment(SliceTy))
+    if (Alignment <= DL.getABITypeAlignment(SliceTy))
       Alignment = 0;
     NewAI = new AllocaInst(
         SliceTy, nullptr, Alignment,
@@ -4035,7 +4042,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
   SmallPtrSet<PHINode *, 8> PHIUsers;
   SmallPtrSet<SelectInst *, 8> SelectUsers;
 
-  AllocaSliceRewriter Rewriter(*DL, AS, *this, AI, *NewAI, P.beginOffset(),
+  AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
                                P.endOffset(), IsIntegerPromotable, VecTy,
                                PHIUsers, SelectUsers);
   bool Promotable = true;
@@ -4057,7 +4064,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
   for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
                                             E = PHIUsers.end();
        I != E; ++I)
-    if (!isSafePHIToSpeculate(**I, DL)) {
+    if (!isSafePHIToSpeculate(**I)) {
       Promotable = false;
       PHIUsers.clear();
       SelectUsers.clear();
@@ -4066,7 +4073,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
   for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
                                                E = SelectUsers.end();
        I != E; ++I)
-    if (!isSafeSelectToSpeculate(**I, DL)) {
+    if (!isSafeSelectToSpeculate(**I)) {
       Promotable = false;
       PHIUsers.clear();
       SelectUsers.clear();
@@ -4110,6 +4117,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
   unsigned NumPartitions = 0;
   bool Changed = false;
+  const DataLayout &DL = AI.getModule()->getDataLayout();
 
   // First try to pre-split loads and stores.
   Changed |= presplitLoadsAndStores(AI, AS);
@@ -4127,7 +4135,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
     // confident that the above handling of splittable loads and stores is
     // completely sufficient before we forcibly disable the remaining handling.
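
The SliceTy computation at the top of this hunk is a fallback ladder. A condensed restatement under stated assumptions (pickSliceTy is a hypothetical name; step 2, getTypePartition, is a file-local helper and is elided):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"

static llvm::Type *pickSliceTy(llvm::LLVMContext &C,
                               const llvm::DataLayout &DL,
                               llvm::Type *CommonUseTy, uint64_t Size) {
  // 1. A type all users agree on, if it covers the whole partition.
  if (CommonUseTy && DL.getTypeAllocSize(CommonUseTy) >= Size)
    return CommonUseTy;
  // 2. (elided) a subtype of the alloca carved out at this offset/size.
  // 3. An integer of the exact bit width, if the target calls it legal.
  if (DL.isLegalInteger(Size * 8))
    return llvm::Type::getIntNTy(C, Size * 8);
  // 4. Otherwise a plain byte array of the partition's size.
  return llvm::ArrayType::get(llvm::Type::getInt8Ty(C), Size);
}
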
     if (S.beginOffset() == 0 &&
-        S.endOffset() >= DL->getTypeAllocSize(AI.getAllocatedType()))
+        S.endOffset() >= DL.getTypeAllocSize(AI.getAllocatedType()))
       continue;
     if (isa<LoadInst>(S.getUse()->getUser()) ||
         isa<StoreInst>(S.getUse()->getUser())) {
@@ -4155,7 +4163,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
       Changed = true;
       if (NewAI != &AI) {
         uint64_t SizeOfByte = 8;
-        uint64_t AllocaSize = DL->getTypeSizeInBits(NewAI->getAllocatedType());
+        uint64_t AllocaSize = DL.getTypeSizeInBits(NewAI->getAllocatedType());
         // Don't include any padding.
         uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
         Pieces.push_back(Piece(NewAI, P.beginOffset() * SizeOfByte, Size));
@@ -4236,21 +4244,22 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
     AI.eraseFromParent();
     return true;
   }
+  const DataLayout &DL = AI.getModule()->getDataLayout();
 
   // Skip alloca forms that this analysis can't handle.
   if (AI.isArrayAllocation() || !AI.getAllocatedType()->isSized() ||
-      DL->getTypeAllocSize(AI.getAllocatedType()) == 0)
+      DL.getTypeAllocSize(AI.getAllocatedType()) == 0)
     return false;
 
   bool Changed = false;
 
   // First, split any FCA loads and stores touching this alloca to promote
   // better splitting and promotion opportunities.
-  AggLoadStoreRewriter AggRewriter(*DL);
+  AggLoadStoreRewriter AggRewriter(DL);
   Changed |= AggRewriter.rewrite(AI);
 
   // Build the slices using a recursive instruction-visiting builder.
-  AllocaSlices AS(*DL, AI);
+  AllocaSlices AS(DL, AI);
   DEBUG(AS.print(dbgs()));
   if (AS.isEscaped())
     return Changed;
@@ -4423,12 +4432,6 @@ bool SROA::runOnFunction(Function &F) {
   DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
   C = &F.getContext();
-  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-  if (!DLP) {
-    DEBUG(dbgs() << "  Skipping SROA -- no target data!\n");
-    return false;
-  }
-  DL = &DLP->getDataLayout();
   DominatorTreeWrapperPass *DTWP =
       getAnalysisIfAvailable<DominatorTreeWrapperPass>();
   DT = DTWP ? &DTWP->getDomTree() : nullptr;
diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp
index c7232a9..3e7cf04 100644
--- a/lib/Transforms/Scalar/SampleProfile.cpp
+++ b/lib/Transforms/Scalar/SampleProfile.cpp
@@ -217,6 +217,9 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
 /// \returns The profiled weight of I.
 unsigned SampleProfileLoader::getInstWeight(Instruction &Inst) {
   DebugLoc DLoc = Inst.getDebugLoc();
+  if (DLoc.isUnknown())
+    return 0;
+
   unsigned Lineno = DLoc.getLine();
   if (Lineno < HeaderLineno)
     return 0;
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 621633b..6cc8411 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -48,6 +48,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeLoopDeletionPass(Registry);
   initializeLoopAccessAnalysisPass(Registry);
   initializeLoopInstSimplifyPass(Registry);
+  initializeLoopInterchangePass(Registry);
   initializeLoopRotatePass(Registry);
   initializeLoopStrengthReducePass(Registry);
   initializeLoopRerollPass(Registry);
@@ -209,7 +210,6 @@ void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
 
 void LLVMAddVerifierPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createVerifierPass());
-  // FIXME: should this also add createDebugInfoVerifierPass()?
 }
 
 void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) {
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 5c49a55..acd8585 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -89,7 +89,6 @@ namespace {
 
   private:
     bool HasDomTree;
-    const DataLayout *DL;
 
     /// DeadInsts - Keep track of instructions we have made dead, so that
     /// we can remove them after we are done working.
@@ -159,9 +158,10 @@ namespace {
     void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
                          Type *MemOpType, bool isStore, AllocaInfo &Info,
                          Instruction *TheAccess, bool AllowWholeAccess);
-    bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size);
-    uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset,
-                                  Type *&IdxTy);
+    bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size,
+                          const DataLayout &DL);
+    uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset, Type *&IdxTy,
+                                  const DataLayout &DL);
 
     void DoScalarReplacement(AllocaInst *AI,
                              std::vector<AllocaInst*> &WorkList);
@@ -699,9 +699,9 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
       // If the source and destination are both to the same alloca, then this is
       // a noop copy-to-self, just delete it.  Otherwise, emit a load and store
       // as appropriate.
-      AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &DL, 0));
+      AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, DL, 0));
 
-      if (GetUnderlyingObject(MTI->getSource(), &DL, 0) != OrigAI) {
+      if (GetUnderlyingObject(MTI->getSource(), DL, 0) != OrigAI) {
         // Dest must be OrigAI, change this to be a load from the original
         // pointer (bitcasted), then a store to our new alloca.
         assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
@@ -717,7 +717,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
         LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
         SrcVal->setAlignment(MTI->getAlignment());
         Builder.CreateStore(SrcVal, NewAI);
-      } else if (GetUnderlyingObject(MTI->getDest(), &DL, 0) != OrigAI) {
+      } else if (GetUnderlyingObject(MTI->getDest(), DL, 0) != OrigAI) {
        // Src must be OrigAI, change this to be a load from NewAI then a store
        // through the original dest pointer (bitcasted).
        assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
@@ -1032,17 +1032,8 @@ bool SROA::runOnFunction(Function &F) {
   if (skipOptnoneFunction(F))
     return false;
 
-  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-  DL = DLP ? &DLP->getDataLayout() : nullptr;
-
   bool Changed = performPromotion(F);
 
-  // FIXME: ScalarRepl currently depends on DataLayout more than it
-  // theoretically needs to. It should be refactored in order to support
-  // target-independent IR. Until this is done, just skip the actual
-  // scalar-replacement portion of this pass.
-  if (!DL) return Changed;
-
   while (1) {
     bool LocalChange = performScalarRepl(F);
     if (!LocalChange) break;  // No need to repromote if no scalarrepl
@@ -1148,7 +1139,8 @@ public:
 ///
 /// We can do this to a select if its only uses are loads and if the operand to
 /// the select can be loaded unconditionally.
-static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) {
+static bool isSafeSelectToSpeculate(SelectInst *SI) {
+  const DataLayout &DL = SI->getModule()->getDataLayout();
   bool TDerefable = SI->getTrueValue()->isDereferenceablePointer(DL);
   bool FDerefable = SI->getFalseValue()->isDereferenceablePointer(DL);
 
@@ -1158,11 +1150,13 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) {
 
     // Both operands to the select need to be dereferencable, either absolutely
     // (e.g. allocas) or at this point because we can see other accesses to it.
-    if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
-                                                    LI->getAlignment(), DL))
+    if (!TDerefable &&
+        !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
+                                     LI->getAlignment()))
       return false;
-    if (!FDerefable && !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
-                                                    LI->getAlignment(), DL))
+    if (!FDerefable &&
+        !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
+                                     LI->getAlignment()))
       return false;
   }
 
@@ -1185,7 +1179,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) {
 ///
 /// We can do this to a select if its only uses are loads and if the operand to
 /// the select can be loaded unconditionally.
-static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
+static bool isSafePHIToSpeculate(PHINode *PN) {
   // For now, we can only do this promotion if the load is in the same block as
   // the PHI, and if there are no stores between the phi and load.
   // TODO: Allow recursive phi users.
@@ -1209,6 +1203,8 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
     MaxAlign = std::max(MaxAlign, LI->getAlignment());
   }
 
+  const DataLayout &DL = PN->getModule()->getDataLayout();
+
   // Okay, we know that we have one or more loads in the same block as the PHI.
   // We can transform this if it is safe to push the loads into the predecessor
   // blocks. The only thing to watch out for is that we can't put a possibly
@@ -1234,7 +1230,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
     // If this pointer is always safe to load, or if we can prove that there is
     // already a load in the block, then we can move the load to the pred block.
     if (InVal->isDereferenceablePointer(DL) ||
-        isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, DL))
+        isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign))
       continue;
 
     return false;
@@ -1248,7 +1244,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
 /// direct (non-volatile) loads and stores to it.  If the alloca is close but
 /// not quite there, this will transform the code to allow promotion.  As such,
 /// it is a non-pure predicate.
-static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
+static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout &DL) {
   SetVector<Instruction*, SmallVector<Instruction*, 4>,
             SmallPtrSet<Instruction*, 4> > InstsToRewrite;
   for (User *U : AI->users()) {
@@ -1279,7 +1275,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
 
       // If it is safe to turn "load (select c, AI, ptr)" into a select of two
       // loads, then we can transform this by rewriting the select.
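
This predicate asks the same question per user as the SROA version earlier in the commit: every use must be a simple load, and both arms of the select must be loadable unconditionally at that point. A compressed sketch of that loop; canSpeculateSelect is a hypothetical name, the calls are the same ones the hunk uses:

#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

static bool canSpeculateSelect(llvm::SelectInst *SI) {
  const llvm::DataLayout &DL = SI->getModule()->getDataLayout();
  for (llvm::User *U : SI->users()) {
    auto *LI = llvm::dyn_cast<llvm::LoadInst>(U);
    if (!LI || !LI->isSimple())
      return false; // only simple loads may be speculated
    for (llvm::Value *Arm : {SI->getTrueValue(), SI->getFalseValue()})
      if (!Arm->isDereferenceablePointer(DL) &&
          !llvm::isSafeToLoadUnconditionally(Arm, LI, LI->getAlignment()))
        return false;
  }
  return true;
}
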
-      if (!isSafeSelectToSpeculate(SI, DL))
+      if (!isSafeSelectToSpeculate(SI))
         return false;
 
       InstsToRewrite.insert(SI);
@@ -1294,7 +1290,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
 
       // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
       // in the pred blocks, then we can transform this by rewriting the PHI.
-      if (!isSafePHIToSpeculate(PN, DL))
+      if (!isSafePHIToSpeculate(PN))
         return false;
 
       InstsToRewrite.insert(PN);
@@ -1416,6 +1412,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
 
 bool SROA::performPromotion(Function &F) {
   std::vector<AllocaInst*> Allocas;
+  const DataLayout &DL = F.getParent()->getDataLayout();
   DominatorTree *DT = nullptr;
   if (HasDomTree)
     DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -1479,6 +1476,7 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
 //
 bool SROA::performScalarRepl(Function &F) {
   std::vector<AllocaInst*> WorkList;
+  const DataLayout &DL = F.getParent()->getDataLayout();
 
   // Scan the entry basic block, adding allocas to the worklist.
   BasicBlock &BB = F.getEntryBlock();
@@ -1508,7 +1506,7 @@ bool SROA::performScalarRepl(Function &F) {
     // transform the allocation instruction if it is an array allocation
     // (allocations OF arrays are ok though), and an allocation of a scalar
     // value cannot be decomposed at all.
-    uint64_t AllocaSize = DL->getTypeAllocSize(AI->getAllocatedType());
+    uint64_t AllocaSize = DL.getTypeAllocSize(AI->getAllocatedType());
 
     // Do not promote [0 x %struct].
     if (AllocaSize == 0) continue;
@@ -1531,8 +1529,9 @@ bool SROA::performScalarRepl(Function &F) {
     // promoted itself.  If so, we don't want to transform it needlessly.  Note
     // that we can't just check based on the type: the alloca may be of an i32
     // but that has pointer arithmetic to set byte 3 of it or something.
-    if (AllocaInst *NewAI = ConvertToScalarInfo(
-            (unsigned)AllocaSize, *DL, ScalarLoadThreshold).TryConvert(AI)) {
+    if (AllocaInst *NewAI =
+            ConvertToScalarInfo((unsigned)AllocaSize, DL, ScalarLoadThreshold)
+                .TryConvert(AI)) {
       NewAI->takeName(AI);
       AI->eraseFromParent();
       ++NumConverted;
@@ -1610,6 +1609,7 @@ void SROA::DeleteDeadInstructions() {
 /// referenced by this instruction.
 void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
                                AllocaInfo &Info) {
+  const DataLayout &DL = I->getModule()->getDataLayout();
   for (Use &U : I->uses()) {
     Instruction *User = cast<Instruction>(U.getUser());
 
@@ -1632,8 +1632,8 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
       if (!LI->isSimple())
         return MarkUnsafe(Info, User);
       Type *LIType = LI->getType();
-      isSafeMemAccess(Offset, DL->getTypeAllocSize(LIType),
-                      LIType, false, Info, LI, true /*AllowWholeAccess*/);
+      isSafeMemAccess(Offset, DL.getTypeAllocSize(LIType), LIType, false, Info,
+                      LI, true /*AllowWholeAccess*/);
       Info.hasALoadOrStore = true;
 
     } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
@@ -1642,8 +1642,8 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
         return MarkUnsafe(Info, User);
 
       Type *SIType = SI->getOperand(0)->getType();
-      isSafeMemAccess(Offset, DL->getTypeAllocSize(SIType),
-                      SIType, true, Info, SI, true /*AllowWholeAccess*/);
+      isSafeMemAccess(Offset, DL.getTypeAllocSize(SIType), SIType, true, Info,
+                      SI, true /*AllowWholeAccess*/);
       Info.hasALoadOrStore = true;
     } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
       if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
@@ -1675,6 +1675,7 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
   if (!Info.CheckedPHIs.insert(PN).second)
     return;
 
+  const DataLayout &DL = I->getModule()->getDataLayout();
   for (User *U : I->users()) {
     Instruction *UI = cast<Instruction>(U);
 
@@ -1691,8 +1692,8 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
       if (!LI->isSimple())
         return MarkUnsafe(Info, UI);
       Type *LIType = LI->getType();
-      isSafeMemAccess(Offset, DL->getTypeAllocSize(LIType),
-                      LIType, false, Info, LI, false /*AllowWholeAccess*/);
+      isSafeMemAccess(Offset, DL.getTypeAllocSize(LIType), LIType, false, Info,
+                      LI, false /*AllowWholeAccess*/);
       Info.hasALoadOrStore = true;
 
     } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
@@ -1701,8 +1702,8 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
         return MarkUnsafe(Info, UI);
 
       Type *SIType = SI->getOperand(0)->getType();
-      isSafeMemAccess(Offset, DL->getTypeAllocSize(SIType),
-                      SIType, true, Info, SI, false /*AllowWholeAccess*/);
+      isSafeMemAccess(Offset, DL.getTypeAllocSize(SIType), SIType, true, Info,
+                      SI, false /*AllowWholeAccess*/);
       Info.hasALoadOrStore = true;
     } else if (isa<PHINode>(UI) || isa<SelectInst>(UI)) {
       isSafePHISelectUseForScalarRepl(UI, Offset, Info);
@@ -1746,9 +1747,11 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
   // constant part of the offset.
   if (NonConstant)
     Indices.pop_back();
-  Offset += DL->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
-  if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset,
-                        NonConstantIdxSize))
+
+  const DataLayout &DL = GEPI->getModule()->getDataLayout();
+  Offset += DL.getIndexedOffset(GEPI->getPointerOperandType(), Indices);
+  if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, NonConstantIdxSize,
+                        DL))
     MarkUnsafe(Info, GEPI);
 }
 
@@ -1803,9 +1806,10 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
                            Type *MemOpType, bool isStore,
                            AllocaInfo &Info, Instruction *TheAccess,
                            bool AllowWholeAccess) {
+  const DataLayout &DL = TheAccess->getModule()->getDataLayout();
   // Check if this is a load/store of the entire alloca.
   if (Offset == 0 && AllowWholeAccess &&
-      MemSize == DL->getTypeAllocSize(Info.AI->getAllocatedType())) {
+      MemSize == DL.getTypeAllocSize(Info.AI->getAllocatedType())) {
     // This can be safe for MemIntrinsics (where MemOpType is 0) and integer
     // loads/stores (which are essentially the same as the MemIntrinsics with
     // regard to copying padding between elements).  But, if an alloca is
@@ -1828,7 +1832,7 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
   }
   // Check if the offset/size correspond to a component within the alloca type.
   Type *T = Info.AI->getAllocatedType();
-  if (TypeHasComponent(T, Offset, MemSize)) {
+  if (TypeHasComponent(T, Offset, MemSize, DL)) {
     Info.hasSubelementAccess = true;
     return;
   }
@@ -1838,24 +1842,25 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
 
 /// TypeHasComponent - Return true if T has a component type with the
 /// specified offset and size.  If Size is zero, do not check the size.
-bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
+bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size,
+                            const DataLayout &DL) {
   Type *EltTy;
   uint64_t EltSize;
   if (StructType *ST = dyn_cast<StructType>(T)) {
-    const StructLayout *Layout = DL->getStructLayout(ST);
+    const StructLayout *Layout = DL.getStructLayout(ST);
     unsigned EltIdx = Layout->getElementContainingOffset(Offset);
     EltTy = ST->getContainedType(EltIdx);
-    EltSize = DL->getTypeAllocSize(EltTy);
+    EltSize = DL.getTypeAllocSize(EltTy);
     Offset -= Layout->getElementOffset(EltIdx);
   } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
     EltTy = AT->getElementType();
-    EltSize = DL->getTypeAllocSize(EltTy);
+    EltSize = DL.getTypeAllocSize(EltTy);
     if (Offset >= AT->getNumElements() * EltSize)
       return false;
     Offset %= EltSize;
   } else if (VectorType *VT = dyn_cast<VectorType>(T)) {
     EltTy = VT->getElementType();
-    EltSize = DL->getTypeAllocSize(EltTy);
+    EltSize = DL.getTypeAllocSize(EltTy);
     if (Offset >= VT->getNumElements() * EltSize)
       return false;
     Offset %= EltSize;
@@ -1867,7 +1872,7 @@ bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
   // Check if the component spans multiple elements.
   if (Offset + Size > EltSize)
     return false;
-  return TypeHasComponent(EltTy, Offset, Size);
+  return TypeHasComponent(EltTy, Offset, Size, DL);
 }
 
 /// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite
@@ -1876,6 +1881,7 @@ bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
 /// instruction.
 void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
                                 SmallVectorImpl<AllocaInst *> &NewElts) {
+  const DataLayout &DL = I->getModule()->getDataLayout();
   for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) {
     Use &TheUse = *UI++;
     Instruction *User = cast<Instruction>(TheUse.getUser());
@@ -1893,8 +1899,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
     if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
       ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
       uint64_t MemSize = Length->getZExtValue();
-      if (Offset == 0 &&
-          MemSize == DL->getTypeAllocSize(AI->getAllocatedType()))
+      if (Offset == 0 && MemSize == DL.getTypeAllocSize(AI->getAllocatedType()))
         RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
       // Otherwise the intrinsic can only touch a single element and the
       // address operand will be updated, so nothing else needs to be done.
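
TypeHasComponent peels one aggregate level per recursion. A concrete trace: for {i32, [4 x i16]} with Offset = 6 and Size = 2, the struct branch selects element 1 (the array, at byte offset 4) and recurses with Offset = 2, which lands exactly on the second i16, so the query succeeds. The standalone sketch below performs that first decomposition step; the data layout string is an assumption (a typical x86-64 layout):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

static bool offsetSixHitsAnI16() {
  llvm::LLVMContext Ctx;
  llvm::DataLayout DL("e-m:e-i64:64-f80:128-n8:16:32:64-S128");
  llvm::Type *Elts[] = {llvm::Type::getInt32Ty(Ctx),
                        llvm::ArrayType::get(llvm::Type::getInt16Ty(Ctx), 4)};
  llvm::StructType *STy = llvm::StructType::get(Ctx, Elts);
  const llvm::StructLayout *SL = DL.getStructLayout(STy);
  unsigned EltIdx = SL->getElementContainingOffset(6); // element 1: the array
  uint64_t Inner = 6 - SL->getElementOffset(EltIdx);   // byte 2 of the array
  return EltIdx == 1 && Inner == 2;                    // i.e. the second i16
}
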
@@ -1930,8 +1935,8 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
         LI->replaceAllUsesWith(Insert);
         DeadInsts.push_back(LI);
       } else if (LIType->isIntegerTy() &&
-                 DL->getTypeAllocSize(LIType) ==
-                 DL->getTypeAllocSize(AI->getAllocatedType())) {
+                 DL.getTypeAllocSize(LIType) ==
+                     DL.getTypeAllocSize(AI->getAllocatedType())) {
         // If this is a load of the entire alloca to an integer, rewrite it.
         RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
       }
@@ -1957,8 +1962,8 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
         }
         DeadInsts.push_back(SI);
       } else if (SIType->isIntegerTy() &&
-                 DL->getTypeAllocSize(SIType) ==
-                 DL->getTypeAllocSize(AI->getAllocatedType())) {
+                 DL.getTypeAllocSize(SIType) ==
+                     DL.getTypeAllocSize(AI->getAllocatedType())) {
         // If this is a store of the entire alloca from an integer, rewrite it.
         RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
       }
@@ -2001,7 +2006,8 @@ void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
   Type *T = AI->getAllocatedType();
   uint64_t EltOffset = 0;
   Type *IdxTy;
-  uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
+  uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy,
+                                      BC->getModule()->getDataLayout());
   Instruction *Val = NewElts[Idx];
   if (Val->getType() != BC->getDestTy()) {
     Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
@@ -2016,11 +2022,12 @@ void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
 /// Sets T to the type of the element and Offset to the offset within that
 /// element.  IdxTy is set to the type of the index result to be used in a
 /// GEP instruction.
-uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
-                                    Type *&IdxTy) {
+uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset, Type *&IdxTy,
+                                    const DataLayout &DL) {
   uint64_t Idx = 0;
+
   if (StructType *ST = dyn_cast<StructType>(T)) {
-    const StructLayout *Layout = DL->getStructLayout(ST);
+    const StructLayout *Layout = DL.getStructLayout(ST);
     Idx = Layout->getElementContainingOffset(Offset);
     T = ST->getContainedType(Idx);
     Offset -= Layout->getElementOffset(Idx);
@@ -2028,7 +2035,7 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
     return Idx;
   } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
     T = AT->getElementType();
-    uint64_t EltSize = DL->getTypeAllocSize(T);
+    uint64_t EltSize = DL.getTypeAllocSize(T);
     Idx = Offset / EltSize;
     Offset -= Idx * EltSize;
     IdxTy = Type::getInt64Ty(T->getContext());
@@ -2036,7 +2043,7 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
   }
   VectorType *VT = cast<VectorType>(T);
   T = VT->getElementType();
-  uint64_t EltSize = DL->getTypeAllocSize(T);
+  uint64_t EltSize = DL.getTypeAllocSize(T);
   Idx = Offset / EltSize;
   Offset -= Idx * EltSize;
   IdxTy = Type::getInt64Ty(T->getContext());
@@ -2049,6 +2056,7 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
 void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
                       SmallVectorImpl<AllocaInst *> &NewElts) {
   uint64_t OldOffset = Offset;
+  const DataLayout &DL = GEPI->getModule()->getDataLayout();
   SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
   // If the GEP was dynamic then it must have been a dynamic vector lookup.
   // In this case, it must be the last GEP operand which is dynamic so keep that
@@ -2057,19 +2065,19 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
   Value* NonConstantIdx = nullptr;
   if (!GEPI->hasAllConstantIndices())
     NonConstantIdx = Indices.pop_back_val();
-  Offset += DL->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
+  Offset += DL.getIndexedOffset(GEPI->getPointerOperandType(), Indices);
 
   RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
 
   Type *T = AI->getAllocatedType();
   Type *IdxTy;
-  uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy);
+  uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy, DL);
   if (GEPI->getOperand(0) == AI)
     OldIdx = ~0ULL; // Force the GEP to be rewritten.
 
   T = AI->getAllocatedType();
   uint64_t EltOffset = Offset;
-  uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
+  uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy, DL);
 
   // If this GEP does not move the pointer across elements of the alloca
   // being split, then it does not needs to be rewritten.
@@ -2080,7 +2088,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
   SmallVector<Value*, 8> NewArgs;
   NewArgs.push_back(Constant::getNullValue(i32Ty));
   while (EltOffset != 0) {
-    uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy);
+    uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy, DL);
     NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
   }
   if (NonConstantIdx) {
@@ -2114,9 +2122,10 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
   // Put matching lifetime markers on everything from Offset up to
   // Offset+OldSize.
   Type *AIType = AI->getAllocatedType();
+  const DataLayout &DL = II->getModule()->getDataLayout();
   uint64_t NewOffset = Offset;
   Type *IdxTy;
-  uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy);
+  uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy, DL);
 
   IRBuilder<> Builder(II);
   uint64_t Size = OldSize->getLimitedValue();
@@ -2129,7 +2138,7 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
     V = Builder.CreateGEP(V, Builder.getInt64(NewOffset));
 
     IdxTy = NewElts[Idx]->getAllocatedType();
-    uint64_t EltSize = DL->getTypeAllocSize(IdxTy) - NewOffset;
+    uint64_t EltSize = DL.getTypeAllocSize(IdxTy) - NewOffset;
     if (EltSize > Size) {
       EltSize = Size;
       Size = 0;
@@ -2145,7 +2154,7 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
 
   for (; Idx != NewElts.size() && Size; ++Idx) {
     IdxTy = NewElts[Idx]->getAllocatedType();
-    uint64_t EltSize = DL->getTypeAllocSize(IdxTy);
+    uint64_t EltSize = DL.getTypeAllocSize(IdxTy);
     if (EltSize > Size) {
       EltSize = Size;
       Size = 0;
@@ -2221,6 +2230,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
   bool SROADest = MI->getRawDest() == Inst;
 
   Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
+  const DataLayout &DL = MI->getModule()->getDataLayout();
 
   for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
     // If this is a memcpy/memmove, emit a GEP of the other element address.
@@ -2237,10 +2247,10 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
       PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
       Type *OtherTy = OtherPtrTy->getElementType();
       if (StructType *ST = dyn_cast<StructType>(OtherTy)) {
-        EltOffset = DL->getStructLayout(ST)->getElementOffset(i);
+        EltOffset = DL.getStructLayout(ST)->getElementOffset(i);
       } else {
         Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
-        EltOffset = DL->getTypeAllocSize(EltTy)*i;
+        EltOffset = DL.getTypeAllocSize(EltTy) * i;
       }
 
       // The alignment of the other pointer is the guaranteed alignment of the
@@ -2281,7 +2291,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
         Type *ValTy = EltTy->getScalarType();
 
         // Construct an integer with the right value.
-        unsigned EltSize = DL->getTypeSizeInBits(ValTy);
+        unsigned EltSize = DL.getTypeSizeInBits(ValTy);
         APInt OneVal(EltSize, CI->getZExtValue());
         APInt TotalVal(OneVal);
         // Set each byte.
@@ -2311,7 +2321,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
       // this element.
     }
 
-    unsigned EltSize = DL->getTypeAllocSize(EltTy);
+    unsigned EltSize = DL.getTypeAllocSize(EltTy);
     if (!EltSize)
       continue;
 
@@ -2345,12 +2355,13 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
   // and store the element value to the individual alloca.
   Value *SrcVal = SI->getOperand(0);
   Type *AllocaEltTy = AI->getAllocatedType();
-  uint64_t AllocaSizeBits = DL->getTypeAllocSizeInBits(AllocaEltTy);
+  const DataLayout &DL = SI->getModule()->getDataLayout();
+  uint64_t AllocaSizeBits = DL.getTypeAllocSizeInBits(AllocaEltTy);
 
   IRBuilder<> Builder(SI);
 
   // Handle tail padding by extending the operand
-  if (DL->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
+  if (DL.getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
     SrcVal = Builder.CreateZExt(SrcVal,
                             IntegerType::get(SI->getContext(), AllocaSizeBits));
 
@@ -2360,15 +2371,15 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
   // There are two forms here: AI could be an array or struct.  Both cases
   // have different ways to compute the element offset.
   if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
-    const StructLayout *Layout = DL->getStructLayout(EltSTy);
+    const StructLayout *Layout = DL.getStructLayout(EltSTy);
 
     for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
       // Get the number of bits to shift SrcVal to get the value.
       Type *FieldTy = EltSTy->getElementType(i);
       uint64_t Shift = Layout->getElementOffsetInBits(i);
 
-      if (DL->isBigEndian())
-        Shift = AllocaSizeBits-Shift-DL->getTypeAllocSizeInBits(FieldTy);
+      if (DL.isBigEndian())
+        Shift = AllocaSizeBits - Shift - DL.getTypeAllocSizeInBits(FieldTy);
 
       Value *EltVal = SrcVal;
       if (Shift) {
@@ -2377,7 +2388,7 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
       }
 
       // Truncate down to an integer of the right size.
-      uint64_t FieldSizeBits = DL->getTypeSizeInBits(FieldTy);
+      uint64_t FieldSizeBits = DL.getTypeSizeInBits(FieldTy);
 
       // Ignore zero sized fields like {}, they obviously contain no data.
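
The endian-dependent Shift in this hunk decides where a field's bits live inside the whole-alloca integer. A worked instance for {i16, i16, i32} (field bit offsets 0, 16, 32; AllocaSizeBits = 64): little-endian uses the field offsets directly, while big-endian mirrors them. A sketch of exactly the formula above:

#include <cstdint>

// Shift = AllocaSizeBits - FieldOffsetBits - FieldSizeBits (big-endian case).
static uint64_t bigEndianShift(uint64_t AllocaSizeBits,
                               uint64_t FieldOffsetBits,
                               uint64_t FieldSizeBits) {
  return AllocaSizeBits - FieldOffsetBits - FieldSizeBits;
}
// bigEndianShift(64, 0, 16) == 48: on a big-endian target the first i16 is
// the top 16 bits of the stored i64; on little-endian it is the low 16.
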
       if (FieldSizeBits == 0) continue;
@@ -2402,12 +2413,12 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
   } else {
     ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
     Type *ArrayEltTy = ATy->getElementType();
-    uint64_t ElementOffset = DL->getTypeAllocSizeInBits(ArrayEltTy);
-    uint64_t ElementSizeBits = DL->getTypeSizeInBits(ArrayEltTy);
+    uint64_t ElementOffset = DL.getTypeAllocSizeInBits(ArrayEltTy);
+    uint64_t ElementSizeBits = DL.getTypeSizeInBits(ArrayEltTy);
 
     uint64_t Shift;
 
-    if (DL->isBigEndian())
+    if (DL.isBigEndian())
       Shift = AllocaSizeBits-ElementOffset;
     else
       Shift = 0;
@@ -2441,7 +2452,7 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
       }
 
       new StoreInst(EltVal, DestField, SI);
 
-      if (DL->isBigEndian())
+      if (DL.isBigEndian())
         Shift -= ElementOffset;
       else
         Shift += ElementOffset;
@@ -2459,7 +2470,8 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
   // Extract each element out of the NewElts according to its structure offset
   // and form the result value.
   Type *AllocaEltTy = AI->getAllocatedType();
-  uint64_t AllocaSizeBits = DL->getTypeAllocSizeInBits(AllocaEltTy);
+  const DataLayout &DL = LI->getModule()->getDataLayout();
+  uint64_t AllocaSizeBits = DL.getTypeAllocSizeInBits(AllocaEltTy);
 
   DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
                << '\n');
@@ -2469,10 +2481,10 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
   const StructLayout *Layout = nullptr;
   uint64_t ArrayEltBitOffset = 0;
   if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
-    Layout = DL->getStructLayout(EltSTy);
+    Layout = DL.getStructLayout(EltSTy);
   } else {
     Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
-    ArrayEltBitOffset = DL->getTypeAllocSizeInBits(ArrayEltTy);
+    ArrayEltBitOffset = DL.getTypeAllocSizeInBits(ArrayEltTy);
   }
 
   Value *ResultVal =
@@ -2484,7 +2496,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
     Value *SrcField = NewElts[i];
     Type *FieldTy =
       cast<PointerType>(SrcField->getType())->getElementType();
-    uint64_t FieldSizeBits = DL->getTypeSizeInBits(FieldTy);
+    uint64_t FieldSizeBits = DL.getTypeSizeInBits(FieldTy);
 
     // Ignore zero sized fields like {}, they obviously contain no data.
     if (FieldSizeBits == 0) continue;
@@ -2515,7 +2527,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
     else                  // Array case.
       Shift = i*ArrayEltBitOffset;
 
-    if (DL->isBigEndian())
+    if (DL.isBigEndian())
       Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
 
     if (Shift) {
@@ -2532,7 +2544,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
   }
 
   // Handle tail padding by truncating the result
-  if (DL->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
+  if (DL.getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
     ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);
 
   LI->replaceAllUsesWith(ResultVal);
@@ -2589,13 +2601,15 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
       return false;
   }
 
+  const DataLayout &DL = AI->getModule()->getDataLayout();
+
   // Okay, we know all the users are promotable.  If the aggregate is a memcpy
   // source and destination, we have to be careful.  In particular, the memcpy
   // could be moving around elements that live in structure padding of the LLVM
   // types, but may actually be used.  In these cases, we refuse to promote the
   // struct.
   if (Info.isMemCpySrc && Info.isMemCpyDst &&
-      HasPadding(AI->getAllocatedType(), *DL))
+      HasPadding(AI->getAllocatedType(), DL))
     return false;
 
   // If the alloca never has an access to just *part* of it, but is accessed
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index 6036c09..a457cba 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -165,7 +165,7 @@ private:
   void gather(Instruction *, const ValueVector &);
   bool canTransferMetadata(unsigned Kind);
   void transferMetadata(Instruction *, const ValueVector &);
-  bool getVectorLayout(Type *, unsigned, VectorLayout &);
+  bool getVectorLayout(Type *, unsigned, VectorLayout &, const DataLayout &);
   bool finish();
 
   template<typename T> bool splitBinary(Instruction &, const T &);
@@ -173,7 +173,6 @@ private:
   ScatterMap Scattered;
   GatherList Gathered;
   unsigned ParallelLoopAccessMDKind;
-  const DataLayout *DL;
   bool ScalarizeLoadStore;
 };
 
@@ -248,8 +247,6 @@ bool Scalarizer::doInitialization(Module &M) {
 }
 
 bool Scalarizer::runOnFunction(Function &F) {
-  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-  DL = DLP ? &DLP->getDataLayout() : nullptr;
   for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
     BasicBlock *BB = BBI;
     for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
@@ -345,10 +342,7 @@ void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
 // Try to fill in Layout from Ty, returning true on success.  Alignment is
 // the alignment of the vector, or 0 if the ABI default should be used.
 bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
-                                 VectorLayout &Layout) {
-  if (!DL)
-    return false;
-
+                                 VectorLayout &Layout, const DataLayout &DL) {
   // Make sure we're dealing with a vector.
   Layout.VecTy = dyn_cast<VectorType>(Ty);
   if (!Layout.VecTy)
@@ -356,15 +350,15 @@ bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
 
   // Check that we're dealing with full-byte elements.
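
The remainder of getVectorLayout (continued just below) verifies that each vector element occupies exactly its store size, so the vector can be split into whole-byte scalar accesses. Posed as a free-standing test; hasFullByteElements is a hypothetical name:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"

static bool hasFullByteElements(llvm::VectorType *VecTy,
                                const llvm::DataLayout &DL) {
  llvm::Type *ElemTy = VecTy->getElementType();
  // e.g. <4 x i32> passes (32 == 32); <4 x i1> fails (1 != 8).
  return DL.getTypeSizeInBits(ElemTy) == DL.getTypeStoreSizeInBits(ElemTy);
}
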
   Layout.ElemTy = Layout.VecTy->getElementType();
-  if (DL->getTypeSizeInBits(Layout.ElemTy) !=
-      DL->getTypeStoreSizeInBits(Layout.ElemTy))
+  if (DL.getTypeSizeInBits(Layout.ElemTy) !=
+      DL.getTypeStoreSizeInBits(Layout.ElemTy))
     return false;
 
   if (Alignment)
     Layout.VecAlign = Alignment;
   else
-    Layout.VecAlign = DL->getABITypeAlignment(Layout.VecTy);
-  Layout.ElemSize = DL->getTypeStoreSize(Layout.ElemTy);
+    Layout.VecAlign = DL.getABITypeAlignment(Layout.VecTy);
+  Layout.ElemSize = DL.getTypeStoreSize(Layout.ElemTy);
   return true;
 }
 
@@ -456,7 +450,7 @@ bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
     Indices.resize(NumIndices);
     for (unsigned J = 0; J < NumIndices; ++J)
       Indices[J] = Ops[J][I];
-    Res[I] = Builder.CreateGEP(Base[I], Indices,
+    Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), Base[I], Indices,
                                GEPI.getName() + ".i" + Twine(I));
     if (GEPI.isInBounds())
       if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
@@ -595,7 +589,8 @@ bool Scalarizer::visitLoadInst(LoadInst &LI) {
     return false;
 
   VectorLayout Layout;
-  if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout))
+  if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout,
+                       LI.getModule()->getDataLayout()))
     return false;
 
   unsigned NumElems = Layout.VecTy->getNumElements();
@@ -619,7 +614,8 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) {
 
   VectorLayout Layout;
   Value *FullValue = SI.getValueOperand();
-  if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout))
+  if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout,
+                       SI.getModule()->getDataLayout()))
     return false;
 
   unsigned NumElems = Layout.VecTy->getNumElements();
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index bffe8df..1a04d74 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -199,18 +199,15 @@ class ConstantOffsetExtractor {
   /// new index representing the remainder (equal to the original index minus
   /// the constant offset), or nullptr if we cannot extract a constant offset.
   /// \p Idx    The given GEP index
-  /// \p DL     The datalayout of the module
   /// \p GEP    The given GEP
-  static Value *Extract(Value *Idx, const DataLayout *DL,
-                        GetElementPtrInst *GEP);
+  static Value *Extract(Value *Idx, GetElementPtrInst *GEP);
 
   /// Looks for a constant offset from the given GEP index without extracting
   /// it. It returns the numeric value of the extracted constant offset (0 if
   /// failed). The meaning of the arguments are the same as Extract.
-  static int64_t Find(Value *Idx, const DataLayout *DL, GetElementPtrInst *GEP);
+  static int64_t Find(Value *Idx, GetElementPtrInst *GEP);
 
 private:
-  ConstantOffsetExtractor(const DataLayout *Layout, Instruction *InsertionPt)
-      : DL(Layout), IP(InsertionPt) {}
+  ConstantOffsetExtractor(Instruction *InsertionPt) : IP(InsertionPt) {}
 
   /// Searches the expression that computes V for a non-zero constant C s.t.
   /// V can be reassociated into the form V' + C. If the searching is
   /// successful, returns C and update UserChain as a def-use chain from C to V;
@@ -294,8 +291,6 @@ class ConstantOffsetExtractor {
   /// A data structure used in rebuildWithoutConstOffset. Contains all
   /// sext/zext instructions along UserChain.
   SmallVector<CastInst *, 16> ExtInsts;
-  /// The data layout of the module. Used in ComputeKnownBits.
-  const DataLayout *DL;
   Instruction *IP;  /// Insertion position of cloned instructions.
 };
 
@@ -312,19 +307,10 @@ class SeparateConstOffsetFromGEP : public FunctionPass {
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<DataLayoutPass>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
     AU.setPreservesCFG();
   }
 
-  bool doInitialization(Module &M) override {
-    DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-    if (DLP == nullptr)
-      report_fatal_error("data layout missing");
-    DL = &DLP->getDataLayout();
-    return false;
-  }
-
   bool runOnFunction(Function &F) override;
 
 private:
@@ -372,7 +358,6 @@ class SeparateConstOffsetFromGEP : public FunctionPass {
   /// Verified in @i32_add in split-gep.ll
   bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);
 
-  const DataLayout *DL;
   const TargetMachine *TM;
   /// Whether to lower a GEP with multiple indices into arithmetic operations or
   /// multiple GEPs with a single index.
@@ -386,7 +371,6 @@ INITIALIZE_PASS_BEGIN(
     "Split GEPs to a variadic base and a constant offset for better CSE", false,
     false)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DataLayoutPass)
 INITIALIZE_PASS_END(
     SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
     "Split GEPs to a variadic base and a constant offset for better CSE", false,
@@ -647,9 +631,8 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
   return BO;
 }
 
-Value *ConstantOffsetExtractor::Extract(Value *Idx, const DataLayout *DL,
-                                        GetElementPtrInst *GEP) {
-  ConstantOffsetExtractor Extractor(DL, GEP);
+Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP) {
+  ConstantOffsetExtractor Extractor(GEP);
   // Find a non-zero constant offset first.
   APInt ConstantOffset = Extractor.find(Idx, /* SignExtended */ false,
                                         /* ZeroExtended */ false,
@@ -660,10 +643,9 @@ Value *ConstantOffsetExtractor::Extract(Value *Idx, const DataLayout *DL,
   return Extractor.rebuildWithoutConstOffset();
 }
 
-int64_t ConstantOffsetExtractor::Find(Value *Idx, const DataLayout *DL,
-                                      GetElementPtrInst *GEP) {
+int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP) {
   // If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative.
-  return ConstantOffsetExtractor(DL, GEP)
+  return ConstantOffsetExtractor(GEP)
       .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
             GEP->isInBounds())
       .getSExtValue();
@@ -674,6 +656,7 @@ void ConstantOffsetExtractor::ComputeKnownBits(Value *V, APInt &KnownOne,
   IntegerType *IT = cast<IntegerType>(V->getType());
   KnownOne = APInt(IT->getBitWidth(), 0);
   KnownZero = APInt(IT->getBitWidth(), 0);
+  const DataLayout &DL = IP->getModule()->getDataLayout();
   llvm::computeKnownBits(V, KnownZero, KnownOne, DL, 0);
 }
 
@@ -689,7 +672,8 @@ bool ConstantOffsetExtractor::NoCommonBits(Value *LHS, Value *RHS) const {
 bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize(
     GetElementPtrInst *GEP) {
   bool Changed = false;
-  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+  const DataLayout &DL = GEP->getModule()->getDataLayout();
+  Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
   gep_type_iterator GTI = gep_type_begin(*GEP);
   for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
        I != E; ++I, ++GTI) {
@@ -710,18 +694,19 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
   NeedsExtraction = false;
   int64_t AccumulativeByteOffset = 0;
   gep_type_iterator GTI = gep_type_begin(*GEP);
+  const DataLayout &DL = GEP->getModule()->getDataLayout();
   for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
     if (isa<SequentialType>(*GTI)) {
       // Tries to extract a constant offset from this GEP index.
       int64_t ConstantOffset =
-          ConstantOffsetExtractor::Find(GEP->getOperand(I), DL, GEP);
+          ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP);
       if (ConstantOffset != 0) {
         NeedsExtraction = true;
         // A GEP may have multiple indices.  We accumulate the extracted
         // constant offset to a byte offset, and later offset the remainder of
         // the original GEP with this byte offset.
         AccumulativeByteOffset +=
-            ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
+            ConstantOffset * DL.getTypeAllocSize(GTI.getIndexedType());
       }
     } else if (LowerGEP) {
       StructType *StTy = cast<StructType>(*GTI);
@@ -730,7 +715,7 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
       if (Field != 0) {
         NeedsExtraction = true;
         AccumulativeByteOffset +=
-            DL->getStructLayout(StTy)->getElementOffset(Field);
+            DL.getStructLayout(StTy)->getElementOffset(Field);
       }
     }
   }
@@ -740,7 +725,8 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
 void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
     GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) {
   IRBuilder<> Builder(Variadic);
-  Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
+  const DataLayout &DL = Variadic->getModule()->getDataLayout();
+  Type *IntPtrTy = DL.getIntPtrType(Variadic->getType());
 
   Type *I8PtrTy =
       Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace());
@@ -760,7 +746,7 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
       continue;
 
     APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
-                              DL->getTypeAllocSize(GTI.getIndexedType()));
+                              DL.getTypeAllocSize(GTI.getIndexedType()));
     // Scale the index by element size.
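
The scaling step that the comment above introduces has a fast path just below: a power-of-two element size turns the multiply into a shift. A sketch of that decision with the same APInt queries; scaleIndex is a hypothetical name and builder placement is assumed:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/IRBuilder.h"

static llvm::Value *scaleIndex(llvm::IRBuilder<> &Builder, llvm::Value *Idx,
                               const llvm::APInt &ElementSize) {
  if (ElementSize == 1)
    return Idx; // byte-sized elements need no scaling
  if (ElementSize.isPowerOf2())
    return Builder.CreateShl(Idx, ElementSize.logBase2());
  return Builder.CreateMul(Idx, Builder.getInt(ElementSize));
}
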
     if (ElementSize != 1) {
       if (ElementSize.isPowerOf2()) {
@@ -791,7 +777,8 @@ void SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
                                                     int64_t AccumulativeByteOffset) {
   IRBuilder<> Builder(Variadic);
-  Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
+  const DataLayout &DL = Variadic->getModule()->getDataLayout();
+  Type *IntPtrTy = DL.getIntPtrType(Variadic->getType());
 
   Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy);
   gep_type_iterator GTI = gep_type_begin(*Variadic);
@@ -807,7 +794,7 @@ SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
       continue;
 
     APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
-                              DL->getTypeAllocSize(GTI.getIndexedType()));
+                              DL.getTypeAllocSize(GTI.getIndexedType()));
     // Scale the index by element size.
     if (ElementSize != 1) {
       if (ElementSize.isPowerOf2()) {
@@ -880,8 +867,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
     if (isa<SequentialType>(*GTI)) {
       // Splits this GEP index into a variadic part and a constant offset, and
       // uses the variadic part as the new index.
-      Value *NewIdx =
-          ConstantOffsetExtractor::Extract(GEP->getOperand(I), DL, GEP);
+      Value *NewIdx = ConstantOffsetExtractor::Extract(GEP->getOperand(I), GEP);
       if (NewIdx != nullptr) {
         GEP->setOperand(I, NewIdx);
       }
@@ -958,15 +944,17 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
   // Per ANSI C standard, signed / unsigned = unsigned and signed % unsigned =
   // unsigned.. Therefore, we cast ElementTypeSizeOfGEP to signed because it is
   // used with unsigned integers later.
+  const DataLayout &DL = GEP->getModule()->getDataLayout();
   int64_t ElementTypeSizeOfGEP = static_cast<int64_t>(
-      DL->getTypeAllocSize(GEP->getType()->getElementType()));
-  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+      DL.getTypeAllocSize(GEP->getType()->getElementType()));
+  Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
   if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
     // Very likely. As long as %gep is natually aligned, the byte offset we
     // extracted should be a multiple of sizeof(*%gep).
     int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP;
-    NewGEP = GetElementPtrInst::Create(
-        NewGEP, ConstantInt::get(IntPtrTy, Index, true), GEP->getName(), GEP);
+    NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
+                                       ConstantInt::get(IntPtrTy, Index, true),
+                                       GEP->getName(), GEP);
   } else {
     // Unlikely but possible. For example,
     // #pragma pack(1)
@@ -986,8 +974,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
         GEP->getPointerAddressSpace());
     NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP);
     NewGEP = GetElementPtrInst::Create(
-        NewGEP, ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true),
-        "uglygep", GEP);
+        Type::getInt8Ty(GEP->getContext()), NewGEP,
+        ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep",
+        GEP);
     if (GEP->getType() != I8PtrTy)
       NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
   }
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index fb8fe38..8566cd9 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -127,7 +127,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
 
 /// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function,
 /// iterating until no more changes are made.
 static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
-                                   const DataLayout *DL, AssumptionCache *AC,
+                                   AssumptionCache *AC,
                                    unsigned BonusInstThreshold) {
   bool Changed = false;
   bool LocalChange = true;
@@ -137,7 +137,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
     // Loop over all of the basic blocks and remove them if they are unneeded...
     //
     for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
-      if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, DL, AC)) {
+      if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, AC)) {
         LocalChange = true;
         ++NumSimpl;
       }
@@ -148,11 +148,10 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
 }
 
 static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
-                                const DataLayout *DL, AssumptionCache *AC,
-                                int BonusInstThreshold) {
+                                AssumptionCache *AC, int BonusInstThreshold) {
   bool EverChanged = removeUnreachableBlocks(F);
   EverChanged |= mergeEmptyReturnBlocks(F);
-  EverChanged |= iterativelySimplifyCFG(F, TTI, DL, AC, BonusInstThreshold);
+  EverChanged |= iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold);
 
   // If neither pass changed anything, we're done.
   if (!EverChanged) return false;
@@ -166,7 +165,7 @@ static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
     return true;
 
   do {
-    EverChanged = iterativelySimplifyCFG(F, TTI, DL, AC, BonusInstThreshold);
+    EverChanged = iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold);
     EverChanged |= removeUnreachableBlocks(F);
   } while (EverChanged);
 
@@ -181,11 +180,10 @@ SimplifyCFGPass::SimplifyCFGPass(int BonusInstThreshold)
 
 PreservedAnalyses SimplifyCFGPass::run(Function &F,
                                        AnalysisManager<Function> *AM) {
-  auto *DL = F.getParent()->getDataLayout();
   auto &TTI = AM->getResult<TargetIRAnalysis>(F);
   auto &AC = AM->getResult<AssumptionAnalysis>(F);
 
-  if (!simplifyFunctionCFG(F, TTI, DL, &AC, BonusInstThreshold))
+  if (!simplifyFunctionCFG(F, TTI, &AC, BonusInstThreshold))
     return PreservedAnalyses::none();
 
   return PreservedAnalyses::all();
@@ -207,9 +205,7 @@ struct CFGSimplifyPass : public FunctionPass {
         &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
     const TargetTransformInfo &TTI =
         getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
-    DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-    const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
-    return simplifyFunctionCFG(F, TTI, DL, AC, BonusInstThreshold);
+    return simplifyFunctionCFG(F, TTI, AC, BonusInstThreshold);
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index d0ee0a6..b169d56 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -21,6 +21,7 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
@@ -35,7 +36,6 @@ namespace {
     DominatorTree *DT;
     LoopInfo *LI;
     AliasAnalysis *AA;
-    const DataLayout *DL;
 
   public:
     static char ID; // Pass identification
@@ -100,8 +100,6 @@ bool Sinking::runOnFunction(Function &F) {
   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   AA = &getAnalysis<AliasAnalysis>();
-  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-  DL = DLP ? &DLP->getDataLayout() : nullptr;
 
   bool MadeChange, EverMadeChange = false;
 
@@ -196,7 +194,7 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst,
   if (SuccToSinkTo->getUniquePredecessor() != Inst->getParent()) {
     // We cannot sink a load across a critical edge - there may be stores in
     // other code paths.
-    if (!isSafeToSpeculativelyExecute(Inst, DL))
+    if (!isSafeToSpeculativelyExecute(Inst))
      return false;
 
     // We don't want to sink across a critical edge if we don't dominate the
diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 4edc86c..e71031c 100644
--- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -15,19 +15,30 @@
 //
 // There are many optimizations we can perform in the domain of SLSR. This file
 // for now contains only an initial step. Specifically, we look for strength
-// reduction candidate in the form of
+// reduction candidates in two forms:
 //
-// (B + i) * S
+// Form 1: (B + i) * S
+// Form 2: &B[i * S]
 //
-// where B and S are integer constants or variables, and i is a constant
-// integer. If we found two such candidates
+// where S is an integer variable, and i is a constant integer. If we found two
+// candidates
 //
-// S1: X = (B + i) * S S2: Y = (B + i') * S
+// S1: X = (B + i) * S
+// S2: Y = (B + i') * S
+//
+// or
+//
+// S1: X = &B[i * S]
+// S2: Y = &B[i' * S]
 //
 // and S1 dominates S2, we call S1 a basis of S2, and can replace S2 with
 //
 // Y = X + (i' - i) * S
 //
+// or
+//
+// Y = &X[(i' - i) * S]
+//
 // where (i' - i) * S is folded to the extent possible. When S2 has multiple
 // bases, we pick the one that is closest to S2, or S2's "immediate" basis.
 //
@@ -35,8 +46,6 @@
 //
 // - Handle candidates in the form of B + i * S
 //
-// - Handle candidates in the form of pointer arithmetics. e.g., B[i * S]
-//
 // - Floating point arithmetics when fast math is enabled.
 //
 // - SLSR may decrease ILP at the architecture level. Targets that are very
@@ -45,6 +54,10 @@
 
 #include <vector>
 
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Module.h"
@@ -58,14 +71,30 @@ using namespace PatternMatch;
 
 namespace {
 
 class StraightLineStrengthReduce : public FunctionPass {
- public:
+public:
   // SLSR candidate. Such a candidate must be in the form of
   //   (Base + Index) * Stride
+  // or
+  //   Base[..][Index * Stride][..]
   struct Candidate : public ilist_node<Candidate> {
-    Candidate(Value *B = nullptr, ConstantInt *Idx = nullptr,
-              Value *S = nullptr, Instruction *I = nullptr)
-        : Base(B), Index(Idx), Stride(S), Ins(I), Basis(nullptr) {}
-    Value *Base;
+    enum Kind {
+      Invalid, // reserved for the default constructor
+      Mul,     // (B + i) * S
+      GEP,     // &B[..][i * S][..]
+    };
+
+    Candidate()
+        : CandidateKind(Invalid), Base(nullptr), Index(nullptr),
+          Stride(nullptr), Ins(nullptr), Basis(nullptr) {}
+    Candidate(Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
+              Instruction *I)
+        : CandidateKind(CT), Base(B), Index(Idx), Stride(S), Ins(I),
+          Basis(nullptr) {}
+    Kind CandidateKind;
+    const SCEV *Base;
+    // Note that Index and Stride of a GEP candidate may not have the same
+    // integer type. In that case, during rewriting, Stride will be
+    // sign-extended or truncated to Index's type.
ConstantInt *Index; Value *Stride; // The instruction this candidate corresponds to. It helps us to rewrite a @@ -90,33 +119,70 @@ class StraightLineStrengthReduce : public FunctionPass { static char ID; - StraightLineStrengthReduce() : FunctionPass(ID), DT(nullptr) { + StraightLineStrengthReduce() + : FunctionPass(ID), DL(nullptr), DT(nullptr), TTI(nullptr) { initializeStraightLineStrengthReducePass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<ScalarEvolution>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); // We do not modify the shape of the CFG. AU.setPreservesCFG(); } + bool doInitialization(Module &M) override { + DL = &M.getDataLayout(); + return false; + } + bool runOnFunction(Function &F) override; - private: +private: // Returns true if Basis is a basis for C, i.e., Basis dominates C and they // share the same base and stride. bool isBasisFor(const Candidate &Basis, const Candidate &C); // Checks whether I is in a candidate form. If so, adds all the matching forms // to Candidates, and tries to find the immediate basis for each of them. void allocateCandidateAndFindBasis(Instruction *I); - // Given that I is in the form of "(B + Idx) * S", adds this form to - // Candidates, and finds its immediate basis. - void allocateCandidateAndFindBasis(Value *B, ConstantInt *Idx, Value *S, + // Allocate candidates and find bases for Mul instructions. + void allocateCandidateAndFindBasisForMul(Instruction *I); + // Splits LHS into Base + Index and, if succeeds, calls + // allocateCandidateAndFindBasis. + void allocateCandidateAndFindBasisForMul(Value *LHS, Value *RHS, + Instruction *I); + // Allocate candidates and find bases for GetElementPtr instructions. + void allocateCandidateAndFindBasisForGEP(GetElementPtrInst *GEP); + // A helper function that scales Idx with ElementSize before invoking + // allocateCandidateAndFindBasis. + void allocateCandidateAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx, + Value *S, uint64_t ElementSize, + Instruction *I); + // Adds the given form <CT, B, Idx, S> to Candidates, and finds its immediate + // basis. + void allocateCandidateAndFindBasis(Candidate::Kind CT, const SCEV *B, + ConstantInt *Idx, Value *S, Instruction *I); // Rewrites candidate C with respect to Basis. void rewriteCandidateWithBasis(const Candidate &C, const Candidate &Basis); + // A helper function that factors ArrayIdx to a product of a stride and a + // constant index, and invokes allocateCandidateAndFindBasis with the + // factorings. + void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize, + GetElementPtrInst *GEP); + // Emit code that computes the "bump" from Basis to C. If the candidate is a + // GEP and the bump is not divisible by the element size of the GEP, this + // function sets the BumpWithUglyGEP flag to notify its caller to bump the + // basis using an ugly GEP. + static Value *emitBump(const Candidate &Basis, const Candidate &C, + IRBuilder<> &Builder, const DataLayout *DL, + bool &BumpWithUglyGEP); + const DataLayout *DL; DominatorTree *DT; + ScalarEvolution *SE; + TargetTransformInfo *TTI; ilist<Candidate> Candidates; // Temporarily holds all instructions that are unlinked (but not deleted) by // rewriteCandidateWithBasis. 
These instructions will be actually removed @@ -129,6 +195,8 @@ char StraightLineStrengthReduce::ID = 0; INITIALIZE_PASS_BEGIN(StraightLineStrengthReduce, "slsr", "Straight line strength reduction", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(StraightLineStrengthReduce, "slsr", "Straight line strength reduction", false, false) @@ -141,9 +209,47 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis, return (Basis.Ins != C.Ins && // skip the same instruction // Basis must dominate C in order to rewrite C with respect to Basis. DT->dominates(Basis.Ins->getParent(), C.Ins->getParent()) && - // They share the same base and stride. + // They share the same base, stride, and candidate kind. Basis.Base == C.Base && - Basis.Stride == C.Stride); + Basis.Stride == C.Stride && + Basis.CandidateKind == C.CandidateKind); +} + +static bool isCompletelyFoldable(GetElementPtrInst *GEP, + const TargetTransformInfo *TTI, + const DataLayout *DL) { + GlobalVariable *BaseGV = nullptr; + int64_t BaseOffset = 0; + bool HasBaseReg = false; + int64_t Scale = 0; + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand())) + BaseGV = GV; + else + HasBaseReg = true; + + gep_type_iterator GTI = gep_type_begin(GEP); + for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I, ++GTI) { + if (isa<SequentialType>(*GTI)) { + int64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType()); + if (ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I)) { + BaseOffset += ConstIdx->getSExtValue() * ElementSize; + } else { + // Needs scale register. + if (Scale != 0) { + // No addressing mode takes two scale registers. + return false; + } + Scale = ElementSize; + } + } else { + StructType *STy = cast<StructType>(*GTI); + uint64_t Field = cast<ConstantInt>(*I)->getZExtValue(); + BaseOffset += DL->getStructLayout(STy)->getElementOffset(Field); + } + } + return TTI->isLegalAddressingMode(GEP->getType()->getElementType(), BaseGV, + BaseOffset, HasBaseReg, Scale); } // TODO: We currently implement an algorithm whose time complexity is linear to @@ -153,11 +259,17 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis, // table is indexed by the base and the stride of a candidate. Therefore, // finding the immediate basis of a candidate boils down to one hash-table look // up. -void StraightLineStrengthReduce::allocateCandidateAndFindBasis(Value *B, - ConstantInt *Idx, - Value *S, - Instruction *I) { - Candidate C(B, Idx, S, I); +void StraightLineStrengthReduce::allocateCandidateAndFindBasis( + Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S, + Instruction *I) { + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { + // If &B[Idx * S] fits into an addressing mode, do not turn it into + // non-free computation. + if (isCompletelyFoldable(GEP, TTI, DL)) + return; + } + + Candidate C(CT, B, Idx, S, I); // Try to compute the immediate basis of C. unsigned NumIterations = 0; // Limit the scan radius to avoid running forever. 
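The foldability check above exists because strength-reducing a GEP that already fits the target's addressing mode would replace a free address computation with real instructions. For example, an access equivalent to &p[i] over i32 elements needs one base register and a 4-byte scale, which x86-like targets accept. A hypothetical query of that shape, with Int32Ty standing in for the GEP's result element type:

    // One base register, no global base, no constant offset, 4-byte scale.
    bool Foldable = TTI->isLegalAddressingMode(Int32Ty, /*BaseGV=*/nullptr,
                                               /*BaseOffset=*/0,
                                               /*HasBaseReg=*/true,
                                               /*Scale=*/4);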
@@ -176,60 +288,209 @@ void StraightLineStrengthReduce::allocateCandidateAndFindBasis(Value *B, } void StraightLineStrengthReduce::allocateCandidateAndFindBasis(Instruction *I) { + switch (I->getOpcode()) { + case Instruction::Mul: + allocateCandidateAndFindBasisForMul(I); + break; + case Instruction::GetElementPtr: + allocateCandidateAndFindBasisForGEP(cast<GetElementPtrInst>(I)); + break; + } +} + +void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul( + Value *LHS, Value *RHS, Instruction *I) { Value *B = nullptr; ConstantInt *Idx = nullptr; - // "(Base + Index) * Stride" must be a Mul instruction at the first hand. - if (I->getOpcode() == Instruction::Mul) { - if (IntegerType *ITy = dyn_cast<IntegerType>(I->getType())) { - Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); - for (unsigned Swapped = 0; Swapped < 2; ++Swapped) { - // Only handle the canonical operand ordering. - if (match(LHS, m_Add(m_Value(B), m_ConstantInt(Idx)))) { - // If LHS is in the form of "Base + Index", then I is in the form of - // "(Base + Index) * RHS". - allocateCandidateAndFindBasis(B, Idx, RHS, I); - } else { - // Otherwise, at least try the form (LHS + 0) * RHS. - allocateCandidateAndFindBasis(LHS, ConstantInt::get(ITy, 0), RHS, I); - } - // Swap LHS and RHS so that we also cover the cases where LHS is the - // stride. - if (LHS == RHS) - break; - std::swap(LHS, RHS); - } - } + // Only handle the canonical operand ordering. + if (match(LHS, m_Add(m_Value(B), m_ConstantInt(Idx)))) { + // If LHS is in the form of "Base + Index", then I is in the form of + // "(Base + Index) * RHS". + allocateCandidateAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I); + } else { + // Otherwise, at least try the form (LHS + 0) * RHS. + ConstantInt *Zero = ConstantInt::get(cast<IntegerType>(I->getType()), 0); + allocateCandidateAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS, + I); + } +} + +void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul( + Instruction *I) { + // Try matching (B + i) * S. + // TODO: we could extend SLSR to float and vector types. + if (!isa<IntegerType>(I->getType())) + return; + + Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); + allocateCandidateAndFindBasisForMul(LHS, RHS, I); + if (LHS != RHS) { + // Symmetrically, try to split RHS to Base + Index. + allocateCandidateAndFindBasisForMul(RHS, LHS, I); + } +} + +void StraightLineStrengthReduce::allocateCandidateAndFindBasisForGEP( + const SCEV *B, ConstantInt *Idx, Value *S, uint64_t ElementSize, + Instruction *I) { + // I = B + sext(Idx *nsw S) *nsw ElementSize + // = B + (sext(Idx) * ElementSize) * sext(S) + // Casting to IntegerType is safe because we skipped vector GEPs. + IntegerType *IntPtrTy = cast<IntegerType>(DL->getIntPtrType(I->getType())); + ConstantInt *ScaledIdx = ConstantInt::get( + IntPtrTy, Idx->getSExtValue() * (int64_t)ElementSize, true); + allocateCandidateAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I); +} + +void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx, + const SCEV *Base, + uint64_t ElementSize, + GetElementPtrInst *GEP) { + // At least, ArrayIdx = ArrayIdx *s 1. + allocateCandidateAndFindBasisForGEP( + Base, ConstantInt::get(cast<IntegerType>(ArrayIdx->getType()), 1), + ArrayIdx, ElementSize, GEP); + Value *LHS = nullptr; + ConstantInt *RHS = nullptr; + // TODO: handle shl. e.g., we could treat (S << 2) as (S * 4). + // + // One alternative is matching the SCEV of ArrayIdx instead of ArrayIdx + // itself. 
This would allow us to handle the shl case for free. However, + // matching SCEVs has two issues: + // + // 1. this would complicate rewriting because the rewriting procedure + // would have to translate SCEVs back to IR instructions. This translation + // is difficult when LHS is further evaluated to a composite SCEV. + // + // 2. ScalarEvolution is designed to be control-flow oblivious. It tends + // to strip nsw/nuw flags which are critical for SLSR to trace into + // sext'ed multiplication. + if (match(ArrayIdx, m_NSWMul(m_Value(LHS), m_ConstantInt(RHS)))) { + // SLSR is currently unsafe if i * S may overflow. + // GEP = Base + sext(LHS *nsw RHS) *nsw ElementSize + allocateCandidateAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP); + } +} + +void StraightLineStrengthReduce::allocateCandidateAndFindBasisForGEP( + GetElementPtrInst *GEP) { + // TODO: handle vector GEPs + if (GEP->getType()->isVectorTy()) + return; + + const SCEV *GEPExpr = SE->getSCEV(GEP); + Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); + + gep_type_iterator GTI = gep_type_begin(GEP); + for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) { + if (!isa<SequentialType>(*GTI++)) + continue; + Value *ArrayIdx = *I; + // Compute the byte offset of this index. + uint64_t ElementSize = DL->getTypeAllocSize(*GTI); + const SCEV *ElementSizeExpr = SE->getSizeOfExpr(IntPtrTy, *GTI); + const SCEV *ArrayIdxExpr = SE->getSCEV(ArrayIdx); + ArrayIdxExpr = SE->getTruncateOrSignExtend(ArrayIdxExpr, IntPtrTy); + const SCEV *LocalOffset = + SE->getMulExpr(ArrayIdxExpr, ElementSizeExpr, SCEV::FlagNSW); + // The base of this candidate equals GEPExpr less the byte offset of this + // index. + const SCEV *Base = SE->getMinusSCEV(GEPExpr, LocalOffset); + factorArrayIndex(ArrayIdx, Base, ElementSize, GEP); + // When ArrayIdx is the sext of a value, we try to factor that value as + // well. Handling this case is important because array indices are + // typically sign-extended to the pointer size. + Value *TruncatedArrayIdx = nullptr; + if (match(ArrayIdx, m_SExt(m_Value(TruncatedArrayIdx)))) + factorArrayIndex(TruncatedArrayIdx, Base, ElementSize, GEP); } } +// A helper function that unifies the bitwidth of A and B. +static void unifyBitWidth(APInt &A, APInt &B) { + if (A.getBitWidth() < B.getBitWidth()) + A = A.sext(B.getBitWidth()); + else if (A.getBitWidth() > B.getBitWidth()) + B = B.sext(A.getBitWidth()); +} + +Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis, + const Candidate &C, + IRBuilder<> &Builder, + const DataLayout *DL, + bool &BumpWithUglyGEP) { + APInt Idx = C.Index->getValue(), BasisIdx = Basis.Index->getValue(); + unifyBitWidth(Idx, BasisIdx); + APInt IndexOffset = Idx - BasisIdx; + + BumpWithUglyGEP = false; + if (Basis.CandidateKind == Candidate::GEP) { + APInt ElementSize( + IndexOffset.getBitWidth(), + DL->getTypeAllocSize( + cast<GetElementPtrInst>(Basis.Ins)->getType()->getElementType())); + APInt Q, R; + APInt::sdivrem(IndexOffset, ElementSize, Q, R); + if (R.getSExtValue() == 0) + IndexOffset = Q; + else + BumpWithUglyGEP = true; + } + // Compute Bump = C - Basis = (i' - i) * S. + // Common case 1: if (i' - i) is 1, Bump = S. + if (IndexOffset.getSExtValue() == 1) + return C.Stride; + // Common case 2: if (i' - i) is -1, Bump = -S. + if (IndexOffset.getSExtValue() == -1) + return Builder.CreateNeg(C.Stride); + // Otherwise, Bump = (i' - i) * sext/trunc(S). 
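A worked example of the divisibility logic above: take two i32-element GEP candidates, so ElementSize is 4, with byte-scaled Index values 4 (Basis) and 12 (C). After unifyBitWidth sign-extends the narrower index so the subtraction is well defined, IndexOffset = 12 - 4 = 8, which divides evenly by 4; IndexOffset becomes the quotient 2, and the bump is 2 * S in whole elements. Had C's Index been 10 instead, IndexOffset = 6 would leave remainder 2, so BumpWithUglyGEP is set and the caller must apply the 6-byte bump through an i8* GEP.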
+ ConstantInt *Delta = ConstantInt::get(Basis.Ins->getContext(), IndexOffset); + Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, Delta->getType()); + return Builder.CreateMul(ExtendedStride, Delta); +} + void StraightLineStrengthReduce::rewriteCandidateWithBasis( const Candidate &C, const Candidate &Basis) { + assert(C.CandidateKind == Basis.CandidateKind && C.Base == Basis.Base && + C.Stride == Basis.Stride); + // An instruction can correspond to multiple candidates. Therefore, instead of // simply deleting an instruction when we rewrite it, we mark its parent as // nullptr (i.e. unlink it) so that we can skip the candidates whose // instruction is already rewritten. if (!C.Ins->getParent()) return; - assert(C.Base == Basis.Base && C.Stride == Basis.Stride); - // Basis = (B + i) * S - // C = (B + i') * S - // ==> - // C = Basis + (i' - i) * S + IRBuilder<> Builder(C.Ins); - ConstantInt *IndexOffset = ConstantInt::get( - C.Ins->getContext(), C.Index->getValue() - Basis.Index->getValue()); - Value *Reduced; - // TODO: preserve nsw/nuw in some cases. - if (IndexOffset->isOne()) { - // If (i' - i) is 1, fold C into Basis + S. - Reduced = Builder.CreateAdd(Basis.Ins, C.Stride); - } else if (IndexOffset->isMinusOne()) { - // If (i' - i) is -1, fold C into Basis - S. - Reduced = Builder.CreateSub(Basis.Ins, C.Stride); - } else { - Value *Bump = Builder.CreateMul(C.Stride, IndexOffset); + bool BumpWithUglyGEP; + Value *Bump = emitBump(Basis, C, Builder, DL, BumpWithUglyGEP); + Value *Reduced = nullptr; // equivalent to but weaker than C.Ins + switch (C.CandidateKind) { + case Candidate::Mul: Reduced = Builder.CreateAdd(Basis.Ins, Bump); - } + break; + case Candidate::GEP: + { + Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType()); + if (BumpWithUglyGEP) { + // C = (char *)Basis + Bump + unsigned AS = Basis.Ins->getType()->getPointerAddressSpace(); + Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS); + Reduced = Builder.CreateBitCast(Basis.Ins, CharTy); + // We only considered inbounds GEP as candidates. + Reduced = Builder.CreateInBoundsGEP(Reduced, Bump); + Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType()); + } else { + // C = gep Basis, Bump + // Canonicalize bump to pointer size. + Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy); + Reduced = Builder.CreateInBoundsGEP(Basis.Ins, Bump); + } + } + break; + default: + llvm_unreachable("C.CandidateKind is invalid"); + }; Reduced->takeName(C.Ins); C.Ins->replaceAllUsesWith(Reduced); C.Ins->dropAllReferences(); @@ -243,15 +504,15 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) { if (skipOptnoneFunction(F)) return false; + TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + SE = &getAnalysis<ScalarEvolution>(); // Traverse the dominator tree in the depth-first order. This order makes sure // all bases of a candidate are in Candidates when we process it. for (auto node = GraphTraits<DominatorTree *>::nodes_begin(DT); node != GraphTraits<DominatorTree *>::nodes_end(DT); ++node) { - BasicBlock *B = node->getBlock(); - for (auto I = B->begin(); I != B->end(); ++I) { - allocateCandidateAndFindBasis(I); - } + for (auto &I : *node->getBlock()) + allocateCandidateAndFindBasis(&I); } // Rewrite candidates in the reverse depth-first order. 
This order makes sure diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index aaf6f9a..6c3ce58 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -9,8 +9,8 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SCCIterator.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" @@ -18,6 +18,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 715ddeb..9eef132 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -54,8 +54,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" @@ -87,7 +87,6 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced"); namespace { struct TailCallElim : public FunctionPass { const TargetTransformInfo *TTI; - const DataLayout *DL; static char ID; // Pass identification, replacement for typeid TailCallElim() : FunctionPass(ID) { @@ -159,8 +158,6 @@ bool TailCallElim::runOnFunction(Function &F) { if (skipOptnoneFunction(F)) return false; - DL = F.getParent()->getDataLayout(); - bool AllCallsAreTailCalls = false; bool Modified = markTails(F, AllCallsAreTailCalls); if (AllCallsAreTailCalls) @@ -392,10 +389,9 @@ bool TailCallElim::runTRE(Function &F) { SmallVector<PHINode*, 8> ArgumentPHIs; bool MadeChange = false; - // CanTRETailMarkedCall - If false, we cannot perform TRE on tail calls - // marked with the 'tail' attribute, because doing so would cause the stack - // size to increase (real TRE would deallocate variable sized allocas, TRE - // doesn't). + // If false, we cannot perform TRE on tail calls marked with the 'tail' + // attribute, because doing so would cause the stack size to increase (real + // TRE would deallocate variable sized allocas, TRE doesn't). bool CanTRETailMarkedCall = CanTRE(F); // Change any tail recursive calls to loops. @@ -404,28 +400,19 @@ bool TailCallElim::runTRE(Function &F) { // alloca' is changed from being a static alloca to being a dynamic alloca. // Until this is resolved, disable this transformation if that would ever // happen. This bug is PR962. - SmallVector<BasicBlock*, 8> BBToErase; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) { + BasicBlock *BB = BBI++; // FoldReturnAndProcessPred may delete BB. 
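The rewritten loop above advances BBI before the body runs precisely because FoldReturnAndProcessPred may now erase BB itself. A minimal sketch of the pattern, with process standing in for any hypothetical callee that may erase the block it is handed:

    for (Function::iterator I = F.begin(), E = F.end(); I != E; /*in loop*/) {
      BasicBlock *BB = I++; // advance first: BB may be erased below
      process(BB);          // must not touch BB again if it was erased
    }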
if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) { bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs, !CanTRETailMarkedCall); - if (!Change && BB->getFirstNonPHIOrDbg() == Ret) { + if (!Change && BB->getFirstNonPHIOrDbg() == Ret) Change = FoldReturnAndProcessPred(BB, Ret, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs, !CanTRETailMarkedCall); - // FoldReturnAndProcessPred may have emptied some BB. Remember to - // erase them. - if (Change && BB->empty()) - BBToErase.push_back(BB); - - } MadeChange |= Change; } } - for (auto BB: BBToErase) - BB->eraseFromParent(); - // If we eliminated any tail recursions, it's possible that we inserted some // silly PHI nodes which just merge an initial value (the incoming operand) // with themselves. Check to see if we did and clean up our mess if so. This @@ -435,7 +422,7 @@ bool TailCallElim::runTRE(Function &F) { PHINode *PN = ArgumentPHIs[i]; // If the PHI Node is a dynamic constant, replace it with the value it is. - if (Value *PNV = SimplifyInstruction(PN)) { + if (Value *PNV = SimplifyInstruction(PN, F.getParent()->getDataLayout())) { PN->replaceAllUsesWith(PNV); PN->eraseFromParent(); } @@ -445,7 +432,7 @@ bool TailCallElim::runTRE(Function &F) { } -/// CanMoveAboveCall - Return true if it is safe to move the specified +/// Return true if it is safe to move the specified /// instruction from after the call to before the call, assuming that all /// instructions between the call and this instruction are movable. /// @@ -464,7 +451,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) { // being loaded from. if (CI->mayWriteToMemory() || !isSafeToLoadUnconditionally(L->getPointerOperand(), L, - L->getAlignment(), DL)) + L->getAlignment())) return false; } } @@ -480,13 +467,11 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) { return true; } -// isDynamicConstant - Return true if the specified value is the same when the -// return would exit as it was when the initial iteration of the recursive -// function was executed. -// -// We currently handle static constants and arguments that are not modified as -// part of the recursion. -// +/// Return true if the specified value is the same when the return would exit +/// as it was when the initial iteration of the recursive function was executed. +/// +/// We currently handle static constants and arguments that are not modified as +/// part of the recursion. static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) { if (isa<Constant>(V)) return true; // Static constants are always dyn consts @@ -518,10 +503,9 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) { return false; } -// getCommonReturnValue - Check to see if the function containing the specified -// tail call consistently returns the same runtime-constant value at all exit -// points except for IgnoreRI. If so, return the returned value. -// +/// Check to see if the function containing the specified tail call consistently +/// returns the same runtime-constant value at all exit points except for +/// IgnoreRI. If so, return the returned value. 
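Stepping back, the transformation this pass performs is the classic tail-recursion-to-loop rewrite; sketched on a hypothetical function, not code from the patch:

    int sum(int n, int acc) {       // before: tail-recursive
      if (n == 0) return acc;
      return sum(n - 1, acc + n);   // the tail call becomes a back edge
    }
    int sumLoop(int n, int acc) {   // after, conceptually: the arguments
      while (n != 0) {              // become PHI-fed loop variables
        acc = acc + n;
        n = n - 1;
      }
      return acc;
    }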
static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { Function *F = CI->getParent()->getParent(); Value *ReturnedValue = nullptr; @@ -545,10 +529,9 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { return ReturnedValue; } -/// CanTransformAccumulatorRecursion - If the specified instruction can be -/// transformed using accumulator recursion elimination, return the constant -/// which is the start of the accumulator value. Otherwise return null. -/// +/// If the specified instruction can be transformed using accumulator recursion +/// elimination, return the constant which is the start of the accumulator +/// value. Otherwise return null. Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI) { if (!I->isAssociative() || !I->isCommutative()) return nullptr; @@ -836,14 +819,11 @@ bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB, ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred); // Cleanup: if all predecessors of BB have been eliminated by - // FoldReturnIntoUncondBranch, we would like to delete it, but we - // can not just nuke it as it is being used as an iterator by our caller. - // Just empty it, and the caller will erase it when it is safe to do so. - // It is important to empty it, because the ret instruction in there is - // still using a value which EliminateRecursiveTailCall will attempt - // to remove. + // FoldReturnIntoUncondBranch, delete it. It is important to empty it, + // because the ret instruction in there is still using a value which + // EliminateRecursiveTailCall will attempt to remove. if (!BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) - BB->getInstList().clear(); + BB->eraseFromParent(); EliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs, diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 762a83f..671cbfe 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -33,7 +33,7 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) { /// EmitStrLen - Emit a call to the strlen function to the builder, for the /// specified pointer. This always returns an integer value of size intptr_t. -Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, +Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strlen)) return nullptr; @@ -45,12 +45,9 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Constant *StrLen = M->getOrInsertFunction("strlen", - AttributeSet::get(M->getContext(), - AS), - TD->getIntPtrType(Context), - B.getInt8PtrTy(), - nullptr); + Constant *StrLen = M->getOrInsertFunction( + "strlen", AttributeSet::get(M->getContext(), AS), + DL.getIntPtrType(Context), B.getInt8PtrTy(), nullptr); CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -62,7 +59,7 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, /// specified pointer. Ptr is required to be some pointer type, MaxLen must /// be of size_t type, and the return value has 'intptr_t' type. 
Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, - const DataLayout *TD, const TargetLibraryInfo *TLI) { + const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strnlen)) return nullptr; @@ -73,13 +70,10 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Constant *StrNLen = M->getOrInsertFunction("strnlen", - AttributeSet::get(M->getContext(), - AS), - TD->getIntPtrType(Context), - B.getInt8PtrTy(), - TD->getIntPtrType(Context), - nullptr); + Constant *StrNLen = + M->getOrInsertFunction("strnlen", AttributeSet::get(M->getContext(), AS), + DL.getIntPtrType(Context), B.getInt8PtrTy(), + DL.getIntPtrType(Context), nullptr); CallInst *CI = B.CreateCall2(StrNLen, CastToCStr(Ptr, B), MaxLen, "strnlen"); if (const Function *F = dyn_cast<Function>(StrNLen->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -91,7 +85,7 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, /// specified pointer and character. Ptr is required to be some pointer type, /// and the return value has 'i8*' type. Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, - const DataLayout *TD, const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strchr)) return nullptr; @@ -114,9 +108,8 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, } /// EmitStrNCmp - Emit a call to the strncmp function to the builder. -Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, - IRBuilder<> &B, const DataLayout *TD, - const TargetLibraryInfo *TLI) { +Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strncmp)) return nullptr; @@ -128,13 +121,9 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *StrNCmp = M->getOrInsertFunction("strncmp", - AttributeSet::get(M->getContext(), - AS), - B.getInt32Ty(), - B.getInt8PtrTy(), - B.getInt8PtrTy(), - TD->getIntPtrType(Context), nullptr); + Value *StrNCmp = M->getOrInsertFunction( + "strncmp", AttributeSet::get(M->getContext(), AS), B.getInt32Ty(), + B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), nullptr); CallInst *CI = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len, "strncmp"); @@ -147,8 +136,7 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the /// specified pointer arguments. Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, - const DataLayout *TD, const TargetLibraryInfo *TLI, - StringRef Name) { + const TargetLibraryInfo *TLI, StringRef Name) { if (!TLI->has(LibFunc::strcpy)) return nullptr; @@ -170,8 +158,7 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, /// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the /// specified pointer arguments. 
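Under the new signatures, callers obtain the DataLayout from the module and pass it by reference, which is what lets the emitters drop their old null checks. A hypothetical call site:

    const DataLayout &DL = B.GetInsertBlock()->getModule()->getDataLayout();
    Value *Len = EmitStrLen(Ptr, B, DL, TLI); // null if strlen is not in TLI;
                                              // otherwise intptr_t-wide per DL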
-Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, - IRBuilder<> &B, const DataLayout *TD, +Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B, const TargetLibraryInfo *TLI, StringRef Name) { if (!TLI->has(LibFunc::strncpy)) return nullptr; @@ -198,7 +185,7 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, /// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src /// are pointers. Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, - IRBuilder<> &B, const DataLayout *TD, + IRBuilder<> &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memcpy_chk)) return nullptr; @@ -208,13 +195,10 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, Attribute::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *MemCpy = M->getOrInsertFunction("__memcpy_chk", - AttributeSet::get(M->getContext(), AS), - B.getInt8PtrTy(), - B.getInt8PtrTy(), - B.getInt8PtrTy(), - TD->getIntPtrType(Context), - TD->getIntPtrType(Context), nullptr); + Value *MemCpy = M->getOrInsertFunction( + "__memcpy_chk", AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(), + B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), + DL.getIntPtrType(Context), nullptr); Dst = CastToCStr(Dst, B); Src = CastToCStr(Src, B); CallInst *CI = B.CreateCall4(MemCpy, Dst, Src, Len, ObjSize); @@ -225,9 +209,8 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. -Value *llvm::EmitMemChr(Value *Ptr, Value *Val, - Value *Len, IRBuilder<> &B, const DataLayout *TD, - const TargetLibraryInfo *TLI) { +Value *llvm::EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memchr)) return nullptr; @@ -236,13 +219,9 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *MemChr = M->getOrInsertFunction("memchr", - AttributeSet::get(M->getContext(), AS), - B.getInt8PtrTy(), - B.getInt8PtrTy(), - B.getInt32Ty(), - TD->getIntPtrType(Context), - nullptr); + Value *MemChr = M->getOrInsertFunction( + "memchr", AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(), + B.getInt8PtrTy(), B.getInt32Ty(), DL.getIntPtrType(Context), nullptr); CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts())) @@ -252,9 +231,8 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, } /// EmitMemCmp - Emit a call to the memcmp function. 
-Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, - Value *Len, IRBuilder<> &B, const DataLayout *TD, - const TargetLibraryInfo *TLI) { +Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memcmp)) return nullptr; @@ -266,12 +244,9 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *MemCmp = M->getOrInsertFunction("memcmp", - AttributeSet::get(M->getContext(), AS), - B.getInt32Ty(), - B.getInt8PtrTy(), - B.getInt8PtrTy(), - TD->getIntPtrType(Context), nullptr); + Value *MemCmp = M->getOrInsertFunction( + "memcmp", AttributeSet::get(M->getContext(), AS), B.getInt32Ty(), + B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), nullptr); CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len, "memcmp"); @@ -339,7 +314,7 @@ Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, /// EmitPutChar - Emit a call to the putchar function. This assumes that Char /// is an integer. -Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD, +Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::putchar)) return nullptr; @@ -361,7 +336,7 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD, /// EmitPutS - Emit a call to the puts function. This assumes that Str is /// some pointer. -Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, +Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::puts)) return nullptr; @@ -386,7 +361,7 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, /// EmitFPutC - Emit a call to the fputc function. This assumes that Char is /// an integer and File is a pointer to FILE. Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, - const DataLayout *TD, const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fputc)) return nullptr; @@ -419,7 +394,7 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, /// EmitFPutS - Emit a call to the puts function. Str is required to be a /// pointer and File is a pointer to FILE. Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, - const DataLayout *TD, const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fputs)) return nullptr; @@ -450,9 +425,8 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. 
-Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, - IRBuilder<> &B, const DataLayout *TD, - const TargetLibraryInfo *TLI) { +Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fwrite)) return nullptr; @@ -466,21 +440,18 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, StringRef FWriteName = TLI->getName(LibFunc::fwrite); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction(FWriteName, - AttributeSet::get(M->getContext(), AS), - TD->getIntPtrType(Context), - B.getInt8PtrTy(), - TD->getIntPtrType(Context), - TD->getIntPtrType(Context), - File->getType(), nullptr); + F = M->getOrInsertFunction( + FWriteName, AttributeSet::get(M->getContext(), AS), + DL.getIntPtrType(Context), B.getInt8PtrTy(), DL.getIntPtrType(Context), + DL.getIntPtrType(Context), File->getType(), nullptr); else - F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(Context), - B.getInt8PtrTy(), - TD->getIntPtrType(Context), - TD->getIntPtrType(Context), - File->getType(), nullptr); - CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, - ConstantInt::get(TD->getIntPtrType(Context), 1), File); + F = M->getOrInsertFunction(FWriteName, DL.getIntPtrType(Context), + B.getInt8PtrTy(), DL.getIntPtrType(Context), + DL.getIntPtrType(Context), File->getType(), + nullptr); + CallInst *CI = + B.CreateCall4(F, CastToCStr(Ptr, B), Size, + ConstantInt::get(DL.getIntPtrType(Context), 1), File); if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 09279b6..f04ea9c 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -34,7 +34,7 @@ #include <map> using namespace llvm; -// CloneBasicBlock - See comments in Cloning.h +/// See comments in Cloning.h. BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix, Function *F, @@ -202,7 +202,7 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc, } } -/// CloneFunction - Return a copy of the specified function, but without +/// Return a copy of the specified function, but without /// embedding the function into another module. Also, any references specified /// in the VMap are changed to refer to their mapped value instead of the /// original one. If any of the arguments to the function are in the VMap, @@ -250,8 +250,7 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap, namespace { - /// PruningFunctionCloner - This class is a private class used to implement - /// the CloneAndPruneFunctionInto method. + /// This is a private class used to implement CloneAndPruneFunctionInto. 
struct PruningFunctionCloner { Function *NewFunc; const Function *OldFunc; @@ -259,23 +258,18 @@ namespace { bool ModuleLevelChanges; const char *NameSuffix; ClonedCodeInfo *CodeInfo; - const DataLayout *DL; CloningDirector *Director; ValueMapTypeRemapper *TypeMapper; ValueMaterializer *Materializer; public: PruningFunctionCloner(Function *newFunc, const Function *oldFunc, - ValueToValueMapTy &valueMap, - bool moduleLevelChanges, - const char *nameSuffix, - ClonedCodeInfo *codeInfo, - const DataLayout *DL, + ValueToValueMapTy &valueMap, bool moduleLevelChanges, + const char *nameSuffix, ClonedCodeInfo *codeInfo, CloningDirector *Director) - : NewFunc(newFunc), OldFunc(oldFunc), - VMap(valueMap), ModuleLevelChanges(moduleLevelChanges), - NameSuffix(nameSuffix), CodeInfo(codeInfo), DL(DL), - Director(Director) { + : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), + ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix), + CodeInfo(codeInfo), Director(Director) { // These are optional components. The Director may return null. if (Director) { TypeMapper = Director->getTypeRemapper(); @@ -286,7 +280,7 @@ namespace { } } - /// CloneBlock - The specified block is found to be reachable, clone it and + /// The specified block is found to be reachable, clone it and /// anything that it can reach. void CloneBlock(const BasicBlock *BB, BasicBlock::const_iterator StartingInst, @@ -294,7 +288,7 @@ namespace { }; } -/// CloneBlock - The specified block is found to be reachable, clone it and +/// The specified block is found to be reachable, clone it and /// anything that it can reach. void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, BasicBlock::const_iterator StartingInst, @@ -360,7 +354,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // If we can simplify this instruction to some other value, simply add // a mapping to that value rather than inserting a new instruction into // the basic block. - if (Value *V = SimplifyInstruction(NewInst, DL)) { + if (Value *V = + SimplifyInstruction(NewInst, BB->getModule()->getDataLayout())) { // On the off-chance that this simplifies to an instruction in the old // function, map it back into the new function. if (Value *MappedV = VMap.lookup(V)) @@ -397,6 +392,14 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // terminator into the new basic block in this case. if (Action == CloningDirector::StopCloningBB) return; + if (Action == CloningDirector::CloneSuccessors) { + // If the director says to skip with a terminate instruction, we still + // need to clone this block's successors. + const TerminatorInst *TI = BB->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + ToClone.push_back(TI->getSuccessor(i)); + return; + } assert(Action != CloningDirector::SkipInstruction && "SkipInstruction is not valid for terminators."); } @@ -455,10 +458,9 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, } } -/// CloneAndPruneIntoFromInst - This works like CloneAndPruneFunctionInto, except -/// that it does not clone the entire function. Instead it starts at an -/// instruction provided by the caller and copies (and prunes) only the code -/// reachable from that instruction. +/// This works like CloneAndPruneFunctionInto, except that it does not clone the +/// entire function. Instead it starts at an instruction provided by the caller +/// and copies (and prunes) only the code reachable from that instruction. 
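The CloneSuccessors action handled in CloneBlock above lets a director substitute its own terminator while still having the cloner queue the old terminator's successors. A rough sketch of a director using it; the handleInstruction hook and the isSpecialTerminator predicate are assumptions about the CloningDirector interface, not code from this patch:

    struct MyDirector : public CloningDirector { // hypothetical
      CloningAction handleInstruction(ValueToValueMapTy &VMap,
                                      const Instruction *I,
                                      BasicBlock *NewBB) override {
        if (isSpecialTerminator(I)) // we already emitted a replacement, but
          return CloneSuccessors;   // the old successors still need cloning
        return CloneInstruction;
      }
    };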
void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst, ValueToValueMapTy &VMap, @@ -466,7 +468,6 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, SmallVectorImpl<ReturnInst *> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, - const DataLayout *DL, CloningDirector *Director) { assert(NameSuffix && "NameSuffix cannot be null!"); @@ -488,7 +489,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, #endif PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, - NameSuffix, CodeInfo, DL, Director); + NameSuffix, CodeInfo, Director); const BasicBlock *StartingBB; if (StartingInst) StartingBB = StartingInst->getParent(); @@ -523,11 +524,18 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Handle PHI nodes specially, as we have to remove references to dead // blocks. - for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) - if (const PHINode *PN = dyn_cast<PHINode>(I)) - PHIToResolve.push_back(PN); - else + for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) { + // PHI nodes may have been remapped to non-PHI nodes by the caller or + // during the cloning process. + if (const PHINode *PN = dyn_cast<PHINode>(I)) { + if (isa<PHINode>(VMap[PN])) + PHIToResolve.push_back(PN); + else + break; + } else { break; + } + } // Finally, remap the terminator instructions, as those can't be remapped // until all BBs are mapped. @@ -626,10 +634,10 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // node). for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx) if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]])) - recursivelySimplifyInstruction(PN, DL); + recursivelySimplifyInstruction(PN); // Now that the inlined function body has been fully constructed, go through - // and zap unconditional fall-through branches. This happen all the time when + // and zap unconditional fall-through branches. This happens all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB]); @@ -680,7 +688,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Do not increment I, iteratively merge all things this block branches to. } - // Make a final pass over the basic blocks from theh old function to gather + // Make a final pass over the basic blocks from the old function to gather // any return instructions which survived folding. We have to do this here // because we can iteratively remove and merge returns above. for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB]), @@ -691,7 +699,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, } -/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, +/// This works exactly like CloneFunctionInto, /// except that it does some simple constant prop and DCE on the fly. 
The /// effect of this is to copy significantly less code in cases where (for /// example) a function call with constant arguments is inlined, and those @@ -704,9 +712,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, - const DataLayout *DL, Instruction *TheCall) { - CloneAndPruneIntoFromInst(NewFunc, OldFunc, OldFunc->front().begin(), - VMap, ModuleLevelChanges, Returns, NameSuffix, - CodeInfo, DL, nullptr); + CloneAndPruneIntoFromInst(NewFunc, OldFunc, OldFunc->front().begin(), VMap, + ModuleLevelChanges, Returns, NameSuffix, CodeInfo, + nullptr); } diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index e70a7d6..ab89b41 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -332,11 +332,11 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, DEBUG(dbgs() << **i << ", "); DEBUG(dbgs() << ")\n"); + StructType *StructTy; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - PointerType *StructPtr = - PointerType::getUnqual(StructType::get(M->getContext(), paramTy)); + StructTy = StructType::get(M->getContext(), paramTy); paramTy.clear(); - paramTy.push_back(StructPtr); + paramTy.push_back(PointerType::getUnqual(StructTy)); } FunctionType *funcType = FunctionType::get(RetTy, paramTy, false); @@ -364,8 +364,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); TerminatorInst *TI = newFunction->begin()->getTerminator(); - GetElementPtrInst *GEP = - GetElementPtrInst::Create(AI, Idx, "gep_" + inputs[i]->getName(), TI); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructTy, AI, Idx, "gep_" + inputs[i]->getName(), TI); RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI); } else RewriteVal = AI++; @@ -447,6 +447,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, } } + StructType *StructArgTy = nullptr; AllocaInst *Struct = nullptr; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { std::vector<Type*> ArgTypes; @@ -455,7 +456,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, ArgTypes.push_back((*v)->getType()); // Allocate a struct at the beginning of this function - Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); + StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); Struct = new AllocaInst(StructArgTy, nullptr, "structArg", codeReplacer->getParent()->begin()->begin()); @@ -465,9 +466,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); - GetElementPtrInst *GEP = - GetElementPtrInst::Create(Struct, Idx, - "gep_" + StructValues[i]->getName()); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); codeReplacer->getInstList().push_back(GEP); StoreInst *SI = new StoreInst(StructValues[i], GEP); codeReplacer->getInstList().push_back(SI); @@ -491,9 +491,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = 
ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst *GEP - = GetElementPtrInst::Create(Struct, Idx, - "gep_reload_" + outputs[i]->getName()); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); codeReplacer->getInstList().push_back(GEP); Output = GEP; } else { @@ -606,10 +605,9 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut+out); - GetElementPtrInst *GEP = - GetElementPtrInst::Create(OAI, Idx, - "gep_" + outputs[out]->getName(), - NTRet); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, OAI, Idx, "gep_" + outputs[out]->getName(), + NTRet); new StoreInst(outputs[out], GEP, NTRet); } else { new StoreInst(outputs[out], OAI, NTRet); diff --git a/lib/Transforms/Utils/CtorUtils.cpp b/lib/Transforms/Utils/CtorUtils.cpp index 26875e8..dc95089 100644 --- a/lib/Transforms/Utils/CtorUtils.cpp +++ b/lib/Transforms/Utils/CtorUtils.cpp @@ -11,14 +11,15 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/BitVector.h" #include "llvm/Transforms/Utils/CtorUtils.h" +#include "llvm/ADT/BitVector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "ctor_utils" diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index c2ef1ac..df3e1d4 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -89,7 +89,7 @@ namespace { CallerLPad = cast<LandingPadInst>(I); } - /// getOuterResumeDest - The outer unwind destination is the target of + /// The outer unwind destination is the target of /// unwind edges introduced for calls within the inlined function. BasicBlock *getOuterResumeDest() const { return OuterResumeDest; @@ -99,17 +99,16 @@ namespace { LandingPadInst *getLandingPadInst() const { return CallerLPad; } - /// forwardResume - Forward the 'resume' instruction to the caller's landing - /// pad block. When the landing pad block has only one predecessor, this is + /// Forward the 'resume' instruction to the caller's landing pad block. + /// When the landing pad block has only one predecessor, this is /// a simple branch. When there is more than one predecessor, we need to /// split the landing pad block after the landingpad instruction and jump /// to there. void forwardResume(ResumeInst *RI, SmallPtrSetImpl<LandingPadInst*> &InlinedLPads); - /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind - /// destination block for the given basic block, using the values for the - /// original invoke's source block. + /// Add incoming-PHI values to the unwind destination block for the given + /// basic block, using the values for the original invoke's source block. void addIncomingPHIValuesFor(BasicBlock *BB) const { addIncomingPHIValuesForInto(BB, OuterResumeDest); } @@ -124,7 +123,7 @@ namespace { }; } -/// getInnerResumeDest - Get or create a target for the branch from ResumeInsts. +/// Get or create a target for the branch from ResumeInsts. 
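The CodeExtractor hunks above track the typed-GEP API migration: GetElementPtrInst::Create now takes the source element type explicitly instead of deriving it from the pointer operand's type. A minimal sketch of the new form, with hypothetical names:

    Value *Idx[] = { Builder.getInt32(0), Builder.getInt32(1) };
    GetElementPtrInst *GEP = GetElementPtrInst::Create(
        StructTy, StructPtr, Idx, "field1", InsertBefore);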
BasicBlock *InvokeInliningInfo::getInnerResumeDest() { if (InnerResumeDest) return InnerResumeDest; @@ -159,8 +158,8 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() { return InnerResumeDest; } -/// forwardResume - Forward the 'resume' instruction to the caller's landing pad -/// block. When the landing pad block has only one predecessor, this is a simple +/// Forward the 'resume' instruction to the caller's landing pad block. +/// When the landing pad block has only one predecessor, this is a simple /// branch. When there is more than one predecessor, we need to split the /// landing pad block after the landingpad instruction and jump to there. void InvokeInliningInfo::forwardResume(ResumeInst *RI, @@ -178,9 +177,9 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI, RI->eraseFromParent(); } -/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into -/// an invoke, we have to turn all of the calls that can throw into -/// invokes. This function analyze BB to see if there are any calls, and if so, +/// When we inline a basic block into an invoke, +/// we have to turn all of the calls that can throw into invokes. +/// This function analyzes BB to see if there are any calls, and if so, /// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI /// nodes in that block with the values specified in InvokeDestPHIValues. static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, @@ -228,7 +227,7 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, } } -/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls +/// If we inlined an invoke site, we need to convert calls /// in the body of the inlined function into invokes. /// /// II is the invoke instruction being inlined. FirstNewBlock is the first @@ -279,8 +278,8 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, InvokeDest->removePredecessor(II->getParent()); } -/// CloneAliasScopeMetadata - When inlining a function that contains noalias -/// scope metadata, this metadata needs to be cloned so that the inlined blocks +/// When inlining a function that contains noalias scope metadata, +/// this metadata needs to be cloned so that the inlined blocks /// have different "unique scopes" at every call site. Were this not done, then /// aliasing scopes from a function inlined into a caller multiple times could /// not be differentiated (and this would lead to miscompiles because the @@ -391,12 +390,12 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { } } -/// AddAliasScopeMetadata - If the inlined function has noalias arguments, then -/// add new alias scopes for each noalias argument, tag the mapped noalias +/// If the inlined function has noalias arguments, +/// then add new alias scopes for each noalias argument, tag the mapped noalias /// parameters with noalias metadata specifying the new scope, and tag all /// non-derived loads, stores and memory intrinsics with the new alias scopes. static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, - const DataLayout *DL, AliasAnalysis *AA) { + const DataLayout &DL, AliasAnalysis *AA) { if (!EnableNoAliasConversion) return; @@ -622,8 +621,9 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, /// If the inlined function has non-byval align arguments, then /// add @llvm.assume-based alignment assumptions to preserve this information.
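The AddAlignmentAssumptions body below ends in IRBuilder::CreateAlignmentAssumption, which expands to a compare-and-assume pattern. Roughly, for a pointer Arg assumed 16-byte aligned, it emits the equivalent of the following builder calls (an illustrative sketch; Builder and Arg are stand-ins):

    // ptrtoint, mask off the low alignment bits, assert the result is zero.
    Value *PtrInt = Builder.CreatePtrToInt(Arg, Builder.getInt64Ty());
    Value *Masked = Builder.CreateAnd(PtrInt, Builder.getInt64(16 - 1));
    Value *Cond   = Builder.CreateICmpEQ(Masked, Builder.getInt64(0));
    Builder.CreateAssumption(Cond); // emits call void @llvm.assume(i1 ...)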
static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { - if (!PreserveAlignmentAssumptions || !IFI.DL) + if (!PreserveAlignmentAssumptions) return; + auto &DL = CS.getCaller()->getParent()->getDataLayout(); // To avoid inserting redundant assumptions, we should check for assumptions // already in the caller. To do this, we might need a DT of the caller. @@ -645,20 +645,20 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { // If we can already prove the asserted alignment in the context of the // caller, then don't bother inserting the assumption. Value *Arg = CS.getArgument(I->getArgNo()); - if (getKnownAlignment(Arg, IFI.DL, + if (getKnownAlignment(Arg, DL, CS.getInstruction(), &IFI.ACT->getAssumptionCache(*CalledFunc), - CS.getInstruction(), &DT) >= Align) + &DT) >= Align) continue; - IRBuilder<>(CS.getInstruction()).CreateAlignmentAssumption(*IFI.DL, Arg, - Align); + IRBuilder<>(CS.getInstruction()) + .CreateAlignmentAssumption(DL, Arg, Align); } } } -/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee -/// into the caller, update the specified callgraph to reflect the changes we -/// made. Note that it's possible that not all code was copied over, so only +/// Once we have cloned code over from a callee into the caller, +/// update the specified callgraph to reflect the changes we made. +/// Note that it's possible that not all code was copied over, so only /// some edges of the callgraph may remain. static void UpdateCallGraphAfterInlining(CallSite CS, Function::iterator FirstNewBlock, @@ -693,8 +693,15 @@ static void UpdateCallGraphAfterInlining(CallSite CS, // If the call was inlined, but then constant folded, there is no edge to // add. Check for this case. Instruction *NewCall = dyn_cast<Instruction>(VMI->second); - if (!NewCall) continue; + if (!NewCall) + continue; + // We do not treat intrinsic calls like real function calls because we + // expect them to become inline code; do not add an edge for an intrinsic. + CallSite CS = CallSite(NewCall); + if (CS && CS.getCalledFunction() && CS.getCalledFunction()->isIntrinsic()) + continue; + // Remember that this call site got inlined for the client of // InlineFunction. IFI.InlinedCalls.push_back(NewCall); @@ -726,11 +733,7 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, Type *AggTy = cast<PointerType>(Src->getType())->getElementType(); IRBuilder<> Builder(InsertBlock->begin()); - Value *Size; - if (IFI.DL == nullptr) - Size = ConstantExpr::getSizeOf(AggTy); - else - Size = Builder.getInt64(IFI.DL->getTypeStoreSize(AggTy)); + Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy)); // Always generate a memcpy of alignment 1 here because we don't know // the alignment of the src pointer. Other optimizations can infer @@ -738,7 +741,7 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, Builder.CreateMemCpy(Dst, Src, Size, /*Align=*/1); } -/// HandleByValArgument - When inlining a call site that has a byval argument, +/// When inlining a call site that has a byval argument, /// we have to make the implicit memcpy explicit by adding it. static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, const Function *CalledFunc, @@ -759,11 +762,13 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment. 
return Arg; + const DataLayout &DL = Caller->getParent()->getDataLayout(); + // If the pointer is already known to be sufficiently aligned, or if we can // round it up to a larger alignment, then we don't need a temporary. - if (getOrEnforceKnownAlignment(Arg, ByValAlignment, IFI.DL, - &IFI.ACT->getAssumptionCache(*Caller), - TheCall) >= ByValAlignment) + if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall, + &IFI.ACT->getAssumptionCache(*Caller)) >= + ByValAlignment) return Arg; // Otherwise, we have to make a memcpy to get a safe alignment. This is bad @@ -771,10 +776,9 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, } // Create the alloca. If we have DataLayout, use nice alignment. - unsigned Align = 1; - if (IFI.DL) - Align = IFI.DL->getPrefTypeAlignment(AggTy); - + unsigned Align = + Caller->getParent()->getDataLayout().getPrefTypeAlignment(AggTy); + // If the byval had an alignment specified, we *must* use at least that // alignment, as it is required by the byval argument (and uses of the // pointer inside the callee). @@ -789,8 +793,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, return NewAlloca; } -// isUsedByLifetimeMarker - Check whether this Value is used by a lifetime -// intrinsic. +// Check whether this Value is used by a lifetime intrinsic. static bool isUsedByLifetimeMarker(Value *V) { for (User *U : V->users()) { if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { @@ -805,7 +808,7 @@ static bool isUsedByLifetimeMarker(Value *V) { return false; } -// hasLifetimeMarkers - Check whether the given alloca already has +// Check whether the given alloca already has // lifetime.start or lifetime.end intrinsics. static bool hasLifetimeMarkers(AllocaInst *AI) { Type *Ty = AI->getType(); @@ -862,7 +865,7 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode, return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), Last); } -/// fixupLineNumbers - Update inlined instructions' line numbers to +/// Update inlined instructions' line numbers to /// to encode location where these instructions are inlined. static void fixupLineNumbers(Function *Fn, Function::iterator FI, Instruction *TheCall) { @@ -920,10 +923,9 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, } } -/// InlineFunction - This function inlines the called function into the basic -/// block of the caller. This returns false if it is not possible to inline -/// this call. The program is still in a well defined state if this occurs -/// though. +/// This function inlines the called function into the basic block of the +/// caller. This returns false if it is not possible to inline this call. +/// The program is still in a well defined state if this occurs though. /// /// Note that this only does one level of inlining. For example, if the /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now @@ -1008,6 +1010,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Keep a list of pair (dst, src) to emit byval initializations. SmallVector<std::pair<Value*, Value*>, 4> ByValInit; + auto &DL = Caller->getParent()->getDataLayout(); + assert(CalledFunc->arg_size() == CS.arg_size() && "No varargs calls can be inlined!"); @@ -1042,9 +1046,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. 
- CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, + CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", - &InlinedFunctionInfo, IFI.DL, TheCall); + &InlinedFunctionInfo, TheCall); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; @@ -1065,7 +1069,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, CloneAliasScopeMetadata(CS, VMap); // Add noalias metadata if necessary. - AddAliasScopeMetadata(CS, VMap, IFI.DL, IFI.AA); + AddAliasScopeMetadata(CS, VMap, DL, IFI.AA); // FIXME: We could register any cloned assumptions instead of clearing the // whole function's cache. @@ -1173,18 +1177,17 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, ConstantInt *AllocaSize = nullptr; if (ConstantInt *AIArraySize = dyn_cast<ConstantInt>(AI->getArraySize())) { - if (IFI.DL) { - Type *AllocaType = AI->getAllocatedType(); - uint64_t AllocaTypeSize = IFI.DL->getTypeAllocSize(AllocaType); - uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); - assert(AllocaArraySize > 0 && "array size of AllocaInst is zero"); - // Check that array size doesn't saturate uint64_t and doesn't - // overflow when it's multiplied by type size. - if (AllocaArraySize != ~0ULL && - UINT64_MAX / AllocaArraySize >= AllocaTypeSize) { - AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), - AllocaArraySize * AllocaTypeSize); - } + auto &DL = Caller->getParent()->getDataLayout(); + Type *AllocaType = AI->getAllocatedType(); + uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType); + uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); + assert(AllocaArraySize > 0 && "array size of AllocaInst is zero"); + // Check that array size doesn't saturate uint64_t and doesn't + // overflow when it's multiplied by type size. + if (AllocaArraySize != ~0ULL && + UINT64_MAX / AllocaArraySize >= AllocaTypeSize) { + AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), + AllocaArraySize * AllocaTypeSize); } } @@ -1445,7 +1448,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // the entries are the same or undef). If so, remove the PHI so it doesn't // block other optimizations. if (PHI) { - if (Value *V = SimplifyInstruction(PHI, IFI.DL, nullptr, nullptr, + auto &DL = Caller->getParent()->getDataLayout(); + if (Value *V = SimplifyInstruction(PHI, DL, nullptr, nullptr, &IFI.ACT->getAssumptionCache(*Caller))) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 4830568..bd15f9e 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -17,8 +17,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/LibCallSemantics.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LibCallSemantics.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" @@ -417,7 +417,7 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, /// /// This returns true if it changed the code, note that it can delete /// instructions in other blocks as well in this block. 
-bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD, +bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI) { bool MadeChange = false; @@ -434,7 +434,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD, Instruction *Inst = BI++; WeakVH BIHandle(BI); - if (recursivelySimplifyInstruction(Inst, TD, TLI)) { + if (recursivelySimplifyInstruction(Inst, TLI)) { MadeChange = true; if (BIHandle != BI) BI = BB->begin(); @@ -464,8 +464,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD, /// /// .. and delete the predecessor corresponding to the '1', this will attempt to /// recursively fold the and to 0. -void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, - DataLayout *TD) { +void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) { // This only adjusts blocks with PHI nodes. if (!isa<PHINode>(BB->begin())) return; @@ -480,7 +479,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt)); Value *OldPhiIt = PhiIt; - if (!recursivelySimplifyInstruction(PN, TD)) + if (!recursivelySimplifyInstruction(PN)) continue; // If recursive simplification ended up deleting the next PHI node we would @@ -900,13 +899,14 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { /// their preferred alignment from the beginning. /// static unsigned enforceKnownAlignment(Value *V, unsigned Align, - unsigned PrefAlign, const DataLayout *TD) { + unsigned PrefAlign, + const DataLayout &DL) { V = V->stripPointerCasts(); if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { // If the preferred alignment is greater than the natural stack alignment // then don't round up. This avoids dynamic stack realignment. - if (TD && TD->exceedsNaturalStackAlignment(PrefAlign)) + if (DL.exceedsNaturalStackAlignment(PrefAlign)) return Align; // If there is a requested alignment and if this is an alloca, round up. if (AI->getAlignment() >= PrefAlign) @@ -945,13 +945,13 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align, /// and it is more than the alignment of the ultimate object, see if we can /// increase the alignment of the ultimate object, making this check succeed. unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, - const DataLayout *DL, - AssumptionCache *AC, + const DataLayout &DL, const Instruction *CxtI, + AssumptionCache *AC, const DominatorTree *DT) { assert(V->getType()->isPointerTy() && "getOrEnforceKnownAlignment expects a pointer!"); - unsigned BitWidth = DL ? 
DL->getPointerTypeSizeInBits(V->getType()) : 64; + unsigned BitWidth = DL.getPointerTypeSizeInBits(V->getType()); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, CxtI, DT); diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index a0f8268..90dfaba 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -57,8 +57,10 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -209,10 +211,11 @@ static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA, DominatorTree *DT, AssumptionCache *AC) { + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I); ++I; - if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT, AC)) { + if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { // This is a degenerate PHI already, don't modify it! PN->replaceAllUsesWith(V); if (AA) AA->deleteValue(PN); @@ -476,7 +479,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, /// explicit if they accepted the analysis directly and then updated it. static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, - ScalarEvolution *SE, Pass *PP, const DataLayout *DL, + ScalarEvolution *SE, Pass *PP, AssumptionCache *AC) { bool Changed = false; ReprocessLoop: @@ -608,13 +611,15 @@ ReprocessLoop: } } + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + // Scan over the PHI nodes in the loop header. Since they now have only two // incoming values (the loop is canonicalized), we may have simplified the PHI // down to 'X = phi [X, Y]', which should be replaced with 'Y'. PHINode *PN; for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast<PHINode>(I++)); ) - if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT, AC)) { + if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { if (AA) AA->deleteValue(PN); if (SE) SE->forgetValue(PN); PN->replaceAllUsesWith(V); @@ -676,7 +681,8 @@ ReprocessLoop: // The block has now been cleared of all instructions except for // a comparison and a conditional branch. SimplifyCFG may be able // to fold it now. - if (!FoldBranchToCommonDest(BI, DL)) continue; + if (!FoldBranchToCommonDest(BI)) + continue; // Success. The block is now dead, so remove it from the loop, // update the dominator tree and delete it. @@ -714,7 +720,7 @@ ReprocessLoop: bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, AliasAnalysis *AA, ScalarEvolution *SE, - const DataLayout *DL, AssumptionCache *AC) { + AssumptionCache *AC) { bool Changed = false; // Worklist maintains our depth-first queue of loops in this nest to process. 
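The findPHIToPartitionLoops hunk above is representative of how callers adapt to the new SimplifyInstruction signature: the layout is fetched once from the loop header's module and passed by reference. A condensed sketch (the helper name is illustrative; the calls mirror the hunk):

    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    static void simplifyHeaderPHIs(Loop *L, DominatorTree *DT,
                                   AssumptionCache *AC) {
      // Reach DataLayout through the IR itself instead of an optional pass.
      const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
      for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I);) {
        PHINode *PN = cast<PHINode>(I);
        ++I;
        if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) {
          PN->replaceAllUsesWith(V); // degenerate PHI folds to a known value
          PN->eraseFromParent();
        }
      }
    }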
@@ -731,7 +737,7 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, while (!Worklist.empty()) Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI, - SE, PP, DL, AC); + SE, PP, AC); return Changed; } @@ -749,7 +755,6 @@ namespace { DominatorTree *DT; LoopInfo *LI; ScalarEvolution *SE; - const DataLayout *DL; AssumptionCache *AC; bool runOnFunction(Function &F) override; @@ -797,13 +802,11 @@ bool LoopSimplify::runOnFunction(Function &F) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); SE = getAnalysisIfAvailable<ScalarEvolution>(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); // Simplify each loop nest in the function. for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, DL, AC); + Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, AC); return Changed; } diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index accb731..6b3aa02 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -26,8 +26,8 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -500,6 +500,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // At this point, the code is well formed. We now do a quick sweep over the // inserted code, doing constant propagation and dead code elimination as we // go. + const DataLayout &DL = Header->getModule()->getDataLayout(); const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks(); for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(), BBE = NewLoopBlocks.end(); BB != BBE; ++BB) @@ -508,7 +509,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, if (isInstructionTriviallyDead(Inst)) (*BB)->getInstList().erase(Inst); - else if (Value *V = SimplifyInstruction(Inst)) + else if (Value *V = SimplifyInstruction(Inst, DL)) if (LI->replacementPreservesLCSSAForm(Inst, V)) { Inst->replaceAllUsesWith(V); (*BB)->getInstList().erase(Inst); @@ -531,9 +532,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, if (!OuterL && !CompletelyUnroll) OuterL = L; if (OuterL) { - DataLayoutPass *DLP = PP->getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; - simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, DL, AC); + simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, AC); // LCSSA must be performed on the outermost affected loop. 
The unrolled // loop's last loop latch is guaranteed to be in the outermost loop after diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 91b688c..381d8fc 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -31,6 +31,7 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" @@ -339,10 +340,11 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI); BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI); BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator()); + const DataLayout &DL = Header->getModule()->getDataLayout(); // Compute the number of extra iterations required, which is: // extra iterations = run-time trip count % (loop unroll factor + 1) - SCEVExpander Expander(*SE, "loop-unroll"); + SCEVExpander Expander(*SE, DL, "loop-unroll"); Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR); Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(), diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index b3bdae4..e0e0e90 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -14,17 +14,17 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/CFG.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include <algorithm> using namespace llvm; @@ -175,11 +175,16 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, // Remove additional occurrences coming from condensed cases and keep the // number of incoming values equal to the number of branches to SuccBB. + SmallVector<unsigned, 8> Indices; for (++Idx; LocalNumMergedCases > 0 && Idx < E; ++Idx) if (PN->getIncomingBlock(Idx) == OrigBB) { - PN->removeIncomingValue(Idx); + Indices.push_back(Idx); LocalNumMergedCases--; } + // Remove incoming values in the reverse order to prevent invalidating // *successive* indices. + for (auto III = Indices.rbegin(), IIE = Indices.rend(); III != IIE; ++III) + PN->removeIncomingValue(*III); } } diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index dabadb7..4b34b19 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -45,6 +45,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> #include <queue> @@ -667,6 +668,8 @@ void PromoteMem2Reg::run() { A->eraseFromParent(); } + const DataLayout &DL = F.getParent()->getDataLayout(); + // Remove alloca's dbg.declare intrinsics from the function.
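The fixPhis change in the LowerSwitch hunk above is worth pausing on: PHINode::removeIncomingValue compacts the operand list, so erasing while scanning forward would shift every index recorded after the erased one. The fix records the doomed indices and erases them highest-first. In isolation (a sketch; PN and OrigBB are the names from the hunk, and the scan is generalized to start at 0):

    SmallVector<unsigned, 8> Indices;
    for (unsigned Idx = 0, E = PN->getNumIncomingValues(); Idx != E; ++Idx)
      if (PN->getIncomingBlock(Idx) == OrigBB)
        Indices.push_back(Idx);
    // Erase from the back so earlier removals cannot shift an index that
    // is still waiting to be erased.
    for (auto It = Indices.rbegin(), End = Indices.rend(); It != End; ++It)
      PN->removeIncomingValue(*It);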
for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) @@ -691,7 +694,7 @@ void PromoteMem2Reg::run() { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. - if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, &DT, AC)) { + if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT, AC)) { if (AST && PN->getType()->isPointerTy()) AST->deleteValue(PN); PN->replaceAllUsesWith(V); diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index c057b06..955ce30 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -19,6 +19,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -155,7 +156,8 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // See if the PHI node can be merged to a single value. This can happen in // loop cases when we get a PHI of itself and one other value. - if (Value *V = SimplifyInstruction(InsertedPHI)) { + if (Value *V = + SimplifyInstruction(InsertedPHI, BB->getModule()->getDataLayout())) { InsertedPHI->eraseFromParent(); return V; } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 3248a83..c7c0ca6 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -110,8 +110,8 @@ namespace { class SimplifyCFGOpt { const TargetTransformInfo &TTI; + const DataLayout &DL; unsigned BonusInstThreshold; - const DataLayout *const DL; AssumptionCache *AC; Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, @@ -131,9 +131,9 @@ class SimplifyCFGOpt { bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder); public: - SimplifyCFGOpt(const TargetTransformInfo &TTI, unsigned BonusInstThreshold, - const DataLayout *DL, AssumptionCache *AC) - : TTI(TTI), BonusInstThreshold(BonusInstThreshold), DL(DL), AC(AC) {} + SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL, + unsigned BonusInstThreshold, AssumptionCache *AC) + : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC) {} bool run(BasicBlock *BB); }; } @@ -223,9 +223,9 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, /// given instruction, which is assumed to be safe to speculate. TCC_Free means /// cheap, TCC_Basic means less cheap, and TCC_Expensive means prohibitively /// expensive. -static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL, +static unsigned ComputeSpeculationCost(const User *I, const TargetTransformInfo &TTI) { - assert(isSafeToSpeculativelyExecute(I, DL) && + assert(isSafeToSpeculativelyExecute(I) && "Instruction is not safe to speculatively execute!"); return TTI.getUserCost(I); } @@ -249,7 +249,6 @@ static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL, static bool DominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSetImpl<Instruction*> *AggressiveInsts, unsigned &CostRemaining, - const DataLayout *DL, const TargetTransformInfo &TTI) { Instruction *I = dyn_cast<Instruction>(V); if (!I) { @@ -283,10 +282,10 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // Okay, it looks like the instruction IS in the "condition". 
Check to // see if it's a cheap instruction to unconditionally compute, and if it // only uses stuff defined outside of the condition. If so, hoist it out. - if (!isSafeToSpeculativelyExecute(I, DL)) + if (!isSafeToSpeculativelyExecute(I)) return false; - unsigned Cost = ComputeSpeculationCost(I, DL, TTI); + unsigned Cost = ComputeSpeculationCost(I, TTI); if (Cost > CostRemaining) return false; @@ -296,7 +295,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // Okay, we can only really hoist these out if their operands do // not take us over the cost threshold. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, DL, TTI)) + if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI)) return false; // Okay, it's safe to do this! Remember this instruction. AggressiveInsts->insert(I); @@ -305,15 +304,15 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, /// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr /// and PointerNullValue. Return NULL if value is not a constant int. -static ConstantInt *GetConstantInt(Value *V, const DataLayout *DL) { +static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) { // Normal constant int. ConstantInt *CI = dyn_cast<ConstantInt>(V); - if (CI || !DL || !isa<Constant>(V) || !V->getType()->isPointerTy()) + if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy()) return CI; // This is some kind of pointer constant. Turn it into a pointer-sized // ConstantInt if possible. - IntegerType *PtrTy = cast<IntegerType>(DL->getIntPtrType(V->getType())); + IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType())); // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). if (isa<ConstantPointerNull>(V)) @@ -346,16 +345,16 @@ namespace { /// while for a chain of '&&' it will build the set elements that make the test /// fail. struct ConstantComparesGatherer { - + const DataLayout &DL; Value *CompValue; /// Value found for the switch comparison Value *Extra; /// Extra clause to be checked before the switch SmallVector<ConstantInt *, 8> Vals; /// Set of integers to match in switch unsigned UsedICmps; /// Number of comparisons matched in the and/or chain /// Construct and compute the result for the comparison instruction Cond - ConstantComparesGatherer(Instruction *Cond, const DataLayout *DL) - : CompValue(nullptr), Extra(nullptr), UsedICmps(0) { - gather(Cond, DL); + ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) + : DL(DL), CompValue(nullptr), Extra(nullptr), UsedICmps(0) { + gather(Cond); } /// Prevent copy @@ -380,7 +379,7 @@ private: /// against is placed in CompValue. /// If CompValue is already set, the function is expected to fail if a match /// is found but the value compared to is different. - bool matchInstruction(Instruction *I, const DataLayout *DL, bool isEQ) { + bool matchInstruction(Instruction *I, bool isEQ) { // If this is an icmp against a constant, handle this as one of the cases. ICmpInst *ICI; ConstantInt *C; @@ -422,8 +421,8 @@ private: } // If we have "x ult 3", for example, then we can add 0,1,2 to the set. - ConstantRange Span = ConstantRange::makeICmpRegion(ICI->getPredicate(), - C->getValue()); + ConstantRange Span = ConstantRange::makeAllowedICmpRegion( + ICI->getPredicate(), C->getValue()); // Shift the range if the compare is fed by an add. This is the range // compare idiom as emitted by instcombine. 
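The switch to ConstantRange::makeAllowedICmpRegion above feeds the gatherer's case expansion: a single compare is turned into every integer it accepts before being appended to Vals. Roughly (ICI, C and Vals are the names from the surrounding code; the range-size and wrap checks the real code performs are omitted):

    // "x ult 3" yields the allowed region [0, 3), i.e. the cases 0, 1, 2.
    ConstantRange Span = ConstantRange::makeAllowedICmpRegion(
        ICI->getPredicate(), C->getValue());
    for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
      Vals.push_back(ConstantInt::get(ICI->getContext(), Tmp));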
@@ -462,7 +461,7 @@ private: /// the value being compared, and stick the list constants into the Vals /// vector. /// One "Extra" case is allowed to differ from the other. - void gather(Value *V, const DataLayout *DL) { + void gather(Value *V) { Instruction *I = dyn_cast<Instruction>(V); bool isEQ = (I->getOpcode() == Instruction::Or); @@ -484,7 +483,7 @@ private: } // Try to match the current instruction - if (matchInstruction(I, DL, isEQ)) + if (matchInstruction(I, isEQ)) // Match succeed, continue the loop continue; } @@ -532,15 +531,16 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { CV = SI->getCondition(); } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) if (BI->isConditional() && BI->getCondition()->hasOneUse()) - if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) + if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) { if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL)) CV = ICI->getOperand(0); + } // Unwrap any lossless ptrtoint cast. - if (DL && CV) { + if (CV) { if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) { Value *Ptr = PTII->getPointerOperand(); - if (PTII->getType() == DL->getIntPtrType(Ptr->getType())) + if (PTII->getType() == DL.getIntPtrType(Ptr->getType())) CV = Ptr; } } @@ -981,8 +981,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, Builder.SetInsertPoint(PTI); // Convert pointer to int before we switch. if (CV->getType()->isPointerTy()) { - assert(DL && "Cannot switch on pointer without DataLayout"); - CV = Builder.CreatePtrToInt(CV, DL->getIntPtrType(CV->getType()), + CV = Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr"); } @@ -1053,7 +1052,7 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I); /// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and /// BB2, hoist any common code in the two blocks up into the branch block. The /// caller of this function guarantees that BI's block dominates BB1 and BB2. -static bool HoistThenElseCodeToIf(BranchInst *BI, const DataLayout *DL, +static bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI) { // This does very trivial matching, with limited scanning, to find identical // instructions in the two blocks. In particular, we don't want to get into @@ -1145,9 +1144,9 @@ HoistTerminator: passingValueIsAlwaysUndefined(BB2V, PN)) return Changed; - if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V, DL)) + if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V)) return Changed; - if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V, DL)) + if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V)) return Changed; } } @@ -1467,7 +1466,6 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, /// /// \returns true if the conditional block is removed. static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, - const DataLayout *DL, const TargetTransformInfo &TTI) { // Be conservative for now. FP select instruction can often be expensive. Value *BrCond = BI->getCondition(); @@ -1511,14 +1509,13 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, return false; // Don't hoist the instruction if it's unsafe or expensive. 
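Dropping the DataLayout argument does not change the speculation policy the following lines implement: an instruction may only be hoisted past the branch when it is safe to execute unconditionally and its TTI cost stays under a fixed budget. Condensed (a sketch using this file's names; PHINodeFoldingThreshold is the file's cl::opt):

    // Safety gate first, then the cost ceiling.
    if (!isSafeToSpeculativelyExecute(I))
      return false;
    unsigned MaxCost =
        PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
    if (ComputeSpeculationCost(I, TTI) > MaxCost)
      return false;

The real code additionally lets a store through this gate when isSafeToSpeculateStore finds a matching store in the other block.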
- if (!isSafeToSpeculativelyExecute(I, DL) && - !(HoistCondStores && - (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB, - EndBB)))) + if (!isSafeToSpeculativelyExecute(I) && + !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore( + I, BB, ThenBB, EndBB)))) return false; if (!SpeculatedStoreValue && - ComputeSpeculationCost(I, DL, TTI) > PHINodeFoldingThreshold * - TargetTransformInfo::TCC_Basic) + ComputeSpeculationCost(I, TTI) > + PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic) return false; // Store the store speculation candidate. @@ -1574,11 +1571,11 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, if (!OrigCE && !ThenCE) continue; // Known safe and cheap. - if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE, DL)) || - (OrigCE && !isSafeToSpeculativelyExecute(OrigCE, DL))) + if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || + (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) return false; - unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, DL, TTI) : 0; - unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, DL, TTI) : 0; + unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0; + unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0; unsigned MaxCost = 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; if (OrigCost + ThenCost > MaxCost) @@ -1688,7 +1685,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { /// that is defined in the same block as the branch and if any PHI entries are /// constants, thread edges corresponding to that entry to be branches to their /// ultimate destination. -static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) { +static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) { BasicBlock *BB = BI->getParent(); PHINode *PN = dyn_cast<PHINode>(BI->getCondition()); // NOTE: we currently cannot transform this case if the PHI node is used @@ -1786,8 +1783,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) { /// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry /// PHI node, see if we can eliminate it. -static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL, - const TargetTransformInfo &TTI) { +static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, + const DataLayout &DL) { // Ok, this is a two entry PHI node. Check to see if this is a simple "if // statement", which has a very simple dominance structure. Basically, we // are trying to find the condition that is being branched on, which @@ -1830,9 +1827,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL, } if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts, - MaxCostVal0, DL, TTI) || + MaxCostVal0, TTI) || !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts, - MaxCostVal1, DL, TTI)) + MaxCostVal1, TTI)) return false; } @@ -2052,8 +2049,7 @@ static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) { /// FoldBranchToCommonDest - If this basic block is simple enough, and if a /// predecessor branches to us and one of our successors, fold the block into /// the predecessor and use logical operations to pick the right destination. 
-bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL, - unsigned BonusInstThreshold) { +bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { BasicBlock *BB = BI->getParent(); Instruction *Cond = nullptr; @@ -2109,7 +2105,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL, // Ignore dbg intrinsics. if (isa<DbgInfoIntrinsic>(I)) continue; - if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(I, DL)) + if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(I)) return false; // I has only one use and can be executed unconditionally. Instruction *User = dyn_cast<Instruction>(I->user_back()); @@ -2702,8 +2698,9 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { /// We prefer to split the edge to 'end' so that there is a true/false entry to /// the PHI, merging the third icmp into the switch. static bool TryToSimplifyUncondBranchWithICmpInIt( - ICmpInst *ICI, IRBuilder<> &Builder, const TargetTransformInfo &TTI, - unsigned BonusInstThreshold, const DataLayout *DL, AssumptionCache *AC) { + ICmpInst *ICI, IRBuilder<> &Builder, const DataLayout &DL, + const TargetTransformInfo &TTI, unsigned BonusInstThreshold, + AssumptionCache *AC) { BasicBlock *BB = ICI->getParent(); // If the block has any PHIs in it or the icmp has multiple uses, it is too @@ -2736,7 +2733,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( ICI->eraseFromParent(); } // BB is now empty, so it is likely to simplify away. - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } // Ok, the block is reachable from the default dest. If the constant we're @@ -2752,7 +2749,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); // BB is now empty, so it is likely to simplify away. - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } // The use of the icmp has to be in the 'end' block, by the only PHI node in @@ -2808,8 +2805,8 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( /// SimplifyBranchOnICmpChain - The specified branch is a conditional branch. /// Check to see if it is branching on an or/and chain of icmp instructions, and /// fold it into a switch instruction if so. -static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL, - IRBuilder<> &Builder) { +static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, + const DataLayout &DL) { Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); if (!Cond) return false; @@ -2884,10 +2881,8 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL, Builder.SetInsertPoint(BI); // Convert pointer to int before we switch. if (CompVal->getType()->isPointerTy()) { - assert(DL && "Cannot switch on pointer without DataLayout"); - CompVal = Builder.CreatePtrToInt(CompVal, - DL->getIntPtrType(CompVal->getType()), - "magicptr"); + CompVal = Builder.CreatePtrToInt( + CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr"); } // Create the new switch instruction now. @@ -3246,8 +3241,8 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { /// EliminateDeadSwitchCases - Compute masked bits for the condition of a switch /// and use it to remove dead cases. 
-static bool EliminateDeadSwitchCases(SwitchInst *SI, const DataLayout *DL, - AssumptionCache *AC) { +static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, + const DataLayout &DL) { Value *Cond = SI->getCondition(); unsigned Bits = Cond->getType()->getIntegerBitWidth(); APInt KnownZero(Bits, 0), KnownOne(Bits, 0); @@ -3398,9 +3393,8 @@ static Constant *LookupConstant(Value *V, /// constant or can be replaced by constants from the ConstantPool. Returns the /// resulting constant on success, 0 otherwise. static Constant * -ConstantFold(Instruction *I, - const SmallDenseMap<Value *, Constant *> &ConstantPool, - const DataLayout *DL) { +ConstantFold(Instruction *I, const DataLayout &DL, + const SmallDenseMap<Value *, Constant *> &ConstantPool) { if (SelectInst *Select = dyn_cast<SelectInst>(I)) { Constant *A = LookupConstant(Select->getCondition(), ConstantPool); if (!A) @@ -3420,9 +3414,10 @@ ConstantFold(Instruction *I, return nullptr; } - if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) + if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) { return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0], COps[1], DL); + } return ConstantFoldInstOperands(I->getOpcode(), I->getType(), COps, DL); } @@ -3432,12 +3427,10 @@ ConstantFold(Instruction *I, /// destinations CaseDest corresponding to value CaseVal (0 for the default /// case), of a switch instruction SI. static bool -GetCaseResults(SwitchInst *SI, - ConstantInt *CaseVal, - BasicBlock *CaseDest, +GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, - SmallVectorImpl<std::pair<PHINode *, Constant *> > &Res, - const DataLayout *DL) { + SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res, + const DataLayout &DL) { // The block from which we enter the common destination. BasicBlock *Pred = SI->getParent(); @@ -3456,7 +3449,7 @@ GetCaseResults(SwitchInst *SI, } else if (isa<DbgInfoIntrinsic>(I)) { // Skip debug intrinsic. continue; - } else if (Constant *C = ConstantFold(I, ConstantPool, DL)) { + } else if (Constant *C = ConstantFold(I, DL, ConstantPool)) { // Instruction is side-effect free and constant. // If the instruction has uses outside this block or a phi node slot for @@ -3527,11 +3520,11 @@ static void MapCaseToResult(ConstantInt *CaseVal, // results for the PHI node of the common destination block for a switch // instruction. Returns false if multiple PHI nodes have been found or if // there is not a common destination block for the switch. -static bool InitializeUniqueCases( - SwitchInst *SI, const DataLayout *DL, PHINode *&PHI, - BasicBlock *&CommonDest, - SwitchCaseResultVectorTy &UniqueResults, - Constant *&DefaultResult) { +static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, + BasicBlock *&CommonDest, + SwitchCaseResultVectorTy &UniqueResults, + Constant *&DefaultResult, + const DataLayout &DL) { for (auto &I : SI->cases()) { ConstantInt *CaseVal = I.getCaseValue(); @@ -3638,15 +3631,15 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI, /// phi nodes in a common successor block with only two different /// constant values, replace the switch with select.
static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, - const DataLayout *DL, AssumptionCache *AC) { + AssumptionCache *AC, const DataLayout &DL) { Value *const Cond = SI->getCondition(); PHINode *PHI = nullptr; BasicBlock *CommonDest = nullptr; Constant *DefaultResult; SwitchCaseResultVectorTy UniqueResults; // Collect all the cases that will deliver the same value from the switch. - if (!InitializeUniqueCases(SI, DL, PHI, CommonDest, UniqueResults, - DefaultResult)) + if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult, + DL)) return false; // Selects choose between maximum two values. if (UniqueResults.size() != 2) @@ -3673,12 +3666,10 @@ namespace { /// SwitchLookupTable - Create a lookup table to use as a switch replacement /// with the contents of Values, using DefaultValue to fill any holes in the /// table. - SwitchLookupTable(Module &M, - uint64_t TableSize, - ConstantInt *Offset, - const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values, - Constant *DefaultValue, - const DataLayout *DL); + SwitchLookupTable( + Module &M, uint64_t TableSize, ConstantInt *Offset, + const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values, + Constant *DefaultValue, const DataLayout &DL); /// BuildLookup - Build instructions with Builder to retrieve the value at /// the position given by Index in the lookup table. @@ -3686,8 +3677,7 @@ namespace { /// WouldFitInRegister - Return true if a table with TableSize elements of /// type ElementType would fit in a target-legal register. - static bool WouldFitInRegister(const DataLayout *DL, - uint64_t TableSize, + static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize, const Type *ElementType); private: @@ -3729,12 +3719,10 @@ namespace { }; } -SwitchLookupTable::SwitchLookupTable(Module &M, - uint64_t TableSize, - ConstantInt *Offset, - const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values, - Constant *DefaultValue, - const DataLayout *DL) +SwitchLookupTable::SwitchLookupTable( + Module &M, uint64_t TableSize, ConstantInt *Offset, + const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values, + Constant *DefaultValue, const DataLayout &DL) : SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr), LinearOffset(nullptr), LinearMultiplier(nullptr), Array(nullptr) { assert(Values.size() && "Can't build lookup table without values!"); @@ -3904,11 +3892,9 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { llvm_unreachable("Unknown lookup table kind!"); } -bool SwitchLookupTable::WouldFitInRegister(const DataLayout *DL, +bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL, uint64_t TableSize, const Type *ElementType) { - if (!DL) - return false; const IntegerType *IT = dyn_cast<IntegerType>(ElementType); if (!IT) return false; @@ -3918,17 +3904,16 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *DL, // Avoid overflow, fitsInLegalInteger uses unsigned int for the width. if (TableSize >= UINT_MAX/IT->getBitWidth()) return false; - return DL->fitsInLegalInteger(TableSize * IT->getBitWidth()); + return DL.fitsInLegalInteger(TableSize * IT->getBitWidth()); } /// ShouldBuildLookupTable - Determine whether a lookup table should be built /// for this switch, based on the number of cases, size of the table and the /// types of the results. 
-static bool ShouldBuildLookupTable(SwitchInst *SI, - uint64_t TableSize, - const TargetTransformInfo &TTI, - const DataLayout *DL, - const SmallDenseMap<PHINode*, Type*>& ResultTypes) { +static bool +ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, + const TargetTransformInfo &TTI, const DataLayout &DL, + const SmallDenseMap<PHINode *, Type *> &ResultTypes) { if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10) return false; // TableSize overflowed, or mul below might overflow. @@ -4051,10 +4036,9 @@ static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, /// SwitchToLookupTable - If the switch is only used to initialize one or more /// phi nodes in a common successor block with different constant values, /// replace the switch with lookup tables. -static bool SwitchToLookupTable(SwitchInst *SI, - IRBuilder<> &Builder, - const TargetTransformInfo &TTI, - const DataLayout* DL) { +static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, + const DataLayout &DL, + const TargetTransformInfo &TTI) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); // Only build lookup table when we have a target that supports it. @@ -4125,14 +4109,14 @@ static bool SwitchToLookupTable(SwitchInst *SI, // or a bitmask that fits in a register. SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList; bool HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(), - &CommonDest, DefaultResultsList, DL); + &CommonDest, DefaultResultsList, DL); bool NeedMask = (TableHasHoles && !HasDefaultResults); if (NeedMask) { // As an extra penalty for the validity test we require more cases. if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark). return false; - if (!(DL && DL->fitsInLegalInteger(TableSize))) + if (!DL.fitsInLegalInteger(TableSize)) return false; } @@ -4290,12 +4274,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { // see if that predecessor totally determines the outcome of this switch. if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; Value *Cond = SI->getCondition(); if (SelectInst *Select = dyn_cast<SelectInst>(Cond)) if (SimplifySwitchOnSelect(SI, Select)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; // If the block only contains the switch, see if we can fold the block // away into any preds. @@ -4305,25 +4289,25 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { ++BBI; if (SI == &*BBI) if (FoldValueComparisonIntoPredecessors(SI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } // Try to transform the switch into an icmp and a branch. if (TurnSwitchRangeIntoICmp(SI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; // Remove unreachable cases. 
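The lookup-table heuristics above reduce to WouldFitInRegister: a table of TableSize entries of integer type IT is modeled as one wide integer, which is only worth building when that integer is legal for the target. Restated on its own (the helper name is illustrative; the overflow guard mirrors the hunk):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/DerivedTypes.h"
    #include <climits>
    using namespace llvm;

    static bool fitsInRegister(const DataLayout &DL, uint64_t TableSize,
                               const IntegerType *IT) {
      // fitsInLegalInteger takes an unsigned width, so guard the multiply
      // against overflow before computing TableSize * BitWidth.
      if (TableSize >= UINT_MAX / IT->getBitWidth())
        return false;
      return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
    }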
- if (EliminateDeadSwitchCases(SI, DL, AC)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (EliminateDeadSwitchCases(SI, AC, DL)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; - if (SwitchToSelect(SI, Builder, DL, AC)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (SwitchToSelect(SI, Builder, AC, DL)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; if (ForwardSwitchConditionToPHI(SI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; - if (SwitchToLookupTable(SI, Builder, TTI, DL)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (SwitchToLookupTable(SI, Builder, DL, TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; return false; } @@ -4360,11 +4344,87 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) { if (SimplifyIndirectBrOnSelect(IBI, SI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } return Changed; } +/// Given a block with only a single landing pad and an unconditional branch, +/// try to find another basic block which this one can be merged with. This +/// handles cases where we have multiple invokes with unique landing pads, but +/// a shared handler. +/// +/// We specifically choose to not worry about merging non-empty blocks +/// here. That is a PRE/scheduling problem and is best solved elsewhere. In +/// practice, the optimizer produces empty landing pad blocks quite frequently +/// when dealing with exception-dense code. (see: instcombine, gvn, if-else +/// sinking in this file) +/// +/// This is primarily a code size optimization. We need to avoid performing +/// any transform which might inhibit optimization (such as our ability to +/// specialize a particular handler via tail commoning). We do this by not +/// merging any blocks which require us to introduce a phi. Since the same +/// values are flowing through both blocks, we don't lose any ability to +/// specialize. If anything, we make such specialization more likely. +/// +/// TODO - This transformation could remove entries from a phi in the target +/// block when the inputs in the phi are the same for the two blocks being +/// merged. In some cases, this could result in removal of the PHI entirely. +static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, + BasicBlock *BB) { + auto Succ = BB->getUniqueSuccessor(); + assert(Succ); + // If there's a phi in the successor block, we'd likely have to introduce + // a phi into the merged landing pad block. + if (isa<PHINode>(*Succ->begin())) + return false; + + for (BasicBlock *OtherPred : predecessors(Succ)) { + if (BB == OtherPred) + continue; + BasicBlock::iterator I = OtherPred->begin(); + LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I); + if (!LPad2 || !LPad2->isIdenticalTo(LPad)) + continue; + for (++I; isa<DbgInfoIntrinsic>(I); ++I) {} + BranchInst *BI2 = dyn_cast<BranchInst>(I); + if (!BI2 || !BI2->isIdenticalTo(BI)) + continue; + + // We've found an identical block. Update our predecessors to take that + // path instead and make ourselves dead.
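The merge test the loop above just completed boils down to structural identity: another predecessor of the shared successor qualifies when it starts with an identical landingpad and ends, modulo debug intrinsics, in an identical unconditional branch. As a stand-alone sketch (the helper name is illustrative and the debug-intrinsic skip is omitted):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // True when both blocks begin with identical landingpad instructions and
    // end in identical unconditional branches, i.e. they can share a handler.
    static bool isSameEmptyLandingPad(BasicBlock *A, BasicBlock *B) {
      auto *LPA = dyn_cast<LandingPadInst>(&A->front());
      auto *LPB = dyn_cast<LandingPadInst>(&B->front());
      if (!LPA || !LPB || !LPA->isIdenticalTo(LPB))
        return false;
      auto *BrA = dyn_cast<BranchInst>(A->getTerminator());
      auto *BrB = dyn_cast<BranchInst>(B->getTerminator());
      return BrA && BrB && BrA->isIdenticalTo(BrB);
    }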
+ SmallSet<BasicBlock *, 16> Preds; + Preds.insert(pred_begin(BB), pred_end(BB)); + for (BasicBlock *Pred : Preds) { + InvokeInst *II = cast<InvokeInst>(Pred->getTerminator()); + assert(II->getNormalDest() != BB && + II->getUnwindDest() == BB && "unexpected successor"); + II->setUnwindDest(OtherPred); + } + + // The debug info in OtherPred doesn't cover the merged control flow that + // used to go through BB. We need to delete it or update it. + for (auto I = OtherPred->begin(), E = OtherPred->end(); + I != E;) { + Instruction &Inst = *I; I++; + if (isa<DbgInfoIntrinsic>(Inst)) + Inst.eraseFromParent(); + } + + SmallSet<BasicBlock *, 16> Succs; + Succs.insert(succ_begin(BB), succ_end(BB)); + for (BasicBlock *Succ : Succs) { + Succ->removePredecessor(BB); + } + + IRBuilder<> Builder(BI); + Builder.CreateUnreachable(); + BI->eraseFromParent(); + return true; + } + return false; +} + bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ BasicBlock *BB = BI->getParent(); @@ -4384,17 +4444,26 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ for (++I; isa<DbgInfoIntrinsic>(I); ++I) ; if (I->isTerminator() && - TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI, - BonusInstThreshold, DL, AC)) + TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, DL, TTI, + BonusInstThreshold, AC)) return true; } + // See if we can merge an empty landing pad block with another which is + // equivalent. + if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) { + for (++I; isa<DbgInfoIntrinsic>(I); ++I) {} + if (I->isTerminator() && + TryToMergeLandingPad(LPad, BI, BB)) + return true; + } + // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. - if (FoldBranchToCommonDest(BI, DL, BonusInstThreshold)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (FoldBranchToCommonDest(BI, BonusInstThreshold)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; return false; } @@ -4409,7 +4478,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // switch. if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; // This block must be empty, except for the setcond inst, if it exists. // Ignore dbg intrinsics. @@ -4419,26 +4488,26 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { ++I; if (&*I == BI) { if (FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } else if (&*I == cast<Instruction>(BI->getCondition())){ ++I; // Ignore dbg intrinsics. while (isa<DbgInfoIntrinsic>(I)) ++I; if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } } // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction. 
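The comment just above describes what SimplifyBranchOnICmpChain, called on the next hunk line, ultimately builds once the or/and chain is matched: one switch over the common compared value. The construction step, in isolation (CompVal, Values, DefaultBB and EdgeBB are the names that function uses internally):

    // br (X == 0 | X == 1), T, F  ==>  switch X { 0, 1 -> T; default -> F }
    SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
    for (ConstantInt *CaseVal : Values)
      New->addCase(CaseVal, EdgeBB);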
- if (SimplifyBranchOnICmpChain(BI, DL, Builder)) + if (SimplifyBranchOnICmpChain(BI, Builder, DL)) return true; // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI, DL, BonusInstThreshold)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (FoldBranchToCommonDest(BI, BonusInstThreshold)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; // We have a conditional branch to two blocks that are only reachable // from BI. We know that the condbr dominates the two blocks, so see if @@ -4446,16 +4515,16 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // can hoist it up to the branching block. if (BI->getSuccessor(0)->getSinglePredecessor()) { if (BI->getSuccessor(1)->getSinglePredecessor()) { - if (HoistThenElseCodeToIf(BI, DL, TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (HoistThenElseCodeToIf(BI, TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } else { // If Successor #1 has multiple preds, we may be able to conditionally // execute Successor #0 if it branches to Successor #1. TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator(); if (Succ0TI->getNumSuccessors() == 1 && Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), DL, TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } } else if (BI->getSuccessor(1)->getSinglePredecessor()) { // If Successor #0 has multiple preds, we may be able to conditionally @@ -4463,8 +4532,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator(); if (Succ1TI->getNumSuccessors() == 1 && Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), DL, TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } // If this is a branch on a phi node in the current block, thread control @@ -4472,14 +4541,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) if (PN->getParent() == BI->getParent()) if (FoldCondBranchOnPHI(BI, DL)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; // Scan predecessor blocks for conditional branches. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) if (SimplifyCondBranchToCondBranch(PBI, BI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; return false; } @@ -4591,7 +4660,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // eliminate it, do so now. 
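The two-entry PHI case handed to FoldTwoEntryPHINode below has a compact intuition: when both incoming values are cheap and safe to compute before the branch, the diamond collapses and the PHI becomes a select on the branch condition. Sketch (Cond, TrueVal and FalseVal are illustrative names for the condition and the two incoming values):

    //   %x = phi i32 [ %a, %then ], [ %b, %else ]
    // becomes, once both arms are hoisted:
    //   %x = select i1 %cond, i32 %a, i32 %b
    Value *Sel = Builder.CreateSelect(Cond, TrueVal, FalseVal, "phi.sel");
    PN->replaceAllUsesWith(Sel);
    PN->eraseFromParent();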
if (PHINode *PN = dyn_cast<PHINode>(BB->begin())) if (PN->getNumIncomingValues() == 2) - Changed |= FoldTwoEntryPHINode(PN, DL, TTI); + Changed |= FoldTwoEntryPHINode(PN, TTI, DL); Builder.SetInsertPoint(BB->getTerminator()); if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { @@ -4623,7 +4692,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { /// of the CFG. It returns true if a modification was made. /// bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, - unsigned BonusInstThreshold, const DataLayout *DL, - AssumptionCache *AC) { - return SimplifyCFGOpt(TTI, BonusInstThreshold, DL, AC).run(BB); + unsigned BonusInstThreshold, AssumptionCache *AC) { + return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), + BonusInstThreshold, AC).run(BB); } diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index 6a5d885..8bfc5fb 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -270,95 +270,57 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, Value *IVOperand) { - // Currently we only handle instructions of the form "add <indvar> <value>" - unsigned Op = BO->getOpcode(); - if (Op != Instruction::Add) + // Fastpath: we don't have any work to do if `BO` is `nuw` and `nsw`. + if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap()) return false; - // If BO is already both nuw and nsw then there is nothing left to do - if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap()) + const SCEV *(ScalarEvolution::*GetExprForBO)(const SCEV *, const SCEV *, + SCEV::NoWrapFlags); + + switch (BO->getOpcode()) { + default: return false; - IntegerType *IT = cast<IntegerType>(IVOperand->getType()); - Value *OtherOperand = nullptr; - if (BO->getOperand(0) == IVOperand) { - OtherOperand = BO->getOperand(1); - } else { - assert(BO->getOperand(1) == IVOperand && "only other use!"); - OtherOperand = BO->getOperand(0); + case Instruction::Add: + GetExprForBO = &ScalarEvolution::getAddExpr; + break; + + case Instruction::Sub: + GetExprForBO = &ScalarEvolution::getMinusSCEV; + break; + + case Instruction::Mul: + GetExprForBO = &ScalarEvolution::getMulExpr; + break; } - bool Changed = false; - const SCEV *OtherOpSCEV = SE->getSCEV(OtherOperand); - if (OtherOpSCEV == SE->getCouldNotCompute()) - return false; + unsigned BitWidth = cast<IntegerType>(BO->getType())->getBitWidth(); + Type *WideTy = IntegerType::get(BO->getContext(), BitWidth * 2); + const SCEV *LHS = SE->getSCEV(BO->getOperand(0)); + const SCEV *RHS = SE->getSCEV(BO->getOperand(1)); - const SCEV *IVOpSCEV = SE->getSCEV(IVOperand); - const SCEV *ZeroSCEV = SE->getConstant(IVOpSCEV->getType(), 0); + bool Changed = false; - if (!BO->hasNoSignedWrap()) { - // Upgrade the add to an "add nsw" if we can prove that it will never - // sign-overflow or sign-underflow. - - const SCEV *SignedMax = - SE->getConstant(APInt::getSignedMaxValue(IT->getBitWidth())); - const SCEV *SignedMin = - SE->getConstant(APInt::getSignedMinValue(IT->getBitWidth())); - - // The addition "IVOperand + OtherOp" does not sign-overflow if the result - // is sign-representable in 2's complement in the given bit-width. - // - // If OtherOp is SLT 0, then for an IVOperand in [SignedMin - OtherOp, - // SignedMax], "IVOperand + OtherOp" is in [SignedMin, SignedMax + OtherOp]. - // Everything in [SignedMin, SignedMax + OtherOp] is representable since - // SignedMax + OtherOp is at least -1. 
- // - // If OtherOp is SGE 0, then for an IVOperand in [SignedMin, SignedMax - - // OtherOp], "IVOperand + OtherOp" is in [SignedMin + OtherOp, SignedMax]. - // Everything in [SignedMin + OtherOp, SignedMax] is representable since - // SignedMin + OtherOp is at most -1. - // - // It follows that for all values of IVOperand in [SignedMin - smin(0, - // OtherOp), SignedMax - smax(0, OtherOp)] the result of the add is - // representable (i.e. there is no sign-overflow). - - const SCEV *UpperDelta = SE->getSMaxExpr(ZeroSCEV, OtherOpSCEV); - const SCEV *UpperLimit = SE->getMinusSCEV(SignedMax, UpperDelta); - - bool NeverSignedOverflows = - SE->isKnownPredicate(ICmpInst::ICMP_SLE, IVOpSCEV, UpperLimit); - - if (NeverSignedOverflows) { - const SCEV *LowerDelta = SE->getSMinExpr(ZeroSCEV, OtherOpSCEV); - const SCEV *LowerLimit = SE->getMinusSCEV(SignedMin, LowerDelta); - - bool NeverSignedUnderflows = - SE->isKnownPredicate(ICmpInst::ICMP_SGE, IVOpSCEV, LowerLimit); - if (NeverSignedUnderflows) { - BO->setHasNoSignedWrap(true); - Changed = true; - } + if (!BO->hasNoUnsignedWrap()) { + const SCEV *ExtendAfterOp = SE->getZeroExtendExpr(SE->getSCEV(BO), WideTy); + const SCEV *OpAfterExtend = (SE->*GetExprForBO)( + SE->getZeroExtendExpr(LHS, WideTy), SE->getZeroExtendExpr(RHS, WideTy), + SCEV::FlagAnyWrap); + if (ExtendAfterOp == OpAfterExtend) { + BO->setHasNoUnsignedWrap(); + SE->forgetValue(BO); + Changed = true; } } - if (!BO->hasNoUnsignedWrap()) { - // Upgrade the add computing "IVOperand + OtherOp" to an "add nuw" if we can - // prove that it will never unsigned-overflow (i.e. the result will always - // be representable in the given bit-width). - // - // "IVOperand + OtherOp" is unsigned-representable in 2's complement iff it - // does not produce a carry. "IVOperand + OtherOp" produces no carry iff - // IVOperand ULE (UnsignedMax - OtherOp). - - const SCEV *UnsignedMax = - SE->getConstant(APInt::getMaxValue(IT->getBitWidth())); - const SCEV *UpperLimit = SE->getMinusSCEV(UnsignedMax, OtherOpSCEV); - - bool NeverUnsignedOverflows = - SE->isKnownPredicate(ICmpInst::ICMP_ULE, IVOpSCEV, UpperLimit); - - if (NeverUnsignedOverflows) { - BO->setHasNoUnsignedWrap(true); + if (!BO->hasNoSignedWrap()) { + const SCEV *ExtendAfterOp = SE->getSignExtendExpr(SE->getSCEV(BO), WideTy); + const SCEV *OpAfterExtend = (SE->*GetExprForBO)( + SE->getSignExtendExpr(LHS, WideTy), SE->getSignExtendExpr(RHS, WideTy), + SCEV::FlagAnyWrap); + if (ExtendAfterOp == OpAfterExtend) { + BO->setHasNoSignedWrap(); + SE->forgetValue(BO); Changed = true; } } diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index 55a4455..c499c87 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -51,8 +51,7 @@ namespace { const DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? 
&DLP->getDataLayout() : nullptr;
+ const DataLayout &DL = F.getParent()->getDataLayout();
const TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
AssumptionCache *AC =
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index fb1d83f..5867d65 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -120,12 +120,12 @@ static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
/// string/memory copying library function \p Func.
/// Acceptable functions are st[rp][n]?cpy, memmove, memcpy, and memset.
/// Their fortified (_chk) counterparts are also accepted.
-static bool checkStringCopyLibFuncSignature(Function *F, LibFunc::Func Func,
- const DataLayout *DL) {
+static bool checkStringCopyLibFuncSignature(Function *F, LibFunc::Func Func) {
+ const DataLayout &DL = F->getParent()->getDataLayout();
FunctionType *FT = F->getFunctionType();
LLVMContext &Context = F->getContext();
Type *PCharTy = Type::getInt8PtrTy(Context);
- Type *SizeTTy = DL ? DL->getIntPtrType(Context) : nullptr;
+ Type *SizeTTy = DL.getIntPtrType(Context);
unsigned NumParams = FT->getNumParams();
// All string libfuncs return the same type as the first parameter.
@@ -208,10 +208,6 @@ Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilder<> &B) {
if (Len == 0)
return Dst;
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
return emitStrLenMemCpy(Src, Dst, Len, B);
}
@@ -230,9 +226,9 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
// We have enough information to now generate the memcpy call to do the
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
- B.CreateMemCpy(
- CpyDst, Src,
- ConstantInt::get(DL->getIntPtrType(Src->getContext()), Len + 1), 1);
+ B.CreateMemCpy(CpyDst, Src,
+ ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1),
+ 1);
return Dst;
}
@@ -269,10 +265,6 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) {
if (SrcLen == 0 || Len == 0)
return Dst;
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
// We don't optimize this case
if (Len < SrcLen)
return nullptr;
@@ -297,24 +289,20 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
// of the input string and turn this into memchr.
ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
if (!CharC) {
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
uint64_t Len = GetStringLength(SrcStr);
if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
return nullptr;
- return EmitMemChr(
- SrcStr, CI->getArgOperand(1), // include nul.
- ConstantInt::get(DL->getIntPtrType(CI->getContext()), Len), B, DL, TLI);
+ return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len),
+ B, DL, TLI);
}
// Otherwise, the character is a constant, see if the first argument is
// a string literal. If so, we can constant fold.
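// Illustrative instance of this fold (hypothetical input, not from the
// patch): strchr("hello", 'l') becomes a GEP to index 2 of the string,
// while strchr("hello", 'z') folds directly to a null pointer since the
// character does not occur.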
StringRef Str; if (!getConstantStringInfo(SrcStr, Str)) { - if (DL && CharC->isZero()) // strchr(p, 0) -> p + strlen(p) + if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p) return B.CreateGEP(SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr"); return nullptr; } @@ -350,8 +338,8 @@ Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) { StringRef Str; if (!getConstantStringInfo(SrcStr, Str)) { // strrchr(s, 0) -> strchr(s, 0) - if (DL && CharC->isZero()) - return EmitStrChr(SrcStr, '\0', B, DL, TLI); + if (CharC->isZero()) + return EmitStrChr(SrcStr, '\0', B, TLI); return nullptr; } @@ -398,12 +386,8 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) { uint64_t Len1 = GetStringLength(Str1P); uint64_t Len2 = GetStringLength(Str2P); if (Len1 && Len2) { - // These optimizations require DataLayout. - if (!DL) - return nullptr; - return EmitMemCmp(Str1P, Str2P, - ConstantInt::get(DL->getIntPtrType(CI->getContext()), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), std::min(Len1, Len2)), B, DL, TLI); } @@ -435,7 +419,7 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) { if (Length == 0) // strncmp(x,y,0) -> 0 return ConstantInt::get(CI->getType(), 0); - if (DL && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) + if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI); StringRef Str1, Str2; @@ -462,17 +446,13 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strcpy, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strcpy)) return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) // strcpy(x,x) -> x return Src; - // These optimizations require DataLayout. - if (!DL) - return nullptr; - // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); if (Len == 0) @@ -481,7 +461,7 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) { // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. B.CreateMemCpy(Dst, Src, - ConstantInt::get(DL->getIntPtrType(CI->getContext()), Len), 1); + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), 1); return Dst; } @@ -490,11 +470,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { // Verify the "stpcpy" function prototype. FunctionType *FT = Callee->getFunctionType(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::stpcpy, DL)) - return nullptr; - - // These optimizations require DataLayout. - if (!DL) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::stpcpy)) return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); @@ -509,9 +485,9 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { return nullptr; Type *PT = FT->getParamType(0); - Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len); + Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len); Value *DstEnd = - B.CreateGEP(Dst, ConstantInt::get(DL->getIntPtrType(PT), Len - 1)); + B.CreateGEP(Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1)); // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. 
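// A worked example of the stpcpy fold above (illustrative): for
// stpcpy(Dst, "ab"), Len is 3 including the nul, so the call becomes
//   memcpy(Dst, "ab", 3);
// and the returned value is DstEnd = Dst + 2, the address of the copied
// nul terminator, matching stpcpy's semantics.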
@@ -523,7 +499,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); FunctionType *FT = Callee->getFunctionType(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strncpy, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strncpy)) return nullptr; Value *Dst = CI->getArgOperand(0); @@ -551,17 +527,13 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { if (Len == 0) return Dst; // strncpy(x, y, 0) -> x - // These optimizations require DataLayout. - if (!DL) - return nullptr; - // Let strncpy handle the zero padding if (Len > SrcLen + 1) return nullptr; Type *PT = FT->getParamType(0); // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] - B.CreateMemCpy(Dst, Src, ConstantInt::get(DL->getIntPtrType(PT), Len), 1); + B.CreateMemCpy(Dst, Src, ConstantInt::get(DL.getIntPtrType(PT), Len), 1); return Dst; } @@ -629,8 +601,8 @@ Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) { } // strpbrk(s, "a") -> strchr(s, 'a') - if (DL && HasS2 && S2.size() == 1) - return EmitStrChr(CI->getArgOperand(0), S2[0], B, DL, TLI); + if (HasS2 && S2.size() == 1) + return EmitStrChr(CI->getArgOperand(0), S2[0], B, TLI); return nullptr; } @@ -706,7 +678,7 @@ Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilder<> &B) { } // strcspn(s, "") -> strlen(s) - if (DL && HasS2 && S2.empty()) + if (HasS2 && S2.empty()) return EmitStrLen(CI->getArgOperand(0), B, DL, TLI); return nullptr; @@ -725,7 +697,7 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) { return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 - if (DL && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { + if (isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, DL, TLI); if (!StrLen) return nullptr; @@ -767,12 +739,98 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) { // fold strstr(x, "y") -> strchr(x, 'y'). if (HasStr2 && ToFindStr.size() == 1) { - Value *StrChr = EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, DL, TLI); + Value *StrChr = EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI); return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr; } return nullptr; } +Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) { + Function *Callee = CI->getCalledFunction(); + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy(32) || + !FT->getParamType(2)->isIntegerTy() || + !FT->getReturnType()->isPointerTy()) + return nullptr; + + Value *SrcStr = CI->getArgOperand(0); + ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + + // memchr(x, y, 0) -> null + if (LenC && LenC->isNullValue()) + return Constant::getNullValue(CI->getType()); + + // From now on we need at least constant length and string. + StringRef Str; + if (!LenC || !getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false)) + return nullptr; + + // Truncate the string to LenC. If Str is smaller than LenC we will still only + // scan the string, as reading past the end of it is undefined and we can just + // return null if we don't find the char. 
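// Illustratively: for memchr("hello\0world", 'w', 5) the string is clipped
// to "hello", the character is not found within the first five bytes, and
// the call folds to null.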
+ Str = Str.substr(0, LenC->getZExtValue()); + + // If the char is variable but the input str and length are not we can turn + // this memchr call into a simple bit field test. Of course this only works + // when the return value is only checked against null. + // + // It would be really nice to reuse switch lowering here but we can't change + // the CFG at this point. + // + // memchr("\r\n", C, 2) != nullptr -> (C & ((1 << '\r') | (1 << '\n'))) != 0 + // after bounds check. + if (!CharC && !Str.empty() && isOnlyUsedInZeroEqualityComparison(CI)) { + unsigned char Max = + *std::max_element(reinterpret_cast<const unsigned char *>(Str.begin()), + reinterpret_cast<const unsigned char *>(Str.end())); + + // Make sure the bit field we're about to create fits in a register on the + // target. + // FIXME: On a 64 bit architecture this prevents us from using the + // interesting range of alpha ascii chars. We could do better by emitting + // two bitfields or shifting the range by 64 if no lower chars are used. + if (!DL.fitsInLegalInteger(Max + 1)) + return nullptr; + + // For the bit field use a power-of-2 type with at least 8 bits to avoid + // creating unnecessary illegal types. + unsigned char Width = NextPowerOf2(std::max((unsigned char)7, Max)); + + // Now build the bit field. + APInt Bitfield(Width, 0); + for (char C : Str) + Bitfield.setBit((unsigned char)C); + Value *BitfieldC = B.getInt(Bitfield); + + // First check that the bit field access is within bounds. + Value *C = B.CreateZExtOrTrunc(CI->getArgOperand(1), BitfieldC->getType()); + Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width), + "memchr.bounds"); + + // Create code that checks if the given bit is set in the field. + Value *Shl = B.CreateShl(B.getIntN(Width, 1ULL), C); + Value *Bits = B.CreateIsNotNull(B.CreateAnd(Shl, BitfieldC), "memchr.bits"); + + // Finally merge both checks and cast to pointer type. The inttoptr + // implicitly zexts the i1 to intptr type. + return B.CreateIntToPtr(B.CreateAnd(Bounds, Bits, "memchr"), CI->getType()); + } + + // Check if all arguments are constants. If so, we can constant fold. + if (!CharC) + return nullptr; + + // Compute the offset. + size_t I = Str.find(CharC->getSExtValue() & 0xFF); + if (I == StringRef::npos) // Didn't find the char. memchr returns null. + return Constant::getNullValue(CI->getType()); + + // memchr(s+n,c,l) -> gep(s+n+i,c) + return B.CreateGEP(SrcStr, B.getInt64(I), "memchr"); +} + Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); FunctionType *FT = Callee->getFunctionType(); @@ -827,11 +885,8 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - // These optimizations require DataLayout. - if (!DL) - return nullptr; - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy)) return nullptr; // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) @@ -842,11 +897,8 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - // These optimizations require DataLayout. 
- if (!DL) - return nullptr; - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove)) return nullptr; // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) @@ -857,11 +909,8 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - // These optimizations require DataLayout. - if (!DL) - return nullptr; - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset)) return nullptr; // memset(p, v, n) -> llvm.memset(p, v, n, 1) @@ -1521,7 +1570,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) { // printf("x") -> putchar('x'), even for '%'. if (FormatStr.size() == 1) { - Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, DL, TLI); + Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TLI); if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); @@ -1534,7 +1583,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) { // pass to be run after this pass, to merge duplicate strings. FormatStr = FormatStr.drop_back(); Value *GV = B.CreateGlobalString(FormatStr, "str"); - Value *NewCI = EmitPutS(GV, B, DL, TLI); + Value *NewCI = EmitPutS(GV, B, TLI); return (CI->use_empty() || !NewCI) ? NewCI : ConstantInt::get(CI->getType(), FormatStr.size() + 1); @@ -1544,7 +1593,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) { // printf("%c", chr) --> putchar(chr) if (FormatStr == "%c" && CI->getNumArgOperands() > 1 && CI->getArgOperand(1)->getType()->isIntegerTy()) { - Value *Res = EmitPutChar(CI->getArgOperand(1), B, DL, TLI); + Value *Res = EmitPutChar(CI->getArgOperand(1), B, TLI); if (CI->use_empty() || !Res) return Res; @@ -1554,7 +1603,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) { // printf("%s\n", str) --> puts(str) if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && CI->getArgOperand(1)->getType()->isPointerTy()) { - return EmitPutS(CI->getArgOperand(1), B, DL, TLI); + return EmitPutS(CI->getArgOperand(1), B, TLI); } return nullptr; } @@ -1600,16 +1649,11 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) { if (FormatStr[i] == '%') return nullptr; // we found a format specifier, bail out. - // These optimizations require DataLayout. - if (!DL) - return nullptr; - // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) - B.CreateMemCpy( - CI->getArgOperand(0), CI->getArgOperand(1), - ConstantInt::get(DL->getIntPtrType(CI->getContext()), - FormatStr.size() + 1), - 1); // Copy the null byte. + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), + FormatStr.size() + 1), + 1); // Copy the null byte. return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1634,10 +1678,6 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) { } if (FormatStr[1] == 's') { - // These optimizations require DataLayout. 
- if (!DL)
- return nullptr;
-
// sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
if (!CI->getArgOperand(2)->getType()->isPointerTy())
return nullptr;
@@ -1702,13 +1742,9 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) {
if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
return nullptr; // We found a format specifier.
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
return EmitFWrite(
CI->getArgOperand(1),
- ConstantInt::get(DL->getIntPtrType(CI->getContext()), FormatStr.size()),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), FormatStr.size()),
CI->getArgOperand(0), B, DL, TLI);
}
@@ -1723,14 +1759,14 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) {
// fprintf(F, "%c", chr) --> fputc(chr, F)
if (!CI->getArgOperand(2)->getType()->isIntegerTy())
return nullptr;
- return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI);
+ return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI);
}
if (FormatStr[1] == 's') {
// fprintf(F, "%s", str) --> fputs(str, F)
if (!CI->getArgOperand(2)->getType()->isPointerTy())
return nullptr;
- return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI);
+ return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI);
}
return nullptr;
}
@@ -1790,7 +1826,7 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) {
// This optimisation is only valid if the return value is unused.
if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
- Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, DL, TLI);
+ Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TLI);
return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
}
@@ -1802,10 +1838,6 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
// Require two pointers. Also, we can't optimize if return value is used.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
@@ -1820,7 +1852,7 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
// Known to have no uses (see above).
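// Illustratively, for fputs("ab", F): Len is 3 (including the nul), so the
// call below becomes fwrite("ab", 2, 1, F); the nul byte itself is never
// written.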
return EmitFWrite( CI->getArgOperand(0), - ConstantInt::get(DL->getIntPtrType(CI->getContext()), Len - 1), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len - 1), CI->getArgOperand(1), B, DL, TLI); } @@ -1839,7 +1871,7 @@ Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) { if (Str.empty() && CI->use_empty()) { // puts("") -> putchar('\n') - Value *Res = EmitPutChar(B.getInt32('\n'), B, DL, TLI); + Value *Res = EmitPutChar(B.getInt32('\n'), B, TLI); if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); @@ -1906,6 +1938,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, return optimizeStrCSpn(CI, Builder); case LibFunc::strstr: return optimizeStrStr(CI, Builder); + case LibFunc::memchr: + return optimizeMemChr(CI, Builder); case LibFunc::memcmp: return optimizeMemCmp(CI, Builder); case LibFunc::memcpy: @@ -2089,9 +2123,9 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { } LibCallSimplifier::LibCallSimplifier( - const DataLayout *DL, const TargetLibraryInfo *TLI, + const DataLayout &DL, const TargetLibraryInfo *TLI, function_ref<void(Instruction *, Value *)> Replacer) - : FortifiedSimplifier(DL, TLI), DL(DL), TLI(TLI), UnsafeFPShrink(false), + : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), UnsafeFPShrink(false), Replacer(Replacer) {} void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) { @@ -2187,7 +2221,7 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy_chk, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy_chk)) return nullptr; if (isFortifiedCallFoldable(CI, 3, 2, false)) { @@ -2201,7 +2235,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> & Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove_chk, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove_chk)) return nullptr; if (isFortifiedCallFoldable(CI, 3, 2, false)) { @@ -2215,7 +2249,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset_chk, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset_chk)) return nullptr; if (isFortifiedCallFoldable(CI, 3, 2, false)) { @@ -2231,8 +2265,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, LibFunc::Func Func) { Function *Callee = CI->getCalledFunction(); StringRef Name = Callee->getName(); + const DataLayout &DL = CI->getModule()->getDataLayout(); - if (!checkStringCopyLibFuncSignature(Callee, Func, DL)) + if (!checkStringCopyLibFuncSignature(Callee, Func)) return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1), @@ -2250,7 +2285,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, // TODO: It might be nice to get a maximum length out of the possible // string lengths for varying. 
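// A minimal sketch of the foldable case: __strcpy_chk(Dst, Src, DstLen)
// with a known DstLen that covers the copied bytes behaves exactly like
// strcpy(Dst, Src), which is what the check below establishes before the
// fortified call is replaced.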
if (isFortifiedCallFoldable(CI, 2, 1, true)) {
- Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6));
+ Value *Ret = EmitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6));
return Ret;
} else if (!OnlyLowerUnknownSize) {
// Maybe we can still fold __st[rp]cpy_chk to __memcpy_chk.
uint64_t Len = GetStringLength(Src);
if (Len == 0)
return nullptr;
- // This optimization requires DataLayout.
- if (!DL)
- return nullptr;
-
- Type *SizeTTy = DL->getIntPtrType(CI->getContext());
+ Type *SizeTTy = DL.getIntPtrType(CI->getContext());
Value *LenV = ConstantInt::get(SizeTTy, Len);
Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
// If the function was an __stpcpy_chk, and we were able to fold it into
@@ -2280,12 +2311,11 @@ Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
- if (!checkStringCopyLibFuncSignature(Callee, Func, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, Func))
return nullptr;
if (isFortifiedCallFoldable(CI, 3, 2, false)) {
- Value *Ret =
- EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, DL, TLI, Name.substr(2, 7));
+ Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI, Name.substr(2, 7));
return Ret;
}
return nullptr;
@@ -2328,8 +2358,6 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
return nullptr;
}
-FortifiedLibCallSimplifier::
-FortifiedLibCallSimplifier(const DataLayout *DL, const TargetLibraryInfo *TLI,
- bool OnlyLowerUnknownSize)
- : DL(DL), TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {
-}
+FortifiedLibCallSimplifier::FortifiedLibCallSimplifier(
+ const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize)
+ : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {}
diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp
index b343cc4..a2a54da 100644
--- a/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -60,6 +60,7 @@
#define DEBUG_TYPE "symbol-rewriter"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Pass.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -72,15 +73,15 @@
#include "llvm/Transforms/Utils/SymbolRewriter.h"
using namespace llvm;
+using namespace SymbolRewriter;
static cl::list<std::string> RewriteMapFiles("rewrite-map-file",
cl::desc("Symbol Rewrite Map"),
cl::value_desc("filename"));
-namespace llvm {
-namespace SymbolRewriter {
-void rewriteComdat(Module &M, GlobalObject *GO, const std::string &Source,
- const std::string &Target) {
+static void rewriteComdat(Module &M, GlobalObject *GO,
+ const std::string &Source,
+ const std::string &Target) {
if (Comdat *CD = GO->getComdat()) {
auto &Comdats = M.getComdatSymbolTable();
@@ -92,6 +93,7 @@ void rewriteComdat(Module &M, GlobalObject *GO, const std::string &Source,
}
}
+namespace {
template <RewriteDescriptor::Type DT, typename ValueType,
ValueType *(llvm::Module::*Get)(StringRef) const>
class ExplicitRewriteDescriptor : public RewriteDescriptor {
@@ -226,6 +228,7 @@ typedef PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias,
&llvm::Module::getNamedAlias,
&llvm::Module::aliases>
PatternRewriteNamedAliasDescriptor;
+} // namespace
bool RewriteMapParser::parse(const std::string &MapFile,
RewriteDescriptorList *DL) {
@@ -489,8 +492,6 @@
parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, return true; } -} -} namespace { class RewriteSymbols : public ModulePass { diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index 49c0902..54c7688 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -291,14 +291,18 @@ static Metadata *MapMetadataImpl(const Metadata *MD, return nullptr; } + // Note: this cast precedes the Flags check so we always get its associated + // assertion. const MDNode *Node = cast<MDNode>(MD); - assert(Node->isResolved() && "Unexpected unresolved node"); // If this is a module-level metadata and we know that nothing at the // module level is changing, then use an identity mapping. if (Flags & RF_NoModuleLevelChanges) return mapToSelf(VM, MD); + // Require resolved nodes whenever metadata might be remapped. + assert(Node->isResolved() && "Unexpected unresolved node"); + if (Node->isDistinct()) return mapDistinctNode(Node, Cycles, VM, Flags, TypeMapper, Materializer); diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 525c050..29fb01f 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -39,6 +39,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" @@ -206,8 +207,6 @@ namespace { AA = &P->getAnalysis<AliasAnalysis>(); DT = &P->getAnalysis<DominatorTreeWrapperPass>().getDomTree(); SE = &P->getAnalysis<ScalarEvolution>(); - DataLayoutPass *DLP = P->getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; TTI = IgnoreTargetInfo ? nullptr : &P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); @@ -222,7 +221,6 @@ namespace { AliasAnalysis *AA; DominatorTree *DT; ScalarEvolution *SE; - const DataLayout *DL; const TargetTransformInfo *TTI; // FIXME: const correct? @@ -442,8 +440,6 @@ namespace { AA = &getAnalysis<AliasAnalysis>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); SE = &getAnalysis<ScalarEvolution>(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; TTI = IgnoreTargetInfo ? nullptr : &getAnalysis<TargetTransformInfoWrapperPass>().getTTI( @@ -642,19 +638,19 @@ namespace { dyn_cast<SCEVConstant>(OffsetSCEV)) { ConstantInt *IntOff = ConstOffSCEV->getValue(); int64_t Offset = IntOff->getSExtValue(); - + const DataLayout &DL = I->getModule()->getDataLayout(); Type *VTy = IPtr->getType()->getPointerElementType(); - int64_t VTyTSS = (int64_t) DL->getTypeStoreSize(VTy); + int64_t VTyTSS = (int64_t)DL.getTypeStoreSize(VTy); Type *VTy2 = JPtr->getType()->getPointerElementType(); if (VTy != VTy2 && Offset < 0) { - int64_t VTy2TSS = (int64_t) DL->getTypeStoreSize(VTy2); + int64_t VTy2TSS = (int64_t)DL.getTypeStoreSize(VTy2); OffsetInElmts = Offset/VTy2TSS; - return (abs64(Offset) % VTy2TSS) == 0; + return (std::abs(Offset) % VTy2TSS) == 0; } OffsetInElmts = Offset/VTyTSS; - return (abs64(Offset) % VTyTSS) == 0; + return (std::abs(Offset) % VTyTSS) == 0; } return false; @@ -846,7 +842,7 @@ namespace { // It is important to cleanup here so that future iterations of this // function have less work to do. 
- (void) SimplifyInstructionsInBlock(&BB, DL, AA->getTargetLibraryInfo()); + (void)SimplifyInstructionsInBlock(&BB, AA->getTargetLibraryInfo()); return true; } @@ -900,10 +896,6 @@ namespace { return false; } - // We can't vectorize memory operations without target data - if (!DL && IsSimpleLoadStore) - return false; - Type *T1, *T2; getInstructionTypes(I, T1, T2); @@ -938,9 +930,8 @@ namespace { if (T2->isX86_FP80Ty() || T2->isPPC_FP128Ty() || T2->isX86_MMXTy()) return false; - if ((!Config.VectorizePointers || !DL) && - (T1->getScalarType()->isPointerTy() || - T2->getScalarType()->isPointerTy())) + if (!Config.VectorizePointers && (T1->getScalarType()->isPointerTy() || + T2->getScalarType()->isPointerTy())) return false; if (!TTI && (T1->getPrimitiveSizeInBits() >= Config.VectorBits || @@ -985,8 +976,8 @@ namespace { unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace; int64_t OffsetInElmts = 0; if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, - IAddressSpace, JAddressSpace, - OffsetInElmts) && abs64(OffsetInElmts) == 1) { + IAddressSpace, JAddressSpace, OffsetInElmts) && + std::abs(OffsetInElmts) == 1) { FixedOrder = (int) OffsetInElmts; unsigned BottomAlignment = IAlignment; if (OffsetInElmts < 0) BottomAlignment = JAlignment; @@ -1001,8 +992,8 @@ namespace { // An aligned load or store is possible only if the instruction // with the lower offset has an alignment suitable for the // vector type. - - unsigned VecAlignment = DL->getPrefTypeAlignment(VType); + const DataLayout &DL = I->getModule()->getDataLayout(); + unsigned VecAlignment = DL.getPrefTypeAlignment(VType); if (BottomAlignment < VecAlignment) return false; } diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 6142306..b7d0ae4 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -218,6 +218,15 @@ public: R.getInstr()) {} }; +/// A helper function for converting Scalar types to vector types. +/// If the incoming type is void, we return void. If the VF is 1, we return +/// the scalar type. +static Type* ToVectorTy(Type *Scalar, unsigned VF) { + if (Scalar->isVoidTy() || VF == 1) + return Scalar; + return VectorType::get(Scalar, VF); +} + /// InnerLoopVectorizer vectorizes loops which contain only one basic /// block to a specified vectorization factor (VF). /// This class performs the widening of scalars into vectors, or multiple @@ -235,13 +244,13 @@ public: class InnerLoopVectorizer { public: InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI, - DominatorTree *DT, const DataLayout *DL, - const TargetLibraryInfo *TLI, unsigned VecWidth, + DominatorTree *DT, const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI, unsigned VecWidth, unsigned UnrollFactor) - : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), TLI(TLI), + : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()), Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor), - Legal(nullptr) {} + Legal(nullptr), AddedSafetyChecks(false) {} // Perform the actual loop widening (vectorization). void vectorize(LoopVectorizationLegality *L) { @@ -255,6 +264,11 @@ public: updateAnalysis(); } + // Return true if any runtime check is added. + bool IsSafetyChecksAdded() { + return AddedSafetyChecks; + } + virtual ~InnerLoopVectorizer() {} protected: @@ -389,10 +403,10 @@ protected: DominatorTree *DT; /// Alias Analysis. 
AliasAnalysis *AA; - /// Data Layout. - const DataLayout *DL; /// Target Library Info. const TargetLibraryInfo *TLI; + /// Target Transform Info. + const TargetTransformInfo *TTI; /// The vectorization SIMD factor to use. Each vector will have this many /// vector elements. @@ -434,14 +448,17 @@ protected: EdgeMaskCache MaskCache; LoopVectorizationLegality *Legal; + + // Record whether runtime check is added. + bool AddedSafetyChecks; }; class InnerLoopUnroller : public InnerLoopVectorizer { public: InnerLoopUnroller(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI, - DominatorTree *DT, const DataLayout *DL, - const TargetLibraryInfo *TLI, unsigned UnrollFactor) : - InnerLoopVectorizer(OrigLoop, SE, LI, DT, DL, TLI, 1, UnrollFactor) { } + DominatorTree *DT, const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI, unsigned UnrollFactor) + : InnerLoopVectorizer(OrigLoop, SE, LI, DT, TLI, TTI, 1, UnrollFactor) {} private: void scalarizeInstruction(Instruction *Instr, @@ -488,7 +505,7 @@ static std::string getDebugLocString(const Loop *L) { raw_string_ostream OS(Result); const DebugLoc LoopDbgLoc = L->getStartLoc(); if (!LoopDbgLoc.isUnknown()) - LoopDbgLoc.print(L->getHeader()->getContext(), OS); + LoopDbgLoc.print(OS); else // Just print the module name. OS << L->getHeader()->getParent()->getParent()->getModuleIdentifier(); @@ -543,14 +560,13 @@ static void propagateMetadata(SmallVectorImpl<Value *> &To, const Instruction *F /// induction variable and the different reduction variables. class LoopVectorizationLegality { public: - LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL, - DominatorTree *DT, TargetLibraryInfo *TLI, - AliasAnalysis *AA, Function *F, - const TargetTransformInfo *TTI, + LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DominatorTree *DT, + TargetLibraryInfo *TLI, AliasAnalysis *AA, + Function *F, const TargetTransformInfo *TTI, LoopAccessAnalysis *LAA) - : NumPredStores(0), TheLoop(L), SE(SE), DL(DL), - TLI(TLI), TheFunction(F), TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), - Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false) {} + : NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F), + TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), Induction(nullptr), + WidestIndTy(nullptr), HasFunNoNaNAttr(false) {} /// This enum represents the kinds of reductions that we support. enum ReductionKind { @@ -842,8 +858,6 @@ private: Loop *TheLoop; /// Scev analysis. ScalarEvolution *SE; - /// DataLayout analysis. - const DataLayout *DL; /// Target Library Info. TargetLibraryInfo *TLI; /// Parent function @@ -884,7 +898,7 @@ private: ValueToValueMap Strides; SmallPtrSet<Value *, 8> StrideSet; - + /// While vectorizing these instructions we have to generate a /// call to the appropriate masked intrinsic SmallPtrSet<const Instruction*, 8> MaskedOp; @@ -902,10 +916,9 @@ public: LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI, LoopVectorizationLegality *Legal, const TargetTransformInfo &TTI, - const DataLayout *DL, const TargetLibraryInfo *TLI, - AssumptionCache *AC, const Function *F, - const LoopVectorizeHints *Hints) - : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI), + const TargetLibraryInfo *TLI, AssumptionCache *AC, + const Function *F, const LoopVectorizeHints *Hints) + : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), TheFunction(F), Hints(Hints) { CodeMetrics::collectEphemeralValues(L, AC, EphValues); } @@ -958,11 +971,6 @@ private: /// width. 
Vector width of one means scalar.
unsigned getInstructionCost(Instruction *I, unsigned VF);
- /// A helper function for converting Scalar types to vector types.
- /// If the incoming type is void, we return void. If the VF is 1, we return
- /// the scalar type.
- static Type* ToVectorTy(Type *Scalar, unsigned VF);
-
/// Returns whether the instruction is a load or store and will be emitted
/// as a vector operation.
bool isConsecutiveLoadOrStore(Instruction *I);
@@ -988,8 +996,6 @@ private:
LoopVectorizationLegality *Legal;
/// Vector target information.
const TargetTransformInfo &TTI;
- /// Target data layout information.
- const DataLayout *DL;
/// Target Library Info.
const TargetLibraryInfo *TLI;
const Function *TheFunction;
@@ -1254,7 +1260,6 @@ struct LoopVectorize : public FunctionPass {
}
ScalarEvolution *SE;
- const DataLayout *DL;
LoopInfo *LI;
TargetTransformInfo *TTI;
DominatorTree *DT;
@@ -1270,8 +1275,6 @@
bool runOnFunction(Function &F) override {
SE = &getAnalysis<ScalarEvolution>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -1292,12 +1295,6 @@ struct LoopVectorize : public FunctionPass {
if (!TTI->getNumberOfRegisters(true))
return false;
- if (!DL) {
- DEBUG(dbgs() << "\nLV: Not vectorizing " << F.getName()
- << ": Missing data layout\n");
- return false;
- }
-
// Build up a worklist of inner-loops to vectorize. This is necessary as
// the act of vectorizing or partially unrolling a loop creates new loops
// and can invalidate iterators across the loops.
@@ -1317,6 +1314,40 @@
return Changed;
}
+ static void AddRuntimeUnrollDisableMetaData(Loop *L) {
+ SmallVector<Metadata *, 4> MDs;
+ // Reserve first location for self reference to the LoopID metadata node.
+ MDs.push_back(nullptr);
+ bool IsUnrollMetadata = false;
+ MDNode *LoopID = L->getLoopID();
+ if (LoopID) {
+ // First find existing loop unrolling disable metadata.
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (MD) {
+ const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ IsUnrollMetadata =
+ S && S->getString().startswith("llvm.loop.unroll.disable");
+ }
+ MDs.push_back(LoopID->getOperand(i));
+ }
+ }
+
+ if (!IsUnrollMetadata) {
+ // Add runtime unroll disable metadata.
+ LLVMContext &Context = L->getHeader()->getContext();
+ SmallVector<Metadata *, 1> DisableOperands;
+ DisableOperands.push_back(
+ MDString::get(Context, "llvm.loop.unroll.runtime.disable"));
+ MDNode *DisableNode = MDNode::get(Context, DisableOperands);
+ MDs.push_back(DisableNode);
+ MDNode *NewLoopID = MDNode::get(Context, MDs);
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+ L->setLoopID(NewLoopID);
+ }
+ }
+
bool processLoop(Loop *L) {
assert(L->empty() && "Only process inner loops.");
@@ -1391,7 +1422,7 @@
}
// Check if it is legal to vectorize the loop.
- LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F, TTI, LAA);
+ LoopVectorizationLegality LVL(L, SE, DT, TLI, AA, F, TTI, LAA);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
emitMissedWarning(F, L, Hints);
@@ -1399,8 +1430,7 @@
}
// Use the cost model.
- LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI, AC, F,
- &Hints);
+ LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, TLI, AC, F, &Hints);
// Check the function attributes to find out if this function should be
// optimized for size.
@@ -1464,14 +1494,20 @@
// We decided not to vectorize, but we may want to unroll.
- InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF);
+ InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, UF);
Unroller.vectorize(&LVL);
} else {
// If we decided that it is *legal* to vectorize the loop then do it.
- InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF);
+ InnerLoopVectorizer LB(L, SE, LI, DT, TLI, TTI, VF.Width, UF);
LB.vectorize(&LVL);
++LoopsVectorized;
+ // Add metadata to disable runtime unrolling of the scalar loop when
+ // there are no runtime checks for strides and memory. In that situation
+ // the scalar loop is rarely executed and is not worth unrolling.
+ if (!LB.IsSafetyChecksAdded())
+ AddRuntimeUnrollDisableMetaData(L);
+
// Report the vectorization decision.
emitOptimizationRemark(
F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
@@ -1561,10 +1597,10 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
/// \brief Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
-static unsigned getGEPInductionOperand(const DataLayout *DL,
- const GetElementPtrInst *Gep) {
+static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) {
+ const DataLayout &DL = Gep->getModule()->getDataLayout();
unsigned LastOperand = Gep->getNumOperands() - 1;
- unsigned GEPAllocSize = DL->getTypeAllocSize(
+ unsigned GEPAllocSize = DL.getTypeAllocSize(
cast<PointerType>(Gep->getType()->getScalarType())->getElementType());
// Walk backwards and try to peel off zeros.
@@ -1575,7 +1611,7 @@ static unsigned getGEPInductionOperand(const DataLayout *DL,
// If it's a type with the same allocation size as the result of the GEP we
// can peel off the zero index.
- if (DL->getTypeAllocSize(*GEPTI) != GEPAllocSize)
+ if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize)
break;
--LastOperand;
}
@@ -1621,7 +1657,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
return II.getConsecutiveDirection();
}
- unsigned InductionOperand = getGEPInductionOperand(DL, Gep);
+ unsigned InductionOperand = getGEPInductionOperand(Gep);
// Check that all of the gep indices are uniform except for our induction
// operand.
@@ -1714,11 +1750,12 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment();
// An alignment of 0 means target abi alignment. We need to use the scalar's
// target abi alignment in such a case.
+ const DataLayout &DL = Instr->getModule()->getDataLayout(); if (!Alignment) - Alignment = DL->getABITypeAlignment(ScalarDataTy); + Alignment = DL.getABITypeAlignment(ScalarDataTy); unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace(); - unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy); - unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF; + unsigned ScalarAllocatedSize = DL.getTypeAllocSize(ScalarDataTy); + unsigned VectorElementSize = DL.getTypeStoreSize(DataTy) / VF; if (SI && Legal->blockNeedsPredication(SI->getParent()) && !Legal->isMaskRequired(SI)) @@ -1759,7 +1796,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { // The last index does not have to be the induction. It can be // consecutive and be a function of the index. For example A[I+1]; unsigned NumOperands = Gep->getNumOperands(); - unsigned InductionOperand = getGEPInductionOperand(DL, Gep); + unsigned InductionOperand = getGEPInductionOperand(Gep); // Create the new GEP with the new induction variable. GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone()); @@ -2080,9 +2117,11 @@ void InnerLoopVectorizer::createEmptyLoop() { ExitCount = SE->getAddExpr(BackedgeTakeCount, SE->getConstant(BackedgeTakeCount->getType(), 1)); + const DataLayout &DL = OldBasicBlock->getModule()->getDataLayout(); + // Expand the trip count and place the new instructions in the preheader. // Notice that the pre-header does not change, only the loop body. - SCEVExpander Exp(*SE, "induction"); + SCEVExpander Exp(*SE, DL, "induction"); // We need to test whether the backedge-taken count is uint##_max. Adding one // to it will cause overflow and an incorrect loop trip count in the vector @@ -2218,6 +2257,7 @@ void InnerLoopVectorizer::createEmptyLoop() { std::tie(FirstCheckInst, StrideCheck) = addStrideCheck(LastBypassBlock->getTerminator()); if (StrideCheck) { + AddedSafetyChecks = true; // Create a new block containing the stride check. BasicBlock *CheckBlock = LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck"); @@ -2242,6 +2282,7 @@ void InnerLoopVectorizer::createEmptyLoop() { std::tie(FirstCheckInst, MemRuntimeCheck) = Legal->getLAI()->addRuntimeCheck(LastBypassBlock->getTerminator()); if (MemRuntimeCheck) { + AddedSafetyChecks = true; // Create a new block containing the memory check. BasicBlock *CheckBlock = LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.memcheck"); @@ -2480,10 +2521,9 @@ getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) { } } -Value *createMinMaxOp(IRBuilder<> &Builder, - LoopVectorizationLegality::MinMaxReductionKind RK, - Value *Left, - Value *Right) { +static Value *createMinMaxOp(IRBuilder<> &Builder, + LoopVectorizationLegality::MinMaxReductionKind RK, + Value *Left, Value *Right) { CmpInst::Predicate P = CmpInst::ICMP_NE; switch (RK) { default: @@ -2594,6 +2634,95 @@ static Value *addFastMathFlag(Value *V) { return V; } +/// Estimate the overhead of scalarizing a value. Insert and Extract are set if +/// the result needs to be inserted and/or extracted from vectors. 
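/// For example (illustrative): scalarizing a <4 x float> with both Insert
/// and Extract set is priced as four insertelement plus four extractelement
/// operations at the target's TTI rates.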
+static unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract, + const TargetTransformInfo &TTI) { + if (Ty->isVoidTy()) + return 0; + + assert(Ty->isVectorTy() && "Can only scalarize vectors"); + unsigned Cost = 0; + + for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { + if (Insert) + Cost += TTI.getVectorInstrCost(Instruction::InsertElement, Ty, i); + if (Extract) + Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, Ty, i); + } + + return Cost; +} + +// Estimate cost of a call instruction CI if it were vectorized with factor VF. +// Return the cost of the instruction, including scalarization overhead if it's +// needed. The flag NeedToScalarize shows if the call needs to be scalarized - +// i.e. either vector version isn't available, or is too expensive. +static unsigned getVectorCallCost(CallInst *CI, unsigned VF, + const TargetTransformInfo &TTI, + const TargetLibraryInfo *TLI, + bool &NeedToScalarize) { + Function *F = CI->getCalledFunction(); + StringRef FnName = CI->getCalledFunction()->getName(); + Type *ScalarRetTy = CI->getType(); + SmallVector<Type *, 4> Tys, ScalarTys; + for (auto &ArgOp : CI->arg_operands()) + ScalarTys.push_back(ArgOp->getType()); + + // Estimate cost of scalarized vector call. The source operands are assumed + // to be vectors, so we need to extract individual elements from there, + // execute VF scalar calls, and then gather the result into the vector return + // value. + unsigned ScalarCallCost = TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys); + if (VF == 1) + return ScalarCallCost; + + // Compute corresponding vector type for return value and arguments. + Type *RetTy = ToVectorTy(ScalarRetTy, VF); + for (unsigned i = 0, ie = ScalarTys.size(); i != ie; ++i) + Tys.push_back(ToVectorTy(ScalarTys[i], VF)); + + // Compute costs of unpacking argument values for the scalar calls and + // packing the return values to a vector. + unsigned ScalarizationCost = + getScalarizationOverhead(RetTy, true, false, TTI); + for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) + ScalarizationCost += getScalarizationOverhead(Tys[i], false, true, TTI); + + unsigned Cost = ScalarCallCost * VF + ScalarizationCost; + + // If we can't emit a vector call for this function, then the currently found + // cost is the cost we need to return. + NeedToScalarize = true; + if (!TLI || !TLI->isFunctionVectorizable(FnName, VF) || CI->isNoBuiltin()) + return Cost; + + // If the corresponding vector cost is cheaper, return its cost. + unsigned VectorCallCost = TTI.getCallInstrCost(nullptr, RetTy, Tys); + if (VectorCallCost < Cost) { + NeedToScalarize = false; + return VectorCallCost; + } + return Cost; +} + +// Estimate cost of an intrinsic call instruction CI if it were vectorized with +// factor VF. Return the cost of the instruction, including scalarization +// overhead if it's needed. 
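// Illustratively, for a call to llvm.sqrt.f32 at VF == 4 this asks TTI for
// the cost of llvm.sqrt on <4 x float>, with every operand type widened the
// same way.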
+static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF,
+ const TargetTransformInfo &TTI,
+ const TargetLibraryInfo *TLI) {
+ Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
+ assert(ID && "Expected intrinsic call!");
+
+ Type *RetTy = ToVectorTy(CI->getType(), VF);
+ SmallVector<Type *, 4> Tys;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
+ Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF));
+
+ return TTI.getIntrinsicInstrCost(ID, RetTy, Tys);
+}
+
void InnerLoopVectorizer::vectorizeLoop() {
//===------------------------------------------------===//
//
@@ -3181,37 +3310,71 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Module *M = BB->getParent()->getParent();
CallInst *CI = cast<CallInst>(it);
+
+ StringRef FnName = CI->getCalledFunction()->getName();
+ Function *F = CI->getCalledFunction();
+ Type *RetTy = ToVectorTy(CI->getType(), VF);
+ SmallVector<Type *, 4> Tys;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
+ Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF));
+
Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
- assert(ID && "Not an intrinsic call!");
- switch (ID) {
- case Intrinsic::assume:
- case Intrinsic::lifetime_end:
- case Intrinsic::lifetime_start:
+ if (ID &&
+ (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end ||
+ ID == Intrinsic::lifetime_start)) {
scalarizeInstruction(it);
break;
- default:
- bool HasScalarOpd = hasVectorInstrinsicScalarOpd(ID, 1);
- for (unsigned Part = 0; Part < UF; ++Part) {
- SmallVector<Value *, 4> Args;
- for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
- if (HasScalarOpd && i == 1) {
- Args.push_back(CI->getArgOperand(i));
- continue;
- }
- VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
- Args.push_back(Arg[Part]);
- }
- Type *Tys[] = {CI->getType()};
- if (VF > 1)
- Tys[0] = VectorType::get(CI->getType()->getScalarType(), VF);
+ }
+ // The flag shows whether we use an intrinsic or a plain call for the
+ // vectorized version of the instruction.
+ // Is it beneficial to perform an intrinsic call compared to a lib call?
+ bool NeedToScalarize;
+ unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize);
+ bool UseVectorIntrinsic =
+ ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost;
+ if (!UseVectorIntrinsic && NeedToScalarize) {
+ scalarizeInstruction(it);
+ break;
+ }
- Function *F = Intrinsic::getDeclaration(M, ID, Tys);
- Entry[Part] = Builder.CreateCall(F, Args);
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ SmallVector<Value *, 4> Args;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+ Value *Arg = CI->getArgOperand(i);
+ // Some intrinsics have a scalar argument - don't replace it with a
+ // vector.
+ if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i)) {
+ VectorParts &VectorArg = getVectorValue(CI->getArgOperand(i));
+ Arg = VectorArg[Part];
+ }
+ Args.push_back(Arg);
}
- propagateMetadata(Entry, it);
- break;
+ Function *VectorF;
+ if (UseVectorIntrinsic) {
+ // Use vector version of the intrinsic.
+ Type *TysForDecl[] = {CI->getType()};
+ if (VF > 1)
+ TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF);
+ VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl);
+ } else {
+ // Use vector version of the library call.
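// Sketch of the assumed mapping: TLI may report, for example, that "sinf"
// at VF == 4 has a vector counterpart (its exact name depends on the vector
// library TLI was configured with); if the module does not declare that
// function yet, a declaration is created below.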
+ StringRef VFnName = TLI->getVectorizedFunction(FnName, VF); + assert(!VFnName.empty() && "Vector function name is empty."); + VectorF = M->getFunction(VFnName); + if (!VectorF) { + // Generate a declaration + FunctionType *FTy = FunctionType::get(RetTy, Tys, false); + VectorF = + Function::Create(FTy, Function::ExternalLinkage, VFnName, M); + VectorF->copyAttributesFrom(F); + } + } + assert(VectorF && "Can't create vector function."); + Entry[Part] = Builder.CreateCall(VectorF, Args); } + + propagateMetadata(Entry, it); break; } @@ -3463,6 +3626,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Look for the attribute signaling the absence of NaNs. Function &F = *Header->getParent(); + const DataLayout &DL = F.getParent()->getDataLayout(); if (F.hasFnAttribute("no-nans-fp-math")) HasFunNoNaNAttr = F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; @@ -3518,9 +3682,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (IK_NoInduction != IK) { // Get the widest type. if (!WidestIndTy) - WidestIndTy = convertPointerToIntegerType(*DL, PhiTy); + WidestIndTy = convertPointerToIntegerType(DL, PhiTy); else - WidestIndTy = getWiderType(*DL, PhiTy, WidestIndTy); + WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy); // Int inductions are special because we only allow one IV. if (IK == IK_IntInduction && StepValue->isOne()) { @@ -3591,13 +3755,17 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { return false; }// end of PHI handling - // We still don't handle functions. However, we can ignore dbg intrinsic - // calls and we do handle certain intrinsic and libm functions. + // We handle calls that: + // * Are debug info intrinsics. + // * Have a mapping to an IR intrinsic. + // * Have a vector version available. CallInst *CI = dyn_cast<CallInst>(it); - if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) { + if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI) && + !(CI->getCalledFunction() && TLI && + TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) { emitAnalysis(VectorizationReport(it) << "call instruction cannot be vectorized"); - DEBUG(dbgs() << "LV: Found a call site.\n"); + DEBUG(dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n"); return false; } @@ -3665,13 +3833,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { ///\brief Remove GEPs whose indices but the last one are loop invariant and /// return the induction operand of the gep pointer. -static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, - const DataLayout *DL, Loop *Lp) { +static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); if (!GEP) return Ptr; - unsigned InductionOperand = getGEPInductionOperand(DL, GEP); + unsigned InductionOperand = getGEPInductionOperand(GEP); // Check that all of the gep indices are uniform except for our induction // operand. @@ -3700,8 +3867,7 @@ static Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) { ///\brief Get the stride of a pointer access in a loop. /// Looks for symbolic strides "a[i*stride]". Returns the symbolic stride as a /// pointer to the Value, or null otherwise. 
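/// For instance (illustrative): in
///   for (int i = 0; i < n; ++i) A[i * Stride] = 0;
/// this returns the loop-invariant value Stride; the actual matching is
/// performed on the SCEV form of the pointer below.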
-static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, - const DataLayout *DL, Loop *Lp) { +static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType()); if (!PtrTy || PtrTy->isAggregateType()) return nullptr; @@ -3714,7 +3880,7 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, // The size of the pointer access. int64_t PtrAccessSize = 1; - Ptr = stripGetElementPtr(Ptr, SE, DL, Lp); + Ptr = stripGetElementPtr(Ptr, SE, Lp); const SCEV *V = SE->getSCEV(Ptr); if (Ptr != OrigPtr) @@ -3733,7 +3899,8 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, // Strip off the size of access multiplication if we are still analyzing the // pointer. if (OrigPtr == Ptr) { - DL->getTypeAllocSize(PtrTy->getElementType()); + const DataLayout &DL = Lp->getHeader()->getModule()->getDataLayout(); + DL.getTypeAllocSize(PtrTy->getElementType()); if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) { if (M->getOperand(0)->getSCEVType() != scConstant) return nullptr; @@ -3785,7 +3952,7 @@ void LoopVectorizationLegality::collectStridedAccess(Value *MemAccess) { else return; - Value *Stride = getStrideFromPointer(Ptr, SE, DL, TheLoop); + Value *Stride = getStrideFromPointer(Ptr, SE, TheLoop); if (!Stride) return; @@ -3837,7 +4004,19 @@ bool LoopVectorizationLegality::canVectorizeMemory() { auto &OptionalReport = LAI->getReport(); if (OptionalReport) emitAnalysis(VectorizationReport(*OptionalReport)); - return LAI->canVectorizeMemory(); + if (!LAI->canVectorizeMemory()) + return false; + + if (LAI->getNumRuntimePointerChecks() > + VectorizerParams::RuntimeMemoryCheckThreshold) { + emitAnalysis(VectorizationReport() + << LAI->getNumRuntimePointerChecks() << " exceeds limit of " + << VectorizerParams::RuntimeMemoryCheckThreshold + << " dependent memory operations checked at runtime"); + DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); + return false; + } + return true; } static bool hasMultipleUsesOf(Instruction *I, @@ -4163,7 +4342,8 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi, if (!PointerElementType->isSized()) return IK_NoInduction; - int64_t Size = static_cast<int64_t>(DL->getTypeAllocSize(PointerElementType)); + const DataLayout &DL = Phi->getModule()->getDataLayout(); + int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType)); int64_t CVSize = CV->getSExtValue(); if (CVSize % Size) return IK_NoInduction; @@ -4375,6 +4555,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { unsigned LoopVectorizationCostModel::getWidestType() { unsigned MaxWidth = 8; + const DataLayout &DL = TheFunction->getParent()->getDataLayout(); // For each block. for (Loop::block_iterator bb = TheLoop->block_begin(), @@ -4409,7 +4590,7 @@ unsigned LoopVectorizationCostModel::getWidestType() { continue; MaxWidth = std::max(MaxWidth, - (unsigned)DL->getTypeSizeInBits(T->getScalarType())); + (unsigned)DL.getTypeSizeInBits(T->getScalarType())); } } @@ -4561,6 +4742,14 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, return SmallUF; } + // Unroll if this is a large loop (small loops are already dealt with by this + // point) that could benefit from interleaved unrolling. 
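// A sketch of the motivation: in a reduction loop such as
//   for (i = 0; i < n; ++i) S += A[i];
// interleaving keeps several independent partial sums in flight, hiding the
// latency of the reduction chain until a final combine after the loop.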
+ bool HasReductions = (Legal->getReductionVars()->size() > 0); + if (TTI.enableAggressiveInterleaving(HasReductions)) { + DEBUG(dbgs() << "LV: Unrolling to expose ILP.\n"); + return UF; + } + DEBUG(dbgs() << "LV: Not Unrolling.\n"); return 1; } @@ -4898,8 +5087,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { // Scalarized loads/stores. int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); bool Reverse = ConsecutiveStride < 0; - unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ValTy); - unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF; + const DataLayout &DL = I->getModule()->getDataLayout(); + unsigned ScalarAllocatedSize = DL.getTypeAllocSize(ValTy); + unsigned VectorElementSize = DL.getTypeStoreSize(VectorTy) / VF; if (!ConsecutiveStride || ScalarAllocatedSize != VectorElementSize) { bool IsComplexComputation = isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop); @@ -4960,14 +5150,12 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy); } case Instruction::Call: { + bool NeedToScalarize; CallInst *CI = cast<CallInst>(I); - Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI); - assert(ID && "Not an intrinsic call!"); - Type *RetTy = ToVectorTy(CI->getType(), VF); - SmallVector<Type*, 4> Tys; - for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) - Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF)); - return TTI.getIntrinsicInstrCost(ID, RetTy, Tys); + unsigned CallCost = getVectorCallCost(CI, VF, TTI, TLI, NeedToScalarize); + if (getIntrinsicIDForCall(CI, TLI)) + return std::min(CallCost, getVectorIntrinsicCost(CI, VF, TTI, TLI)); + return CallCost; } default: { // We are scalarizing the instruction. Return the cost of the scalar @@ -4994,12 +5182,6 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { }// end of switch. 
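// Editorial sketch (not part of the commit): the reworked Instruction::Call
// costing above prices a call twice, once as a vector library call and once
// as a widened intrinsic when the callee has an intrinsic mapping, then keeps
// the cheaper of the two. A toy model of that selection (the names here are
// illustrative, not LLVM's API):
#include <algorithm>
#include <optional>

unsigned chooseVectorCallCost(unsigned LibCallCost,
                              std::optional<unsigned> IntrinsicCost) {
  if (IntrinsicCost) // getIntrinsicIDForCall(CI, TLI) succeeded
    return std::min(LibCallCost, *IntrinsicCost);
  return LibCallCost; // only the library/scalarized variant exists
}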
}

-Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) {
-  if (Scalar->isVoidTy() || VF == 1)
-    return Scalar;
-  return VectorType::get(Scalar, VF);
-}
-
char LoopVectorize::ID = 0;
static const char lv_name[] = "Loop Vectorization";
INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index baf9741..8fc4cc1 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -17,9 +17,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -342,11 +342,11 @@ public:
  typedef SmallPtrSet<Value *, 16> ValueSet;
  typedef SmallVector<StoreInst *, 8> StoreList;

-  BoUpSLP(Function *Func, ScalarEvolution *Se, const DataLayout *Dl,
-          TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa,
-          LoopInfo *Li, DominatorTree *Dt, AssumptionCache *AC)
+  BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
+          TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li,
+          DominatorTree *Dt, AssumptionCache *AC)
      : NumLoadsWantToKeepOrder(0), NumLoadsWantToChangeOrder(0), F(Func),
-        SE(Se), DL(Dl), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt),
+        SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt),
        Builder(Se->getContext()) {
    CodeMetrics::collectEphemeralValues(F, AC, EphValues);
  }
@@ -383,7 +383,7 @@ public:
  }

  /// \returns true if the memory operations A and B are consecutive.
-  bool isConsecutiveAccess(Value *A, Value *B);
+  bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL);

  /// \brief Perform LICM and CSE on the newly generated gather sequences.
  void optimizeGatherSequence();
@@ -877,7 +877,6 @@ private:
  // Analysis and block reference.
  Function *F;
  ScalarEvolution *SE;
-  const DataLayout *DL;
  TargetTransformInfo *TTI;
  TargetLibraryInfo *TLI;
  AliasAnalysis *AA;
@@ -1130,8 +1129,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
          DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
          return;
        }
-        if (!isConsecutiveAccess(VL[i], VL[i + 1])) {
-          if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0])) {
+        const DataLayout &DL = F->getParent()->getDataLayout();
+        if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
+          if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL)) {
            ++NumLoadsWantToChangeOrder;
          }
          BS.cancelScheduling(VL);
@@ -1300,9 +1300,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
      return;
    }
    case Instruction::Store: {
+      const DataLayout &DL = F->getParent()->getDataLayout();
      // Check if the stores are consecutive or if we need to swizzle them.
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) - if (!isConsecutiveAccess(VL[i], VL[i + 1])) { + if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) { BS.cancelScheduling(VL); newTreeEntry(VL, false); DEBUG(dbgs() << "SLP: Non-consecutive store.\n"); @@ -1789,7 +1790,7 @@ unsigned BoUpSLP::getAddressSpaceOperand(Value *I) { return -1; } -bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) { +bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL) { Value *PtrA = getPointerOperand(A); Value *PtrB = getPointerOperand(B); unsigned ASA = getAddressSpaceOperand(A); @@ -1803,13 +1804,13 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) { if (PtrA == PtrB || PtrA->getType() != PtrB->getType()) return false; - unsigned PtrBitWidth = DL->getPointerSizeInBits(ASA); + unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA); Type *Ty = cast<PointerType>(PtrA->getType())->getElementType(); - APInt Size(PtrBitWidth, DL->getTypeStoreSize(Ty)); + APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty)); APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0); - PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(*DL, OffsetA); - PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(*DL, OffsetB); + PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); + PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); APInt OffsetDelta = OffsetB - OffsetA; @@ -1842,6 +1843,7 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) { void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL, SmallVectorImpl<Value *> &Left, SmallVectorImpl<Value *> &Right) { + const DataLayout &DL = F->getParent()->getDataLayout(); // Push left and right operands of binary operation into Left and Right for (unsigned i = 0, e = VL.size(); i < e; ++i) { @@ -1856,10 +1858,10 @@ void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL, if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) { Instruction *VL1 = cast<Instruction>(VL[j]); Instruction *VL2 = cast<Instruction>(VL[j + 1]); - if (isConsecutiveAccess(L, L1) && VL1->isCommutative()) { + if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) { std::swap(Left[j], Right[j]); continue; - } else if (isConsecutiveAccess(L, L1) && VL2->isCommutative()) { + } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) { std::swap(Left[j + 1], Right[j + 1]); continue; } @@ -1870,10 +1872,10 @@ void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL, if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) { Instruction *VL1 = cast<Instruction>(VL[j]); Instruction *VL2 = cast<Instruction>(VL[j + 1]); - if (isConsecutiveAccess(L, L1) && VL1->isCommutative()) { + if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) { std::swap(Left[j], Right[j]); continue; - } else if (isConsecutiveAccess(L, L1) && VL2->isCommutative()) { + } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) { std::swap(Left[j + 1], Right[j + 1]); continue; } @@ -1983,6 +1985,8 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL, Right = OrigRight; } + const DataLayout &DL = F->getParent()->getDataLayout(); + // Finally check if we can get longer vectorizable chain by reordering // without breaking the good operand order detected above. // E.g. 
If we have something like- @@ -2001,7 +2005,7 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL, for (unsigned j = 0; j < VL.size() - 1; ++j) { if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) { if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) { - if (isConsecutiveAccess(L, L1)) { + if (isConsecutiveAccess(L, L1, DL)) { std::swap(Left[j + 1], Right[j + 1]); continue; } @@ -2009,7 +2013,7 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL, } if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) { if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) { - if (isConsecutiveAccess(L, L1)) { + if (isConsecutiveAccess(L, L1, DL)) { std::swap(Left[j + 1], Right[j + 1]); continue; } @@ -2105,6 +2109,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { return Gather(E->Scalars, VecTy); } + const DataLayout &DL = F->getParent()->getDataLayout(); unsigned Opcode = getSameOpcode(E->Scalars); switch (Opcode) { @@ -2301,8 +2306,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { unsigned Alignment = LI->getAlignment(); LI = Builder.CreateLoad(VecPtr); - if (!Alignment) - Alignment = DL->getABITypeAlignment(ScalarLoadTy); + if (!Alignment) { + Alignment = DL.getABITypeAlignment(ScalarLoadTy); + } LI->setAlignment(Alignment); E->VectorizedValue = LI; ++NumVectorInstructions; @@ -2331,8 +2337,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { ExternalUses.push_back( ExternalUser(SI->getPointerOperand(), cast<User>(VecPtr), 0)); - if (!Alignment) - Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType()); + if (!Alignment) { + Alignment = DL.getABITypeAlignment(SI->getValueOperand()->getType()); + } S->setAlignment(Alignment); E->VectorizedValue = S; ++NumVectorInstructions; @@ -2358,7 +2365,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { OpVecs.push_back(OpVec); } - Value *V = Builder.CreateGEP(Op0, OpVecs); + Value *V = Builder.CreateGEP( + cast<GetElementPtrInst>(VL0)->getSourceElementType(), Op0, OpVecs); E->VectorizedValue = V; ++NumVectorInstructions; @@ -3051,7 +3059,6 @@ struct SLPVectorizer : public FunctionPass { } ScalarEvolution *SE; - const DataLayout *DL; TargetTransformInfo *TTI; TargetLibraryInfo *TLI; AliasAnalysis *AA; @@ -3064,8 +3071,6 @@ struct SLPVectorizer : public FunctionPass { return false; SE = &getAnalysis<ScalarEvolution>(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); TLI = TLIP ? &TLIP->getTLI() : nullptr; @@ -3082,11 +3087,6 @@ struct SLPVectorizer : public FunctionPass { if (!TTI->getNumberOfRegisters(true)) return false; - // Must have DataLayout. We can't require it because some tests run w/o - // triple. - if (!DL) - return false; - // Don't vectorize when the attribute NoImplicitFloat is used. if (F.hasFnAttribute(Attribute::NoImplicitFloat)) return false; @@ -3095,7 +3095,7 @@ struct SLPVectorizer : public FunctionPass { // Use the bottom up slp vectorizer to construct chains that start with // store instructions. - BoUpSLP R(&F, SE, DL, TTI, TLI, AA, LI, DT, AC); + BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC); // A general note: the vectorizer must use BoUpSLP::eraseInstruction() to // delete instructions. @@ -3178,15 +3178,11 @@ private: /// the WeakVH array. /// Vectorization of part of the VL array may cause later values in the VL array /// to become invalid. We track when this has happened in the WeakVH array. 
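// Editorial sketch (not part of the commit): the consecutiveness test that
// the reordering loops above and the isConsecutiveAccess() hunk rely on. Both
// pointers are stripped to a base object plus an accumulated constant byte
// offset (stripAndAccumulateInBoundsConstantOffsets with the DataLayout), and
// two accesses are consecutive when the offsets differ by exactly one element
// size. A simplified model over plain data, omitting the SCEV fallback path:
#include <cstdint>

struct StrippedPtr {
  const void *Base; // underlying object after stripping constant GEPs
  int64_t Offset;   // accumulated in-bounds byte offset
};

bool consecutive(const StrippedPtr &A, const StrippedPtr &B,
                 int64_t ElemSizeInBytes) {
  if (A.Base != B.Base)
    return false; // different bases: the real code falls back to SCEV
  return B.Offset - A.Offset == ElemSizeInBytes; // e.g. a[0] then a[1]
}
// With this predicate, swapping Left[j+1] and Right[j+1] as above turns
// patterns like (a[0],b[0]) / (b[1],a[1]) into two consecutive load chains.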
-static bool hasValueBeenRAUWed(ArrayRef<Value *> &VL, - SmallVectorImpl<WeakVH> &VH, - unsigned SliceBegin, - unsigned SliceSize) { - for (unsigned i = SliceBegin; i < SliceBegin + SliceSize; ++i) - if (VH[i] != VL[i]) - return true; - - return false; +static bool hasValueBeenRAUWed(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH, + unsigned SliceBegin, unsigned SliceSize) { + VL = VL.slice(SliceBegin, SliceSize); + VH = VH.slice(SliceBegin, SliceSize); + return !std::equal(VL.begin(), VL.end(), VH.begin()); } bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain, @@ -3195,7 +3191,8 @@ bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain, DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen << "\n"); Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType(); - unsigned Sz = DL->getTypeSizeInBits(StoreTy); + auto &DL = cast<StoreInst>(Chain[0])->getModule()->getDataLayout(); + unsigned Sz = DL.getTypeSizeInBits(StoreTy); unsigned VF = MinVecRegSize / Sz; if (!isPowerOf2_32(Sz) || VF < 2) @@ -3238,8 +3235,8 @@ bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain, bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold, BoUpSLP &R) { - SetVector<Value *> Heads, Tails; - SmallDenseMap<Value *, Value *> ConsecutiveChain; + SetVector<StoreInst *> Heads, Tails; + SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain; // We may run into multiple chains that merge into a single chain. We mark the // stores that we vectorized so that we don't visit the same store twice. @@ -3252,8 +3249,8 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores, for (unsigned j = 0; j < e; ++j) { if (i == j) continue; - - if (R.isConsecutiveAccess(Stores[i], Stores[j])) { + const DataLayout &DL = Stores[i]->getModule()->getDataLayout(); + if (R.isConsecutiveAccess(Stores[i], Stores[j], DL)) { Tails.insert(Stores[j]); Heads.insert(Stores[i]); ConsecutiveChain[Stores[i]] = Stores[j]; @@ -3262,7 +3259,7 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores, } // For stores that start but don't end a link in the chain: - for (SetVector<Value *>::iterator it = Heads.begin(), e = Heads.end(); + for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end(); it != e; ++it) { if (Tails.count(*it)) continue; @@ -3270,7 +3267,7 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores, // We found a store instr that starts a chain. Now follow the chain and try // to vectorize it. BoUpSLP::ValueList Operands; - Value *I = *it; + StoreInst *I = *it; // Collect the chain into a list. 
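// Editorial sketch (not part of the commit): how vectorizeStores() above
// organizes stores into chains. Every store with a consecutive successor
// contributes an edge Head -> Tail in ConsecutiveChain; a chain is then
// walked from each head that is not some other store's tail. A compact model
// (Store is a stand-in for StoreInst *):
#include <map>
#include <set>
#include <vector>

using Store = int;

std::vector<std::vector<Store>>
collectChains(const std::map<Store, Store> &ConsecutiveChain,
              const std::set<Store> &Heads, const std::set<Store> &Tails) {
  std::vector<std::vector<Store>> Chains;
  for (Store Head : Heads) {
    if (Tails.count(Head))
      continue; // interior link, some earlier store already reaches it
    std::vector<Store> Chain;
    for (Store I = Head; Heads.count(I) || Tails.count(I);) {
      Chain.push_back(I);
      auto Next = ConsecutiveChain.find(I);
      if (Next == ConsecutiveChain.end())
        break; // reached the last store in the chain
      I = Next->second; // follow the consecutive link
    }
    Chains.push_back(Chain); // candidate for vectorizeStoreChain()
  }
  return Chains;
}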
while (Tails.count(I) || Heads.count(I)) {
  if (VectorizedStores.count(I))
    break;
@@ -3295,6 +3292,7 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
  unsigned count = 0;
  StoreRefs.clear();
+  const DataLayout &DL = BB->getModule()->getDataLayout();
  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
    StoreInst *SI = dyn_cast<StoreInst>(it);
    if (!SI)
@@ -3340,9 +3338,10 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
    return false;

  unsigned Opcode0 = I0->getOpcode();
+  const DataLayout &DL = I0->getModule()->getDataLayout();

  Type *Ty0 = I0->getType();
-  unsigned Sz = DL->getTypeSizeInBits(Ty0);
+  unsigned Sz = DL.getTypeSizeInBits(Ty0);
  unsigned VF = MinVecRegSize / Sz;

  for (int i = 0, e = VL.size(); i < e; ++i) {
@@ -3544,8 +3543,7 @@ public:
      ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {}

  /// \brief Try to find a reduction tree.
-  bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B,
-                                 const DataLayout *DL) {
+  bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B) {
    assert((!Phi ||
            std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) &&
           "This phi needs to use the binary operator");
@@ -3570,9 +3568,10 @@ public:
    if (!isValidElementType(Ty))
      return false;

+    const DataLayout &DL = B->getModule()->getDataLayout();
    ReductionOpcode = B->getOpcode();
    ReducedValueOpcode = 0;
-    ReduxWidth = MinVecRegSize / DL->getTypeSizeInBits(Ty);
+    ReduxWidth = MinVecRegSize / DL.getTypeSizeInBits(Ty);
    ReductionRoot = B;
    ReductionPHI = Phi;

@@ -3882,8 +3881,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {

        // Try to match and vectorize a horizontal reduction.
        HorizontalReduction HorRdx;
-        if (ShouldVectorizeHor &&
-            HorRdx.matchAssociativeReduction(P, BI, DL) &&
+        if (ShouldVectorizeHor && HorRdx.matchAssociativeReduction(P, BI) &&
            HorRdx.tryToReduce(R, TTI)) {
          Changed = true;
          it = BB->begin();
@@ -3913,7 +3911,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
      if (BinaryOperator *BinOp =
              dyn_cast<BinaryOperator>(SI->getValueOperand())) {
        HorizontalReduction HorRdx;
-        if (((HorRdx.matchAssociativeReduction(nullptr, BinOp, DL) &&
+        if (((HorRdx.matchAssociativeReduction(nullptr, BinOp) &&
              HorRdx.tryToReduce(R, TTI)) ||
             tryToVectorize(BinOp, R))) {
          Changed = true;
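// Editorial sketch (not part of the commit): how matchAssociativeReduction()
// above sizes a horizontal reduction from the element type. MinVecRegSize =
// 128 mirrors the SLP vectorizer's default minimum vector register width (an
// assumption here); with 32-bit elements the reduction is tried 4 lanes wide.
#include <cstdio>

constexpr unsigned MinVecRegSize = 128; // bits, assumed default

unsigned reductionWidth(unsigned ElemSizeInBits) {
  return MinVecRegSize / ElemSizeInBits; // ReduxWidth in the code above
}

int main() {
  std::printf("f32 ReduxWidth: %u\n", reductionWidth(32)); // prints 4
  std::printf("f64 ReduxWidth: %u\n", reductionWidth(64)); // prints 2
  return 0;
}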