Diffstat (limited to 'lib')
319 files changed, 11080 insertions, 3509 deletions
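Editor's note: the hunks below thread TargetLibraryInfo through AliasAnalysis and the MemoryBuiltins helpers, so allocation- and free-call recognition now depends on which library functions are actually available instead of matching by name alone. As a rough, hypothetical sketch (not part of this commit) of how a consumer written against this era of the API would use the new signatures — the pass name, registration string, and counting logic are invented purely for illustration:

#include "llvm/Pass.h"
#include "llvm/Function.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

namespace {
// Hypothetical example pass (illustration only): counts the calls that the
// updated MemoryBuiltins helpers classify as allocations or frees. Both
// helpers now take a TargetLibraryInfo so that functions the target does not
// provide (or that -fno-builtin disables) are not treated as allocators.
struct CountAllocCalls : public FunctionPass {
  static char ID;
  CountAllocCalls() : FunctionPass(ID) {}

  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<TargetLibraryInfo>(); // the analysis threaded through below
    AU.setPreservesAll();
  }

  virtual bool runOnFunction(Function &F) {
    const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
    unsigned Allocs = 0, Frees = 0;
    for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE; ++BB)
      for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) {
        if (isAllocationFn(&*I, TLI))       // new signature: TLI is now passed in
          ++Allocs;
        else if (isFreeCall(&*I, TLI))      // likewise for free()/operator delete
          ++Frees;
      }
    errs() << F.getName() << ": " << Allocs << " allocs, " << Frees << " frees\n";
    return false;
  }
};
}

char CountAllocCalls::ID = 0;
static RegisterPass<CountAllocCalls> X("count-alloc-calls",
                                       "Count allocation/free calls (example)");

As the getAllocationData and isFreeCall hunks below show, passing a null or missing TargetLibraryInfo now makes these helpers conservatively report "not an allocation/free call", where the old code matched function names unconditionally.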
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index 3b6aab1..f768eec 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -36,6 +36,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Type.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; // Register the AliasAnalysis interface, providing a nice name to refer to. @@ -452,6 +453,7 @@ AliasAnalysis::~AliasAnalysis() {} /// void AliasAnalysis::InitializeAliasAnalysis(Pass *P) { TD = P->getAnalysisIfAvailable<TargetData>(); + TLI = P->getAnalysisIfAvailable<TargetLibraryInfo>(); AA = &P->getAnalysis<AliasAnalysis>(); } diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 92e8906..e9dcb37 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -550,7 +550,7 @@ void AliasSetTracker::copyValue(Value *From, Value *To) { //===----------------------------------------------------------------------===// void AliasSet::print(raw_ostream &OS) const { - OS << " AliasSet[" << (void*)this << ", " << RefCount << "] "; + OS << " AliasSet[" << (const void*)this << ", " << RefCount << "] "; OS << (AliasTy == MustAlias ? "must" : "may") << " alias, "; switch (AccessTy) { case NoModRef: OS << "No access "; break; @@ -590,8 +590,10 @@ void AliasSetTracker::print(raw_ostream &OS) const { OS << "\n"; } +#ifndef NDEBUG void AliasSet::dump() const { print(dbgs()); } void AliasSetTracker::dump() const { print(dbgs()); } +#endif //===----------------------------------------------------------------------===// // ASTCallbackVH Class Implementation diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index 0ba6af9..87a75fd 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -61,6 +61,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializePathProfileLoaderPassPass(Registry); initializeProfileVerifierPassPass(Registry); initializePathProfileVerifierPass(Registry); + initializeProfileMetadataLoaderPassPass(Registry); initializeRegionInfoPass(Registry); initializeRegionViewerPass(Registry); initializeRegionPrinterPass(Registry); diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 1d028c2..a3bc06a 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -85,9 +85,10 @@ static bool isEscapeSource(const Value *V) { /// getObjectSize - Return the size of the object specified by V, or /// UnknownSize if unknown. static uint64_t getObjectSize(const Value *V, const TargetData &TD, + const TargetLibraryInfo &TLI, bool RoundToAlign = false) { uint64_t Size; - if (getObjectSize(V, Size, &TD, RoundToAlign)) + if (getObjectSize(V, Size, &TD, &TLI, RoundToAlign)) return Size; return AliasAnalysis::UnknownSize; } @@ -95,10 +96,11 @@ static uint64_t getObjectSize(const Value *V, const TargetData &TD, /// isObjectSmallerThan - Return true if we can prove that the object specified /// by V is smaller than Size. static bool isObjectSmallerThan(const Value *V, uint64_t Size, - const TargetData &TD) { + const TargetData &TD, + const TargetLibraryInfo &TLI) { // This function needs to use the aligned object size because we allow // reads a bit past the end given sufficient alignment. 
- uint64_t ObjectSize = getObjectSize(V, TD, /*RoundToAlign*/true); + uint64_t ObjectSize = getObjectSize(V, TD, TLI, /*RoundToAlign*/true); return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size; } @@ -106,8 +108,8 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size, /// isObjectSize - Return true if we can prove that the object specified /// by V has size Size. static bool isObjectSize(const Value *V, uint64_t Size, - const TargetData &TD) { - uint64_t ObjectSize = getObjectSize(V, TD); + const TargetData &TD, const TargetLibraryInfo &TLI) { + uint64_t ObjectSize = getObjectSize(V, TD, TLI); return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize == Size; } @@ -126,6 +128,15 @@ namespace { const Value *V; ExtensionKind Extension; int64_t Scale; + + bool operator==(const VariableGEPIndex &Other) const { + return V == Other.V && Extension == Other.Extension && + Scale == Other.Scale; + } + + bool operator!=(const VariableGEPIndex &Other) const { + return !operator==(Other); + } }; } @@ -417,13 +428,7 @@ namespace { /// BasicAliasAnalysis - This is the primary alias analysis implementation. struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis { static char ID; // Class identification, replacement for typeinfo - BasicAliasAnalysis() : ImmutablePass(ID), - // AliasCache rarely has more than 1 or 2 elements, - // so start it off fairly small so that clear() - // doesn't have to tromp through 64 (the default) - // elements on each alias query. This really wants - // something like a SmallDenseMap. - AliasCache(8) { + BasicAliasAnalysis() : ImmutablePass(ID) { initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry()); } @@ -443,7 +448,11 @@ namespace { "BasicAliasAnalysis doesn't support interprocedural queries."); AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.TBAATag, LocB.Ptr, LocB.Size, LocB.TBAATag); - AliasCache.clear(); + // AliasCache rarely has more than 1 or 2 elements, always use + // shrink_and_clear so it quickly returns to the inline capacity of the + // SmallDenseMap if it ever grows larger. + // FIXME: This should really be shrink_to_inline_capacity_and_clear(). + AliasCache.shrink_and_clear(); return Alias; } @@ -481,7 +490,7 @@ namespace { private: // AliasCache - Track alias queries to guard against recursion. typedef std::pair<Location, Location> LocPair; - typedef DenseMap<LocPair, AliasResult> AliasCacheTy; + typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy; AliasCacheTy AliasCache; // Visited - Track instructions visited by pointsToConstantMemory. @@ -490,6 +499,7 @@ namespace { // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP // instruction against another. AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size, + const MDNode *V1TBAAInfo, const Value *V2, uint64_t V2Size, const MDNode *V2TBAAInfo, const Value *UnderlyingV1, const Value *UnderlyingV2); @@ -807,6 +817,21 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min); } +static bool areVarIndicesEqual(SmallVector<VariableGEPIndex, 4> &Indices1, + SmallVector<VariableGEPIndex, 4> &Indices2) { + unsigned Size1 = Indices1.size(); + unsigned Size2 = Indices2.size(); + + if (Size1 != Size2) + return false; + + for (unsigned I = 0; I != Size1; ++I) + if (Indices1[I] != Indices2[I]) + return false; + + return true; +} + /// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction /// against another pointer. 
We know that V1 is a GEP, but we don't know /// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, TD), @@ -814,6 +839,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, /// AliasAnalysis::AliasResult BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, + const MDNode *V1TBAAInfo, const Value *V2, uint64_t V2Size, const MDNode *V2TBAAInfo, const Value *UnderlyingV1, @@ -821,9 +847,41 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, int64_t GEP1BaseOffset; SmallVector<VariableGEPIndex, 4> GEP1VariableIndices; - // If we have two gep instructions with must-alias'ing base pointers, figure - // out if the indexes to the GEP tell us anything about the derived pointer. + // If we have two gep instructions with must-alias or not-alias'ing base + // pointers, figure out if the indexes to the GEP tell us anything about the + // derived pointer. if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) { + // Check for geps of non-aliasing underlying pointers where the offsets are + // identical. + if (V1Size == V2Size) { + // Do the base pointers alias assuming type and size. + AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size, + V1TBAAInfo, UnderlyingV2, + V2Size, V2TBAAInfo); + if (PreciseBaseAlias == NoAlias) { + // See if the computed offset from the common pointer tells us about the + // relation of the resulting pointer. + int64_t GEP2BaseOffset; + SmallVector<VariableGEPIndex, 4> GEP2VariableIndices; + const Value *GEP2BasePtr = + DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD); + const Value *GEP1BasePtr = + DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD); + // DecomposeGEPExpression and GetUnderlyingObject should return the + // same result except when DecomposeGEPExpression has no TargetData. + if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) { + assert(TD == 0 && + "DecomposeGEPExpression and GetUnderlyingObject disagree!"); + return MayAlias; + } + // Same offsets. + if (GEP1BaseOffset == GEP2BaseOffset && + areVarIndicesEqual(GEP1VariableIndices, GEP2VariableIndices)) + return NoAlias; + GEP1VariableIndices.clear(); + } + } + // Do the base pointers alias? AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0, UnderlyingV2, UnknownSize, 0); @@ -843,9 +901,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, const Value *GEP2BasePtr = DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD); - // If DecomposeGEPExpression isn't able to look all the way through the - // addressing operation, we must not have TD and this is too complex for us - // to handle without it. + // DecomposeGEPExpression and GetUnderlyingObject should return the + // same result except when DecomposeGEPExpression has no TargetData. if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) { assert(TD == 0 && "DecomposeGEPExpression and GetUnderlyingObject disagree!"); @@ -879,9 +936,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, const Value *GEP1BasePtr = DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD); - // If DecomposeGEPExpression isn't able to look all the way through the - // addressing operation, we must not have TD and this is too complex for us - // to handle without it. + // DecomposeGEPExpression and GetUnderlyingObject should return the + // same result except when DecomposeGEPExpression has no TargetData. 
if (GEP1BasePtr != UnderlyingV1) { assert(TD == 0 && "DecomposeGEPExpression and GetUnderlyingObject disagree!"); @@ -1004,12 +1060,42 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, // on corresponding edges. if (const PHINode *PN2 = dyn_cast<PHINode>(V2)) if (PN2->getParent() == PN->getParent()) { + LocPair Locs(Location(PN, PNSize, PNTBAAInfo), + Location(V2, V2Size, V2TBAAInfo)); + if (PN > V2) + std::swap(Locs.first, Locs.second); + AliasResult Alias = aliasCheck(PN->getIncomingValue(0), PNSize, PNTBAAInfo, PN2->getIncomingValueForBlock(PN->getIncomingBlock(0)), V2Size, V2TBAAInfo); if (Alias == MayAlias) return MayAlias; + + // If the first source of the PHI nodes NoAlias and the other inputs are + // the PHI node itself through some amount of recursion this does not add + // any new information so just return NoAlias. + // bb: + // ptr = ptr2 + 1 + // loop: + // ptr_phi = phi [bb, ptr], [loop, ptr_plus_one] + // ptr2_phi = phi [bb, ptr2], [loop, ptr2_plus_one] + // ... + // ptr_plus_one = gep ptr_phi, 1 + // ptr2_plus_one = gep ptr2_phi, 1 + // We assume for the recursion that the the phis (ptr_phi, ptr2_phi) do + // not alias each other. + bool ArePhisAssumedNoAlias = false; + AliasResult OrigAliasResult; + if (Alias == NoAlias) { + // Pretend the phis do not alias. + assert(AliasCache.count(Locs) && + "There must exist an entry for the phi node"); + OrigAliasResult = AliasCache[Locs]; + AliasCache[Locs] = NoAlias; + ArePhisAssumedNoAlias = true; + } + for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) { AliasResult ThisAlias = aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo, @@ -1019,6 +1105,11 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, if (Alias == MayAlias) break; } + + // Reset if speculation failed. + if (ArePhisAssumedNoAlias && Alias != NoAlias) + AliasCache[Locs] = OrigAliasResult; + return Alias; } @@ -1133,8 +1224,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, // If the size of one access is larger than the entire object on the other // side, then we know such behavior is undefined and can assume no alias. if (TD) - if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD)) || - (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD))) + if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD, *TLI)) || + (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD, *TLI))) return NoAlias; // Check the cache before climbing up use-def chains. This also terminates @@ -1156,7 +1247,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, std::swap(O1, O2); } if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) { - AliasResult Result = aliasGEP(GV1, V1Size, V2, V2Size, V2TBAAInfo, O1, O2); + AliasResult Result = aliasGEP(GV1, V1Size, V1TBAAInfo, V2, V2Size, V2TBAAInfo, O1, O2); if (Result != MayAlias) return AliasCache[Locs] = Result; } @@ -1184,8 +1275,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, // accesses is accessing the entire object, then the accesses must // overlap in some way. 
if (TD && O1 == O2) - if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD)) || - (V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD))) + if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD, *TLI)) || + (V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD, *TLI))) return AliasCache[Locs] = PartialAlias; AliasResult Result = diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index b255ce6..04a6560 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -115,14 +115,14 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) { return false; } - SmallPtrSet<BasicBlock *, 4> UnreachableEdges; - SmallPtrSet<BasicBlock *, 4> ReachableEdges; + SmallVector<unsigned, 4> UnreachableEdges; + SmallVector<unsigned, 4> ReachableEdges; for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { if (PostDominatedByUnreachable.count(*I)) - UnreachableEdges.insert(*I); + UnreachableEdges.push_back(I.getSuccessorIndex()); else - ReachableEdges.insert(*I); + ReachableEdges.push_back(I.getSuccessorIndex()); } // If all successors are in the set of blocks post-dominated by unreachable, @@ -136,18 +136,19 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) { return false; uint32_t UnreachableWeight = - std::max(UR_TAKEN_WEIGHT / UnreachableEdges.size(), MIN_WEIGHT); - for (SmallPtrSet<BasicBlock *, 4>::iterator I = UnreachableEdges.begin(), - E = UnreachableEdges.end(); + std::max(UR_TAKEN_WEIGHT / (unsigned)UnreachableEdges.size(), MIN_WEIGHT); + for (SmallVector<unsigned, 4>::iterator I = UnreachableEdges.begin(), + E = UnreachableEdges.end(); I != E; ++I) setEdgeWeight(BB, *I, UnreachableWeight); if (ReachableEdges.empty()) return true; uint32_t ReachableWeight = - std::max(UR_NONTAKEN_WEIGHT / ReachableEdges.size(), NORMAL_WEIGHT); - for (SmallPtrSet<BasicBlock *, 4>::iterator I = ReachableEdges.begin(), - E = ReachableEdges.end(); + std::max(UR_NONTAKEN_WEIGHT / (unsigned)ReachableEdges.size(), + NORMAL_WEIGHT); + for (SmallVector<unsigned, 4>::iterator I = ReachableEdges.begin(), + E = ReachableEdges.end(); I != E; ++I) setEdgeWeight(BB, *I, ReachableWeight); @@ -187,7 +188,7 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) { } assert(Weights.size() == TI->getNumSuccessors() && "Checked above"); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - setEdgeWeight(BB, TI->getSuccessor(i), Weights[i]); + setEdgeWeight(BB, i, Weights[i]); return true; } @@ -211,19 +212,17 @@ bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) { assert(CI->getOperand(1)->getType()->isPointerTy()); - BasicBlock *Taken = BI->getSuccessor(0); - BasicBlock *NonTaken = BI->getSuccessor(1); - // p != 0 -> isProb = true // p == 0 -> isProb = false // p != q -> isProb = true // p == q -> isProb = false; + unsigned TakenIdx = 0, NonTakenIdx = 1; bool isProb = CI->getPredicate() == ICmpInst::ICMP_NE; if (!isProb) - std::swap(Taken, NonTaken); + std::swap(TakenIdx, NonTakenIdx); - setEdgeWeight(BB, Taken, PH_TAKEN_WEIGHT); - setEdgeWeight(BB, NonTaken, PH_NONTAKEN_WEIGHT); + setEdgeWeight(BB, TakenIdx, PH_TAKEN_WEIGHT); + setEdgeWeight(BB, NonTakenIdx, PH_NONTAKEN_WEIGHT); return true; } @@ -234,17 +233,17 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { if (!L) return false; - SmallPtrSet<BasicBlock *, 8> BackEdges; - SmallPtrSet<BasicBlock *, 8> ExitingEdges; - SmallPtrSet<BasicBlock *, 8> InEdges; // Edges from header 
to the loop. + SmallVector<unsigned, 8> BackEdges; + SmallVector<unsigned, 8> ExitingEdges; + SmallVector<unsigned, 8> InEdges; // Edges from header to the loop. for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { if (!L->contains(*I)) - ExitingEdges.insert(*I); + ExitingEdges.push_back(I.getSuccessorIndex()); else if (L->getHeader() == *I) - BackEdges.insert(*I); + BackEdges.push_back(I.getSuccessorIndex()); else - InEdges.insert(*I); + InEdges.push_back(I.getSuccessorIndex()); } if (uint32_t numBackEdges = BackEdges.size()) { @@ -252,10 +251,9 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { if (backWeight < NORMAL_WEIGHT) backWeight = NORMAL_WEIGHT; - for (SmallPtrSet<BasicBlock *, 8>::iterator EI = BackEdges.begin(), + for (SmallVector<unsigned, 8>::iterator EI = BackEdges.begin(), EE = BackEdges.end(); EI != EE; ++EI) { - BasicBlock *Back = *EI; - setEdgeWeight(BB, Back, backWeight); + setEdgeWeight(BB, *EI, backWeight); } } @@ -264,10 +262,9 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { if (inWeight < NORMAL_WEIGHT) inWeight = NORMAL_WEIGHT; - for (SmallPtrSet<BasicBlock *, 8>::iterator EI = InEdges.begin(), + for (SmallVector<unsigned, 8>::iterator EI = InEdges.begin(), EE = InEdges.end(); EI != EE; ++EI) { - BasicBlock *Back = *EI; - setEdgeWeight(BB, Back, inWeight); + setEdgeWeight(BB, *EI, inWeight); } } @@ -276,10 +273,9 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { if (exitWeight < MIN_WEIGHT) exitWeight = MIN_WEIGHT; - for (SmallPtrSet<BasicBlock *, 8>::iterator EI = ExitingEdges.begin(), + for (SmallVector<unsigned, 8>::iterator EI = ExitingEdges.begin(), EE = ExitingEdges.end(); EI != EE; ++EI) { - BasicBlock *Exiting = *EI; - setEdgeWeight(BB, Exiting, exitWeight); + setEdgeWeight(BB, *EI, exitWeight); } } @@ -335,14 +331,13 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) { return false; } - BasicBlock *Taken = BI->getSuccessor(0); - BasicBlock *NonTaken = BI->getSuccessor(1); + unsigned TakenIdx = 0, NonTakenIdx = 1; if (!isProb) - std::swap(Taken, NonTaken); + std::swap(TakenIdx, NonTakenIdx); - setEdgeWeight(BB, Taken, ZH_TAKEN_WEIGHT); - setEdgeWeight(BB, NonTaken, ZH_NONTAKEN_WEIGHT); + setEdgeWeight(BB, TakenIdx, ZH_TAKEN_WEIGHT); + setEdgeWeight(BB, NonTakenIdx, ZH_NONTAKEN_WEIGHT); return true; } @@ -372,14 +367,13 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) { return false; } - BasicBlock *Taken = BI->getSuccessor(0); - BasicBlock *NonTaken = BI->getSuccessor(1); + unsigned TakenIdx = 0, NonTakenIdx = 1; if (!isProb) - std::swap(Taken, NonTaken); + std::swap(TakenIdx, NonTakenIdx); - setEdgeWeight(BB, Taken, FPH_TAKEN_WEIGHT); - setEdgeWeight(BB, NonTaken, FPH_NONTAKEN_WEIGHT); + setEdgeWeight(BB, TakenIdx, FPH_TAKEN_WEIGHT); + setEdgeWeight(BB, NonTakenIdx, FPH_NONTAKEN_WEIGHT); return true; } @@ -389,11 +383,8 @@ bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) { if (!II) return false; - BasicBlock *Normal = II->getNormalDest(); - BasicBlock *Unwind = II->getUnwindDest(); - - setEdgeWeight(BB, Normal, IH_TAKEN_WEIGHT); - setEdgeWeight(BB, Unwind, IH_NONTAKEN_WEIGHT); + setEdgeWeight(BB, 0/*Index for Normal*/, IH_TAKEN_WEIGHT); + setEdgeWeight(BB, 1/*Index for Unwind*/, IH_NONTAKEN_WEIGHT); return true; } @@ -450,8 +441,7 @@ uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const { uint32_t Sum = 0; for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { - 
const BasicBlock *Succ = *I; - uint32_t Weight = getEdgeWeight(BB, Succ); + uint32_t Weight = getEdgeWeight(BB, I.getSuccessorIndex()); uint32_t PrevSum = Sum; Sum += Weight; @@ -494,11 +484,13 @@ BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { return 0; } -// Return edge's weight. If can't find it, return DEFAULT_WEIGHT value. +/// Get the raw edge weight for the edge. If can't find it, return +/// DEFAULT_WEIGHT value. Here an edge is specified using PredBlock and an index +/// to the successors. uint32_t BranchProbabilityInfo:: -getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const { - Edge E(Src, Dst); - DenseMap<Edge, uint32_t>::const_iterator I = Weights.find(E); +getEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors) const { + DenseMap<Edge, uint32_t>::const_iterator I = + Weights.find(std::make_pair(Src, IndexInSuccessors)); if (I != Weights.end()) return I->second; @@ -506,15 +498,43 @@ getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const { return DEFAULT_WEIGHT; } +/// Get the raw edge weight calculated for the block pair. This returns the sum +/// of all raw edge weights from Src to Dst. +uint32_t BranchProbabilityInfo:: +getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const { + uint32_t Weight = 0; + DenseMap<Edge, uint32_t>::const_iterator MapI; + for (succ_const_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I) + if (*I == Dst) { + MapI = Weights.find(std::make_pair(Src, I.getSuccessorIndex())); + if (MapI != Weights.end()) + Weight += MapI->second; + } + return (Weight == 0) ? DEFAULT_WEIGHT : Weight; +} + +/// Set the edge weight for a given edge specified by PredBlock and an index +/// to the successors. void BranchProbabilityInfo:: -setEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst, uint32_t Weight) { - Weights[std::make_pair(Src, Dst)] = Weight; +setEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors, + uint32_t Weight) { + Weights[std::make_pair(Src, IndexInSuccessors)] = Weight; DEBUG(dbgs() << "set edge " << Src->getName() << " -> " - << Dst->getName() << " weight to " << Weight - << (isEdgeHot(Src, Dst) ? " [is HOT now]\n" : "\n")); + << IndexInSuccessors << " successor weight to " + << Weight << "\n"); } +/// Get an edge's probability, relative to other out-edges from Src. +BranchProbability BranchProbabilityInfo:: +getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const { + uint32_t N = getEdgeWeight(Src, IndexInSuccessors); + uint32_t D = getSumForBlock(Src); + + return BranchProbability(N, D); +} +/// Get the probability of going from Src to Dst. It returns the sum of all +/// probabilities for edges from Src to Dst. 
BranchProbability BranchProbabilityInfo:: getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const { diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 96e68b4..e461848 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -44,6 +44,8 @@ add_llvm_library(LLVMAnalysis ProfileInfoLoader.cpp ProfileInfoLoaderPass.cpp ProfileVerifierPass.cpp + ProfileDataLoader.cpp + ProfileDataLoaderPass.cpp RegionInfo.cpp RegionPass.cpp RegionPrinter.cpp diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index f5e619c..4ad613c 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -659,7 +659,8 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy); APInt Offset = APInt(BitWidth, TD->getIndexedOffset(Ptr->getType(), - makeArrayRef((Value **)Ops.data() + 1, + makeArrayRef((Value *const*) + Ops.data() + 1, Ops.size() - 1))); Ptr = StripPtrCastKeepAS(Ptr); diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp index 1604576..5536a9b 100644 --- a/lib/Analysis/DominanceFrontier.cpp +++ b/lib/Analysis/DominanceFrontier.cpp @@ -133,7 +133,9 @@ void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const { } } +#ifndef NDEBUG void DominanceFrontierBase::dump() const { print(dbgs()); } +#endif diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp index 0df3e8a..947ad51 100644 --- a/lib/Analysis/IPA/CallGraph.cpp +++ b/lib/Analysis/IPA/CallGraph.cpp @@ -198,9 +198,11 @@ void CallGraph::print(raw_ostream &OS, Module*) const { for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I) I->second->print(OS); } +#ifndef NDEBUG void CallGraph::dump() const { print(dbgs(), 0); } +#endif //===----------------------------------------------------------------------===// // Implementations of public modification methods @@ -267,7 +269,9 @@ void CallGraphNode::print(raw_ostream &OS) const { OS << '\n'; } +#ifndef NDEBUG void CallGraphNode::dump() const { print(dbgs()); } +#endif /// removeCallEdgeFor - This method removes the edge in the node for the /// specified call site. Note that this method takes linear time, so it diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index 22f6e96..990caa8 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -263,7 +263,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { if (AnalyzeUsesOfPointer(BCI, Readers, Writers, OkayStoreDest)) return true; - } else if (isFreeCall(U)) { + } else if (isFreeCall(U, TLI)) { Writers.push_back(cast<Instruction>(U)->getParent()->getParent()); } else if (CallInst *CI = dyn_cast<CallInst>(U)) { // Make sure that this is just the function being called, not that it is @@ -329,7 +329,7 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { // Check the value being stored. Value *Ptr = GetUnderlyingObject(SI->getOperand(0)); - if (!isAllocLikeFn(Ptr)) + if (!isAllocLikeFn(Ptr, TLI)) return false; // Too hard to analyze. // Analyze all uses of the allocation. If any of them are used in a @@ -458,7 +458,7 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { if (SI->isVolatile()) // Treat volatile stores as reading memory somewhere. 
FunctionEffect |= Ref; - } else if (isAllocationFn(&*II) || isFreeCall(&*II)) { + } else if (isAllocationFn(&*II, TLI) || isFreeCall(&*II, TLI)) { FunctionEffect |= ModRef; } else if (IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(&*II)) { // The callgraph doesn't include intrinsic calls. diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 0a6682a..f705181 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -273,9 +273,11 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const { } } +#ifndef NDEBUG void IVUsers::dump() const { print(dbgs()); } +#endif void IVUsers::releaseMemory() { Processed.clear(); diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index bc1ecd2..12be7fd 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -974,6 +974,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { return AlwaysInline || Cost < Threshold; } +#ifndef NDEBUG /// \brief Dump stats about this call's analysis. void CallAnalyzer::dump() { #define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n" @@ -987,6 +988,7 @@ void CallAnalyzer::dump() { DEBUG_PRINT_STAT(SROACostSavingsLost); #undef DEBUG_PRINT_STAT } +#endif InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, int Threshold) { return getInlineCost(CS, CS.getCalledFunction(), Threshold); diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 9140786..ec618fa 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -470,8 +470,10 @@ bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) { return true; LVIValueHandle ValHandle(Val, this); - if (!ValueCache.count(ValHandle)) return false; - return ValueCache[ValHandle].count(BB); + std::map<LVIValueHandle, ValueCacheEntryTy>::iterator I = + ValueCache.find(ValHandle); + if (I == ValueCache.end()) return false; + return I->second.count(BB); } LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) { @@ -845,9 +847,12 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) { ConstantRange EdgeVal(i.getCaseValue()->getValue()); - if (DefaultCase) - EdgesVals = EdgesVals.difference(EdgeVal); - else if (i.getCaseSuccessor() == BBTo) + if (DefaultCase) { + // It is possible that the default destination is the destination of + // some cases. There is no need to perform difference for those cases. 
+ if (i.getCaseSuccessor() != BBTo) + EdgesVals = EdgesVals.difference(EdgeVal); + } else if (i.getCaseSuccessor() == BBTo) EdgesVals = EdgesVals.unionWith(EdgeVal); } Result = LVILatticeVal::getRange(EdgesVals); diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 20c33a3..4a18104 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -306,9 +306,11 @@ BasicBlock *Loop::getUniqueExitBlock() const { return 0; } +#ifndef NDEBUG void Loop::dump() const { print(dbgs()); } +#endif //===----------------------------------------------------------------------===// // UnloopUpdater implementation @@ -429,8 +431,8 @@ void UnloopUpdater::updateSubloopParents() { Unloop->removeChildLoop(llvm::prior(Unloop->end())); assert(SubloopParents.count(Subloop) && "DFS failed to visit subloop"); - if (SubloopParents[Subloop]) - SubloopParents[Subloop]->addChildLoop(Subloop); + if (Loop *Parent = SubloopParents[Subloop]) + Parent->addChildLoop(Subloop); else LI->addTopLevelLoop(Subloop); } @@ -456,9 +458,8 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) { assert(Subloop && "subloop is not an ancestor of the original loop"); } // Get the current nearest parent of the Subloop exits, initially Unloop. - if (!SubloopParents.count(Subloop)) - SubloopParents[Subloop] = Unloop; - NearLoop = SubloopParents[Subloop]; + NearLoop = + SubloopParents.insert(std::make_pair(Subloop, Unloop)).first->second; } succ_iterator I = succ_begin(BB), E = succ_end(BB); diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index b986b32..5b2313e 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -39,7 +40,7 @@ enum AllocType { }; struct AllocFnsTy { - const char *Name; + LibFunc::Func Func; AllocType AllocTy; unsigned char NumParams; // First and Second size parameters (or -1 if unused) @@ -49,22 +50,22 @@ struct AllocFnsTy { // FIXME: certain users need more information. E.g., SimplifyLibCalls needs to // know which functions are nounwind, noalias, nocapture parameters, etc. 
static const AllocFnsTy AllocationFnData[] = { - {"malloc", MallocLike, 1, 0, -1}, - {"valloc", MallocLike, 1, 0, -1}, - {"_Znwj", MallocLike, 1, 0, -1}, // new(unsigned int) - {"_ZnwjRKSt9nothrow_t", MallocLike, 2, 0, -1}, // new(unsigned int, nothrow) - {"_Znwm", MallocLike, 1, 0, -1}, // new(unsigned long) - {"_ZnwmRKSt9nothrow_t", MallocLike, 2, 0, -1}, // new(unsigned long, nothrow) - {"_Znaj", MallocLike, 1, 0, -1}, // new[](unsigned int) - {"_ZnajRKSt9nothrow_t", MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow) - {"_Znam", MallocLike, 1, 0, -1}, // new[](unsigned long) - {"_ZnamRKSt9nothrow_t", MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow) - {"posix_memalign", MallocLike, 3, 2, -1}, - {"calloc", CallocLike, 2, 0, 1}, - {"realloc", ReallocLike, 2, 1, -1}, - {"reallocf", ReallocLike, 2, 1, -1}, - {"strdup", StrDupLike, 1, -1, -1}, - {"strndup", StrDupLike, 2, 1, -1} + {LibFunc::malloc, MallocLike, 1, 0, -1}, + {LibFunc::valloc, MallocLike, 1, 0, -1}, + {LibFunc::Znwj, MallocLike, 1, 0, -1}, // new(unsigned int) + {LibFunc::ZnwjRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned int, nothrow) + {LibFunc::Znwm, MallocLike, 1, 0, -1}, // new(unsigned long) + {LibFunc::ZnwmRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned long, nothrow) + {LibFunc::Znaj, MallocLike, 1, 0, -1}, // new[](unsigned int) + {LibFunc::ZnajRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow) + {LibFunc::Znam, MallocLike, 1, 0, -1}, // new[](unsigned long) + {LibFunc::ZnamRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow) + {LibFunc::posix_memalign, MallocLike, 3, 2, -1}, + {LibFunc::calloc, CallocLike, 2, 0, 1}, + {LibFunc::realloc, ReallocLike, 2, 1, -1}, + {LibFunc::reallocf, ReallocLike, 2, 1, -1}, + {LibFunc::strdup, StrDupLike, 1, -1, -1}, + {LibFunc::strndup, StrDupLike, 2, 1, -1} }; @@ -85,15 +86,22 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) { /// \brief Returns the allocation data for the given value if it is a call to a /// known allocation function, and NULL otherwise. static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, + const TargetLibraryInfo *TLI, bool LookThroughBitCast = false) { Function *Callee = getCalledFunction(V, LookThroughBitCast); if (!Callee) return 0; + // Make sure that the function is available. + StringRef FnName = Callee->getName(); + LibFunc::Func TLIFn; + if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) + return 0; + unsigned i = 0; bool found = false; for ( ; i < array_lengthof(AllocationFnData); ++i) { - if (Callee->getName() == AllocationFnData[i].Name) { + if (AllocationFnData[i].Func == TLIFn) { found = true; break; } @@ -106,7 +114,6 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, return 0; // Check function prototype. - // FIXME: Check the nobuiltin metadata?? (PR5130) int FstParam = FnData->FstParam; int SndParam = FnData->SndParam; FunctionType *FTy = Callee->getFunctionType(); @@ -132,57 +139,65 @@ static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) { /// \brief Tests if a value is a call or invoke to a library function that /// allocates or reallocates memory (either malloc, calloc, realloc, or strdup /// like). 
-bool llvm::isAllocationFn(const Value *V, bool LookThroughBitCast) { - return getAllocationData(V, AnyAlloc, LookThroughBitCast); +bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, AnyAlloc, TLI, LookThroughBitCast); } /// \brief Tests if a value is a call or invoke to a function that returns a /// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions). -bool llvm::isNoAliasFn(const Value *V, bool LookThroughBitCast) { +bool llvm::isNoAliasFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { // it's safe to consider realloc as noalias since accessing the original // pointer is undefined behavior - return isAllocationFn(V, LookThroughBitCast) || + return isAllocationFn(V, TLI, LookThroughBitCast) || hasNoAliasAttr(V, LookThroughBitCast); } /// \brief Tests if a value is a call or invoke to a library function that /// allocates uninitialized memory (such as malloc). -bool llvm::isMallocLikeFn(const Value *V, bool LookThroughBitCast) { - return getAllocationData(V, MallocLike, LookThroughBitCast); +bool llvm::isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, MallocLike, TLI, LookThroughBitCast); } /// \brief Tests if a value is a call or invoke to a library function that /// allocates zero-filled memory (such as calloc). -bool llvm::isCallocLikeFn(const Value *V, bool LookThroughBitCast) { - return getAllocationData(V, CallocLike, LookThroughBitCast); +bool llvm::isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, CallocLike, TLI, LookThroughBitCast); } /// \brief Tests if a value is a call or invoke to a library function that /// allocates memory (either malloc, calloc, or strdup like). -bool llvm::isAllocLikeFn(const Value *V, bool LookThroughBitCast) { - return getAllocationData(V, AllocLike, LookThroughBitCast); +bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, AllocLike, TLI, LookThroughBitCast); } /// \brief Tests if a value is a call or invoke to a library function that /// reallocates memory (such as realloc). -bool llvm::isReallocLikeFn(const Value *V, bool LookThroughBitCast) { - return getAllocationData(V, ReallocLike, LookThroughBitCast); +bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, ReallocLike, TLI, LookThroughBitCast); } /// extractMallocCall - Returns the corresponding CallInst if the instruction /// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we /// ignore InvokeInst here. -const CallInst *llvm::extractMallocCall(const Value *I) { - return isMallocLikeFn(I) ? dyn_cast<CallInst>(I) : 0; +const CallInst *llvm::extractMallocCall(const Value *I, + const TargetLibraryInfo *TLI) { + return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : 0; } static Value *computeArraySize(const CallInst *CI, const TargetData *TD, + const TargetLibraryInfo *TLI, bool LookThroughSExt = false) { if (!CI) return NULL; // The size of the malloc's result type must be known to determine array size. 
- Type *T = getMallocAllocatedType(CI); + Type *T = getMallocAllocatedType(CI, TLI); if (!T || !T->isSized() || !TD) return NULL; @@ -204,9 +219,11 @@ static Value *computeArraySize(const CallInst *CI, const TargetData *TD, /// isArrayMalloc - Returns the corresponding CallInst if the instruction /// is a call to malloc whose array size can be determined and the array size /// is not constant 1. Otherwise, return NULL. -const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) { - const CallInst *CI = extractMallocCall(I); - Value *ArraySize = computeArraySize(CI, TD); +const CallInst *llvm::isArrayMalloc(const Value *I, + const TargetData *TD, + const TargetLibraryInfo *TLI) { + const CallInst *CI = extractMallocCall(I, TLI); + Value *ArraySize = computeArraySize(CI, TD, TLI); if (ArraySize && ArraySize != ConstantInt::get(CI->getArgOperand(0)->getType(), 1)) @@ -221,8 +238,9 @@ const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) { /// 0: PointerType is the calls' return type. /// 1: PointerType is the bitcast's result type. /// >1: Unique PointerType cannot be determined, return NULL. -PointerType *llvm::getMallocType(const CallInst *CI) { - assert(isMallocLikeFn(CI) && "getMallocType and not malloc call"); +PointerType *llvm::getMallocType(const CallInst *CI, + const TargetLibraryInfo *TLI) { + assert(isMallocLikeFn(CI, TLI) && "getMallocType and not malloc call"); PointerType *MallocType = NULL; unsigned NumOfBitCastUses = 0; @@ -252,8 +270,9 @@ PointerType *llvm::getMallocType(const CallInst *CI) { /// 0: PointerType is the malloc calls' return type. /// 1: PointerType is the bitcast's result type. /// >1: Unique PointerType cannot be determined, return NULL. -Type *llvm::getMallocAllocatedType(const CallInst *CI) { - PointerType *PT = getMallocType(CI); +Type *llvm::getMallocAllocatedType(const CallInst *CI, + const TargetLibraryInfo *TLI) { + PointerType *PT = getMallocType(CI, TLI); return PT ? PT->getElementType() : NULL; } @@ -263,21 +282,23 @@ Type *llvm::getMallocAllocatedType(const CallInst *CI) { /// constant 1. Otherwise, return NULL for mallocs whose array size cannot be /// determined. Value *llvm::getMallocArraySize(CallInst *CI, const TargetData *TD, + const TargetLibraryInfo *TLI, bool LookThroughSExt) { - assert(isMallocLikeFn(CI) && "getMallocArraySize and not malloc call"); - return computeArraySize(CI, TD, LookThroughSExt); + assert(isMallocLikeFn(CI, TLI) && "getMallocArraySize and not malloc call"); + return computeArraySize(CI, TD, TLI, LookThroughSExt); } /// extractCallocCall - Returns the corresponding CallInst if the instruction /// is a calloc call. -const CallInst *llvm::extractCallocCall(const Value *I) { - return isCallocLikeFn(I) ? cast<CallInst>(I) : 0; +const CallInst *llvm::extractCallocCall(const Value *I, + const TargetLibraryInfo *TLI) { + return isCallocLikeFn(I, TLI) ? 
cast<CallInst>(I) : 0; } /// isFreeCall - Returns non-null if the value is a call to the builtin free() -const CallInst *llvm::isFreeCall(const Value *I) { +const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { const CallInst *CI = dyn_cast<CallInst>(I); if (!CI) return 0; @@ -285,9 +306,14 @@ const CallInst *llvm::isFreeCall(const Value *I) { if (Callee == 0 || !Callee->isDeclaration()) return 0; - if (Callee->getName() != "free" && - Callee->getName() != "_ZdlPv" && // operator delete(void*) - Callee->getName() != "_ZdaPv") // operator delete[](void*) + StringRef FnName = Callee->getName(); + LibFunc::Func TLIFn; + if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) + return 0; + + if (TLIFn != LibFunc::free && + TLIFn != LibFunc::ZdlPv && // operator delete(void*) + TLIFn != LibFunc::ZdaPv) // operator delete[](void*) return 0; // Check free prototype. @@ -316,11 +342,11 @@ const CallInst *llvm::isFreeCall(const Value *I) { /// If RoundToAlign is true, then Size is rounded up to the aligment of allocas, /// byval arguments, and global variables. bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const TargetData *TD, - bool RoundToAlign) { + const TargetLibraryInfo *TLI, bool RoundToAlign) { if (!TD) return false; - ObjectSizeOffsetVisitor Visitor(TD, Ptr->getContext(), RoundToAlign); + ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign); SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr)); if (!Visitor.bothKnown(Data)) return false; @@ -348,9 +374,10 @@ APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) { } ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const TargetData *TD, + const TargetLibraryInfo *TLI, LLVMContext &Context, bool RoundToAlign) -: TD(TD), RoundToAlign(RoundToAlign) { +: TD(TD), TLI(TLI), RoundToAlign(RoundToAlign) { IntegerType *IntTy = TD->getIntPtrType(Context); IntTyBits = IntTy->getBitWidth(); Zero = APInt::getNullValue(IntTyBits); @@ -416,7 +443,8 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) { } SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) { - const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc); + const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc, + TLI); if (!FnData) return unknown(); @@ -532,8 +560,9 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) { ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const TargetData *TD, + const TargetLibraryInfo *TLI, LLVMContext &Context) -: TD(TD), Context(Context), Builder(Context, TargetFolder(TD)) { +: TD(TD), TLI(TLI), Context(Context), Builder(Context, TargetFolder(TD)) { IntTy = TD->getIntPtrType(Context); Zero = ConstantInt::get(IntTy, 0); } @@ -558,7 +587,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) { } SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { - ObjectSizeOffsetVisitor Visitor(TD, Context); + ObjectSizeOffsetVisitor Visitor(TD, TLI, Context); SizeOffsetType Const = Visitor.compute(V); if (Visitor.bothKnown(Const)) return std::make_pair(ConstantInt::get(Context, Const.first), @@ -621,7 +650,8 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) { } SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallSite(CallSite CS) { - const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc); + const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc, + TLI); if (!FnData) return unknown(); diff 
--git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 059e574..5736c35 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -148,7 +148,7 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, return AliasAnalysis::ModRef; } - if (const CallInst *CI = isFreeCall(Inst)) { + if (const CallInst *CI = isFreeCall(Inst, AA->getTargetLibraryInfo())) { // calls to free() deallocate the entire structure Loc = AliasAnalysis::Location(CI->getArgOperand(0)); return AliasAnalysis::Mod; @@ -479,12 +479,20 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // a subsequent bitcast of the malloc call result. There can be stores to // the malloced memory between the malloc call and its bitcast uses, and we // need to continue scanning until the malloc call. - if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst)) { + const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo(); + if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) { const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD); if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr)) return MemDepResult::getDef(Inst); - continue; + // Be conservative if the accessed pointer may alias the allocation. + if (AA->alias(Inst, AccessPtr) != AliasAnalysis::NoAlias) + return MemDepResult::getClobber(Inst); + // If the allocation is not aliased and does not read memory (like + // strdup), it is safe to ignore. + if (isa<AllocaInst>(Inst) || + isMallocLikeFn(Inst, TLI) || isCallocLikeFn(Inst, TLI)) + continue; } // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index 38cb1c9..d6a17ca 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -41,6 +41,7 @@ static bool CanPHITrans(Instruction *Inst) { return false; } +#ifndef NDEBUG void PHITransAddr::dump() const { if (Addr == 0) { dbgs() << "PHITransAddr: null\n"; @@ -50,6 +51,7 @@ void PHITransAddr::dump() const { for (unsigned i = 0, e = InstInputs.size(); i != e; ++i) dbgs() << " Input #" << i << " is " << *InstInputs[i] << "\n"; } +#endif static bool VerifySubExpr(Value *Expr, diff --git a/lib/Analysis/ProfileDataLoader.cpp b/lib/Analysis/ProfileDataLoader.cpp new file mode 100644 index 0000000..69286ef --- /dev/null +++ b/lib/Analysis/ProfileDataLoader.cpp @@ -0,0 +1,162 @@ +//===- ProfileDataLoader.cpp - Load profile information from disk ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The ProfileDataLoader class is used to load raw profiling data from the dump +// file. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Module.h" +#include "llvm/InstrTypes.h" +#include "llvm/Analysis/ProfileDataLoader.h" +#include "llvm/Analysis/ProfileDataTypes.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" +#include <cstdio> +#include <cstdlib> +using namespace llvm; + +raw_ostream &llvm::operator<<(raw_ostream &O, std::pair<const BasicBlock *, + const BasicBlock *> E) { + O << "("; + + if (E.first) + O << E.first->getName(); + else + O << "0"; + + O << ","; + + if (E.second) + O << E.second->getName(); + else + O << "0"; + + return O << ")"; +} + +/// AddCounts - Add 'A' and 'B', accounting for the fact that the value of one +/// (or both) may not be defined. +static unsigned AddCounts(unsigned A, unsigned B) { + // If either value is undefined, use the other. + // Undefined + undefined = undefined. + if (A == ProfileDataLoader::Uncounted) return B; + if (B == ProfileDataLoader::Uncounted) return A; + + // Saturate to the maximum storable value. This could change taken/nottaken + // ratios, but is presumably better than wrapping and thus potentially + // inverting ratios. + uint64_t tmp = (uint64_t)A + (uint64_t)B; + if (tmp > (uint64_t)ProfileDataLoader::MaxCount) + tmp = ProfileDataLoader::MaxCount; + return (unsigned)tmp; +} + +/// ReadProfilingData - Load 'NumEntries' items of type 'T' from file 'F' +template <typename T> +static void ReadProfilingData(const char *ToolName, FILE *F, + T *Data, size_t NumEntries) { + // Read in the block of data... + if (fread(Data, sizeof(T), NumEntries, F) != NumEntries) + report_fatal_error(Twine(ToolName) + ": Profiling data truncated"); +} + +/// ReadProfilingNumEntries - Read how many entries are in this profiling data +/// packet. +static unsigned ReadProfilingNumEntries(const char *ToolName, FILE *F, + bool ShouldByteSwap) { + unsigned Entry; + ReadProfilingData<unsigned>(ToolName, F, &Entry, 1); + return ShouldByteSwap ? ByteSwap_32(Entry) : Entry; +} + +/// ReadProfilingBlock - Read the number of entries in the next profiling data +/// packet and then accumulate the entries into 'Data'. +static void ReadProfilingBlock(const char *ToolName, FILE *F, + bool ShouldByteSwap, + SmallVector<unsigned, 32> &Data) { + // Read the number of entries... + unsigned NumEntries = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap); + + // Read in the data. + SmallVector<unsigned, 8> TempSpace(NumEntries); + ReadProfilingData<unsigned>(ToolName, F, TempSpace.data(), NumEntries); + + // Make sure we have enough space ... + if (Data.size() < NumEntries) + Data.resize(NumEntries, ProfileDataLoader::Uncounted); + + // Accumulate the data we just read into the existing data. + for (unsigned i = 0; i < NumEntries; ++i) { + unsigned Entry = ShouldByteSwap ? ByteSwap_32(TempSpace[i]) : TempSpace[i]; + Data[i] = AddCounts(Entry, Data[i]); + } +} + +/// ReadProfilingArgBlock - Read the command line arguments that the progam was +/// run with when the current profiling data packet(s) were generated. +static void ReadProfilingArgBlock(const char *ToolName, FILE *F, + bool ShouldByteSwap, + SmallVector<std::string, 1> &CommandLines) { + // Read the number of bytes ... + unsigned ArgLength = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap); + + // Read in the arguments (if there are any to read). Round up the length to + // the nearest 4-byte multiple. 
+ SmallVector<char, 8> Args(ArgLength+4); + if (ArgLength) + ReadProfilingData<char>(ToolName, F, Args.data(), (ArgLength+3) & ~3); + + // Store the arguments. + CommandLines.push_back(std::string(&Args[0], &Args[ArgLength])); +} + +const unsigned ProfileDataLoader::Uncounted = ~0U; +const unsigned ProfileDataLoader::MaxCount = ~0U - 1U; + +/// ProfileDataLoader ctor - Read the specified profiling data file, reporting +/// a fatal error if the file is invalid or broken. +ProfileDataLoader::ProfileDataLoader(const char *ToolName, + const std::string &Filename) + : Filename(Filename) { + FILE *F = fopen(Filename.c_str(), "rb"); + if (F == 0) + report_fatal_error(Twine(ToolName) + ": Error opening '" + + Filename + "': "); + + // Keep reading packets until we run out of them. + unsigned PacketType; + while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) { + // If the low eight bits of the packet are zero, we must be dealing with an + // endianness mismatch. Byteswap all words read from the profiling + // information. This can happen when the compiler host and target have + // different endianness. + bool ShouldByteSwap = (char)PacketType == 0; + PacketType = ShouldByteSwap ? ByteSwap_32(PacketType) : PacketType; + + switch (PacketType) { + case ArgumentInfo: + ReadProfilingArgBlock(ToolName, F, ShouldByteSwap, CommandLines); + break; + + case EdgeInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts); + break; + + default: + report_fatal_error(std::string(ToolName) + + ": Unknown profiling packet type"); + break; + } + } + + fclose(F); +} diff --git a/lib/Analysis/ProfileDataLoaderPass.cpp b/lib/Analysis/ProfileDataLoaderPass.cpp new file mode 100644 index 0000000..c43cff0 --- /dev/null +++ b/lib/Analysis/ProfileDataLoaderPass.cpp @@ -0,0 +1,188 @@ +//===- ProfileDataLoaderPass.cpp - Set branch weight metadata from prof ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass loads profiling data from a dump file and sets branch weight +// metadata. +// +// TODO: Replace all "profile-metadata-loader" strings with "profile-loader" +// once ProfileInfo etc. has been removed. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "profile-metadata-loader" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/BasicBlock.h" +#include "llvm/InstrTypes.h" +#include "llvm/Module.h" +#include "llvm/LLVMContext.h" +#include "llvm/MDBuilder.h" +#include "llvm/Metadata.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ProfileDataLoader.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumEdgesRead, "The # of edges read."); +STATISTIC(NumTermsAnnotated, "The # of terminator instructions annotated."); + +static cl::opt<std::string> +ProfileMetadataFilename("profile-file", cl::init("llvmprof.out"), + cl::value_desc("filename"), + cl::desc("Profile file loaded by -profile-metadata-loader")); + +namespace { + /// This pass loads profiling data from a dump file and sets branch weight + /// metadata. 
+ class ProfileMetadataLoaderPass : public ModulePass { + std::string Filename; + public: + static char ID; // Class identification, replacement for typeinfo + explicit ProfileMetadataLoaderPass(const std::string &filename = "") + : ModulePass(ID), Filename(filename) { + initializeProfileMetadataLoaderPassPass(*PassRegistry::getPassRegistry()); + if (filename.empty()) Filename = ProfileMetadataFilename; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + virtual const char *getPassName() const { + return "Profile loader"; + } + + virtual void readEdge(unsigned, ProfileData&, ProfileData::Edge, + ArrayRef<unsigned>); + virtual unsigned matchEdges(Module&, ProfileData&, ArrayRef<unsigned>); + virtual void setBranchWeightMetadata(Module&, ProfileData&); + + virtual bool runOnModule(Module &M); + }; +} // End of anonymous namespace + +char ProfileMetadataLoaderPass::ID = 0; +INITIALIZE_PASS_BEGIN(ProfileMetadataLoaderPass, "profile-metadata-loader", + "Load profile information from llvmprof.out", false, true) +INITIALIZE_PASS_END(ProfileMetadataLoaderPass, "profile-metadata-loader", + "Load profile information from llvmprof.out", false, true) + +char &llvm::ProfileMetadataLoaderPassID = ProfileMetadataLoaderPass::ID; + +/// createProfileMetadataLoaderPass - This function returns a Pass that loads +/// the profiling information for the module from the specified filename, +/// making it available to the optimizers. +ModulePass *llvm::createProfileMetadataLoaderPass() { + return new ProfileMetadataLoaderPass(); +} +ModulePass *llvm::createProfileMetadataLoaderPass(const std::string &Filename) { + return new ProfileMetadataLoaderPass(Filename); +} + +/// readEdge - Take the value from a profile counter and assign it to an edge. +void ProfileMetadataLoaderPass::readEdge(unsigned ReadCount, + ProfileData &PB, ProfileData::Edge e, + ArrayRef<unsigned> Counters) { + if (ReadCount >= Counters.size()) return; + + unsigned weight = Counters[ReadCount]; + assert(weight != ProfileDataLoader::Uncounted); + PB.addEdgeWeight(e, weight); + + DEBUG(dbgs() << "-- Read Edge Counter for " << e + << " (# "<< (ReadCount) << "): " + << PB.getEdgeWeight(e) << "\n"); +} + +/// matchEdges - Link every profile counter with an edge. +unsigned ProfileMetadataLoaderPass::matchEdges(Module &M, ProfileData &PB, + ArrayRef<unsigned> Counters) { + if (Counters.size() == 0) return 0; + + unsigned ReadCount = 0; + + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + DEBUG(dbgs() << "Loading edges in '" << F->getName() << "'\n"); + readEdge(ReadCount++, PB, PB.getEdge(0, &F->getEntryBlock()), Counters); + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) { + readEdge(ReadCount++, PB, PB.getEdge(BB,TI->getSuccessor(s)), + Counters); + } + } + } + + return ReadCount; +} + +/// setBranchWeightMetadata - Translate the counter values associated with each +/// edge into branch weights for each conditional branch (a branch with 2 or +/// more desinations). 
+void ProfileMetadataLoaderPass::setBranchWeightMetadata(Module &M, + ProfileData &PB) { + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + DEBUG(dbgs() << "Setting branch metadata in '" << F->getName() << "'\n"); + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + unsigned NumSuccessors = TI->getNumSuccessors(); + + // If there is only one successor then we can not set a branch + // probability as the target is certain. + if (NumSuccessors < 2) continue; + + // Load the weights of all edges leading from this terminator. + DEBUG(dbgs() << "-- Terminator with " << NumSuccessors + << " successors:\n"); + SmallVector<uint32_t, 4> Weights(NumSuccessors); + for (unsigned s = 0 ; s < NumSuccessors ; ++s) { + ProfileData::Edge edge = PB.getEdge(BB, TI->getSuccessor(s)); + Weights[s] = (uint32_t)PB.getEdgeWeight(edge); + DEBUG(dbgs() << "---- Edge '" << edge << "' has weight " + << Weights[s] << "\n"); + } + + // Set branch weight metadata. This will set branch probabilities of + // 100%/0% if that is true of the dynamic execution. + // BranchProbabilityInfo can account for this when it loads this metadata + // (it gives the unexecuted branch a weight of 1 for the purposes of + // probability calculations). + MDBuilder MDB(TI->getContext()); + MDNode *Node = MDB.createBranchWeights(Weights); + TI->setMetadata(LLVMContext::MD_prof, Node); + NumTermsAnnotated++; + } + } +} + +bool ProfileMetadataLoaderPass::runOnModule(Module &M) { + ProfileDataLoader PDL("profile-data-loader", Filename); + ProfileData PB; + + ArrayRef<unsigned> Counters = PDL.getRawEdgeCounts(); + + unsigned ReadCount = matchEdges(M, PB, Counters); + + if (ReadCount != Counters.size()) { + errs() << "WARNING: profile information is inconsistent with " + << "the current program!\n"; + } + NumEdgesRead = ReadCount; + + setBranchWeightMetadata(M, PB); + + return ReadCount > 0; +} diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp index 63468f8..12b59e0 100644 --- a/lib/Analysis/ProfileEstimatorPass.cpp +++ b/lib/Analysis/ProfileEstimatorPass.cpp @@ -286,7 +286,7 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) { } } - double fraction = floor(BBWeight/Edges.size()); + double fraction = Edges.size() ? floor(BBWeight/Edges.size()) : 0.0; // Finally we know what flow is still not leaving the block, distribute this // flow onto the empty edges.
for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end(); diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp index 173de2c..b5b7ac1 100644 --- a/lib/Analysis/ProfileInfo.cpp +++ b/lib/Analysis/ProfileInfo.cpp @@ -1016,40 +1016,14 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { } } -raw_ostream& operator<<(raw_ostream &O, const Function *F) { - return O << F->getName(); -} - raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF) { return O << MF->getFunction()->getName() << "(MF)"; } -raw_ostream& operator<<(raw_ostream &O, const BasicBlock *BB) { - return O << BB->getName(); -} - raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB) { return O << MBB->getBasicBlock()->getName() << "(MB)"; } -raw_ostream& operator<<(raw_ostream &O, std::pair<const BasicBlock *, const BasicBlock *> E) { - O << "("; - - if (E.first) - O << E.first; - else - O << "0"; - - O << ","; - - if (E.second) - O << E.second; - else - O << "0"; - - return O << ")"; -} - raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, const MachineBasicBlock *> E) { O << "("; diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp index 868f483..0f9a8b3 100644 --- a/lib/Analysis/RegionInfo.cpp +++ b/lib/Analysis/RegionInfo.cpp @@ -47,7 +47,7 @@ static cl::opt<enum Region::PrintStyle> printStyle("print-region-style", cl::values( clEnumValN(Region::PrintNone, "none", "print no details"), clEnumValN(Region::PrintBB, "bb", - "print regions in detail with block_node_iterator"), + "print regions in detail with block_iterator"), clEnumValN(Region::PrintRN, "rn", "print regions in detail with element_iterator"), clEnumValEnd)); @@ -246,22 +246,6 @@ void Region::verifyRegionNest() const { verifyRegion(); } -Region::block_node_iterator Region::block_node_begin() { - return GraphTraits<FlatIt<Region*> >::nodes_begin(this); -} - -Region::block_node_iterator Region::block_node_end() { - return GraphTraits<FlatIt<Region*> >::nodes_end(this); -} - -Region::const_block_node_iterator Region::block_node_begin() const { - return GraphTraits<FlatIt<const Region*> >::nodes_begin(this); -} - -Region::const_block_node_iterator Region::block_node_end() const { - return GraphTraits<FlatIt<const Region*> >::nodes_end(this); -} - Region::element_iterator Region::element_begin() { return GraphTraits<Region*>::nodes_begin(this); } @@ -425,10 +409,8 @@ void Region::print(raw_ostream &OS, bool print_tree, unsigned level, OS.indent(level*2 + 2); if (Style == PrintBB) { - for (const_block_node_iterator I = block_node_begin(), - E = block_node_end(); - I != E; ++I) - OS << **I << ", "; // TODO: remove the last "," + for (const_block_iterator I = block_begin(), E = block_end(); I != E; ++I) + OS << (*I)->getName() << ", "; // TODO: remove the last "," } else if (Style == PrintRN) { for (const_element_iterator I = element_begin(), E = element_end(); I!=E; ++I) OS << **I << ", "; // TODO: remove the last ", @@ -445,9 +427,11 @@ void Region::print(raw_ostream &OS, bool print_tree, unsigned level, OS.indent(level*2) << "} \n"; } +#ifndef NDEBUG void Region::dump() const { print(dbgs(), true, getDepth(), printStyle.getValue()); } +#endif void Region::clearNodeCache() { // Free the cached nodes. 
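The RegionInfo and RegionPass hunks above and below drop block_node_iterator in favour of block_iterator, which dereferences straight to a BasicBlock*. A minimal sketch of the new-style traversal, assuming the Region::block_iterator interface declared in llvm/Analysis/RegionInfo.h at this revision (the helper name printRegionBlocks is illustrative, not part of the patch):

#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Print the name of every basic block in a region, mirroring the updated
// Region::print() above; no RegionNode::getEntry() call is needed any more.
static void printRegionBlocks(const Region *R, raw_ostream &OS) {
  for (Region::const_block_iterator I = R->block_begin(), E = R->block_end();
       I != E; ++I)
    OS << (*I)->getName() << ", ";
  OS << '\n';
}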
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp index c97b5eb..9208fa2 100644 --- a/lib/Analysis/RegionPass.cpp +++ b/lib/Analysis/RegionPass.cpp @@ -195,10 +195,9 @@ public: virtual bool runOnRegion(Region *R, RGPassManager &RGM) { Out << Banner; - for (Region::block_node_iterator I = R->block_node_begin(), - E = R->block_node_end(); + for (Region::block_iterator I = R->block_begin(), E = R->block_end(); I != E; ++I) - (*I)->getEntry()->print(Out); + (*I)->print(Out); return false; } diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index a654648..84e147b 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -122,10 +122,12 @@ char ScalarEvolution::ID = 0; // Implementation of the SCEV class. // +#ifndef NDEBUG void SCEV::dump() const { print(dbgs()); dbgs() << '\n'; } +#endif void SCEV::print(raw_ostream &OS) const { switch (getSCEVType()) { diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp index ff5010b..dbb9535 100644 --- a/lib/Analysis/Trace.cpp +++ b/lib/Analysis/Trace.cpp @@ -43,9 +43,11 @@ void Trace::print(raw_ostream &O) const { O << "; Trace parent function: \n" << *F; } +#ifndef NDEBUG /// dump - Debugger convenience method; writes trace to standard error /// output stream. /// void Trace::dump() const { print(dbgs()); } +#endif diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index cea34e1..491224a 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -1614,7 +1614,7 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, // right. unsigned PtrSize = TD.getPointerSizeInBits(); if (PtrSize < 64) - Offset = (Offset << (64-PtrSize)) >> (64-PtrSize); + Offset = SignExtend64(Offset, PtrSize); return GetPointerBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD); } diff --git a/lib/Archive/ArchiveInternals.h b/lib/Archive/ArchiveInternals.h index 55684f7..639f5ac 100644 --- a/lib/Archive/ArchiveInternals.h +++ b/lib/Archive/ArchiveInternals.h @@ -66,7 +66,7 @@ namespace llvm { fmag[1] = '\n'; } - bool checkSignature() { + bool checkSignature() const { return 0 == memcmp(fmag, ARFILE_MEMBER_MAGIC,2); } }; diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp index 5cfc810..5052495 100644 --- a/lib/Archive/ArchiveReader.cpp +++ b/lib/Archive/ArchiveReader.cpp @@ -79,7 +79,7 @@ Archive::parseMemberHeader(const char*& At, const char* End, std::string* error) } // Cast archive member header - ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At; + const ArchiveMemberHeader* Hdr = (const ArchiveMemberHeader*)At; At += sizeof(ArchiveMemberHeader); int flags = 0; @@ -196,7 +196,7 @@ Archive::parseMemberHeader(const char*& At, const char* End, std::string* error) /* FALL THROUGH */ default: - char* slash = (char*) memchr(Hdr->name, '/', 16); + const char* slash = (const char*) memchr(Hdr->name, '/', 16); if (slash == 0) slash = Hdr->name + 16; pathname.assign(Hdr->name, slash - Hdr->name); diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index e045804..6e61665 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -510,6 +510,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(asm); KEYWORD(sideeffect); KEYWORD(alignstack); + KEYWORD(inteldialect); KEYWORD(gc); KEYWORD(ccc); @@ -554,7 +555,6 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(naked); KEYWORD(nonlazybind); KEYWORD(address_safety); - KEYWORD(ia_nsdialect); KEYWORD(type); KEYWORD(opaque); 
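The LLLexer change above trades the old ia_nsdialect function attribute for a per-asm inteldialect keyword; the LLParser, LLToken, and bitcode hunks below store that choice in a third flag bit next to sideeffect and alignstack (bit 0 sideeffect, bit 1 alignstack, bit 2 dialect). A hedged sketch of that packing, using only the InlineAsm::get() overload introduced by this commit; the helper names are illustrative, not part of the patch:

#include "llvm/ADT/StringRef.h"
#include "llvm/DerivedTypes.h"
#include "llvm/InlineAsm.h"
using namespace llvm;

// Pack the three inline-asm flags the way LLParser stores them in ID.UIntVal.
static unsigned packAsmFlags(bool HasSideEffect, bool IsAlignStack,
                             InlineAsm::AsmDialect Dialect) {
  return unsigned(HasSideEffect) | (unsigned(IsAlignStack) << 1) |
         (unsigned(Dialect) << 2);
}

// Rebuild the InlineAsm constant from the packed word, mirroring
// ConvertValIDToValue and the new CST_CODE_INLINEASM reader below.
static InlineAsm *makeInlineAsm(FunctionType *FTy, StringRef AsmString,
                                StringRef Constraints, unsigned Flags) {
  return InlineAsm::get(FTy, AsmString, Constraints,
                        /*hasSideEffects=*/(Flags & 1) != 0,
                        /*isAlignStack=*/((Flags >> 1) & 1) != 0,
                        InlineAsm::AsmDialect(Flags >> 2));
}

Keeping the dialect on the InlineAsm value itself (rather than as a function attribute) is what lets the AsmPrinter hunks further down emit .intel_syntax/.att_syntax switches per asm blob.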
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index a9c7e98..b0b64d8 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -962,7 +962,6 @@ bool LLParser::ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind) { case lltok::kw_naked: Attrs |= Attribute::Naked; break; case lltok::kw_nonlazybind: Attrs |= Attribute::NonLazyBind; break; case lltok::kw_address_safety: Attrs |= Attribute::AddressSafety; break; - case lltok::kw_ia_nsdialect: Attrs |= Attribute::IANSDialect; break; case lltok::kw_alignstack: { unsigned Alignment; @@ -2070,16 +2069,18 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { case lltok::kw_asm: { // ValID ::= 'asm' SideEffect? AlignStack? STRINGCONSTANT ',' STRINGCONSTANT - bool HasSideEffect, AlignStack; + bool HasSideEffect, AlignStack, AsmDialect; Lex.Lex(); if (ParseOptionalToken(lltok::kw_sideeffect, HasSideEffect) || ParseOptionalToken(lltok::kw_alignstack, AlignStack) || + ParseOptionalToken(lltok::kw_inteldialect, AsmDialect) || ParseStringConstant(ID.StrVal) || ParseToken(lltok::comma, "expected comma in inline asm expression") || ParseToken(lltok::StringConstant, "expected constraint string")) return true; ID.StrVal2 = Lex.getStrVal(); - ID.UIntVal = unsigned(HasSideEffect) | (unsigned(AlignStack)<<1); + ID.UIntVal = unsigned(HasSideEffect) | (unsigned(AlignStack)<<1) | + (unsigned(AsmDialect)<<2); ID.Kind = ValID::t_InlineAsm; return false; } @@ -2496,7 +2497,8 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0; if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2)) return Error(ID.Loc, "invalid type for inline asm constraint string"); - V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1, ID.UIntVal>>1); + V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1, + (ID.UIntVal>>1)&1, (InlineAsm::AsmDialect(ID.UIntVal>>2))); return false; } case ValID::t_MDNode: diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 9fd63f2..37cbf30 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -72,6 +72,7 @@ namespace lltok { kw_asm, kw_sideeffect, kw_alignstack, + kw_inteldialect, kw_gc, kw_c, @@ -107,7 +108,6 @@ namespace lltok { kw_naked, kw_nonlazybind, kw_address_safety, - kw_ia_nsdialect, kw_type, kw_opaque, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 65fd52e..f242df4 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1245,7 +1245,9 @@ bool BitcodeReader::ParseConstants() { V = ConstantExpr::getICmp(Record[3], Op0, Op1); break; } - case bitc::CST_CODE_INLINEASM: { + // This maintains backward compatibility, pre-asm dialect keywords. + // FIXME: Remove with the 4.0 release. + case bitc::CST_CODE_INLINEASM_OLD: { if (Record.size() < 2) return Error("Invalid INLINEASM record"); std::string AsmStr, ConstrStr; bool HasSideEffects = Record[0] & 1; @@ -1266,6 +1268,31 @@ bool BitcodeReader::ParseConstants() { AsmStr, ConstrStr, HasSideEffects, IsAlignStack); break; } + // This version adds support for the asm dialect keywords (e.g., + // inteldialect). 
+ case bitc::CST_CODE_INLINEASM: { + if (Record.size() < 2) return Error("Invalid INLINEASM record"); + std::string AsmStr, ConstrStr; + bool HasSideEffects = Record[0] & 1; + bool IsAlignStack = (Record[0] >> 1) & 1; + unsigned AsmDialect = Record[0] >> 2; + unsigned AsmStrSize = Record[1]; + if (2+AsmStrSize >= Record.size()) + return Error("Invalid INLINEASM record"); + unsigned ConstStrSize = Record[2+AsmStrSize]; + if (3+AsmStrSize+ConstStrSize > Record.size()) + return Error("Invalid INLINEASM record"); + + for (unsigned i = 0; i != AsmStrSize; ++i) + AsmStr += (char)Record[2+i]; + for (unsigned i = 0; i != ConstStrSize; ++i) + ConstrStr += (char)Record[3+AsmStrSize+i]; + PointerType *PTy = cast<PointerType>(CurTy); + V = InlineAsm::get(cast<FunctionType>(PTy->getElementType()), + AsmStr, ConstrStr, HasSideEffects, IsAlignStack, + InlineAsm::AsmDialect(AsmDialect)); + break; + } case bitc::CST_CODE_BLOCKADDRESS:{ if (Record.size() < 3) return Error("Invalid CE_BLOCKADDRESS record"); Type *FnTy = getTypeByID(Record[0]); @@ -2837,7 +2864,7 @@ bool BitcodeReader::InitStream() { } bool BitcodeReader::InitStreamFromBuffer() { - const unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart(); + const unsigned char *BufPtr = (const unsigned char*)Buffer->getBufferStart(); const unsigned char *BufEnd = BufPtr+Buffer->getBufferSize(); if (Buffer->getBufferSize() & 3) { diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 1d2dfc3..94ebe19 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -814,7 +814,8 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) { Record.push_back(unsigned(IA->hasSideEffects()) | - unsigned(IA->isAlignStack()) << 1); + unsigned(IA->isAlignStack()) << 1 | + unsigned(IA->getDialect()&1) << 2); // Add the asm string. const std::string &AsmStr = IA->getAsmString(); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 7364f42..23d9222 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -319,8 +319,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { return; } - if (MAI->getLCOMMDirectiveType() != LCOMM::None && - (MAI->getLCOMMDirectiveType() != LCOMM::NoAlignment || Align == 1)) { + if (Align == 1 || + MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) { // .lcomm _foo, 42 OutStreamer.EmitLocalCommonSymbol(GVSym, Size, Align); return; @@ -491,9 +491,8 @@ void AsmPrinter::EmitFunctionEntryLabel() { "' label emitted multiple times to assembly file"); } - -/// EmitComments - Pretty-print comments for instructions. -static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { +/// emitComments - Pretty-print comments for instructions. +static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { const MachineFunction *MF = MI.getParent()->getParent(); const TargetMachine &TM = MF->getTarget(); @@ -528,16 +527,16 @@ static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { CommentOS << " Reload Reuse\n"; } -/// EmitImplicitDef - This method emits the specified machine instruction +/// emitImplicitDef - This method emits the specified machine instruction /// that is an implicit def. 
-static void EmitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) { +static void emitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) { unsigned RegNo = MI->getOperand(0).getReg(); AP.OutStreamer.AddComment(Twine("implicit-def: ") + AP.TM.getRegisterInfo()->getName(RegNo)); AP.OutStreamer.AddBlankLine(); } -static void EmitKill(const MachineInstr *MI, AsmPrinter &AP) { +static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { std::string Str = "kill:"; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &Op = MI->getOperand(i); @@ -550,10 +549,10 @@ static void EmitKill(const MachineInstr *MI, AsmPrinter &AP) { AP.OutStreamer.AddBlankLine(); } -/// EmitDebugValueComment - This method handles the target-independent form +/// emitDebugValueComment - This method handles the target-independent form /// of DBG_VALUE, returning true if it was able to do so. A false return /// means the target will need to handle MI in EmitInstruction. -static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { +static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { // This code handles only the 3-operand target-independent form. if (MI->getNumOperands() != 3) return false; @@ -685,7 +684,7 @@ void AsmPrinter::EmitFunctionBody() { #endif // !ANDROID_TARGET_BUILD || ANDROID_ENGINEERING_BUILD if (isVerbose()) - EmitComments(*II, OutStreamer.GetCommentOS()); + emitComments(*II, OutStreamer.GetCommentOS()); switch (II->getOpcode()) { case TargetOpcode::PROLOG_LABEL: @@ -701,15 +700,15 @@ void AsmPrinter::EmitFunctionBody() { break; case TargetOpcode::DBG_VALUE: if (isVerbose()) { - if (!EmitDebugValueComment(II, *this)) + if (!emitDebugValueComment(II, *this)) EmitInstruction(II); } break; case TargetOpcode::IMPLICIT_DEF: - if (isVerbose()) EmitImplicitDef(II, *this); + if (isVerbose()) emitImplicitDef(II, *this); break; case TargetOpcode::KILL: - if (isVerbose()) EmitKill(II, *this); + if (isVerbose()) emitKill(II, *this); break; default: if (!TM.hasMCUseLoc()) @@ -1439,9 +1438,9 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const { // Constant emission. //===----------------------------------------------------------------------===// -/// LowerConstant - Lower the specified LLVM Constant to an MCExpr. +/// lowerConstant - Lower the specified LLVM Constant to an MCExpr. /// -static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { +static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { MCContext &Ctx = AP.OutContext; if (CV->isNullValue() || isa<UndefValue>(CV)) @@ -1469,7 +1468,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getTargetData())) if (C != CE) - return LowerConstant(C, AP); + return lowerConstant(C, AP); // Otherwise report the problem to the user. { @@ -1487,15 +1486,14 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end()); int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec); - const MCExpr *Base = LowerConstant(CE->getOperand(0), AP); + const MCExpr *Base = lowerConstant(CE->getOperand(0), AP); if (Offset == 0) return Base; // Truncate/sext the offset to the pointer size. 
- if (TD.getPointerSizeInBits() != 64) { - int SExtAmount = 64-TD.getPointerSizeInBits(); - Offset = (Offset << SExtAmount) >> SExtAmount; - } + unsigned Width = TD.getPointerSizeInBits(); + if (Width < 64) + Offset = SignExtend64(Offset, Width); return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), Ctx); @@ -1508,7 +1506,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { // is reasonable to treat their delta as a 32-bit value. // FALL THROUGH. case Instruction::BitCast: - return LowerConstant(CE->getOperand(0), AP); + return lowerConstant(CE->getOperand(0), AP); case Instruction::IntToPtr: { const TargetData &TD = *AP.TM.getTargetData(); @@ -1517,7 +1515,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { Constant *Op = CE->getOperand(0); Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), false/*ZExt*/); - return LowerConstant(Op, AP); + return lowerConstant(Op, AP); } case Instruction::PtrToInt: { @@ -1527,7 +1525,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { Constant *Op = CE->getOperand(0); Type *Ty = CE->getType(); - const MCExpr *OpExpr = LowerConstant(Op, AP); + const MCExpr *OpExpr = lowerConstant(Op, AP); // We can emit the pointer value into this slot if the slot is an // integer slot equal to the size of the pointer. @@ -1553,8 +1551,8 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { case Instruction::And: case Instruction::Or: case Instruction::Xor: { - const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP); - const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP); + const MCExpr *LHS = lowerConstant(CE->getOperand(0), AP); + const MCExpr *RHS = lowerConstant(CE->getOperand(1), AP); switch (CE->getOpcode()) { default: llvm_unreachable("Unknown binary operator constant cast expr"); case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); @@ -1571,7 +1569,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { } } -static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace, +static void emitGlobalConstantImpl(const Constant *C, unsigned AddrSpace, AsmPrinter &AP); /// isRepeatedByteSequence - Determine whether the given value is @@ -1633,7 +1631,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { return -1; } -static void EmitGlobalConstantDataSequential(const ConstantDataSequential *CDS, +static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, unsigned AddrSpace,AsmPrinter &AP){ // See if we can aggregate this into a .fill, if so, emit it as such. @@ -1698,7 +1696,7 @@ static void EmitGlobalConstantDataSequential(const ConstantDataSequential *CDS, } -static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, +static void emitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, AsmPrinter &AP) { // See if we can aggregate some values. Make sure it can be // represented as a series of bytes of the constant value. 
@@ -1710,14 +1708,14 @@ static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, } else { for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) - EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); + emitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); } } -static void EmitGlobalConstantVector(const ConstantVector *CV, +static void emitGlobalConstantVector(const ConstantVector *CV, unsigned AddrSpace, AsmPrinter &AP) { for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) - EmitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP); + emitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP); const TargetData &TD = *AP.TM.getTargetData(); unsigned Size = TD.getTypeAllocSize(CV->getType()); @@ -1727,7 +1725,7 @@ static void EmitGlobalConstantVector(const ConstantVector *CV, AP.OutStreamer.EmitZeros(Padding, AddrSpace); } -static void EmitGlobalConstantStruct(const ConstantStruct *CS, +static void emitGlobalConstantStruct(const ConstantStruct *CS, unsigned AddrSpace, AsmPrinter &AP) { // Print the fields in successive locations. Pad to align if needed! const TargetData *TD = AP.TM.getTargetData(); @@ -1744,7 +1742,7 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS, SizeSoFar += FieldSize + PadSize; // Now print the actual field value. - EmitGlobalConstantImpl(Field, AddrSpace, AP); + emitGlobalConstantImpl(Field, AddrSpace, AP); // Insert padding - this may include padding to increase the size of the // current field up to the ABI size (if the struct is not packed) as well @@ -1755,7 +1753,7 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS, "Layout of constant struct may be incorrect!"); } -static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, +static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, AsmPrinter &AP) { if (CFP->getType()->isHalfTy()) { if (AP.isVerbose()) { @@ -1840,7 +1838,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, } } -static void EmitGlobalConstantLargeInt(const ConstantInt *CI, +static void emitGlobalConstantLargeInt(const ConstantInt *CI, unsigned AddrSpace, AsmPrinter &AP) { const TargetData *TD = AP.TM.getTargetData(); unsigned BitWidth = CI->getBitWidth(); @@ -1856,7 +1854,7 @@ static void EmitGlobalConstantLargeInt(const ConstantInt *CI, } } -static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, +static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, AsmPrinter &AP) { const TargetData *TD = AP.TM.getTargetData(); uint64_t Size = TD->getTypeAllocSize(CV->getType()); @@ -1875,13 +1873,13 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace); return; default: - EmitGlobalConstantLargeInt(CI, AddrSpace, AP); + emitGlobalConstantLargeInt(CI, AddrSpace, AP); return; } } if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) - return EmitGlobalConstantFP(CFP, AddrSpace, AP); + return emitGlobalConstantFP(CFP, AddrSpace, AP); if (isa<ConstantPointerNull>(CV)) { AP.OutStreamer.EmitIntValue(0, Size, AddrSpace); @@ -1889,19 +1887,19 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, } if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV)) - return EmitGlobalConstantDataSequential(CDS, AddrSpace, AP); + return emitGlobalConstantDataSequential(CDS, AddrSpace, AP); if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) - return 
EmitGlobalConstantArray(CVA, AddrSpace, AP); + return emitGlobalConstantArray(CVA, AddrSpace, AP); if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) - return EmitGlobalConstantStruct(CVS, AddrSpace, AP); + return emitGlobalConstantStruct(CVS, AddrSpace, AP); if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of // vectors). if (CE->getOpcode() == Instruction::BitCast) - return EmitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP); + return emitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP); if (Size > 8) { // If the constant expression's size is greater than 64-bits, then we have @@ -1909,23 +1907,23 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, // that way. Constant *New = ConstantFoldConstantExpression(CE, TD); if (New && New != CE) - return EmitGlobalConstantImpl(New, AddrSpace, AP); + return emitGlobalConstantImpl(New, AddrSpace, AP); } } if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) - return EmitGlobalConstantVector(V, AddrSpace, AP); + return emitGlobalConstantVector(V, AddrSpace, AP); // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. - AP.OutStreamer.EmitValue(LowerConstant(CV, AP), Size, AddrSpace); + AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size, AddrSpace); } /// EmitGlobalConstant - Print a general LLVM constant to the .s file. void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); if (Size) - EmitGlobalConstantImpl(CV, AddrSpace, *this); + emitGlobalConstantImpl(CV, AddrSpace, *this); else if (MAI->hasSubsectionsViaSymbols()) { // If the global has zero size, emit a single byte so that two labels don't // look like they are at the same location. @@ -2040,8 +2038,8 @@ static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop, } } -/// EmitBasicBlockLoopComments - Pretty-print comments for basic blocks. -static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB, +/// emitBasicBlockLoopComments - Pretty-print comments for basic blocks. +static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, const MachineLoopInfo *LI, const AsmPrinter &AP) { // Add loop depth information @@ -2107,7 +2105,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { if (const BasicBlock *BB = MBB->getBasicBlock()) if (BB->hasName()) OutStreamer.AddComment("%" + BB->getName()); - EmitBasicBlockLoopComments(*MBB, LI, *this); + emitBasicBlockLoopComments(*MBB, LI, *this); } // Print the main label for the block. diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 711375b..b26ffeb 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -43,10 +43,10 @@ namespace { }; } -/// SrcMgrDiagHandler - This callback is invoked when the SourceMgr for an +/// srcMgrDiagHandler - This callback is invoked when the SourceMgr for an /// inline asm has an error in it. diagInfo is a pointer to the SrcMgrDiagInfo /// struct above. 
-static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { +static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { SrcMgrDiagInfo *DiagInfo = static_cast<SrcMgrDiagInfo *>(diagInfo); assert(DiagInfo && "Diagnostic context not passed down?"); @@ -68,7 +68,8 @@ static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { } /// EmitInlineAsm - Emit a blob of inline asm to the output streamer. -void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const { +void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, + InlineAsm::AsmDialect Dialect) const { #ifndef ANDROID_TARGET_BUILD assert(!Str.empty() && "Can't emit empty inline asm block"); @@ -92,12 +93,12 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const { LLVMContext &LLVMCtx = MMI->getModule()->getContext(); bool HasDiagHandler = false; if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) { - // If the source manager has an issue, we arrange for SrcMgrDiagHandler + // If the source manager has an issue, we arrange for srcMgrDiagHandler // to be invoked, getting DiagInfo passed into it. DiagInfo.LocInfo = LocMDNode; DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler(); DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext(); - SrcMgr.setDiagHandler(SrcMgrDiagHandler, &DiagInfo); + SrcMgr.setDiagHandler(srcMgrDiagHandler, &DiagInfo); HasDiagHandler = true; } @@ -127,6 +128,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const { if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); + Parser->setAssemblerDialect(Dialect); Parser->setTargetParser(*TAP.get()); // Don't implicitly switch to the text section before the asm. @@ -200,6 +202,15 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { // The variant of the current asmprinter. int AsmPrinterVariant = MAI->getAssemblerDialect(); + int InlineAsmVariant = MI->getInlineAsmDialect(); + + // Switch to the inline assembly variant. + if (AsmPrinterVariant != InlineAsmVariant) { + if (InlineAsmVariant == 0) + OS << ".att_syntax\n\t"; + else + OS << ".intel_syntax\n\t"; + } int CurVariant = -1; // The number of the {.|.|.} region we are in. const char *LastEmitted = AsmStr; // One past the last character emitted. @@ -345,11 +356,11 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { else { AsmPrinter *AP = const_cast<AsmPrinter*>(this); if (InlineAsm::isMemKind(OpFlags)) { - Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant, + Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant, Modifier[0] ? Modifier : 0, OS); } else { - Error = AP->PrintAsmOperand(MI, OpNo, AsmPrinterVariant, + Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant, Modifier[0] ? Modifier : 0, OS); } } @@ -365,8 +376,16 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { } } } + // Switch to the AsmPrinter variant. + if (AsmPrinterVariant != InlineAsmVariant) { + if (AsmPrinterVariant == 0) + OS << "\n\t.att_syntax"; + else + OS << "\n\t.intel_syntax"; + } + OS << '\n' << (char)0; // null terminate string. - EmitInlineAsm(OS.str(), LocMD); + EmitInlineAsm(OS.str(), LocMD, MI->getInlineAsmDialect()); // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't // enabled, so we use EmitRawText. 
@@ -413,8 +432,8 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, /// instruction, using the specified assembler variant. Targets should /// override this to format as appropriate. bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O) { + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O) { // Does this asm operand have a single letter operand modifier? if (ExtraCode && ExtraCode[0]) { if (ExtraCode[1] != 0) return true; // Unknown modifier. diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 3776848..0885285 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -182,6 +182,12 @@ void DIEValue::dump() { void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { unsigned Size = ~0U; switch (Form) { + case dwarf::DW_FORM_flag_present: + // Emit something to keep the lines and comments in sync. + // FIXME: Is there a better way to do this? + if (Asm->OutStreamer.hasRawTextSupport()) + Asm->OutStreamer.EmitRawText(StringRef("")); + return; case dwarf::DW_FORM_flag: // Fall thru case dwarf::DW_FORM_ref1: // Fall thru case dwarf::DW_FORM_data1: Size = 1; break; @@ -203,6 +209,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { /// unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { switch (Form) { + case dwarf::DW_FORM_flag_present: return 0; case dwarf::DW_FORM_flag: // Fall thru case dwarf::DW_FORM_ref1: // Fall thru case dwarf::DW_FORM_data1: return sizeof(int8_t); diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index d30e5bb..e585897 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -51,6 +51,15 @@ DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) { return Value; } +/// addFlag - Add a flag that is true. +void CompileUnit::addFlag(DIE *Die, unsigned Attribute) { + if (!DD->useDarwinGDBCompat()) + Die->addValue(Attribute, dwarf::DW_FORM_flag_present, + DIEIntegerOne); + else + addUInt(Die, Attribute, dwarf::DW_FORM_flag, 1); +} + /// addUInt - Add an unsigned integer attribute data and value. 
/// void CompileUnit::addUInt(DIE *Die, unsigned Attribute, @@ -794,7 +803,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) - addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + addFlag(&Buffer, dwarf::DW_AT_prototyped); } break; case dwarf::DW_TAG_structure_type: @@ -825,15 +834,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); if (SP.isExplicit()) - addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1); + addFlag(ElemDie, dwarf::DW_AT_explicit); } else if (Element.isVariable()) { DIVariable DV(Element); ElemDie = new DIE(dwarf::DW_TAG_variable); addString(ElemDie, dwarf::DW_AT_name, DV.getName()); addType(ElemDie, DV.getType()); - addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + addFlag(ElemDie, dwarf::DW_AT_declaration); + addFlag(ElemDie, dwarf::DW_AT_external); addSourceLine(ElemDie, DV); } else if (Element.isDerivedType()) { DIDerivedType DDTy(Element); @@ -883,7 +892,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { } if (CTy.isAppleBlockExtension()) - addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); + addFlag(&Buffer, dwarf::DW_AT_APPLE_block); DICompositeType ContainingType = CTy.getContainingType(); if (DIDescriptor(ContainingType).isCompositeType()) @@ -895,8 +904,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { } if (CTy.isObjcClassComplete()) - addUInt(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type, - dwarf::DW_FORM_flag, 1); + addFlag(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type); // Add template parameters to a class, structure or union types. // FIXME: The support isn't in the metadata for this yet. @@ -929,7 +937,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // If we're a forward decl, say so. if (CTy.isForwardDecl()) - addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + addFlag(&Buffer, dwarf::DW_AT_declaration); // Add source line info if available. if (!CTy.isForwardDecl()) @@ -1028,8 +1036,10 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // AT_specification code in order to work around a bug in older // gdbs that requires the linkage name to resolve multiple template // functions. + // TODO: Remove this set of code when we get rid of the old gdb + // compatibility. StringRef LinkageName = SP.getLinkageName(); - if (!LinkageName.empty()) + if (!LinkageName.empty() && DD->useDarwinGDBCompat()) addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, getRealLinkageName(LinkageName)); @@ -1043,6 +1053,11 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { return SPDie; } + // Add the linkage name if we have one. + if (!LinkageName.empty() && !DD->useDarwinGDBCompat()) + addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, + getRealLinkageName(LinkageName)); + // Constructors and operators for anonymous aggregates do not have names. 
if (!SP.getName().empty()) addString(SPDie, dwarf::DW_AT_name, SP.getName()); @@ -1055,7 +1070,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) - addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + addFlag(SPDie, dwarf::DW_AT_prototyped); // Add Return Type. DICompositeType SPTy = SP.getType(); @@ -1079,7 +1094,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { } if (!SP.isDefinition()) { - addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + addFlag(SPDie, dwarf::DW_AT_declaration); // Add arguments. Do not add arguments for subprogram definition. They will // be handled while processing variables. @@ -1093,19 +1108,19 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { DIType ATy = DIType(DIType(Args.getElement(i))); addType(Arg, ATy); if (ATy.isArtificial()) - addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + addFlag(Arg, dwarf::DW_AT_artificial); SPDie->addChild(Arg); } } if (SP.isArtificial()) - addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + addFlag(SPDie, dwarf::DW_AT_artificial); if (!SP.isLocalToUnit()) - addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + addFlag(SPDie, dwarf::DW_AT_external); if (SP.isOptimized()) - addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + addFlag(SPDie, dwarf::DW_AT_APPLE_optimized); if (unsigned isa = Asm->getISAEncoding()) { addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); @@ -1168,7 +1183,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { // Add scoping info. if (!GV.isLocalToUnit()) - addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + addFlag(VariableDIE, dwarf::DW_AT_external); // Add line number info. addSourceLine(VariableDIE, GV); @@ -1193,8 +1208,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, VariableDIE); addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); - addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, - 1); + addFlag(VariableDIE, dwarf::DW_AT_declaration); addDie(VariableSpecDIE); } else { addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); @@ -1260,7 +1274,7 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType *CTy) { Buffer.setTag(dwarf::DW_TAG_array_type); if (CTy->getTag() == dwarf::DW_TAG_vector_type) - addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1); + addFlag(&Buffer, dwarf::DW_AT_GNU_vector); // Emit derived type. addType(&Buffer, CTy->getTypeDerivedFrom()); @@ -1333,8 +1347,7 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { } if (DV->isArtificial()) - addUInt(VariableDie, dwarf::DW_AT_artificial, - dwarf::DW_FORM_flag, 1); + addFlag(VariableDie, dwarf::DW_AT_artificial); if (isScopeAbstract) { DV->setDIE(VariableDie); diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index b4ff9e8..22401fe 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -176,6 +176,9 @@ public: } public: + /// addFlag - Add a flag that is true to the DIE. + void addFlag(DIE *Die, unsigned Attribute); + /// addUInt - Add an unsigned integer attribute data and value. 
/// void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 649684a..946ac35 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -54,9 +54,29 @@ static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden, cl::desc("Make an absence of debug location information explicit."), cl::init(false)); -static cl::opt<bool> DwarfAccelTables("dwarf-accel-tables", cl::Hidden, +namespace { + enum DefaultOnOff { + Default, Enable, Disable + }; +} + +static cl::opt<DefaultOnOff> DwarfAccelTables("dwarf-accel-tables", cl::Hidden, cl::desc("Output prototype dwarf accelerator tables."), - cl::init(false)); + cl::values( + clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), + clEnumValEnd), + cl::init(Default)); + +static cl::opt<DefaultOnOff> DarwinGDBCompat("darwin-gdb-compat", cl::Hidden, + cl::desc("Compatibility with Darwin gdb."), + cl::values( + clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), + clEnumValEnd), + cl::init(Default)); namespace { const char *DWARFGroupName = "DWARF Emission"; @@ -135,10 +155,25 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; - // Turn on accelerator tables for Darwin. - if (Triple(M->getTargetTriple()).isOSDarwin()) - DwarfAccelTables = true; - + // Turn on accelerator tables and older gdb compatibility + // for Darwin. + bool isDarwin = Triple(M->getTargetTriple()).isOSDarwin(); + if (DarwinGDBCompat == Default) { + if (isDarwin) + isDarwinGDBCompat = true; + else + isDarwinGDBCompat = false; + } else + isDarwinGDBCompat = DarwinGDBCompat == Enable ? true : false; + + if (DwarfAccelTables == Default) { + if (isDarwin) + hasDwarfAccelTables = true; + else + hasDwarfAccelTables = false; + } else + hasDwarfAccelTables = DwarfAccelTables == Enable ? true : false; + { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); beginModule(M); @@ -282,7 +317,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, if (SP.isDefinition() && !SP.getContext().isCompileUnit() && !SP.getContext().isFile() && !isSubprogramContext(SP.getContext())) { - SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + SPCU->addFlag(SPDie, dwarf::DW_AT_declaration); // Add arguments. DICompositeType SPTy = SP.getType(); @@ -294,7 +329,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, DIType ATy = DIType(DIType(Args.getElement(i))); SPCU->addType(Arg, ATy); if (ATy.isArtificial()) - SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + SPCU->addFlag(Arg, dwarf::DW_AT_artificial); SPDie->addChild(Arg); } DIE *SPDeclDie = SPDie; @@ -575,7 +610,7 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { if (!CompilationDir.empty()) NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); if (DIUnit.isOptimized()) - NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized); StringRef Flags = DIUnit.getFlags(); if (!Flags.empty()) @@ -816,8 +851,8 @@ void DwarfDebug::endModule() { // Corresponding abbreviations into a abbrev section. emitAbbreviations(); - // Emit info into a dwarf accelerator table sections. 
- if (DwarfAccelTables) { + // Emit info into the dwarf accelerator table sections. + if (useDwarfAccelTables()) { emitAccelNames(); emitAccelObjC(); emitAccelNamespaces(); @@ -825,7 +860,10 @@ void DwarfDebug::endModule() { } // Emit info into a debug pubtypes section. - emitDebugPubTypes(); + // TODO: When we don't need the option anymore we can + // remove all of the code that adds to the table. + if (useDarwinGDBCompat()) + emitDebugPubTypes(); // Emit info into a debug loc section. emitDebugLoc(); @@ -840,7 +878,11 @@ void DwarfDebug::endModule() { emitDebugMacInfo(); // Emit inline info. - emitDebugInlineInfo(); + // TODO: When we don't need the option anymore we + // can remove all of the code that this section + // depends upon. + if (useDarwinGDBCompat()) + emitDebugInlineInfo(); // Emit info into a debug str section. emitDebugStr(); @@ -1439,8 +1481,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope); if (!MF->getTarget().Options.DisableFramePointerElim(*MF)) - TheCU->addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr, - dwarf::DW_FORM_flag, 1); + TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr); DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(), MMI->getFrameMoves())); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index d1d6512..f94c9d0 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -307,6 +307,9 @@ class DwarfDebug { // table for the same directory as DW_at_comp_dir. StringRef CompilationDir; + // A holder for the DarwinGDBCompat flag so that the compile unit can use it. + bool isDarwinGDBCompat; + bool hasDwarfAccelTables; private: /// assignAbbrevNumber - Define a unique number for the abbreviation. @@ -520,6 +523,11 @@ public: /// getStringPoolEntry - returns an entry into the string pool with the given /// string text. MCSymbol *getStringPoolEntry(StringRef Str); + + /// useDarwinGDBCompat - returns whether or not to limit some of our debug + /// output to the limitations of darwin gdb. + bool useDarwinGDBCompat() { return isDarwinGDBCompat; } + bool useDwarfAccelTables() { return hasDwarfAccelTables; } }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index 75f6056..fe9e493 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -43,26 +43,6 @@ protected: /// MMI - Collected machine module information. MachineModuleInfo *MMI; - /// EmitExceptionTable - Emit landing pads and actions. - /// - /// The general organization of the table is complex, but the basic concepts - /// are easy. First there is a header which describes the location and - /// organization of the three components that follow. - /// 1. The landing pad site information describes the range of code covered - /// by the try. In our case it's an accumulation of the ranges covered - /// by the invokes in the try. There is also a reference to the landing - /// pad that handles the exception once processed. Finally an index into - /// the actions table. - /// 2. The action table, in our case, is composed of pairs of type ids - /// and next action offset. Starting with the action index from the - /// landing pad site, each type Id is checked for a match to the current - /// exception. If it matches then the exception and type id are passed - /// on to the landing pad. Otherwise the next action is looked up. 
This - /// chain is terminated with a next action of zero. If no type id is - /// found the frame is unwound and handling continues. - /// 3. Type id table contains references to all the C++ typeinfo for all - /// catches in the function. This tables is reversed indexed base 1. - /// SharedTypeIds - How many leading type ids two landing pads have in common. static unsigned SharedTypeIds(const LandingPadInfo *L, const LandingPadInfo *R); @@ -119,6 +99,26 @@ protected: const RangeMapType &PadMap, const SmallVectorImpl<const LandingPadInfo *> &LPs, const SmallVectorImpl<unsigned> &FirstActions); + + /// EmitExceptionTable - Emit landing pads and actions. + /// + /// The general organization of the table is complex, but the basic concepts + /// are easy. First there is a header which describes the location and + /// organization of the three components that follow. + /// 1. The landing pad site information describes the range of code covered + /// by the try. In our case it's an accumulation of the ranges covered + /// by the invokes in the try. There is also a reference to the landing + /// pad that handles the exception once processed. Finally an index into + /// the actions table. + /// 2. The action table, in our case, is composed of pairs of type ids + /// and next action offset. Starting with the action index from the + /// landing pad site, each type Id is checked for a match to the current + /// exception. If it matches then the exception and type id are passed + /// on to the landing pad. Otherwise the next action is looked up. This + /// chain is terminated with a next action of zero. If no type id is + /// found the frame is unwound and handling continues. + /// 3. Type id table contains references to all the C++ typeinfo for all + /// catches in the function. This tables is reversed indexed base 1. 
void EmitExceptionTable(); public: diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index fb65bb7..7df0e15 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -1554,8 +1554,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) Uses.insert(*AI); } else { - if (Uses.count(Reg)) { - Uses.erase(Reg); + if (Uses.erase(Reg)) { for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) Uses.erase(*SubRegs); // Use sub-registers to be conservative } diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 2e189ad..386509b 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -95,6 +95,7 @@ add_llvm_library(LLVMCodeGen SplitKit.cpp StackProtector.cpp StackSlotColoring.cpp + StackColoring.cpp StrongPHIElimination.cpp TailDuplication.cpp TargetFrameLoweringImpl.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 939af3f..bc5258e 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -9,7 +9,6 @@ #define DEBUG_TYPE "calcspillweights" -#include "llvm/Function.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -42,8 +41,7 @@ void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const { bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** Compute Spill Weights **********\n" - << "********** Function: " - << MF.getFunction()->getName() << '\n'); + << "********** Function: " << MF.getName() << '\n'); LiveIntervals &LIS = getAnalysis<LiveIntervals>(); MachineRegisterInfo &MRI = MF.getRegInfo(); diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index fb2c2e8..65f0941 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -56,6 +56,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeRegisterCoalescerPass(Registry); initializeSlotIndexesPass(Registry); initializeStackProtectorPass(Registry); + initializeStackColoringPass(Registry); initializeStackSlotColoringPass(Registry); initializeStrongPHIEliminationPass(Registry); initializeTailDuplicatePassPass(Registry); diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index f9347ef..c40c5ac 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -18,7 +18,6 @@ #define DEBUG_TYPE "early-ifcvt" #include "MachineTraceMetrics.h" -#include "llvm/Function.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" @@ -775,8 +774,7 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" - << "********** Function: " - << ((Value*)MF.getFunction())->getName() << '\n'); + << "********** Function: " << MF.getName() << '\n'); TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); SchedModel = MF.getTarget().getInstrItineraryData()->SchedModel; diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index 7a17331..ffe4b63 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -14,7 +14,6 @@ #define DEBUG_TYPE "postrapseudos" #include "llvm/CodeGen/Passes.h" -#include "llvm/Function.h" #include 
"llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -190,8 +189,7 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) { bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Machine Function\n" << "********** EXPANDING POST-RA PSEUDO INSTRS **********\n" - << "********** Function: " - << MF.getFunction()->getName() << '\n'); + << "********** Function: " << MF.getName() << '\n'); TRI = MF.getTarget().getRegisterInfo(); TII = MF.getTarget().getInstrInfo(); diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 4214ba1..31e36f0 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -13,7 +13,6 @@ #define DEBUG_TYPE "ifcvt" #include "BranchFolding.h" -#include "llvm/Function.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -282,7 +281,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { } DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'" - << MF.getFunction()->getName() << "\'"); + << MF.getName() << "\'"); if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) { DEBUG(dbgs() << " skipped\n"); @@ -997,14 +996,13 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs, } for (unsigned i = 0, e = Defs.size(); i != e; ++i) { unsigned Reg = Defs[i]; - if (Redefs.count(Reg)) { + if (!Redefs.insert(Reg)) { if (AddImpUse) // Treat predicated update as read + write. MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/, true/*IsImp*/,false/*IsKill*/, false/*IsDead*/,true/*IsUndef*/)); } else { - Redefs.insert(Reg); for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) Redefs.insert(*SubRegs); } diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 07e37af..622127c 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -613,7 +613,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, propagateSiblingValue(SVI); } while (!WorkList.empty()); - // Look up the value we were looking for. We already did this lokup at the + // Look up the value we were looking for. We already did this lookup at the // top of the function, but SibValues may have been invalidated. 
SVI = SibValues.find(UseVNI); assert(SVI != SibValues.end() && "Didn't compute requested info"); diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index d631726..defc127 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -687,8 +687,7 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { clear(); LS.initialize(mf); DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: " - << ((Value*)mf.getFunction())->getName() - << " **********\n"); + << mf.getName() << " **********\n"); bool Changed = collectDebugValues(mf); computeIntervals(); diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 0a795e6..3e9b485 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "RegisterCoalescer.h" #include <algorithm> using namespace llvm; @@ -142,6 +143,48 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other, return false; } +bool LiveInterval::overlaps(const LiveInterval &Other, + const CoalescerPair &CP, + const SlotIndexes &Indexes) const { + assert(!empty() && "empty interval"); + if (Other.empty()) + return false; + + // Use binary searches to find initial positions. + const_iterator I = find(Other.beginIndex()); + const_iterator IE = end(); + if (I == IE) + return false; + const_iterator J = Other.find(I->start); + const_iterator JE = Other.end(); + if (J == JE) + return false; + + for (;;) { + // J has just been advanced to satisfy: + assert(J->end >= I->start); + // Check for an overlap. + if (J->start < I->end) { + // I and J are overlapping. Find the later start. + SlotIndex Def = std::max(I->start, J->start); + // Allow the overlap if Def is a coalescable copy. + if (Def.isBlock() || + !CP.isCoalescable(Indexes.getInstructionFromIndex(Def))) + return true; + } + // Advance the iterator that ends first to check for more overlaps. + if (J->end > I->end) { + std::swap(I, J); + std::swap(IE, JE); + } + // Advance J until J->end >= I->start. + do + if (++J == JE) + return false; + while (J->end < I->start); + } +} + /// overlaps - Return true if the live interval overlaps a range specified /// by [Start, End). 
bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const { @@ -705,9 +748,11 @@ raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) { return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")"; } +#ifndef NDEBUG void LiveRange::dump() const { dbgs() << *this << "\n"; } +#endif void LiveInterval::print(raw_ostream &OS) const { if (empty()) @@ -740,9 +785,11 @@ void LiveInterval::print(raw_ostream &OS) const { } } +#ifndef NDEBUG void LiveInterval::dump() const { dbgs() << *this << "\n"; } +#endif #ifndef NDEBUG void LiveInterval::verify() const { diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index d0f8ae1..17f9d9e 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -34,6 +34,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "LiveRangeCalc.h" +#include "VirtRegMap.h" #include <algorithm> #include <limits> #include <cmath> @@ -155,9 +156,11 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const { MF->print(OS, Indexes); } +#ifndef NDEBUG void LiveIntervals::dumpInstrs() const { printInstrs(dbgs()); } +#endif static bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) { @@ -382,8 +385,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, /// which a variable is live void LiveIntervals::computeIntervals() { DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n" - << "********** Function: " - << ((Value*)MF->getFunction())->getName() << '\n'); + << "********** Function: " << MF->getName() << '\n'); RegMaskBlocks.resize(MF->getNumBlockIDs()); @@ -440,7 +442,7 @@ void LiveIntervals::computeIntervals() { // Compute the number of register mask instructions in this block. std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()]; - RMB.second = RegMaskSlots.size() - RMB.first;; + RMB.second = RegMaskSlots.size() - RMB.first; } // Create empty intervals for registers defined by implicit_def's (except @@ -497,7 +499,7 @@ void LiveIntervals::computeRegMasks() { RegMaskBits.push_back(MO->getRegMask()); } // Compute the number of register mask instructions in this block. - RMB.second = RegMaskSlots.size() - RMB.first;; + RMB.second = RegMaskSlots.size() - RMB.first; } } @@ -734,12 +736,28 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // Register allocator hooks. // -void LiveIntervals::addKillFlags() { +void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { + // Keep track of regunit ranges. + SmallVector<std::pair<LiveInterval*, LiveInterval::iterator>, 8> RU; + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; LiveInterval *LI = &getInterval(Reg); + if (LI->empty()) + continue; + + // Find the regunit intervals for the assigned register. They may overlap + // the virtual register live range, cancelling any kills. + RU.clear(); + for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid(); + ++Units) { + LiveInterval *RUInt = &getRegUnit(*Units); + if (RUInt->empty()) + continue; + RU.push_back(std::make_pair(RUInt, RUInt->find(LI->begin()->end))); + } // Every instruction that kills Reg corresponds to a live range end point. 
for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE; @@ -750,7 +768,32 @@ void LiveIntervals::addKillFlags() { MachineInstr *MI = getInstructionFromIndex(RI->end); if (!MI) continue; - MI->addRegisterKilled(Reg, NULL); + + // Check if any of the reguints are live beyond the end of RI. That could + // happen when a physreg is defined as a copy of a virtreg: + // + // %EAX = COPY %vreg5 + // FOO %vreg5 <--- MI, cancel kill because %EAX is live. + // BAR %EAX<kill> + // + // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX. + bool CancelKill = false; + for (unsigned u = 0, e = RU.size(); u != e; ++u) { + LiveInterval *RInt = RU[u].first; + LiveInterval::iterator &I = RU[u].second; + if (I == RInt->end()) + continue; + I = RInt->advanceTo(I, RI->end); + if (I == RInt->end() || I->start >= RI->end) + continue; + // I is overlapping RI. + CancelKill = true; + break; + } + if (CancelKill) + MI->clearRegisterKills(Reg, NULL); + else + MI->addRegisterKilled(Reg, NULL); } } } @@ -1174,7 +1217,7 @@ private: SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx); if (LastUse != NewIdx) moveKillFlags(LI->reg, NewIdx, LastUse); - LR->end = LastUse.getRegSlot(); + LR->end = LastUse.getRegSlot(LR->end.isEarlyClobber()); } void moveEnteringDownFrom(SlotIndex OldIdx, IntRangePair& P) { @@ -1188,7 +1231,7 @@ private: assert(LR->end > OldIdx && "LiveRange does not cover original slot"); moveKillFlags(LI->reg, LR->end, NewIdx); } - LR->end = NewIdx.getRegSlot(); + LR->end = NewIdx.getRegSlot(LR->end.isEarlyClobber()); } } diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index d828f25..c3ff4f1 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -65,7 +65,11 @@ void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) { // Visit all operands that read Reg. This may include partial defs. for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg), E = MRI->reg_nodbg_end(); I != E; ++I) { - const MachineOperand &MO = I.getOperand(); + MachineOperand &MO = I.getOperand(); + // Clear all kill flags. They will be reinserted after register allocation + // by LiveIntervalAnalysis::addKillFlags(). + if (MO.isUse()) + MO.setIsKill(false); if (!MO.readsReg()) continue; // MI is reading Reg. 
We may have visited MI before if it happens to be diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp index cdb1776..7f22478 100644 --- a/lib/CodeGen/LiveRegMatrix.cpp +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -13,6 +13,7 @@ #define DEBUG_TYPE "regalloc" #include "LiveRegMatrix.h" +#include "RegisterCoalescer.h" #include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -117,8 +118,9 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, unsigned PhysReg) { if (VirtReg.empty()) return false; + CoalescerPair CP(VirtReg.reg, PhysReg, *TRI); for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) - if (VirtReg.overlaps(LIS->getRegUnit(*Units))) + if (VirtReg.overlaps(LIS->getRegUnit(*Units), CP, *LIS->getSlotIndexes())) return true; return false; } diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 348ed3a..f294124 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -65,6 +65,7 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const { } void LiveVariables::VarInfo::dump() const { +#ifndef NDEBUG dbgs() << " Alive in blocks: "; for (SparseBitVector<>::iterator I = AliveBlocks.begin(), E = AliveBlocks.end(); I != E; ++I) @@ -77,6 +78,7 @@ void LiveVariables::VarInfo::dump() const { dbgs() << "\n #" << i << ": " << *Kills[i]; dbgs() << "\n"; } +#endif } /// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg. @@ -806,18 +808,44 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *SuccBB) { const unsigned NumNew = BB->getNumber(); - // All registers used by PHI nodes in SuccBB must be live through BB. - for (MachineBasicBlock::iterator BBI = SuccBB->begin(), - BBE = SuccBB->end(); BBI != BBE && BBI->isPHI(); ++BBI) + SmallSet<unsigned, 16> Defs, Kills; + + MachineBasicBlock::iterator BBI = SuccBB->begin(), BBE = SuccBB->end(); + for (; BBI != BBE && BBI->isPHI(); ++BBI) { + // Record the def of the PHI node. + Defs.insert(BBI->getOperand(0).getReg()); + + // All registers used by PHI nodes in SuccBB must be live through BB. for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) if (BBI->getOperand(i+1).getMBB() == BB) getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew); + } + + // Record all vreg defs and kills of all instructions in SuccBB. + for (; BBI != BBE; ++BBI) { + for (MachineInstr::mop_iterator I = BBI->operands_begin(), + E = BBI->operands_end(); I != E; ++I) { + if (I->isReg() && TargetRegisterInfo::isVirtualRegister(I->getReg())) { + if (I->isDef()) + Defs.insert(I->getReg()); + else if (I->isKill()) + Kills.insert(I->getReg()); + } + } + } // Update info for all live variables for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + + // If the Defs is defined in the successor it can't be live in BB. + if (Defs.count(Reg)) + continue; + + // If the register is either killed in or live through SuccBB it's also live + // through BB. 
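// Editor's note: a minimal standalone sketch of the liveness rule applied
// just below in LiveVariables::addNewBlock(); not part of the patch. BB is
// the new block inserted on an edge into SuccBB, and the helper name and
// boolean inputs are invented for illustration (they correspond to the
// Defs/Kills sets filled in the loops above).
static bool liveThroughNewBlock(bool DefinedInSuccBB, bool KilledInSuccBB,
                                bool LiveThroughSuccBB) {
  // A value created inside SuccBB cannot be live into the new block.
  if (DefinedInSuccBB)
    return false;
  // Otherwise it is live through BB exactly when SuccBB either kills it or
  // passes it through.
  return KilledInSuccBB || LiveThroughSuccBB;
}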
VarInfo &VI = getVarInfo(Reg); - if (!VI.AliveBlocks.test(NumNew) && VI.isLiveIn(*SuccBB, Reg, *MRI)) + if (Kills.count(Reg) || VI.AliveBlocks.test(SuccBB->getNumber())) VI.AliveBlocks.set(NumNew); } } diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index cf13dbd..9250577 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -228,9 +228,11 @@ const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const { return 0; } +#ifndef NDEBUG void MachineBasicBlock::dump() const { print(dbgs()); } +#endif StringRef MachineBasicBlock::getName() const { if (const BasicBlock *LBB = getBasicBlock()) @@ -243,7 +245,7 @@ StringRef MachineBasicBlock::getName() const { std::string MachineBasicBlock::getFullName() const { std::string Name; if (getParent()) - Name = (getParent()->getFunction()->getName() + ":").str(); + Name = (getParent()->getName() + ":").str(); if (getBasicBlock()) Name += getBasicBlock()->getName(); else diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index d4aede8..c282332 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -284,12 +284,19 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, return std::make_pair(Result, Result + Num); } +#ifndef NDEBUG void MachineFunction::dump() const { print(dbgs()); } +#endif + +StringRef MachineFunction::getName() const { + assert(getFunction() && "No function!"); + return getFunction()->getName(); +} void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { - OS << "# Machine code for function " << Fn->getName() << ": "; + OS << "# Machine code for function " << getName() << ": "; if (RegInfo) { OS << (RegInfo->isSSA() ? "SSA" : "Post SSA"); if (!RegInfo->tracksLiveness()) @@ -334,7 +341,7 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { BB->print(OS, Indexes); } - OS << "\n# End machine code for function " << Fn->getName() << ".\n\n"; + OS << "\n# End machine code for function " << getName() << ".\n\n"; } namespace llvm { @@ -344,7 +351,7 @@ namespace llvm { DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const MachineFunction *F) { - return "CFG for '" + F->getFunction()->getName().str() + "' function"; + return "CFG for '" + F->getName().str() + "' function"; } std::string getNodeLabel(const MachineBasicBlock *Node, @@ -377,7 +384,7 @@ namespace llvm { void MachineFunction::viewCFG() const { #ifndef NDEBUG - ViewGraph(this, "mf" + getFunction()->getName()); + ViewGraph(this, "mf" + getName()); #else errs() << "MachineFunction::viewCFG is only available in debug builds on " << "systems with Graphviz or gv!\n"; @@ -387,7 +394,7 @@ void MachineFunction::viewCFG() const void MachineFunction::viewCFGOnly() const { #ifndef NDEBUG - ViewGraph(this, "mf" + getFunction()->getName(), true); + ViewGraph(this, "mf" + getName(), true); #else errs() << "MachineFunction::viewCFGOnly is only available in debug builds on " << "systems with Graphviz or gv!\n"; @@ -453,7 +460,9 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, unsigned StackAlign = TFI.getStackAlignment(); unsigned Align = MinAlign(SPOffset, StackAlign); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, - /*isSS*/false, false)); + /*isSS*/ false, + /*NeedSP*/ false, + /*Alloca*/ 0)); return -++NumFixedObjects; } @@ -525,9 +534,11 @@ void MachineFrameInfo::print(const 
MachineFunction &MF, raw_ostream &OS) const{ } } +#ifndef NDEBUG void MachineFrameInfo::dump(const MachineFunction &MF) const { print(MF, dbgs()); } +#endif //===----------------------------------------------------------------------===// // MachineJumpTableInfo implementation @@ -622,7 +633,9 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const { OS << '\n'; } +#ifndef NDEBUG void MachineJumpTableInfo::dump() const { print(dbgs()); } +#endif //===----------------------------------------------------------------------===// @@ -749,10 +762,12 @@ void MachineConstantPool::print(raw_ostream &OS) const { if (Constants[i].isMachineConstantPoolEntry()) Constants[i].Val.MachineCPVal->print(OS); else - OS << *(Value*)Constants[i].Val.ConstVal; + OS << *(const Value*)Constants[i].Val.ConstVal; OS << ", align=" << Constants[i].getAlignment(); OS << "\n"; } } +#ifndef NDEBUG void MachineConstantPool::dump() const { print(dbgs()); } +#endif diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index b166849..0508b9f 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -111,6 +111,7 @@ void MachineOperand::setIsDef(bool Val) { /// the specified value. If an operand is known to be an immediate already, /// the setImm method should be used. void MachineOperand::ChangeToImmediate(int64_t ImmVal) { + assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); // If this operand is currently a register operand, and if this is in a // function, deregister the operand from the register's use/def list. if (isReg() && isOnRegUseList()) @@ -136,7 +137,8 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, RegInfo = &MF->getRegInfo(); // If this operand is already a register operand, remove it from the // register's use/def lists. - if (RegInfo && isReg()) + bool WasReg = isReg(); + if (RegInfo && WasReg) RegInfo->removeRegOperandFromUseList(this); // Change this to a register and set the reg#. @@ -153,6 +155,9 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, IsDebug = isDebug; // Ensure isOnRegUseList() returns false. Contents.Reg.Prev = 0; + // Preserve the tie when the operand was already a register. + if (!WasReg) + TiedTo = 0; // If this operand is embedded in a function, add the operand to the // register's use/def list. @@ -208,8 +213,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { hash_code llvm::hash_value(const MachineOperand &MO) { switch (MO.getType()) { case MachineOperand::MO_Register: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getReg(), - MO.getSubReg(), MO.isDef()); + // Register operands don't have target flags. 
+ return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef()); case MachineOperand::MO_Immediate: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm()); case MachineOperand::MO_CImmediate: @@ -262,7 +267,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << PrintReg(getReg(), TRI, getSubReg()); if (isDef() || isKill() || isDead() || isImplicit() || isUndef() || - isInternalRead() || isEarlyClobber()) { + isInternalRead() || isEarlyClobber() || isTied()) { OS << '<'; bool NeedComma = false; if (isDef()) { @@ -282,27 +287,32 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { NeedComma = true; } - if (isKill() || isDead() || (isUndef() && isUse()) || isInternalRead()) { + if (isKill()) { if (NeedComma) OS << ','; - NeedComma = false; - if (isKill()) { - OS << "kill"; - NeedComma = true; - } - if (isDead()) { - OS << "dead"; - NeedComma = true; - } - if (isUndef() && isUse()) { - if (NeedComma) OS << ','; - OS << "undef"; - NeedComma = true; - } - if (isInternalRead()) { - if (NeedComma) OS << ','; - OS << "internal"; - NeedComma = true; - } + OS << "kill"; + NeedComma = true; + } + if (isDead()) { + if (NeedComma) OS << ','; + OS << "dead"; + NeedComma = true; + } + if (isUndef() && isUse()) { + if (NeedComma) OS << ','; + OS << "undef"; + NeedComma = true; + } + if (isInternalRead()) { + if (NeedComma) OS << ','; + OS << "internal"; + NeedComma = true; + } + if (isTied()) { + if (NeedComma) OS << ','; + OS << "tied"; + if (TiedTo != 15) + OS << unsigned(TiedTo - 1); + NeedComma = true; } OS << '>'; } @@ -673,6 +683,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (!isImpReg && !isInlineAsm()) { while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) { --OpNo; + assert(!Operands[OpNo].isTied() && "Cannot move tied operands"); if (RegInfo) RegInfo->removeRegOperandFromUseList(&Operands[OpNo]); } @@ -708,12 +719,25 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (Operands[OpNo].isReg()) { // Ensure isOnRegUseList() returns false, regardless of Op's status. Operands[OpNo].Contents.Reg.Prev = 0; + // Ignore existing ties. This is not a property that can be copied. + Operands[OpNo].TiedTo = 0; // Add the new operand to RegInfo. if (RegInfo) RegInfo->addRegOperandToUseList(&Operands[OpNo]); - // If the register operand is flagged as early, mark the operand as such. - if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) - Operands[OpNo].setIsEarlyClobber(true); + // The MCID operand information isn't accurate until we start adding + // explicit operands. The implicit operands are added first, then the + // explicits are inserted before them. + if (!isImpReg) { + // Tie uses to defs as indicated in MCInstrDesc. + if (Operands[OpNo].isUse()) { + int DefIdx = MCID->getOperandConstraint(OpNo, MCOI::TIED_TO); + if (DefIdx != -1) + tieOperands(DefIdx, OpNo); + } + // If the register operand is flagged as early, mark the operand as such. + if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) + Operands[OpNo].setIsEarlyClobber(true); + } } // Re-add all the implicit ops. @@ -730,6 +754,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { /// void MachineInstr::RemoveOperand(unsigned OpNo) { assert(OpNo < Operands.size() && "Invalid operand number"); + untieRegOperand(OpNo); MachineRegisterInfo *RegInfo = getRegInfo(); // Special case removing the last one. 
@@ -752,6 +777,13 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { } } +#ifndef NDEBUG + // Moving tied operands would break the ties. + for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i) + if (Operands[i].isReg()) + assert(!Operands[i].isTied() && "Cannot move tied operands"); +#endif + Operands.erase(Operands.begin()+OpNo); if (RegInfo) { @@ -935,6 +967,12 @@ bool MachineInstr::isStackAligningInlineAsm() const { return false; } +InlineAsm::AsmDialect MachineInstr::getInlineAsmDialect() const { + assert(isInlineAsm() && "getInlineAsmDialect() only works for inline asms!"); + unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + return InlineAsm::AsmDialect((ExtraInfo & InlineAsm::Extra_AsmDialect) != 0); +} + int MachineInstr::findInlineAsmFlagIdx(unsigned OpIdx, unsigned *GroupNo) const { assert(isInlineAsm() && "Expected an inline asm instruction"); @@ -1114,107 +1152,99 @@ int MachineInstr::findFirstPredOperandIdx() const { return -1; } -/// isRegTiedToUseOperand - Given the index of a register def operand, -/// check if the register def is tied to a source operand, due to either -/// two-address elimination or inline assembly constraints. Returns the -/// first tied use operand index by reference is UseOpIdx is not null. -bool MachineInstr:: -isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { - if (isInlineAsm()) { - assert(DefOpIdx > InlineAsm::MIOp_FirstOperand); - const MachineOperand &MO = getOperand(DefOpIdx); - if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0) - return false; - // Determine the actual operand index that corresponds to this index. - unsigned DefNo = 0; - int FlagIdx = findInlineAsmFlagIdx(DefOpIdx, &DefNo); - if (FlagIdx < 0) - return false; +// MachineOperand::TiedTo is 4 bits wide. +const unsigned TiedMax = 15; - // Which part of the group is DefOpIdx? - unsigned DefPart = DefOpIdx - (FlagIdx + 1); - - for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); - i != e; ++i) { - const MachineOperand &FMO = getOperand(i); - if (!FMO.isImm()) - continue; - if (i+1 >= e || !getOperand(i+1).isReg() || !getOperand(i+1).isUse()) - continue; - unsigned Idx; - if (InlineAsm::isUseOperandTiedToDef(FMO.getImm(), Idx) && - Idx == DefNo) { - if (UseOpIdx) - *UseOpIdx = (unsigned)i + 1 + DefPart; - return true; - } - } - return false; +/// tieOperands - Mark operands at DefIdx and UseIdx as tied to each other. +/// +/// Use and def operands can be tied together, indicated by a non-zero TiedTo +/// field. TiedTo can have these values: +/// +/// 0: Operand is not tied to anything. +/// 1 to TiedMax-1: Tied to getOperand(TiedTo-1). +/// TiedMax: Tied to an operand >= TiedMax-1. +/// +/// The tied def must be one of the first TiedMax operands on a normal +/// instruction. INLINEASM instructions allow more tied defs. +/// +void MachineInstr::tieOperands(unsigned DefIdx, unsigned UseIdx) { + MachineOperand &DefMO = getOperand(DefIdx); + MachineOperand &UseMO = getOperand(UseIdx); + assert(DefMO.isDef() && "DefIdx must be a def operand"); + assert(UseMO.isUse() && "UseIdx must be a use operand"); + assert(!DefMO.isTied() && "Def is already tied to another use"); + assert(!UseMO.isTied() && "Use is already tied to another def"); + + if (DefIdx < TiedMax) + UseMO.TiedTo = DefIdx + 1; + else { + // Inline asm can use the group descriptors to find tied operands, but on + // normal instruction, the tied def must be within the first TiedMax + // operands. 
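// Editor's note: standalone sketch of the 4-bit TiedTo encoding documented
// above; not part of the patch, and the helper names are invented for
// illustration. 0 means "not tied", 1..TiedMax-1 store the partner operand
// index plus one, and TiedMax means the partner index is >= TiedMax-1 and
// must be searched for (see findTiedOperandIdx below).
#include <algorithm>

static const unsigned TiedMaxSketch = 15;  // MachineOperand::TiedTo is 4 bits.

static unsigned encodeTiedTo(unsigned PartnerIdx) {
  return std::min(PartnerIdx + 1, TiedMaxSketch);
}

// Returns true if the field denotes a tie; PartnerIdx receives the partner
// operand index, or ~0u when it is out of range and needs a search.
static bool decodeTiedTo(unsigned TiedTo, unsigned &PartnerIdx) {
  if (TiedTo == 0)
    return false;
  PartnerIdx = TiedTo < TiedMaxSketch ? TiedTo - 1 : ~0u;
  return true;
}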
+ assert(isInlineAsm() && "DefIdx out of range"); + UseMO.TiedTo = TiedMax; } - assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!"); - const MCInstrDesc &MCID = getDesc(); - for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = getOperand(i); - if (MO.isReg() && MO.isUse() && - MCID.getOperandConstraint(i, MCOI::TIED_TO) == (int)DefOpIdx) { - if (UseOpIdx) - *UseOpIdx = (unsigned)i; - return true; - } - } - return false; + // UseIdx can be out of range, we'll search for it in findTiedOperandIdx(). + DefMO.TiedTo = std::min(UseIdx + 1, TiedMax); } -/// isRegTiedToDefOperand - Return true if the operand of the specified index -/// is a register use and it is tied to an def operand. It also returns the def -/// operand index by reference. -bool MachineInstr:: -isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { - if (isInlineAsm()) { - const MachineOperand &MO = getOperand(UseOpIdx); - if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0) - return false; +/// Given the index of a tied register operand, find the operand it is tied to. +/// Defs are tied to uses and vice versa. Returns the index of the tied operand +/// which must exist. +unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const { + const MachineOperand &MO = getOperand(OpIdx); + assert(MO.isTied() && "Operand isn't tied"); - // Find the flag operand corresponding to UseOpIdx - int FlagIdx = findInlineAsmFlagIdx(UseOpIdx); - if (FlagIdx < 0) - return false; + // Normally TiedTo is in range. + if (MO.TiedTo < TiedMax) + return MO.TiedTo - 1; - const MachineOperand &UFMO = getOperand(FlagIdx); - unsigned DefNo; - if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) { - if (!DefOpIdx) - return true; - - unsigned DefIdx = InlineAsm::MIOp_FirstOperand; - // Remember to adjust the index. First operand is asm string, second is - // the HasSideEffects and AlignStack bits, then there is a flag for each. - while (DefNo) { - const MachineOperand &FMO = getOperand(DefIdx); - assert(FMO.isImm()); - // Skip over this def. - DefIdx += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1; - --DefNo; - } - *DefOpIdx = DefIdx + UseOpIdx - FlagIdx; - return true; + // Uses on normal instructions can be out of range. + if (!isInlineAsm()) { + // Normal tied defs must be in the 0..TiedMax-1 range. + if (MO.isUse()) + return TiedMax - 1; + // MO is a def. Search for the tied use. + for (unsigned i = TiedMax - 1, e = getNumOperands(); i != e; ++i) { + const MachineOperand &UseMO = getOperand(i); + if (UseMO.isReg() && UseMO.isUse() && UseMO.TiedTo == OpIdx + 1) + return i; } - return false; + llvm_unreachable("Can't find tied use"); } - const MCInstrDesc &MCID = getDesc(); - if (UseOpIdx >= MCID.getNumOperands()) - return false; - const MachineOperand &MO = getOperand(UseOpIdx); - if (!MO.isReg() || !MO.isUse()) - return false; - int DefIdx = MCID.getOperandConstraint(UseOpIdx, MCOI::TIED_TO); - if (DefIdx == -1) - return false; - if (DefOpIdx) - *DefOpIdx = (unsigned)DefIdx; - return true; + // Now deal with inline asm by parsing the operand group descriptor flags. + // Find the beginning of each operand group. 
+ SmallVector<unsigned, 8> GroupIdx; + unsigned OpIdxGroup = ~0u; + unsigned NumOps; + for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); i < e; + i += NumOps) { + const MachineOperand &FlagMO = getOperand(i); + assert(FlagMO.isImm() && "Invalid tied operand on inline asm"); + unsigned CurGroup = GroupIdx.size(); + GroupIdx.push_back(i); + NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm()); + // OpIdx belongs to this operand group. + if (OpIdx > i && OpIdx < i + NumOps) + OpIdxGroup = CurGroup; + unsigned TiedGroup; + if (!InlineAsm::isUseOperandTiedToDef(FlagMO.getImm(), TiedGroup)) + continue; + // Operands in this group are tied to operands in TiedGroup which must be + // earlier. Find the number of operands between the two groups. + unsigned Delta = i - GroupIdx[TiedGroup]; + + // OpIdx is a use tied to TiedGroup. + if (OpIdxGroup == CurGroup) + return OpIdx - Delta; + + // OpIdx is a def tied to this use group. + if (OpIdxGroup == TiedGroup) + return OpIdx + Delta; + } + llvm_unreachable("Invalid tied operand on inline asm"); } /// clearKillInfo - Clears kill flags on all operands. @@ -1292,7 +1322,12 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, AliasAnalysis *AA, bool &SawStore) const { // Ignore stuff that we obviously can't move. - if (mayStore() || isCall()) { + // + // Treat volatile loads as stores. This is not strictly necessary for + // volatiles, but it is required for atomic loads. It is not allowed to move + // a load across an atomic load with Ordering > Monotonic. + if (mayStore() || isCall() || + (mayLoad() && hasOrderedMemoryRef())) { SawStore = true; return false; } @@ -1308,8 +1343,8 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, // load. if (mayLoad() && !isInvariantLoad(AA)) // Otherwise, this is a real load. If there is a store between the load and - // end of block, or if the load is volatile, we can't move it. - return !SawStore && !hasVolatileMemoryRef(); + // end of block, we can't move it. + return !SawStore; return true; } @@ -1340,11 +1375,11 @@ bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII, return true; } -/// hasVolatileMemoryRef - Return true if this instruction may have a -/// volatile memory reference, or if the information describing the -/// memory reference is not available. Return false if it is known to -/// have no volatile memory references. -bool MachineInstr::hasVolatileMemoryRef() const { +/// hasOrderedMemoryRef - Return true if this instruction may have an ordered +/// or volatile memory reference, or if the information describing the memory +/// reference is not available. Return false if it is known to have no ordered +/// memory references. +bool MachineInstr::hasOrderedMemoryRef() const { // An instruction known never to access memory won't have a volatile access. if (!mayStore() && !mayLoad() && @@ -1357,9 +1392,9 @@ bool MachineInstr::hasVolatileMemoryRef() const { if (memoperands_empty()) return true; - // Check the memory reference information for volatile references. + // Check the memory reference information for ordered references. 
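// Editor's note: standalone sketch of the memory-operand scan performed just
// below and used by the new hasOrderedMemoryRef()/isSafeToMove() logic above;
// not part of the patch, helper name invented for illustration. An
// instruction may be moved only if every MachineMemOperand is unordered
// (neither volatile nor an atomic access ordered above monotonic); missing
// memory operand info forces the conservative answer.
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"

static bool allMemRefsUnordered(const llvm::MachineInstr &MI) {
  if (MI.memoperands_empty())
    return false;                       // unknown access: assume ordered.
  for (llvm::MachineInstr::mmo_iterator I = MI.memoperands_begin(),
                                        E = MI.memoperands_end();
       I != E; ++I)
    if (!(*I)->isUnordered())
      return false;                     // volatile or ordered atomic access.
  return true;
}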
for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I) - if ((*I)->isVolatile()) + if (!(*I)->isUnordered()) return true; return false; @@ -1461,7 +1496,9 @@ void MachineInstr::copyImplicitOps(const MachineInstr *MI) { } void MachineInstr::dump() const { +#ifndef NDEBUG dbgs() << " " << *this; +#endif } static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, @@ -1540,6 +1577,10 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { OS << " [sideeffect]"; if (ExtraInfo & InlineAsm::Extra_IsAlignStack) OS << " [alignstack]"; + if (getInlineAsmDialect() == InlineAsm::AD_ATT) + OS << " [attdialect]"; + if (getInlineAsmDialect() == InlineAsm::AD_Intel) + OS << " [inteldialect]"; StartOp = AsmDescOp = InlineAsm::MIOp_FirstOperand; FirstOp = false; diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index efec481..169443e 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -334,7 +334,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); else DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); - DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n"); + DEBUG(dbgs() << MF.getName() << " ********\n"); if (PreRegAlloc) { // Estimate register pressure during pre-regalloc pass. diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index 9f3829e..05d2f2a 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -74,6 +74,8 @@ MachineBasicBlock *MachineLoop::getBottomBlock() { return BotMBB; } +#ifndef NDEBUG void MachineLoop::dump() const { print(dbgs()); } +#endif diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index a1dc948..4704dae 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -252,7 +252,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { continue; } DEBUG(dbgs() << "********** MI Scheduling **********\n"); - DEBUG(dbgs() << MF->getFunction()->getName() + DEBUG(dbgs() << MF->getName() << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: "; if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; else dbgs() << "End"; @@ -764,12 +764,14 @@ public: Queue.pop_back(); } +#ifndef NDEBUG void dump() { dbgs() << Name << ": "; for (unsigned i = 0, e = Queue.size(); i < e; ++i) dbgs() << Queue[i]->NodeNum << " "; dbgs() << "\n"; } +#endif }; /// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance @@ -905,13 +907,12 @@ void ConvergingScheduler::releaseTopNode(SUnit *SU) { for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; - unsigned Latency = - DAG->computeOperandLatency(I->getSUnit(), SU, *I, /*FindMin=*/true); + unsigned MinLatency = I->getMinLatency(); #ifndef NDEBUG - Top.MaxMinLatency = std::max(Latency, Top.MaxMinLatency); + Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); #endif - if (SU->TopReadyCycle < PredReadyCycle + Latency) - SU->TopReadyCycle = PredReadyCycle + Latency; + if (SU->TopReadyCycle < PredReadyCycle + MinLatency) + SU->TopReadyCycle = PredReadyCycle + MinLatency; } Top.releaseNode(SU, SU->TopReadyCycle); } @@ -925,13 +926,12 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) { for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; - 
unsigned Latency = - DAG->computeOperandLatency(SU, I->getSUnit(), *I, /*FindMin=*/true); + unsigned MinLatency = I->getMinLatency(); #ifndef NDEBUG - Bot.MaxMinLatency = std::max(Latency, Bot.MaxMinLatency); + Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); #endif - if (SU->BotReadyCycle < SuccReadyCycle + Latency) - SU->BotReadyCycle = SuccReadyCycle + Latency; + if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) + SU->BotReadyCycle = SuccReadyCycle + MinLatency; } Bot.releaseNode(SU, SU->BotReadyCycle); } diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 852c169..181e09e 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -23,8 +23,9 @@ // the verifier errors. //===----------------------------------------------------------------------===// +#include "llvm/BasicBlock.h" +#include "llvm/InlineAsm.h" #include "llvm/Instructions.h" -#include "llvm/Function.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/LiveStackAnalysis.h" @@ -213,6 +214,8 @@ namespace { void report(const char *msg, const MachineBasicBlock *MBB, const LiveInterval &LI); + void verifyInlineAsm(const MachineInstr *MI); + void checkLiveness(const MachineOperand *MO, unsigned MONum); void markReachable(const MachineBasicBlock *MBB); void calcRegsPassed(); @@ -357,7 +360,7 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) { MF->print(*OS, Indexes); } *OS << "*** Bad machine code: " << msg << " ***\n" - << "- function: " << MF->getFunction()->getName() << "\n"; + << "- function: " << MF->getName() << "\n"; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { @@ -365,7 +368,7 @@ void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { report(msg, MBB->getParent()); *OS << "- basic block: BB#" << MBB->getNumber() << ' ' << MBB->getName() - << " (" << (void*)MBB << ')'; + << " (" << (const void*)MBB << ')'; if (Indexes) *OS << " [" << Indexes->getMBBStartIdx(MBB) << ';' << Indexes->getMBBEndIdx(MBB) << ')'; @@ -695,6 +698,49 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) { } } +// The operands on an INLINEASM instruction must follow a template. +// Verify that the flag operands make sense. +void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) { + // The first two operands on INLINEASM are the asm string and global flags. + if (MI->getNumOperands() < 2) { + report("Too few operands on inline asm", MI); + return; + } + if (!MI->getOperand(0).isSymbol()) + report("Asm string must be an external symbol", MI); + if (!MI->getOperand(1).isImm()) + report("Asm flags must be an immediate", MI); + // Allowed flags are Extra_HasSideEffects = 1, and Extra_IsAlignStack = 2. + if (!isUInt<2>(MI->getOperand(1).getImm())) + report("Unknown asm flags", &MI->getOperand(1), 1); + + assert(InlineAsm::MIOp_FirstOperand == 2 && "Asm format changed"); + + unsigned OpNo = InlineAsm::MIOp_FirstOperand; + unsigned NumOps; + for (unsigned e = MI->getNumOperands(); OpNo < e; OpNo += NumOps) { + const MachineOperand &MO = MI->getOperand(OpNo); + // There may be implicit ops after the fixed operands. + if (!MO.isImm()) + break; + NumOps = 1 + InlineAsm::getNumOperandRegisters(MO.getImm()); + } + + if (OpNo > MI->getNumOperands()) + report("Missing operands in last group", MI); + + // An optional MDNode follows the groups. 
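// Editor's note: standalone sketch of the INLINEASM operand layout that
// verifyInlineAsm() checks above; not part of the patch, helper name invented
// for illustration. After the asm string and the extra-info immediate come
// operand groups (one flag immediate followed by that group's registers),
// then an optional metadata operand and trailing implicit registers.
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/InlineAsm.h"

static bool groupsAreWellFormed(const llvm::MachineInstr &MI) {
  unsigned OpNo = llvm::InlineAsm::MIOp_FirstOperand;
  while (OpNo < MI.getNumOperands() && MI.getOperand(OpNo).isImm()) {
    unsigned Flag = MI.getOperand(OpNo).getImm();
    // Each group covers its flag plus getNumOperandRegisters(Flag) registers.
    OpNo += 1 + llvm::InlineAsm::getNumOperandRegisters(Flag);
  }
  return OpNo <= MI.getNumOperands();   // the last group must not overrun.
}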
+ if (OpNo < MI->getNumOperands() && MI->getOperand(OpNo).isMetadata()) + ++OpNo; + + // All trailing operands must be implicit registers. + for (unsigned e = MI->getNumOperands(); OpNo < e; ++OpNo) { + const MachineOperand &MO = MI->getOperand(OpNo); + if (!MO.isReg() || !MO.isImplicit()) + report("Expected implicit register after groups", &MO, OpNo); + } +} + void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { const MCInstrDesc &MCID = MI->getDesc(); if (MI->getNumOperands() < MCID.getNumOperands()) { @@ -703,6 +749,10 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { << MI->getNumExplicitOperands() << " given.\n"; } + // Check the tied operands. + if (MI->isInlineAsm()) + verifyInlineAsm(MI); + // Check the MachineMemOperands for basic consistency. for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), E = MI->memoperands_end(); I != E; ++I) { @@ -758,6 +808,17 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MO->isImplicit()) report("Explicit operand marked as implicit", MO, MONum); } + + int TiedTo = MCID.getOperandConstraint(MONum, MCOI::TIED_TO); + if (TiedTo != -1) { + if (!MO->isReg()) + report("Tied use must be a register", MO, MONum); + else if (!MO->isTied()) + report("Operand should be tied", MO, MONum); + else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum)) + report("Tied def doesn't match MCInstrDesc", MO, MONum); + } else if (MO->isReg() && MO->isTied()) + report("Explicit operand should not be tied", MO, MONum); } else { // ARM adds %reg0 operands to indicate predicates. We'll allow that. if (MO->isReg() && !MO->isImplicit() && !MI->isVariadic() && MO->getReg()) @@ -772,6 +833,28 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MRI->tracksLiveness() && !MI->isDebugValue()) checkLiveness(MO, MONum); + // Verify the consistency of tied operands. + if (MO->isTied()) { + unsigned OtherIdx = MI->findTiedOperandIdx(MONum); + const MachineOperand &OtherMO = MI->getOperand(OtherIdx); + if (!OtherMO.isReg()) + report("Must be tied to a register", MO, MONum); + if (!OtherMO.isTied()) + report("Missing tie flags on tied operand", MO, MONum); + if (MI->findTiedOperandIdx(OtherIdx) != MONum) + report("Inconsistent tie links", MO, MONum); + if (MONum < MCID.getNumDefs()) { + if (OtherIdx < MCID.getNumOperands()) { + if (-1 == MCID.getOperandConstraint(OtherIdx, MCOI::TIED_TO)) + report("Explicit def tied to explicit use without tie constraint", + MO, MONum); + } else { + if (!OtherMO.isImplicit()) + report("Explicit def should be tied to implicit use", MO, MONum); + } + } + } + // Verify two-address constraints after leaving SSA form. unsigned DefIdx; if (!MRI->isSSA() && MO->isUse() && diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 56526f2..a6dd5de 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -447,8 +447,8 @@ void TargetPassConfig::addMachinePasses() { const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue()); const PassInfo *IPI = PR->getPassInfo(StringRef("print-machineinstrs")); assert (TPI && IPI && "Pass ID not registered!"); - const char *TID = (char *)(TPI->getTypeInfo()); - const char *IID = (char *)(IPI->getTypeInfo()); + const char *TID = (const char *)(TPI->getTypeInfo()); + const char *IID = (const char *)(IPI->getTypeInfo()); insertPass(TID, IID); } @@ -529,6 +529,10 @@ void TargetPassConfig::addMachineSSAOptimization() { // instructions dead. 
addPass(&OptimizePHIsID); + // This pass merges large allocas. StackSlotColoring is a different pass + // which merges spill slots. + addPass(&StackColoringID); + // If the target requests it, assign local variables to stack slots relative // to one another and simplify frame index references where possible. addPass(&LocalStackSlotAllocationID); diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 7449ff5..6090752 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -240,6 +240,7 @@ void SchedulePostRATDList::exitRegion() { ScheduleDAGInstrs::exitRegion(); } +#ifndef NDEBUG /// dumpSchedule - dump the scheduled Sequence. void SchedulePostRATDList::dumpSchedule() const { for (unsigned i = 0, e = Sequence.size(); i != e; i++) { @@ -249,6 +250,7 @@ void SchedulePostRATDList::dumpSchedule() const { dbgs() << "**** NOOP ****\n"; } } +#endif bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { TII = Fn.getTarget().getInstrInfo(); @@ -298,7 +300,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { static int bbcnt = 0; if (bbcnt++ % DebugDiv != DebugMod) continue; - dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getName() + dbgs() << "*** DEBUG scheduling " << Fn.getName() << ":BB#" << MBB->getNumber() << " ***\n"; } #endif diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 34d075c..e4e18c3 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -137,8 +137,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n" - << "********** Function: " - << ((Value*)MF.getFunction())->getName() << '\n'); + << "********** Function: " << MF.getName() << '\n'); bool Changed = false; diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 3a03807..8a49609 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -20,7 +20,6 @@ #include "VirtRegMap.h" #include "LiveRegMatrix.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Function.h" #include "llvm/PassAnalysisSupport.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -273,7 +272,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, bool RABasic::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n" << "********** Function: " - << ((Value*)mf.getFunction())->getName() << '\n'); + << mf.getName() << '\n'); MF = &mf; RegAllocBase::init(getAnalysis<VirtRegMap>(), diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 6b3a48e..f573d41 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -1110,8 +1110,7 @@ void RAFast::AllocateBasicBlock() { /// bool RAFast::runOnMachineFunction(MachineFunction &Fn) { DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n" - << "********** Function: " - << ((Value*)Fn.getFunction())->getName() << '\n'); + << "********** Function: " << Fn.getName() << '\n'); MF = &Fn; MRI = &MF->getRegInfo(); TM = &Fn.getTarget(); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index d0cff48..c021a93 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -24,7 +24,6 @@ #include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" 
#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Function.h" #include "llvm/PassAnalysisSupport.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/EdgeBundles.h" @@ -1746,8 +1745,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" - << "********** Function: " - << mf.getFunction()->getName() << '\n'); + << "********** Function: " << mf.getName() << '\n'); MF = &mf; if (VerifyEnabled) diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index d0db26b..fcdbce7 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -192,7 +192,6 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, const MachineLoopInfo *loopInfo, const RegSet &vregs) { - typedef std::vector<const LiveInterval*> LIVector; LiveIntervals *LIS = const_cast<LiveIntervals*>(lis); MachineRegisterInfo *mri = &mf->getRegInfo(); const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); @@ -556,7 +555,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { mri->freezeReservedRegs(MF); - DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n"); + DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getName() << "\n"); // Allocator main loop: // @@ -570,11 +569,12 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { // Find the vreg intervals in need of allocation. findVRegIntervalsToAlloc(); +#ifndef NDEBUG const Function* func = mf->getFunction(); std::string fqn = func->getParent()->getModuleIdentifier() + "." + func->getName().str(); - (void)fqn; +#endif // If there are non-empty intervals allocate them using pbqp. if (!vregsToAlloc.empty()) { diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 9906334..d018835 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -1564,8 +1564,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { Loops = &getAnalysis<MachineLoopInfo>(); DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" - << "********** Function: " - << ((Value*)MF->getFunction())->getName() << '\n'); + << "********** Function: " << MF->getName() << '\n'); if (VerifyCoalescing) MF->verify(this, "Before register coalescing"); diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h index 8a6df98..47c3df1 100644 --- a/lib/CodeGen/RegisterCoalescer.h +++ b/lib/CodeGen/RegisterCoalescer.h @@ -63,6 +63,13 @@ namespace llvm { : TRI(tri), DstReg(0), SrcReg(0), DstIdx(0), SrcIdx(0), Partial(false), CrossClass(false), Flipped(false), NewRC(0) {} + /// Create a CoalescerPair representing a virtreg-to-physreg copy. + /// No need to call setRegisters(). + CoalescerPair(unsigned VirtReg, unsigned PhysReg, + const TargetRegisterInfo &tri) + : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg), DstIdx(0), SrcIdx(0), + Partial(false), CrossClass(false), Flipped(false), NewRC(0) {} + /// setRegisters - set registers to match the copy instruction MI. Return /// false if MI is not a coalescable copy instruction. 
bool setRegisters(const MachineInstr*); diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 43448c8..6cdfe7cd 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -63,6 +63,7 @@ void RegisterPressure::decrease(const TargetRegisterClass *RC, decreaseSetPressure(MaxSetPressure, RC, TRI); } +#ifndef NDEBUG void RegisterPressure::dump(const TargetRegisterInfo *TRI) { dbgs() << "Live In: "; for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i) @@ -78,6 +79,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) { << '\n'; } } +#endif /// Increase the current pressure as impacted by these physical registers and /// bump the high water mark if needed. diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 752f8e4..af8cd8f 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -279,6 +279,7 @@ void SUnit::ComputeHeight() { } while (!WorkList.empty()); } +#ifndef NDEBUG /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or /// a group of nodes flagged together. void SUnit::dump(const ScheduleDAG *G) const { @@ -336,6 +337,7 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { } dbgs() << "\n"; } +#endif #ifndef NDEBUG /// VerifyScheduledDAG - Verify that all SUnits were scheduled and that diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 9c1dba3..2d8f235 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -209,7 +209,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { if (Reg == 0) continue; if (TRI->isPhysicalRegister(Reg)) - Uses[Reg].push_back(&ExitSU); + Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1)); else { assert(!IsPostRA && "Virtual register encountered after regalloc."); addVRegUseDeps(&ExitSU, i); @@ -225,15 +225,15 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; if (!Uses.contains(Reg)) - Uses[Reg].push_back(&ExitSU); + Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1)); } } } /// MO is an operand of SU's instruction that defines a physical register. Add /// data dependencies from SU to any uses of the physical register. -void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, - const MachineOperand &MO) { +void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { + const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx); assert(MO.isDef() && "expect physreg def"); // Ask the target if address-backscheduling is desirable, and if so how much. @@ -245,11 +245,13 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, Alias.isValid(); ++Alias) { if (!Uses.contains(*Alias)) continue; - std::vector<SUnit*> &UseList = Uses[*Alias]; + std::vector<PhysRegSUOper> &UseList = Uses[*Alias]; for (unsigned i = 0, e = UseList.size(); i != e; ++i) { - SUnit *UseSU = UseList[i]; + SUnit *UseSU = UseList[i].SU; if (UseSU == SU) continue; + MachineInstr *UseMI = UseSU->getInstr(); + int UseOp = UseList[i].OpIdx; unsigned LDataLatency = DataLatency; // Optionally add in a special extra latency for nodes that // feed addresses. @@ -258,7 +260,6 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, // adjustSchedDependency for the targets that care about it. 
if (SpecialAddressLatency != 0 && !UnitLatencies && UseSU != &ExitSU) { - MachineInstr *UseMI = UseSU->getInstr(); const MCInstrDesc &UseMCID = UseMI->getDesc(); int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias); assert(RegUseIndex >= 0 && "UseMI doesn't use register!"); @@ -273,8 +274,15 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, // perform its own adjustments. SDep dep(SU, SDep::Data, LDataLatency, *Alias); if (!UnitLatencies) { - unsigned Latency = computeOperandLatency(SU, UseSU, dep); + unsigned Latency = + TII->computeOperandLatency(InstrItins, SU->getInstr(), OperIdx, + (UseOp < 0 ? 0 : UseMI), UseOp); dep.setLatency(Latency); + unsigned MinLatency = + TII->computeOperandLatency(InstrItins, SU->getInstr(), OperIdx, + (UseOp < 0 ? 0 : UseMI), UseOp, + /*FindMin=*/true); + dep.setMinLatency(MinLatency); ST.adjustSchedDependency(SU, UseSU, dep); } @@ -301,9 +309,9 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { Alias.isValid(); ++Alias) { if (!Defs.contains(*Alias)) continue; - std::vector<SUnit *> &DefList = Defs[*Alias]; + std::vector<PhysRegSUOper> &DefList = Defs[*Alias]; for (unsigned i = 0, e = DefList.size(); i != e; ++i) { - SUnit *DefSU = DefList[i]; + SUnit *DefSU = DefList[i].SU; if (DefSU == &ExitSU) continue; if (DefSU != SU && @@ -324,14 +332,14 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // Either insert a new Reg2SUnits entry with an empty SUnits list, or // retrieve the existing SUnits list for this register's uses. // Push this SUnit on the use list. - Uses[MO.getReg()].push_back(SU); + Uses[MO.getReg()].push_back(PhysRegSUOper(SU, OperIdx)); } else { - addPhysRegDataDeps(SU, MO); + addPhysRegDataDeps(SU, OperIdx); // Either insert a new Reg2SUnits entry with an empty SUnits list, or // retrieve the existing SUnits list for this register's defs. - std::vector<SUnit *> &DefList = Defs[MO.getReg()]; + std::vector<PhysRegSUOper> &DefList = Defs[MO.getReg()]; // If a def is going to wrap back around to the top of the loop, // backschedule it. @@ -393,11 +401,11 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // the block. Instead, we leave only one call at the back of the // DefList. if (SU->isCall) { - while (!DefList.empty() && DefList.back()->isCall) + while (!DefList.empty() && DefList.back().SU->isCall) DefList.pop_back(); } // Defs are pushed in the order they are visited and never reordered. - DefList.push_back(SU); + DefList.push_back(PhysRegSUOper(SU, OperIdx)); } } @@ -468,8 +476,14 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { if (!UnitLatencies) { // Adjust the dependence latency using operand def/use information, then // allow the target to perform its own adjustments. - unsigned Latency = computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep)); + int DefOp = Def->findRegisterDefOperandIdx(Reg); + unsigned Latency = + TII->computeOperandLatency(InstrItins, Def, DefOp, MI, OperIdx); dep.setLatency(Latency); + unsigned MinLatency = + TII->computeOperandLatency(InstrItins, Def, DefOp, MI, OperIdx, + /*FindMin=*/true); + dep.setMinLatency(MinLatency); const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); @@ -488,7 +502,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { /// (like a call or something with unmodeled side effects). 
static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { if (MI->isCall() || MI->hasUnmodeledSideEffects() || - (MI->hasVolatileMemoryRef() && + (MI->hasOrderedMemoryRef() && (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) return true; return false; @@ -997,19 +1011,10 @@ void ScheduleDAGInstrs::computeLatency(SUnit *SU) { } } -unsigned ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use, - const SDep& dep, - bool FindMin) const { - // For a data dependency with a known register... - if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0)) - return 1; - - return TII->computeOperandLatency(InstrItins, TRI, Def->getInstr(), - Use->getInstr(), dep.getReg(), FindMin); -} - void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { +#ifndef NDEBUG SU->getInstr()->dump(); +#endif } std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index 38feee9..6e781b1 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Constants.h" -#include "llvm/Function.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -35,7 +34,7 @@ namespace llvm { DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const ScheduleDAG *G) { - return G->MF.getFunction()->getName(); + return G->MF.getName(); } static bool renderGraphFromBottomUp() { diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index e675366..5ca22b2 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -89,6 +89,7 @@ void ScoreboardHazardRecognizer::Reset() { ReservedScoreboard.reset(); } +#ifndef NDEBUG void ScoreboardHazardRecognizer::Scoreboard::dump() const { dbgs() << "Scoreboard:\n"; @@ -104,6 +105,7 @@ void ScoreboardHazardRecognizer::Scoreboard::dump() const { dbgs() << '\n'; } } +#endif bool ScoreboardHazardRecognizer::atIssueLimit() const { if (IssueWidth == 0) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 1c485a0..d7fa009 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -413,7 +413,7 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) return 0; - // fold (fsub (fadd A, B)) -> (fsub (fneg A), B) + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, Depth + 1)) return V; @@ -2496,8 +2496,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // lanes of the constant together. EVT VT = Vector->getValueType(0); unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); + + // If the splat value has been compressed to a bitlength lower + // than the size of the vector lane, we need to re-expand it to + // the lane size. 
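// Editor's note: standalone sketch of the splat re-expansion loop added just
// below, with a concrete value; not part of the patch, helper name invented
// for illustration. An 8-bit splat value 0xAB in a 32-bit vector lane becomes
// 0xABABABAB before the per-lane AND of the constant is computed.
#include "llvm/ADT/APInt.h"

static llvm::APInt expandSplat(llvm::APInt SplatValue, unsigned SplatBitSize,
                               unsigned BitWidth) {
  // Assumes BitWidth is a power-of-two multiple of SplatBitSize, as for a
  // BUILD_VECTOR splat.
  if (BitWidth > SplatBitSize)
    for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
         SplatBitSize < BitWidth; SplatBitSize *= 2)
      SplatValue |= SplatValue.shl(SplatBitSize);
  // e.g. expandSplat(APInt(8, 0xAB), 8, 32) yields 0xABABABAB.
  return SplatValue;
}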
+ if (BitWidth > SplatBitSize) + for (SplatValue = SplatValue.zextOrTrunc(BitWidth); + SplatBitSize < BitWidth; + SplatBitSize = SplatBitSize * 2) + SplatValue |= SplatValue.shl(SplatBitSize); + Constant = APInt::getAllOnesValue(BitWidth); - for (unsigned i = 0, n = VT.getVectorNumElements(); i < n; ++i) + for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); } } @@ -5681,6 +5691,127 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(1), N1)); + // In unsafe math mode, we can fold chains of FADD's of the same value + // into multiplications. This transform is not safe in general because + // we are reducing the number of rounding steps. + if (DAG.getTarget().Options.UnsafeFPMath && + TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && + !N0CFP && !N1CFP) { + if (N0.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); + ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); + + // (fadd (fmul c, x), x) -> (fmul c+1, x) + if (CFP00 && !CFP01 && N0.getOperand(1) == N1) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP00, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1, NewCFP); + } + + // (fadd (fmul x, c), x) -> (fmul c+1, x) + if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP01, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1, NewCFP); + } + + // (fadd (fadd x, x), x) -> (fmul 3.0, x) + if (!CFP00 && !CFP01 && N0.getOperand(0) == N0.getOperand(1) && + N0.getOperand(0) == N1) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1, DAG.getConstantFP(3.0, VT)); + } + + // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x) + if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(1) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP00, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(1), NewCFP); + } + + // (fadd (fmul x, c), (fadd x, x)) -> (fmul c+2, x) + if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP01, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), NewCFP); + } + } + + if (N1.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); + ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1)); + + // (fadd x, (fmul c, x)) -> (fmul c+1, x) + if (CFP10 && !CFP11 && N1.getOperand(1) == N0) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP10, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0, NewCFP); + } + + // (fadd x, (fmul x, c)) -> (fmul c+1, x) + if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP11, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0, NewCFP); + } + + // (fadd x, (fadd x, x)) -> (fmul 3.0, x) + if (!CFP10 && !CFP11 && N1.getOperand(0) == N1.getOperand(1) && + 
N1.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0, DAG.getConstantFP(3.0, VT)); + } + + // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x) + if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(1) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP10, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(1), NewCFP); + } + + // (fadd (fadd x, x), (fmul x, c)) -> (fmul c+2, x) + if (CFP11 && !CFP10 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP11, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), NewCFP); + } + } + + // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x) + if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), + DAG.getConstantFP(4.0, VT)); + } + } + // FADD -> FMA combines: if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && @@ -5692,8 +5823,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1), N1); } - - // fold (fadd x, (fmul y, z)) -> (fma x, y, z) + + // fold (fadd x, (fmul y, z)) -> (fma y, z, x) // Note: Commutes FADD operands. if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, @@ -5867,6 +5998,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); if (N0CFP && N0CFP->isExactlyValue(1.0)) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2); @@ -5877,6 +6009,58 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N0CFP && !N1CFP) return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2); + // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + N2.getOpcode() == ISD::FMUL && + N0 == N2.getOperand(0) && + N2.getOperand(1).getOpcode() == ISD::ConstantFP) { + return DAG.getNode(ISD::FMUL, dl, VT, N0, + DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1))); + } + + + // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) + if (DAG.getTarget().Options.UnsafeFPMath && + N0.getOpcode() == ISD::FMUL && N1CFP && + N0.getOperand(1).getOpcode() == ISD::ConstantFP) { + return DAG.getNode(ISD::FMA, dl, VT, + N0.getOperand(0), + DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)), + N2); + } + + // (fma x, 1, y) -> (fadd x, y) + // (fma x, -1, y) -> (fadd (fneg x), y) + if (N1CFP) { + if (N1CFP->isExactlyValue(1.0)) + return DAG.getNode(ISD::FADD, dl, VT, N0, N2); + + if (N1CFP->isExactlyValue(-1.0) && + (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { + SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); + AddToWorkList(RHSNeg.getNode()); + return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); + } + } + + // (fma x, c, x) -> (fmul x, (c+1)) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) { + return DAG.getNode(ISD::FMUL, dl, VT, + N0, + 
DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(1.0, VT))); + } + + // (fma x, c, (fneg x)) -> (fmul x, (c-1)) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, dl, VT, + N0, + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(-1.0, VT))); + } + + return SDValue(); } @@ -6225,6 +6409,30 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + if (VT.isVector() && !LegalOperations) { + // If operand is a BUILD_VECTOR node, see if we can constant fold it. + if (N0.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { + SDValue Op = N0.getOperand(i); + if (Op.getOpcode() != ISD::UNDEF && + Op.getOpcode() != ISD::ConstantFP) + break; + EVT EltVT = Op.getValueType(); + SDValue FoldOp = DAG.getNode(ISD::FNEG, N0.getDebugLoc(), EltVT, Op); + if (FoldOp.getOpcode() != ISD::UNDEF && + FoldOp.getOpcode() != ISD::ConstantFP) + break; + Ops.push_back(FoldOp); + AddToWorkList(FoldOp.getNode()); + } + + if (Ops.size() == N0.getNumOperands()) + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + VT, &Ops[0], Ops.size()); + } + } + if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), &DAG.getTarget().Options)) return GetNegatedExpression(N0, DAG, LegalOperations); @@ -6246,6 +6454,17 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { } } + // (fneg (fmul c, x)) -> (fmul -c, x) + if (N0.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); + if (CFP1) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), + DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, + N0.getOperand(1))); + } + } + return SDValue(); } @@ -7876,29 +8095,15 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) return SDValue(); - // If the element type of the input vector is not the same as - // the output element type, make concat_vectors based on input element - // type and then bitcast it to the output vector type. - // - // In another words avoid nodes like this: - // <NODE> v16i8 = concat_vectors v4i16 v4i16 - // Replace it with this one: - // <NODE0> v8i16 = concat_vectors v4i16 v4i16 - // <NODE1> v16i8 = bitcast NODE0 - EVT ItemType = VecIn1.getValueType().getVectorElementType(); - if (ItemType != VT.getVectorElementType()) { - EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), - ItemType, - VecIn1.getValueType().getVectorNumElements()*2); - // Widen the input vector by adding undef values. - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, - VecIn1, DAG.getUNDEF(VecIn1.getValueType())); - VecIn1 = DAG.getNode(ISD::BITCAST, dl, VT, VecIn1); - } else - // Widen the input vector by adding undef values. - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, - VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + // If the input vector type has a different base type to the output + // vector type, bail out. + if (VecIn1.getValueType().getVectorElementType() != + VT.getVectorElementType()) + return SDValue(); + // Widen the input vector by adding undef values. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + VecIn1, DAG.getUNDEF(VecIn1.getValueType())); } // If VecIn2 is unused then change it to undef. @@ -8749,7 +8954,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { // to alias with anything but itself. 
Provides base object and offset as // results. static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, - const GlobalValue *&GV, void *&CV) { + const GlobalValue *&GV, const void *&CV) { // Assume it is a primitive operation. Base = Ptr; Offset = 0; GV = 0; CV = 0; @@ -8774,8 +8979,8 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, // for ConstantSDNodes since the same constant pool entry may be represented // by multiple nodes with different offsets. if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) { - CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal() - : (void *)C->getConstVal(); + CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal() + : (const void *)C->getConstVal(); Offset += C->getOffset(); return false; } @@ -8800,7 +9005,7 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, SDValue Base1, Base2; int64_t Offset1, Offset2; const GlobalValue *GV1, *GV2; - void *CV1, *CV2; + const void *CV1, *CV2; bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1); bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 3e18ea7..b2a2a5c 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -97,7 +97,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8))); StaticAllocaMap[AI] = MF->getFrameInfo()->CreateStackObject(TySize, Align, false, - MayNeedSP); + MayNeedSP, AI); } for (; BB != EB; ++BB) diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 4488d27..6d2cdea 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -55,7 +55,8 @@ unsigned InstrEmitter::CountResults(SDNode *Node) { /// /// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding /// the chain and glue. These operands may be implicit on the machine instr. -static unsigned countOperands(SDNode *Node, unsigned &NumImpUses) { +static unsigned countOperands(SDNode *Node, unsigned NumExpUses, + unsigned &NumImpUses) { unsigned N = Node->getNumOperands(); while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) --N; @@ -63,7 +64,8 @@ static unsigned countOperands(SDNode *Node, unsigned &NumImpUses) { --N; // Ignore chain if it exists. // Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses. 
- for (unsigned I = N; I; --I) { + NumImpUses = N - NumExpUses; + for (unsigned I = N; I > NumExpUses; --I) { if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1))) continue; if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1))) @@ -720,7 +722,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); unsigned NumImpUses = 0; - unsigned NodeOperands = countOperands(Node, NumImpUses); + unsigned NodeOperands = + countOperands(Node, II.getNumOperands() - II.getNumDefs(), NumImpUses); bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; @@ -870,6 +873,17 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, break; } + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: { + unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) ? + TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END; + + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Node->getOperand(1)); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp)) + .addFrameIndex(FI->getIndex()); + break; + } + case ISD::INLINEASM: { unsigned NumOps = Node->getNumOperands(); if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) @@ -890,19 +904,23 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, getZExtValue(); MI->addOperand(MachineOperand::CreateImm(ExtraInfo)); + // Remember the operand index of the group flags. + SmallVector<unsigned, 8> GroupIdx; + // Add all of the operand registers to the instruction. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); - unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + GroupIdx.push_back(MI->getNumOperands()); MI->addOperand(MachineOperand::CreateImm(Flags)); ++i; // Skip the ID value. switch (InlineAsm::getKind(Flags)) { default: llvm_unreachable("Bad flags!"); case InlineAsm::Kind_RegDef: - for (; NumVals; --NumVals, ++i) { + for (unsigned j = 0; j != NumVals; ++j, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); // FIXME: Add dead flags for physical and virtual registers defined. // For now, mark physical register defs as implicit to help fast @@ -913,7 +931,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, break; case InlineAsm::Kind_RegDefEarlyClobber: case InlineAsm::Kind_Clobber: - for (; NumVals; --NumVals, ++i) { + for (unsigned j = 0; j != NumVals; ++j, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true, /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg), @@ -928,9 +946,20 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, case InlineAsm::Kind_Mem: // Addressing mode. // The addressing mode has been selected, just add all of the // operands to the machine instruction. - for (; NumVals; --NumVals, ++i) + for (unsigned j = 0; j != NumVals; ++j, ++i) AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); + + // Manually set isTied bits.
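// A rough sketch of what the tying below accomplishes (illustration only;
// the asm statement is a generic example, not taken from the patch): for
//   asm("" : "=r"(a) : "0"(b));
// the register use group for "0"(b) must be allocated to the same register
// as def group 0.  GroupIdx recorded the MachineInstr operand index of each
// group's flag word, so DefIdx and UseIdx below point at the first register
// of the matching def and use groups, and tieOperands() is applied pairwise
// to every register in the group.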
+ if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) { + unsigned DefGroup = 0; + if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) { + unsigned DefIdx = GroupIdx[DefGroup] + 1; + unsigned UseIdx = GroupIdx.back() + 1; + for (unsigned j = 0; j != NumVals; ++j) + MI->tieOperands(DefIdx + j, UseIdx + j); + } + } break; } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 908ebb9..7b34170 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2042,7 +2042,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, DebugLoc dl) { - if (Op0.getValueType() == MVT::i32) { + if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { // simple 32-bit [signed|unsigned] integer to float/double expansion // Get the stack frame index of a 8 byte buffer. diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 94fc976..37f0e60 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -625,6 +625,7 @@ private: SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N); SDValue WidenVecRes_VSETCC(SDNode* N); + SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); SDValue WidenVecRes_POWI(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 704f99b..22f8d51 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -64,6 +64,7 @@ class VectorLegalizer { // Implement vselect in terms of XOR, AND, OR when blend is not supported // by the target. SDValue ExpandVSELECT(SDValue Op); + SDValue ExpandSELECT(SDValue Op); SDValue ExpandLoad(SDValue Op); SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); @@ -220,6 +221,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FFLOOR: + case ISD::FMA: case ISD::SIGN_EXTEND_INREG: QueryType = Node->getValueType(0); break; @@ -260,6 +262,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case TargetLowering::Expand: if (Node->getOpcode() == ISD::VSELECT) Result = ExpandVSELECT(Op); + else if (Node->getOpcode() == ISD::SELECT) + Result = ExpandSELECT(Op); else if (Node->getOpcode() == ISD::UINT_TO_FP) Result = ExpandUINT_TO_FLOAT(Op); else if (Node->getOpcode() == ISD::FNEG) @@ -435,6 +439,66 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { return TF; } +SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { + // Lower a select instruction where the condition is a scalar and the + // operands are vectors. Lower this select to VSELECT and implement it + // using XOR AND OR. The selector bit is broadcasted. + EVT VT = Op.getValueType(); + DebugLoc DL = Op.getDebugLoc(); + + SDValue Mask = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + + assert(VT.isVector() && !Mask.getValueType().isVector() + && Op1.getValueType() == Op2.getValueType() && "Invalid type"); + + unsigned NumElem = VT.getVectorNumElements(); + + // If we can't even use the basic vector operations of + // AND,OR,XOR, we will have to scalarize the op. + // Notice that the operation may be 'promoted' which means that it is + // 'bitcasted' to another type which is handled. + // Also, we need to be able to construct a splat vector using BUILD_VECTOR. 
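// A condensed sketch of the expansion performed here (for exposition only):
//   select i1 %c, <4 x float> %a, <4 x float> %b
// is lowered roughly as
//   Mask    = %c ? all-ones : 0              (one lane-sized integer)
//   MaskVec = BUILD_VECTOR Mask, ..., Mask   (broadcast to every lane)
//   Result  = (bitcast %a & MaskVec) | (bitcast %b & ~MaskVec)
// which is why AND, OR, XOR and BUILD_VECTOR must all be usable on the mask
// type before this path is taken; otherwise the operation is unrolled.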
+ if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Op.getNode()); + + // Generate a mask operand. + EVT MaskTy = TLI.getSetCCResultType(VT); + assert(MaskTy.isVector() && "Invalid CC type"); + assert(MaskTy.getSizeInBits() == Op1.getValueType().getSizeInBits() + && "Invalid mask size"); + + // What is the size of each element in the vector mask. + EVT BitTy = MaskTy.getScalarType(); + + Mask = DAG.getNode(ISD::SELECT, DL, BitTy, Mask, + DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy), + DAG.getConstant(0, BitTy)); + + // Broadcast the mask so that the entire vector is all-one or all zero. + SmallVector<SDValue, 8> Ops(NumElem, Mask); + Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, &Ops[0], Ops.size()); + + // Bitcast the operands to be the same type as the mask. + // This is needed when we select between FP types because + // the mask is a vector of integers. + Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1); + Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2); + + SDValue AllOnes = DAG.getConstant( + APInt::getAllOnesValue(BitTy.getSizeInBits()), MaskTy); + SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes); + + Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask); + Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask); + SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2); + return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); +} + SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. @@ -449,12 +513,17 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // AND,OR,XOR, we will have to scalarize the op. // Notice that the operation may be 'promoted' which means that it is // 'bitcasted' to another type which is handled. + // This operation also isn't safe with AND, OR, XOR when the boolean + // type is 0/1 as we need an all ones vector constant to mask with. + // FIXME: Sign extend 1 to all ones if thats legal on the target. if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || - TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand) + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || + TLI.getBooleanContents(true) != + TargetLowering::ZeroOrNegativeOneBooleanContent) return DAG.UnrollVectorOp(Op.getNode()); - assert(VT.getSizeInBits() == Op.getOperand(1).getValueType().getSizeInBits() + assert(VT.getSizeInBits() == Op1.getValueType().getSizeInBits() && "Invalid mask size"); // Bitcast the operands to be the same type as the mask. // This is needed when we select between FP types because diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 4709202..4095728 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1366,6 +1366,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FTRUNC: Res = WidenVecRes_Unary(N); break; + case ISD::FMA: + Res = WidenVecRes_Ternary(N); + break; } // If Res is null, the sub-method took care of registering the result. 
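// To illustrate the new FMA case (a sketch, not taken from the patch): an
// ISD::FMA node of type v3f32 on a target whose nearest legal vector type
// is v4f32 is widened by fetching the widened form of all three operands
// and emitting a single v4f32 ISD::FMA; the extra lane carries an undefined
// value that later nodes ignore.  WidenVecRes_Ternary, defined just below,
// does exactly this.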
@@ -1373,6 +1376,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { SetWidenedVector(SDValue(N, ResNo), Res); } +SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { + // Ternary op widening. + DebugLoc dl = N->getDebugLoc(); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + SDValue InOp3 = GetWidenedVector(N->getOperand(2)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); +} + SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { // Binary op widening. unsigned Opcode = N->getOpcode(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index bf0a437..2b86e36 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -656,6 +656,8 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) { break; case ISD::MERGE_VALUES: case ISD::TokenFactor: + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: case ISD::CopyToReg: case ISD::CopyFromReg: case ISD::EH_LABEL: @@ -1756,6 +1758,7 @@ public: return V; } +#ifndef NDEBUG void dump(ScheduleDAG *DAG) const { // Emulate pop() without clobbering NodeQueueIds. std::vector<SUnit*> DumpQueue = Queue; @@ -1766,6 +1769,7 @@ public: SU->dump(DAG); } } +#endif }; typedef RegReductionPriorityQueue<bu_ls_rr_sort> @@ -1893,6 +1897,7 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { //===----------------------------------------------------------------------===// void RegReductionPQBase::dumpRegPressure() const { +#ifndef NDEBUG for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), E = TRI->regclass_end(); I != E; ++I) { const TargetRegisterClass *RC = *I; @@ -1902,6 +1907,7 @@ void RegReductionPQBase::dumpRegPressure() const { DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] << '\n'); } +#endif } bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 748668c..222dc55 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -643,6 +643,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, } void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { +#ifndef NDEBUG if (!SU->getNode()) { dbgs() << "PHYS REG COPY\n"; return; @@ -659,8 +660,10 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { dbgs() << "\n"; GluedNodes.pop_back(); } +#endif } +#ifndef NDEBUG void ScheduleDAGSDNodes::dumpSchedule() const { for (unsigned i = 0, e = Sequence.size(); i != e; i++) { if (SUnit *SU = Sequence[i]) @@ -669,6 +672,7 @@ void ScheduleDAGSDNodes::dumpSchedule() const { dbgs() << "**** NOOP ****\n"; } } +#endif #ifndef NDEBUG /// VerifyScheduledSequence - Verify that all SUnits were scheduled and that diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f4fe892..d85d41b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1097,10 +1097,9 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, "Cannot set target flags on target-independent globals"); // Truncate (with sign-extension) the offset value to the pointer size. 
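// A quick numeric check of the replacement below (illustration only): with
// 32-bit pointers BitWidth is 32, so an incoming Offset of 0xFFFFFFFF must
// be treated as -1.  SignExtend64(Offset, 32) produces exactly that,
// matching the old (Offset << 32) >> 32 arithmetic-shift trick it replaces.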
- EVT PTy = TLI.getPointerTy(); - unsigned BitWidth = PTy.getSizeInBits(); + unsigned BitWidth = TLI.getPointerTy().getSizeInBits(); if (BitWidth < 64) - Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth)); + Offset = SignExtend64(Offset, BitWidth); const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); if (!GVar) { @@ -2817,6 +2816,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2)) if (CFP->getValueAPF().isZero()) return N1; + } else if (Opcode == ISD::FMUL) { + ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1); + SDValue V = N2; + + // If the first operand isn't the constant, try the second + if (!CFP) { + CFP = dyn_cast<ConstantFPSDNode>(N2); + V = N1; + } + + if (CFP) { + // 0*x --> 0 + if (CFP->isZero()) + return SDValue(CFP,0); + // 1*x --> x + if (CFP->isExactlyValue(1.0)) + return V; + } } } assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); @@ -2935,17 +2952,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // expanding large vector constants. if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) { SDValue Elt = N1.getOperand(N2C->getZExtValue()); - EVT VEltTy = N1.getValueType().getVectorElementType(); - if (Elt.getValueType() != VEltTy) { + + if (VT != Elt.getValueType()) // If the vector element type is not legal, the BUILD_VECTOR operands - // are promoted and implicitly truncated. Make that explicit here. - Elt = getNode(ISD::TRUNCATE, DL, VEltTy, Elt); - } - if (VT != VEltTy) { - // If the vector element type is not legal, the EXTRACT_VECTOR_ELT - // result is implicitly extended. - Elt = getNode(ISD::ANY_EXTEND, DL, VT, Elt); - } + // are promoted and implicitly truncated, and the result implicitly + // extended. Make that explicit here. + Elt = getAnyExtOrTrunc(Elt, DL, VT); + return Elt; } @@ -3923,17 +3936,21 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment, AtomicOrdering Ordering, - SynchronizationScope SynchScope) { + SynchronizationScope SynchScope) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(MemVT); MachineFunction &MF = getMachineFunction(); - unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + // All atomics are load and store, except for ATOMIC_LOAD and ATOMIC_STORE. // For now, atomics are considered to be volatile always. // FIXME: Volatile isn't really correct; we should keep track of atomic // orderings in the memoperand. - Flags |= MachineMemOperand::MOVolatile; + unsigned Flags = MachineMemOperand::MOVolatile; + if (Opcode != ISD::ATOMIC_STORE) + Flags |= MachineMemOperand::MOLoad; + if (Opcode != ISD::ATOMIC_LOAD) + Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment); @@ -3983,17 +4000,17 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Alignment = getEVTAlignment(MemVT); MachineFunction &MF = getMachineFunction(); - // A monotonic store does not load; a release store "loads" in the sense - // that other stores cannot be sunk past it. + // An atomic store does not load. An atomic load does not store. // (An atomicrmw obviously both loads and stores.) - unsigned Flags = MachineMemOperand::MOStore; - if (Opcode != ISD::ATOMIC_STORE || Ordering > Monotonic) - Flags |= MachineMemOperand::MOLoad; - - // For now, atomics are considered to be volatile always.
+ // For now, atomics are considered to be volatile always, and they are + // chained as such. // FIXME: Volatile isn't really correct; we should keep track of atomic // orderings in the memoperand. - Flags |= MachineMemOperand::MOVolatile; + unsigned Flags = MachineMemOperand::MOVolatile; + if (Opcode != ISD::ATOMIC_STORE) + Flags |= MachineMemOperand::MOLoad; + if (Opcode != ISD::ATOMIC_LOAD) + Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, @@ -4056,16 +4073,17 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Alignment = getEVTAlignment(MemVT); MachineFunction &MF = getMachineFunction(); - // A monotonic load does not store; an acquire load "stores" in the sense - // that other loads cannot be hoisted past it. - unsigned Flags = MachineMemOperand::MOLoad; - if (Ordering > Monotonic) - Flags |= MachineMemOperand::MOStore; - - // For now, atomics are considered to be volatile always. + // An atomic store does not load. An atomic load does not store. + // (An atomicrmw obviously both loads and stores.) + // For now, atomics are considered to be volatile always, and they are + // chained as such. // FIXME: Volatile isn't really correct; we should keep track of atomic // orderings in the memoperand. - Flags |= MachineMemOperand::MOVolatile; + unsigned Flags = MachineMemOperand::MOVolatile; + if (Opcode != ISD::ATOMIC_STORE) + Flags |= MachineMemOperand::MOLoad; + if (Opcode != ISD::ATOMIC_LOAD) + Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, @@ -4157,6 +4175,8 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, assert((Opcode == ISD::INTRINSIC_VOID || Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::PREFETCH || + Opcode == ISD::LIFETIME_START || + Opcode == ISD::LIFETIME_END || (Opcode <= INT_MAX && (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && "Opcode is not a memory-accessing opcode!"); @@ -4226,7 +4246,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const MDNode *TBAAInfo, const MDNode *Ranges) { - assert(Chain.getValueType() == MVT::Other && + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(VT); @@ -4284,7 +4304,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3); ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(), - MMO->isNonTemporal(), + MMO->isNonTemporal(), MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; @@ -4303,7 +4323,7 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, - bool isInvariant, unsigned Alignment, + bool isInvariant, unsigned Alignment, const MDNode *TBAAInfo, const MDNode *Ranges) { SDValue Undef = getUNDEF(Ptr.getValueType()); @@ -4332,7 +4352,7 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, "Load is already a indexed load!"); return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, LD->getChain(), Base, Offset, LD->getPointerInfo(), - LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), + LD->getMemoryVT(), LD->isVolatile(), 
LD->isNonTemporal(), false, LD->getAlignment()); } @@ -4340,7 +4360,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { - assert(Chain.getValueType() == MVT::Other && + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(Val.getValueType()); @@ -4365,7 +4385,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, MachineMemOperand *MMO) { - assert(Chain.getValueType() == MVT::Other && + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); EVT VT = Val.getValueType(); SDVTList VTs = getVTList(MVT::Other); @@ -4394,7 +4414,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, EVT SVT,bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { - assert(Chain.getValueType() == MVT::Other && + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(SVT); @@ -4421,7 +4441,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, MachineMemOperand *MMO) { EVT VT = Val.getValueType(); - assert(Chain.getValueType() == MVT::Other && + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (VT == SVT) return getStore(Chain, dl, Val, Ptr, MMO); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index f3cf758..483b051 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" #include "llvm/CallingConv.h" #include "llvm/DebugInfo.h" @@ -825,6 +826,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, GFI = gfi; LibInfo = li; TD = DAG.getTarget().getTargetData(); + Context = DAG.getContext(); LPadToCallSiteMap.clear(); } @@ -1765,6 +1767,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, /// visitBitTestCase - this function produces one "bit test" void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, + uint32_t BranchWeightToNext, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { @@ -1802,8 +1805,10 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, ISD::SETNE); } - addSuccessorWithWeight(SwitchBB, B.TargetBB); - addSuccessorWithWeight(SwitchBB, NextMBB); + // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. + addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight); + // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. 
+ addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -1926,6 +1931,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; + BranchProbabilityInfo *BPI = FuncInfo.BPI; // If any two of the cases has the same destination, and if one value // is the same as the other, but has one bit unset that the other has set, // use bit manipulation to do two compares at once. For example: @@ -1959,8 +1965,12 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, ISD::SETEQ); // Update successor info. - addSuccessorWithWeight(SwitchBB, Small.BB); - addSuccessorWithWeight(SwitchBB, Default); + // Both Small and Big will jump to Small.BB, so we sum up the weights. + addSuccessorWithWeight(SwitchBB, Small.BB, + Small.ExtraWeight + Big.ExtraWeight); + addSuccessorWithWeight(SwitchBB, Default, + // The default destination is the first successor in IR. + BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0); // Insert the true branch. SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, @@ -1978,14 +1988,13 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, } // Order cases by weight so the most likely case will be checked first. - BranchProbabilityInfo *BPI = FuncInfo.BPI; + uint32_t UnhandledWeights = 0; if (BPI) { for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) { - uint32_t IWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(), - I->BB->getBasicBlock()); + uint32_t IWeight = I->ExtraWeight; + UnhandledWeights += IWeight; for (CaseItr J = CR.Range.first; J < I; ++J) { - uint32_t JWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(), - J->BB->getBasicBlock()); + uint32_t JWeight = J->ExtraWeight; if (IWeight > JWeight) std::swap(*I, *J); } @@ -2034,10 +2043,12 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, LHS = I->Low; MHS = SV; RHS = I->High; } - uint32_t ExtraWeight = I->ExtraWeight; + // The false weight should be sum of all un-handled cases. + UnhandledWeights -= I->ExtraWeight; CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough, /* me */ CurBlock, - /* trueweight */ ExtraWeight / 2, /* falseweight */ ExtraWeight / 2); + /* trueweight */ I->ExtraWeight, + /* falseweight */ UnhandledWeights); // If emitting the first comparison, just call visitSwitchCase to emit the // code into the current block. Otherwise, push the CaseBlock onto the @@ -2137,13 +2148,28 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, } } + // Calculate weight for each unique destination in CR. + DenseMap<MachineBasicBlock*, uint32_t> DestWeights; + if (FuncInfo.BPI) + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { + DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = + DestWeights.find(I->BB); + if (Itr != DestWeights.end()) + Itr->second += I->ExtraWeight; + else + DestWeights[I->BB] = I->ExtraWeight; + } + // Update successor info. Add one edge to each unique successor. BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs()); for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(), E = DestBBs.end(); I != E; ++I) { if (!SuccsHandled[(*I)->getNumber()]) { SuccsHandled[(*I)->getNumber()] = true; - addSuccessorWithWeight(JumpTableBB, *I); + DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = + DestWeights.find(*I); + addSuccessorWithWeight(JumpTableBB, *I, + Itr != DestWeights.end() ? 
Itr->second : 0); } } @@ -2374,7 +2400,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, if (i == count) { assert((count < 3) && "Too much destinations to test!"); - CasesBits.push_back(CaseBits(0, Dest, 0)); + CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/)); count++; } @@ -2383,6 +2409,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, uint64_t lo = (lowValue - lowBound).getZExtValue(); uint64_t hi = (highValue - lowBound).getZExtValue(); + CasesBits[i].ExtraWeight += I->ExtraWeight; for (uint64_t j = lo; j <= hi; j++) { CasesBits[i].Mask |= 1ULL << j; @@ -2410,7 +2437,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, CurMF->insert(BBI, CaseBB); BTC.push_back(BitTestCase(CasesBits[i].Mask, CaseBB, - CasesBits[i].BB)); + CasesBits[i].BB, CasesBits[i].ExtraWeight)); // Put SV in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(SV); @@ -2438,30 +2465,25 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, Clusterifier TheClusterifier; + BranchProbabilityInfo *BPI = FuncInfo.BPI; // Start with "simple" cases for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) { const BasicBlock *SuccBB = i.getCaseSuccessor(); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; - TheClusterifier.add(i.getCaseValueEx(), SMBB); + TheClusterifier.add(i.getCaseValueEx(), SMBB, + BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0); } TheClusterifier.optimize(); - BranchProbabilityInfo *BPI = FuncInfo.BPI; size_t numCmps = 0; for (Clusterifier::RangeIterator i = TheClusterifier.begin(), e = TheClusterifier.end(); i != e; ++i, ++numCmps) { Clusterifier::Cluster &C = *i; - unsigned W = 0; - if (BPI) { - W = BPI->getEdgeWeight(SI.getParent(), C.second->getBasicBlock()); - if (!W) - W = 16; - W *= C.first.Weight; - BPI->setEdgeWeight(SI.getParent(), C.second->getBasicBlock(), W); - } + // Update edge weight for the cluster. + unsigned W = C.first.Weight; // FIXME: Currently work with ConstantInt based numbers. // Changing it to APInt based is a pretty heavy for this commit. @@ -4853,7 +4875,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), - DAG.getConstant(Idx, MVT::i32)); + DAG.getIntPtrConstant(Idx)); + setValue(&I, Res); + return 0; + } + case Intrinsic::x86_avx_vextractf128_pd_256: + case Intrinsic::x86_avx_vextractf128_ps_256: + case Intrinsic::x86_avx_vextractf128_si_256: + case Intrinsic::x86_avx2_vextracti128: { + DebugLoc dl = getCurDebugLoc(); + EVT DestVT = TLI.getValueType(I.getType()); + uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) * + DestVT.getVectorNumElements(); + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, + getValue(I.getArgOperand(0)), + DAG.getIntPtrConstant(Idx)); setValue(&I, Res); return 0; } @@ -5180,14 +5216,40 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { rw==1)); /* write */ return 0; } + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: { + bool IsStart = (Intrinsic == Intrinsic::lifetime_start); + // Stack coloring is not enabled in O0, discard region information. 
+ if (TM.getOptLevel() == CodeGenOpt::None) + return 0; + + SmallVector<Value *, 4> Allocas; + GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD); + + for (SmallVector<Value*, 4>::iterator Object = Allocas.begin(), + E = Allocas.end(); Object != E; ++Object) { + AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object); + + // Could not find an Alloca. + if (!LifetimeObject) + continue; + + int FI = FuncInfo.StaticAllocaMap[LifetimeObject]; + + SDValue Ops[2]; + Ops[0] = getRoot(); + Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true); + unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); + Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2); + DAG.setRoot(Res); + } + } case Intrinsic::invariant_start: - case Intrinsic::lifetime_start: // Discard region information. setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); return 0; case Intrinsic::invariant_end: - case Intrinsic::lifetime_end: // Discard region information. return 0; case Intrinsic::donothing: @@ -6043,12 +6105,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); - // Remember the HasSideEffect and AlignStack bits as operand 3. + // Remember the HasSideEffect, AlignStack and AsmDialect bits as operand 3. unsigned ExtraInfo = 0; if (IA->hasSideEffects()) ExtraInfo |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) ExtraInfo |= InlineAsm::Extra_IsAlignStack; + // Set the asm dialect. + ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, TLI.getPointerTy())); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 4090002..3b7615a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -150,9 +150,11 @@ private: uint64_t Mask; MachineBasicBlock* BB; unsigned Bits; + uint32_t ExtraWeight; - CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits): - Mask(mask), BB(bb), Bits(bits) { } + CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits, + uint32_t Weight): + Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { } }; typedef std::vector<Case> CaseVector; @@ -247,11 +249,13 @@ private: typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock; struct BitTestCase { - BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr): - Mask(M), ThisBB(T), TargetBB(Tr) { } + BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr, + uint32_t Weight): + Mask(M), ThisBB(T), TargetBB(Tr), ExtraWeight(Weight) { } uint64_t Mask; MachineBasicBlock *ThisBB; MachineBasicBlock *TargetBB; + uint32_t ExtraWeight; }; typedef SmallVector<BitTestCase, 3> BitTestInfo; @@ -325,7 +329,7 @@ public: CodeGenOpt::Level ol) : SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), - HasTailCall(false), Context(dag.getContext()) { + HasTailCall(false) { } void init(GCFunctionInfo *gfi, AliasAnalysis &aa, @@ -452,6 +456,7 @@ public: void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); void visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, + uint32_t BranchWeightToNext, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 13cd011..75989ad 100644 --- 
a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -267,6 +267,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STACKRESTORE: return "stackrestore"; case ISD::TRAP: return "trap"; case ISD::DEBUGTRAP: return "debugtrap"; + case ISD::LIFETIME_START: return "lifetime.start"; + case ISD::LIFETIME_END: return "lifetime.end"; // Bit manipulation case ISD::BSWAP: return "bswap"; @@ -331,7 +333,7 @@ void SDNode::dump(const SelectionDAG *G) const { } void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { - OS << (void*)this << ": "; + OS << (const void*)this << ": "; for (unsigned i = 0, e = getNumValues(); i != e; ++i) { if (i) OS << ","; @@ -559,7 +561,7 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, child->printr(OS, G); once.insert(child); } else { // Just the address. FIXME: also print the child's opcode. - OS << (void*)child; + OS << (const void*)child; if (unsigned RN = N->getOperand(i).getResNo()) OS << ":" << RN; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 4e5e3ba..7542941 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -554,7 +554,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #endif { BlockNumber = FuncInfo->MBB->getNumber(); - BlockName = MF->getFunction()->getName().str() + ":" + + BlockName = MF->getName().str() + ":" + FuncInfo->MBB->getBasicBlock()->getName().str(); } DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber @@ -1209,7 +1209,12 @@ SelectionDAGISel::FinishBasicBlock() { CodeGenAndEmitDAG(); } + uint32_t UnhandledWeight = 0; + for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) + UnhandledWeight += SDB->BitTestCases[i].Cases[j].ExtraWeight; + for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { + UnhandledWeight -= SDB->BitTestCases[i].Cases[j].ExtraWeight; // Set the current basic block to the mbb we wish to insert the code into FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); @@ -1217,12 +1222,14 @@ SelectionDAGISel::FinishBasicBlock() { if (j+1 != ej) SDB->visitBitTestCase(SDB->BitTestCases[i], SDB->BitTestCases[i].Cases[j+1].ThisBB, + UnhandledWeight, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], FuncInfo->MBB); else SDB->visitBitTestCase(SDB->BitTestCases[i], SDB->BitTestCases[i].Default, + UnhandledWeight, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], FuncInfo->MBB); @@ -1794,10 +1801,13 @@ WalkChainUsers(const SDNode *ChainedNode, User->getOpcode() == ISD::HANDLENODE) // Root of the graph. continue; - if (User->getOpcode() == ISD::CopyToReg || - User->getOpcode() == ISD::CopyFromReg || - User->getOpcode() == ISD::INLINEASM || - User->getOpcode() == ISD::EH_LABEL) { + unsigned UserOpcode = User->getOpcode(); + if (UserOpcode == ISD::CopyToReg || + UserOpcode == ISD::CopyFromReg || + UserOpcode == ISD::INLINEASM || + UserOpcode == ISD::EH_LABEL || + UserOpcode == ISD::LIFETIME_START || + UserOpcode == ISD::LIFETIME_END) { // If their node ID got reset to -1 then they've already been selected. // Treat them like a MachineOpcode. 
if (User->getNodeId() == -1) @@ -2213,6 +2223,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case ISD::CopyFromReg: case ISD::CopyToReg: case ISD::EH_LABEL: + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: NodeToMatch->setNodeId(-1); // Mark selected. return 0; case ISD::AssertSext: @@ -2981,7 +2993,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) { N->getOpcode() != ISD::INTRINSIC_WO_CHAIN && N->getOpcode() != ISD::INTRINSIC_VOID) { N->printrFull(Msg, CurDAG); - Msg << "\nIn function: " << MF->getFunction()->getName(); + Msg << "\nIn function: " << MF->getName(); } else { bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other; unsigned iid = diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 173ffac..3921635 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -14,7 +14,6 @@ #include "ScheduleDAGSDNodes.h" #include "llvm/Constants.h" #include "llvm/DebugInfo.h" -#include "llvm/Function.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -50,7 +49,7 @@ namespace llvm { template<typename EdgeIter> static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) { - return itostr(I - SDNodeIterator::begin((SDNode *) Node)); + return itostr(I - SDNodeIterator::begin((const SDNode *) Node)); } /// edgeTargetsEdgeSource - This method returns true if this outgoing edge @@ -73,7 +72,7 @@ namespace llvm { } static std::string getGraphName(const SelectionDAG *G) { - return G->getMachineFunction().getFunction()->getName(); + return G->getMachineFunction().getName(); } static bool renderGraphFromBottomUp() { @@ -146,7 +145,7 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, void SelectionDAG::viewGraph(const std::string &Title) { // This code is only for debugging! #ifndef NDEBUG - ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(), + ViewGraph(this, "dag." + getMachineFunction().getName(), false, Title); #else errs() << "SelectionDAG::viewGraph is only available in debug builds on " diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 6820175..dcaa9ba 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -772,7 +772,7 @@ void TargetLowering::computeRegisterProperties() { LegalIntReg = IntReg; } else { RegisterTypeForVT[IntReg] = TransformToType[IntReg] = - (MVT::SimpleValueType)LegalIntReg; + (const MVT::SimpleValueType)LegalIntReg; ValueTypeActions.setTypeAction(IVT, TypePromoteInteger); } } @@ -898,7 +898,6 @@ const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { return NULL; } - EVT TargetLowering::getSetCCResultType(EVT VT) const { assert(!VT.isVector() && "No default SetCC type for vectors!"); return PointerTy.SimpleTy; @@ -2441,7 +2440,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0 == N1) { // The sext(setcc()) => setcc() optimization relies on the appropriate // constant being emitted. 
- uint64_t EqVal; + uint64_t EqVal = 0; switch (getBooleanContents(N0.getValueType().isVector())) { case UndefinedBooleanContent: case ZeroOrOneBooleanContent: diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index 21ae2f5..4fbe1b3 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -159,7 +159,7 @@ void PEI::initShrinkWrappingInfo() { // via --shrink-wrap-func=<funcname>. #ifndef NDEBUG if (ShrinkWrapFunc != "") { - std::string MFName = MF->getFunction()->getName().str(); + std::string MFName = MF->getName().str(); ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); } #endif @@ -187,7 +187,7 @@ void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { DEBUG(if (ShrinkWrapThisFunction) { dbgs() << "Place CSR spills/restores for " - << MF->getFunction()->getName() << "\n"; + << MF->getName() << "\n"; }); if (calculateSets(Fn)) @@ -364,7 +364,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { // If no CSRs used, we are done. if (CSI.empty()) { DEBUG(if (ShrinkWrapThisFunction) - dbgs() << "DISABLED: " << Fn.getFunction()->getName() + dbgs() << "DISABLED: " << Fn.getName() << ": uses no callee-saved registers\n"); return false; } @@ -384,7 +384,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { // implementation to functions with <= 500 MBBs. if (Fn.size() > 500) { DEBUG(if (ShrinkWrapThisFunction) - dbgs() << "DISABLED: " << Fn.getFunction()->getName() + dbgs() << "DISABLED: " << Fn.getName() << ": too large (" << Fn.size() << " MBBs)\n"); ShrinkWrapThisFunction = false; } @@ -466,7 +466,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { } if (allCSRUsesInEntryBlock) { - DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() + DEBUG(dbgs() << "DISABLED: " << Fn.getName() << ": all CSRs used in EntryBlock\n"); ShrinkWrapThisFunction = false; } else { @@ -478,7 +478,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { allCSRsUsedInEntryFanout = false; } if (allCSRsUsedInEntryFanout) { - DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() + DEBUG(dbgs() << "DISABLED: " << Fn.getName() << ": all CSRs used in imm successors of EntryBlock\n"); ShrinkWrapThisFunction = false; } @@ -505,7 +505,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { if (dominatesExitNodes) { CSRUsedInChokePoints |= CSRUsed[MBB]; if (CSRUsedInChokePoints == UsedCSRegs) { - DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() + DEBUG(dbgs() << "DISABLED: " << Fn.getName() << ": all CSRs used in choke point(s) at " << getBasicBlockName(MBB) << "\n"); ShrinkWrapThisFunction = false; @@ -521,7 +521,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { return false; DEBUG({ - dbgs() << "ENABLED: " << Fn.getFunction()->getName(); + dbgs() << "ENABLED: " << Fn.getName(); if (HasFastExitPath) dbgs() << " (fast exit path)"; dbgs() << "\n"; @@ -861,7 +861,7 @@ void PEI::placeSpillsAndRestores(MachineFunction &Fn) { DEBUG(if (ShrinkWrapDebugging >= BasicInfo) { dbgs() << "-----------------------------------------------------------\n"; dbgs() << "total iterations = " << iterations << " ( " - << Fn.getFunction()->getName() + << Fn.getName() << " " << numSRReducedThisFunc << " " << Fn.size() << " )\n"; @@ -984,7 +984,7 @@ void PEI::verifySpillRestorePlacement() { if (isReturnBlock(SBB) || SBB->succ_size() == 0) { if (restored != spilled) { CSRegSet notRestored = (spilled - restored); - DEBUG(dbgs() << MF->getFunction()->getName() << ": " + DEBUG(dbgs() << MF->getName() << ": " << stringifyCSRegSet(notRestored) << " spilled at " << getBasicBlockName(MBB) 
<< " are never restored on path to return " @@ -1032,7 +1032,7 @@ void PEI::verifySpillRestorePlacement() { } if (spilled != restored) { CSRegSet notSpilled = (restored - spilled); - DEBUG(dbgs() << MF->getFunction()->getName() << ": " + DEBUG(dbgs() << MF->getName() << ": " << stringifyCSRegSet(notSpilled) << " restored at " << getBasicBlockName(MBB) << " are never spilled\n"); diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 980bd74..7f46a06 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -196,53 +196,38 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin()); // Fill in the function context structure. - Type *Int32Ty = Type::getInt32Ty(F.getContext()); - Value *Zero = ConstantInt::get(Int32Ty, 0); - Value *One = ConstantInt::get(Int32Ty, 1); - Value *Two = ConstantInt::get(Int32Ty, 2); - Value *Three = ConstantInt::get(Int32Ty, 3); - Value *Four = ConstantInt::get(Int32Ty, 4); - - Value *Idxs[2] = { Zero, 0 }; - for (unsigned I = 0, E = LPads.size(); I != E; ++I) { LandingPadInst *LPI = LPads[I]; IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt()); // Reference the __data field. - Idxs[1] = Two; - Value *FCData = Builder.CreateGEP(FuncCtx, Idxs, "__data"); + Value *FCData = Builder.CreateConstGEP2_32(FuncCtx, 0, 2, "__data"); // The exception values come back in context->__data[0]. - Idxs[1] = Zero; - Value *ExceptionAddr = Builder.CreateGEP(FCData, Idxs, "exception_gep"); + Value *ExceptionAddr = Builder.CreateConstGEP2_32(FCData, 0, 0, + "exception_gep"); Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val"); - ExnVal = Builder.CreateIntToPtr(ExnVal, Type::getInt8PtrTy(F.getContext())); + ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy()); - Idxs[1] = One; - Value *SelectorAddr = Builder.CreateGEP(FCData, Idxs, "exn_selector_gep"); + Value *SelectorAddr = Builder.CreateConstGEP2_32(FCData, 0, 1, + "exn_selector_gep"); Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val"); substituteLPadValues(LPI, ExnVal, SelVal); } // Personality function - Idxs[1] = Three; + IRBuilder<> Builder(EntryBB->getTerminator()); if (!PersonalityFn) PersonalityFn = LPads[0]->getPersonalityFn(); - Value *PersonalityFieldPtr = - GetElementPtrInst::Create(FuncCtx, Idxs, "pers_fn_gep", - EntryBB->getTerminator()); - new StoreInst(PersonalityFn, PersonalityFieldPtr, true, - EntryBB->getTerminator()); + Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 3, + "pers_fn_gep"); + Builder.CreateStore(PersonalityFn, PersonalityFieldPtr, /*isVolatile=*/true); // LSDA address - Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr", - EntryBB->getTerminator()); - Idxs[1] = Four; - Value *LSDAFieldPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "lsda_gep", - EntryBB->getTerminator()); - new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator()); + Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr"); + Value *LSDAFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 4, "lsda_gep"); + Builder.CreateStore(LSDA, LSDAFieldPtr, /*isVolatile=*/true); return FuncCtx; } @@ -417,48 +402,31 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { Value *FuncCtx = setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); BasicBlock *EntryBB = F.begin(); - Type *Int32Ty = Type::getInt32Ty(F.getContext()); - - Value *Idxs[2] = { - ConstantInt::get(Int32Ty, 0), 0 - }; + IRBuilder<> 
Builder(EntryBB->getTerminator()); // Get a reference to the jump buffer. - Idxs[1] = ConstantInt::get(Int32Ty, 5); - Value *JBufPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "jbuf_gep", - EntryBB->getTerminator()); + Value *JBufPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 5, "jbuf_gep"); // Save the frame pointer. - Idxs[1] = ConstantInt::get(Int32Ty, 0); - Value *FramePtr = GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_fp_gep", - EntryBB->getTerminator()); + Value *FramePtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 0, "jbuf_fp_gep"); - Value *Val = CallInst::Create(FrameAddrFn, - ConstantInt::get(Int32Ty, 0), - "fp", - EntryBB->getTerminator()); - new StoreInst(Val, FramePtr, true, EntryBB->getTerminator()); + Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp"); + Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true); // Save the stack pointer. - Idxs[1] = ConstantInt::get(Int32Ty, 2); - Value *StackPtr = GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_sp_gep", - EntryBB->getTerminator()); + Value *StackPtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 2, "jbuf_sp_gep"); - Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator()); - new StoreInst(Val, StackPtr, true, EntryBB->getTerminator()); + Val = Builder.CreateCall(StackAddrFn, "sp"); + Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true); // Call the setjmp instrinsic. It fills in the rest of the jmpbuf. - Value *SetjmpArg = CastInst::Create(Instruction::BitCast, JBufPtr, - Type::getInt8PtrTy(F.getContext()), "", - EntryBB->getTerminator()); - CallInst::Create(BuiltinSetjmpFn, SetjmpArg, "", EntryBB->getTerminator()); + Value *SetjmpArg = Builder.CreateBitCast(JBufPtr, Builder.getInt8PtrTy()); + Builder.CreateCall(BuiltinSetjmpFn, SetjmpArg); // Store a pointer to the function context so that the back-end will know // where to look for it. - Value *FuncCtxArg = CastInst::Create(Instruction::BitCast, FuncCtx, - Type::getInt8PtrTy(F.getContext()), "", - EntryBB->getTerminator()); - CallInst::Create(FuncCtxFn, FuncCtxArg, "", EntryBB->getTerminator()); + Value *FuncCtxArg = Builder.CreateBitCast(FuncCtx, Builder.getInt8PtrTy()); + Builder.CreateCall(FuncCtxFn, FuncCtxArg); // At this point, we are all set up, update the invoke instructions to mark // their call_site values. diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index c8c3fb3..c98efb4 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -143,6 +143,7 @@ void SlotIndexes::renumberIndexes(IndexList::iterator curItr) { } +#ifndef NDEBUG void SlotIndexes::dump() const { for (IndexList::const_iterator itr = indexList.begin(); itr != indexList.end(); ++itr) { @@ -159,6 +160,7 @@ void SlotIndexes::dump() const { dbgs() << "BB#" << i << "\t[" << MBBRanges[i].first << ';' << MBBRanges[i].second << ")\n"; } +#endif // Print a SlotIndex to a raw_ostream. void SlotIndex::print(raw_ostream &os) const { @@ -168,9 +170,11 @@ void SlotIndex::print(raw_ostream &os) const { os << "invalid"; } +#ifndef NDEBUG // Dump a SlotIndex to stderr. 
void SlotIndex::dump() const { print(dbgs()); dbgs() << "\n"; } +#endif diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 4a2b7ec..96151b6 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -356,6 +356,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { Edit->anyRematerializable(0); } +#ifndef NDEBUG void SplitEditor::dump() const { if (RegAssign.empty()) { dbgs() << " empty\n"; @@ -366,6 +367,7 @@ void SplitEditor::dump() const { dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value(); dbgs() << '\n'; } +#endif VNInfo *SplitEditor::defValue(unsigned RegIdx, const VNInfo *ParentVNI, diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp new file mode 100644 index 0000000..dbfa4bb --- /dev/null +++ b/lib/CodeGen/StackColoring.cpp @@ -0,0 +1,696 @@ +//===-- StackColoring.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements the stack-coloring optimization that looks for +// lifetime marker machine instructions (LIFETIME_START and LIFETIME_END), +// which represent the possible lifetime of stack slots. It attempts to +// merge disjoint stack slots and reduce the used stack space. +// NOTE: This pass is not StackSlotColoring, which optimizes spill slots. +// +// TODO: In the future we plan to improve stack coloring in the following ways: +// 1. Allow merging multiple small slots into a single larger slot at different +// offsets. +// 2. Merge this pass with StackSlotColoring and allow merging of allocas with +// spill slots. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "stackcoloring" +#include "MachineTraceMetrics.h" +#include "llvm/Function.h" +#include "llvm/Module.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SparseSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/DebugInfo.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<bool> +DisableColoring("no-stack-coloring", + cl::init(true), cl::Hidden, + cl::desc("Suppress stack coloring")); + +STATISTIC(NumMarkerSeen, "Number of lifetime markers found."); +STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots."); +STATISTIC(StackSlotMerged, "Number of stack slots merged."); + +//===----------------------------------------------------------------------===// +// StackColoring Pass +//===----------------------------------------------------------------------===// + +namespace { +/// StackColoring - A machine pass for merging disjoint stack allocations, +/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions. +class StackColoring : public MachineFunctionPass { + MachineFrameInfo *MFI; + MachineFunction *MF; + + /// A class representing liveness information for a single basic block. + /// Each bit in the BitVector represents the liveness property + /// for a different stack slot. + struct BlockLifetimeInfo { + /// Which slots BEGIN in each basic block. + BitVector Begin; + /// Which slots END in each basic block. + BitVector End; + /// Which slots are marked as LIVE_IN, coming into each basic block. + BitVector LiveIn; + /// Which slots are marked as LIVE_OUT, coming out of each basic block. + BitVector LiveOut; + }; + + /// Maps active slots (per bit) for each basic block. + DenseMap<MachineBasicBlock*, BlockLifetimeInfo> BlockLiveness; + + /// Maps basic blocks to a serial number. + DenseMap<MachineBasicBlock*, int> BasicBlocks; + /// Maps serial numbers to basic blocks. + SmallVector<MachineBasicBlock*, 8> BasicBlockNumbering; + + /// Maps liveness intervals for each slot. + SmallVector<LiveInterval*, 16> Intervals; + /// VNInfo is used for the construction of LiveIntervals. + VNInfo::Allocator VNInfoAllocator; + /// SlotIndex analysis object. + SlotIndexes* Indexes; + + /// The list of lifetime markers found. These markers are to be removed + /// once the coloring is done. + SmallVector<MachineInstr*, 8> Markers; + + /// SlotSizeSorter - A sort utility for arranging stack slots according + /// to their size. 
+ struct SlotSizeSorter { + MachineFrameInfo *MFI; + SlotSizeSorter(MachineFrameInfo *mfi) : MFI(mfi) { } + bool operator()(int LHS, int RHS) { + // We use -1 to denote an uninteresting slot. Place these slots at the end. + if (LHS == -1) return false; + if (RHS == -1) return true; + // Sort according to size. + return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS); + } +}; + +public: + static char ID; + StackColoring() : MachineFunctionPass(ID) { + initializeStackColoringPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const; + bool runOnMachineFunction(MachineFunction &MF); + +private: + /// Debug. + void dump(); + + /// Removes all of the lifetime marker instructions from the function. + /// \returns true if any markers were removed. + bool removeAllMarkers(); + + /// Scan the machine function and find all of the lifetime markers. + /// Record the findings in the BEGIN and END vectors. + /// \returns the number of markers found. + unsigned collectMarkers(unsigned NumSlot); + + /// Perform the dataflow calculation and calculate the lifetime for each of + /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and + /// LifetimeLIVE_OUT maps that represent which stack slots are live coming + /// in and out of blocks. + void calculateLocalLiveness(); + + /// Construct the LiveIntervals for the slots. + void calculateLiveIntervals(unsigned NumSlots); + + /// Go over the machine function and change instructions which use stack + /// slots to use the joint slots. + void remapInstructions(DenseMap<int, int> &SlotRemap); + + /// Map entries which point to other entries to their destination. + /// A->B->C becomes A->C. + void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots); +}; +} // end anonymous namespace + +char StackColoring::ID = 0; +char &llvm::StackColoringID = StackColoring::ID; + +INITIALIZE_PASS_BEGIN(StackColoring, + "stack-coloring", "Merge disjoint stack slots", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_END(StackColoring, + "stack-coloring", "Merge disjoint stack slots", false, false) + +void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<SlotIndexes>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void StackColoring::dump() { + for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF); + FI != FE; ++FI) { + unsigned Num = BasicBlocks[*FI]; + DEBUG(dbgs()<<"Inspecting block #"<<Num<<" ["<<FI->getName()<<"]\n"); + Num = 0; + DEBUG(dbgs()<<"BEGIN : {"); + for (unsigned i=0; i < BlockLiveness[*FI].Begin.size(); ++i) + DEBUG(dbgs()<<BlockLiveness[*FI].Begin.test(i)<<" "); + DEBUG(dbgs()<<"}\n"); + + DEBUG(dbgs()<<"END : {"); + for (unsigned i=0; i < BlockLiveness[*FI].End.size(); ++i) + DEBUG(dbgs()<<BlockLiveness[*FI].End.test(i)<<" "); + + DEBUG(dbgs()<<"}\n"); + + DEBUG(dbgs()<<"LIVE_IN: {"); + for (unsigned i=0; i < BlockLiveness[*FI].LiveIn.size(); ++i) + DEBUG(dbgs()<<BlockLiveness[*FI].LiveIn.test(i)<<" "); + + DEBUG(dbgs()<<"}\n"); + DEBUG(dbgs()<<"LIVEOUT: {"); + for (unsigned i=0; i < BlockLiveness[*FI].LiveOut.size(); ++i) + DEBUG(dbgs()<<BlockLiveness[*FI].LiveOut.test(i)<<" "); + DEBUG(dbgs()<<"}\n"); + } +} + +unsigned StackColoring::collectMarkers(unsigned NumSlot) { + unsigned MarkersFound = 0; + // Scan the function to find all lifetime markers. 
+ // NOTE: We use a reverse-post-order iteration to ensure that we obtain a + // deterministic numbering, and because we'll need a post-order iteration + // later for solving the liveness dataflow problem. + for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF); + FI != FE; ++FI) { + + // Assign a serial number to this basic block. + BasicBlocks[*FI] = BasicBlockNumbering.size(); + BasicBlockNumbering.push_back(*FI); + + BlockLiveness[*FI].Begin.resize(NumSlot); + BlockLiveness[*FI].End.resize(NumSlot); + + for (MachineBasicBlock::iterator BI = (*FI)->begin(), BE = (*FI)->end(); + BI != BE; ++BI) { + + if (BI->getOpcode() != TargetOpcode::LIFETIME_START && + BI->getOpcode() != TargetOpcode::LIFETIME_END) + continue; + + Markers.push_back(BI); + + bool IsStart = BI->getOpcode() == TargetOpcode::LIFETIME_START; + MachineOperand &MI = BI->getOperand(0); + unsigned Slot = MI.getIndex(); + + MarkersFound++; + + const Value *Allocation = MFI->getObjectAllocation(Slot); + if (Allocation) { + DEBUG(dbgs()<<"Found lifetime marker for allocation: "<< + Allocation->getName()<<"\n"); + } + + if (IsStart) { + BlockLiveness[*FI].Begin.set(Slot); + } else { + if (BlockLiveness[*FI].Begin.test(Slot)) { + // Allocas that start and end within a single block are handled + // specially when computing the LiveIntervals to avoid pessimizing + // the liveness propagation. + BlockLiveness[*FI].Begin.reset(Slot); + } else { + BlockLiveness[*FI].End.set(Slot); + } + } + } + } + + // Update statistics. + NumMarkerSeen += MarkersFound; + return MarkersFound; +} + +void StackColoring::calculateLocalLiveness() { + // Perform a standard reverse dataflow computation to solve for + // global liveness. The BEGIN set here is equivalent to KILL in the standard + // formulation, and END is equivalent to GEN. The result of this computation + // is a map from blocks to bitvectors where the bitvectors represent which + // allocas are live in/out of that block. + SmallPtrSet<MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(), + BasicBlockNumbering.end()); + unsigned NumSSMIters = 0; + bool changed = true; + while (changed) { + changed = false; + ++NumSSMIters; + + SmallPtrSet<MachineBasicBlock*, 8> NextBBSet; + + for (SmallVector<MachineBasicBlock*, 8>::iterator + PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end(); + PI != PE; ++PI) { + + MachineBasicBlock *BB = *PI; + if (!BBSet.count(BB)) continue; + + BitVector LocalLiveIn; + BitVector LocalLiveOut; + + // Forward propagation from begins to ends. + for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + PE = BB->pred_end(); PI != PE; ++PI) + LocalLiveIn |= BlockLiveness[*PI].LiveOut; + LocalLiveIn |= BlockLiveness[BB].End; + LocalLiveIn.reset(BlockLiveness[BB].Begin); + + // Reverse propagation from ends to begins. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + LocalLiveOut |= BlockLiveness[*SI].LiveIn; + LocalLiveOut |= BlockLiveness[BB].Begin; + LocalLiveOut.reset(BlockLiveness[BB].End); + + LocalLiveIn |= LocalLiveOut; + LocalLiveOut |= LocalLiveIn; + + // After adopting the live bits, we need to turn off the bits which + // are de-activated in this block. 
+ LocalLiveOut.reset(BlockLiveness[BB].End); + LocalLiveIn.reset(BlockLiveness[BB].Begin); + + // If we have both BEGIN and END markers in the same basic block then + // we know that the BEGIN marker comes after the END, because we already + // handle the case where the BEGIN comes before the END when collecting + // the markers (and building the BEGIN/END vectors). + // We want to enable the LIVE_IN and LIVE_OUT of slots that have both + // BEGIN and END because it means that the value lives before and after + // this basic block. + BitVector LocalEndBegin = BlockLiveness[BB].End; + LocalEndBegin &= BlockLiveness[BB].Begin; + LocalLiveIn |= LocalEndBegin; + LocalLiveOut |= LocalEndBegin; + + if (LocalLiveIn.test(BlockLiveness[BB].LiveIn)) { + changed = true; + BlockLiveness[BB].LiveIn |= LocalLiveIn; + + for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + PE = BB->pred_end(); PI != PE; ++PI) + NextBBSet.insert(*PI); + } + + if (LocalLiveOut.test(BlockLiveness[BB].LiveOut)) { + changed = true; + BlockLiveness[BB].LiveOut |= LocalLiveOut; + + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + NextBBSet.insert(*SI); + } + } + + BBSet = NextBBSet; + }// while changed. +} + +void StackColoring::calculateLiveIntervals(unsigned NumSlots) { + SmallVector<SlotIndex, 16> Starts; + SmallVector<SlotIndex, 16> Finishes; + + // For each block, find which slots are active within this block + // and update the live intervals. + for (MachineFunction::iterator MBB = MF->begin(), MBBe = MF->end(); + MBB != MBBe; ++MBB) { + Starts.clear(); + Starts.resize(NumSlots); + Finishes.clear(); + Finishes.resize(NumSlots); + + // Create the interval for the basic blocks with lifetime markers in them. + for (SmallVector<MachineInstr*, 8>::iterator it = Markers.begin(), + e = Markers.end(); it != e; ++it) { + MachineInstr *MI = *it; + if (MI->getParent() != MBB) + continue; + + assert((MI->getOpcode() == TargetOpcode::LIFETIME_START || + MI->getOpcode() == TargetOpcode::LIFETIME_END) && + "Invalid Lifetime marker"); + + bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START; + MachineOperand &Mo = MI->getOperand(0); + int Slot = Mo.getIndex(); + assert(Slot >= 0 && "Invalid slot"); + + SlotIndex ThisIndex = Indexes->getInstructionIndex(MI); + + if (IsStart) { + if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex) + Starts[Slot] = ThisIndex; + } else { + if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex) + Finishes[Slot] = ThisIndex; + } + } + + // Create the interval of the blocks that we previously found to be 'alive'. + BitVector Alive = BlockLiveness[MBB].LiveIn; + Alive |= BlockLiveness[MBB].LiveOut; + + if (Alive.any()) { + for (int pos = Alive.find_first(); pos != -1; + pos = Alive.find_next(pos)) { + if (!Starts[pos].isValid()) + Starts[pos] = Indexes->getMBBStartIdx(MBB); + if (!Finishes[pos].isValid()) + Finishes[pos] = Indexes->getMBBEndIdx(MBB); + } + } + + for (unsigned i = 0; i < NumSlots; ++i) { + assert(Starts[i].isValid() == Finishes[i].isValid() && "Unmatched range"); + if (!Starts[i].isValid()) + continue; + + assert(Starts[i] && Finishes[i] && "Invalid interval"); + VNInfo *ValNum = Intervals[i]->getValNumInfo(0); + SlotIndex S = Starts[i]; + SlotIndex F = Finishes[i]; + if (S < F) { + // We have a single consecutive region. + Intervals[i]->addRange(LiveRange(S, F, ValNum)); + } else { + // We have two non-consecutive regions. This happens when + // LIFETIME_START appears after the LIFETIME_END marker. 
+ SlotIndex NewStart = Indexes->getMBBStartIdx(MBB); + SlotIndex NewFin = Indexes->getMBBEndIdx(MBB); + Intervals[i]->addRange(LiveRange(NewStart, F, ValNum)); + Intervals[i]->addRange(LiveRange(S, NewFin, ValNum)); + } + } + } +} + +bool StackColoring::removeAllMarkers() { + unsigned Count = 0; + for (unsigned i = 0; i < Markers.size(); ++i) { + Markers[i]->eraseFromParent(); + Count++; + } + Markers.clear(); + + DEBUG(dbgs()<<"Removed "<<Count<<" markers.\n"); + return Count; +} + +void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { + unsigned FixedInstr = 0; + unsigned FixedMemOp = 0; + unsigned FixedDbg = 0; + MachineModuleInfo *MMI = &MF->getMMI(); + + // Remap debug information that refers to stack slots. + MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo(); + for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(), + VE = VMap.end(); VI != VE; ++VI) { + const MDNode *Var = VI->first; + if (!Var) continue; + std::pair<unsigned, DebugLoc> &VP = VI->second; + if (SlotRemap.count(VP.first)) { + DEBUG(dbgs()<<"Remapping debug info for ["<<Var->getName()<<"].\n"); + VP.first = SlotRemap[VP.first]; + FixedDbg++; + } + } + + // Keep a list of *allocas* which need to be remapped. + DenseMap<const Value*, const Value*> Allocas; + for (DenseMap<int, int>::iterator it = SlotRemap.begin(), + e = SlotRemap.end(); it != e; ++it) { + const Value *From = MFI->getObjectAllocation(it->first); + const Value *To = MFI->getObjectAllocation(it->second); + assert(To && From && "Invalid allocation object"); + Allocas[From] = To; + } + + // Remap all instructions to the new stack slots. + MachineFunction::iterator BB, BBE; + MachineBasicBlock::iterator I, IE; + for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB) + for (I = BB->begin(), IE = BB->end(); I != IE; ++I) { + + // Skip lifetime markers. We'll remove them soon. + if (I->getOpcode() == TargetOpcode::LIFETIME_START || + I->getOpcode() == TargetOpcode::LIFETIME_END) + continue; + + // Update the MachineMemOperand to use the new alloca. + for (MachineInstr::mmo_iterator MM = I->memoperands_begin(), + E = I->memoperands_end(); MM != E; ++MM) { + MachineMemOperand *MMO = *MM; + + const Value *V = MMO->getValue(); + + if (!V) + continue; + + // Climb up and find the original alloca. + V = GetUnderlyingObject(V); + // If we did not find one, or if the one that we found is not in our + // map, then move on. + if (!V || !Allocas.count(V)) + continue; + + MMO->setValue(Allocas[V]); + FixedMemOp++; + } + + // Update all of the machine instruction operands. + for (unsigned i = 0 ; i < I->getNumOperands(); ++i) { + MachineOperand &MO = I->getOperand(i); + + if (!MO.isFI()) + continue; + int FromSlot = MO.getIndex(); + + // Don't touch arguments. + if (FromSlot<0) + continue; + + // Only look at mapped slots. + if (!SlotRemap.count(FromSlot)) + continue; + + // In a debug build, check that the instruction that we are modifying is + // inside the expected live range. If the instruction is not inside + // the calculated range then it means that the alloca usage moved + // outside of the lifetime markers. +#ifndef NDEBUG + SlotIndex Index = Indexes->getInstructionIndex(I); + LiveInterval* Interval = Intervals[FromSlot]; + assert(Interval->find(Index) != Interval->end() && + "Found instruction usage outside of live range."); +#endif + + // Fix the machine instructions. 
+ int ToSlot = SlotRemap[FromSlot]; + MO.setIndex(ToSlot); + FixedInstr++; + } + } + + DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n"); + DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n"); + DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n"); +} + +void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap, + unsigned NumSlots) { + // Expunge slot remap map. + for (unsigned i=0; i < NumSlots; ++i) { + // If we are remapping i + if (SlotRemap.count(i)) { + int Target = SlotRemap[i]; + // As long as our target is mapped to something else, follow it. + while (SlotRemap.count(Target)) { + Target = SlotRemap[Target]; + SlotRemap[i] = Target; + } + } + } +} + +bool StackColoring::runOnMachineFunction(MachineFunction &Func) { + DEBUG(dbgs() << "********** Stack Coloring **********\n" + << "********** Function: " + << ((const Value*)Func.getFunction())->getName() << '\n'); + MF = &Func; + MFI = MF->getFrameInfo(); + Indexes = &getAnalysis<SlotIndexes>(); + BlockLiveness.clear(); + BasicBlocks.clear(); + BasicBlockNumbering.clear(); + Markers.clear(); + Intervals.clear(); + VNInfoAllocator.Reset(); + + unsigned NumSlots = MFI->getObjectIndexEnd(); + + // If there are no stack slots then there are no markers to remove. + if (!NumSlots) + return false; + + SmallVector<int, 8> SortedSlots; + + SortedSlots.reserve(NumSlots); + Intervals.reserve(NumSlots); + + unsigned NumMarkers = collectMarkers(NumSlots); + + unsigned TotalSize = 0; + DEBUG(dbgs()<<"Found "<<NumMarkers<<" markers and "<<NumSlots<<" slots\n"); + DEBUG(dbgs()<<"Slot structure:\n"); + + for (int i=0; i < MFI->getObjectIndexEnd(); ++i) { + DEBUG(dbgs()<<"Slot #"<<i<<" - "<<MFI->getObjectSize(i)<<" bytes.\n"); + TotalSize += MFI->getObjectSize(i); + } + + DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n"); + + // Don't continue because there are not enough lifetime markers, or the + // stack is too small, or we are told not to optimize the slots. + if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) { + DEBUG(dbgs()<<"Will not try to merge slots.\n"); + return removeAllMarkers(); + } + + for (unsigned i=0; i < NumSlots; ++i) { + LiveInterval *LI = new LiveInterval(i, 0); + Intervals.push_back(LI); + LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator); + SortedSlots.push_back(i); + } + + // Calculate the liveness of each block. + calculateLocalLiveness(); + + // Propagate the liveness information. + calculateLiveIntervals(NumSlots); + + // Maps old slots to new slots. + DenseMap<int, int> SlotRemap; + unsigned RemovedSlots = 0; + unsigned ReducedSize = 0; + + // Do not bother looking at empty intervals. + for (unsigned I = 0; I < NumSlots; ++I) { + if (Intervals[SortedSlots[I]]->empty()) + SortedSlots[I] = -1; + } + + // This is a simple greedy algorithm for merging allocas. First, sort the + // slots, placing the largest slots first. Next, perform an n^2 scan and look + // for disjoint slots. When you find disjoint slots, merge the smaller one + // into the bigger one and update the live interval. Remove the small alloca + // and continue. + + // Sort the slots according to their size. Place unused slots at the end. 
+ std::sort(SortedSlots.begin(), SortedSlots.end(), SlotSizeSorter(MFI)); + + bool Changed = true; + while (Changed) { + Changed = false; + for (unsigned I = 0; I < NumSlots; ++I) { + if (SortedSlots[I] == -1) + continue; + + for (unsigned J=I+1; J < NumSlots; ++J) { + if (SortedSlots[J] == -1) + continue; + + int FirstSlot = SortedSlots[I]; + int SecondSlot = SortedSlots[J]; + LiveInterval *First = Intervals[FirstSlot]; + LiveInterval *Second = Intervals[SecondSlot]; + assert (!First->empty() && !Second->empty() && "Found an empty range"); + + // Merge disjoint slots. + if (!First->overlaps(*Second)) { + Changed = true; + First->MergeRangesInAsValue(*Second, First->getValNumInfo(0)); + SlotRemap[SecondSlot] = FirstSlot; + SortedSlots[J] = -1; + DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slot #"<< + SecondSlot<<" together.\n"); + unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot), + MFI->getObjectAlignment(SecondSlot)); + + assert(MFI->getObjectSize(FirstSlot) >= + MFI->getObjectSize(SecondSlot) && + "Merging a small object into a larger one"); + + RemovedSlots+=1; + ReducedSize += MFI->getObjectSize(SecondSlot); + MFI->setObjectAlignment(FirstSlot, MaxAlignment); + MFI->RemoveStackObject(SecondSlot); + } + } + } + }// While changed. + + // Record statistics. + StackSpaceSaved += ReducedSize; + StackSlotMerged += RemovedSlots; + DEBUG(dbgs()<<"Merged "<<RemovedSlots<<" slots. Saved "<< + ReducedSize<<" bytes\n"); + + // Scan the entire function and update all machine operands that use frame + // indices to use the remapped frame index. + expungeSlotMap(SlotRemap, NumSlots); + remapInstructions(SlotRemap); + + // Release the intervals. + for (unsigned I = 0; I < NumSlots; ++I) { + delete Intervals[I]; + } + + return removeAllMarkers(); +} diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 20da36e..9d0fd0a 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "stackcoloring" -#include "llvm/Function.h" #include "llvm/Module.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -391,8 +390,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { DEBUG({ dbgs() << "********** Stack Slot Coloring **********\n" - << "********** Function: " - << MF.getFunction()->getName() << '\n'; + << "********** Function: " << MF.getName() << '\n'; }); MFI = MF.getFrameInfo(); diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index 5b06195..39fd600 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -404,9 +404,9 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) { } void StrongPHIElimination::addReg(unsigned Reg) { - if (RegNodeMap.count(Reg)) - return; - RegNodeMap[Reg] = new (Allocator) Node(Reg); + Node *&N = RegNodeMap[Reg]; + if (!N) + N = new (Allocator) Node(Reg); } StrongPHIElimination::Node* @@ -714,8 +714,9 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, assert(getRegColor(CopyReg) == CopyReg); } - if (!InsertedSrcCopyMap.count(std::make_pair(PredBB, PHIColor))) - InsertedSrcCopyMap[std::make_pair(PredBB, PHIColor)] = CopyInstr; + // Insert into map if not already there. 
+ InsertedSrcCopyMap.insert(std::make_pair(std::make_pair(PredBB, PHIColor), + CopyInstr)); } SrcMO.setReg(CopyReg); diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index ddee6b2..7e7f835 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -99,17 +99,8 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, if (NewMI) { // Create a new instruction. - bool Reg0IsDead = HasDef ? MI->getOperand(0).isDead() : false; MachineFunction &MF = *MI->getParent()->getParent(); - if (HasDef) - return BuildMI(MF, MI->getDebugLoc(), MI->getDesc()) - .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead), SubReg0) - .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2) - .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1); - else - return BuildMI(MF, MI->getDebugLoc(), MI->getDesc()) - .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2) - .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1); + MI = MF.CloneMachineInstr(MI); } if (HasDef) { @@ -645,9 +636,16 @@ static int computeDefOperandLatency( } /// computeOperandLatency - Compute and return the latency of the given data -/// dependent def and use when the operand indices are already known. +/// dependent def and use when the operand indices are already known. UseMI may +/// be NULL for an unknown use. +/// +/// FindMin may be set to get the minimum vs. expected latency. Minimum +/// latency is used for scheduling groups, while expected latency is for +/// instruction cost and critical path. /// -/// FindMin may be set to get the minimum vs. expected latency. +/// Depending on the subtarget's itinerary properties, this may or may not need +/// to call getOperandLatency(). For most subtargets, we don't need DefIdx or +/// UseIdx to compute min latency. unsigned TargetInstrInfo:: computeOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, @@ -660,7 +658,13 @@ computeOperandLatency(const InstrItineraryData *ItinData, assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail"); - int OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); + int OperLatency = 0; + if (UseMI) + OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); + else { + unsigned DefClass = DefMI->getDesc().getSchedClass(); + OperLatency = ItinData->getOperandCycle(DefClass, DefIdx); + } if (OperLatency >= 0) return OperLatency; @@ -673,77 +677,3 @@ computeOperandLatency(const InstrItineraryData *ItinData, defaultDefLatency(ItinData->SchedModel, DefMI)); return InstrLatency; } - -/// computeOperandLatency - Compute and return the latency of the given data -/// dependent def and use. DefMI must be a valid def. UseMI may be NULL for an -/// unknown use. Depending on the subtarget's itinerary properties, this may or -/// may not need to call getOperandLatency(). -/// -/// FindMin may be set to get the minimum vs. expected latency. Minimum -/// latency is used for scheduling groups, while expected latency is for -/// instruction cost and critical path. -/// -/// For most subtargets, we don't need DefIdx or UseIdx to compute min latency. -/// DefMI must be a valid definition, but UseMI may be NULL for an unknown use. 
-unsigned TargetInstrInfo:: -computeOperandLatency(const InstrItineraryData *ItinData, - const TargetRegisterInfo *TRI, - const MachineInstr *DefMI, const MachineInstr *UseMI, - unsigned Reg, bool FindMin) const { - - int DefLatency = computeDefOperandLatency(this, ItinData, DefMI, FindMin); - if (DefLatency >= 0) - return DefLatency; - - assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail"); - - // Find the definition of the register in the defining instruction. - int DefIdx = DefMI->findRegisterDefOperandIdx(Reg); - if (DefIdx != -1) { - const MachineOperand &MO = DefMI->getOperand(DefIdx); - if (MO.isReg() && MO.isImplicit() && - DefIdx >= (int)DefMI->getDesc().getNumOperands()) { - // This is an implicit def, getOperandLatency() won't return the correct - // latency. e.g. - // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def> - // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ... - // What we want is to compute latency between def of %D6/%D7 and use of - // %Q3 instead. - unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI); - if (DefMI->getOperand(Op2).isReg()) - DefIdx = Op2; - } - // For all uses of the register, calculate the maxmimum latency - int OperLatency = -1; - - // UseMI is null, then it must be a scheduling barrier. - if (!UseMI) { - unsigned DefClass = DefMI->getDesc().getSchedClass(); - OperLatency = ItinData->getOperandCycle(DefClass, DefIdx); - } - else { - for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = UseMI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) - continue; - unsigned MOReg = MO.getReg(); - if (MOReg != Reg) - continue; - - int UseCycle = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, i); - OperLatency = std::max(OperLatency, UseCycle); - } - } - // If we found an operand latency, we're done. - if (OperLatency >= 0) - return OperLatency; - } - // No operand latency was found. - unsigned InstrLatency = getInstrLatency(ItinData, DefMI); - - // Expected latency is the max of the stage latency and itinerary props. - if (!FindMin) - InstrLatency = std::max(InstrLatency, - defaultDefLatency(ItinData->SchedModel, DefMI)); - return InstrLatency; -} diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index aa601af..bd12f92 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1202,8 +1202,7 @@ bool TwoAddressInstructionPass:: collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { const MCInstrDesc &MCID = MI->getDesc(); bool AnyOps = false; - unsigned NumOps = MI->isInlineAsm() ? - MI->getNumOperands() : MCID.getNumOperands(); + unsigned NumOps = MI->getNumOperands(); for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { unsigned DstIdx = 0; @@ -1373,7 +1372,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); DEBUG(dbgs() << "********** Function: " - << MF->getFunction()->getName() << '\n'); + << MF->getName() << '\n'); // This pass takes the function out of SSA form. 
MRI->leaveSSA(); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 93840f0..bd10a4b 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -19,7 +19,6 @@ #define DEBUG_TYPE "regalloc" #include "VirtRegMap.h" #include "LiveDebugVariables.h" -#include "llvm/Function.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -127,9 +126,11 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const { OS << '\n'; } +#ifndef NDEBUG void VirtRegMap::dump() const { print(dbgs()); } +#endif //===----------------------------------------------------------------------===// // VirtRegRewriter @@ -197,11 +198,11 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) { VRM = &getAnalysis<VirtRegMap>(); DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" << "********** Function: " - << MF->getFunction()->getName() << '\n'); + << MF->getName() << '\n'); DEBUG(VRM->dump()); // Add kill flags while we still have virtual registers. - LIS->addKillFlags(); + LIS->addKillFlags(VRM); // Live-in lists on basic blocks are required for physregs. addMBBLiveIns(); diff --git a/lib/DebugInfo/CMakeLists.txt b/lib/DebugInfo/CMakeLists.txt index 441f1e8..1e9e509 100644 --- a/lib/DebugInfo/CMakeLists.txt +++ b/lib/DebugInfo/CMakeLists.txt @@ -8,5 +8,6 @@ add_llvm_library(LLVMDebugInfo DWARFDebugAranges.cpp DWARFDebugInfoEntry.cpp DWARFDebugLine.cpp + DWARFDebugRangeList.cpp DWARFFormValue.cpp ) diff --git a/lib/DebugInfo/DIContext.cpp b/lib/DebugInfo/DIContext.cpp index e2fd55f..ead57f9 100644 --- a/lib/DebugInfo/DIContext.cpp +++ b/lib/DebugInfo/DIContext.cpp @@ -18,7 +18,9 @@ DIContext *DIContext::getDWARFContext(bool isLittleEndian, StringRef abbrevSection, StringRef aRangeSection, StringRef lineSection, - StringRef stringSection) { + StringRef stringSection, + StringRef rangeSection) { return new DWARFContextInMemory(isLittleEndian, infoSection, abbrevSection, - aRangeSection, lineSection, stringSection); + aRangeSection, lineSection, stringSection, + rangeSection); } diff --git a/lib/DebugInfo/DWARFCompileUnit.cpp b/lib/DebugInfo/DWARFCompileUnit.cpp index b27d57b..bdd65b7 100644 --- a/lib/DebugInfo/DWARFCompileUnit.cpp +++ b/lib/DebugInfo/DWARFCompileUnit.cpp @@ -63,7 +63,7 @@ DWARFCompileUnit::extract(uint32_t offset, DataExtractor debug_info_data, Version = debug_info_data.getU16(&offset); bool abbrevsOK = debug_info_data.getU32(&offset) == abbrevs->getOffset(); Abbrevs = abbrevs; - AddrSize = debug_info_data.getU8 (&offset); + AddrSize = debug_info_data.getU8(&offset); bool versionOK = DWARFContext::isSupportedVersion(Version); bool addrSizeOK = AddrSize == 4 || AddrSize == 8; @@ -75,6 +75,15 @@ DWARFCompileUnit::extract(uint32_t offset, DataExtractor debug_info_data, return 0; } +bool DWARFCompileUnit::extractRangeList(uint32_t RangeListOffset, + DWARFDebugRangeList &RangeList) const { + // Require that compile unit is extracted. 
+ assert(DieArray.size() > 0); + DataExtractor RangesData(Context.getRangeSection(), + Context.isLittleEndian(), AddrSize); + return RangeList.extract(RangesData, &RangeListOffset); +} + void DWARFCompileUnit::clear() { Offset = 0; Length = 0; @@ -94,7 +103,9 @@ void DWARFCompileUnit::dump(raw_ostream &OS) { << " (next CU at " << format("0x%08x", getNextCompileUnitOffset()) << ")\n"; - getCompileUnitDIE(false)->dump(OS, this, -1U); + const DWARFDebugInfoEntryMinimal *CU = getCompileUnitDIE(false); + assert(CU && "Null Compile Unit?"); + CU->dump(OS, this, -1U); } const char *DWARFCompileUnit::getCompilationDir() { @@ -174,11 +185,11 @@ size_t DWARFCompileUnit::extractDIEsIfNeeded(bool cu_die_only) { addDIE(die); return 1; } - else if (depth == 0 && initial_die_array_size == 1) { + else if (depth == 0 && initial_die_array_size == 1) // Don't append the CU die as we already did that - } else { - addDIE (die); - } + ; + else + addDIE(die); const DWARFAbbreviationDeclaration *abbrDecl = die.getAbbreviationDeclarationPtr(); @@ -199,9 +210,9 @@ size_t DWARFCompileUnit::extractDIEsIfNeeded(bool cu_die_only) { // Give a little bit of info if we encounter corrupt DWARF (our offset // should always terminate at or before the start of the next compilation // unit header). - if (offset > next_cu_offset) { - fprintf (stderr, "warning: DWARF compile unit extends beyond its bounds cu 0x%8.8x at 0x%8.8x'\n", getOffset(), offset); - } + if (offset > next_cu_offset) + fprintf(stderr, "warning: DWARF compile unit extends beyond its " + "bounds cu 0x%8.8x at 0x%8.8x'\n", getOffset(), offset); setDIERelations(); return DieArray.size(); @@ -244,12 +255,21 @@ DWARFCompileUnit::buildAddressRangeTable(DWARFDebugAranges *debug_aranges, clearDIEs(true); } -const DWARFDebugInfoEntryMinimal* -DWARFCompileUnit::getFunctionDIEForAddress(int64_t address) { +DWARFDebugInfoEntryMinimal::InlinedChain +DWARFCompileUnit::getInlinedChainForAddress(uint64_t Address) { + // First, find a subprogram that contains the given address (the root + // of inlined chain). extractDIEsIfNeeded(false); + const DWARFDebugInfoEntryMinimal *SubprogramDIE = 0; for (size_t i = 0, n = DieArray.size(); i != n; i++) { - if (DieArray[i].addressRangeContainsAddress(this, address)) - return &DieArray[i]; + if (DieArray[i].isSubprogramDIE() && + DieArray[i].addressRangeContainsAddress(this, Address)) { + SubprogramDIE = &DieArray[i]; + break; + } } - return 0; + // Get inlined chain rooted at this subprogram DIE. + if (!SubprogramDIE) + return DWARFDebugInfoEntryMinimal::InlinedChain(); + return SubprogramDIE->getInlinedChainForAddress(this, Address); } diff --git a/lib/DebugInfo/DWARFCompileUnit.h b/lib/DebugInfo/DWARFCompileUnit.h index b34a596..03e2862 100644 --- a/lib/DebugInfo/DWARFCompileUnit.h +++ b/lib/DebugInfo/DWARFCompileUnit.h @@ -12,6 +12,7 @@ #include "DWARFDebugAbbrev.h" #include "DWARFDebugInfoEntry.h" +#include "DWARFDebugRangeList.h" #include <vector> namespace llvm { @@ -45,6 +46,11 @@ public: /// extractDIEsIfNeeded - Parses a compile unit and indexes its DIEs if it /// hasn't already been done. Returns the number of DIEs parsed at this call. size_t extractDIEsIfNeeded(bool cu_die_only); + /// extractRangeList - extracts the range list referenced by this compile + /// unit from .debug_ranges section. Returns true on success. + /// Requires that compile unit is already extracted. 
+ bool extractRangeList(uint32_t RangeListOffset, + DWARFDebugRangeList &RangeList) const; void clear(); void dump(raw_ostream &OS); uint32_t getOffset() const { return Offset; } @@ -106,11 +112,11 @@ public: void buildAddressRangeTable(DWARFDebugAranges *debug_aranges, bool clear_dies_if_already_not_parsed); - /// getFunctionDIEForAddress - Returns pointer to parsed subprogram DIE, - /// address ranges of which contain the provided address, - /// or NULL if there is no such subprogram. The pointer - /// is valid until DWARFCompileUnit::clear() or clearDIEs() is called. - const DWARFDebugInfoEntryMinimal *getFunctionDIEForAddress(int64_t address); + + /// getInlinedChainForAddress - fetches inlined chain for a given address. + /// Returns empty chain if there is no subprogram containing address. + DWARFDebugInfoEntryMinimal::InlinedChain getInlinedChainForAddress( + uint64_t Address); }; } diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp index 797662b..241f55e 100644 --- a/lib/DebugInfo/DWARFContext.cpp +++ b/lib/DebugInfo/DWARFContext.cpp @@ -32,15 +32,17 @@ void DWARFContext::dump(raw_ostream &OS) { while (set.extract(arangesData, &offset)) set.dump(OS); + uint8_t savedAddressByteSize = 0; OS << "\n.debug_lines contents:\n"; for (unsigned i = 0, e = getNumCompileUnits(); i != e; ++i) { DWARFCompileUnit *cu = getCompileUnitAtIndex(i); + savedAddressByteSize = cu->getAddressByteSize(); unsigned stmtOffset = cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_stmt_list, -1U); if (stmtOffset != -1U) { DataExtractor lineData(getLineSection(), isLittleEndian(), - cu->getAddressByteSize()); + savedAddressByteSize); DWARFDebugLine::DumpingState state(OS); DWARFDebugLine::parseStatementTable(lineData, &stmtOffset, state); } @@ -54,6 +56,18 @@ void DWARFContext::dump(raw_ostream &OS) { OS << format("0x%8.8x: \"%s\"\n", lastOffset, s); lastOffset = offset; } + + OS << "\n.debug_ranges contents:\n"; + // In fact, different compile units may have different address byte + // sizes, but for simplicity we just use the address byte size of the last + // compile unit (there is no easy and fast way to associate address range + // list and the compile unit it describes). + DataExtractor rangesData(getRangeSection(), isLittleEndian(), + savedAddressByteSize); + offset = 0; + DWARFDebugRangeList rangeList; + while (rangeList.extract(rangesData, &offset)) + rangeList.dump(OS); } const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() { @@ -131,75 +145,152 @@ namespace { }; } -DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t offset) { +DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t Offset) { if (CUs.empty()) parseCompileUnits(); - DWARFCompileUnit *i = std::lower_bound(CUs.begin(), CUs.end(), offset, - OffsetComparator()); - if (i != CUs.end()) - return &*i; + DWARFCompileUnit *CU = std::lower_bound(CUs.begin(), CUs.end(), Offset, + OffsetComparator()); + if (CU != CUs.end()) + return &*CU; return 0; } -DILineInfo DWARFContext::getLineInfoForAddress(uint64_t address, - DILineInfoSpecifier specifier) { +DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) { // First, get the offset of the compile unit. - uint32_t cuOffset = getDebugAranges()->findAddress(address); + uint32_t CUOffset = getDebugAranges()->findAddress(Address); // Retrieve the compile unit. 
- DWARFCompileUnit *cu = getCompileUnitForOffset(cuOffset); - if (!cu) + return getCompileUnitForOffset(CUOffset); +} + +static bool getFileNameForCompileUnit( + DWARFCompileUnit *CU, const DWARFDebugLine::LineTable *LineTable, + uint64_t FileIndex, bool NeedsAbsoluteFilePath, std::string &FileName) { + if (CU == 0 || + LineTable == 0 || + !LineTable->getFileNameByIndex(FileIndex, NeedsAbsoluteFilePath, + FileName)) + return false; + if (NeedsAbsoluteFilePath && sys::path::is_relative(FileName)) { + // We may still need to append compilation directory of compile unit. + SmallString<16> AbsolutePath; + if (const char *CompilationDir = CU->getCompilationDir()) { + sys::path::append(AbsolutePath, CompilationDir); + } + sys::path::append(AbsolutePath, FileName); + FileName = AbsolutePath.str(); + } + return true; +} + +static bool getFileLineInfoForCompileUnit( + DWARFCompileUnit *CU, const DWARFDebugLine::LineTable *LineTable, + uint64_t Address, bool NeedsAbsoluteFilePath, std::string &FileName, + uint32_t &Line, uint32_t &Column) { + if (CU == 0 || LineTable == 0) + return false; + // Get the index of row we're looking for in the line table. + uint32_t RowIndex = LineTable->lookupAddress(Address); + if (RowIndex == -1U) + return false; + // Take file number and line/column from the row. + const DWARFDebugLine::Row &Row = LineTable->Rows[RowIndex]; + if (!getFileNameForCompileUnit(CU, LineTable, Row.File, + NeedsAbsoluteFilePath, FileName)) + return false; + Line = Row.Line; + Column = Row.Column; + return true; +} + +DILineInfo DWARFContext::getLineInfoForAddress(uint64_t Address, + DILineInfoSpecifier Specifier) { + DWARFCompileUnit *CU = getCompileUnitForAddress(Address); + if (!CU) return DILineInfo(); - SmallString<16> fileName("<invalid>"); - SmallString<16> functionName("<invalid>"); - uint32_t line = 0; - uint32_t column = 0; - if (specifier.needs(DILineInfoSpecifier::FunctionName)) { - const DWARFDebugInfoEntryMinimal *function_die = - cu->getFunctionDIEForAddress(address); - if (function_die) { - if (const char *name = function_die->getSubprogramName(cu)) - functionName = name; + std::string FileName = "<invalid>"; + std::string FunctionName = "<invalid>"; + uint32_t Line = 0; + uint32_t Column = 0; + if (Specifier.needs(DILineInfoSpecifier::FunctionName)) { + // The address may correspond to instruction in some inlined function, + // so we have to build the chain of inlined functions and take the + // name of the topmost function in it. + const DWARFDebugInfoEntryMinimal::InlinedChain &InlinedChain = + CU->getInlinedChainForAddress(Address); + if (InlinedChain.size() > 0) { + const DWARFDebugInfoEntryMinimal &TopFunctionDIE = InlinedChain[0]; + if (const char *Name = TopFunctionDIE.getSubroutineName(CU)) + FunctionName = Name; } } - if (specifier.needs(DILineInfoSpecifier::FileLineInfo)) { - // Get the line table for this compile unit. - const DWARFDebugLine::LineTable *lineTable = getLineTableForCompileUnit(cu); - if (lineTable) { - // Get the index of the row we're looking for in the line table. - uint32_t rowIndex = lineTable->lookupAddress(address); - if (rowIndex != -1U) { - const DWARFDebugLine::Row &row = lineTable->Rows[rowIndex]; - // Take file/line info from the line table. 
- const DWARFDebugLine::FileNameEntry &fileNameEntry = - lineTable->Prologue.FileNames[row.File - 1]; - fileName = fileNameEntry.Name; - if (specifier.needs(DILineInfoSpecifier::AbsoluteFilePath) && - sys::path::is_relative(fileName.str())) { - // Append include directory of file (if it is present in line table) - // and compilation directory of compile unit to make path absolute. - const char *includeDir = 0; - if (uint64_t includeDirIndex = fileNameEntry.DirIdx) { - includeDir = lineTable->Prologue - .IncludeDirectories[includeDirIndex - 1]; - } - SmallString<16> absFileName; - if (includeDir == 0 || sys::path::is_relative(includeDir)) { - if (const char *compilationDir = cu->getCompilationDir()) - sys::path::append(absFileName, compilationDir); - } - if (includeDir) { - sys::path::append(absFileName, includeDir); - } - sys::path::append(absFileName, fileName.str()); - fileName = absFileName; - } - line = row.Line; - column = row.Column; + if (Specifier.needs(DILineInfoSpecifier::FileLineInfo)) { + const DWARFDebugLine::LineTable *LineTable = + getLineTableForCompileUnit(CU); + const bool NeedsAbsoluteFilePath = + Specifier.needs(DILineInfoSpecifier::AbsoluteFilePath); + getFileLineInfoForCompileUnit(CU, LineTable, Address, + NeedsAbsoluteFilePath, + FileName, Line, Column); + } + return DILineInfo(StringRef(FileName), StringRef(FunctionName), + Line, Column); +} + +DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address, + DILineInfoSpecifier Specifier) { + DWARFCompileUnit *CU = getCompileUnitForAddress(Address); + if (!CU) + return DIInliningInfo(); + + const DWARFDebugInfoEntryMinimal::InlinedChain &InlinedChain = + CU->getInlinedChainForAddress(Address); + if (InlinedChain.size() == 0) + return DIInliningInfo(); + + DIInliningInfo InliningInfo; + uint32_t CallFile = 0, CallLine = 0, CallColumn = 0; + const DWARFDebugLine::LineTable *LineTable = 0; + for (uint32_t i = 0, n = InlinedChain.size(); i != n; i++) { + const DWARFDebugInfoEntryMinimal &FunctionDIE = InlinedChain[i]; + std::string FileName = "<invalid>"; + std::string FunctionName = "<invalid>"; + uint32_t Line = 0; + uint32_t Column = 0; + // Get function name if necessary. + if (Specifier.needs(DILineInfoSpecifier::FunctionName)) { + if (const char *Name = FunctionDIE.getSubroutineName(CU)) + FunctionName = Name; + } + if (Specifier.needs(DILineInfoSpecifier::FileLineInfo)) { + const bool NeedsAbsoluteFilePath = + Specifier.needs(DILineInfoSpecifier::AbsoluteFilePath); + if (i == 0) { + // For the topmost frame, initialize the line table of this + // compile unit and fetch file/line info from it. + LineTable = getLineTableForCompileUnit(CU); + // For the topmost routine, get file/line info from line table. + getFileLineInfoForCompileUnit(CU, LineTable, Address, + NeedsAbsoluteFilePath, + FileName, Line, Column); + } else { + // Otherwise, use call file, call line and call column from + // previous DIE in inlined chain. + getFileNameForCompileUnit(CU, LineTable, CallFile, + NeedsAbsoluteFilePath, FileName); + Line = CallLine; + Column = CallColumn; + } + // Get call file/line/column of a current DIE. 
+ if (i + 1 < n) { + FunctionDIE.getCallerFrame(CU, CallFile, CallLine, CallColumn); } } + DILineInfo Frame(StringRef(FileName), StringRef(FunctionName), + Line, Column); + InliningInfo.addFrame(Frame); } - return DILineInfo(fileName, functionName, line, column); + return InliningInfo; } void DWARFContextInMemory::anchor() { } diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h index e55a27e..7633997 100644 --- a/lib/DebugInfo/DWARFContext.h +++ b/lib/DebugInfo/DWARFContext.h @@ -13,6 +13,7 @@ #include "DWARFCompileUnit.h" #include "DWARFDebugAranges.h" #include "DWARFDebugLine.h" +#include "DWARFDebugRangeList.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" @@ -53,9 +54,6 @@ public: return &CUs[index]; } - /// Return the compile unit that includes an offset (relative to .debug_info). - DWARFCompileUnit *getCompileUnitForOffset(uint32_t offset); - /// Get a pointer to the parsed DebugAbbrev object. const DWARFDebugAbbrev *getDebugAbbrev(); @@ -66,8 +64,10 @@ public: const DWARFDebugLine::LineTable * getLineTableForCompileUnit(DWARFCompileUnit *cu); - virtual DILineInfo getLineInfoForAddress(uint64_t address, - DILineInfoSpecifier specifier = DILineInfoSpecifier()); + virtual DILineInfo getLineInfoForAddress(uint64_t Address, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()); + virtual DIInliningInfo getInliningInfoForAddress(uint64_t Address, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()); bool isLittleEndian() const { return IsLittleEndian; } @@ -76,12 +76,19 @@ public: virtual StringRef getARangeSection() = 0; virtual StringRef getLineSection() = 0; virtual StringRef getStringSection() = 0; + virtual StringRef getRangeSection() = 0; static bool isSupportedVersion(unsigned version) { return version == 2 || version == 3; } -}; +private: + /// Return the compile unit that includes an offset (relative to .debug_info). + DWARFCompileUnit *getCompileUnitForOffset(uint32_t Offset); + /// Return the compile unit which contains instruction with provided + /// address. + DWARFCompileUnit *getCompileUnitForAddress(uint64_t Address); +}; /// DWARFContextInMemory is the simplest possible implementation of a /// DWARFContext. 
It assumes all content is available in memory and stores @@ -93,19 +100,22 @@ class DWARFContextInMemory : public DWARFContext { StringRef ARangeSection; StringRef LineSection; StringRef StringSection; + StringRef RangeSection; public: DWARFContextInMemory(bool isLittleEndian, StringRef infoSection, StringRef abbrevSection, StringRef aRangeSection, StringRef lineSection, - StringRef stringSection) + StringRef stringSection, + StringRef rangeSection) : DWARFContext(isLittleEndian), InfoSection(infoSection), AbbrevSection(abbrevSection), ARangeSection(aRangeSection), LineSection(lineSection), - StringSection(stringSection) + StringSection(stringSection), + RangeSection(rangeSection) {} virtual StringRef getInfoSection() { return InfoSection; } @@ -113,6 +123,7 @@ public: virtual StringRef getARangeSection() { return ARangeSection; } virtual StringRef getLineSection() { return LineSection; } virtual StringRef getStringSection() { return StringSection; } + virtual StringRef getRangeSection() { return RangeSection; } }; } diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp index ef470e5..f9a34c9 100644 --- a/lib/DebugInfo/DWARFDebugAranges.cpp +++ b/lib/DebugInfo/DWARFDebugAranges.cpp @@ -62,7 +62,6 @@ bool DWARFDebugAranges::extract(DataExtractor debug_aranges_data) { uint32_t offset = 0; typedef std::vector<DWARFDebugArangeSet> SetCollection; - typedef SetCollection::const_iterator SetCollectionIter; SetCollection sets; DWARFDebugArangeSet set; diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARFDebugInfoEntry.cpp index 429a36c..1bfd126 100644 --- a/lib/DebugInfo/DWARFDebugInfoEntry.cpp +++ b/lib/DebugInfo/DWARFDebugInfoEntry.cpp @@ -1,4 +1,4 @@ -//===-- DWARFDebugInfoEntry.cpp --------------------------------------------===// +//===-- DWARFDebugInfoEntry.cpp -------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -101,7 +101,7 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu, DataExtractor debug_info_data = cu->getDebugInfoExtractor(); uint64_t abbrCode = debug_info_data.getULEB128(offset_ptr); - assert (fixed_form_sizes); // For best performance this should be specified! + assert(fixed_form_sizes); // For best performance this should be specified! if (abbrCode) { uint32_t offset = *offset_ptr; @@ -126,6 +126,7 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu, switch (form) { // Blocks if inlined data that have a length field and the data bytes // inlined in the .debug_info. + case DW_FORM_exprloc: case DW_FORM_block: form_size = debug_info_data.getULEB128(&offset); break; @@ -150,6 +151,11 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu, form_size = cu->getAddressByteSize(); break; + // 0 sized form. 
+ case DW_FORM_flag_present: + form_size = 0; + break; + // 1 byte values case DW_FORM_data1: case DW_FORM_flag: @@ -173,6 +179,7 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu, // 8 byte values case DW_FORM_data8: case DW_FORM_ref8: + case DW_FORM_ref_sig8: form_size = 8; break; @@ -188,6 +195,13 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu, form = debug_info_data.getULEB128(&offset); break; + case DW_FORM_sec_offset: + if (cu->getAddressByteSize() == 4) + debug_info_data.getU32(offset_ptr); + else + debug_info_data.getU64(offset_ptr); + break; + default: *offset_ptr = Offset; return false; @@ -249,6 +263,7 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu, switch (form) { // Blocks if inlined data that have a length field and the data // bytes // inlined in the .debug_info + case DW_FORM_exprloc: case DW_FORM_block: form_size = debug_info_data.getULEB128(&offset); break; @@ -273,6 +288,11 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu, form_size = cu_addr_size; break; + // 0 byte value + case DW_FORM_flag_present: + form_size = 0; + break; + // 1 byte values case DW_FORM_data1: case DW_FORM_flag: @@ -299,6 +319,7 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu, // 8 byte values case DW_FORM_data8: case DW_FORM_ref8: + case DW_FORM_ref_sig8: form_size = 8; break; @@ -314,6 +335,13 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu, form_is_indirect = true; break; + case DW_FORM_sec_offset: + if (cu->getAddressByteSize() == 4) + debug_info_data.getU32(offset_ptr); + else + debug_info_data.getU64(offset_ptr); + break; + default: *offset_ptr = offset; return false; @@ -336,6 +364,16 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu, return false; } +bool DWARFDebugInfoEntryMinimal::isSubprogramDIE() const { + return getTag() == DW_TAG_subprogram; +} + +bool DWARFDebugInfoEntryMinimal::isSubroutineDIE() const { + uint32_t Tag = getTag(); + return Tag == DW_TAG_subprogram || + Tag == DW_TAG_inlined_subroutine; +} + uint32_t DWARFDebugInfoEntryMinimal::getAttributeValue(const DWARFCompileUnit *cu, const uint16_t attr, @@ -418,24 +456,31 @@ DWARFDebugInfoEntryMinimal::getAttributeValueAsReference( return fail_value; } +bool DWARFDebugInfoEntryMinimal::getLowAndHighPC(const DWARFCompileUnit *CU, + uint64_t &LowPC, uint64_t &HighPC) const { + HighPC = -1ULL; + LowPC = getAttributeValueAsUnsigned(CU, DW_AT_low_pc, -1ULL); + if (LowPC != -1ULL) + HighPC = getAttributeValueAsUnsigned(CU, DW_AT_high_pc, -1ULL); + return (HighPC != -1ULL); +} + void -DWARFDebugInfoEntryMinimal::buildAddressRangeTable(const DWARFCompileUnit *cu, - DWARFDebugAranges *debug_aranges) +DWARFDebugInfoEntryMinimal::buildAddressRangeTable(const DWARFCompileUnit *CU, + DWARFDebugAranges *DebugAranges) const { if (AbbrevDecl) { - uint16_t tag = AbbrevDecl->getTag(); - if (tag == DW_TAG_subprogram) { - uint64_t hi_pc = -1ULL; - uint64_t lo_pc = getAttributeValueAsUnsigned(cu, DW_AT_low_pc, -1ULL); - if (lo_pc != -1ULL) - hi_pc = getAttributeValueAsUnsigned(cu, DW_AT_high_pc, -1ULL); - if (hi_pc != -1ULL) - debug_aranges->appendRange(cu->getOffset(), lo_pc, hi_pc); + if (isSubprogramDIE()) { + uint64_t LowPC, HighPC; + if (getLowAndHighPC(CU, LowPC, HighPC)) { + DebugAranges->appendRange(CU->getOffset(), LowPC, HighPC); + } + // FIXME: try to append ranges from .debug_ranges section. 
} const DWARFDebugInfoEntryMinimal *child = getFirstChild(); while (child) { - child->buildAddressRangeTable(cu, debug_aranges); + child->buildAddressRangeTable(CU, DebugAranges); child = child->getSibling(); } } @@ -443,51 +488,90 @@ DWARFDebugInfoEntryMinimal::buildAddressRangeTable(const DWARFCompileUnit *cu, bool DWARFDebugInfoEntryMinimal::addressRangeContainsAddress( - const DWARFCompileUnit *cu, const uint64_t address) const { - if (!isNULL() && getTag() == DW_TAG_subprogram) { - uint64_t hi_pc = -1ULL; - uint64_t lo_pc = getAttributeValueAsUnsigned(cu, DW_AT_low_pc, -1ULL); - if (lo_pc != -1ULL) - hi_pc = getAttributeValueAsUnsigned(cu, DW_AT_high_pc, -1ULL); - if (hi_pc != -1ULL) { - return (lo_pc <= address && address < hi_pc); - } + const DWARFCompileUnit *CU, const uint64_t Address) const { + if (isNULL()) + return false; + uint64_t LowPC, HighPC; + if (getLowAndHighPC(CU, LowPC, HighPC)) + return (LowPC <= Address && Address <= HighPC); + // Try to get address ranges from .debug_ranges section. + uint32_t RangesOffset = getAttributeValueAsReference(CU, DW_AT_ranges, -1U); + if (RangesOffset != -1U) { + DWARFDebugRangeList RangeList; + if (CU->extractRangeList(RangesOffset, RangeList)) + return RangeList.containsAddress(CU->getBaseAddress(), Address); } return false; } const char* -DWARFDebugInfoEntryMinimal::getSubprogramName( - const DWARFCompileUnit *cu) const { - if (isNULL() || getTag() != DW_TAG_subprogram) +DWARFDebugInfoEntryMinimal::getSubroutineName( + const DWARFCompileUnit *CU) const { + if (!isSubroutineDIE()) return 0; // Try to get mangled name if possible. if (const char *name = - getAttributeValueAsString(cu, DW_AT_MIPS_linkage_name, 0)) + getAttributeValueAsString(CU, DW_AT_MIPS_linkage_name, 0)) return name; - if (const char *name = getAttributeValueAsString(cu, DW_AT_linkage_name, 0)) + if (const char *name = getAttributeValueAsString(CU, DW_AT_linkage_name, 0)) return name; - if (const char *name = getAttributeValueAsString(cu, DW_AT_name, 0)) + if (const char *name = getAttributeValueAsString(CU, DW_AT_name, 0)) return name; // Try to get name from specification DIE. uint32_t spec_ref = - getAttributeValueAsReference(cu, DW_AT_specification, -1U); + getAttributeValueAsReference(CU, DW_AT_specification, -1U); if (spec_ref != -1U) { DWARFDebugInfoEntryMinimal spec_die; - if (spec_die.extract(cu, &spec_ref)) { - if (const char *name = spec_die.getSubprogramName(cu)) + if (spec_die.extract(CU, &spec_ref)) { + if (const char *name = spec_die.getSubroutineName(CU)) return name; } } // Try to get name from abstract origin DIE. 
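The renamed getSubroutineName() follows a fixed fallback order. The sketch below spells that order out with plain callbacks instead of real DIE accessors (all parameter names are placeholders): mangled linkage name first, then DW_AT_name, then the DIEs referenced by DW_AT_specification and DW_AT_abstract_origin.

#include <functional>
#include <initializer_list>
#include <string>

// Illustrates the lookup order only; the std::function parameters stand in
// for attribute lookup and for recursing into referenced DIEs.
std::string resolveSubroutineName(
    const std::function<std::string(const char *Attr)> &GetString,
    const std::function<std::string()> &NameFromSpecification,
    const std::function<std::string()> &NameFromAbstractOrigin) {
  // Prefer a mangled name, then the plain short name.
  for (const char *Attr : {"DW_AT_MIPS_linkage_name", "DW_AT_linkage_name",
                           "DW_AT_name"}) {
    std::string Name = GetString(Attr);
    if (!Name.empty())
      return Name;
  }
  // Fall back to the specification DIE, then the abstract origin DIE.
  std::string Name = NameFromSpecification();
  if (!Name.empty())
    return Name;
  return NameFromAbstractOrigin();
}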
uint32_t abs_origin_ref = - getAttributeValueAsReference(cu, DW_AT_abstract_origin, -1U); + getAttributeValueAsReference(CU, DW_AT_abstract_origin, -1U); if (abs_origin_ref != -1U) { DWARFDebugInfoEntryMinimal abs_origin_die; - if (abs_origin_die.extract(cu, &abs_origin_ref)) { - if (const char *name = abs_origin_die.getSubprogramName(cu)) + if (abs_origin_die.extract(CU, &abs_origin_ref)) { + if (const char *name = abs_origin_die.getSubroutineName(CU)) return name; } } return 0; } + +void DWARFDebugInfoEntryMinimal::getCallerFrame( + const DWARFCompileUnit *CU, uint32_t &CallFile, uint32_t &CallLine, + uint32_t &CallColumn) const { + CallFile = getAttributeValueAsUnsigned(CU, DW_AT_call_file, 0); + CallLine = getAttributeValueAsUnsigned(CU, DW_AT_call_line, 0); + CallColumn = getAttributeValueAsUnsigned(CU, DW_AT_call_column, 0); +} + +DWARFDebugInfoEntryMinimal::InlinedChain +DWARFDebugInfoEntryMinimal::getInlinedChainForAddress( + const DWARFCompileUnit *CU, const uint64_t Address) const { + DWARFDebugInfoEntryMinimal::InlinedChain InlinedChain; + if (isNULL()) + return InlinedChain; + for (const DWARFDebugInfoEntryMinimal *DIE = this; DIE; ) { + // Append current DIE to inlined chain only if it has correct tag + // (e.g. it is not a lexical block). + if (DIE->isSubroutineDIE()) { + InlinedChain.push_back(*DIE); + } + // Try to get child which also contains provided address. + const DWARFDebugInfoEntryMinimal *Child = DIE->getFirstChild(); + while (Child) { + if (Child->addressRangeContainsAddress(CU, Address)) { + // Assume there is only one such child. + break; + } + Child = Child->getSibling(); + } + DIE = Child; + } + // Reverse the obtained chain to make the root of inlined chain last. + std::reverse(InlinedChain.begin(), InlinedChain.end()); + return InlinedChain; +} diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.h b/lib/DebugInfo/DWARFDebugInfoEntry.h index d5d86b9..9c1b2be 100644 --- a/lib/DebugInfo/DWARFDebugInfoEntry.h +++ b/lib/DebugInfo/DWARFDebugInfoEntry.h @@ -11,6 +11,7 @@ #define LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H #include "DWARFAbbreviationDeclaration.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/DataTypes.h" namespace llvm { @@ -19,6 +20,7 @@ class DWARFDebugAranges; class DWARFCompileUnit; class DWARFContext; class DWARFFormValue; +class DWARFInlinedSubroutineChain; /// DWARFDebugInfoEntryMinimal - A DIE with only the minimum required data. class DWARFDebugInfoEntryMinimal { @@ -52,6 +54,13 @@ public: uint32_t getTag() const { return AbbrevDecl ? AbbrevDecl->getTag() : 0; } bool isNULL() const { return AbbrevDecl == 0; } + + /// Returns true if DIE represents a subprogram (not inlined). + bool isSubprogramDIE() const; + /// Returns true if DIE represents a subprogram or an inlined + /// subroutine. + bool isSubroutineDIE() const; + uint32_t getOffset() const { return Offset; } uint32_t getNumAttributes() const { return !isNULL() ? AbbrevDecl->getNumAttributes() : 0; @@ -126,17 +135,40 @@ public: const uint16_t attr, int64_t fail_value) const; - void buildAddressRangeTable(const DWARFCompileUnit *cu, - DWARFDebugAranges *debug_aranges) const; - - bool addressRangeContainsAddress(const DWARFCompileUnit *cu, - const uint64_t address) const; - - // If a DIE represents a subprogram, returns its mangled name - // (or short name, if mangled is missing). This name may be fetched - // from specification or abstract origin for this subprogram. - // Returns null if no name is found. 
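The new getInlinedChainForAddress() walks from a DIE towards the leaves, keeping only subprogram/inlined_subroutine DIEs whose range contains the address, and reverses the result so the outermost subprogram ends up last. A self-contained model of that walk over a toy DIE tree (not the real API, and it assumes at most one child covers the address, as the patch does):

#include <algorithm>
#include <cstdint>
#include <vector>

// Toy DIE node: children, a subroutine flag, and a half-open address range.
struct Node {
  bool IsSubroutine;            // subprogram or inlined_subroutine
  uint64_t LowPC, HighPC;       // [LowPC, HighPC)
  std::vector<Node> Children;
  bool contains(uint64_t Addr) const { return LowPC <= Addr && Addr < HighPC; }
};

std::vector<const Node *> inlinedChainForAddress(const Node &Root,
                                                 uint64_t Addr) {
  std::vector<const Node *> Chain;
  const Node *DIE = &Root;
  while (DIE) {
    if (DIE->IsSubroutine)
      Chain.push_back(DIE);           // lexical blocks etc. are skipped
    const Node *Next = nullptr;
    for (const Node &Child : DIE->Children)
      if (Child.contains(Addr)) {     // assume at most one matching child
        Next = &Child;
        break;
      }
    DIE = Next;
  }
  std::reverse(Chain.begin(), Chain.end()); // root of the chain comes last
  return Chain;
}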
- const char* getSubprogramName(const DWARFCompileUnit *cu) const; + /// Retrieves DW_AT_low_pc and DW_AT_high_pc from CU. + /// Returns true if both attributes are present. + bool getLowAndHighPC(const DWARFCompileUnit *CU, + uint64_t &LowPC, uint64_t &HighPC) const; + + void buildAddressRangeTable(const DWARFCompileUnit *CU, + DWARFDebugAranges *DebugAranges) const; + + bool addressRangeContainsAddress(const DWARFCompileUnit *CU, + const uint64_t Address) const; + + /// If a DIE represents a subprogram (or inlined subroutine), + /// returns its mangled name (or short name, if mangled is missing). + /// This name may be fetched from specification or abstract origin + /// for this subprogram. Returns null if no name is found. + const char* getSubroutineName(const DWARFCompileUnit *CU) const; + + /// Retrieves values of DW_AT_call_file, DW_AT_call_line and + /// DW_AT_call_column from DIE (or zeroes if they are missing). + void getCallerFrame(const DWARFCompileUnit *CU, uint32_t &CallFile, + uint32_t &CallLine, uint32_t &CallColumn) const; + + /// InlinedChain - represents a chain of inlined_subroutine + /// DIEs, (possibly ending with subprogram DIE), all of which are contained + /// in some concrete inlined instance tree. Address range for each DIE + /// (except the last DIE) in this chain is contained in address + /// range for next DIE in the chain. + typedef SmallVector<DWARFDebugInfoEntryMinimal, 4> InlinedChain; + + /// Get inlined chain for a given address, rooted at the current DIE. + /// Returns empty chain if address is not contained in address range + /// of current DIE. + InlinedChain getInlinedChainForAddress(const DWARFCompileUnit *CU, + const uint64_t Address) const; }; } diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp index d99575d..267364a 100644 --- a/lib/DebugInfo/DWARFDebugLine.cpp +++ b/lib/DebugInfo/DWARFDebugLine.cpp @@ -10,6 +10,7 @@ #include "DWARFDebugLine.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" +#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> using namespace llvm; @@ -513,3 +514,29 @@ DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const { } return index; } + +bool +DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex, + bool NeedsAbsoluteFilePath, + std::string &Result) const { + if (FileIndex == 0 || FileIndex > Prologue.FileNames.size()) + return false; + const FileNameEntry &Entry = Prologue.FileNames[FileIndex - 1]; + const char *FileName = Entry.Name; + if (!NeedsAbsoluteFilePath || + sys::path::is_absolute(FileName)) { + Result = FileName; + return true; + } + SmallString<16> FilePath; + uint64_t IncludeDirIndex = Entry.DirIdx; + // Be defensive about the contents of Entry. + if (IncludeDirIndex > 0 && + IncludeDirIndex <= Prologue.IncludeDirectories.size()) { + const char *IncludeDir = Prologue.IncludeDirectories[IncludeDirIndex - 1]; + sys::path::append(FilePath, IncludeDir); + } + sys::path::append(FilePath, FileName); + Result = FilePath.str(); + return true; +} diff --git a/lib/DebugInfo/DWARFDebugLine.h b/lib/DebugInfo/DWARFDebugLine.h index 6382b45..586dd7e 100644 --- a/lib/DebugInfo/DWARFDebugLine.h +++ b/lib/DebugInfo/DWARFDebugLine.h @@ -12,6 +12,7 @@ #include "llvm/Support/DataExtractor.h" #include <map> +#include <string> #include <vector> namespace llvm { @@ -174,6 +175,13 @@ public: // Returns the index of the row with file/line info for a given address, // or -1 if there is no such row. 
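The getFileNameByIndex() addition resolves a 1-based file index from the line table prologue, optionally prefixing the include directory when an absolute path is requested. A rough, self-contained sketch of that logic; it uses plain string concatenation and a leading '/' check where the real code uses llvm::sys::path:

#include <string>
#include <vector>

struct FileEntry { std::string Name; uint64_t DirIdx; };

// FileIndex and DirIdx are 1-based; index 0 means "not present".
bool getFileNameByIndex(const std::vector<FileEntry> &FileNames,
                        const std::vector<std::string> &IncludeDirs,
                        uint64_t FileIndex, bool NeedsAbsolutePath,
                        std::string &Result) {
  if (FileIndex == 0 || FileIndex > FileNames.size())
    return false;
  const FileEntry &Entry = FileNames[FileIndex - 1];
  bool IsAbsolute = !Entry.Name.empty() && Entry.Name[0] == '/';
  if (!NeedsAbsolutePath || IsAbsolute) {
    Result = Entry.Name;
    return true;
  }
  // Be defensive about the directory index, as the patch is.
  std::string Path;
  if (Entry.DirIdx > 0 && Entry.DirIdx <= IncludeDirs.size())
    Path = IncludeDirs[Entry.DirIdx - 1] + "/";
  Result = Path + Entry.Name;
  return true;
}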
uint32_t lookupAddress(uint64_t address) const; + + // Extracts filename by its index in filename table in prologue. + // Returns true on success. + bool getFileNameByIndex(uint64_t FileIndex, + bool NeedsAbsoluteFilePath, + std::string &Result) const; + void dump(raw_ostream &OS) const; struct Prologue Prologue; diff --git a/lib/DebugInfo/DWARFDebugRangeList.cpp b/lib/DebugInfo/DWARFDebugRangeList.cpp new file mode 100644 index 0000000..1806bee --- /dev/null +++ b/lib/DebugInfo/DWARFDebugRangeList.cpp @@ -0,0 +1,67 @@ +//===-- DWARFDebugRangesList.cpp ------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "DWARFDebugRangeList.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +void DWARFDebugRangeList::clear() { + Offset = -1U; + AddressSize = 0; + Entries.clear(); +} + +bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr) { + clear(); + if (!data.isValidOffset(*offset_ptr)) + return false; + AddressSize = data.getAddressSize(); + if (AddressSize != 4 && AddressSize != 8) + return false; + Offset = *offset_ptr; + while (true) { + RangeListEntry entry; + uint32_t prev_offset = *offset_ptr; + entry.StartAddress = data.getAddress(offset_ptr); + entry.EndAddress = data.getAddress(offset_ptr); + // Check that both values were extracted correctly. + if (*offset_ptr != prev_offset + 2 * AddressSize) { + clear(); + return false; + } + if (entry.isEndOfListEntry()) + break; + Entries.push_back(entry); + } + return true; +} + +void DWARFDebugRangeList::dump(raw_ostream &OS) const { + for (int i = 0, n = Entries.size(); i != n; ++i) { + const char *format_str = (AddressSize == 4 + ? "%08x %08" PRIx64 " %08" PRIx64 "\n" + : "%08x %016" PRIx64 " %016" PRIx64 "\n"); + OS << format(format_str, Offset, Entries[i].StartAddress, + Entries[i].EndAddress); + } + OS << format("%08x <End of list>\n", Offset); +} + +bool DWARFDebugRangeList::containsAddress(uint64_t BaseAddress, + uint64_t Address) const { + for (int i = 0, n = Entries.size(); i != n; ++i) { + if (Entries[i].isBaseAddressSelectionEntry(AddressSize)) + BaseAddress = Entries[i].EndAddress; + else if (Entries[i].containsAddress(BaseAddress, Address)) + return true; + } + return false; +} diff --git a/lib/DebugInfo/DWARFDebugRangeList.h b/lib/DebugInfo/DWARFDebugRangeList.h new file mode 100644 index 0000000..4e34a91 --- /dev/null +++ b/lib/DebugInfo/DWARFDebugRangeList.h @@ -0,0 +1,78 @@ +//===-- DWARFDebugRangeList.h -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARFDEBUGRANGELIST_H +#define LLVM_DEBUGINFO_DWARFDEBUGRANGELIST_H + +#include "llvm/Support/DataExtractor.h" +#include <vector> + +namespace llvm { + +class raw_ostream; + +class DWARFDebugRangeList { +public: + struct RangeListEntry { + // A beginning address offset. This address offset has the size of an + // address and is relative to the applicable base address of the + // compilation unit referencing this range list. It marks the beginning + // of an address range. 
+ uint64_t StartAddress; + // An ending address offset. This address offset again has the size of + // an address and is relative to the applicable base address of the + // compilation unit referencing this range list. It marks the first + // address past the end of the address range. The ending address must + // be greater than or equal to the beginning address. + uint64_t EndAddress; + // The end of any given range list is marked by an end of list entry, + // which consists of a 0 for the beginning address offset + // and a 0 for the ending address offset. + bool isEndOfListEntry() const { + return (StartAddress == 0) && (EndAddress == 0); + } + // A base address selection entry consists of: + // 1. The value of the largest representable address offset + // (for example, 0xffffffff when the size of an address is 32 bits). + // 2. An address, which defines the appropriate base address for + // use in interpreting the beginning and ending address offsets of + // subsequent entries of the location list. + bool isBaseAddressSelectionEntry(uint8_t AddressSize) const { + assert(AddressSize == 4 || AddressSize == 8); + if (AddressSize == 4) + return StartAddress == -1U; + else + return StartAddress == -1ULL; + } + bool containsAddress(uint64_t BaseAddress, uint64_t Address) const { + return (BaseAddress + StartAddress <= Address) && + (Address < BaseAddress + EndAddress); + } + }; + +private: + // Offset in .debug_ranges section. + uint32_t Offset; + uint8_t AddressSize; + std::vector<RangeListEntry> Entries; + +public: + DWARFDebugRangeList() { clear(); } + void clear(); + void dump(raw_ostream &OS) const; + bool extract(DataExtractor data, uint32_t *offset_ptr); + /// containsAddress - Returns true if range list contains the given + /// address. Has to be passed base address of the compile unit that + /// references this range list. 
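To make the base-address-selection behavior described in these comments concrete, here is a compact model of a .debug_ranges list and the containment test it supports. Entry values mirror the DWARF rules: a (largest-offset, new-base) pair switches the base address, ordinary entries are offsets relative to the current base, and the terminating (0, 0) entry is assumed to have been stripped during extraction.

#include <cstdint>
#include <vector>

struct Entry { uint64_t Start, End; };

bool rangeListContains(const std::vector<Entry> &Entries, uint8_t AddrSize,
                       uint64_t BaseAddress, uint64_t Address) {
  const uint64_t MaxOffset = AddrSize == 4 ? 0xffffffffULL : ~0ULL;
  for (const Entry &E : Entries) {
    if (E.Start == MaxOffset) {
      BaseAddress = E.End;            // base address selection entry
      continue;
    }
    if (BaseAddress + E.Start <= Address && Address < BaseAddress + E.End)
      return true;                    // offsets are relative to the base
  }
  return false;
}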
+ bool containsAddress(uint64_t BaseAddress, uint64_t Address) const; +}; + +} // namespace llvm + +#endif // LLVM_DEBUGINFO_DWARFDEBUGRANGELIST_H diff --git a/lib/DebugInfo/DWARFFormValue.cpp b/lib/DebugInfo/DWARFFormValue.cpp index ee2a3ab..c9ecbbb 100644 --- a/lib/DebugInfo/DWARFFormValue.cpp +++ b/lib/DebugInfo/DWARFFormValue.cpp @@ -41,6 +41,10 @@ static const uint8_t form_sizes_addr4[] = { 8, // 0x14 DW_FORM_ref8 0, // 0x15 DW_FORM_ref_udata 0, // 0x16 DW_FORM_indirect + 4, // 0x17 DW_FORM_sec_offset + 0, // 0x18 DW_FORM_exprloc + 0, // 0x19 DW_FORM_flag_present + 8, // 0x20 DW_FORM_ref_sig8 }; static const uint8_t form_sizes_addr8[] = { @@ -67,6 +71,10 @@ static const uint8_t form_sizes_addr8[] = { 8, // 0x14 DW_FORM_ref8 0, // 0x15 DW_FORM_ref_udata 0, // 0x16 DW_FORM_indirect + 8, // 0x17 DW_FORM_sec_offset + 0, // 0x18 DW_FORM_exprloc + 0, // 0x19 DW_FORM_flag_present + 8, // 0x20 DW_FORM_ref_sig8 }; const uint8_t * @@ -93,6 +101,7 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, case DW_FORM_ref_addr: Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize()); break; + case DW_FORM_exprloc: case DW_FORM_block: Value.uval = data.getULEB128(offset_ptr); is_block = true; @@ -141,12 +150,24 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, // Set the string value to also be the data for inlined cstr form // values only so we can tell the differnence between DW_FORM_string // and DW_FORM_strp form values - Value.data = (uint8_t*)Value.cstr; + Value.data = (const uint8_t*)Value.cstr; break; case DW_FORM_indirect: Form = data.getULEB128(offset_ptr); indirect = true; break; + case DW_FORM_sec_offset: + if (cu->getAddressByteSize() == 4) + Value.uval = data.getU32(offset_ptr); + else + Value.uval = data.getU64(offset_ptr); + break; + case DW_FORM_flag_present: + Value.uval = 1; + break; + case DW_FORM_ref_sig8: + Value.uval = data.getU64(offset_ptr); + break; default: return false; } @@ -179,6 +200,7 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, switch (form) { // Blocks if inlined data that have a length field and the data bytes // inlined in the .debug_info + case DW_FORM_exprloc: case DW_FORM_block: { uint64_t size = debug_info_data.getULEB128(offset_ptr); *offset_ptr += size; @@ -211,6 +233,10 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, *offset_ptr += cu->getAddressByteSize(); return true; + // 0 byte values - implied from the form. + case DW_FORM_flag_present: + return true; + // 1 byte values case DW_FORM_data1: case DW_FORM_flag: @@ -234,6 +260,7 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, // 8 byte values case DW_FORM_data8: case DW_FORM_ref8: + case DW_FORM_ref_sig8: *offset_ptr += 8; return true; @@ -249,6 +276,15 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, indirect = true; form = debug_info_data.getULEB128(offset_ptr); break; + + // 4 for DWARF32, 8 for DWARF64. 
+ case DW_FORM_sec_offset: + if (cu->getAddressByteSize() == 4) + *offset_ptr += 4; + else + *offset_ptr += 8; + return true; + default: return false; } @@ -264,22 +300,26 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { switch (Form) { case DW_FORM_addr: OS << format("0x%016" PRIx64, uvalue); break; + case DW_FORM_flag_present: OS << "true"; break; case DW_FORM_flag: case DW_FORM_data1: OS << format("0x%02x", (uint8_t)uvalue); break; case DW_FORM_data2: OS << format("0x%04x", (uint16_t)uvalue); break; case DW_FORM_data4: OS << format("0x%08x", (uint32_t)uvalue); break; + case DW_FORM_ref_sig8: case DW_FORM_data8: OS << format("0x%016" PRIx64, uvalue); break; case DW_FORM_string: OS << '"'; OS.write_escaped(getAsCString(NULL)); OS << '"'; break; + case DW_FORM_exprloc: case DW_FORM_block: case DW_FORM_block1: case DW_FORM_block2: case DW_FORM_block4: if (uvalue > 0) { switch (Form) { + case DW_FORM_exprloc: case DW_FORM_block: OS << format("<0x%" PRIx64 "> ", uvalue); break; case DW_FORM_block1: OS << format("<0x%2.2x> ", (uint8_t)uvalue); break; case DW_FORM_block2: OS << format("<0x%4.4x> ", (uint16_t)uvalue); break; @@ -342,6 +382,14 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { case DW_FORM_indirect: OS << "DW_FORM_indirect"; break; + + case DW_FORM_sec_offset: + if (cu->getAddressByteSize() == 4) + OS << format("0x%08x", (uint32_t)uvalue); + else + OS << format("0x%016" PRIx64, uvalue); + break; + default: OS << format("DW_FORM(0x%4.4x)", Form); break; @@ -404,6 +452,7 @@ const uint8_t *DWARFFormValue::BlockData() const { bool DWARFFormValue::isBlockForm(uint16_t form) { switch (form) { + case DW_FORM_exprloc: case DW_FORM_block: case DW_FORM_block1: case DW_FORM_block2: diff --git a/lib/DebugInfo/DWARFFormValue.h b/lib/DebugInfo/DWARFFormValue.h index 22ac011..c5b590d 100644 --- a/lib/DebugInfo/DWARFFormValue.h +++ b/lib/DebugInfo/DWARFFormValue.h @@ -52,7 +52,7 @@ public: bool extractValue(DataExtractor data, uint32_t *offset_ptr, const DWARFCompileUnit *cu); bool isInlinedCStr() const { - return Value.data != NULL && Value.data == (uint8_t*)Value.cstr; + return Value.data != NULL && Value.data == (const uint8_t*)Value.cstr; } const uint8_t *BlockData() const; uint64_t getReference(const DWARFCompileUnit* cu) const; diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 4afc900..ba0aeca 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -833,7 +833,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes) { assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!"); - uint8_t *Src = (uint8_t *)IntVal.getRawData(); + const uint8_t *Src = (const uint8_t *)IntVal.getRawData(); if (sys::isLittleEndianHost()) { // Little-endian host - the source is ordered from LSB to MSB. 
Order the diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index ff3a9dc..3bf6db8 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -780,7 +780,7 @@ static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP, void JITEmitter::startFunction(MachineFunction &F) { DEBUG(dbgs() << "JIT: Starting CodeGen of Function " - << F.getFunction()->getName() << "\n"); + << F.getName() << "\n"); uintptr_t ActualSize = 0; // Set the memory writable, if it's not already @@ -929,7 +929,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { PrevDL = DebugLoc(); DEBUG(dbgs() << "JIT: Finished CodeGen of [" << (void*)FnStart - << "] Function: " << F.getFunction()->getName() + << "] Function: " << F.getName() << ": " << (FnEnd-FnStart) << " bytes of text, " << Relocations.size() << " relocations\n"); diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 99c65ec..fa71305 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -113,6 +113,11 @@ void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) { } void *MCJIT::getPointerToFunction(Function *F) { + // FIXME: This should really return a uint64_t since it's a pointer in the + // target address space, not our local address space. That's part of the + // ExecutionEngine interface, though. Fix that when the old JIT finally + // dies. + // FIXME: Add support for per-module compilation state if (!isCompiled) emitObject(M); @@ -126,10 +131,13 @@ void *MCJIT::getPointerToFunction(Function *F) { // FIXME: Should the Dyld be retaining module information? Probably not. // FIXME: Should we be using the mangler for this? Probably. + // + // This is the accessor for the target address, so make sure to check the + // load address of the symbol, not the local address. 
StringRef BaseName = F->getName(); if (BaseName[0] == '\1') - return (void*)Dyld.getSymbolAddress(BaseName.substr(1)); - return (void*)Dyld.getSymbolAddress((TM->getMCAsmInfo()->getGlobalPrefix() + return (void*)Dyld.getSymbolLoadAddress(BaseName.substr(1)); + return (void*)Dyld.getSymbolLoadAddress((TM->getMCAsmInfo()->getGlobalPrefix() + BaseName).str()); } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index a98ddc0..d47287b 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -479,6 +479,10 @@ void *RuntimeDyld::getSymbolAddress(StringRef Name) { return Dyld->getSymbolAddress(Name); } +uint64_t RuntimeDyld::getSymbolLoadAddress(StringRef Name) { + return Dyld->getSymbolLoadAddress(Name); +} + void RuntimeDyld::resolveRelocations() { Dyld->resolveRelocations(); } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 0aea598..a1c0e40 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -36,8 +36,7 @@ class DyldELFObject : public ELFObjectFile<target_endianness, is64Bits> { typedef Elf_Rel_Impl<target_endianness, is64Bits, false> Elf_Rel; typedef Elf_Rel_Impl<target_endianness, is64Bits, true> Elf_Rela; - typedef typename ELFObjectFile<target_endianness, is64Bits>:: - Elf_Ehdr Elf_Ehdr; + typedef Elf_Ehdr_Impl<target_endianness, is64Bits> Elf_Ehdr; typedef typename ELFDataTypeTypedefHelper< target_endianness, is64Bits>::value_type addr_type; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index 3d89994..d5df732 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -177,6 +177,10 @@ protected: return true; } + uint64_t getSectionLoadAddress(unsigned SectionID) { + return Sections[SectionID].LoadAddress; + } + uint8_t *getSectionAddress(unsigned SectionID) { return (uint8_t*)Sections[SectionID].Address; } @@ -223,7 +227,10 @@ protected: void resolveRelocationEntry(const RelocationEntry &RE, uint64_t Value); /// \brief A object file specific relocation resolver - /// \param Address Address to apply the relocation action + /// \param LocalAddress The address to apply the relocation action + /// \param FinalAddress If the linker prepare code for remote executon then + /// FinalAddress has the remote address to apply the + /// relocation action, otherwise is same as LocalAddress /// \param Value Target symbol address to apply the relocation action /// \param Type object file specific relocation type /// \param Addend A constant addend used to compute the value to be stored @@ -267,6 +274,15 @@ public: return getSectionAddress(Loc.first) + Loc.second; } + uint64_t getSymbolLoadAddress(StringRef Name) { + // FIXME: Just look up as a function for now. Overly simple of course. + // Work in progress. 
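The MCJIT/RuntimeDyld changes here distinguish two addresses for the same symbol: the local address of the buffer the dynamic loader emitted the section into, and the load address the code will actually run at (which may be in a remote process). A rough model of that bookkeeping, with made-up names, showing why getSymbolAddress and getSymbolLoadAddress can return different things:

#include <cstdint>
#include <map>
#include <string>
#include <utility>

// Each section keeps both the host buffer and the target load address;
// symbols are (section, offset) pairs.
struct ToySection {
  uint8_t *Address;      // where the bytes live in this process
  uint64_t LoadAddress;  // where they will execute (possibly remote)
};

struct ToyDyld {
  std::map<std::string, std::pair<unsigned, uint64_t> > Symbols;
  std::map<unsigned, ToySection> Sections;

  // Pointer usable by this process, e.g. for patching relocations in place.
  uint8_t *getSymbolAddress(const std::string &Name) const {
    auto I = Symbols.find(Name);
    if (I == Symbols.end())
      return nullptr;
    const ToySection &S = Sections.find(I->second.first)->second;
    return S.Address + I->second.second;
  }

  // Address valid in the target's address space, e.g. as a call target.
  uint64_t getSymbolLoadAddress(const std::string &Name) const {
    auto I = Symbols.find(Name);
    if (I == Symbols.end())
      return 0;
    const ToySection &S = Sections.find(I->second.first)->second;
    return S.LoadAddress + I->second.second;
  }
};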
+ if (GlobalSymbolTable.find(Name) == GlobalSymbolTable.end()) + return 0; + SymbolLoc Loc = GlobalSymbolTable.lookup(Name); + return getSectionLoadAddress(Loc.first) + Loc.second; + } + void resolveRelocations(); void reassignSectionAddress(unsigned SectionID, uint64_t Addr); diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 7203b9a..6e37b5c 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -270,9 +270,10 @@ class ELFObjectWriter : public MCObjectWriter { /// ComputeSymbolTable - Compute the symbol table data /// - /// \param StringTable [out] - The string table data. - /// \param StringIndexMap [out] - Map from symbol names to offsets in the - /// string table. + /// \param Asm - The assembler. + /// \param SectionIndexMap - Maps a section to its index. + /// \param RevGroupMap - Maps a signature symbol to the group section. + /// \param NumRegularSections - Number of non-relocation sections. void ComputeSymbolTable(MCAssembler &Asm, const SectionIndexMapTy &SectionIndexMap, RevGroupMapTy RevGroupMap, diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index 8da2e0e..7ea0f3b 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -68,8 +68,8 @@ MCAsmInfo::MCAsmInfo() { GlobalDirective = "\t.globl\t"; HasSetDirective = true; HasAggressiveSymbolFolding = true; - LCOMMDirectiveType = LCOMM::None; COMMDirectiveAlignmentIsInBytes = true; + LCOMMDirectiveAlignmentType = LCOMM::NoAlignment; HasDotTypeDotSizeDirective = true; HasSingleParameterDotFile = true; HasNoDeadStrip = false; diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp index 678e75a..fd79193 100644 --- a/lib/MC/MCAsmInfoCOFF.cpp +++ b/lib/MC/MCAsmInfoCOFF.cpp @@ -19,8 +19,10 @@ void MCAsmInfoCOFF::anchor() { } MCAsmInfoCOFF::MCAsmInfoCOFF() { GlobalPrefix = "_"; + // MingW 4.5 and later support .comm with log2 alignment, but .lcomm uses byte + // alignment. COMMDirectiveAlignmentIsInBytes = false; - LCOMMDirectiveType = LCOMM::ByteAlignment; + LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment; HasDotTypeDotSizeDirective = false; HasSingleParameterDotFile = false; PrivateGlobalPrefix = "L"; // Prefix for private global symbols diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp index 8e0ac23..a0e3eba 100644 --- a/lib/MC/MCAsmInfoDarwin.cpp +++ b/lib/MC/MCAsmInfoDarwin.cpp @@ -32,6 +32,7 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { AlignmentIsInBytes = false; COMMDirectiveAlignmentIsInBytes = false; + LCOMMDirectiveAlignmentType = LCOMM::Log2Alignment; InlineAsmStart = " InlineAsm Start"; InlineAsmEnd = " InlineAsm End"; diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 373df4b..b0bc290 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -166,7 +166,7 @@ public: /// /// @param Symbol - The common symbol to emit. /// @param Size - The size of the common symbol. - /// @param Size - The alignment of the common symbol in bytes. + /// @param ByteAlignment - The alignment of the common symbol in bytes. virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment); @@ -517,13 +517,19 @@ void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, /// @param Size - The size of the common symbol. 
void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlign) { - assert(MAI.getLCOMMDirectiveType() != LCOMM::None && - "Doesn't have .lcomm, can't emit it!"); OS << "\t.lcomm\t" << *Symbol << ',' << Size; if (ByteAlign > 1) { - assert(MAI.getLCOMMDirectiveType() == LCOMM::ByteAlignment && - "Alignment not supported on .lcomm!"); - OS << ',' << ByteAlign; + switch (MAI.getLCOMMDirectiveAlignmentType()) { + case LCOMM::NoAlignment: + llvm_unreachable("alignment not supported on .lcomm!"); + case LCOMM::ByteAlignment: + OS << ',' << ByteAlign; + break; + case LCOMM::Log2Alignment: + assert(isPowerOf2_32(ByteAlign) && "alignment must be a power of 2"); + OS << ',' << Log2_32(ByteAlign); + break; + } } EmitEOL(); } diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index b7d2c28..926d39b 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -331,6 +331,12 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, const MCAlignFragment &AF = cast<MCAlignFragment>(F); unsigned Offset = Layout.getFragmentOffset(&AF); unsigned Size = OffsetToAlignment(Offset, AF.getAlignment()); + // If we are padding with nops, force the padding to be larger than the + // minimum nop size. + if (Size > 0 && AF.hasEmitNops()) { + while (Size % getBackend().getMinimumNopSize()) + Size += AF.getAlignment(); + } if (Size > AF.getMaxBytesToEmit()) return 0; return Size; @@ -830,6 +836,7 @@ raw_ostream &operator<<(raw_ostream &OS, const MCFixup &AF) { } +#ifndef NDEBUG void MCFragment::dump() { raw_ostream &OS = llvm::errs(); @@ -970,6 +977,7 @@ void MCAssembler::dump() { } OS << "]>\n"; } +#endif // anchors for MC*Fragment vtables void MCDataFragment::anchor() { } diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 4c63e43..96938f7 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -425,9 +425,11 @@ void MCDwarfFile::print(raw_ostream &OS) const { OS << '"' << getName() << '"'; } +#ifndef NDEBUG void MCDwarfFile::dump() const { print(dbgs()); } +#endif // Utility function to write a tuple for .debug_abbrev. 
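The MCAssembler::computeFragmentSize() change just above grows nop padding in whole alignment units until it can be expressed with the backend's nops. A small arithmetic sketch of that adjustment; MinNopSize stands in for getBackend().getMinimumNopSize(), and the loop assumes the alignment is a multiple of the minimum nop size (otherwise it would not terminate), which holds for the in-tree cases this targets.

#include <cstdint>

// Bytes needed to advance Offset to the next multiple of Align (a power of 2).
uint64_t offsetToAlignment(uint64_t Offset, uint64_t Align) {
  return (Align - (Offset % Align)) % Align;
}

// If the padding will be emitted as nops, bump it (by whole alignment units)
// until it is a multiple of the smallest nop the backend can emit.
uint64_t alignmentPaddingSize(uint64_t Offset, uint64_t Align,
                              bool EmitNops, uint64_t MinNopSize) {
  uint64_t Size = offsetToAlignment(Offset, Align);
  if (Size > 0 && EmitNops)
    while (Size % MinNopSize)
      Size += Align;
  return Size;
}

For example, with Offset = 13, Align = 16 and a 4-byte minimum nop, the raw padding of 3 bytes is bumped to 19, then 35, then 51... only if Align were a multiple of MinNopSize (say Align = 8, padding 3 -> 11 -> 19 -> ... -> stops at a multiple of 4 after one step to 11? no, 3 -> 11 -> 19 -> 27 never hits a multiple of 4), so in practice backends with a 4-byte minimum nop use 4-byte-multiple alignments, e.g. padding 2 with Align 4 becomes 2 -> 6 -> ... -> stops at 2+4k divisible by 4? It stops at 2 only when MinNopSize divides it; with Align 4 and MinNopSize 4 the sequence 2, 6, 10 never terminates either, which is exactly why the lead-in states the divisibility assumption: with Align = 8 and MinNopSize = 4, padding 6 is already valid and padding 2 becomes 10, then 18, then... again non-terminating. In short, the loop is only well-behaved when MinNopSize divides Align and the initial padding; treat the sketch as a direct transcription of the patch rather than a general-purpose routine.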
static void EmitAbbrev(MCStreamer *MCOS, uint64_t Name, uint64_t Form) { diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index 0eb7fcc..b196659 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -136,10 +136,12 @@ void MCExpr::print(raw_ostream &OS) const { llvm_unreachable("Invalid expression kind!"); } +#ifndef NDEBUG void MCExpr::dump() const { print(dbgs()); dbgs() << '\n'; } +#endif /* *** */ @@ -197,7 +199,8 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_ARM_GOTTPOFF: return "(gottpoff)"; case VK_ARM_TLSGD: return "(tlsgd)"; case VK_ARM_TARGET1: return "(target1)"; - case VK_PPC_TOC: return "toc"; + case VK_PPC_TOC: return "tocbase"; + case VK_PPC_TOC_ENTRY: return "toc"; case VK_PPC_DARWIN_HA16: return "ha16"; case VK_PPC_DARWIN_LO16: return "lo16"; case VK_PPC_GAS_HA16: return "ha"; diff --git a/lib/MC/MCInst.cpp b/lib/MC/MCInst.cpp index 7bbfd2e..e96010b 100644 --- a/lib/MC/MCInst.cpp +++ b/lib/MC/MCInst.cpp @@ -32,10 +32,12 @@ void MCOperand::print(raw_ostream &OS, const MCAsmInfo *MAI) const { OS << ">"; } +#ifndef NDEBUG void MCOperand::dump() const { print(dbgs(), 0); dbgs() << "\n"; } +#endif void MCInst::print(raw_ostream &OS, const MCAsmInfo *MAI) const { OS << "<MCInst " << getOpcode(); @@ -62,7 +64,9 @@ void MCInst::dump_pretty(raw_ostream &OS, const MCAsmInfo *MAI, OS << ">"; } +#ifndef NDEBUG void MCInst::dump() const { print(dbgs(), 0); dbgs() << "\n"; } +#endif diff --git a/lib/MC/MCLabel.cpp b/lib/MC/MCLabel.cpp index 9c0fc92..95d7d16 100644 --- a/lib/MC/MCLabel.cpp +++ b/lib/MC/MCLabel.cpp @@ -16,6 +16,8 @@ void MCLabel::print(raw_ostream &OS) const { OS << '"' << getInstance() << '"'; } +#ifndef NDEBUG void MCLabel::dump() const { print(dbgs()); } +#endif diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index b75fe2c..74f6dc6 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -70,9 +70,7 @@ public: llvm_unreachable("macho doesn't support this directive"); } virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment) { - llvm_unreachable("macho doesn't support this directive"); - } + unsigned ByteAlignment); virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, uint64_t Size = 0, unsigned ByteAlignment = 0); virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, @@ -325,6 +323,15 @@ void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, SD.setCommon(Size, ByteAlignment); } +void MCMachOStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + // '.lcomm' is equivalent to '.zerofill'. 
+ return EmitZerofill(getContext().getMachOSection("__DATA", "__bss", + MCSectionMachO::S_ZEROFILL, + 0, SectionKind::getBSS()), + Symbol, Size, ByteAlignment); +} + void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { MCSectionData &SectData = getAssembler().getOrCreateSectionData(*Section); diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index bad7cfe..21756cd 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -258,12 +258,18 @@ bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset, void MCObjectStreamer::EmitGPRel32Value(const MCExpr *Value) { MCDataFragment *DF = getOrCreateDataFragment(); - DF->addFixup(MCFixup::Create(DF->getContents().size(), - Value, - FK_GPRel_4)); + DF->addFixup(MCFixup::Create(DF->getContents().size(), Value, FK_GPRel_4)); DF->getContents().resize(DF->getContents().size() + 4, 0); } +// Associate GPRel32 fixup with data and resize data area +void MCObjectStreamer::EmitGPRel64Value(const MCExpr *Value) { + MCDataFragment *DF = getOrCreateDataFragment(); + + DF->addFixup(MCFixup::Create(DF->getContents().size(), Value, FK_GPRel_4)); + DF->getContents().resize(DF->getContents().size() + 8, 0); +} + void MCObjectStreamer::FinishImpl() { // Dump out the dwarf file & directory tables and line tables. const MCSymbol *LineSectionSymbol = NULL; diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 240c10b..55ef01c 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -133,13 +133,13 @@ private: public: AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, const MCAsmInfo &MAI); - ~AsmParser(); + virtual ~AsmParser(); virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false); - void AddDirectiveHandler(MCAsmParserExtension *Object, - StringRef Directive, - DirectiveHandler Handler) { + virtual void AddDirectiveHandler(MCAsmParserExtension *Object, + StringRef Directive, + DirectiveHandler Handler) { DirectiveMap[Directive] = std::make_pair(Object, Handler); } @@ -166,7 +166,7 @@ public: virtual bool Error(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()); - const AsmToken &Lex(); + virtual const AsmToken &Lex(); bool ParseExpression(const MCExpr *&Res); virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc); @@ -207,7 +207,7 @@ private: /// subsequently. void JumpToLoc(SMLoc Loc); - void EatToEndOfStatement(); + virtual void EatToEndOfStatement(); bool ParseMacroArgument(MacroArgument &MA); bool ParseMacroArguments(const Macro *M, MacroArguments &A); @@ -215,7 +215,7 @@ private: /// \brief Parse up to the end of statement and a return the contents from the /// current token until the end of the statement; the current token on exit /// will be either the EndOfStatement or EOF. - StringRef ParseStringToEndOfStatement(); + virtual StringRef ParseStringToEndOfStatement(); /// \brief Parse until the end of a statement or a comma is encountered, /// return the contents from the current token up to the end or comma. @@ -230,7 +230,7 @@ private: /// ParseIdentifier - Parse an identifier or string (as a quoted identifier) /// and set \arg Res to the identifier contents. - bool ParseIdentifier(StringRef &Res); + virtual bool ParseIdentifier(StringRef &Res); // Directive Parsing. 
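The .lcomm rework threads a per-target alignment flavor (none, byte, or log2) from MCAsmInfo through both the asm streamer shown earlier and the .comm/.lcomm parsing that follows. A compact sketch of the emission side, using a hypothetical enum in place of LCOMM::LCOMMType:

#include <cassert>
#include <cstdint>
#include <sstream>
#include <string>

enum LCOMMAlignmentType { NoAlignment, ByteAlignment, Log2Alignment };

static unsigned log2_32(uint32_t Value) {
  unsigned Result = 0;
  while (Value >>= 1)
    ++Result;
  return Result;
}

// Emits ".lcomm symbol,size[,alignment]", encoding the alignment the way the
// target expects: raw byte count (e.g. MinGW) or log2 (e.g. Darwin).
std::string emitLCommDirective(const std::string &Symbol, uint64_t Size,
                               unsigned ByteAlign, LCOMMAlignmentType Type) {
  std::ostringstream OS;
  OS << "\t.lcomm\t" << Symbol << ',' << Size;
  if (ByteAlign > 1) {
    assert(Type != NoAlignment && "alignment not supported on .lcomm!");
    assert((ByteAlign & (ByteAlign - 1)) == 0 && "alignment must be a power of 2");
    OS << ',' << (Type == ByteAlignment ? ByteAlign : log2_32(ByteAlign));
  }
  return OS.str();
}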
@@ -2280,8 +2280,13 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) { if (ParseAbsoluteExpression(Pow2Alignment)) return true; + LCOMM::LCOMMType LCOMM = Lexer.getMAI().getLCOMMDirectiveAlignmentType(); + if (IsLocal && LCOMM == LCOMM::NoAlignment) + return Error(Pow2AlignmentLoc, "alignment not supported on this target"); + // If this target takes alignments in bytes (not log) validate and convert. - if (Lexer.getMAI().getAlignmentIsInBytes()) { + if ((!IsLocal && Lexer.getMAI().getCOMMDirectiveAlignmentIsInBytes()) || + (IsLocal && LCOMM == LCOMM::ByteAlignment)) { if (!isPowerOf2_64(Pow2Alignment)) return Error(Pow2AlignmentLoc, "alignment must be a power of 2"); Pow2Alignment = Log2_64(Pow2Alignment); @@ -2309,13 +2314,9 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) { if (!Sym->isUndefined()) return Error(IDLoc, "invalid symbol redefinition"); - // '.lcomm' is equivalent to '.zerofill'. // Create the Symbol as a common or local common with Size and Pow2Alignment if (IsLocal) { - getStreamer().EmitZerofill(Ctx.getMachOSection( - "__DATA", "__bss", MCSectionMachO::S_ZEROFILL, - 0, SectionKind::getBSS()), - Sym, Size, 1 << Pow2Alignment); + getStreamer().EmitLocalCommonSymbol(Sym, Size, 1 << Pow2Alignment); return false; } diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index 9316bb1..d55de1f 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -203,7 +203,7 @@ bool ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) { StringRef Name; if (getParser().ParseIdentifier(Name)) return TokError("expected identifier in directive"); - MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);; + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp index 3a825f0..93ee2dd 100644 --- a/lib/MC/MCParser/MCAsmParser.cpp +++ b/lib/MC/MCParser/MCAsmParser.cpp @@ -44,5 +44,7 @@ bool MCAsmParser::ParseExpression(const MCExpr *&Res) { } void MCParsedAsmOperand::dump() const { +#ifndef NDEBUG dbgs() << " " << *this; +#endif } diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp index 05c83f7..cbf853c 100644 --- a/lib/MC/MCSubtargetInfo.cpp +++ b/lib/MC/MCSubtargetInfo.cpp @@ -70,7 +70,7 @@ uint64_t MCSubtargetInfo::ToggleFeature(StringRef FS) { } -MCSchedModel * +const MCSchedModel * MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const { assert(ProcSchedModel && "Processor machine model not available!"); @@ -93,11 +93,11 @@ MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const { return &MCSchedModel::DefaultSchedModel; } assert(Found->Value && "Missing processor SchedModel value"); - return (MCSchedModel *)Found->Value; + return (const MCSchedModel *)Found->Value; } InstrItineraryData MCSubtargetInfo::getInstrItineraryForCPU(StringRef CPU) const { - MCSchedModel *SchedModel = getSchedModelForCPU(CPU); + const MCSchedModel *SchedModel = getSchedModelForCPU(CPU); return InstrItineraryData(SchedModel, Stages, OperandCycles, ForwardingPaths); } diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp index f7f9184..f60126b 100644 --- a/lib/MC/MCSymbol.cpp +++ b/lib/MC/MCSymbol.cpp @@ -76,6 +76,8 @@ void MCSymbol::print(raw_ostream &OS) const { OS << '"' << getName() << '"'; } +#ifndef NDEBUG void MCSymbol::dump() const { print(dbgs()); } +#endif diff --git a/lib/MC/MCValue.cpp b/lib/MC/MCValue.cpp index c6ea16c..a37149d 100644 --- a/lib/MC/MCValue.cpp 
+++ b/lib/MC/MCValue.cpp @@ -31,6 +31,8 @@ void MCValue::print(raw_ostream &OS, const MCAsmInfo *MAI) const { OS << " + " << getConstant(); } +#ifndef NDEBUG void MCValue::dump() const { print(dbgs(), 0); } +#endif diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index 5820a22..c57b0d6 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -396,8 +396,7 @@ void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) { continue; // Initialize the section indirect symbol base, if necessary. - if (!IndirectSymBase.count(it->SectionData)) - IndirectSymBase[it->SectionData] = IndirectIndex; + IndirectSymBase.insert(std::make_pair(it->SectionData, IndirectIndex)); Asm.getOrCreateSymbolData(*it->Symbol); } @@ -414,8 +413,7 @@ void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) { continue; // Initialize the section indirect symbol base, if necessary. - if (!IndirectSymBase.count(it->SectionData)) - IndirectSymBase[it->SectionData] = IndirectIndex; + IndirectSymBase.insert(std::make_pair(it->SectionData, IndirectIndex)); // Set the symbol type to undefined lazy, but only on construction. // diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp index 0a44e77..317a48e 100644 --- a/lib/MC/SubtargetFeature.cpp +++ b/lib/MC/SubtargetFeature.cpp @@ -337,9 +337,9 @@ uint64_t SubtargetFeatures::getFeatureBits(const StringRef CPU, } /// Get scheduling itinerary of a CPU. -void *SubtargetFeatures::getItinerary(const StringRef CPU, - const SubtargetInfoKV *Table, - size_t TableSize) { +const void *SubtargetFeatures::getItinerary(const StringRef CPU, + const SubtargetInfoKV *Table, + size_t TableSize) { assert(Table && "missing table"); #ifndef NDEBUG for (size_t i = 1; i < TableSize; i++) { @@ -368,11 +368,13 @@ void SubtargetFeatures::print(raw_ostream &OS) const { OS << "\n"; } +#ifndef NDEBUG /// dump - Dump feature info. /// void SubtargetFeatures::dump() const { print(dbgs()); } +#endif /// getDefaultSubtargetFeatures - Return a string listing the features /// associated with the target triple. diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index ed261a4..f143e6d 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -196,8 +196,10 @@ totalExponent(StringRef::iterator p, StringRef::iterator end, assert(value < 10U && "Invalid character in exponent"); unsignedExponent = unsignedExponent * 10 + value; - if (unsignedExponent > 32767) + if (unsignedExponent > 32767) { overflow = true; + break; + } } if (exponentAdjustment > 32767 || exponentAdjustment < -32768) diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp index 2d8ddd9..45fec36 100644 --- a/lib/Support/DynamicLibrary.cpp +++ b/lib/Support/DynamicLibrary.cpp @@ -160,7 +160,7 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) { // On linux we have a weird situation. The stderr/out/in symbols are both // macros and global variables because of standards requirements. So, we // boldly use the EXPLICIT_SYMBOL macro without checking for a #define first. 
-#if defined(__linux__) && !defined(__android__) +#if defined(__linux__) and !defined(__ANDROID__) { EXPLICIT_SYMBOL(stderr); EXPLICIT_SYMBOL(stdout); diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp index c6282c6..4d489a8 100644 --- a/lib/Support/FoldingSet.cpp +++ b/lib/Support/FoldingSet.cpp @@ -38,6 +38,14 @@ bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const { return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0; } +/// Used to compare the "ordering" of two nodes as defined by the +/// profiled bits and their ordering defined by memcmp(). +bool FoldingSetNodeIDRef::operator<(FoldingSetNodeIDRef RHS) const { + if (Size != RHS.Size) + return Size < RHS.Size; + return memcmp(Data, RHS.Data, Size*sizeof(*Data)) < 0; +} + //===----------------------------------------------------------------------===// // FoldingSetNodeID Implementation @@ -152,6 +160,16 @@ bool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const { return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS; } +/// Used to compare the "ordering" of two nodes as defined by the +/// profiled bits and their ordering defined by memcmp(). +bool FoldingSetNodeID::operator<(const FoldingSetNodeID &RHS)const{ + return *this < FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size()); +} + +bool FoldingSetNodeID::operator<(FoldingSetNodeIDRef RHS) const { + return FoldingSetNodeIDRef(Bits.data(), Bits.size()) < RHS; +} + /// Intern - Copy this node's data to a memory region allocated from the /// given allocator and return a FoldingSetNodeIDRef describing the /// interned data. diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp index 8cb9857..59bfcfc 100644 --- a/lib/Support/LockFileManager.cpp +++ b/lib/Support/LockFileManager.cpp @@ -49,8 +49,7 @@ LockFileManager::readLockFile(StringRef LockFileName) { } bool LockFileManager::processStillExecuting(StringRef Hostname, int PID) { -// getsid not supported in Android bionic library -#if LLVM_ON_UNIX && !defined(ANDROID_TARGET_BUILD) +#if LLVM_ON_UNIX && !defined(__ANDROID__) char MyHostname[256]; MyHostname[255] = 0; MyHostname[0] = 0; diff --git a/lib/Support/SmallVector.cpp b/lib/Support/SmallVector.cpp index a89f149..f9c0e78 100644 --- a/lib/Support/SmallVector.cpp +++ b/lib/Support/SmallVector.cpp @@ -16,14 +16,15 @@ using namespace llvm; /// grow_pod - This is an implementation of the grow() method which only works /// on POD-like datatypes and is out of line to reduce code duplication. -void SmallVectorBase::grow_pod(size_t MinSizeInBytes, size_t TSize) { +void SmallVectorBase::grow_pod(void *FirstEl, size_t MinSizeInBytes, + size_t TSize) { size_t CurSizeBytes = size_in_bytes(); size_t NewCapacityInBytes = 2 * capacity_in_bytes() + TSize; // Always grow. if (NewCapacityInBytes < MinSizeInBytes) NewCapacityInBytes = MinSizeInBytes; void *NewElts; - if (this->isSmall()) { + if (BeginX == FirstEl) { NewElts = malloc(NewCapacityInBytes); // Copy the elements over. No need to run dtors on PODs. 
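The SmallVector change passes the address of the inline buffer into grow_pod() so the base class can tell "still using the stack buffer" apart from "already on the heap" by a simple pointer comparison. A stripped-down model of that grow policy (illustrative only, not the real SmallVectorBase; allocation-failure handling omitted):

#include <cstddef>
#include <cstdlib>
#include <cstring>

struct TinyVecBase {
  void *BeginX, *EndX, *CapacityX;

  size_t sizeInBytes() const { return (char *)EndX - (char *)BeginX; }
  size_t capacityInBytes() const { return (char *)CapacityX - (char *)BeginX; }

  // Grow policy for POD elements: at least double, never below the minimum.
  void growPOD(void *FirstEl, size_t MinSizeInBytes, size_t TSize) {
    size_t CurSizeBytes = sizeInBytes();
    size_t NewCapacityInBytes = 2 * capacityInBytes() + TSize; // always grow
    if (NewCapacityInBytes < MinSizeInBytes)
      NewCapacityInBytes = MinSizeInBytes;

    void *NewElts;
    if (BeginX == FirstEl) {
      // First time leaving the inline buffer: allocate and copy the elements.
      NewElts = std::malloc(NewCapacityInBytes);
      std::memcpy(NewElts, BeginX, CurSizeBytes);
    } else {
      // Already on the heap: resize the existing allocation.
      NewElts = std::realloc(BeginX, NewCapacityInBytes);
    }
    BeginX = NewElts;
    EndX = (char *)NewElts + CurSizeBytes;
    CapacityX = (char *)NewElts + NewCapacityInBytes;
  }
};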
@@ -37,4 +38,3 @@ void SmallVectorBase::grow_pod(size_t MinSizeInBytes, size_t TSize) { this->BeginX = NewElts; this->CapacityX = (char*)this->BeginX + NewCapacityInBytes; } - diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp index c2fc261..9ac1f86 100644 --- a/lib/Support/StringMap.cpp +++ b/lib/Support/StringMap.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Compiler.h" #include <cassert> using namespace llvm; @@ -69,7 +70,7 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) { while (1) { StringMapEntryBase *BucketItem = TheTable[BucketNo]; // If we found an empty bucket, this key isn't in the table yet, return it. - if (BucketItem == 0) { + if (LLVM_LIKELY(BucketItem == 0)) { // If we found a tombstone, we want to reuse the tombstone instead of an // empty bucket. This reduces probing. if (FirstTombstone != -1) { @@ -84,7 +85,7 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) { if (BucketItem == getTombstoneVal()) { // Skip over tombstones. However, remember the first one we see. if (FirstTombstone == -1) FirstTombstone = BucketNo; - } else if (HashTable[BucketNo] == FullHashValue) { + } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) { // If the full hash value matches, check deeply for a match. The common // case here is that we are only looking at the buckets (for item info // being non-null and for the full hash value) not at the items. This @@ -123,12 +124,12 @@ int StringMapImpl::FindKey(StringRef Key) const { while (1) { StringMapEntryBase *BucketItem = TheTable[BucketNo]; // If we found an empty bucket, this key isn't in the table yet, return. - if (BucketItem == 0) + if (LLVM_LIKELY(BucketItem == 0)) return -1; if (BucketItem == getTombstoneVal()) { // Ignore tombstones. - } else if (HashTable[BucketNo] == FullHashValue) { + } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) { // If the full hash value matches, check deeply for a match. The common // case here is that we are only looking at the buckets (for item info // being non-null and for the full hash value) not at the items. 
This diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index cca549d..d1dc7c8 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -95,6 +95,7 @@ const char *Triple::getVendorTypeName(VendorType Kind) { case SCEI: return "scei"; case BGP: return "bgp"; case BGQ: return "bgq"; + case Freescale: return "fsl"; } llvm_unreachable("Invalid VendorType!"); @@ -138,7 +139,7 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) { case GNUEABI: return "gnueabi"; case EABI: return "eabi"; case MachO: return "macho"; - case ANDROIDEABI: return "androideabi"; + case Android: return "android"; } llvm_unreachable("Invalid EnvironmentType!"); @@ -269,6 +270,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) { .Case("scei", Triple::SCEI) .Case("bgp", Triple::BGP) .Case("bgq", Triple::BGQ) + .Case("fsl", Triple::Freescale) .Default(Triple::UnknownVendor); } @@ -305,7 +307,7 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) { .StartsWith("gnueabi", Triple::GNUEABI) .StartsWith("gnu", Triple::GNU) .StartsWith("macho", Triple::MachO) - .StartsWith("androideabi", Triple::ANDROIDEABI) + .StartsWith("android", Triple::Android) .Default(Triple::UnknownEnvironment); } diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index 1d667ab..2f1e382 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -293,7 +293,7 @@ static void PrintStackTrace(void *) { #endif } -/// PrintStackTraceOnErrorSignal - When an error signal (such as SIBABRT or +/// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or /// SIGSEGV) is delivered to the process, print a stack trace and then exit. void llvm::sys::PrintStackTraceOnErrorSignal() { AddSignalHandler(PrintStackTrace, 0); @@ -305,10 +305,10 @@ void llvm::sys::PrintStackTraceOnErrorSignal() { exception_mask_t mask = EXC_MASK_CRASH; - kern_return_t ret = task_set_exception_ports(self, + kern_return_t ret = task_set_exception_ports(self, mask, MACH_PORT_NULL, - EXCEPTION_STATE_IDENTITY | MACH_EXCEPTION_CODES, + EXCEPTION_STATE_IDENTITY | MACH_EXCEPTION_CODES, THREAD_STATE_NONE); (void)ret; } diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index fa69c2d..7cd5364 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -266,8 +266,8 @@ void raw_ostream::flush_nonempty() { raw_ostream &raw_ostream::write(unsigned char C) { // Group exceptional cases into a single branch. - if (BUILTIN_EXPECT(OutBufCur >= OutBufEnd, false)) { - if (BUILTIN_EXPECT(!OutBufStart, false)) { + if (LLVM_UNLIKELY(OutBufCur >= OutBufEnd)) { + if (LLVM_UNLIKELY(!OutBufStart)) { if (BufferMode == Unbuffered) { write_impl(reinterpret_cast<char*>(&C), 1); return *this; @@ -286,8 +286,8 @@ raw_ostream &raw_ostream::write(unsigned char C) { raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) { // Group exceptional cases into a single branch. - if (BUILTIN_EXPECT(size_t(OutBufEnd - OutBufCur) < Size, false)) { - if (BUILTIN_EXPECT(!OutBufStart, false)) { + if (LLVM_UNLIKELY(size_t(OutBufEnd - OutBufCur) < Size)) { + if (LLVM_UNLIKELY(!OutBufStart)) { if (BufferMode == Unbuffered) { write_impl(Ptr, Size); return *this; @@ -302,7 +302,7 @@ raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) { // If the buffer is empty at this point we have a string that is larger // than the buffer. Directly write the chunk that is a multiple of the // preferred buffer size and put the remainder in the buffer. 
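For the raw_ostream write path touched here, the large-write case splits the data so that a whole multiple of the preferred buffer size goes straight to the OS and only the tail is buffered. A small sketch of that split, with writeImpl and copyToBuffer as placeholder callbacks:

#include <cstddef>

void writeLarge(const char *Ptr, size_t Size, size_t PreferredBufferSize,
                void (*writeImpl)(const char *, size_t),
                void (*copyToBuffer)(const char *, size_t)) {
  // Flush the largest prefix that is a whole number of buffer-sized chunks.
  size_t BytesToWrite = Size - (Size % PreferredBufferSize);
  writeImpl(Ptr, BytesToWrite);
  // Keep only the remainder in the in-memory buffer.
  copyToBuffer(Ptr + BytesToWrite, Size - BytesToWrite);
}

For instance, a 10000-byte write with a 4096-byte preferred buffer sends 8192 bytes directly and buffers the remaining 1808.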
- if (BUILTIN_EXPECT(OutBufCur == OutBufStart, false)) { + if (LLVM_UNLIKELY(OutBufCur == OutBufStart)) { size_t BytesToWrite = Size - (Size % NumBytes); write_impl(Ptr, BytesToWrite); copy_to_buffer(Ptr + BytesToWrite, Size - BytesToWrite); @@ -523,7 +523,7 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) { ssize_t ret; // Check whether we should attempt to use atomic writes. - if (BUILTIN_EXPECT(!UseAtomicWrites, true)) { + if (LLVM_LIKELY(!UseAtomicWrites)) { ret = ::write(FD, Ptr, Size); } else { // Use ::writev() where available. diff --git a/lib/Support/regexec.c b/lib/Support/regexec.c index 0078616..bd5e72d 100644 --- a/lib/Support/regexec.c +++ b/lib/Support/regexec.c @@ -69,7 +69,7 @@ #define SETUP(v) ((v) = 0) #define onestate long #define INIT(o, n) ((o) = (unsigned long)1 << (n)) -#define INC(o) ((o) <<= 1) +#define INC(o) ((o) = (unsigned long)(o) << 1) #define ISSTATEIN(v, o) (((v) & (o)) != 0) /* some abbreviations; note that some of these know variable names! */ /* do "if I'm here, I can also be there" etc without branches */ diff --git a/lib/TableGen/Error.cpp b/lib/TableGen/Error.cpp index 1463b68..5dd688c 100644 --- a/lib/TableGen/Error.cpp +++ b/lib/TableGen/Error.cpp @@ -20,8 +20,19 @@ namespace llvm { SourceMgr SrcMgr; -void PrintWarning(SMLoc WarningLoc, const Twine &Msg) { - SrcMgr.PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg); +static void PrintMessage(ArrayRef<SMLoc> Loc, SourceMgr::DiagKind Kind, + const Twine &Msg) { + SMLoc NullLoc; + if (Loc.empty()) + Loc = NullLoc; + SrcMgr.PrintMessage(Loc.front(), Kind, Msg); + for (unsigned i = 1; i < Loc.size(); ++i) + SrcMgr.PrintMessage(Loc[i], SourceMgr::DK_Note, + "instantiated from multiclass"); +} + +void PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg) { + PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg); } void PrintWarning(const char *Loc, const Twine &Msg) { @@ -36,8 +47,8 @@ void PrintWarning(const TGError &Warning) { PrintWarning(Warning.getLoc(), Warning.getMessage()); } -void PrintError(SMLoc ErrorLoc, const Twine &Msg) { - SrcMgr.PrintMessage(ErrorLoc, SourceMgr::DK_Error, Msg); +void PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) { + PrintMessage(ErrorLoc, SourceMgr::DK_Error, Msg); } void PrintError(const char *Loc, const Twine &Msg) { diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index 99fdc1f..b2a7b62 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -112,7 +112,10 @@ Init *BitRecTy::convertValue(IntInit *II) { } Init *BitRecTy::convertValue(TypedInit *VI) { - if (dynamic_cast<BitRecTy*>(VI->getType())) + RecTy *Ty = VI->getType(); + if (dynamic_cast<BitRecTy*>(Ty) || + dynamic_cast<BitsRecTy*>(Ty) || + dynamic_cast<IntRecTy*>(Ty)) return VI; // Accept variable if it is already of bit type! 
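The TableGen Error.cpp change above reports a diagnostic at the first location and then a chain of "instantiated from multiclass" notes for the remaining locations. A toy version of that reporting loop, with locations as plain strings instead of SMLoc and printf in place of SourceMgr:

#include <cstdio>
#include <string>
#include <vector>

void printDiagnostic(const std::vector<std::string> &Locs,
                     const char *Kind, const std::string &Msg) {
  // First location carries the actual warning/error.
  std::string First = Locs.empty() ? "<unknown>" : Locs.front();
  std::printf("%s: %s: %s\n", First.c_str(), Kind, Msg.c_str());
  // Every further location records a multiclass instantiation step.
  for (size_t i = 1; i < Locs.size(); ++i)
    std::printf("%s: note: instantiated from multiclass\n", Locs[i].c_str());
}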
return 0; } @@ -178,60 +181,15 @@ Init *BitsRecTy::convertValue(BitsInit *BI) { } Init *BitsRecTy::convertValue(TypedInit *VI) { - if (BitsRecTy *BRT = dynamic_cast<BitsRecTy*>(VI->getType())) - if (BRT->Size == Size) { - SmallVector<Init *, 16> NewBits(Size); - - for (unsigned i = 0; i != Size; ++i) - NewBits[i] = VarBitInit::get(VI, i); - return BitsInit::get(NewBits); - } - if (Size == 1 && dynamic_cast<BitRecTy*>(VI->getType())) return BitsInit::get(VI); - if (TernOpInit *Tern = dynamic_cast<TernOpInit*>(VI)) { - if (Tern->getOpcode() == TernOpInit::IF) { - Init *LHS = Tern->getLHS(); - Init *MHS = Tern->getMHS(); - Init *RHS = Tern->getRHS(); - - IntInit *MHSi = dynamic_cast<IntInit*>(MHS); - IntInit *RHSi = dynamic_cast<IntInit*>(RHS); - - if (MHSi && RHSi) { - int64_t MHSVal = MHSi->getValue(); - int64_t RHSVal = RHSi->getValue(); + if (VI->getType()->typeIsConvertibleTo(this)) { + SmallVector<Init *, 16> NewBits(Size); - if (canFitInBitfield(MHSVal, Size) && canFitInBitfield(RHSVal, Size)) { - SmallVector<Init *, 16> NewBits(Size); - - for (unsigned i = 0; i != Size; ++i) - NewBits[i] = - TernOpInit::get(TernOpInit::IF, LHS, - IntInit::get((MHSVal & (1LL << i)) ? 1 : 0), - IntInit::get((RHSVal & (1LL << i)) ? 1 : 0), - VI->getType()); - - return BitsInit::get(NewBits); - } - } else { - BitsInit *MHSbs = dynamic_cast<BitsInit*>(MHS); - BitsInit *RHSbs = dynamic_cast<BitsInit*>(RHS); - - if (MHSbs && RHSbs) { - SmallVector<Init *, 16> NewBits(Size); - - for (unsigned i = 0; i != Size; ++i) - NewBits[i] = TernOpInit::get(TernOpInit::IF, LHS, - MHSbs->getBit(i), - RHSbs->getBit(i), - VI->getType()); - - return BitsInit::get(NewBits); - } - } - } + for (unsigned i = 0; i != Size; ++i) + NewBits[i] = VarBitInit::get(VI, i); + return BitsInit::get(NewBits); } return 0; @@ -519,6 +477,15 @@ std::string BitsInit::getAsString() const { return Result + " }"; } +// Fix bit initializer to preserve the behavior that bit reference from a unset +// bits initializer will resolve into VarBitInit to keep the field name and bit +// number used in targets with fixed insn length. +static Init *fixBitInit(const RecordVal *RV, Init *Before, Init *After) { + if (RV || After != UnsetInit::get()) + return After; + return Before; +} + // resolveReferences - If there are any field references that refer to fields // that have been filled in, we can propagate the values now. 
// @@ -526,16 +493,39 @@ Init *BitsInit::resolveReferences(Record &R, const RecordVal *RV) const { bool Changed = false; SmallVector<Init *, 16> NewBits(getNumBits()); - for (unsigned i = 0, e = Bits.size(); i != e; ++i) { - Init *B; - Init *CurBit = getBit(i); + Init *CachedInit = 0; + Init *CachedBitVar = 0; + bool CachedBitVarChanged = false; + + for (unsigned i = 0, e = getNumBits(); i != e; ++i) { + Init *CurBit = Bits[i]; + Init *CurBitVar = CurBit->getBitVar(); - do { - B = CurBit; - CurBit = CurBit->resolveReferences(R, RV); - Changed |= B != CurBit; - } while (B != CurBit); NewBits[i] = CurBit; + + if (CurBitVar == CachedBitVar) { + if (CachedBitVarChanged) { + Init *Bit = CachedInit->getBit(CurBit->getBitNum()); + NewBits[i] = fixBitInit(RV, CurBit, Bit); + } + continue; + } + CachedBitVar = CurBitVar; + CachedBitVarChanged = false; + + Init *B; + do { + B = CurBitVar; + CurBitVar = CurBitVar->resolveReferences(R, RV); + CachedBitVarChanged |= B != CurBitVar; + Changed |= B != CurBitVar; + } while (B != CurBitVar); + CachedInit = CurBitVar; + + if (CachedBitVarChanged) { + Init *Bit = CurBitVar->getBit(CurBit->getBitNum()); + NewBits[i] = fixBitInit(RV, CurBit, Bit); + } } if (Changed) @@ -682,20 +672,6 @@ std::string ListInit::getAsString() const { return Result + "]"; } -Init *OpInit::resolveBitReference(Record &R, const RecordVal *IRV, - unsigned Bit) const { - Init *Folded = Fold(&R, 0); - - if (Folded != this) { - TypedInit *Typed = dynamic_cast<TypedInit *>(Folded); - if (Typed) { - return Typed->resolveBitReference(R, IRV, Bit); - } - } - - return 0; -} - Init *OpInit::resolveListElementReference(Record &R, const RecordVal *IRV, unsigned Elt) const { Init *Resolved = resolveReferences(R, IRV); @@ -718,6 +694,12 @@ Init *OpInit::resolveListElementReference(Record &R, const RecordVal *IRV, return 0; } +Init *OpInit::getBit(unsigned Bit) const { + if (getType() == BitRecTy::get()) + return const_cast<OpInit*>(this); + return VarBitInit::get(const_cast<OpInit*>(this), Bit); +} + UnOpInit *UnOpInit::get(UnaryOp opc, Init *lhs, RecTy *Type) { typedef std::pair<std::pair<unsigned, Init *>, RecTy *> Key; @@ -922,9 +904,9 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { case EQ: { // try to fold eq comparison for 'bit' and 'int', otherwise fallback // to string objects. - IntInit* L = + IntInit *L = dynamic_cast<IntInit*>(LHS->convertInitializerTo(IntRecTy::get())); - IntInit* R = + IntInit *R = dynamic_cast<IntInit*>(RHS->convertInitializerTo(IntRecTy::get())); if (L && R) @@ -1324,25 +1306,10 @@ const std::string &VarInit::getName() const { return NameString->getValue(); } -Init *VarInit::resolveBitReference(Record &R, const RecordVal *IRV, - unsigned Bit) const { - if (R.isTemplateArg(getNameInit())) return 0; - if (IRV && IRV->getNameInit() != getNameInit()) return 0; - - RecordVal *RV = R.getValue(getNameInit()); - assert(RV && "Reference to a non-existent variable?"); - assert(dynamic_cast<BitsInit*>(RV->getValue())); - BitsInit *BI = (BitsInit*)RV->getValue(); - - assert(Bit < BI->getNumBits() && "Bit reference out of range!"); - Init *B = BI->getBit(Bit); - - // If the bit is set to some value, or if we are resolving a reference to a - // specific variable and that variable is explicitly unset, then replace the - // VarBitInit with it. 
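The rewritten BitsInit::resolveReferences above also caches per bit variable: consecutive bits of an instruction encoding usually refer to the same underlying field, so that field is resolved once and the cached result is reused until the owner changes. The same idea in miniature, with plain containers and invented names:

#include <cassert>
#include <iostream>
#include <string>
#include <vector>

struct BitRef { std::string Owner; unsigned Index; };

// Pretend resolving an owner is expensive; count how often it happens.
static unsigned ResolveCalls = 0;
std::vector<int> resolveOwner(const std::string &Owner) {
  ++ResolveCalls;
  return Owner == "opcode" ? std::vector<int>{1, 0, 1, 1}
                           : std::vector<int>{0, 0, 0, 0};
}

std::vector<int> resolveBits(const std::vector<BitRef> &Bits) {
  std::vector<int> Out;
  std::string CachedOwner;
  std::vector<int> CachedValue;
  for (const BitRef &B : Bits) {
    if (B.Owner != CachedOwner) {            // only re-resolve on owner change
      CachedOwner = B.Owner;
      CachedValue = resolveOwner(B.Owner);
    }
    Out.push_back(CachedValue[B.Index]);
  }
  return Out;
}

int main() {
  std::vector<BitRef> Bits = {{"opcode", 0}, {"opcode", 1}, {"opcode", 2}, {"Rn", 0}};
  std::vector<int> R = resolveBits(Bits);
  assert((R == std::vector<int>{1, 0, 1, 0}));
  assert(ResolveCalls == 2);                 // one resolution per run of the same owner
  std::cout << "ok\n";
}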
- if (IRV || !dynamic_cast<UnsetInit*>(B)) - return B; - return 0; +Init *VarInit::getBit(unsigned Bit) const { + if (getType() == BitRecTy::get()) + return const_cast<VarInit*>(this); + return VarBitInit::get(const_cast<VarInit*>(this), Bit); } Init *VarInit::resolveListElementReference(Record &R, @@ -1425,9 +1392,11 @@ std::string VarBitInit::getAsString() const { } Init *VarBitInit::resolveReferences(Record &R, const RecordVal *RV) const { - if (Init *I = getVariable()->resolveBitReference(R, RV, getBitNum())) - return I; - return const_cast<VarBitInit *>(this); + Init *I = TI->resolveReferences(R, RV); + if (TI != I) + return I->getBit(getBitNum()); + + return const_cast<VarBitInit*>(this); } VarListElementInit *VarListElementInit::get(TypedInit *T, @@ -1456,11 +1425,10 @@ VarListElementInit::resolveReferences(Record &R, const RecordVal *RV) const { return const_cast<VarListElementInit *>(this); } -Init *VarListElementInit::resolveBitReference(Record &R, const RecordVal *RV, - unsigned Bit) const { - // FIXME: This should be implemented, to support references like: - // bit B = AA[0]{1}; - return 0; +Init *VarListElementInit::getBit(unsigned Bit) const { + if (getType() == BitRecTy::get()) + return const_cast<VarListElementInit*>(this); + return VarBitInit::get(const_cast<VarListElementInit*>(this), Bit); } Init *VarListElementInit:: resolveListElementReference(Record &R, @@ -1513,17 +1481,10 @@ FieldInit *FieldInit::get(Init *R, const std::string &FN) { return I; } -Init *FieldInit::resolveBitReference(Record &R, const RecordVal *RV, - unsigned Bit) const { - if (Init *BitsVal = Rec->getFieldInit(R, RV, FieldName)) - if (BitsInit *BI = dynamic_cast<BitsInit*>(BitsVal)) { - assert(Bit < BI->getNumBits() && "Bit reference out of range!"); - Init *B = BI->getBit(Bit); - - if (dynamic_cast<BitInit*>(B)) // If the bit is set. - return B; // Replace the VarBitInit with it. - } - return 0; +Init *FieldInit::getBit(unsigned Bit) const { + if (getType() == BitRecTy::get()) + return const_cast<FieldInit*>(this); + return VarBitInit::get(const_cast<FieldInit*>(this), Bit); } Init *FieldInit::resolveListElementReference(Record &R, const RecordVal *RV, @@ -1751,7 +1712,15 @@ void Record::resolveReferencesTo(const RecordVal *RV) { if (RV == &Values[i]) // Skip resolve the same field as the given one continue; if (Init *V = Values[i].getValue()) - Values[i].setValue(V->resolveReferences(*this, RV)); + if (Values[i].setValue(V->resolveReferences(*this, RV))) + throw TGError(getLoc(), "Invalid value is found when setting '" + + Values[i].getNameInitAsString() + + "' after resolving references" + + (RV ? 
" against '" + RV->getNameInitAsString() + + "' of (" + + RV->getValue()->getAsUnquotedString() + ")" + : "") + + "\n"); } Init *OldName = getNameInit(); Init *NewName = Name->resolveReferences(*this, RV); @@ -1963,6 +1932,23 @@ bool Record::getValueAsBit(StringRef FieldName) const { "' does not have a bit initializer!"; } +bool Record::getValueAsBitOrUnset(StringRef FieldName, bool &Unset) const { + const RecordVal *R = getValue(FieldName); + if (R == 0 || R->getValue() == 0) + throw "Record `" + getName() + "' does not have a field named `" + + FieldName.str() + "'!\n"; + + if (R->getValue() == UnsetInit::get()) { + Unset = true; + return false; + } + Unset = false; + if (BitInit *BI = dynamic_cast<BitInit*>(R->getValue())) + return BI->getValue(); + throw "Record `" + getName() + "', field `" + FieldName.str() + + "' does not have a bit initializer!"; +} + /// getValueAsDag - This method looks up the specified field and returns its /// value as an Dag, throwing an exception if the field does not exist or if /// the value is not the right type. diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index b9c7ff6..aee93e7 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -1044,35 +1044,28 @@ Init *TGParser::ParseOperation(Record *CurRec) { switch (LexCode) { default: llvm_unreachable("Unhandled code!"); case tgtok::XIf: { - // FIXME: The `!if' operator doesn't handle non-TypedInit well at - // all. This can be made much more robust. - TypedInit *MHSt = dynamic_cast<TypedInit*>(MHS); - TypedInit *RHSt = dynamic_cast<TypedInit*>(RHS); - RecTy *MHSTy = 0; RecTy *RHSTy = 0; - if (MHSt == 0 && RHSt == 0) { - BitsInit *MHSbits = dynamic_cast<BitsInit*>(MHS); - BitsInit *RHSbits = dynamic_cast<BitsInit*>(RHS); - - if (MHSbits && RHSbits && - MHSbits->getNumBits() == RHSbits->getNumBits()) { - Type = BitRecTy::get(); - break; - } else { - BitInit *MHSbit = dynamic_cast<BitInit*>(MHS); - BitInit *RHSbit = dynamic_cast<BitInit*>(RHS); - - if (MHSbit && RHSbit) { - Type = BitRecTy::get(); - break; - } - } - } else if (MHSt != 0 && RHSt != 0) { + if (TypedInit *MHSt = dynamic_cast<TypedInit*>(MHS)) MHSTy = MHSt->getType(); + if (BitsInit *MHSbits = dynamic_cast<BitsInit*>(MHS)) + MHSTy = BitsRecTy::get(MHSbits->getNumBits()); + if (dynamic_cast<BitInit*>(MHS)) + MHSTy = BitRecTy::get(); + + if (TypedInit *RHSt = dynamic_cast<TypedInit*>(RHS)) RHSTy = RHSt->getType(); - } + if (BitsInit *RHSbits = dynamic_cast<BitsInit*>(RHS)) + RHSTy = BitsRecTy::get(RHSbits->getNumBits()); + if (dynamic_cast<BitInit*>(RHS)) + RHSTy = BitRecTy::get(); + + // For UnsetInit, it's typed from the other hand. + if (dynamic_cast<UnsetInit*>(MHS)) + MHSTy = RHSTy; + if (dynamic_cast<UnsetInit*>(RHS)) + RHSTy = MHSTy; if (!MHSTy || !RHSTy) { TokError("could not get type for !if"); @@ -2277,7 +2270,10 @@ InstantiateMulticlassDef(MultiClass &MC, DefName, StringRecTy::get())->Fold(DefProto, &MC); } - Record *CurRec = new Record(DefName, DefmPrefixLoc, Records); + // Make a trail of SMLocs from the multiclass instantiations. 
+ SmallVector<SMLoc, 4> Locs(1, DefmPrefixLoc); + Locs.append(DefProto->getLoc().begin(), DefProto->getLoc().end()); + Record *CurRec = new Record(DefName, Locs, Records); SubClassReference Ref; Ref.RefLoc = DefmPrefixLoc; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 29033e5..e2f0d7d 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -683,7 +683,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Handle register classes that require multiple instructions. unsigned BeginIdx = 0; unsigned SubRegs = 0; - unsigned Spacing = 1; + int Spacing = 1; // Use VORRq when possible. if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) @@ -705,27 +705,38 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2; - if (Opc) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MachineInstrBuilder Mov; - for (unsigned i = 0; i != SubRegs; ++i) { - unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing); - unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing); - assert(Dst && Src && "Bad sub-register"); - Mov = AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst) - .addReg(Src)); - // VORR takes two source operands. - if (Opc == ARM::VORRq) - Mov.addReg(Src); - } - // Add implicit super-register defs and kills to the last instruction. - Mov->addRegisterDefined(DestReg, TRI); - if (KillSrc) - Mov->addRegisterKilled(SrcReg, TRI); - return; - } + assert(Opc && "Impossible reg-to-reg copy"); - llvm_unreachable("Impossible reg-to-reg copy"); + const TargetRegisterInfo *TRI = &getRegisterInfo(); + MachineInstrBuilder Mov; + + // Copy register tuples backward when the first Dest reg overlaps with SrcReg. + if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) { + BeginIdx = BeginIdx + ((SubRegs-1)*Spacing); + Spacing = -Spacing; + } +#ifndef NDEBUG + SmallSet<unsigned, 4> DstRegs; +#endif + for (unsigned i = 0; i != SubRegs; ++i) { + unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing); + unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing); + assert(Dst && Src && "Bad sub-register"); +#ifndef NDEBUG + assert(!DstRegs.count(Src) && "destructive vector copy"); + DstRegs.insert(Dst); +#endif + Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst) + .addReg(Src); + // VORR takes two source operands. + if (Opc == ARM::VORRq) + Mov.addReg(Src); + Mov = AddDefaultPred(Mov); + } + // Add implicit super-register defs and kills to the last instruction. + Mov->addRegisterDefined(DestReg, TRI); + if (KillSrc) + Mov->addRegisterKilled(SrcReg, TRI); } static const @@ -1569,16 +1580,20 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { } /// Identify instructions that can be folded into a MOVCC instruction, and -/// return the corresponding opcode for the predicated pseudo-instruction. -static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI, - const MachineRegisterInfo &MRI) { +/// return the defining instruction. +static MachineInstr *canFoldIntoMOVCC(unsigned Reg, + const MachineRegisterInfo &MRI, + const TargetInstrInfo *TII) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) return 0; if (!MRI.hasOneNonDBGUse(Reg)) return 0; - MI = MRI.getVRegDef(Reg); + MachineInstr *MI = MRI.getVRegDef(Reg); if (!MI) return 0; + // MI is folded into the MOVCC by predicating it. 
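The copyPhysReg rework above copies D-register tuples one sub-register at a time and, when the first destination sub-register overlaps the source tuple, negates Spacing so the copy walks backwards and never clobbers a source piece before reading it. The overlap rule on a plain array, as an illustrative sketch rather than the MachineInstr-building code:

#include <cassert>
#include <iostream>
#include <vector>

// Copy Count elements from SrcIdx.. to DstIdx.., one element at a time,
// choosing the direction exactly like the Spacing negation above.
void copyTuple(std::vector<int> &Regs, unsigned DstIdx, unsigned SrcIdx,
               unsigned Count) {
  int Begin = 0, Step = 1;
  // If the first destination overlaps the still-unread source range, go backward.
  if (DstIdx > SrcIdx && DstIdx < SrcIdx + Count) {
    Begin = Count - 1;
    Step = -1;
  }
  for (unsigned i = 0; i != Count; ++i) {
    int Off = Begin + int(i) * Step;
    Regs[DstIdx + Off] = Regs[SrcIdx + Off];
  }
}

int main() {
  std::vector<int> R = {0, 1, 2, 3, 4, 5};
  copyTuple(R, /*Dst=*/1, /*Src=*/0, /*Count=*/4);   // overlapping: forward would clobber
  assert((R == std::vector<int>{0, 0, 1, 2, 3, 5}));
  std::cout << "ok\n";
}

This is the same reasoning that makes memmove safe where memcpy is not.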
+ if (!MI->isPredicable()) + return 0; // Check if MI has any non-dead defs or physreg uses. This also detects // predicated instructions which will be reading CPSR. for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { @@ -1588,55 +1603,18 @@ static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI, return 0; if (!MO.isReg()) continue; + // MI can't have any tied operands, that would conflict with predication. + if (MO.isTied()) + return 0; if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) return 0; if (MO.isDef() && !MO.isDead()) return 0; } - switch (MI->getOpcode()) { - default: return 0; - case ARM::ANDri: return ARM::ANDCCri; - case ARM::ANDrr: return ARM::ANDCCrr; - case ARM::ANDrsi: return ARM::ANDCCrsi; - case ARM::ANDrsr: return ARM::ANDCCrsr; - case ARM::t2ANDri: return ARM::t2ANDCCri; - case ARM::t2ANDrr: return ARM::t2ANDCCrr; - case ARM::t2ANDrs: return ARM::t2ANDCCrs; - case ARM::EORri: return ARM::EORCCri; - case ARM::EORrr: return ARM::EORCCrr; - case ARM::EORrsi: return ARM::EORCCrsi; - case ARM::EORrsr: return ARM::EORCCrsr; - case ARM::t2EORri: return ARM::t2EORCCri; - case ARM::t2EORrr: return ARM::t2EORCCrr; - case ARM::t2EORrs: return ARM::t2EORCCrs; - case ARM::ORRri: return ARM::ORRCCri; - case ARM::ORRrr: return ARM::ORRCCrr; - case ARM::ORRrsi: return ARM::ORRCCrsi; - case ARM::ORRrsr: return ARM::ORRCCrsr; - case ARM::t2ORRri: return ARM::t2ORRCCri; - case ARM::t2ORRrr: return ARM::t2ORRCCrr; - case ARM::t2ORRrs: return ARM::t2ORRCCrs; - - // ARM ADD/SUB - case ARM::ADDri: return ARM::ADDCCri; - case ARM::ADDrr: return ARM::ADDCCrr; - case ARM::ADDrsi: return ARM::ADDCCrsi; - case ARM::ADDrsr: return ARM::ADDCCrsr; - case ARM::SUBri: return ARM::SUBCCri; - case ARM::SUBrr: return ARM::SUBCCrr; - case ARM::SUBrsi: return ARM::SUBCCrsi; - case ARM::SUBrsr: return ARM::SUBCCrsr; - - // Thumb2 ADD/SUB - case ARM::t2ADDri: return ARM::t2ADDCCri; - case ARM::t2ADDri12: return ARM::t2ADDCCri12; - case ARM::t2ADDrr: return ARM::t2ADDCCrr; - case ARM::t2ADDrs: return ARM::t2ADDCCrs; - case ARM::t2SUBri: return ARM::t2SUBCCri; - case ARM::t2SUBri12: return ARM::t2SUBCCri12; - case ARM::t2SUBrr: return ARM::t2SUBCCrr; - case ARM::t2SUBrs: return ARM::t2SUBCCrs; - } + bool DontMoveAcrossStores = true; + if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores)) + return 0; + return MI; } bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI, @@ -1665,19 +1643,18 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && "Unknown select instruction"); const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); - MachineInstr *DefMI = 0; - unsigned Opc = canFoldIntoMOVCC(MI->getOperand(2).getReg(), DefMI, MRI); - bool Invert = !Opc; - if (!Opc) - Opc = canFoldIntoMOVCC(MI->getOperand(1).getReg(), DefMI, MRI); - if (!Opc) + MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this); + bool Invert = !DefMI; + if (!DefMI) + DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this); + if (!DefMI) return 0; // Create a new predicated version of DefMI. // Rfalse is the first use. MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - get(Opc), MI->getOperand(0).getReg()) - .addOperand(MI->getOperand(Invert ? 2 : 1)); + DefMI->getDesc(), + MI->getOperand(0).getReg()); // Copy all the DefMI operands, excluding its (null) predicate. 
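canFoldIntoMOVCC now hands back the defining instruction itself; folding simply means predicating that instruction in place, so the checks boil down to one eligibility predicate. Condensed here over a toy description of the defining instruction (all names invented, not LLVM API):

#include <cassert>

struct DefInstrDesc {
  bool DefinesVirtualReg;      // the select operand is a virtual register
  bool HasOneNonDbgUse;        // only the MOVCC reads it
  bool IsPredicable;           // the opcode accepts a predicate at all
  bool HasPhysRegOperand;      // would already read or write CPSR and friends
  bool HasTiedOperand;         // a tie would conflict with the added predicate
  bool HasOtherLiveDef;        // extra non-dead defs cannot be predicated away
  bool SafeToMoveAcrossStores; // it will effectively execute at the MOVCC
};

bool canPredicateIntoSelect(const DefInstrDesc &D) {
  return D.DefinesVirtualReg && D.HasOneNonDbgUse && D.IsPredicable &&
         !D.HasPhysRegOperand && !D.HasTiedOperand && !D.HasOtherLiveDef &&
         D.SafeToMoveAcrossStores;
}

int main() {
  DefInstrDesc Add = {true, true, true, false, false, false, true};
  assert(canPredicateIntoSelect(Add));
  Add.HasTiedOperand = true;   // e.g. an instruction that already ties operands
  assert(!canPredicateIntoSelect(Add));
}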
const MCInstrDesc &DefDesc = DefMI->getDesc(); @@ -1696,6 +1673,15 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, if (NewMI->hasOptionalDef()) AddDefaultCC(NewMI); + // The output register value when the predicate is false is an implicit + // register operand tied to the first def. + // The tie makes the register allocator ensure the FalseReg is allocated the + // same register as operand 0. + MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1); + FalseReg.setImplicit(); + NewMI->addOperand(FalseReg); + NewMI->tieOperands(0, NewMI->getNumOperands() - 1); + // The caller will erase MI, but not DefMI. DefMI->eraseFromParent(); return NewMI; @@ -2042,13 +2028,14 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // Masked compares sometimes use the same register as the corresponding 'and'. if (CmpMask != ~0) { - if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) { + if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) { MI = 0; for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg), UE = MRI->use_end(); UI != UE; ++UI) { if (UI->getParent() != CmpInstr->getParent()) continue; MachineInstr *PotentialAND = &*UI; - if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true)) + if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) || + isPredicated(PotentialAND)) continue; MI = PotentialAND; break; @@ -2114,6 +2101,10 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // The single candidate is called MI. if (!MI) MI = Sub; + // We can't use a predicated instruction - it doesn't always write the flags. + if (isPredicated(MI)) + return false; + switch (MI->getOpcode()) { default: break; case ARM::RSBrr: @@ -2220,6 +2211,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // Toggle the optional operand to CPSR. MI->getOperand(5).setReg(ARM::CPSR); MI->getOperand(5).setIsDef(true); + assert(!isPredicated(MI) && "Can't use flags from predicated instruction"); CmpInstr->eraseFromParent(); // Modify the condition code of operands in OperandsToUpdate. @@ -3366,7 +3358,8 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { // converted. if (Subtarget.isCortexA9() && !isPredicated(MI) && (MI->getOpcode() == ARM::VMOVRS || - MI->getOpcode() == ARM::VMOVSR)) + MI->getOpcode() == ARM::VMOVSR || + MI->getOpcode() == ARM::VMOVS)) return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); // No other instructions can be swizzled, so just determine their domain. 
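The implicit FalseReg operand tied to the def, added above, is what keeps the folded form equivalent to the original select: the destination register already holds the "false" value (the tie forces the register allocator to give both the same register), and the predicated instruction only overwrites it when the condition is true. A quick equivalence check in plain C++, with invented function names:

#include <cassert>
#include <cstdint>

// Original shape: unconditional add, then a conditional move selects it.
uint32_t selectThenAdd(bool Cond, uint32_t A, uint32_t B, uint32_t F) {
  uint32_t T = A + B;
  return Cond ? T : F;
}

// Folded shape: destination pre-loaded with the false value (the tied
// operand), and the add only executes when the predicate holds.
uint32_t predicatedAdd(bool Cond, uint32_t A, uint32_t B, uint32_t F) {
  uint32_t D = F;
  if (Cond)
    D = A + B;
  return D;
}

int main() {
  for (int c = 0; c < 2; ++c)
    for (uint32_t a : {0u, 7u, 0xffffffffu})
      assert(selectThenAdd(c, a, 5, 42) == predicatedAdd(c, a, 5, 42));
}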
@@ -3386,13 +3379,28 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { return std::make_pair(ExeGeneric, 0); } +static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, + unsigned SReg, unsigned &Lane) { + unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass); + Lane = 0; + + if (DReg != ARM::NoRegister) + return DReg; + + Lane = 1; + DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass); + + assert(DReg && "S-register with no D super-register?"); + return DReg; +} + + void ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { unsigned DstReg, SrcReg, DReg; unsigned Lane; MachineInstrBuilder MIB(MI); const TargetRegisterInfo *TRI = &getRegisterInfo(); - bool isKill; switch (MI->getOpcode()) { default: llvm_unreachable("cannot handle opcode!"); @@ -3403,78 +3411,175 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { // Zap the predicate operands. assert(!isPredicated(MI) && "Cannot predicate a VORRd"); - MI->RemoveOperand(3); - MI->RemoveOperand(2); - // Change to a VORRd which requires two identical use operands. - MI->setDesc(get(ARM::VORRd)); + // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); - // Add the extra source operand and new predicates. - // This will go before any implicit ops. - AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1))); + for (unsigned i = MI->getDesc().getNumOperands(); i; --i) + MI->RemoveOperand(i-1); + + // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits) + MI->setDesc(get(ARM::VORRd)); + AddDefaultPred(MIB.addReg(DstReg, RegState::Define) + .addReg(SrcReg) + .addReg(SrcReg)); break; case ARM::VMOVRS: if (Domain != ExeNEON) break; assert(!isPredicated(MI) && "Cannot predicate a VGETLN"); + // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits) DstReg = MI->getOperand(0).getReg(); SrcReg = MI->getOperand(1).getReg(); - DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, &ARM::DPRRegClass); - Lane = 0; - if (DReg == ARM::NoRegister) { - DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_1, &ARM::DPRRegClass); - Lane = 1; - assert(DReg && "S-register with no D super-register?"); - } + for (unsigned i = MI->getDesc().getNumOperands(); i; --i) + MI->RemoveOperand(i-1); - MI->RemoveOperand(3); - MI->RemoveOperand(2); - MI->RemoveOperand(1); + DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane); + // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps) + // Note that DSrc has been widened and the other lane may be undef, which + // contaminates the entire register. MI->setDesc(get(ARM::VGETLNi32)); - MIB.addReg(DReg); - MIB.addImm(Lane); + AddDefaultPred(MIB.addReg(DstReg, RegState::Define) + .addReg(DReg, RegState::Undef) + .addImm(Lane)); - MIB->getOperand(1).setIsUndef(); + // The old source should be an implicit use, otherwise we might think it + // was dead before here. 
MIB.addReg(SrcReg, RegState::Implicit); - - AddDefaultPred(MIB); break; case ARM::VMOVSR: if (Domain != ExeNEON) break; assert(!isPredicated(MI) && "Cannot predicate a VSETLN"); + // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits) DstReg = MI->getOperand(0).getReg(); SrcReg = MI->getOperand(1).getReg(); - DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_0, &ARM::DPRRegClass); - Lane = 0; - if (DReg == ARM::NoRegister) { - DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_1, &ARM::DPRRegClass); - Lane = 1; - assert(DReg && "S-register with no D super-register?"); - } - isKill = MI->getOperand(0).isKill(); - MI->RemoveOperand(3); - MI->RemoveOperand(2); - MI->RemoveOperand(1); - MI->RemoveOperand(0); + DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane); + // If we insert both a novel <def> and an <undef> on the DReg, we break + // any existing dependency chain on the unused lane. Either already being + // present means this instruction is in that chain anyway so we can make + // the transformation. + if (!MI->definesRegister(DReg, TRI) && !MI->readsRegister(DReg, TRI)) + break; + + for (unsigned i = MI->getDesc().getNumOperands(); i; --i) + MI->RemoveOperand(i-1); + + // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps) + // Again DDst may be undefined at the beginning of this instruction. MI->setDesc(get(ARM::VSETLNi32)); - MIB.addReg(DReg, RegState::Define); - MIB.addReg(DReg, RegState::Undef); - MIB.addReg(SrcReg); - MIB.addImm(Lane); + MIB.addReg(DReg, RegState::Define) + .addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI))) + .addReg(SrcReg) + .addImm(Lane); + AddDefaultPred(MIB); - if (isKill) - MIB->addRegisterKilled(DstReg, TRI, true); - MIB->addRegisterDefined(DstReg, TRI); + // The narrower destination must be marked as set to keep previous chains + // in place. + MIB.addReg(DstReg, RegState::Define | RegState::Implicit); + break; + case ARM::VMOVS: { + if (Domain != ExeNEON) + break; + // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + + unsigned DstLane = 0, SrcLane = 0, DDst, DSrc; + DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane); + DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane); + + // If we insert both a novel <def> and an <undef> on the DReg, we break + // any existing dependency chain on the unused lane. Either already being + // present means this instruction is in that chain anyway so we can make + // the transformation. + if (!MI->definesRegister(DDst, TRI) && !MI->readsRegister(DDst, TRI)) + break; + + for (unsigned i = MI->getDesc().getNumOperands(); i; --i) + MI->RemoveOperand(i-1); + + if (DSrc == DDst) { + // Destination can be: + // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits) + MI->setDesc(get(ARM::VDUPLN32d)); + MIB.addReg(DDst, RegState::Define) + .addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI))) + .addImm(SrcLane); + AddDefaultPred(MIB); + + // Neither the source or the destination are naturally represented any + // more, so add them in manually. + MIB.addReg(DstReg, RegState::Implicit | RegState::Define); + MIB.addReg(SrcReg, RegState::Implicit); + break; + } + + // In general there's no single instruction that can perform an S <-> S + // move in NEON space, but a pair of VEXT instructions *can* do the + // job. It turns out that the VEXTs needed will only use DSrc once, with + // the position based purely on the combination of lane-0 and lane-1 + // involved. 
For example + // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1 + // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1 + // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1 + // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1 + // + // Pattern of the MachineInstrs is: + // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits) + MachineInstrBuilder NewMIB; + NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + get(ARM::VEXTd32), DDst); + + // On the first instruction, both DSrc and DDst may be <undef> if present. + // Specifically when the original instruction didn't have them as an + // <imp-use>. + unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst; + bool CurUndef = !MI->readsRegister(CurReg, TRI); + NewMIB.addReg(CurReg, getUndefRegState(CurUndef)); + + CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst; + CurUndef = !MI->readsRegister(CurReg, TRI); + NewMIB.addReg(CurReg, getUndefRegState(CurUndef)); + + NewMIB.addImm(1); + AddDefaultPred(NewMIB); + + if (SrcLane == DstLane) + NewMIB.addReg(SrcReg, RegState::Implicit); + + MI->setDesc(get(ARM::VEXTd32)); + MIB.addReg(DDst, RegState::Define); + + // On the second instruction, DDst has definitely been defined above, so + // it is not <undef>. DSrc, if present, can be <undef> as above. + CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst; + CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI); + MIB.addReg(CurReg, getUndefRegState(CurUndef)); + + CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst; + CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI); + MIB.addReg(CurReg, getUndefRegState(CurUndef)); + + MIB.addImm(1); AddDefaultPred(MIB); + + if (SrcLane != DstLane) + MIB.addReg(SrcReg, RegState::Implicit); + + // As before, the original destination is no longer represented, add it + // implicitly. + MIB.addReg(DstReg, RegState::Define | RegState::Implicit); break; + } } } diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 132b81f..89cbc84 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -410,7 +410,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { do { DEBUG(errs() << "JITTing function '" - << MF.getFunction()->getName() << "'\n"); + << MF.getName() << "'\n"); MCE.startFunction(MF); for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index a953985..dd05f0c 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -1388,10 +1388,9 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { // If the original WaterList entry was "new water" on this iteration, // propagate that to the new island. This is just keeping NewWaterList // updated to match the WaterList, which will be updated below. - if (NewWaterList.count(WaterBB)) { - NewWaterList.erase(WaterBB); + if (NewWaterList.erase(WaterBB)) NewWaterList.insert(NewIsland); - } + // The new CPE goes before the following block (NewMBB). 
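The VMOVS handling added to setExecutionDomain in the hunks above relies on two facts: each S register is one 32-bit lane of a D register (s2k is lane 0 of dk and s2k+1 is lane 1), and vext.32 dD, dA, dB, #1 yields { A[1], B[0] }. Replaying the four sequences from the comment with a two-lane array model shows that each VEXT pair moves exactly the requested S register and leaves its neighbour intact; a standalone sketch, not the LLVM code:

#include <array>
#include <cassert>
#include <cstdint>
#include <iostream>

using DReg = std::array<uint32_t, 2>;          // lane 0, lane 1

DReg vext32(const DReg &A, const DReg &B) {    // vext.32 ..., #1 only
  return {A[1], B[0]};
}

int main() {
  const DReg d0 = {0, 1};                      // {s0, s1}
  const DReg d1 = {2, 3};                      // {s2, s3}

  // vmov s0, s2  ->  vext.32 d0, d0, d1, #1 ; vext.32 d0, d0, d0, #1
  { DReg t = vext32(d0, d1); t = vext32(t, t); assert((t == DReg{2, 1})); }
  // vmov s1, s3  ->  vext.32 d0, d1, d0, #1 ; vext.32 d0, d0, d0, #1
  { DReg t = vext32(d1, d0); t = vext32(t, t); assert((t == DReg{0, 3})); }
  // vmov s0, s3  ->  vext.32 d0, d0, d0, #1 ; vext.32 d0, d1, d0, #1
  { DReg t = vext32(d0, d0); t = vext32(d1, t); assert((t == DReg{3, 1})); }
  // vmov s1, s2  ->  vext.32 d0, d0, d0, #1 ; vext.32 d0, d0, d1, #1
  { DReg t = vext32(d0, d0); t = vext32(t, d1); assert((t == DReg{0, 2})); }

  std::cout << "all four VEXT sequences check out\n";
}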
NewMBB = llvm::next(MachineFunction::iterator(WaterBB)); diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 15bb32e..8ed6b75 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1208,6 +1208,57 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ExpandLaneOp(MBBI); return true; + case ARM::VSETLNi8Q: + case ARM::VSETLNi16Q: { + // Expand VSETLNs acting on a Q register to equivalent VSETLNs acting + // on the respective D register. + + unsigned QReg = MI.getOperand(1).getReg(); + unsigned QLane = MI.getOperand(3).getImm(); + + unsigned NewOpcode, DLane, DSubReg; + switch (Opcode) { + default: llvm_unreachable("Invalid opcode!"); + case ARM::VSETLNi8Q: + // 8 possible 8-bit lanes per DPR: + NewOpcode = ARM::VSETLNi8; + DLane = QLane % 8; + DSubReg = (QLane / 8) ? ARM::dsub_1 : ARM::dsub_0; + break; + case ARM::VSETLNi16Q: + // 4 possible 16-bit lanes per DPR. + NewOpcode = ARM::VSETLNi16; + DLane = QLane % 4; + DSubReg = (QLane / 4) ? ARM::dsub_1 : ARM::dsub_0; + break; + } + + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpcode)); + + unsigned DReg = TRI->getSubReg(QReg, DSubReg); + + MIB.addReg(DReg, RegState::Define); // Output DPR + MIB.addReg(DReg); // Input DPR + MIB.addOperand(MI.getOperand(2)); // Input GPR + MIB.addImm(DLane); // Lane + + // Add the predicate operands. + MIB.addOperand(MI.getOperand(4)); + MIB.addOperand(MI.getOperand(5)); + + if (MI.getOperand(1).isKill()) // Add an implicit kill for the Q register. + MIB->addRegisterKilled(QReg, TRI, true); + // And an implicit def of the output register (which should always be the + // same as the input register). + MIB->addRegisterDefined(QReg, TRI); + + TransferImpOps(MI, MIB, MIB); + + MI.eraseFromParent(); + return true; + } + case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true; case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true; case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 5a5ca1b..045d904 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -617,10 +617,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { if (VT != MVT::i32) return 0; Reloc::Model RelocM = TM.getRelocationModel(); - - // TODO: Need more magic for ARM PIC. - if (!isThumb2 && (RelocM == Reloc::PIC_)) return 0; - + bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM); unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); // Use movw+movt when possible, it avoids constant pool entries. @@ -668,17 +665,30 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { .addConstantPoolIndex(Idx); if (RelocM == Reloc::PIC_) MIB.addImm(Id); + AddOptionalDefs(MIB); } else { // The extra immediate is for addrmode2. MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), DestReg) .addConstantPoolIndex(Idx) .addImm(0); + AddOptionalDefs(MIB); + + if (RelocM == Reloc::PIC_) { + unsigned Opc = IsIndirect ?
ARM::PICLDR : ARM::PICADD; + unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); + + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(Opc), NewDestReg) + .addReg(DestReg) + .addImm(Id); + AddOptionalDefs(MIB); + return NewDestReg; + } } - AddOptionalDefs(MIB); } - if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) { + if (IsIndirect) { MachineInstrBuilder MIB; unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); if (isThumb2) @@ -2212,25 +2222,17 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)); - if (isThumb2) { - // Explicitly adding the predicate here. + // BL / BLX don't take a predicate, but tBL / tBLX do. + if (isThumb2) AddDefaultPred(MIB); - if (EnableARMLongCalls) - MIB.addReg(CalleeReg); - else - MIB.addExternalSymbol(TLI.getLibcallName(Call)); - } else { - if (EnableARMLongCalls) - MIB.addReg(CalleeReg); - else - MIB.addExternalSymbol(TLI.getLibcallName(Call)); + if (EnableARMLongCalls) + MIB.addReg(CalleeReg); + else + MIB.addExternalSymbol(TLI.getLibcallName(Call)); - // Explicitly adding the predicate here. - AddDefaultPred(MIB); - } // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) - MIB.addReg(RegArgs[i]); + MIB.addReg(RegArgs[i], RegState::Implicit); // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). @@ -2358,30 +2360,20 @@ bool ARMFastISel::SelectCall(const Instruction *I, unsigned CallOpc = ARMSelectCallOp(UseReg); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)); - if(isThumb2) { - // Explicitly adding the predicate here. - AddDefaultPred(MIB); - if (UseReg) - MIB.addReg(CalleeReg); - else if (!IntrMemName) - MIB.addGlobalAddress(GV, 0, 0); - else - MIB.addExternalSymbol(IntrMemName, 0); - } else { - if (UseReg) - MIB.addReg(CalleeReg); - else if (!IntrMemName) - MIB.addGlobalAddress(GV, 0, 0); - else - MIB.addExternalSymbol(IntrMemName, 0); - // Explicitly adding the predicate here. + // ARM calls don't take a predicate, but tBL / tBLX do. + if(isThumb2) AddDefaultPred(MIB); - } + if (UseReg) + MIB.addReg(CalleeReg); + else if (!IntrMemName) + MIB.addGlobalAddress(GV, 0, 0); + else + MIB.addExternalSymbol(IntrMemName, 0); // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) - MIB.addReg(RegArgs[i]); + MIB.addReg(RegArgs[i], RegState::Implicit); // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). 
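The VSETLNi8Q/VSETLNi16Q expansion in the ARMExpandPseudoInsts hunk above turns a Q-register lane index into a D sub-register plus a D-register lane: a D register is 64 bits wide, so it holds 8 i8 lanes or 4 i16 lanes, and the quotient picks dsub_0 or dsub_1 while the remainder is the lane within it. Just the arithmetic, as a hypothetical helper:

#include <cassert>
#include <iostream>

struct DLanePos { unsigned DSub; unsigned Lane; };   // 0 = dsub_0, 1 = dsub_1

DLanePos splitQLane(unsigned ElemBits, unsigned QLane) {
  unsigned LanesPerD = 64 / ElemBits;                // 8 for i8, 4 for i16
  return {QLane / LanesPerD, QLane % LanesPerD};
}

int main() {
  assert(splitQLane(8, 3).DSub == 0 && splitQLane(8, 3).Lane == 3);
  assert(splitQLane(8, 12).DSub == 1 && splitQLane(8, 12).Lane == 4);
  assert(splitQLane(16, 5).DSub == 1 && splitQLane(16, 5).Lane == 1);
  std::cout << "ok\n";
}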
@@ -2650,7 +2642,7 @@ bool ARMFastISel::SelectShift(const Instruction *I, unsigned Reg1 = getRegForValue(Src1Value); if (Reg1 == 0) return false; - unsigned Reg2; + unsigned Reg2 = 0; if (Opc == ARM::MOVsr) { Reg2 = getRegForValue(Src2Value); if (Reg2 == 0) return false; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index c6f9d15..1406620 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -2637,6 +2637,38 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { dl, MVT::i32, MVT::i32, Ops, 5); } } + case ARMISD::UMLAL:{ + if (Subtarget->isThumb()) { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32)}; + return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops, 6); + }else{ + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? + ARM::UMLAL : ARM::UMLALv5, + dl, MVT::i32, MVT::i32, Ops, 7); + } + } + case ARMISD::SMLAL:{ + if (Subtarget->isThumb()) { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32)}; + return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops, 6); + }else{ + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? + ARM::SMLAL : ARM::SMLALv5, + dl, MVT::i32, MVT::i32, Ops, 7); + } + } case ISD::LOAD: { SDNode *ResNode = 0; if (Subtarget->isThumb() && Subtarget->hasThumb2()) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index df4039b..e51315e 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -514,6 +514,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FLOG10, MVT::v4f32, Expand); setOperationAction(ISD::FEXP, MVT::v4f32, Expand); setOperationAction(ISD::FEXP2, MVT::v4f32, Expand); + setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand); // Neon does not support some operations on v1i64 and v2i64 types. setOperationAction(ISD::MUL, MVT::v1i64, Expand); @@ -566,6 +567,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } } + // ARM and Thumb2 support UMLAL/SMLAL. + if (!Subtarget->isThumb1Only()) + setTargetDAGCombine(ISD::ADDC); + + computeRegisterProperties(); // ARM does not have f32 extending load. 
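The new ARMISD::UMLAL and ARMISD::SMLAL nodes model ARM's long multiply-accumulate, RdHi:RdLo := RdHi:RdLo + Rn * Rm, in unsigned and signed flavours; the ADDC combine added further down stitches this together from UMUL_LOHI/SMUL_LOHI plus the two adds. A small standalone check of the unsigned form against ordinary 64-bit arithmetic (function name invented):

#include <cassert>
#include <cstdint>
#include <iostream>

// RdHi:RdLo += Rn * Rm, unsigned 32x32->64 multiply plus 64-bit accumulate.
void umlal(uint32_t &Lo, uint32_t &Hi, uint32_t Rn, uint32_t Rm) {
  uint64_t Acc = (uint64_t(Hi) << 32) | Lo;
  Acc += uint64_t(Rn) * uint64_t(Rm);
  Lo = uint32_t(Acc);
  Hi = uint32_t(Acc >> 32);
}

int main() {
  uint32_t Lo = 0xffffffffu, Hi = 1;          // accumulator = 0x1ffffffff
  umlal(Lo, Hi, 0x10000u, 0x10000u);          // += 2^32
  assert(Lo == 0xffffffffu && Hi == 2);
  std::cout << "ok\n";
}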
@@ -791,12 +797,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::MUL); - - if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) { - setTargetDAGCombine(ISD::AND); - setTargetDAGCombine(ISD::OR); - setTargetDAGCombine(ISD::XOR); - } + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::XOR); if (Subtarget->hasV6Ops()) setTargetDAGCombine(ISD::SRL); @@ -981,6 +984,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VTBL2: return "ARMISD::VTBL2"; case ARMISD::VMULLs: return "ARMISD::VMULLs"; case ARMISD::VMULLu: return "ARMISD::VMULLu"; + case ARMISD::UMLAL: return "ARMISD::UMLAL"; + case ARMISD::SMLAL: return "ARMISD::SMLAL"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::FMAX: return "ARMISD::FMAX"; case ARMISD::FMIN: return "ARMISD::FMIN"; @@ -4154,10 +4159,21 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, } // Scan through the operands to see if only one value is used. + // + // As an optimisation, even if more than one value is used it may be more + // profitable to splat with one value then change some lanes. + // + // Heuristically we decide to do this if the vector has a "dominant" value, + // defined as splatted to more than half of the lanes. unsigned NumElts = VT.getVectorNumElements(); bool isOnlyLowElement = true; bool usesOnlyOneValue = true; + bool hasDominantValue = false; bool isConstant = true; + + // Map of the number of times a particular SDValue appears in the + // element list. + DenseMap<SDValue, unsigned> ValueCounts; SDValue Value; for (unsigned i = 0; i < NumElts; ++i) { SDValue V = Op.getOperand(i); @@ -4168,13 +4184,21 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) isConstant = false; - if (!Value.getNode()) + ValueCounts.insert(std::make_pair(V, 0)); + unsigned &Count = ValueCounts[V]; + + // Is this value dominant? (takes up more than half of the lanes) + if (++Count > (NumElts / 2)) { + hasDominantValue = true; Value = V; - else if (V != Value) - usesOnlyOneValue = false; + } } + if (ValueCounts.size() != 1) + usesOnlyOneValue = false; + if (!Value.getNode() && ValueCounts.size() > 0) + Value = ValueCounts.begin()->first; - if (!Value.getNode()) + if (ValueCounts.size() == 0) return DAG.getUNDEF(VT); if (isOnlyLowElement) @@ -4184,9 +4208,34 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // Use VDUP for non-constant splats. For f32 constant splats, reduce to // i32 and try again. - if (usesOnlyOneValue && EltSize <= 32) { - if (!isConstant) - return DAG.getNode(ARMISD::VDUP, dl, VT, Value); + if (hasDominantValue && EltSize <= 32) { + if (!isConstant) { + SDValue N; + + // If we are VDUPing a value that comes directly from a vector, that will + // cause an unnecessary move to and from a GPR, where instead we could + // just use VDUPLANE. + if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) + N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, + Value->getOperand(0), Value->getOperand(1)); + else + N = DAG.getNode(ARMISD::VDUP, dl, VT, Value); + + if (!usesOnlyOneValue) { + // The dominant value was splatted as 'N', but we now have to insert + // all differing elements. 
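The BUILD_VECTOR lowering above generalises the splat case: rather than requiring every lane to be identical, it looks for a value that occupies more than half the lanes, emits a VDUP (or VDUPLANE) of that value, and then patches the remaining lanes with INSERT_VECTOR_ELT. The counting-and-patching shape in plain C++, with toy types and no SelectionDAG:

#include <cassert>
#include <cstddef>
#include <iostream>
#include <map>
#include <vector>

std::vector<int> buildVector(const std::vector<int> &Elts) {
  std::map<int, unsigned> Counts;
  int Dominant = 0;
  bool HasDominant = false;
  for (int V : Elts)
    if (++Counts[V] > Elts.size() / 2) {       // "more than half of the lanes"
      Dominant = V;
      HasDominant = true;
    }

  if (!HasDominant)
    return Elts;                               // no profitable splat; other lowering

  std::vector<int> R(Elts.size(), Dominant);   // VDUP of the dominant value
  for (std::size_t I = 0; I < Elts.size(); ++I) // INSERT_VECTOR_ELT for the rest
    if (Elts[I] != Dominant)
      R[I] = Elts[I];
  return R;
}

int main() {
  std::vector<int> V = {7, 7, 7, 3};           // 7 dominates: one splat + one insert
  assert(buildVector(V) == V);
  std::cout << "ok\n";
}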
+ for (unsigned I = 0; I < NumElts; ++I) { + if (Op.getOperand(I) == Value) + continue; + SmallVector<SDValue, 3> Ops; + Ops.push_back(N); + Ops.push_back(Op.getOperand(I)); + Ops.push_back(DAG.getConstant(I, MVT::i32)); + N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3); + } + } + return N; + } if (VT.getVectorElementType().isFloatingPoint()) { SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i < NumElts; ++i) @@ -4198,9 +4247,11 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, if (Val.getNode()) return DAG.getNode(ISD::BITCAST, dl, VT, Val); } - SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); - if (Val.getNode()) - return DAG.getNode(ARMISD::VDUP, dl, VT, Val); + if (usesOnlyOneValue) { + SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); + if (isConstant && Val.getNode()) + return DAG.getNode(ARMISD::VDUP, dl, VT, Val); + } } // If all elements are constants and the case above didn't get hit, fall back @@ -5418,7 +5469,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, exitMBB->transferSuccessorsAndUpdatePHIs(BB); const TargetRegisterClass *TRC = isThumb2 ? - (const TargetRegisterClass*)&ARM::tGPRRegClass : + (const TargetRegisterClass*)&ARM::rGPRRegClass : (const TargetRegisterClass*)&ARM::GPRRegClass; unsigned scratch = MRI.createVirtualRegister(TRC); unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); @@ -5529,7 +5580,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, exitMBB->transferSuccessorsAndUpdatePHIs(BB); const TargetRegisterClass *TRC = isThumb2 ? - (const TargetRegisterClass*)&ARM::tGPRRegClass : + (const TargetRegisterClass*)&ARM::rGPRRegClass : (const TargetRegisterClass*)&ARM::GPRRegClass; unsigned scratch = MRI.createVirtualRegister(TRC); unsigned scratch2 = MRI.createVirtualRegister(TRC); @@ -7193,6 +7244,154 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp); } +static SDValue findMUL_LOHI(SDValue V) { + if (V->getOpcode() == ISD::UMUL_LOHI || + V->getOpcode() == ISD::SMUL_LOHI) + return V; + return SDValue(); +} + +static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + + if (Subtarget->isThumb1Only()) return SDValue(); + + // Only perform the checks after legalize when the pattern is available. + if (DCI.isBeforeLegalize()) return SDValue(); + + // Look for multiply add opportunities. + // The pattern is a ISD::UMUL_LOHI followed by two add nodes, where + // each add nodes consumes a value from ISD::UMUL_LOHI and there is + // a glue link from the first add to the second add. + // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by + // a S/UMLAL instruction. + // loAdd UMUL_LOHI + // \ / :lo \ :hi + // \ / \ [no multiline comment] + // ADDC | hiAdd + // \ :glue / / + // \ / / + // ADDE + // + assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC"); + SDValue AddcOp0 = AddcNode->getOperand(0); + SDValue AddcOp1 = AddcNode->getOperand(1); + + // Check if the two operands are from the same mul_lohi node. + if (AddcOp0.getNode() == AddcOp1.getNode()) + return SDValue(); + + assert(AddcNode->getNumValues() == 2 && + AddcNode->getValueType(0) == MVT::i32 && + AddcNode->getValueType(1) == MVT::Glue && + "Expect ADDC with two result values: i32, glue"); + + // Check that the ADDC adds the low result of the S/UMUL_LOHI. 
+ if (AddcOp0->getOpcode() != ISD::UMUL_LOHI && + AddcOp0->getOpcode() != ISD::SMUL_LOHI && + AddcOp1->getOpcode() != ISD::UMUL_LOHI && + AddcOp1->getOpcode() != ISD::SMUL_LOHI) + return SDValue(); + + // Look for the glued ADDE. + SDNode* AddeNode = AddcNode->getGluedUser(); + if (AddeNode == NULL) + return SDValue(); + + // Make sure it is really an ADDE. + if (AddeNode->getOpcode() != ISD::ADDE) + return SDValue(); + + assert(AddeNode->getNumOperands() == 3 && + AddeNode->getOperand(2).getValueType() == MVT::Glue && + "ADDE node has the wrong inputs"); + + // Check for the triangle shape. + SDValue AddeOp0 = AddeNode->getOperand(0); + SDValue AddeOp1 = AddeNode->getOperand(1); + + // Make sure that the ADDE operands are not coming from the same node. + if (AddeOp0.getNode() == AddeOp1.getNode()) + return SDValue(); + + // Find the MUL_LOHI node walking up ADDE's operands. + bool IsLeftOperandMUL = false; + SDValue MULOp = findMUL_LOHI(AddeOp0); + if (MULOp == SDValue()) + MULOp = findMUL_LOHI(AddeOp1); + else + IsLeftOperandMUL = true; + if (MULOp == SDValue()) + return SDValue(); + + // Figure out the right opcode. + unsigned Opc = MULOp->getOpcode(); + unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL; + + // Figure out the high and low input values to the MLAL node. + SDValue* HiMul = &MULOp; + SDValue* HiAdd = NULL; + SDValue* LoMul = NULL; + SDValue* LowAdd = NULL; + + if (IsLeftOperandMUL) + HiAdd = &AddeOp1; + else + HiAdd = &AddeOp0; + + + if (AddcOp0->getOpcode() == Opc) { + LoMul = &AddcOp0; + LowAdd = &AddcOp1; + } + if (AddcOp1->getOpcode() == Opc) { + LoMul = &AddcOp1; + LowAdd = &AddcOp0; + } + + if (LoMul == NULL) + return SDValue(); + + if (LoMul->getNode() != HiMul->getNode()) + return SDValue(); + + // Create the merged node. + SelectionDAG &DAG = DCI.DAG; + + // Build operand list. + SmallVector<SDValue, 8> Ops; + Ops.push_back(LoMul->getOperand(0)); + Ops.push_back(LoMul->getOperand(1)); + Ops.push_back(*LowAdd); + Ops.push_back(*HiAdd); + + SDValue MLALNode = DAG.getNode(FinalOpc, AddcNode->getDebugLoc(), + DAG.getVTList(MVT::i32, MVT::i32), + &Ops[0], Ops.size()); + + // Replace the ADDs' nodes uses by the MLA node's values. + SDValue HiMLALResult(MLALNode.getNode(), 1); + DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult); + + SDValue LoMLALResult(MLALNode.getNode(), 0); + DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult); + + // Return original node to notify the driver to stop replacing. + SDValue resNode(AddcNode, 0); + return resNode; +} + +/// PerformADDCCombine - Target-specific dag combine transform from +/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL. +static SDValue PerformADDCCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + + return AddCombineTo64bitMLAL(N, DCI, Subtarget); + +} + /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with /// operands N0 and N1. 
This is a helper for PerformADDCombine that is /// called with the default operands, and if that fails, with commuted @@ -8764,6 +8963,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; + case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget); case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); case ISD::SUB: return PerformSUBCombine(N, DCI); case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 13b83de..2b8f382 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -173,6 +173,9 @@ namespace llvm { VMULLs, // ...signed VMULLu, // ...unsigned + UMLAL, // 64bit Unsigned Accumulate Multiply + SMLAL, // 64bit Signed Accumulate Multiply + // Operands of the standard BUILD_VECTOR node are not legalized, which // is fine if BUILD_VECTORs are always lowered to shuffles or other // operations, but for ARM some BUILD_VECTORs are legal as-is and their @@ -257,6 +260,11 @@ namespace llvm { virtual const char *getTargetNodeName(unsigned Opcode) const; + virtual bool isSelectSupported(SelectSupportKind Kind) const { + // ARM does not support scalar condition selects on vectors. + return (Kind != ScalarCondVectorVal); + } + /// getSetCCResultType - Return the value type to use for ISD::SETCC. virtual EVT getSetCCResultType(EVT VT) const; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 992aba5..e23989e 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -83,6 +83,13 @@ def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, SDTCisInt<0>, SDTCisVT<1, i32>, SDTCisVT<4, i32>]>; + +def SDT_ARM64bitmlal : SDTypeProfile<2,4, [ SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>, + SDTCisVT<4, i32>, SDTCisVT<5, i32> ] >; +def ARMUmlal : SDNode<"ARMISD::UMLAL", SDT_ARM64bitmlal>; +def ARMSmlal : SDNode<"ARMISD::SMLAL", SDT_ARM64bitmlal>; + // Node definitions. 
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>; @@ -90,9 +97,10 @@ def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart, - [SDNPHasChain, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, + SDNPOptInGlue, SDNPOutGlue]>; def ARMcopystructbyval : SDNode<"ARMISD::COPY_STRUCT_BYVAL" , SDT_ARMStructByVal, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, @@ -148,14 +156,16 @@ def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>; def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>; def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", - SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>; + SDT_ARMEH_SJLJ_Setjmp, + [SDNPHasChain, SDNPSideEffect]>; def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP", - SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>; + SDT_ARMEH_SJLJ_Longjmp, + [SDNPHasChain, SDNPSideEffect]>; def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER, - [SDNPHasChain]>; + [SDNPHasChain, SDNPSideEffect]>; def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER, - [SDNPHasChain]>; + [SDNPHasChain, SDNPSideEffect]>; def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH, [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>; @@ -275,7 +285,7 @@ def imm16_31 : ImmLeaf<i32, [{ def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; } def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ - int64_t Value = -(int)N->getZExtValue(); + unsigned Value = -(unsigned)N->getZExtValue(); return Value && ARM_AM::getSOImmVal(Value) != -1; }], imm_neg_XFORM> { let ParserMatchClass = so_imm_neg_asmoperand; @@ -1791,12 +1801,15 @@ def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label), let Inst{15-12} = Rd; let Inst{11-0} = label{11-0}; } + +let hasSideEffects = 1 in { def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p), 4, IIC_iALUi, []>; def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), 4, IIC_iALUi, []>; +} //===----------------------------------------------------------------------===// // Control Flow Instructions. @@ -3399,6 +3412,18 @@ class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin, let Inst{11-8} = Rm; let Inst{3-0} = Rn; } +class AsMla1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> { + bits<4> RdLo; + bits<4> RdHi; + bits<4> Rm; + bits<4> Rn; + let Inst{19-16} = RdHi; + let Inst{15-12} = RdLo; + let Inst{11-8} = Rm; + let Inst{3-0} = Rn; +} // FIXME: The v5 pseudos are only necessary for the additional Constraint // property. 
Remove them when it's possible to add those properties @@ -3481,14 +3506,14 @@ def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), } // Multiply + accumulate -def SMLAL : AsMul1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64, +def SMLAL : AsMla1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi), + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64, "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV6]>; -def UMLAL : AsMul1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>; +def UMLAL : AsMla1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi), + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64, "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV6]>; + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>; def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64, @@ -3504,17 +3529,22 @@ def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi), let Inst{3-0} = Rn; } -let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { +let Constraints = "$RLo = $RdLo,$RHi = $RdHi" in { def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s), 4, IIC_iMAC64, [], - (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, + pred:$p, cc_out:$s)>, Requires<[IsARM, NoV6]>; def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s), 4, IIC_iMAC64, [], - (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, + pred:$p, cc_out:$s)>, Requires<[IsARM, NoV6]>; +} + +let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { def UMAALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, pred:$p), 4, IIC_iMAC64, [], @@ -3986,48 +4016,6 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd">; -// Conditional instructions -multiclass AsI1_bincc_irs<Instruction iri, Instruction irr, Instruction irsi, - Instruction irsr, - InstrItinClass iii, InstrItinClass iir, - InstrItinClass iis> { - def ri : ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rfalse, GPR:$Rn, so_imm:$imm, - pred:$p, cc_out:$s), - 4, iii, [], - (iri GPR:$Rd, GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - def rr : ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rfalse, GPR:$Rn, GPR:$Rm, - pred:$p, cc_out:$s), - 4, iir, [], - (irr GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - def rsi : ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rfalse, GPR:$Rn, so_reg_imm:$shift, - pred:$p, cc_out:$s), - 4, iis, [], - (irsi GPR:$Rd, GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - def rsr : ARMPseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rfalse, GPRnopc:$Rn, so_reg_reg:$shift, - pred:$p, cc_out:$s), - 4, iis, [], - (irsr GPR:$Rd, GPR:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; -} - -defm ANDCC : AsI1_bincc_irs<ANDri, ANDrr, ANDrsi, ANDrsr, - IIC_iBITi, IIC_iBITr, IIC_iBITsr>; -defm ORRCC : 
AsI1_bincc_irs<ORRri, ORRrr, ORRrsi, ORRrsr, - IIC_iBITi, IIC_iBITr, IIC_iBITsr>; -defm EORCC : AsI1_bincc_irs<EORri, EORrr, EORrsi, EORrsr, - IIC_iBITi, IIC_iBITr, IIC_iBITsr>; -defm ADDCC : AsI1_bincc_irs<ADDri, ADDrr, ADDrsi, ADDrsr, - IIC_iBITi, IIC_iBITr, IIC_iBITsr>; -defm SUBCC : AsI1_bincc_irs<SUBri, SUBrr, SUBrsi, SUBrsr, - IIC_iBITi, IIC_iBITr, IIC_iBITsr>; - } // neverHasSideEffects diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 048d340..8158a11 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1980,7 +1980,7 @@ def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, NEONvgetlaneu, addrmode6> { let Inst{7-6} = lane{1-0}; - let Inst{4} = Rn{5}; + let Inst{4} = Rn{4}; } def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt, @@ -2023,7 +2023,7 @@ def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16, NEONvgetlaneu, addrmode6> { let Inst{7-6} = lane{1-0}; - let Inst{4} = Rn{5}; + let Inst{4} = Rn{4}; } def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, extractelt, addrmode6oneL32> { @@ -5045,25 +5045,23 @@ def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V), GPR:$R, imm:$lane))]> { let Inst{21} = lane{0}; } + +def VSETLNi8Q : PseudoNeonI<(outs QPR:$V), + (ins QPR:$src1, GPR:$R, VectorIndex8:$lane), + IIC_VMOVISL, "", + [(set QPR:$V, (vector_insert (v16i8 QPR:$src1), + GPR:$R, imm:$lane))]>; +def VSETLNi16Q : PseudoNeonI<(outs QPR:$V), + (ins QPR:$src1, GPR:$R, VectorIndex16:$lane), + IIC_VMOVISL, "", + [(set QPR:$V, (vector_insert (v8i16 QPR:$src1), + GPR:$R, imm:$lane))]>; } -def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane), - (v16i8 (INSERT_SUBREG QPR:$src1, - (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1, - (DSubReg_i8_reg imm:$lane))), - GPR:$src2, (SubReg_i8_lane imm:$lane))), - (DSubReg_i8_reg imm:$lane)))>; -def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane), - (v8i16 (INSERT_SUBREG QPR:$src1, - (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1, - (DSubReg_i16_reg imm:$lane))), - GPR:$src2, (SubReg_i16_lane imm:$lane))), - (DSubReg_i16_reg imm:$lane)))>; + def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane), - (v4i32 (INSERT_SUBREG QPR:$src1, - (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1, - (DSubReg_i32_reg imm:$lane))), - GPR:$src2, (SubReg_i32_lane imm:$lane))), - (DSubReg_i32_reg imm:$lane)))>; + (v4i32 (INSERT_SUBREG QPR:$src1, + GPR:$src2, + (SSubReg_f32_reg imm:$lane)))>; def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)), (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)), diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 554f6d9..e171f8b 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -1200,6 +1200,7 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p), 2, IIC_iALUi, []>; +let hasSideEffects = 1 in def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), 2, IIC_iALUi, []>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 8ecf009..f1a6cce 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -523,6 +523,23 @@ class T2MulLong<bits<3> 
opc22_20, bits<4> opc7_4, let Inst{7-4} = opc7_4; let Inst{3-0} = Rm; } +class T2MlaLong<bits<3> opc22_20, bits<4> opc7_4, + dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + bits<4> RdLo; + bits<4> RdHi; + bits<4> Rn; + bits<4> Rm; + + let Inst{31-23} = 0b111110111; + let Inst{22-20} = opc22_20; + let Inst{19-16} = Rn; + let Inst{15-12} = RdLo; + let Inst{11-8} = RdHi; + let Inst{7-4} = opc7_4; + let Inst{3-0} = Rm; +} /// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a @@ -757,33 +774,6 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, let Inst{24} = 1; let Inst{23-21} = op23_21; } - - // Predicated versions. - def CCri : t2PseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_imm:$imm, - pred:$p, cc_out:$s), 4, IIC_iALUi, [], - (!cast<Instruction>(NAME#ri) GPRnopc:$Rd, - GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - def CCri12 : t2PseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rfalse, GPR:$Rn, imm0_4095:$imm, - pred:$p), - 4, IIC_iALUi, [], - (!cast<Instruction>(NAME#ri12) GPRnopc:$Rd, - GPR:$Rn, imm0_4095:$imm, pred:$p)>, - RegConstraint<"$Rfalse = $Rd">; - def CCrr : t2PseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rfalse, GPRnopc:$Rn, rGPR:$Rm, - pred:$p, cc_out:$s), 4, IIC_iALUr, [], - (!cast<Instruction>(NAME#rr) GPRnopc:$Rd, - GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - def CCrs : t2PseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_reg:$Rm, - pred:$p, cc_out:$s), 4, IIC_iALUsi, [], - (!cast<Instruction>(NAME#rs) GPRnopc:$Rd, - GPRnopc:$Rn, t2_so_reg:$Rm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; } /// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns @@ -1200,6 +1190,7 @@ def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd), let neverHasSideEffects = 1, isReMaterializable = 1 in def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p), 4, IIC_iALUi, []>; +let hasSideEffects = 1 in def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), 4, IIC_iALUi, @@ -2437,15 +2428,17 @@ def t2UMULL : T2MulLong<0b010, 0b0000, } // isCommutable // Multiply + accumulate -def t2SMLAL : T2MulLong<0b100, 0b0000, +def t2SMLAL : T2MlaLong<0b100, 0b0000, (outs rGPR:$RdLo, rGPR:$RdHi), - (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, - "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>; + (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64, + "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">; -def t2UMLAL : T2MulLong<0b110, 0b0000, +def t2UMLAL : T2MlaLong<0b110, 0b0000, (outs rGPR:$RdLo, rGPR:$RdHi), - (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, - "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>; + (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64, + "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">; def t2UMAAL : T2MulLong<0b110, 0b0110, (outs rGPR:$RdLo, rGPR:$RdHi), @@ -3049,37 +3042,6 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd), RegConstraint<"$false = $Rd">; } // isCodeGenOnly = 1 -multiclass T2I_bincc_irs<Instruction iri, Instruction irr, Instruction irs, - InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> { - // shifted imm - def ri : t2PseudoExpand<(outs rGPR:$Rd), - (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_imm:$imm, - pred:$p, cc_out:$s), - 4, iii, [], - (iri rGPR:$Rd, rGPR:$Rn, 
t2_so_imm:$imm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - // register - def rr : t2PseudoExpand<(outs rGPR:$Rd), - (ins rGPR:$Rfalse, rGPR:$Rn, rGPR:$Rm, - pred:$p, cc_out:$s), - 4, iir, [], - (irr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - // shifted register - def rs : t2PseudoExpand<(outs rGPR:$Rd), - (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_reg:$ShiftedRm, - pred:$p, cc_out:$s), - 4, iis, [], - (irs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; -} // T2I_bincc_irs - -defm t2ANDCC : T2I_bincc_irs<t2ANDri, t2ANDrr, t2ANDrs, - IIC_iBITi, IIC_iBITr, IIC_iBITsi>; -defm t2ORRCC : T2I_bincc_irs<t2ORRri, t2ORRrr, t2ORRrs, - IIC_iBITi, IIC_iBITr, IIC_iBITsi>; -defm t2EORCC : T2I_bincc_irs<t2EORri, t2EORrr, t2EORrs, - IIC_iBITi, IIC_iBITr, IIC_iBITsi>; } // neverHasSideEffects //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index a812e21..2dc49a9 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -169,7 +169,7 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, intptr_t LazyPtr = getIndirectSymAddr(Fn); if (!LazyPtr) { // In PIC mode, the function stub is loading a lazy-ptr. - LazyPtr= (intptr_t)emitGlobalValueIndirectSym((GlobalValue*)F, Fn, JCE); + LazyPtr= (intptr_t)emitGlobalValueIndirectSym((const GlobalValue*)F, Fn, JCE); DEBUG(if (F) errs() << "JIT: Indirect symbol emitted at [" << LazyPtr << "] for GV '" << F->getName() << "'\n"; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 3a5957b..e1e2f6e 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -181,49 +181,44 @@ class ARMAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index); // Asm Match Converter Methods - bool cvtT2LdrdPre(MCInst &Inst, unsigned Opcode, - const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtT2StrdPre(MCInst &Inst, unsigned Opcode, - const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, + void cvtT2LdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); + void cvtT2StrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); + void cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, + void cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + void cvtLdWriteBackRegAddrMode2(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, + void cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, + void cvtStWriteBackRegAddrModeImm12(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + void cvtStWriteBackRegAddrMode2(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + void cvtStWriteBackRegAddrMode3(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool 
cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode, + void cvtLdExtTWriteBackImm(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode, + void cvtLdExtTWriteBackReg(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode, + void cvtStExtTWriteBackImm(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode, + void cvtStExtTWriteBackReg(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdrdPre(MCInst &Inst, unsigned Opcode, - const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStrdPre(MCInst &Inst, unsigned Opcode, - const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + void cvtLdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); + void cvtStrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); + void cvtLdWriteBackRegAddrMode3(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtThumbMultiply(MCInst &Inst, unsigned Opcode, + void cvtThumbMultiply(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtVLDwbFixed(MCInst &Inst, unsigned Opcode, + void cvtVLDwbFixed(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtVLDwbRegister(MCInst &Inst, unsigned Opcode, + void cvtVLDwbRegister(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtVSTwbFixed(MCInst &Inst, unsigned Opcode, + void cvtVSTwbFixed(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtVSTwbRegister(MCInst &Inst, unsigned Opcode, + void cvtVSTwbRegister(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool validateInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Ops); bool processInstruction(MCInst &Inst, @@ -267,6 +262,12 @@ public: bool MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out); + + unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands, + unsigned OperandNum, unsigned &NumMCOperands) { + return getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum, NumMCOperands); + } }; } // end anonymous namespace @@ -3880,8 +3881,8 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { /// cvtT2LdrdPre - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtT2LdrdPre(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtT2LdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Rt, Rt2 ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -3892,14 +3893,13 @@ cvtT2LdrdPre(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtT2StrdPre - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtT2StrdPre(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtT2StrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. 
Inst.addOperand(MCOperand::CreateReg(0)); @@ -3910,14 +3910,13 @@ cvtT2StrdPre(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -3926,28 +3925,26 @@ cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdWriteBackRegAddrMode2(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -3956,14 +3953,13 @@ cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -3972,57 +3968,53 @@ cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStWriteBackRegAddrModeImm12(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. 
Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStWriteBackRegAddrMode2(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStWriteBackRegAddrMode3(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdExtTWriteBackImm - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdExtTWriteBackImm(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Rt ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -4034,14 +4026,13 @@ cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdExtTWriteBackReg - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdExtTWriteBackReg(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Rt ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -4053,14 +4044,13 @@ cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStExtTWriteBackImm - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStExtTWriteBackImm(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. 
Inst.addOperand(MCOperand::CreateImm(0)); @@ -4072,14 +4062,13 @@ cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStExtTWriteBackReg - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStExtTWriteBackReg(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); @@ -4091,14 +4080,13 @@ cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdrdPre - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdrdPre(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Rt, Rt2 ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -4109,14 +4097,13 @@ cvtLdrdPre(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStrdPre - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStrdPre(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); @@ -4127,40 +4114,27 @@ cvtStrdPre(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdWriteBackRegAddrMode3(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } -/// cvtThumbMultiple- Convert parsed operands to MCInst. +/// cvtThumbMultiply - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtThumbMultiply(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtThumbMultiply(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // The second source operand must be the same register as the destination - // operand. 
- if (Operands.size() == 6 && - (((ARMOperand*)Operands[3])->getReg() != - ((ARMOperand*)Operands[5])->getReg()) && - (((ARMOperand*)Operands[3])->getReg() != - ((ARMOperand*)Operands[4])->getReg())) { - Error(Operands[3]->getStartLoc(), - "destination register must match source register"); - return false; - } ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); ((ARMOperand*)Operands[1])->addCCOutOperands(Inst, 1); // If we have a three-operand form, make sure to set Rn to be the operand @@ -4173,12 +4147,10 @@ cvtThumbMultiply(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[RegOp])->addRegOperands(Inst, 1); Inst.addOperand(Inst.getOperand(0)); ((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2); - - return true; } -bool ARMAsmParser:: -cvtVLDwbFixed(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtVLDwbFixed(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Vd ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); @@ -4188,11 +4160,10 @@ cvtVLDwbFixed(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } -bool ARMAsmParser:: -cvtVLDwbRegister(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtVLDwbRegister(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Vd ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); @@ -4204,11 +4175,10 @@ cvtVLDwbRegister(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } -bool ARMAsmParser:: -cvtVSTwbFixed(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtVSTwbFixed(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); @@ -4218,11 +4188,10 @@ cvtVSTwbFixed(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } -bool ARMAsmParser:: -cvtVSTwbRegister(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtVSTwbRegister(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); @@ -4234,7 +4203,6 @@ cvtVSTwbRegister(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// Parse an ARM memory expression, return false if successful else return true @@ -5377,6 +5345,25 @@ validateInstruction(MCInst &Inst, "in register list"); break; } + case ARM::tMUL: { + // The second source operand must be the same register as the destination + // operand. + // + // In this case, we must directly check the parsed operands because the + // cvtThumbMultiply() function is written in such a way that it guarantees + // this first statement is always true for the new Inst. Essentially, the + // destination is unconditionally copied into the second source operand + // without checking to see if it matches what we actually parsed. 
+ if (Operands.size() == 6 && + (((ARMOperand*)Operands[3])->getReg() != + ((ARMOperand*)Operands[5])->getReg()) && + (((ARMOperand*)Operands[3])->getReg() != + ((ARMOperand*)Operands[4])->getReg())) { + return Error(Operands[3]->getStartLoc(), + "destination register must match source register"); + } + break; + } // Like for ldm/stm, push and pop have hi-reg handling version in Thumb2, // so only issue a diagnostic for thumb1. The instructions will be // switched to the t2 encodings in processInstruction() if necessary. @@ -7475,9 +7462,11 @@ MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out) { MCInst Inst; + unsigned Kind; unsigned ErrorInfo; unsigned MatchResult; - MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo); + + MatchResult = MatchInstructionImpl(Operands, Kind, Inst, ErrorInfo); switch (MatchResult) { default: break; case Match_Success: @@ -7540,9 +7529,6 @@ MatchAndEmitInstruction(SMLoc IDLoc, case Match_MnemonicFail: return Error(IDLoc, "invalid instruction", ((ARMOperand*)Operands[0])->getLocRange()); - case Match_ConversionFail: - // The converter function will have already emitted a diagnostic. - return true; case Match_RequiresNotITBlock: return Error(IDLoc, "flag setting instruction only valid outside IT block"); case Match_RequiresITBlock: diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index c90751d..57642e1 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -2701,6 +2701,8 @@ static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn, unsigned align = fieldFromInstruction(Insn, 4, 1); unsigned size = fieldFromInstruction(Insn, 6, 2); + if (size == 0 && align == 1) + return MCDisassembler::Fail; align *= (1 << size); switch (Inst.getOpcode()) { @@ -2831,6 +2833,8 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn, unsigned align = fieldFromInstruction(Insn, 4, 1); if (size == 0x3) { + if (align == 0) + return MCDisassembler::Fail; size = 4; align = 16; } else { @@ -3170,7 +3174,7 @@ static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, int imm = Val & 0xFF; if (!(Val & 0x100)) imm *= -1; - Inst.addOperand(MCOperand::CreateImm(imm << 2)); + Inst.addOperand(MCOperand::CreateImm(imm * 4)); } return MCDisassembler::Success; @@ -3710,8 +3714,16 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, if (fieldFromInstruction(Insn, 6, 1)) return MCDisassembler::Fail; // UNDEFINED index = fieldFromInstruction(Insn, 7, 1); - if (fieldFromInstruction(Insn, 4, 2) != 0) - align = 4; + + switch (fieldFromInstruction(Insn, 4, 2)) { + case 0 : + align = 0; break; + case 3: + align = 4; break; + default: + return MCDisassembler::Fail; + } + break; } if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) @@ -3769,8 +3781,16 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, if (fieldFromInstruction(Insn, 6, 1)) return MCDisassembler::Fail; // UNDEFINED index = fieldFromInstruction(Insn, 7, 1); - if (fieldFromInstruction(Insn, 4, 2) != 0) - align = 4; + + switch (fieldFromInstruction(Insn, 4, 2)) { + case 0: + align = 0; break; + case 3: + align = 4; break; + default: + return MCDisassembler::Fail; + } + break; } if (Rm != 0xF) { // Writeback @@ -4090,8 +4110,15 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, inc = 2; break; case 2: - if (fieldFromInstruction(Insn, 4, 2)) - align = 4 << 
fieldFromInstruction(Insn, 4, 2); + switch (fieldFromInstruction(Insn, 4, 2)) { + case 0: + align = 0; break; + case 3: + return MCDisassembler::Fail; + default: + align = 4 << fieldFromInstruction(Insn, 4, 2); break; + } + index = fieldFromInstruction(Insn, 7, 1); if (fieldFromInstruction(Insn, 6, 1)) inc = 2; @@ -4164,8 +4191,15 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, inc = 2; break; case 2: - if (fieldFromInstruction(Insn, 4, 2)) - align = 4 << fieldFromInstruction(Insn, 4, 2); + switch (fieldFromInstruction(Insn, 4, 2)) { + case 0: + align = 0; break; + case 3: + return MCDisassembler::Fail; + default: + align = 4 << fieldFromInstruction(Insn, 4, 2); break; + } + index = fieldFromInstruction(Insn, 7, 1); if (fieldFromInstruction(Insn, 6, 1)) inc = 2; diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 7d6acbc..b53da3b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -194,6 +194,10 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case ARM::fixup_arm_uncondbranch: Type = ELF::R_ARM_JUMP24; break; + case ARM::fixup_t2_condbranch: + case ARM::fixup_t2_uncondbranch: + Type = ELF::R_ARM_THM_JUMP24; + break; case ARM::fixup_arm_movt_hi16: case ARM::fixup_arm_movt_hi16_pcrel: Type = ELF::R_ARM_MOVT_PREL; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index d32805e..c1aab9c 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -50,7 +50,6 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() { Code32Directive = ".code\t32"; WeakRefDirective = "\t.weak\t"; - LCOMMDirectiveType = LCOMM::NoAlignment; HasLEB128 = true; SupportsDebugInformation = true; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 94f1082..1917564 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -783,7 +783,7 @@ getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx, // Immediate is always encoded as positive. The 'U' bit controls add vs sub. if (Imm8 < 0) - Imm8 = -Imm8; + Imm8 = -(uint32_t)Imm8; // Scaled by 4. Imm8 /= 4; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index a51e0fa..95640f7 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -410,7 +410,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, if (Type == macho::RIT_ARM_Half) { // The other-half value only gets populated for the movt and movw // relocation entries. 
- uint32_t Value = 0;; + uint32_t Value = 0; switch ((unsigned)Fixup.getKind()) { default: break; case ARM::fixup_arm_movw_lo16: diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp index 03d5a9a..3396e8b 100644 --- a/lib/Target/CellSPU/SPUAsmPrinter.cpp +++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp @@ -130,8 +130,7 @@ namespace { void printS10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16) - >> 16); + short value = MI->getOperand(OpNo).getImm(); assert((value >= -(1 << 9) && value <= (1 << 9) - 1) && "Invalid s10 argument"); O << value; @@ -140,8 +139,7 @@ namespace { void printU10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16) - >> 16); + short value = MI->getOperand(OpNo).getImm(); assert((value <= (1 << 10) - 1) && "Invalid u10 argument"); O << value; } diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index c27caea..425371d 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -83,12 +83,10 @@ namespace { return true; } else if (vt == MVT::i32) { int32_t i_val = (int32_t) CN->getZExtValue(); - short s_val = (short) i_val; - return i_val == s_val; + return i_val == SignExtend32<16>(i_val); } else { int64_t i_val = (int64_t) CN->getZExtValue(); - short s_val = (short) i_val; - return i_val == s_val; + return i_val == SignExtend64<16>(i_val); } } @@ -99,9 +97,10 @@ namespace { EVT vt = FPN->getValueType(0); if (vt == MVT::f32) { int val = FloatToBits(FPN->getValueAPF().convertToFloat()); - int sval = (int) ((val << 16) >> 16); - Imm = (short) val; - return val == sval; + if (val == SignExtend32<16>(val)) { + Imm = (short) val; + return true; + } } return false; diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index 1f2d8ac..306084b 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_target(HexagonCodeGen HexagonExpandPredSpillCode.cpp HexagonFrameLowering.cpp HexagonHardwareLoops.cpp + HexagonMachineScheduler.cpp HexagonMCInstLower.cpp HexagonInstrInfo.cpp HexagonISelDAGToDAG.cpp diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp new file mode 100644 index 0000000..b131a8f --- /dev/null +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -0,0 +1,952 @@ +//===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// MachineScheduler schedules machine instructions after phi elimination. It +// preserves LiveIntervals so it can be invoked before register allocation. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "misched" + +#include "HexagonMachineScheduler.h" + +#include <queue> + +using namespace llvm; + +static cl::opt<bool> ForceTopDown("vliw-misched-topdown", cl::Hidden, + cl::desc("Force top-down list scheduling")); +static cl::opt<bool> ForceBottomUp("vliw-misched-bottomup", cl::Hidden, + cl::desc("Force bottom-up list scheduling")); + +#ifndef NDEBUG +static cl::opt<bool> ViewMISchedDAGs("vliw-view-misched-dags", cl::Hidden, + cl::desc("Pop up a window to show MISched dags after they are processed")); + +static cl::opt<unsigned> MISchedCutoff("vliw-misched-cutoff", cl::Hidden, + cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); +#else +static bool ViewMISchedDAGs = false; +#endif // NDEBUG + +/// Decrement this iterator until reaching the top or a non-debug instr. +static MachineBasicBlock::iterator +priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) { + assert(I != Beg && "reached the top of the region, cannot decrement"); + while (--I != Beg) { + if (!I->isDebugValue()) + break; + } + return I; +} + +/// If this iterator is a debug value, increment until reaching the End or a +/// non-debug instruction. +static MachineBasicBlock::iterator +nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) { + for(; I != End; ++I) { + if (!I->isDebugValue()) + break; + } + return I; +} + +/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When +/// NumPredsLeft reaches zero, release the successor node. +/// +/// FIXME: Adjust SuccSU height based on MinLatency. +void VLIWMachineScheduler::releaseSucc(SUnit *SU, SDep *SuccEdge) { + SUnit *SuccSU = SuccEdge->getSUnit(); + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + SuccSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --SuccSU->NumPredsLeft; + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) + SchedImpl->releaseTopNode(SuccSU); +} + +/// releaseSuccessors - Call releaseSucc on each of SU's successors. +void VLIWMachineScheduler::releaseSuccessors(SUnit *SU) { + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + releaseSucc(SU, &*I); + } +} + +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When +/// NumSuccsLeft reaches zero, release the predecessor node. +/// +/// FIXME: Adjust PredSU height based on MinLatency. +void VLIWMachineScheduler::releasePred(SUnit *SU, SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + +#ifndef NDEBUG + if (PredSU->NumSuccsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + PredSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --PredSU->NumSuccsLeft; + if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) + SchedImpl->releaseBottomNode(PredSU); +} + +/// releasePredecessors - Call releasePred on each of SU's predecessors. +void VLIWMachineScheduler::releasePredecessors(SUnit *SU) { + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + releasePred(SU, &*I); + } +} + +void VLIWMachineScheduler::moveInstruction(MachineInstr *MI, + MachineBasicBlock::iterator InsertPos) { + // Advance RegionBegin if the first instruction moves down. + if (&*RegionBegin == MI) + ++RegionBegin; + + // Update the instruction stream. 
+ BB->splice(InsertPos, BB, MI); + + // Update LiveIntervals + LIS->handleMove(MI); + + // Recede RegionBegin if an instruction moves above the first. + if (RegionBegin == InsertPos) + RegionBegin = MI; +} + +bool VLIWMachineScheduler::checkSchedLimit() { +#ifndef NDEBUG + if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) { + CurrentTop = CurrentBottom; + return false; + } + ++NumInstrsScheduled; +#endif + return true; +} + +/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after +/// crossing a scheduling boundary. [begin, end) includes all instructions in +/// the region, including the boundary itself and single-instruction regions +/// that don't get scheduled. +void VLIWMachineScheduler::enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount) +{ + ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount); + + // For convenience remember the end of the liveness region. + LiveRegionEnd = + (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd); +} + +// Setup the register pressure trackers for the top scheduled top and bottom +// scheduled regions. +void VLIWMachineScheduler::initRegPressure() { + TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin); + BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); + + // Close the RPTracker to finalize live ins. + RPTracker.closeRegion(); + + DEBUG(RPTracker.getPressure().dump(TRI)); + + // Initialize the live ins and live outs. + TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs); + BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs); + + // Close one end of the tracker so we can call + // getMaxUpward/DownwardPressureDelta before advancing across any + // instructions. This converts currently live regs into live ins/outs. + TopRPTracker.closeTop(); + BotRPTracker.closeBottom(); + + // Account for liveness generated by the region boundary. + if (LiveRegionEnd != RegionEnd) + BotRPTracker.recede(); + + assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom"); + + // Cache the list of excess pressure sets in this region. This will also track + // the max pressure in the scheduled code for these sets. + RegionCriticalPSets.clear(); + std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure; + for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) { + unsigned Limit = TRI->getRegPressureSetLimit(i); + DEBUG(dbgs() << TRI->getRegPressureSetName(i) + << "Limit " << Limit + << " Actual " << RegionPressure[i] << "\n"); + if (RegionPressure[i] > Limit) + RegionCriticalPSets.push_back(PressureElement(i, 0)); + } + DEBUG(dbgs() << "Excess PSets: "; + for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i) + dbgs() << TRI->getRegPressureSetName( + RegionCriticalPSets[i].PSetID) << " "; + dbgs() << "\n"); + + TotalPackets = 0; +} + +// FIXME: When the pressure tracker deals in pressure differences then we won't +// iterate over all RegionCriticalPSets[i]. +void VLIWMachineScheduler:: +updateScheduledPressure(std::vector<unsigned> NewMaxPressure) { + for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) { + unsigned ID = RegionCriticalPSets[i].PSetID; + int &MaxUnits = RegionCriticalPSets[i].UnitIncrease; + if ((int)NewMaxPressure[ID] > MaxUnits) + MaxUnits = NewMaxPressure[ID]; + } +} + +/// Check if scheduling of this SU is possible +/// in the current packet. +/// It is _not_ precise (statefull), it is more like +/// another heuristic. 
Many corner cases are figured +/// empirically. +bool VLIWResourceModel::isResourceAvailable(SUnit *SU) { + if (!SU || !SU->getInstr()) + return false; + + // First see if the pipeline could receive this instruction + // in the current cycle. + switch (SU->getInstr()->getOpcode()) { + default: + if (!ResourcesModel->canReserveResources(SU->getInstr())) + return false; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::COPY: + case TargetOpcode::INLINEASM: + break; + } + + // Now see if there are no other dependencies to instructions already + // in the packet. + for (unsigned i = 0, e = Packet.size(); i != e; ++i) { + if (Packet[i]->Succs.size() == 0) + continue; + for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), + E = Packet[i]->Succs.end(); I != E; ++I) { + // Since we do not add pseudos to packets, might as well + // ignore order dependencies. + if (I->isCtrl()) + continue; + + if (I->getSUnit() == SU) + return false; + } + } + return true; +} + +/// Keep track of available resources. +bool VLIWResourceModel::reserveResources(SUnit *SU) { + bool startNewCycle = false; + // If this SU does not fit in the packet + // start a new one. + if (!isResourceAvailable(SU)) { + ResourcesModel->clearResources(); + Packet.clear(); + TotalPackets++; + startNewCycle = true; + } + + switch (SU->getInstr()->getOpcode()) { + default: + ResourcesModel->reserveResources(SU->getInstr()); + break; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + case TargetOpcode::PROLOG_LABEL: + case TargetOpcode::EH_LABEL: + case TargetOpcode::COPY: + case TargetOpcode::INLINEASM: + break; + } + Packet.push_back(SU); + +#ifndef NDEBUG + DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n"); + for (unsigned i = 0, e = Packet.size(); i != e; ++i) { + DEBUG(dbgs() << "\t[" << i << "] SU("); + DEBUG(dbgs() << Packet[i]->NodeNum << ")\t"); + DEBUG(Packet[i]->getInstr()->dump()); + } +#endif + + // If packet is now full, reset the state so in the next cycle + // we start fresh. + if (Packet.size() >= InstrItins->SchedModel->IssueWidth) { + ResourcesModel->clearResources(); + Packet.clear(); + TotalPackets++; + startNewCycle = true; + } + + return startNewCycle; +} + +// Release all DAG roots for scheduling. +void VLIWMachineScheduler::releaseRoots() { + SmallVector<SUnit*, 16> BotRoots; + + for (std::vector<SUnit>::iterator + I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { + // A SUnit is ready to top schedule if it has no predecessors. + if (I->Preds.empty()) + SchedImpl->releaseTopNode(&(*I)); + // A SUnit is ready to bottom schedule if it has no successors. + if (I->Succs.empty()) + BotRoots.push_back(&(*I)); + } + // Release bottom roots in reverse order so the higher priority nodes appear + // first. This is more natural and slightly more efficient. + for (SmallVectorImpl<SUnit*>::const_reverse_iterator + I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) + SchedImpl->releaseBottomNode(*I); +} + +/// schedule - Called back from MachineScheduler::runOnMachineFunction +/// after setting up the current scheduling region. [RegionBegin, RegionEnd) +/// only includes instructions that have DAG nodes, not scheduling boundaries. 
+void VLIWMachineScheduler::schedule() { + DEBUG(dbgs() + << "********** MI Converging Scheduling VLIW BB#" << BB->getNumber() + << " " << BB->getName() + << " in_func " << BB->getParent()->getFunction()->getName() + << " at loop depth " << MLI->getLoopDepth(BB) + << " \n"); + + // Initialize the register pressure tracker used by buildSchedGraph. + RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); + + // Account for liveness generate by the region boundary. + if (LiveRegionEnd != RegionEnd) + RPTracker.recede(); + + // Build the DAG, and compute current register pressure. + buildSchedGraph(AA, &RPTracker); + + // Initialize top/bottom trackers after computing region pressure. + initRegPressure(); + + // To view Height/Depth correctly, they should be accessed at least once. + DEBUG(unsigned maxH = 0; + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + if (SUnits[su].getHeight() > maxH) + maxH = SUnits[su].getHeight(); + dbgs() << "Max Height " << maxH << "\n";); + DEBUG(unsigned maxD = 0; + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + if (SUnits[su].getDepth() > maxD) + maxD = SUnits[su].getDepth(); + dbgs() << "Max Depth " << maxD << "\n";); + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + + if (ViewMISchedDAGs) viewGraph(); + + SchedImpl->initialize(this); + + // Release edges from the special Entry node or to the special Exit node. + releaseSuccessors(&EntrySU); + releasePredecessors(&ExitSU); + + // Release all DAG roots for scheduling. + releaseRoots(); + + CurrentTop = nextIfDebug(RegionBegin, RegionEnd); + CurrentBottom = RegionEnd; + bool IsTopNode = false; + while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + if (!checkSchedLimit()) + break; + + // Move the instruction to its new location in the instruction stream. + MachineInstr *MI = SU->getInstr(); + + if (IsTopNode) { + assert(SU->isTopReady() && "node still has unscheduled dependencies"); + if (&*CurrentTop == MI) + CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); + else { + moveInstruction(MI, CurrentTop); + TopRPTracker.setPos(MI); + } + + // Update top scheduled pressure. + TopRPTracker.advance(); + assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); + updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure); + + // Release dependent instructions for scheduling. + releaseSuccessors(SU); + } else { + assert(SU->isBottomReady() && "node still has unscheduled dependencies"); + MachineBasicBlock::iterator priorII = + priorNonDebug(CurrentBottom, CurrentTop); + if (&*priorII == MI) + CurrentBottom = priorII; + else { + if (&*CurrentTop == MI) { + CurrentTop = nextIfDebug(++CurrentTop, priorII); + TopRPTracker.setPos(CurrentTop); + } + moveInstruction(MI, CurrentBottom); + CurrentBottom = MI; + } + // Update bottom scheduled pressure. + BotRPTracker.recede(); + assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); + updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure); + + // Release dependent instructions for scheduling. + releasePredecessors(SU); + } + SU->isScheduled = true; + SchedImpl->schedNode(SU, IsTopNode); + } + assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); + + placeDebugValues(); +} + +/// Reinsert any remaining debug_values, just like the PostRA scheduler. +void VLIWMachineScheduler::placeDebugValues() { + // If first instruction was a DBG_VALUE then put it back. 
+ if (FirstDbgValue) { + BB->splice(RegionBegin, BB, FirstDbgValue); + RegionBegin = FirstDbgValue; + } + + for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator + DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { + std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); + MachineInstr *DbgValue = P.first; + MachineBasicBlock::iterator OrigPrevMI = P.second; + BB->splice(++OrigPrevMI, BB, DbgValue); + if (OrigPrevMI == llvm::prior(RegionEnd)) + RegionEnd = DbgValue; + } + DbgValues.clear(); + FirstDbgValue = NULL; +} + +void ConvergingVLIWScheduler::initialize(VLIWMachineScheduler *dag) { + DAG = dag; + TRI = DAG->TRI; + Top.DAG = dag; + Bot.DAG = dag; + + // Initialize the HazardRecognizers. + const TargetMachine &TM = DAG->MF.getTarget(); + const InstrItineraryData *Itin = TM.getInstrItineraryData(); + Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + + Top.ResourceModel = new VLIWResourceModel(TM); + Bot.ResourceModel = new VLIWResourceModel(TM); + + assert((!ForceTopDown || !ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); +} + +void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) { + if (SU->isScheduled) + return; + + for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; + unsigned MinLatency = I->getMinLatency(); +#ifndef NDEBUG + Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); +#endif + if (SU->TopReadyCycle < PredReadyCycle + MinLatency) + SU->TopReadyCycle = PredReadyCycle + MinLatency; + } + Top.releaseNode(SU, SU->TopReadyCycle); +} + +void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) { + if (SU->isScheduled) + return; + + assert(SU->getInstr() && "Scheduled SUnit must have instr"); + + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; + unsigned MinLatency = I->getMinLatency(); +#ifndef NDEBUG + Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); +#endif + if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) + SU->BotReadyCycle = SuccReadyCycle + MinLatency; + } + Bot.releaseNode(SU, SU->BotReadyCycle); +} + +/// Does this SU have a hazard within the current instruction group. +/// +/// The scheduler supports two modes of hazard recognition. The first is the +/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that +/// supports highly complicated in-order reservation tables +/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. +/// +/// The second is a streamlined mechanism that checks for hazards based on +/// simple counters that the scheduler itself maintains. It explicitly checks +/// for instruction dispatch limitations, including the number of micro-ops that +/// can dispatch per cycle. +/// +/// TODO: Also check whether the SU must start a new group. +bool ConvergingVLIWScheduler::SchedBoundary::checkHazard(SUnit *SU) { + if (HazardRec->isEnabled()) + return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; + + if (IssueCount + DAG->getNumMicroOps(SU->getInstr()) > DAG->getIssueWidth()) + return true; + + return false; +} + +void ConvergingVLIWScheduler::SchedBoundary::releaseNode(SUnit *SU, + unsigned ReadyCycle) { + if (ReadyCycle < MinReadyCycle) + MinReadyCycle = ReadyCycle; + + // Check for interlocks first. 
For the purpose of other heuristics, an + // instruction that cannot issue appears as if it's not in the ReadyQueue. + if (ReadyCycle > CurrCycle || checkHazard(SU)) + + Pending.push(SU); + else + Available.push(SU); +} + +/// Move the boundary of scheduled code by one cycle. +void ConvergingVLIWScheduler::SchedBoundary::bumpCycle() { + unsigned Width = DAG->getIssueWidth(); + IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; + + assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); + unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle); + + if (!HazardRec->isEnabled()) { + // Bypass HazardRec virtual calls. + CurrCycle = NextCycle; + } else { + // Bypass getHazardType calls in case of long latency. + for (; CurrCycle != NextCycle; ++CurrCycle) { + if (isTop()) + HazardRec->AdvanceCycle(); + else + HazardRec->RecedeCycle(); + } + } + CheckPending = true; + + DEBUG(dbgs() << "*** " << Available.getName() << " cycle " + << CurrCycle << '\n'); +} + +/// Move the boundary of scheduled code by one SUnit. +void ConvergingVLIWScheduler::SchedBoundary::bumpNode(SUnit *SU) { + bool startNewCycle = false; + + // Update the reservation table. + if (HazardRec->isEnabled()) { + if (!isTop() && SU->isCall) { + // Calls are scheduled with their preceding instructions. For bottom-up + // scheduling, clear the pipeline state before emitting. + HazardRec->Reset(); + } + HazardRec->EmitInstruction(SU); + } + + // Update DFA model. + startNewCycle = ResourceModel->reserveResources(SU); + + // Check the instruction group dispatch limit. + // TODO: Check if this SU must end a dispatch group. + IssueCount += DAG->getNumMicroOps(SU->getInstr()); + if (startNewCycle) { + DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n'); + bumpCycle(); + } + else + DEBUG(dbgs() << "*** IssueCount " << IssueCount + << " at cycle " << CurrCycle << '\n'); +} + +/// Release pending ready nodes in to the available queue. This makes them +/// visible to heuristics. +void ConvergingVLIWScheduler::SchedBoundary::releasePending() { + // If the available queue is empty, it is safe to reset MinReadyCycle. + if (Available.empty()) + MinReadyCycle = UINT_MAX; + + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = Pending.size(); i != e; ++i) { + SUnit *SU = *(Pending.begin()+i); + unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; + + if (ReadyCycle < MinReadyCycle) + MinReadyCycle = ReadyCycle; + + if (ReadyCycle > CurrCycle) + continue; + + if (checkHazard(SU)) + continue; + + Available.push(SU); + Pending.remove(Pending.begin()+i); + --i; --e; + } + CheckPending = false; +} + +/// Remove SU from the ready set for this boundary. +void ConvergingVLIWScheduler::SchedBoundary::removeReady(SUnit *SU) { + if (Available.isInQueue(SU)) + Available.remove(Available.find(SU)); + else { + assert(Pending.isInQueue(SU) && "bad ready count"); + Pending.remove(Pending.find(SU)); + } +} + +/// If this queue only has one ready candidate, return it. As a side effect, +/// advance the cycle until at least one node is ready. If multiple instructions +/// are ready, return NULL. 
+SUnit *ConvergingVLIWScheduler::SchedBoundary::pickOnlyChoice() { + if (CheckPending) + releasePending(); + + for (unsigned i = 0; Available.empty(); ++i) { + assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && + "permanent hazard"); (void)i; + bumpCycle(); + releasePending(); + } + if (Available.size() == 1) + return *Available.begin(); + return NULL; +} + +#ifndef NDEBUG +void ConvergingVLIWScheduler::traceCandidate(const char *Label, + const ReadyQueue &Q, + SUnit *SU, PressureElement P) { + dbgs() << Label << " " << Q.getName() << " "; + if (P.isValid()) + dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease + << " "; + else + dbgs() << " "; + SU->dump(DAG); +} +#endif + +/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor +/// of SU, return it, otherwise return null. +static SUnit *getSingleUnscheduledPred(SUnit *SU) { + SUnit *OnlyAvailablePred = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + SUnit &Pred = *I->getSUnit(); + if (!Pred.isScheduled) { + // We found an available, but not scheduled, predecessor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailablePred && OnlyAvailablePred != &Pred) + return 0; + OnlyAvailablePred = &Pred; + } + } + return OnlyAvailablePred; +} + +/// getSingleUnscheduledSucc - If there is exactly one unscheduled successor +/// of SU, return it, otherwise return null. +static SUnit *getSingleUnscheduledSucc(SUnit *SU) { + SUnit *OnlyAvailableSucc = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + SUnit &Succ = *I->getSUnit(); + if (!Succ.isScheduled) { + // We found an available, but not scheduled, successor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ) + return 0; + OnlyAvailableSucc = &Succ; + } + } + return OnlyAvailableSucc; +} + +// Constants used to denote relative importance of +// heuristic components for cost computation. +static const unsigned PriorityOne = 200; +static const unsigned PriorityTwo = 100; +static const unsigned PriorityThree = 50; +static const unsigned PriorityFour = 20; +static const unsigned ScaleTwo = 10; +static const unsigned FactorOne = 2; + +/// Single point to compute overall scheduling cost. +/// TODO: More heuristics will be used soon. +int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, + SchedCandidate &Candidate, + RegPressureDelta &Delta, + bool verbose) { + // Initial trivial priority. + int ResCount = 1; + + // Do not waste time on a node that is already scheduled. + if (!SU || SU->isScheduled) + return ResCount; + + // Forced priority is high. + if (SU->isScheduleHigh) + ResCount += PriorityOne; + + // Critical path first. + if (Q.getID() == TopQID) { + ResCount += (SU->getHeight() * ScaleTwo); + + // If resources are available for it, multiply the + // chance of scheduling. + if (Top.ResourceModel->isResourceAvailable(SU)) + ResCount <<= FactorOne; + } else { + ResCount += (SU->getDepth() * ScaleTwo); + + // If resources are available for it, multiply the + // chance of scheduling. + if (Bot.ResourceModel->isResourceAvailable(SU)) + ResCount <<= FactorOne; + } + + unsigned NumNodesBlocking = 0; + if (Q.getID() == TopQID) { + // How many SUs does it block from scheduling? + // Look at all of the successors of this node. + // Count the number of nodes that + // this node is the sole unscheduled node for. 
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + if (getSingleUnscheduledPred(I->getSUnit()) == SU) + ++NumNodesBlocking; + } else { + // How many unscheduled predecessors block this node? + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (getSingleUnscheduledSucc(I->getSUnit()) == SU) + ++NumNodesBlocking; + } + ResCount += (NumNodesBlocking * ScaleTwo); + + // Factor in reg pressure as a heuristic. + ResCount -= (Delta.Excess.UnitIncrease*PriorityThree); + ResCount -= (Delta.CriticalMax.UnitIncrease*PriorityThree); + + DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")"); + + return ResCount; +} + +/// Pick the best candidate from the top queue. +/// +/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during +/// DAG building. To adjust for the current scheduling location we need to +/// maintain the number of vreg uses remaining to be top-scheduled. +ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler:: +pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, + SchedCandidate &Candidate) { + DEBUG(Q.dump()); + + // getMaxPressureDelta temporarily modifies the tracker. + RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); + + // BestSU remains NULL if no top candidates beat the best existing candidate. + CandResult FoundCandidate = NoCand; + for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + RegPressureDelta RPDelta; + TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + + int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false); + + // Initialize the candidate if needed. + if (!Candidate.SU) { + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = NodeOrder; + continue; + } + + // Best cost. + if (CurrentCost > Candidate.SCost) { + DEBUG(traceCandidate("CCAND", Q, *I)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = BestCost; + continue; + } + + // Fall through to original instruction order. + // Only consider node order if Candidate was chosen from this Q. + if (FoundCandidate == NoCand) + continue; + } + return FoundCandidate; +} + +/// Pick the best candidate node from either the top or bottom queue. +SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { + // Schedule as far as possible in the direction of no choice. This is most + // efficient, but also provides the best heuristics for CriticalPSets. + if (SUnit *SU = Bot.pickOnlyChoice()) { + IsTopNode = false; + return SU; + } + if (SUnit *SU = Top.pickOnlyChoice()) { + IsTopNode = true; + return SU; + } + SchedCandidate BotCand; + // Prefer bottom scheduling when heuristics are silent. + CandResult BotResult = pickNodeFromQueue(Bot.Available, + DAG->getBotRPTracker(), BotCand); + assert(BotResult != NoCand && "failed to find the first candidate"); + + // If either Q has a single candidate that provides the least increase in + // Excess pressure, we can immediately schedule from that Q. + // + // RegionCriticalPSets summarizes the pressure within the scheduled region and + // affects picking from either Q. If scheduling in one direction must + // increase pressure for one of the excess PSets, then schedule in that + // direction first to provide more freedom in the other direction. 
+ if (BotResult == SingleExcess || BotResult == SingleCritical) { + IsTopNode = false; + return BotCand.SU; + } + // Check if the top Q has a better candidate. + SchedCandidate TopCand; + CandResult TopResult = pickNodeFromQueue(Top.Available, + DAG->getTopRPTracker(), TopCand); + assert(TopResult != NoCand && "failed to find the first candidate"); + + if (TopResult == SingleExcess || TopResult == SingleCritical) { + IsTopNode = true; + return TopCand.SU; + } + // If either Q has a single candidate that minimizes pressure above the + // original region's pressure pick it. + if (BotResult == SingleMax) { + IsTopNode = false; + return BotCand.SU; + } + if (TopResult == SingleMax) { + IsTopNode = true; + return TopCand.SU; + } + if (TopCand.SCost > BotCand.SCost) { + IsTopNode = true; + return TopCand.SU; + } + // Otherwise prefer the bottom candidate in node order. + IsTopNode = false; + return BotCand.SU; +} + +/// Pick the best node to balance the schedule. Implements MachineSchedStrategy. +SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { + if (DAG->top() == DAG->bottom()) { + assert(Top.Available.empty() && Top.Pending.empty() && + Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); + return NULL; + } + SUnit *SU; + if (ForceTopDown) { + SU = Top.pickOnlyChoice(); + if (!SU) { + SchedCandidate TopCand; + CandResult TopResult = + pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand); + assert(TopResult != NoCand && "failed to find the first candidate"); + (void)TopResult; + SU = TopCand.SU; + } + IsTopNode = true; + } else if (ForceBottomUp) { + SU = Bot.pickOnlyChoice(); + if (!SU) { + SchedCandidate BotCand; + CandResult BotResult = + pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand); + assert(BotResult != NoCand && "failed to find the first candidate"); + (void)BotResult; + SU = BotCand.SU; + } + IsTopNode = false; + } else { + SU = pickNodeBidrectional(IsTopNode); + } + if (SU->isTopReady()) + Top.removeReady(SU); + if (SU->isBottomReady()) + Bot.removeReady(SU); + + DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") + << " Scheduling Instruction in cycle " + << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n'; + SU->dump(DAG)); + return SU; +} + +/// Update the scheduler's state after scheduling a node. This is the same node +/// that was just returned by pickNode(). However, VLIWMachineScheduler needs +/// to update it's state based on the current cycle before MachineSchedStrategy +/// does. +void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) { + if (IsTopNode) { + SU->TopReadyCycle = Top.CurrCycle; + Top.bumpNode(SU); + } else { + SU->BotReadyCycle = Bot.CurrCycle; + Bot.bumpNode(SU); + } +} + diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h new file mode 100644 index 0000000..f3643d6 --- /dev/null +++ b/lib/Target/Hexagon/HexagonMachineScheduler.h @@ -0,0 +1,437 @@ +//===-- HexagonMachineScheduler.h - Custom Hexagon MI scheduler. ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Custom Hexagon MI scheduler. 
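+// The scheduler pairs a DFA-based VLIW resource model (packet formation) with
+// register-pressure and critical-path heuristics, scheduling the region from
+// both ends.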
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMACHINESCHEDULER_H
+#define HEXAGONMACHINESCHEDULER_H
+
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/PriorityQueue.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineSchedStrategy - Interface to a machine scheduling algorithm.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+class VLIWMachineScheduler;
+
+/// MachineSchedStrategy - Interface used by VLIWMachineScheduler to drive
+/// the selected scheduling algorithm.
+///
+/// TODO: Move this to ScheduleDAGInstrs.h
+class MachineSchedStrategy {
+public:
+  virtual ~MachineSchedStrategy() {}
+
+  /// Initialize the strategy after building the DAG for a new region.
+  virtual void initialize(VLIWMachineScheduler *DAG) = 0;
+
+  /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
+  /// schedule the node at the top of the unscheduled region. Otherwise it will
+  /// be scheduled at the bottom.
+  virtual SUnit *pickNode(bool &IsTopNode) = 0;
+
+  /// Notify MachineSchedStrategy that VLIWMachineScheduler has
+  /// scheduled a node.
+  virtual void schedNode(SUnit *SU, bool IsTopNode) = 0;
+
+  /// When all predecessor dependencies have been resolved, free this node for
+  /// top-down scheduling.
+  virtual void releaseTopNode(SUnit *SU) = 0;
+  /// When all successor dependencies have been resolved, free this node for
+  /// bottom-up scheduling.
+  virtual void releaseBottomNode(SUnit *SU) = 0;
+};
+
+//===----------------------------------------------------------------------===//
+// ConvergingVLIWScheduler - Implementation of the standard
+// MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
+/// ReadyQueue encapsulates a vector of "ready" SUnits with basic convenience
+/// methods for pushing and removing nodes. ReadyQueues are uniquely identified
+/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in.
+class ReadyQueue {
+  unsigned ID;
+  std::string Name;
+  std::vector<SUnit*> Queue;
+
+public:
+  ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {}
+
+  unsigned getID() const { return ID; }
+
+  StringRef getName() const { return Name; }
+
+  // SU is in this queue if its NodeQueueId is a superset of this ID.
+ bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); } + + bool empty() const { return Queue.empty(); } + + unsigned size() const { return Queue.size(); } + + typedef std::vector<SUnit*>::iterator iterator; + + iterator begin() { return Queue.begin(); } + + iterator end() { return Queue.end(); } + + iterator find(SUnit *SU) { + return std::find(Queue.begin(), Queue.end(), SU); + } + + void push(SUnit *SU) { + Queue.push_back(SU); + SU->NodeQueueId |= ID; + } + + void remove(iterator I) { + (*I)->NodeQueueId &= ~ID; + *I = Queue.back(); + Queue.pop_back(); + } + + void dump() { + dbgs() << Name << ": "; + for (unsigned i = 0, e = Queue.size(); i < e; ++i) + dbgs() << Queue[i]->NodeNum << " "; + dbgs() << "\n"; + } +}; + +class VLIWResourceModel { + /// ResourcesModel - Represents VLIW state. + /// Not limited to VLIW targets per say, but assumes + /// definition of DFA by a target. + DFAPacketizer *ResourcesModel; + + const InstrItineraryData *InstrItins; + + /// Local packet/bundle model. Purely + /// internal to the MI schedulre at the time. + std::vector<SUnit*> Packet; + + /// Total packets created. + unsigned TotalPackets; + +public: + VLIWResourceModel(MachineSchedContext *C, const InstrItineraryData *IID) : + InstrItins(IID), TotalPackets(0) { + const TargetMachine &TM = C->MF->getTarget(); + ResourcesModel = TM.getInstrInfo()->CreateTargetScheduleState(&TM,NULL); + + // This hard requirement could be relaxed, + // but for now do not let it proceed. + assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); + + Packet.resize(InstrItins->SchedModel->IssueWidth); + Packet.clear(); + ResourcesModel->clearResources(); + } + + VLIWResourceModel(const TargetMachine &TM) : + InstrItins(TM.getInstrItineraryData()), TotalPackets(0) { + ResourcesModel = TM.getInstrInfo()->CreateTargetScheduleState(&TM,NULL); + + // This hard requirement could be relaxed, + // but for now do not let it proceed. + assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); + + Packet.resize(InstrItins->SchedModel->IssueWidth); + Packet.clear(); + ResourcesModel->clearResources(); + } + + ~VLIWResourceModel() { + delete ResourcesModel; + } + + void resetPacketState() { + Packet.clear(); + } + + void resetDFA() { + ResourcesModel->clearResources(); + } + + void reset() { + Packet.clear(); + ResourcesModel->clearResources(); + } + + bool isResourceAvailable(SUnit *SU); + bool reserveResources(SUnit *SU); + unsigned getTotalPackets() const { return TotalPackets; } +}; + +class VLIWMachineScheduler : public ScheduleDAGInstrs { + /// AA - AliasAnalysis for making memory reference queries. + AliasAnalysis *AA; + + RegisterClassInfo *RegClassInfo; + MachineSchedStrategy *SchedImpl; + + MachineBasicBlock::iterator LiveRegionEnd; + + /// Register pressure in this region computed by buildSchedGraph. + IntervalPressure RegPressure; + RegPressureTracker RPTracker; + + /// List of pressure sets that exceed the target's pressure limit before + /// scheduling, listed in increasing set ID order. Each pressure set is paired + /// with its max pressure in the currently scheduled regions. + std::vector<PressureElement> RegionCriticalPSets; + + /// The top of the unscheduled zone. + MachineBasicBlock::iterator CurrentTop; + IntervalPressure TopPressure; + RegPressureTracker TopRPTracker; + + /// The bottom of the unscheduled zone. 
+ MachineBasicBlock::iterator CurrentBottom; + IntervalPressure BotPressure; + RegPressureTracker BotRPTracker; + +#ifndef NDEBUG + /// The number of instructions scheduled so far. Used to cut off the + /// scheduler at the point determined by misched-cutoff. + unsigned NumInstrsScheduled; +#endif + + /// Total packets in the region. + unsigned TotalPackets; + + const MachineLoopInfo *MLI; +public: + VLIWMachineScheduler(MachineSchedContext *C, MachineSchedStrategy *S): + ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS), + AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S), + RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure), + CurrentBottom(), BotRPTracker(BotPressure), MLI(C->MLI) { +#ifndef NDEBUG + NumInstrsScheduled = 0; +#endif + TotalPackets = 0; + } + + virtual ~VLIWMachineScheduler() { + delete SchedImpl; + } + + MachineBasicBlock::iterator top() const { return CurrentTop; } + MachineBasicBlock::iterator bottom() const { return CurrentBottom; } + + /// Implement the ScheduleDAGInstrs interface for handling the next scheduling + /// region. This covers all instructions in a block, while schedule() may only + /// cover a subset. + void enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount); + + /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's + /// time to do some work. + void schedule(); + + unsigned CurCycle; + + /// Get current register pressure for the top scheduled instructions. + const IntervalPressure &getTopPressure() const { return TopPressure; } + const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; } + + /// Get current register pressure for the bottom scheduled instructions. + const IntervalPressure &getBotPressure() const { return BotPressure; } + const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; } + + /// Get register pressure for the entire scheduling region before scheduling. + const IntervalPressure &getRegPressure() const { return RegPressure; } + + const std::vector<PressureElement> &getRegionCriticalPSets() const { + return RegionCriticalPSets; + } + + /// getIssueWidth - Return the max instructions per scheduling group. + unsigned getIssueWidth() const { + return (InstrItins && InstrItins->SchedModel) + ? InstrItins->SchedModel->IssueWidth : 1; + } + + /// getNumMicroOps - Return the number of issue slots required for this MI. + unsigned getNumMicroOps(MachineInstr *MI) const { + return 1; + //if (!InstrItins) return 1; + //int UOps = InstrItins->getNumMicroOps(MI->getDesc().getSchedClass()); + //return (UOps >= 0) ? UOps : TII->getNumMicroOps(InstrItins, MI); + } + +private: + void scheduleNodeTopDown(SUnit *SU); + void listScheduleTopDown(); + + void initRegPressure(); + void updateScheduledPressure(std::vector<unsigned> NewMaxPressure); + + void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos); + bool checkSchedLimit(); + + void releaseRoots(); + + void releaseSucc(SUnit *SU, SDep *SuccEdge); + void releaseSuccessors(SUnit *SU); + void releasePred(SUnit *SU, SDep *PredEdge); + void releasePredecessors(SUnit *SU); + + void placeDebugValues(); +}; + +/// ConvergingVLIWScheduler shrinks the unscheduled zone using heuristics +/// to balance the schedule. +class ConvergingVLIWScheduler : public MachineSchedStrategy { + + /// Store the state used by ConvergingVLIWScheduler heuristics, required + /// for the lifetime of one invocation of pickNode(). 
+  struct SchedCandidate {
+    // The best SUnit candidate.
+    SUnit *SU;
+
+    // Register pressure values for the best candidate.
+    RegPressureDelta RPDelta;
+
+    // Best scheduling cost.
+    int SCost;
+
+    SchedCandidate(): SU(NULL), SCost(0) {}
+  };
+  /// Represent the type of SchedCandidate found within a single queue.
+  enum CandResult {
+    NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure,
+    BestCost};
+
+  /// Each scheduling boundary is associated with ready queues. It tracks the
+  /// current cycle in whichever direction it has moved, and maintains the state
+  /// of "hazards" and other interlocks at the current cycle.
+  struct SchedBoundary {
+    VLIWMachineScheduler *DAG;
+
+    ReadyQueue Available;
+    ReadyQueue Pending;
+    bool CheckPending;
+
+    ScheduleHazardRecognizer *HazardRec;
+    VLIWResourceModel *ResourceModel;
+
+    unsigned CurrCycle;
+    unsigned IssueCount;
+
+    /// MinReadyCycle - Cycle of the soonest available instruction.
+    unsigned MinReadyCycle;
+
+    // Remember the greatest min operand latency.
+    unsigned MaxMinLatency;
+
+    /// Pending queues extend the ready queues with the same ID and the
+    /// PendingFlag set.
+    SchedBoundary(unsigned ID, const Twine &Name):
+      DAG(0), Available(ID, Name+".A"),
+      Pending(ID << ConvergingVLIWScheduler::LogMaxQID, Name+".P"),
+      CheckPending(false), HazardRec(0), ResourceModel(0),
+      CurrCycle(0), IssueCount(0),
+      MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
+
+    ~SchedBoundary() {
+      delete ResourceModel;
+      delete HazardRec;
+    }
+
+    bool isTop() const {
+      return Available.getID() == ConvergingVLIWScheduler::TopQID;
+    }
+
+    bool checkHazard(SUnit *SU);
+
+    void releaseNode(SUnit *SU, unsigned ReadyCycle);
+
+    void bumpCycle();
+
+    void bumpNode(SUnit *SU);
+
+    void releasePending();
+
+    void removeReady(SUnit *SU);
+
+    SUnit *pickOnlyChoice();
+  };
+
+  VLIWMachineScheduler *DAG;
+  const TargetRegisterInfo *TRI;
+
+  // State of the top and bottom scheduled instruction boundaries.
+ SchedBoundary Top; + SchedBoundary Bot; + +public: + /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) + enum { + TopQID = 1, + BotQID = 2, + LogMaxQID = 2 + }; + + ConvergingVLIWScheduler(): + DAG(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} + + virtual void initialize(VLIWMachineScheduler *dag); + + virtual SUnit *pickNode(bool &IsTopNode); + + virtual void schedNode(SUnit *SU, bool IsTopNode); + + virtual void releaseTopNode(SUnit *SU); + + virtual void releaseBottomNode(SUnit *SU); + +protected: + SUnit *pickNodeBidrectional(bool &IsTopNode); + + int SchedulingCost(ReadyQueue &Q, + SUnit *SU, SchedCandidate &Candidate, + RegPressureDelta &Delta, bool verbose); + + CandResult pickNodeFromQueue(ReadyQueue &Q, + const RegPressureTracker &RPTracker, + SchedCandidate &Candidate); +#ifndef NDEBUG + void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU, + PressureElement P = PressureElement()); +#endif +}; + +} // namespace + + +#endif diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index 7ece408..1e91c39 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -337,7 +337,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n" << "********** Function: " - << MF.getFunction()->getName() << "\n"); + << MF.getName() << "\n"); #if 0 // for now disable this, if we move NewValueJump before register diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp index 55cbc09..a295015 100644 --- a/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/lib/Target/Hexagon/HexagonPeephole.cpp @@ -109,6 +109,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); DenseMap<unsigned, unsigned> PeepholeMap; + DenseMap<unsigned, std::pair<unsigned, unsigned> > PeepholeDoubleRegsMap; if (DisableHexagonPeephole) return false; @@ -117,6 +118,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { MBBb != MBBe; ++MBBb) { MachineBasicBlock* MBB = MBBb; PeepholeMap.clear(); + PeepholeDoubleRegsMap.clear(); // Traverse the basic block. for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); @@ -140,6 +142,24 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { } } + // Look for this sequence below + // %vregDoubleReg1 = LSRd_ri %vregDoubleReg0, 32 + // %vregIntReg = COPY %vregDoubleReg1:subreg_loreg. + // and convert into + // %vregIntReg = COPY %vregDoubleReg0:subreg_hireg. + if (MI->getOpcode() == Hexagon::LSRd_ri) { + assert(MI->getNumOperands() == 3); + MachineOperand &Dst = MI->getOperand(0); + MachineOperand &Src1 = MI->getOperand(1); + MachineOperand &Src2 = MI->getOperand(2); + if (Src2.getImm() != 32) + continue; + unsigned DstReg = Dst.getReg(); + unsigned SrcReg = Src1.getReg(); + PeepholeDoubleRegsMap[DstReg] = + std::make_pair(*&SrcReg, 1/*Hexagon::subreg_hireg*/); + } + // Look for P=NOT(P). if (!DisablePNotP && (MI->getOpcode() == Hexagon::NOT_p)) { @@ -178,6 +198,21 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { // Change the 1st operand. 
MI->RemoveOperand(1); MI->addOperand(MachineOperand::CreateReg(PeepholeSrc, false)); + } else { + DenseMap<unsigned, std::pair<unsigned, unsigned> >::iterator DI = + PeepholeDoubleRegsMap.find(SrcReg); + if (DI != PeepholeDoubleRegsMap.end()) { + std::pair<unsigned,unsigned> PeepholeSrc = DI->second; + MI->RemoveOperand(1); + MI->addOperand(MachineOperand::CreateReg(PeepholeSrc.first, + false /*isDef*/, + false /*isImp*/, + false /*isKill*/, + false /*isDead*/, + false /*isUndef*/, + false /*isEarlyClobber*/, + PeepholeSrc.second)); + } } } } diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 2c23674..3742486 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -310,6 +310,58 @@ void HexagonRegisterInfo::getInitialFrameState(std::vector<MachineMove> Moves.push_back(MachineMove(0, Dst, Src)); } +// Get the weight in units of pressure for this register class. +const RegClassWeight & +HexagonRegisterInfo::getRegClassWeight(const TargetRegisterClass *RC) const { + // Each TargetRegisterClass has a per register weight, and weight + // limit which must be less than the limits of its pressure sets. + static const RegClassWeight RCWeightTable[] = { + {1, 32}, // IntRegs + {1, 8}, // CRRegs + {1, 4}, // PredRegs + {2, 16}, // DoubleRegs + {0, 0} }; + return RCWeightTable[RC->getID()]; +} + +/// Get the number of dimensions of register pressure. +unsigned HexagonRegisterInfo::getNumRegPressureSets() const { + return 4; +} + +/// Get the name of this register unit pressure set. +const char *HexagonRegisterInfo::getRegPressureSetName(unsigned Idx) const { + static const char *const RegPressureSetName[] = { + "IntRegsRegSet", + "CRRegsRegSet", + "PredRegsRegSet", + "DoubleRegsRegSet" + }; + assert((Idx < 4) && "Index out of bounds"); + return RegPressureSetName[Idx]; +} + +/// Get the register unit pressure limit for this dimension. +/// This limit must be adjusted dynamically for reserved registers. +unsigned HexagonRegisterInfo::getRegPressureSetLimit(unsigned Idx) const { + static const int RegPressureLimit [] = { 16, 4, 2, 8 }; + assert((Idx < 4) && "Index out of bounds"); + return RegPressureLimit[Idx]; +} + +const int* +HexagonRegisterInfo::getRegClassPressureSets(const TargetRegisterClass *RC) + const { + static const int RCSetsTable[] = { + 0, -1, // IntRegs + 1, -1, // CRRegs + 2, -1, // PredRegs + 0, -1, // DoubleRegs + -1 }; + static const unsigned RCSetStartTable[] = { 0, 2, 4, 6, 0 }; + unsigned SetListStart = RCSetStartTable[RC->getID()]; + return &RCSetsTable[SetListStart]; +} unsigned HexagonRegisterInfo::getEHExceptionRegister() const { llvm_unreachable("What is the exception register"); } diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h index 85355ae..8820d13 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -87,6 +87,11 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo { // Exception handling queries. 
unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; + const RegClassWeight &getRegClassWeight(const TargetRegisterClass *RC) const; + unsigned getNumRegPressureSets() const; + const char *getRegPressureSetName(unsigned Idx) const; + unsigned getRegPressureSetLimit(unsigned Idx) const; + const int* getRegClassPressureSets(const TargetRegisterClass *RC) const; }; } // end namespace llvm diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td index d1076b8..b5ff69a 100644 --- a/lib/Target/Hexagon/HexagonSchedule.td +++ b/lib/Target/Hexagon/HexagonSchedule.td @@ -47,6 +47,7 @@ def HexagonModel : SchedMachineModel { // Max issue per cycle == bundle width. let IssueWidth = 4; let Itineraries = HexagonItineraries; + let LoadLatency = 1; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td index 9b41126..5668ae8 100644 --- a/lib/Target/Hexagon/HexagonScheduleV4.td +++ b/lib/Target/Hexagon/HexagonScheduleV4.td @@ -58,6 +58,7 @@ def HexagonModelV4 : SchedMachineModel { // Max issue per cycle == bundle width. let IssueWidth = 4; let Itineraries = HexagonItinerariesV4; + let LoadLatency = 1; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index a7b291f..5688e9c 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -14,6 +14,7 @@ #include "HexagonTargetMachine.h" #include "Hexagon.h" #include "HexagonISelLowering.h" +#include "HexagonMachineScheduler.h" #include "llvm/Module.h" #include "llvm/CodeGen/Passes.h" #include "llvm/PassManager.h" @@ -29,6 +30,11 @@ opt<bool> DisableHardwareLoops( "disable-hexagon-hwloops", cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target")); +static cl:: +opt<bool> DisableHexagonMISched("disable-hexagon-misched", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon MI Scheduling")); + /// HexagonTargetMachineModule - Note that this is used on hosts that /// cannot link in a library unless there are references into the /// library. In particular, it seems that it is not possible to get @@ -42,6 +48,13 @@ extern "C" void LLVMInitializeHexagonTarget() { RegisterTargetMachine<HexagonTargetMachine> X(TheHexagonTarget); } +static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) { + return new VLIWMachineScheduler(C, new ConvergingVLIWScheduler()); +} + +static MachineSchedRegistry +SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", + createVLIWMachineSched); /// HexagonTargetMachine ctor - Create an ILP32 architecture model. /// @@ -83,7 +96,13 @@ namespace { class HexagonPassConfig : public TargetPassConfig { public: HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} + : TargetPassConfig(TM, PM) { + // Enable MI scheduler. 
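+    // Turning the pass on and registering createVLIWMachineSched as the
+    // default factory makes the generic MachineScheduler pass drive the
+    // VLIWMachineScheduler/ConvergingVLIWScheduler pair defined above.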
+ if (!DisableHexagonMISched) { + enablePass(&MachineSchedulerID); + MachineSchedRegistry::setDefault(createVLIWMachineSched); + } + } HexagonTargetMachine &getHexagonTargetMachine() const { return getTM<HexagonTargetMachine>(); diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index a03ed03..3d5f685 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -3474,8 +3474,8 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // 1. Two loads unless they are volatile. // 2. Two stores in V4 unless they are volatile. else if ((DepType == SDep::Order) && - !I->hasVolatileMemoryRef() && - !J->hasVolatileMemoryRef()) { + !I->hasOrderedMemoryRef() && + !J->hasOrderedMemoryRef()) { if (QRI->Subtarget.hasV4TOps() && // hexagonv4 allows dual store. MCIDI.mayStore() && MCIDJ.mayStore()) { diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp index d6e6c36..86f75d1 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp @@ -24,7 +24,7 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(const Target &T, StringRef TT) { HasLEB128 = true; PrivateGlobalPrefix = ".L"; - LCOMMDirectiveType = LCOMM::ByteAlignment; + LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment; InlineAsmStart = "# InlineAsm Start"; InlineAsmEnd = "# InlineAsm End"; ZeroDirective = "\t.space\t"; diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp index 38fb0e8..4059403 100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp @@ -56,6 +56,12 @@ class MBlazeAsmParser : public MCTargetAsmParser { /// } + unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands, + unsigned OperandNum, unsigned &NumMCOperands) { + return getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum, + NumMCOperands); + } public: MBlazeAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) @@ -317,10 +323,10 @@ MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out) { MCInst Inst; - SMLoc ErrorLoc; + unsigned Kind; unsigned ErrorInfo; - switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) { + switch (MatchInstructionImpl(Operands, Kind, Inst, ErrorInfo)) { default: break; case Match_Success: Out.EmitInstruction(Inst); @@ -329,10 +335,8 @@ MatchAndEmitInstruction(SMLoc IDLoc, return Error(IDLoc, "instruction use requires an option to be enabled"); case Match_MnemonicFail: return Error(IDLoc, "unrecognized instruction mnemonic"); - case Match_ConversionFail: - return Error(IDLoc, "unable to convert operands to instruction"); - case Match_InvalidOperand: - ErrorLoc = IDLoc; + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; if (ErrorInfo != ~0U) { if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction"); @@ -343,6 +347,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, return Error(ErrorLoc, "invalid operand for instruction"); } + } llvm_unreachable("Implement any new match types added!"); } diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index 46f5207..daa76e8 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -140,7 +140,7 @@ 
eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned oi = i == 2 ? 1 : 2; - DEBUG(dbgs() << "\nFunction : " << MF.getFunction()->getName() << "\n"; + DEBUG(dbgs() << "\nFunction : " << MF.getName() << "\n"; dbgs() << "<--------->\n" << MI); int FrameIndex = MI.getOperand(i).getIndex(); diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 43bd345..8418b75 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -8,20 +8,36 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/MipsMCTargetDesc.h" +#include "MipsRegisterInfo.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCParser/MCAsmLexer.h" -#include "llvm/MC/MCTargetAsmParser.h" -#include "llvm/Support/TargetRegistry.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCTargetAsmParser.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/Support/MathExtras.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; namespace { + class MipsAsmParser : public MCTargetAsmParser { + enum FpFormatTy { + FP_FORMAT_NONE = -1, + FP_FORMAT_S, + FP_FORMAT_D, + FP_FORMAT_L, + FP_FORMAT_W + } FpFormat; + + MCSubtargetInfo &STI; + MCAsmParser &Parser; + #define GET_ASSEMBLER_HEADER #include "MipsGenAsmMatcher.inc" @@ -34,14 +50,67 @@ class MipsAsmParser : public MCTargetAsmParser { bool ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); + bool parseMathOperation(StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands); + bool ParseDirective(AsmToken DirectiveID); - OperandMatchResultTy parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&); + MipsAsmParser::OperandMatchResultTy + parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&); + + unsigned + getMCInstOperandNum(unsigned Kind, MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands, + unsigned OperandNum, unsigned &NumMCOperands); + + bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, + StringRef Mnemonic); + + int tryParseRegister(StringRef Mnemonic); + + bool tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + StringRef Mnemonic); + + bool parseMemOffset(const MCExpr *&Res); + bool parseRelocOperand(const MCExpr *&Res); + MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol); + + bool isMips64() const { + return (STI.getFeatureBits() & Mips::FeatureMips64) != 0; + } + + bool isFP64() const { + return (STI.getFeatureBits() & Mips::FeatureFP64Bit) != 0; + } + + int matchRegisterName(StringRef Symbol); + + int matchRegisterByNumber(unsigned RegNum, StringRef Mnemonic); + + void setFpFormat(FpFormatTy Format) { + FpFormat = Format; + } + + void setDefaultFpFormat(); + + void setFpFormat(StringRef Format); + + FpFormatTy getFpFormat() {return FpFormat;} + + bool requestsDoubleOperand(StringRef Mnemonic); + + unsigned getReg(int RC,int RegNo); + public: MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) - : MCTargetAsmParser() { + : MCTargetAsmParser(), STI(sti), Parser(parser) { + // Initialize the set of available features. 
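+    // ComputeAvailableFeatures is generated into MipsGenAsmMatcher.inc and
+    // maps the subtarget feature bits onto the matcher's feature flags.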
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } + MCAsmParser &getParser() const { return Parser; } + MCAsmLexer &getLexer() const { return Parser.getLexer(); } + }; } @@ -50,6 +119,7 @@ namespace { /// MipsOperand - Instances of this class represent a parsed Mips machine /// instruction. class MipsOperand : public MCParsedAsmOperand { + enum KindTy { k_CondCode, k_CoprocNum, @@ -61,18 +131,58 @@ class MipsOperand : public MCParsedAsmOperand { } Kind; MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + + union { + struct { + const char *Data; + unsigned Length; + } Tok; + + struct { + unsigned RegNum; + } Reg; + + struct { + const MCExpr *Val; + } Imm; + + struct { + unsigned Base; + const MCExpr *Off; + } Mem; + }; + + SMLoc StartLoc, EndLoc; + public: void addRegOperands(MCInst &Inst, unsigned N) const { - llvm_unreachable("unimplemented!"); + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getReg())); } + void addExpr(MCInst &Inst, const MCExpr *Expr) const{ - llvm_unreachable("unimplemented!"); + // Add as immediate when possible. Null MCExpr = 0. + if (Expr == 0) + Inst.addOperand(MCOperand::CreateImm(0)); + else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(Expr)); } + void addImmOperands(MCInst &Inst, unsigned N) const { - llvm_unreachable("unimplemented!"); + assert(N == 1 && "Invalid number of operands!"); + const MCExpr *Expr = getImm(); + addExpr(Inst,Expr); } + void addMemOperands(MCInst &Inst, unsigned N) const { - llvm_unreachable("unimplemented!"); + assert(N == 2 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::CreateReg(getMemBase())); + + const MCExpr *Expr = getMemOff(); + addExpr(Inst,Expr); } bool isReg() const { return Kind == k_Register; } @@ -82,46 +192,751 @@ public: StringRef getToken() const { assert(Kind == k_Token && "Invalid access!"); - return ""; + return StringRef(Tok.Data, Tok.Length); } unsigned getReg() const { assert((Kind == k_Register) && "Invalid access!"); - return 0; + return Reg.RegNum; + } + + const MCExpr *getImm() const { + assert((Kind == k_Immediate) && "Invalid access!"); + return Imm.Val; } + unsigned getMemBase() const { + assert((Kind == k_Memory) && "Invalid access!"); + return Mem.Base; + } + + const MCExpr *getMemOff() const { + assert((Kind == k_Memory) && "Invalid access!"); + return Mem.Off; + } + + static MipsOperand *CreateToken(StringRef Str, SMLoc S) { + MipsOperand *Op = new MipsOperand(k_Token); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + + static MipsOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { + MipsOperand *Op = new MipsOperand(k_Register); + Op->Reg.RegNum = RegNum; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static MipsOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { + MipsOperand *Op = new MipsOperand(k_Immediate); + Op->Imm.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static MipsOperand *CreateMem(unsigned Base, const MCExpr *Off, + SMLoc S, SMLoc E) { + MipsOperand *Op = new MipsOperand(k_Memory); + Op->Mem.Base = Base; + Op->Mem.Off = Off; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + /// getStartLoc - Get the location of the first token of this operand. 
+ SMLoc getStartLoc() const { return StartLoc; } + /// getEndLoc - Get the location of the last token of this operand. + SMLoc getEndLoc() const { return EndLoc; } + virtual void print(raw_ostream &OS) const { llvm_unreachable("unimplemented!"); } }; } +unsigned MipsAsmParser:: +getMCInstOperandNum(unsigned Kind, MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands, + unsigned OperandNum, unsigned &NumMCOperands) { + assert (0 && "getMCInstOperandNum() not supported by the Mips target."); + // The Mips backend doesn't currently include the matcher implementation, so + // the getMCInstOperandNumImpl() is undefined. This is a temporary + // work around. + NumMCOperands = 0; + return 0; +} + bool MipsAsmParser:: MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out) { + MCInst Inst; + unsigned ErrorInfo; + unsigned Kind; + unsigned MatchResult = MatchInstructionImpl(Operands, Kind, Inst, ErrorInfo); + + switch (MatchResult) { + default: break; + case Match_Success: { + Inst.setLoc(IDLoc); + Out.EmitInstruction(Inst); + return false; + } + case Match_MissingFeature: + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0U) { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = ((MipsOperand*)Operands[ErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + } + + return Error(ErrorLoc, "invalid operand for instruction"); + } + case Match_MnemonicFail: + return Error(IDLoc, "invalid instruction"); + } return true; } +int MipsAsmParser::matchRegisterName(StringRef Name) { + + int CC = StringSwitch<unsigned>(Name) + .Case("zero", Mips::ZERO) + .Case("a0", Mips::A0) + .Case("a1", Mips::A1) + .Case("a2", Mips::A2) + .Case("a3", Mips::A3) + .Case("v0", Mips::V0) + .Case("v1", Mips::V1) + .Case("s0", Mips::S0) + .Case("s1", Mips::S1) + .Case("s2", Mips::S2) + .Case("s3", Mips::S3) + .Case("s4", Mips::S4) + .Case("s5", Mips::S5) + .Case("s6", Mips::S6) + .Case("s7", Mips::S7) + .Case("k0", Mips::K0) + .Case("k1", Mips::K1) + .Case("sp", Mips::SP) + .Case("fp", Mips::FP) + .Case("gp", Mips::GP) + .Case("ra", Mips::RA) + .Case("t0", Mips::T0) + .Case("t1", Mips::T1) + .Case("t2", Mips::T2) + .Case("t3", Mips::T3) + .Case("t4", Mips::T4) + .Case("t5", Mips::T5) + .Case("t6", Mips::T6) + .Case("t7", Mips::T7) + .Case("t8", Mips::T8) + .Case("t9", Mips::T9) + .Case("at", Mips::AT) + .Case("fcc0", Mips::FCC0) + .Default(-1); + + if (CC != -1) { + //64 bit register in Mips are following 32 bit definitions. 
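+    // The table above yields the 32-bit register; the code relies on each
+    // 64-bit register being declared directly after its 32-bit counterpart,
+    // so the next enum value is the Mips64 variant.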
+ if (isMips64()) + CC++; + return CC; + } + + if (Name[0] == 'f') { + StringRef NumString = Name.substr(1); + unsigned IntVal; + if( NumString.getAsInteger(10, IntVal)) + return -1; //not integer + if (IntVal > 31) + return -1; + + FpFormatTy Format = getFpFormat(); + + if (Format == FP_FORMAT_S || Format == FP_FORMAT_W) + return getReg(Mips::FGR32RegClassID, IntVal); + if (Format == FP_FORMAT_D) { + if(isFP64()) { + return getReg(Mips::FGR64RegClassID, IntVal); + } + //only even numbers available as register pairs + if (( IntVal > 31) || (IntVal%2 != 0)) + return -1; + return getReg(Mips::AFGR64RegClassID, IntVal/2); + } + } + + return -1; +} +void MipsAsmParser::setDefaultFpFormat() { + + if (isMips64() || isFP64()) + FpFormat = FP_FORMAT_D; + else + FpFormat = FP_FORMAT_S; +} + +bool MipsAsmParser::requestsDoubleOperand(StringRef Mnemonic){ + + bool IsDouble = StringSwitch<bool>(Mnemonic.lower()) + .Case("ldxc1", true) + .Case("ldc1", true) + .Case("sdxc1", true) + .Case("sdc1", true) + .Default(false); + + return IsDouble; +} +void MipsAsmParser::setFpFormat(StringRef Format) { + + FpFormat = StringSwitch<FpFormatTy>(Format.lower()) + .Case(".s", FP_FORMAT_S) + .Case(".d", FP_FORMAT_D) + .Case(".l", FP_FORMAT_L) + .Case(".w", FP_FORMAT_W) + .Default(FP_FORMAT_NONE); +} + +unsigned MipsAsmParser::getReg(int RC,int RegNo){ + return *(getContext().getRegisterInfo().getRegClass(RC).begin() + RegNo); +} + +int MipsAsmParser::matchRegisterByNumber(unsigned RegNum,StringRef Mnemonic) { + + if (Mnemonic.lower() == "rdhwr") { + //at the moment only hwreg29 is supported + if (RegNum != 29) + return -1; + return Mips::HWR29; + } + + if (RegNum > 31) + return -1; + + return getReg(Mips::CPURegsRegClassID,RegNum); +} + +int MipsAsmParser::tryParseRegister(StringRef Mnemonic) { + const AsmToken &Tok = Parser.getTok(); + int RegNum = -1; + + if (Tok.is(AsmToken::Identifier)) { + std::string lowerCase = Tok.getString().lower(); + RegNum = matchRegisterName(lowerCase); + } else if (Tok.is(AsmToken::Integer)) + RegNum = matchRegisterByNumber(static_cast<unsigned> (Tok.getIntVal()), + Mnemonic.lower()); + else + return RegNum; //error + //64 bit div operations require Mips::ZERO instead of MIPS::ZERO_64 + if (isMips64() && RegNum == Mips::ZERO_64) { + if (Mnemonic.find("ddiv") != StringRef::npos) + RegNum = Mips::ZERO; + } + return RegNum; +} + bool MipsAsmParser:: -ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { + tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + StringRef Mnemonic){ + + SMLoc S = Parser.getTok().getLoc(); + int RegNo = -1; + + //FIXME: we should make a more generic method for CCR + if ((Mnemonic == "cfc1" || Mnemonic == "ctc1") + && Operands.size() == 2 && Parser.getTok().is(AsmToken::Integer)){ + RegNo = Parser.getTok().getIntVal(); //get the int value + //at the moment only fcc0 is supported + if (RegNo == 0) + RegNo = Mips::FCC0; + } else + RegNo = tryParseRegister(Mnemonic); + if (RegNo == -1) + return true; + + Operands.push_back(MipsOperand::CreateReg(RegNo, S, + Parser.getTok().getLoc())); + Parser.Lex(); // Eat register token. + return false; +} + +bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, + StringRef Mnemonic) { + //Check if the current operand has a custom associated parser, if so, try to + //custom parse the operand, or fallback to the general approach. 
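+  // (parseMemOperand, declared above, is the only custom operand parser in
+  // this file; anything MatchOperandParserImpl does not claim falls through
+  // to the switch below.)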
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); + if (ResTy == MatchOperand_Success) + return false; + // If there wasn't a custom match, try the generic matcher below. Otherwise, + // there was a match, but an error occurred, in which case, just return that + // the operand parsing failed. + if (ResTy == MatchOperand_ParseFail) + return true; + + switch (getLexer().getKind()) { + default: + Error(Parser.getTok().getLoc(), "unexpected token in operand"); + return true; + case AsmToken::Dollar: { + //parse register + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat dollar token. + //parse register operand + if (!tryParseRegisterOperand(Operands,Mnemonic)) { + if (getLexer().is(AsmToken::LParen)) { + //check if it is indexed addressing operand + Operands.push_back(MipsOperand::CreateToken("(", S)); + Parser.Lex(); //eat parenthesis + if (getLexer().isNot(AsmToken::Dollar)) + return true; + + Parser.Lex(); //eat dollar + if (tryParseRegisterOperand(Operands,Mnemonic)) + return true; + + if (!getLexer().is(AsmToken::RParen)) + return true; + + S = Parser.getTok().getLoc(); + Operands.push_back(MipsOperand::CreateToken(")", S)); + Parser.Lex(); + } + return false; + } + //maybe it is a symbol reference + StringRef Identifier; + if (Parser.ParseIdentifier(Identifier)) + return true; + + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + MCSymbol *Sym = getContext().GetOrCreateSymbol("$" + Identifier); + + // Otherwise create a symbol ref. + const MCExpr *Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, + getContext()); + + Operands.push_back(MipsOperand::CreateImm(Res, S, E)); + return false; + } + case AsmToken::Identifier: + case AsmToken::LParen: + case AsmToken::Minus: + case AsmToken::Plus: + case AsmToken::Integer: + case AsmToken::String: { + // quoted label names + const MCExpr *IdVal; + SMLoc S = Parser.getTok().getLoc(); + if (getParser().ParseExpression(IdVal)) + return true; + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(MipsOperand::CreateImm(IdVal, S, E)); + return false; + } + case AsmToken::Percent: { + //it is a symbol reference or constant expression + const MCExpr *IdVal; + SMLoc S = Parser.getTok().getLoc(); //start location of the operand + if (parseRelocOperand(IdVal)) + return true; + + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + Operands.push_back(MipsOperand::CreateImm(IdVal, S, E)); + return false; + }//case AsmToken::Percent + }//switch(getLexer().getKind()) + return true; +} + +bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) { + + Parser.Lex(); //eat % token + const AsmToken &Tok = Parser.getTok(); //get next token, operation + if (Tok.isNot(AsmToken::Identifier)) + return true; + + std::string Str = Tok.getIdentifier().str(); + + Parser.Lex(); //eat identifier + //now make expression from the rest of the operand + const MCExpr *IdVal; + SMLoc EndLoc; + + if (getLexer().getKind() == AsmToken::LParen) { + while (1) { + Parser.Lex(); //eat '(' token + if (getLexer().getKind() == AsmToken::Percent) { + Parser.Lex(); //eat % token + const AsmToken &nextTok = Parser.getTok(); + if (nextTok.isNot(AsmToken::Identifier)) + return true; + Str += "(%"; + Str += nextTok.getIdentifier(); + Parser.Lex(); //eat identifier + if (getLexer().getKind() != AsmToken::LParen) + return true; + } else + break; + } + if (getParser().ParseParenExpression(IdVal,EndLoc)) + return true; + + while (getLexer().getKind() == 
AsmToken::RParen) + Parser.Lex(); //eat ')' token + + } else + return true; //parenthesis must follow reloc operand + + //Check the type of the expression + if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal)) { + //it's a constant, evaluate lo or hi value + int Val = MCE->getValue(); + if (Str == "lo") { + Val = Val & 0xffff; + } else if (Str == "hi") { + Val = (Val & 0xffff0000) >> 16; + } + Res = MCConstantExpr::Create(Val, getContext()); + return false; + } + + if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(IdVal)) { + //it's a symbol, create symbolic expression from symbol + StringRef Symbol = MSRE->getSymbol().getName(); + MCSymbolRefExpr::VariantKind VK = getVariantKind(Str); + Res = MCSymbolRefExpr::Create(Symbol,VK,getContext()); + return false; + } return true; } +bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) { + + StartLoc = Parser.getTok().getLoc(); + RegNo = tryParseRegister(""); + EndLoc = Parser.getTok().getLoc(); + return (RegNo == (unsigned)-1); +} + +bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) { + + SMLoc S; + + switch(getLexer().getKind()) { + default: + return true; + case AsmToken::Integer: + case AsmToken::Minus: + case AsmToken::Plus: + return (getParser().ParseExpression(Res)); + case AsmToken::Percent: + return parseRelocOperand(Res); + case AsmToken::LParen: + return false; //it's probably assuming 0 + } + return true; +} + +MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( + SmallVectorImpl<MCParsedAsmOperand*>&Operands) { + + const MCExpr *IdVal = 0; + SMLoc S; + //first operand is the offset + S = Parser.getTok().getLoc(); + + if (parseMemOffset(IdVal)) + return MatchOperand_ParseFail; + + const AsmToken &Tok = Parser.getTok(); //get next token + if (Tok.isNot(AsmToken::LParen)) { + Error(Parser.getTok().getLoc(), "'(' expected"); + return MatchOperand_ParseFail; + } + + Parser.Lex(); // Eat '(' token. + + const AsmToken &Tok1 = Parser.getTok(); //get next token + if (Tok1.is(AsmToken::Dollar)) { + Parser.Lex(); // Eat '$' token. + if (tryParseRegisterOperand(Operands,"")) { + Error(Parser.getTok().getLoc(), "unexpected token in operand"); + return MatchOperand_ParseFail; + } + + } else { + Error(Parser.getTok().getLoc(),"unexpected token in operand"); + return MatchOperand_ParseFail; + } + + const AsmToken &Tok2 = Parser.getTok(); //get next token + if (Tok2.isNot(AsmToken::RParen)) { + Error(Parser.getTok().getLoc(), "')' expected"); + return MatchOperand_ParseFail; + } + + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + Parser.Lex(); // Eat ')' token. 
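+
+  // An omitted offset, as in "($sp)", leaves IdVal unset, so it defaults to
+  // an immediate 0 below.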
+ + if (IdVal == 0) + IdVal = MCConstantExpr::Create(0, getContext()); + + //now replace register operand with the mem operand + MipsOperand* op = static_cast<MipsOperand*>(Operands.back()); + int RegNo = op->getReg(); + //remove register from operands + Operands.pop_back(); + //and add memory operand + Operands.push_back(MipsOperand::CreateMem(RegNo, IdVal, S, E)); + delete op; + return MatchOperand_Success; +} + +MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) { + + MCSymbolRefExpr::VariantKind VK + = StringSwitch<MCSymbolRefExpr::VariantKind>(Symbol) + .Case("hi", MCSymbolRefExpr::VK_Mips_ABS_HI) + .Case("lo", MCSymbolRefExpr::VK_Mips_ABS_LO) + .Case("gp_rel", MCSymbolRefExpr::VK_Mips_GPREL) + .Case("call16", MCSymbolRefExpr::VK_Mips_GOT_CALL) + .Case("got", MCSymbolRefExpr::VK_Mips_GOT) + .Case("tlsgd", MCSymbolRefExpr::VK_Mips_TLSGD) + .Case("tlsldm", MCSymbolRefExpr::VK_Mips_TLSLDM) + .Case("dtprel_hi", MCSymbolRefExpr::VK_Mips_DTPREL_HI) + .Case("dtprel_lo", MCSymbolRefExpr::VK_Mips_DTPREL_LO) + .Case("gottprel", MCSymbolRefExpr::VK_Mips_GOTTPREL) + .Case("tprel_hi", MCSymbolRefExpr::VK_Mips_TPREL_HI) + .Case("tprel_lo", MCSymbolRefExpr::VK_Mips_TPREL_LO) + .Case("got_disp", MCSymbolRefExpr::VK_Mips_GOT_DISP) + .Case("got_page", MCSymbolRefExpr::VK_Mips_GOT_PAGE) + .Case("got_ofst", MCSymbolRefExpr::VK_Mips_GOT_OFST) + .Case("hi(%neg(%gp_rel", MCSymbolRefExpr::VK_Mips_GPOFF_HI) + .Case("lo(%neg(%gp_rel", MCSymbolRefExpr::VK_Mips_GPOFF_LO) + .Default(MCSymbolRefExpr::VK_None); + + return VK; +} + +static int ConvertCcString(StringRef CondString) { + int CC = StringSwitch<unsigned>(CondString) + .Case(".f", 0) + .Case(".un", 1) + .Case(".eq", 2) + .Case(".ueq", 3) + .Case(".olt", 4) + .Case(".ult", 5) + .Case(".ole", 6) + .Case(".ule", 7) + .Case(".sf", 8) + .Case(".ngle", 9) + .Case(".seq", 10) + .Case(".ngl", 11) + .Case(".lt", 12) + .Case(".nge", 13) + .Case(".le", 14) + .Case(".ngt", 15) + .Default(-1); + + return CC; +} + +bool MipsAsmParser:: +parseMathOperation(StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + //split the format + size_t Start = Name.find('.'), Next = Name.rfind('.'); + StringRef Format1 = Name.slice(Start, Next); + //and add the first format to the operands + Operands.push_back(MipsOperand::CreateToken(Format1, NameLoc)); + //now for the second format + StringRef Format2 = Name.slice(Next, StringRef::npos); + Operands.push_back(MipsOperand::CreateToken(Format2, NameLoc)); + + //set the format for the first register + setFpFormat(Format1); + + // Read the remaining operands. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + // Read the first operand. + if (ParseOperand(Operands, Name)) { + SMLoc Loc = getLexer().getLoc(); + Parser.EatToEndOfStatement(); + return Error(Loc, "unexpected token in argument list"); + } + + if (getLexer().isNot(AsmToken::Comma)) { + SMLoc Loc = getLexer().getLoc(); + Parser.EatToEndOfStatement(); + return Error(Loc, "unexpected token in argument list"); + + } + Parser.Lex(); // Eat the comma. + + //set the format for the first register + setFpFormat(Format2); + + // Parse and remember the operand. 
+ if (ParseOperand(Operands, Name)) { + SMLoc Loc = getLexer().getLoc(); + Parser.EatToEndOfStatement(); + return Error(Loc, "unexpected token in argument list"); + } + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + SMLoc Loc = getLexer().getLoc(); + Parser.EatToEndOfStatement(); + return Error(Loc, "unexpected token in argument list"); + } + + Parser.Lex(); // Consume the EndOfStatement + return false; +} + bool MipsAsmParser:: ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return true; + //floating point instructions: should register be treated as double? + if (requestsDoubleOperand(Name)) { + setFpFormat(FP_FORMAT_D); + Operands.push_back(MipsOperand::CreateToken(Name, NameLoc)); + } + else { + setDefaultFpFormat(); + // Create the leading tokens for the mnemonic, split by '.' characters. + size_t Start = 0, Next = Name.find('.'); + StringRef Mnemonic = Name.slice(Start, Next); + + Operands.push_back(MipsOperand::CreateToken(Mnemonic, NameLoc)); + + if (Next != StringRef::npos) { + //there is a format token in mnemonic + //StringRef Rest = Name.slice(Next, StringRef::npos); + size_t Dot = Name.find('.', Next+1); + StringRef Format = Name.slice(Next, Dot); + if (Dot == StringRef::npos) //only one '.' in a string, it's a format + Operands.push_back(MipsOperand::CreateToken(Format, NameLoc)); + else { + if (Name.startswith("c.")){ + // floating point compare, add '.' and immediate represent for cc + Operands.push_back(MipsOperand::CreateToken(".", NameLoc)); + int Cc = ConvertCcString(Format); + if (Cc == -1) { + return Error(NameLoc, "Invalid conditional code"); + } + SMLoc E = SMLoc::getFromPointer( + Parser.getTok().getLoc().getPointer() -1 ); + Operands.push_back(MipsOperand::CreateImm( + MCConstantExpr::Create(Cc, getContext()), NameLoc, E)); + } else { + //trunc, ceil, floor ... + return parseMathOperation(Name, NameLoc, Operands); + } + + //the rest is a format + Format = Name.slice(Dot, StringRef::npos); + Operands.push_back(MipsOperand::CreateToken(Format, NameLoc)); + } + + setFpFormat(Format); + } + } + + // Read the remaining operands. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + // Read the first operand. + if (ParseOperand(Operands, Name)) { + SMLoc Loc = getLexer().getLoc(); + Parser.EatToEndOfStatement(); + return Error(Loc, "unexpected token in argument list"); + } + + while (getLexer().is(AsmToken::Comma) ) { + Parser.Lex(); // Eat the comma. + + // Parse and remember the operand. 
+ if (ParseOperand(Operands, Name)) { + SMLoc Loc = getLexer().getLoc(); + Parser.EatToEndOfStatement(); + return Error(Loc, "unexpected token in argument list"); + } + } + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + SMLoc Loc = getLexer().getLoc(); + Parser.EatToEndOfStatement(); + return Error(Loc, "unexpected token in argument list"); + } + + Parser.Lex(); // Consume the EndOfStatement + return false; } bool MipsAsmParser:: ParseDirective(AsmToken DirectiveID) { - return true; -} -MipsAsmParser::OperandMatchResultTy MipsAsmParser:: - parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&) { - return MatchOperand_ParseFail; + if (DirectiveID.getString() == ".ent") { + //ignore this directive for now + Parser.Lex(); + return false; + } + + if (DirectiveID.getString() == ".end") { + //ignore this directive for now + Parser.Lex(); + return false; + } + + if (DirectiveID.getString() == ".frame") { + //ignore this directive for now + Parser.EatToEndOfStatement(); + return false; + } + + if (DirectiveID.getString() == ".set") { + //ignore this directive for now + Parser.EatToEndOfStatement(); + return false; + } + + if (DirectiveID.getString() == ".fmask") { + //ignore this directive for now + Parser.EatToEndOfStatement(); + return false; + } + + if (DirectiveID.getString() == ".mask") { + //ignore this directive for now + Parser.EatToEndOfStatement(); + return false; + } + + if (DirectiveID.getString() == ".gpword") { + //ignore this directive for now + Parser.EatToEndOfStatement(); + return false; + } + + return true; } extern "C" void LLVMInitializeMipsAsmParser() { @@ -130,3 +945,7 @@ extern "C" void LLVMInitializeMipsAsmParser() { RegisterMCAsmParser<MipsAsmParser> A(TheMips64Target); RegisterMCAsmParser<MipsAsmParser> B(TheMips64elTarget); } + +#define GET_REGISTER_MATCHER +#define GET_MATCHER_IMPLEMENTATION +#include "MipsGenAsmMatcher.inc" diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index f535c50..0f84358 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -21,6 +21,7 @@ add_llvm_target(MipsCodeGen MipsAsmPrinter.cpp MipsCodeEmitter.cpp MipsDelaySlotFiller.cpp + MipsDirectObjLower.cpp MipsELFWriterInfo.cpp MipsJITInfo.cpp MipsInstrInfo.cpp diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h index 234455e..9603327 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h @@ -122,7 +122,7 @@ inline static unsigned getMipsRegisterNumbering(unsigned RegEnum) { switch (RegEnum) { case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64: - case Mips::D0: + case Mips::D0: case Mips::FCC0: return 0; case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64: return 1; diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index b8489ca..5d240fe 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -56,7 +56,7 @@ namespace { MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, bool _isN64, bool IsLittleEndian) : MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS, - /*HasRelocationAddend*/ false, + /*HasRelocationAddend*/ (_isN64) ? 
true : false, /*IsN64*/ _isN64) {} MipsELFObjectWriter::~MipsELFObjectWriter() {} diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 8dab62d..1d7370a 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -143,7 +143,11 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(OpNo); - assert(MO.isExpr() && "getBranchTargetOpValue expects only expressions"); + + // If the destination is an immediate, we have nothing to do. + if (MO.isImm()) return MO.getImm(); + assert(MO.isExpr() && + "getBranchTargetOpValue expects only expressions or immediates"); const MCExpr *Expr = MO.getExpr(); Fixups.push_back(MCFixup::Create(0, Expr, @@ -159,7 +163,10 @@ getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(OpNo); - assert(MO.isExpr() && "getJumpTargetOpValue expects only expressions"); + // If the destination is an immediate, we have nothing to do. + if (MO.isImm()) return MO.getImm(); + assert(MO.isExpr() && + "getJumpTargetOpValue expects only expressions or an immediate"); const MCExpr *Expr = MO.getExpr(); Fixups.push_back(MCFixup::Create(0, Expr, diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 2bc286b..ec84ad8 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -26,7 +26,7 @@ using namespace llvm; Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm) : MipsInstrInfo(tm, /* FIXME: set mips16 unconditional br */ 0), - RI(*tm.getSubtargetImpl(), *this) {} + RI(*tm.getSubtargetImpl()) {} const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const { return RI; diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp index c15d1bf..106e82f 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.cpp +++ b/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -38,9 +38,8 @@ using namespace llvm; -Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST, - const TargetInstrInfo &TII) - : MipsRegisterInfo(ST, TII) {} +Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST) + : MipsRegisterInfo(ST) {} // This function eliminate ADJCALLSTACKDOWN, // ADJCALLSTACKUP pseudo instructions diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h index 3f4b3a7..c702a15 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.h +++ b/lib/Target/Mips/Mips16RegisterInfo.h @@ -17,11 +17,11 @@ #include "MipsRegisterInfo.h" namespace llvm { +class Mips16InstrInfo; class Mips16RegisterInfo : public MipsRegisterInfo { public: - Mips16RegisterInfo(const MipsSubtarget &Subtarget, - const TargetInstrInfo &TII); + Mips16RegisterInfo(const MipsSubtarget &Subtarget); void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 20fc178..147be5d 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -110,9 +110,9 @@ def DSLLV : shift_rotate_reg<0x14, 0x00, "dsllv", shl, CPU64Regs>; def DSRLV : shift_rotate_reg<0x16, 0x00, "dsrlv", srl, CPU64Regs>; def DSRAV : shift_rotate_reg<0x17, 0x00, "dsrav", sra, CPU64Regs>; let Pattern = []<dag> in { -def DSLL32 : shift_rotate_imm64<0x3c, 0x00, "dsll32", shl>; -def DSRL32 : shift_rotate_imm64<0x3e, 0x00, 
"dsrl32", srl>; -def DSRA32 : shift_rotate_imm64<0x3f, 0x00, "dsra32", sra>; + def DSLL32 : shift_rotate_imm64<0x3c, 0x00, "dsll32", shl>; + def DSRL32 : shift_rotate_imm64<0x3e, 0x00, "dsrl32", srl>; + def DSRA32 : shift_rotate_imm64<0x3f, 0x00, "dsra32", sra>; } } // Rotate Instructions @@ -217,7 +217,15 @@ let DecoderNamespace = "Mips64" in { def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>; def DEXT : ExtBase<3, "dext", CPU64Regs>; +let Pattern = []<dag> in { + def DEXTU : ExtBase<2, "dextu", CPU64Regs>; + def DEXTM : ExtBase<1, "dextm", CPU64Regs>; +} def DINS : InsBase<7, "dins", CPU64Regs>; +let Pattern = []<dag> in { + def DINSU : InsBase<6, "dinsu", CPU64Regs>; + def DINSM : InsBase<5, "dinsm", CPU64Regs>; +} let isCodeGenOnly = 1, rs = 0, shamt = 0 in { def DSLL64_32 : FR<0x00, 0x3c, (outs CPU64Regs:$rd), (ins CPURegs:$rt), diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.cpp b/lib/Target/Mips/MipsAnalyzeImmediate.cpp index dc8fbd0..99b163e 100644 --- a/lib/Target/Mips/MipsAnalyzeImmediate.cpp +++ b/lib/Target/Mips/MipsAnalyzeImmediate.cpp @@ -91,7 +91,7 @@ void MipsAnalyzeImmediate::ReplaceADDiuSLLWithLUi(InstSeq &Seq) { // Sign-extend and shift operand of ADDiu and see if it still fits in 16-bit. int64_t Imm = SignExtend64<16>(Seq[0].ImmOpnd); - int64_t ShiftedImm = Imm << (Seq[1].ImmOpnd - 16); + int64_t ShiftedImm = (uint64_t)Imm << (Seq[1].ImmOpnd - 16); if (!isInt<16>(ShiftedImm)) return; diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 00ff754..e780134 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "mips-asm-printer" #include "Mips.h" #include "MipsAsmPrinter.h" +#include "MipsDirectObjLower.h" #include "MipsInstrInfo.h" #include "MipsMCInstLower.h" #include "InstPrinter/MipsInstPrinter.h" @@ -58,33 +59,31 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } - // Direct object specific instruction lowering - if (!OutStreamer.hasRawTextSupport()) - switch (MI->getOpcode()) { - case Mips::DSLL: - case Mips::DSRL: - case Mips::DSRA: - assert(MI->getNumOperands() == 3 && - "Invalid no. 
of machine operands for shift!"); - assert(MI->getOperand(2).isImm()); - int64_t Shift = MI->getOperand(2).getImm(); - if (Shift > 31) { - MCInst TmpInst0; - MCInstLowering.LowerLargeShift(MI, TmpInst0, Shift - 32); - OutStreamer.EmitInstruction(TmpInst0); - return; - } - break; - } - MachineBasicBlock::const_instr_iterator I = MI; MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); do { MCInst TmpInst0; MCInstLowering.Lower(I++, TmpInst0); + + // Direct object specific instruction lowering + if (!OutStreamer.hasRawTextSupport()){ + switch (TmpInst0.getOpcode()) { + // If the shift amount is >= 32, the instruction needs to be lowered further + case Mips::DSLL: + case Mips::DSRL: + case Mips::DSRA: + Mips::LowerLargeShift(TmpInst0); + break; + // Double extract instruction is chosen by pos and size operands + case Mips::DEXT: + case Mips::DINS: + Mips::LowerDextDins(TmpInst0); + } + } + OutStreamer.EmitInstruction(TmpInst0); - } while ((I != E) && I->isInsideBundle()); + } while ((I != E) && I->isInsideBundle()); // Delay slot check } //===----------------------------------------------------------------------===// @@ -214,7 +213,7 @@ const char *MipsAsmPrinter::getCurrentABIString() const { case MipsSubtarget::N32: return "abiN32"; case MipsSubtarget::N64: return "abi64"; case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64 - default: llvm_unreachable("Unknown Mips ABI");; + default: llvm_unreachable("Unknown Mips ABI"); } } diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp index cb7022b..5433295 100644 --- a/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/lib/Target/Mips/MipsCodeEmitter.cpp @@ -30,7 +30,6 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" -#include "llvm/Function.h" #include "llvm/PassManager.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -139,7 +138,7 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) { do { DEBUG(errs() << "JITTing function '" - << MF.getFunction()->getName() << "'\n"); + << MF.getName() << "'\n"); MCE.startFunction(MF); for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index 2bba8a3..e3c8ed7 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -30,10 +30,11 @@ STATISTIC(FilledSlots, "Number of delay slots filled"); STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that" " are not NOP."); -static cl::opt<bool> EnableDelaySlotFiller( - "enable-mips-delay-filler", +static cl::opt<bool> DisableDelaySlotFiller( - "disable-mips-delay-filler", cl::init(false), - cl::desc("Fill the Mips delay slots useful instructions."), + cl::desc("Disable the delay slot filler, which attempts to fill the Mips" + " delay slots with useful instructions."), cl::Hidden); // This option can be used to silence complaints by machine verifier passes. @@ -114,7 +115,9 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) { InstrIter D; - if (EnableDelaySlotFiller && findDelayInstr(MBB, I, D)) { + // Delay slot filling is disabled at -O0.
+ if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None) && + findDelayInstr(MBB, I, D)) { MBB.splice(llvm::next(I), &MBB, D); ++UsefulSlots; } else diff --git a/lib/Target/Mips/MipsDirectObjLower.cpp b/lib/Target/Mips/MipsDirectObjLower.cpp new file mode 100644 index 0000000..0d74db8 --- /dev/null +++ b/lib/Target/Mips/MipsDirectObjLower.cpp @@ -0,0 +1,86 @@ +//===-- MipsDirectObjLower.cpp - Mips LLVM direct object lowering -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower Mips MCInst records that are normally +// left to the assembler to lower such as large shifts. +// +//===----------------------------------------------------------------------===// +#include "MipsDirectObjLower.h" +#include "MipsInstrInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" + +using namespace llvm; + +// If the D<shift> instruction has a shift amount that is greater +// than 31 (checked in calling routine), lower it to a D<shift>32 instruction +void Mips::LowerLargeShift(MCInst& Inst) { + + assert(Inst.getNumOperands() == 3 && "Invalid no. of operands for shift!"); + assert(Inst.getOperand(2).isImm()); + + bool isLarge = false; + int64_t Shift; + Shift = Inst.getOperand(2).getImm(); + if (Shift > 31) { + Shift -= 32; + isLarge = true; + } + + // saminus32 + (Inst.getOperand(2)).setImm(Shift); + + if (isLarge) + switch (Inst.getOpcode()) { + default: + // Calling function is not synchronized + llvm_unreachable("Unexpected shift instruction"); + case Mips::DSLL: + Inst.setOpcode(Mips::DSLL32); + return; + case Mips::DSRL: + Inst.setOpcode(Mips::DSRL32); + return; + case Mips::DSRA: + Inst.setOpcode(Mips::DSRA32); + return; + } +} + +// Pick a DEXT or DINS instruction variant based on the pos and size operands +void Mips::LowerDextDins(MCInst& InstIn) { + int Opcode = InstIn.getOpcode(); + + if (Opcode == Mips::DEXT) + assert(InstIn.getNumOperands() == 4 && + "Invalid no. of machine operands for DEXT!"); + else // Only DEXT and DINS are possible + assert(InstIn.getNumOperands() == 5 && + "Invalid no. of machine operands for DINS!"); + + assert(InstIn.getOperand(2).isImm()); + int64_t pos = InstIn.getOperand(2).getImm(); + assert(InstIn.getOperand(3).isImm()); + int64_t size = InstIn.getOperand(3).getImm(); + + if (size <= 32) { + if ((pos < 32)) { // DEXT/DINS, do nothing + return; + } else { // DEXTU/DINSU + InstIn.getOperand(2).setImm(pos - 32); + InstIn.setOpcode((Opcode == Mips::DEXT) ? Mips::DEXTU : Mips::DINSU); + return; + } + } else { // DEXTM/DINSM + assert(pos < 32 && "DEXT/DINS cannot have both size and pos > 32"); + InstIn.getOperand(3).setImm(size - 32); + InstIn.setOpcode((Opcode == Mips::DEXT) ? Mips::DEXTM : Mips::DINSM); + return; + } +} diff --git a/lib/Target/Mips/MipsDirectObjLower.h b/lib/Target/Mips/MipsDirectObjLower.h new file mode 100644 index 0000000..8813cc9 --- /dev/null +++ b/lib/Target/Mips/MipsDirectObjLower.h @@ -0,0 +1,28 @@ +//===-- MipsDirectObjLower.h - Mips LLVM direct object lowering *- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MIPSDIRECTOBJLOWER_H +#define MIPSDIRECTOBJLOWER_H +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + class MCInst; + class MCStreamer; + + namespace Mips { + /// MipsDirectObjLower - This name space is used to lower MCInstr in cases + // where the assembler usually finishes the lowering + // such as large shifts. + void LowerLargeShift(MCInst &Inst); + void LowerDextDins(MCInst &Inst); + } +} + +#endif diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 5a97c17..4205223 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -337,8 +337,9 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { // Generate: // lui $2, %hi($CPI1_0) // lwc1 $f0, %lo($CPI1_0)($2) - if (Addr.getOperand(1).getOpcode() == MipsISD::Lo) { - SDValue LoVal = Addr.getOperand(1), Opnd0 = LoVal.getOperand(0); + if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || + Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { + SDValue Opnd0 = Addr.getOperand(1).getOperand(0); if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || isa<JumpTableSDNode>(Opnd0)) { Base = Addr.getOperand(0); diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index c5207c6..aa7b459 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -1571,15 +1571,15 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) { SDVTList VTs = DAG.getVTList(MVT::i32); - MipsTargetObjectFile &TLOF = (MipsTargetObjectFile&)getObjFileLowering(); + const MipsTargetObjectFile &TLOF = (const MipsTargetObjectFile&)getObjFileLowering(); // %gp_rel relocation if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) { SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, MipsII::MO_GPREL); SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, &GA, 1); - SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32); - return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode); + SDValue GPReg = DAG.getRegister(Mips::GP, MVT::i32); + return DAG.getNode(ISD::ADD, dl, MVT::i32, GPReg, GPRelNode); } // %hi/%lo relocation SDValue GAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 50e3eb5..8ade891 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -262,46 +262,3 @@ unsigned MipsInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { } } } - -unsigned -llvm::Mips::loadImmediate(int64_t Imm, bool IsN64, const TargetInstrInfo &TII, - MachineBasicBlock& MBB, - MachineBasicBlock::iterator II, DebugLoc DL, - bool LastInstrIsADDiu, - MipsAnalyzeImmediate::Inst *LastInst) { - MipsAnalyzeImmediate AnalyzeImm; - unsigned Size = IsN64 ? 64 : 32; - unsigned LUi = IsN64 ? Mips::LUi64 : Mips::LUi; - unsigned ZEROReg = IsN64 ? Mips::ZERO_64 : Mips::ZERO; - unsigned ATReg = IsN64 ? 
Mips::AT_64 : Mips::AT; - - const MipsAnalyzeImmediate::InstSeq &Seq = - AnalyzeImm.Analyze(Imm, Size, LastInstrIsADDiu); - MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); - - if (LastInst && (Seq.size() == 1)) { - *LastInst = *Inst; - return 0; - } - - // The first instruction can be a LUi, which is different from other - // instructions (ADDiu, ORI and SLL) in that it does not have a register - // operand. - if (Inst->Opc == LUi) - BuildMI(MBB, II, DL, TII.get(LUi), ATReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); - else - BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); - - // Build the remaining instructions in Seq. Skip the last instruction if - // LastInst is not 0. - for (++Inst; Inst != Seq.end() - !!LastInst; ++Inst) - BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); - - if (LastInst) - *LastInst = *Inst; - - return Seq.size() - !!LastInst; -} diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 7d56259..aca2bc7 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -88,18 +88,6 @@ private: const SmallVectorImpl<MachineOperand>& Cond) const; }; -namespace Mips { - /// Emit a series of instructions to load an immediate. All instructions - /// except for the last one are emitted. The function returns the number of - /// MachineInstrs generated. The opcode-immediate pair of the last - /// instruction is returned in LastInst, if it is not 0. - unsigned - loadImmediate(int64_t Imm, bool IsN64, const TargetInstrInfo &TII, - MachineBasicBlock& MBB, MachineBasicBlock::iterator II, - DebugLoc DL, bool LastInstrIsADDiu, - MipsAnalyzeImmediate::Inst *LastInst); -} - /// Create MipsInstrInfo objects. const MipsInstrInfo *createMips16InstrInfo(MipsTargetMachine &TM); const MipsInstrInfo *createMipsSEInstrInfo(MipsTargetMachine &TM); diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index fd952ef..6b6005f 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -74,9 +74,10 @@ def MipsRet : SDNode<"MipsISD::Ret", SDTNone, [SDNPHasChain, SDNPOptInGlue]>; // These are target-independent nodes, but have target-specific formats. 
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart, - [SDNPHasChain, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, + SDNPOptInGlue, SDNPOutGlue]>; // MAdd*/MSub* nodes def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub, @@ -110,7 +111,7 @@ def MipsWrapper : SDNode<"MipsISD::Wrapper", SDTIntBinOp>; def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc, [SDNPHasChain, SDNPInGlue]>; -def MipsSync : SDNode<"MipsISD::Sync", SDT_Sync, [SDNPHasChain]>; +def MipsSync : SDNode<"MipsISD::Sync", SDT_Sync, [SDNPHasChain,SDNPSideEffect]>; def MipsExt : SDNode<"MipsISD::Ext", SDT_Ext>; def MipsIns : SDNode<"MipsISD::Ins", SDT_Ins>; @@ -1079,6 +1080,26 @@ def EXT : ExtBase<0, "ext", CPURegs>; def INS : InsBase<4, "ins", CPURegs>; //===----------------------------------------------------------------------===// +// Instruction aliases +//===----------------------------------------------------------------------===// +def : InstAlias<"move $dst,$src", (ADD CPURegs:$dst,CPURegs:$src,ZERO)>; +def : InstAlias<"bal $offset", (BGEZAL RA,brtarget:$offset)>; +def : InstAlias<"addu $rs,$rt,$imm", + (ADDiu CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; +def : InstAlias<"add $rs,$rt,$imm", + (ADDi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; +def : InstAlias<"and $rs,$rt,$imm", + (ANDi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; +def : InstAlias<"j $rs", (JR CPURegs:$rs)>; +def : InstAlias<"not $rt,$rs", (NOR CPURegs:$rt,CPURegs:$rs,ZERO)>; +def : InstAlias<"neg $rt,$rs", (SUB CPURegs:$rt,ZERO,CPURegs:$rs)>; +def : InstAlias<"negu $rt,$rs", (SUBu CPURegs:$rt,ZERO,CPURegs:$rs)>; +def : InstAlias<"slt $rs,$rt,$imm", + (SLTi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; +def : InstAlias<"xor $rs,$rt,$imm", + (XORi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; + +//===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index f78203f..b9dbd52 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -10,6 +10,10 @@ // This pass expands a branch or jump instruction into a long branch if its // offset is too large to fit into its immediate field. // +// FIXME: +// 1. Fix pc-region jump instructions which cross 256MB segment boundaries. +// 2. If program has inline assembly statements whose size cannot be +// determined accurately, load branch target addresses from the GOT. //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-long-branch" @@ -48,7 +52,7 @@ namespace { typedef MachineBasicBlock::reverse_iterator ReverseIter; struct MBBInfo { - uint64_t Size; + uint64_t Size, Address; bool HasLongBranch; MachineInstr *Br; @@ -61,7 +65,10 @@ namespace { static char ID; MipsLongBranch(TargetMachine &tm) : MachineFunctionPass(ID), TM(tm), - TII(static_cast<const MipsInstrInfo*>(tm.getInstrInfo())) {} + TII(static_cast<const MipsInstrInfo*>(tm.getInstrInfo())), + IsPIC(TM.getRelocationModel() == Reloc::PIC_), + ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()), + LongBranchSeqSize(!IsPIC ? 2 : (ABI == MipsSubtarget::N64 ? 
13 : 9)) {} virtual const char *getPassName() const { return "Mips Long Branch"; @@ -81,6 +88,9 @@ namespace { const MipsInstrInfo *TII; MachineFunction *MF; SmallVector<MBBInfo, 16> MBBInfos; + bool IsPIC; + unsigned ABI; + unsigned LongBranchSeqSize; }; char MipsLongBranch::ID = 0; @@ -230,12 +240,6 @@ void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br, // Expand branch instructions to long branches. void MipsLongBranch::expandToLongBranch(MBBInfo &I) { - I.HasLongBranch = true; - - bool IsPIC = TM.getRelocationModel() == Reloc::PIC_; - unsigned ABI = TM.getSubtarget<MipsSubtarget>().getTargetABI(); - bool N64 = ABI == MipsSubtarget::N64; - MachineBasicBlock::iterator Pos; MachineBasicBlock *MBB = I.Br->getParent(), *TgtMBB = getTargetMBB(*I.Br); DebugLoc DL = I.Br->getDebugLoc(); @@ -248,101 +252,105 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { MBB->addSuccessor(LongBrMBB); if (IsPIC) { - // $longbr: - // addiu $sp, $sp, -regsize * 2 - // sw $ra, 0($sp) - // bal $baltgt - // sw $a3, regsize($sp) - // $baltgt: - // lui $a3, %hi($baltgt) - // lui $at, %hi($tgt) - // addiu $a3, $a3, %lo($baltgt) - // addiu $at, $at, %lo($tgt) - // subu $at, $at, $a3 - // addu $at, $ra, $at - // - // if n64: - // lui $a3, %highest($baltgt) - // lui $ra, %highest($tgt) - // addiu $a3, $a3, %higher($baltgt) - // addiu $ra, $ra, %higher($tgt) - // dsll $a3, $a3, 32 - // dsll $ra, $ra, 32 - // subu $at, $at, $a3 - // addu $at, $at, $ra - // - // lw $ra, 0($sp) - // lw $a3, regsize($sp) - // jr $at - // addiu $sp, $sp, regsize * 2 - // $fallthrough: - // - MF->getInfo<MipsFunctionInfo>()->setEmitNOAT(); MachineBasicBlock *BalTgtMBB = MF->CreateMachineBasicBlock(BB); MF->insert(FallThroughMBB, BalTgtMBB); LongBrMBB->addSuccessor(BalTgtMBB); BalTgtMBB->addSuccessor(TgtMBB); - int RegSize = N64 ? 8 : 4; - unsigned AT = N64 ? Mips::AT_64 : Mips::AT; - unsigned A3 = N64 ? Mips::A3_64 : Mips::A3; - unsigned SP = N64 ? Mips::SP_64 : Mips::SP; - unsigned RA = N64 ? Mips::RA_64 : Mips::RA; - unsigned Load = N64 ? Mips::LD_P8 : Mips::LW; - unsigned Store = N64 ? Mips::SD_P8 : Mips::SW; - unsigned LUi = N64 ? Mips::LUi64 : Mips::LUi; - unsigned ADDiu = N64 ? Mips::DADDiu : Mips::ADDiu; - unsigned ADDu = N64 ? Mips::DADDu : Mips::ADDu; - unsigned SUBu = N64 ? Mips::SUBu : Mips::SUBu; - unsigned JR = N64 ? 
Mips::JR64 : Mips::JR; - - Pos = LongBrMBB->begin(); - - BuildMI(*LongBrMBB, Pos, DL, TII->get(ADDiu), SP).addReg(SP) - .addImm(-RegSize * 2); - BuildMI(*LongBrMBB, Pos, DL, TII->get(Store)).addReg(RA).addReg(SP) - .addImm(0); - BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB); - BuildMI(*LongBrMBB, Pos, DL, TII->get(Store)).addReg(A3).addReg(SP) - .addImm(RegSize)->setIsInsideBundle(); - - Pos = BalTgtMBB->begin(); - - BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), A3) - .addMBB(BalTgtMBB, MipsII::MO_ABS_HI); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), AT) - .addMBB(TgtMBB, MipsII::MO_ABS_HI); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), A3).addReg(A3) - .addMBB(BalTgtMBB, MipsII::MO_ABS_LO); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), AT).addReg(AT) - .addMBB(TgtMBB, MipsII::MO_ABS_LO); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(SUBu), AT).addReg(AT).addReg(A3); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDu), AT).addReg(RA).addReg(AT); - - if (N64) { - BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), A3) - .addMBB(BalTgtMBB, MipsII::MO_HIGHEST); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), RA) - .addMBB(TgtMBB, MipsII::MO_HIGHEST); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), A3).addReg(A3) - .addMBB(BalTgtMBB, MipsII::MO_HIGHER); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), RA).addReg(RA) - .addMBB(TgtMBB, MipsII::MO_HIGHER); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DSLL), A3).addReg(A3) - .addImm(32); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DSLL), RA).addReg(RA) - .addImm(32); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(SUBu), AT).addReg(AT).addReg(A3); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDu), AT).addReg(AT).addReg(RA); - I.Size += 4 * 8; + int64_t TgtAddress = MBBInfos[TgtMBB->getNumber()].Address; + int64_t Offset = TgtAddress - (I.Address + I.Size - 20); + int64_t Lo = SignExtend64<16>(Offset & 0xffff); + int64_t Hi = SignExtend64<16>(((Offset + 0x8000) >> 16) & 0xffff); + + if (ABI != MipsSubtarget::N64) { + // $longbr: + // addiu $sp, $sp, -8 + // sw $ra, 0($sp) + // bal $baltgt + // lui $at, %hi($tgt - $baltgt) + // $baltgt: + // addiu $at, $at, %lo($tgt - $baltgt) + // addu $at, $ra, $at + // lw $ra, 0($sp) + // jr $at + // addiu $sp, $sp, 8 + // $fallthrough: + // + + Pos = LongBrMBB->begin(); + + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP) + .addReg(Mips::SP).addImm(-8); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::SW)).addReg(Mips::RA) + .addReg(Mips::SP).addImm(0); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LUi), Mips::AT).addImm(Hi) + ->setIsInsideBundle(); + + Pos = BalTgtMBB->begin(); + + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::AT) + .addReg(Mips::AT).addImm(Lo); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDu), Mips::AT) + .addReg(Mips::RA).addReg(Mips::AT); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LW), Mips::RA) + .addReg(Mips::SP).addImm(0); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR)).addReg(Mips::AT); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP) + .addReg(Mips::SP).addImm(8)->setIsInsideBundle(); + } else { + // $longbr: + // daddiu $sp, $sp, -16 + // sd $ra, 0($sp) + // lui64 $at, %highest($tgt - $baltgt) + // daddiu $at, $at, %higher($tgt - $baltgt) + // dsll $at, $at, 16 + // daddiu $at, $at, %hi($tgt - $baltgt) + // bal $baltgt + // dsll $at, $at, 16 + // $baltgt: + // daddiu $at, $at, %lo($tgt - $baltgt) + // daddu $at, $ra, $at + // ld $ra, 0($sp) + // jr64 $at + // daddiu $sp, 
$sp, 16 + // $fallthrough: + // + + int64_t Higher = SignExtend64<16>(((Offset + 0x80008000) >> 32) & 0xffff); + int64_t Highest = + SignExtend64<16>(((Offset + 0x800080008000LL) >> 48) & 0xffff); + + Pos = LongBrMBB->begin(); + + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64) + .addReg(Mips::SP_64).addImm(-16); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::SD)).addReg(Mips::RA_64) + .addReg(Mips::SP_64).addImm(0); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LUi64), Mips::AT_64) + .addImm(Highest); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64) + .addReg(Mips::AT_64).addImm(Higher); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64) + .addReg(Mips::AT_64).addImm(16); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64) + .addReg(Mips::AT_64).addImm(Hi); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64) + .addReg(Mips::AT_64).addImm(16)->setIsInsideBundle(); + + Pos = BalTgtMBB->begin(); + + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64) + .addReg(Mips::AT_64).addImm(Lo); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDu), Mips::AT_64) + .addReg(Mips::RA_64).addReg(Mips::AT_64); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LD), Mips::RA_64) + .addReg(Mips::SP_64).addImm(0); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR64)).addReg(Mips::AT_64); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64) + .addReg(Mips::SP_64).addImm(16)->setIsInsideBundle(); } - - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Load), RA).addReg(SP).addImm(0); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Load), A3).addReg(SP).addImm(RegSize); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(JR)).addReg(AT); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), SP).addReg(SP) - .addImm(RegSize * 2)->setIsInsideBundle(); - I.Size += 4 * 14; } else { // $longbr: // j $tgt @@ -353,7 +361,6 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { LongBrMBB->addSuccessor(TgtMBB); BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::J)).addMBB(TgtMBB); BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::NOP))->setIsInsideBundle(); - I.Size += 4 * 2; } if (I.Br->isUnconditionalBranch()) { @@ -401,19 +408,36 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) { if (!I->Br || I->HasLongBranch) continue; - if (!ForceLongBranch) - // Check if offset fits into 16-bit immediate field of branches. - if (isInt<16>(computeOffset(I->Br) / 4)) - continue; + // Check if offset fits into 16-bit immediate field of branches. + if (!ForceLongBranch && isInt<16>(computeOffset(I->Br) / 4)) + continue; - expandToLongBranch(*I); + I->HasLongBranch = true; + I->Size += LongBranchSeqSize * 4; ++LongBranches; EverMadeChange = MadeChange = true; } } - if (EverMadeChange) - MF->RenumberBlocks(); + if (!EverMadeChange) + return true; + + // Compute basic block addresses. + if (TM.getRelocationModel() == Reloc::PIC_) { + MF->getInfo<MipsFunctionInfo>()->setEmitNOAT(); + + uint64_t Address = 0; + + for (I = MBBInfos.begin(); I != E; Address += I->Size, ++I) + I->Address = Address; + } + + // Do the expansion. + for (I = MBBInfos.begin(); I != E; ++I) + if (I->HasLongBranch) + expandToLongBranch(*I); + + MF->RenumberBlocks(); return true; } diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index d4c5e6d..5fa6339 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -11,7 +11,6 @@ // MCInst records. 
// //===----------------------------------------------------------------------===// - #include "MipsMCInstLower.h" #include "MipsAsmPrinter.h" #include "MipsInstrInfo.h" @@ -161,31 +160,3 @@ void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { } } -// If the D<shift> instruction has a shift amount that is greater -// than 31 (checked in calling routine), lower it to a D<shift>32 instruction -void MipsMCInstLower::LowerLargeShift(const MachineInstr *MI, - MCInst& Inst, - int64_t Shift) { - // rt - Inst.addOperand(LowerOperand(MI->getOperand(0))); - // rd - Inst.addOperand(LowerOperand(MI->getOperand(1))); - // saminus32 - Inst.addOperand(MCOperand::CreateImm(Shift)); - - switch (MI->getOpcode()) { - default: - // Calling function is not synchronized - llvm_unreachable("Unexpected shift instruction"); - break; - case Mips::DSLL: - Inst.setOpcode(Mips::DSLL32); - break; - case Mips::DSRL: - Inst.setOpcode(Mips::DSRL32); - break; - case Mips::DSRA: - Inst.setOpcode(Mips::DSRA32); - break; - } -} diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h index 0abb996..3eab5a4 100644 --- a/lib/Target/Mips/MipsMCInstLower.h +++ b/lib/Target/Mips/MipsMCInstLower.h @@ -33,7 +33,6 @@ public: MipsMCInstLower(MipsAsmPrinter &asmprinter); void Initialize(Mangler *mang, MCContext *C); void Lower(const MachineInstr *MI, MCInst &OutMI) const; - void LowerLargeShift(const MachineInstr *MI, MCInst &Inst, int64_t Shift); private: MCOperand LowerSymbolOperand(const MachineOperand &MO, diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index ae6ae3a..79a142a 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -22,7 +22,6 @@ #include "llvm/Constants.h" #include "llvm/DebugInfo.h" #include "llvm/Type.h" -#include "llvm/Function.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" @@ -43,9 +42,8 @@ using namespace llvm; -MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST, - const TargetInstrInfo &tii) - : MipsGenRegisterInfo(Mips::RA), Subtarget(ST), TII(tii) {} +MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST) + : MipsGenRegisterInfo(Mips::RA), Subtarget(ST) {} unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; } @@ -131,6 +129,12 @@ getReservedRegs(const MachineFunction &MF) const { Reserved.set(Mips::RA_64); } + // Reserve GP if small section is used. 
+ if (Subtarget.useSmallSection()) { + Reserved.set(Mips::GP); + Reserved.set(Mips::GP_64); + } + return Reserved; } @@ -160,7 +164,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, "Instr doesn't have FrameIndex operand!"); } - DEBUG(errs() << "\nFunction : " << MF.getFunction()->getName() << "\n"; + DEBUG(errs() << "\nFunction : " << MF.getName() << "\n"; errs() << "<--------->\n" << MI); int FrameIndex = MI.getOperand(i).getIndex(); diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 9a05e94..78adf7f 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -22,16 +22,14 @@ namespace llvm { class MipsSubtarget; -class TargetInstrInfo; class Type; class MipsRegisterInfo : public MipsGenRegisterInfo { protected: const MipsSubtarget &Subtarget; - const TargetInstrInfo &TII; public: - MipsRegisterInfo(const MipsSubtarget &Subtarget, const TargetInstrInfo &tii); + MipsRegisterInfo(const MipsSubtarget &Subtarget); /// getRegisterNumbering - Given the enum value for some register, e.g. /// Mips::RA, return the number that it corresponds to (e.g. 31). diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index eeb1de3..e4b44ef 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -260,14 +260,53 @@ void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, if (isInt<16>(Amount))// addi sp, sp, amount BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount); else { // Expand immediate that doesn't fit in 16-bit. - unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT; - MBB.getParent()->getInfo<MipsFunctionInfo>()->setEmitNOAT(); - Mips::loadImmediate(Amount, STI.isABI_N64(), *this, MBB, I, DL, false, 0); - BuildMI(MBB, I, DL, get(ADDu), SP).addReg(SP).addReg(ATReg); + unsigned Reg = loadImmediate(Amount, MBB, I, DL, 0); + BuildMI(MBB, I, DL, get(ADDu), SP).addReg(SP).addReg(Reg); } } +/// This function generates the sequence of instructions needed to get the +/// result of adding register REG and immediate IMM. +unsigned +MipsSEInstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB, + MachineBasicBlock::iterator II, DebugLoc DL, + unsigned *NewImm) const { + MipsAnalyzeImmediate AnalyzeImm; + const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>(); + unsigned Size = STI.isABI_N64() ? 64 : 32; + unsigned LUi = STI.isABI_N64() ? Mips::LUi64 : Mips::LUi; + unsigned ZEROReg = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; + unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT; + bool LastInstrIsADDiu = NewImm; + + const MipsAnalyzeImmediate::InstSeq &Seq = + AnalyzeImm.Analyze(Imm, Size, LastInstrIsADDiu); + MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); + + assert(Seq.size() && (!LastInstrIsADDiu || (Seq.size() > 1))); + + // The first instruction can be a LUi, which is different from other + // instructions (ADDiu, ORI and SLL) in that it does not have a register + // operand. + if (Inst->Opc == LUi) + BuildMI(MBB, II, DL, get(LUi), ATReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + else + BuildMI(MBB, II, DL, get(Inst->Opc), ATReg).addReg(ZEROReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + + // Build the remaining instructions in Seq. 
+ for (++Inst; Inst != Seq.end() - LastInstrIsADDiu; ++Inst) + BuildMI(MBB, II, DL, get(Inst->Opc), ATReg).addReg(ATReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + + if (LastInstrIsADDiu) + *NewImm = Inst->ImmOpnd; + + return ATReg; +} + unsigned MipsSEInstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ || Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h index 346e74d..55b78b2 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.h +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -15,7 +15,6 @@ #define MIPSSEINSTRUCTIONINFO_H #include "MipsInstrInfo.h" -#include "MipsAnalyzeImmediate.h" #include "MipsSERegisterInfo.h" namespace llvm { @@ -70,6 +69,13 @@ public: void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; + /// Emit a series of instructions to load an immediate. If NewImm is a + /// non-NULL parameter, the last instruction is not emitted, but instead + /// its immediate operand is returned in NewImm. + unsigned loadImmediate(int64_t Imm, MachineBasicBlock &MBB, + MachineBasicBlock::iterator II, DebugLoc DL, + unsigned *NewImm) const; + private: virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp index 043a1ef..d868f73 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -40,8 +40,8 @@ using namespace llvm; MipsSERegisterInfo::MipsSERegisterInfo(const MipsSubtarget &ST, - const TargetInstrInfo &TII) - : MipsRegisterInfo(ST, TII) {} + const MipsSEInstrInfo &I) + : MipsRegisterInfo(ST), TII(I) {} // This function eliminate ADJCALLSTACKDOWN, // ADJCALLSTACKUP pseudo instructions @@ -122,15 +122,14 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, DebugLoc DL = II->getDebugLoc(); unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu; unsigned ATReg = Subtarget.isABI_N64() ? 
Mips::AT_64 : Mips::AT; - MipsAnalyzeImmediate::Inst LastInst(0, 0); + unsigned NewImm; MipsFI->setEmitNOAT(); - Mips::loadImmediate(Offset, Subtarget.isABI_N64(), TII, MBB, II, DL, true, - &LastInst); - BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg); + unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL, &NewImm); + BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(Reg); FrameReg = ATReg; - Offset = SignExtend64<16>(LastInst.ImmOpnd); + Offset = SignExtend64<16>(NewImm); } MI.getOperand(OpNo).ChangeToRegister(FrameReg, false); diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h index 4b17b33..b4eab65 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.h +++ b/lib/Target/Mips/MipsSERegisterInfo.h @@ -18,11 +18,14 @@ #include "MipsRegisterInfo.h" namespace llvm { +class MipsSEInstrInfo; class MipsSERegisterInfo : public MipsRegisterInfo { + const MipsSEInstrInfo &TII; + public: MipsSERegisterInfo(const MipsSubtarget &Subtarget, - const TargetInstrInfo &TII); + const MipsSEInstrInfo &TII); void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 11ff809..ac83d83 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -25,7 +25,8 @@ using namespace llvm; void MipsSubtarget::anchor() { } MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool little) : + const std::string &FS, bool little, + Reloc::Model RM) : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little), IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false), @@ -54,6 +55,9 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, // Is the target system Linux ? if (TT.find("linux") == std::string::npos) IsLinux = false; + + // Set UseSmallSection. + UseSmallSection = !IsLinux && (RM == Reloc::Static); } bool diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index ba15362..0595e8d 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -65,6 +65,9 @@ protected: // isLinux - Target system is Linux. Is false we consider ELFOS for now. bool IsLinux; + // UseSmallSection - Small section is used. + bool UseSmallSection; + /// Features related to the presence of specific instructions. // HasSEInReg - SEB and SEH (signext in register) instructions. @@ -109,7 +112,7 @@ public: /// This constructor initializes the data members to match that /// of the specified triple. MipsSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool little); + const std::string &FS, bool little, Reloc::Model RM); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. 
@@ -133,6 +136,7 @@ public: bool inMips16Mode() const { return InMips16Mode; } bool isAndroid() const { return IsAndroid; } bool isLinux() const { return IsLinux; } + bool useSmallSection() const { return UseSmallSection; } bool hasStandardEncoding() const { return !inMips16Mode(); } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 2928a73..b70542b 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -42,7 +42,7 @@ MipsTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL, bool isLittle) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, isLittle), + Subtarget(TT, CPU, FS, isLittle, RM), DataLayout(isLittle ? (Subtarget.isABI_N64() ? "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" : diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp index 04dc60a..1f5e34f 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -26,6 +26,7 @@ SSThreshold("mips-ssection-threshold", cl::Hidden, void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); SmallDataSection = getContext().getELFSection(".sdata", ELF::SHT_PROGBITS, @@ -60,9 +61,10 @@ bool MipsTargetObjectFile:: IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, SectionKind Kind) const { - // Only use small section for non linux targets. const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>(); - if (Subtarget.isLinux()) + + // Return if small section is not available. + if (!Subtarget.useSmallSection()) return false; // Only global variables, not functions. diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index d175e3e..413142e 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -137,7 +137,7 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { char Value = MI->getOperand(OpNo).getImm(); - Value = (Value << (32-5)) >> (32-5); + Value = SignExtend32<5>(Value); O << (int)Value; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 245b457..b9ea8b5 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -64,7 +64,6 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { ZeroDirective = "\t.space\t"; Data64bitsDirective = is64Bit ? "\t.quad\t" : 0; - LCOMMDirectiveType = LCOMM::NoAlignment; AssemblerDialect = 0; // Old-Style mnemonics. 
} diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index b7f1688..cb15dad 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -35,6 +35,10 @@ def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">; def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">; def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">; def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">; +def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective", + "PPC::DIR_E500mc", "">; +def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective", + "PPC::DIR_E5500", "">; def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">; def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">; @@ -94,6 +98,12 @@ def : Processor<"g5", G5Itineraries, [Directive970, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */]>; +def : ProcessorModel<"e500mc", PPCE500mcModel, + [DirectiveE500mc, FeatureMFOCRF, + FeatureSTFIWX, FeatureBookE, FeatureISEL]>; +def : ProcessorModel<"e5500", PPCE5500Model, + [DirectiveE5500, FeatureMFOCRF, Feature64Bit, + FeatureSTFIWX, FeatureBookE, FeatureISEL]>; def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, FeatureISEL, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index f76b89c..6e0e8bb 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -109,6 +109,8 @@ namespace { bool doFinalization(Module &M); virtual void EmitFunctionEntryLabel(); + + void EmitFunctionBodyEnd(); }; /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac @@ -345,23 +347,32 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitLabel(PICBase); return; } + case PPC::LDtocJTI: + case PPC::LDtocCPT: case PPC::LDtoc: { // Transform %X3 = LDtoc <ga:@min1>, %X2 LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); - + // Change the opcode to LD, and the global address operand to be a // reference to the TOC entry we will synthesize later. TmpInst.setOpcode(PPC::LD); const MachineOperand &MO = MI->getOperand(1); - assert(MO.isGlobal()); - - // Map symbol -> label of TOC entry. - MCSymbol *&TOCEntry = TOC[Mang->getSymbol(MO.getGlobal())]; + + // Map symbol -> label of TOC entry + assert(MO.isGlobal() || MO.isCPI() || MO.isJTI()); + MCSymbol *MOSymbol = 0; + if (MO.isGlobal()) + MOSymbol = Mang->getSymbol(MO.getGlobal()); + else if (MO.isCPI()) + MOSymbol = GetCPISymbol(MO.getIndex()); + else if (MO.isJTI()) + MOSymbol = GetJTISymbol(MO.getIndex()); + MCSymbol *&TOCEntry = TOC[MOSymbol]; if (TOCEntry == 0) TOCEntry = GetTempSymbol("C", TOCLabelID++); - + const MCExpr *Exp = - MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC, + MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC_ENTRY, OutContext); TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); OutStreamer.EmitInstruction(TmpInst); @@ -406,9 +417,9 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName())); MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.@tocbase")); OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext), - Subtarget.isPPC64() ? 
8 : 4/*size*/, 0/*addrspace*/); + 8/*size*/, 0/*addrspace*/); OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, OutContext), - Subtarget.isPPC64() ? 8 : 4/*size*/, 0/*addrspace*/); + 8/*size*/, 0/*addrspace*/); OutStreamer.SwitchSection(Current); MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol( @@ -441,6 +452,23 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { return AsmPrinter::doFinalization(M); } +/// EmitFunctionBodyEnd - Print the traceback table before the .size +/// directive. +/// +void PPCLinuxAsmPrinter::EmitFunctionBodyEnd() { + // Only the 64-bit target requires a traceback table. For now, + // we only emit the word of zeroes that GDB requires to find + // the end of the function, and zeroes for the eight-byte + // mandatory fields. + // FIXME: We should fill in the eight-byte mandatory fields as described in + // the PPC64 ELF ABI (this is a low-priority item because GDB does not + // currently make use of these fields). + if (Subtarget.isPPC64()) { + OutStreamer.EmitIntValue(0, 4/*size*/); + OutStreamer.EmitIntValue(0, 8/*size*/); + } +} + void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { static const char *const CPUDirectives[] = { "", @@ -453,6 +481,8 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { "ppc750", "ppc970", "ppcA2", + "ppce500mc", + "ppce5500", "power6", "power7", "ppc64" diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index a00f686..e8f4d16 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -975,6 +975,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { case ISD::AND: { unsigned Imm, Imm2, SH, MB, ME; + uint64_t Imm64; // If this is an and of a value rotated between 0 and 31 bits and then and'd // with a mask, emit rlwinm @@ -993,6 +994,14 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) }; return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); } + // If this is a 64-bit zero-extension mask, emit rldicl. + if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) && + isMask_64(Imm64)) { + SDValue Val = N->getOperand(0); + MB = 64 - CountTrailingOnes_64(Imm64); + SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB) }; + return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops, 3); + } // AND X, 0 -> 0, not "rlwinm 32". if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) { ReplaceUses(SDValue(N, 0), N->getOperand(1)); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 61d44c5..dbb3b14 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -449,6 +449,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setSchedulingPreference(Sched::Hybrid); computeRegisterProperties(); + + // The Freescale cores do better with aggressive inlining of memcpy and + // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
+ if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc || + Subtarget->getDarwinDirective() == PPC::DIR_E5500) { + maxStoresPerMemset = 32; + maxStoresPerMemsetOptSize = 16; + maxStoresPerMemcpy = 32; + maxStoresPerMemcpyOptSize = 8; + maxStoresPerMemmove = 32; + maxStoresPerMemmoveOptSize = 8; + + setPrefFunctionAlignment(4); + benefitFromCodePlacementOpt = true; + } } /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate @@ -517,6 +532,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; case PPCISD::MTFSF: return "PPCISD::MTFSF"; case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; + case PPCISD::CR6SET: return "PPCISD::CR6SET"; + case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; } } @@ -811,14 +828,13 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { } // Properly sign extend the value. - int ShAmt = (4-ByteSize)*8; - int MaskVal = ((int)Value << ShAmt) >> ShAmt; + int MaskVal = SignExtend32(Value, ByteSize * 8); // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. if (MaskVal == 0) return SDValue(); // Finally, if this value fits in a 5 bit sext field, return it - if (((MaskVal << (32-5)) >> (32-5)) == MaskVal) + if (SignExtend32<5>(MaskVal) == MaskVal) return DAG.getTargetConstant(MaskVal, MVT::i32); return SDValue(); } @@ -1204,6 +1220,14 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); const Constant *C = CP->getConstVal(); + // 64-bit SVR4 ABI code is always position-independent. + // The actual address of the GlobalValue is stored in the TOC. + if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { + SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); + return DAG.getNode(PPCISD::TOC_ENTRY, CP->getDebugLoc(), MVT::i64, GA, + DAG.getRegister(PPC::X2, MVT::i64)); + } + unsigned MOHiFlag, MOLoFlag; bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); SDValue CPIHi = @@ -1217,6 +1241,14 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + // 64-bit SVR4 ABI code is always position-independent. + // The actual address of the GlobalValue is stored in the TOC. + if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { + SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); + return DAG.getNode(PPCISD::TOC_ENTRY, JT->getDebugLoc(), MVT::i64, GA, + DAG.getRegister(PPC::X2, MVT::i64)); + } + unsigned MOHiFlag, MOLoFlag; bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); @@ -1441,7 +1473,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, MachinePointerInfo(), MVT::i32, false, false, 0); - return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), + return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, false, 0); } @@ -2408,7 +2440,7 @@ static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { int Addr = C->getZExtValue(); if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. - (Addr << 6 >> 6) != Addr) + SignExtend32<26>(Addr) != Addr) return 0; // Top 6 bits have to be sext of immediate. 
return DAG.getConstant((int)C->getZExtValue() >> 2, @@ -2819,6 +2851,10 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, isTailCall, RegsToPass, Ops, NodeTys, PPCSubTarget); + // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls + if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) + Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); + // When performing tail call optimization the callee pops its arguments off // the stack. Account for this here so these bytes can be pushed back on in // PPCRegisterInfo::eliminateCallFramePseudoInstr. @@ -3116,14 +3152,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); - // Set CR6 to true if this is a vararg call with floating args passed in - // registers. - if (isVarArg) { - SDValue SetCR(DAG.getMachineNode(seenFloatArg ? PPC::CRSET : PPC::CRUNSET, - dl, MVT::i32), 0); - RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR)); - } - // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; @@ -3133,6 +3161,18 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, InFlag = Chain.getValue(1); } + // Set CR bit 6 to true if this is a vararg call with floating args passed in + // registers. + if (isVarArg) { + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Ops[] = { Chain, InFlag }; + + Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, + dl, VTs, Ops, InFlag.getNode() ? 2 : 1); + + InFlag = Chain.getValue(1); + } + if (isTailCall) PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, false, TailCallArguments); @@ -4126,7 +4166,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, unsigned TypeShiftAmt = i & (SplatBitSize-1); // vsplti + shl self. - if (SextVal == (i << (int)TypeShiftAmt)) { + if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, @@ -4171,17 +4211,17 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, } // t = vsplti c, result = vsldoi t, t, 1 - if (SextVal == ((i << 8) | (i < 0 ? 0xFF : 0))) { + if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 2 - if (SextVal == ((i << 16) | (i < 0 ? 0xFFFF : 0))) { + if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 3 - if (SextVal == ((i << 24) | (i < 0 ? 0xFFFFFF : 0))) { + if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 
0xFFFFFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl); } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index b0a013b..902b188 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -174,6 +174,10 @@ namespace llvm { /// operand #3 optional in flag TC_RETURN, + /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls + CR6SET, + CR6UNSET, + /// STD_32 - This is the STD instruction for use with "32-bit" registers. STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE, diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 39778a5..cfe71d17 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -29,6 +29,9 @@ def symbolLo64 : Operand<i64> { let PrintMethod = "printSymbolLo"; let EncoderMethod = "getLO16Encoding"; } +def tocentry : Operand<iPTR> { + let MIOperandInfo = (ops i32imm:$imm); +} //===----------------------------------------------------------------------===// // 64-bit transformation functions. @@ -296,12 +299,14 @@ def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins), let PPC970_Unit = 1 in { // FXU Operations. +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm), "li $rD, $imm", IntSimple, [(set G8RC:$rD, immSExt16:$imm)]>; def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm), "lis $rD, $imm", IntSimple, [(set G8RC:$rD, imm16ShiftedSExt:$imm)]>; +} // Logical ops. def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), @@ -459,7 +464,7 @@ def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS), let Defs = [CARRY] in { def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH), - "sradi $rA, $rS, $SH", IntRotateD, + "sradi $rA, $rS, $SH", IntRotateDI, [(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64; } def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS), @@ -482,7 +487,7 @@ def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), let isCommutable = 1 in { def RLDIMI : MDForm_1<30, 3, (outs G8RC:$rA), (ins G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB), - "rldimi $rA, $rS, $SH, $MB", IntRotateD, + "rldimi $rA, $rS, $SH, $MB", IntRotateDI, []>, isPPC64, RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">; } @@ -494,11 +499,11 @@ def RLDCL : MDForm_1<30, 0, []>, isPPC64; def RLDICL : MDForm_1<30, 0, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MB), - "rldicl $rA, $rS, $SH, $MB", IntRotateD, + "rldicl $rA, $rS, $SH, $MB", IntRotateDI, []>, isPPC64; def RLDICR : MDForm_1<30, 1, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME), - "rldicr $rA, $rS, $SH, $ME", IntRotateD, + "rldicr $rA, $rS, $SH, $ME", IntRotateDI, []>, isPPC64; def RLWINM8 : MForm_2<21, @@ -541,19 +546,19 @@ def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src), let mayLoad = 1 in def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp, ptr_rc:$rA), - "lhau $rD, $disp($rA)", LdStLoad, + "lhau $rD, $disp($rA)", LdStLHAU, []>, RegConstraint<"$rA = $ea_result">, NoEncode<"$ea_result">; // NO LWAU! 
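For context on the new CR6SET/CR6UNSET nodes: under the 32-bit SVR4 ABI a vararg caller signals through CR bit 6 whether any floating-point arguments were passed in registers, so the callee knows whether the FP argument registers must be saved for va_arg. A rough standalone model of that handshake (illustrative names only, not ABI or LLVM code):

#include <cstdio>

struct CallState { bool cr6; };  // stands in for condition-register bit 6

// The callee's vararg prologue only needs to dump the FP argument registers
// when the caller set the flag.
static void varargCallee(const CallState &s) {
  if (s.cr6)
    std::printf("spill FP argument registers for va_arg\n");
  else
    std::printf("FP argument registers hold no varargs\n");
}

int main() {
  bool seenFloatArg = true;               // caller passed an FP arg in a register
  varargCallee(CallState{seenFloatArg});  // CR6SET vs. CR6UNSET in the lowering
  return 0;
}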
def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lhaux $rD, $addr", LdStLoad, + "lhaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LWAUX : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lwaux $rD, $addr", LdStLoad, + "lwaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } @@ -584,31 +589,31 @@ def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src), // Update forms. let mayLoad = 1 in { def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStLoad, + "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStLoad, + "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStLoad, + "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lbzux $rD, $addr", LdStLoad, + "lbzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LHZUX8 : XForm_1<31, 331, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lhzux $rD, $addr", LdStLoad, + "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lwzux $rD, $addr", LdStLoad, + "lwzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; } @@ -624,6 +629,14 @@ def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), "", [(set G8RC:$rD, (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64; +def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), + "", + [(set G8RC:$rD, + (PPCtoc_entry tjumptable:$disp, G8RC:$reg))]>, isPPC64; +def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), + "", + [(set G8RC:$rD, + (PPCtoc_entry tconstpool:$disp, G8RC:$reg))]>, isPPC64; let hasSideEffects = 1 in { let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo. 
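The LDtocCPT/LDtocJTI pseudos above fetch constant-pool and jump-table addresses out of the TOC (anchored in X2) instead of materializing absolute addresses, which is what keeps 64-bit SVR4 code position-independent. A minimal standalone model of that indirection (hypothetical names, not LLVM code):

#include <cstdio>

static const double kPi = 3.141592653589793;   // a "constant pool" entry
static const void *const toc[] = { &kPi };     // the module's table of contents

// One load through the TOC base replaces embedding the entry's absolute
// address in the instruction stream.
static const double *lookupConstant(const void *const *tocBase, unsigned idx) {
  return static_cast<const double *>(tocBase[idx]);
}

int main() {
  std::printf("%f\n", *lookupConstant(toc, 0));
  return 0;
}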
@@ -642,13 +655,13 @@ def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), let mayLoad = 1 in def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr), - "ldu $rD, $addr", LdStLD, + "ldu $rD, $addr", LdStLDU, []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64, NoEncode<"$ea_result">; def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "ldux $rD, $addr", LdStLoad, + "ldux $rD, $addr", LdStLDU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } @@ -695,14 +708,14 @@ let PPC970_Unit = 2 in { def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStStore, + "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStStore, + "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, @@ -710,7 +723,7 @@ def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStStore, + "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, @@ -718,7 +731,7 @@ def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, s16immX4:$ptroff, ptr_rc:$ptrreg), - "stdu $rS, $ptroff($ptrreg)", LdStSTD, + "stdu $rS, $ptroff($ptrreg)", LdStSTDU, [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">, @@ -727,7 +740,7 @@ def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res), (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stbux $rS, $ptroff, $ptrreg", LdStStore, + "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -736,7 +749,7 @@ def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res), def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res), (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "sthux $rS, $ptroff, $ptrreg", LdStStore, + "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -745,7 +758,7 @@ def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res), def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res), (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stwux $rS, $ptroff, $ptrreg", LdStStore, + "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -754,7 +767,7 @@ def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res), def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res), (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stdux $rS, $ptroff, $ptrreg", LdStStore, + "stdux $rS, $ptroff, $ptrreg", LdStSTDU, [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 47f09dc..d2df664 100644 --- 
a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -54,7 +54,8 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( const TargetMachine *TM, const ScheduleDAG *DAG) const { unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective(); - if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) { + if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 || + Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) { const InstrItineraryData *II = TM->getInstrItineraryData(); return new PPCScoreboardHazardRecognizer(II, DAG); } @@ -70,7 +71,8 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer( unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective(); // Most subtargets use a PPC970 recognizer. - if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2) { + if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 && + Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) { const TargetInstrInfo *TII = TM.getInstrInfo(); assert(TII && "No InstrInfo?"); diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index f57f0c9..a503908 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -123,9 +123,11 @@ def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>; def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, + SDNPInGlue, SDNPOutGlue]>; def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, + SDNPInGlue, SDNPOutGlue]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone, @@ -153,6 +155,12 @@ def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx, def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx, [SDNPHasChain, SDNPMayStore]>; +// Instructions to set/unset CR bit 6 for SVR4 vararg calls +def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + // Instructions to support atomic operations def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx, [SDNPHasChain, SDNPMayLoad]>; @@ -330,9 +338,6 @@ def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits. let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg); let EncoderMethod = "getMemRIXEncoding"; } -def tocentry : Operand<iPTR> { - let MIOperandInfo = (ops i32imm:$imm); -} // PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg // that doesn't matter. @@ -673,7 +678,7 @@ def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src), [(set GPRC:$rD, (load iaddr:$src))]>; def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src), - "lfs $rD, $src", LdStLFDU, + "lfs $rD, $src", LdStLFD, [(set F4RC:$rD, (load iaddr:$src))]>; def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), "lfd $rD, $src", LdStLFD, @@ -683,32 +688,32 @@ def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), // Unindexed (r+i) Loads with Update (preinc). 
let mayLoad = 1 in { def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStLoad, + "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhau $rD, $addr", LdStLoad, + "lhau $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStLoad, + "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStLoad, + "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lfs $rD, $addr", LdStLFDU, + "lfsu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lfd $rD, $addr", LdStLFD, + "lfdu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; @@ -716,37 +721,37 @@ def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), // Indexed (r+r) Loads with Update (preinc). def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lbzux $rD, $addr", LdStLoad, + "lbzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lhaux $rD, $addr", LdStLoad, + "lhaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LHZUX : XForm_1<31, 331, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lhzux $rD, $addr", LdStLoad, + "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lwzux $rD, $addr", LdStLoad, + "lwzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lfsux $rD, $addr", LdStLoad, + "lfsux $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lfdux $rD, $addr", LdStLoad, + "lfdux $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; } @@ -778,10 +783,10 @@ def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src), [(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>; def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src), - "lfsx $frD, $src", LdStLFDU, + "lfsx $frD, $src", LdStLFD, [(set F4RC:$frD, (load xaddr:$src))]>; def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src), - "lfdx $frD, $src", LdStLFDU, + "lfdx $frD, $src", LdStLFD, [(set F8RC:$frD, (load xaddr:$src))]>; } @@ -801,10 +806,10 @@ def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src), "stw $rS, $src", LdStStore, [(store GPRC:$rS, iaddr:$src)]>; def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst), - "stfs $rS, $dst", LdStUX, + "stfs $rS, $dst", LdStSTFD, [(store F4RC:$rS, iaddr:$dst)]>; def STFD : DForm_1<54, (outs), 
(ins F8RC:$rS, memri:$dst), - "stfd $rS, $dst", LdStUX, + "stfd $rS, $dst", LdStSTFD, [(store F8RC:$rS, iaddr:$dst)]>; } @@ -812,33 +817,33 @@ def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst), let PPC970_Unit = 2 in { def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStStore, + "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStStore, + "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStStore, + "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfsu $rS, $ptroff($ptrreg)", LdStStore, + "stfsu $rS, $ptroff($ptrreg)", LdStSTFDU, [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfdu $rS, $ptroff($ptrreg)", LdStStore, + "stfdu $rS, $ptroff($ptrreg)", LdStSTFDU, [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; @@ -863,7 +868,7 @@ def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst), def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res), (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stbux $rS, $ptroff, $ptrreg", LdStStore, + "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -872,7 +877,7 @@ def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res), def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res), (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "sthux $rS, $ptroff, $ptrreg", LdStStore, + "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -881,7 +886,7 @@ def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res), def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res), (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stwux $rS, $ptroff, $ptrreg", LdStStore, + "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, @@ -889,7 +894,7 @@ def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res), def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res), (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stfsux $rS, $ptroff, $ptrreg", LdStStore, + "stfsux $rS, $ptroff, $ptrreg", LdStSTFDU, [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, @@ -897,7 +902,7 @@ def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res), def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res), (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stfdux 
$rS, $ptroff, $ptrreg", LdStStore, + "stfdux $rS, $ptroff, $ptrreg", LdStSTFDU, [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, @@ -913,14 +918,14 @@ def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst), PPC970_DGroup_Cracked; def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst), - "stfiwx $frS, $dst", LdStUX, + "stfiwx $frS, $dst", LdStSTFD, [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>; def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst), - "stfsx $frS, $dst", LdStUX, + "stfsx $frS, $dst", LdStSTFD, [(store F4RC:$frS, xaddr:$dst)]>; def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst), - "stfdx $frS, $dst", LdStUX, + "stfdx $frS, $dst", LdStSTFD, [(store F8RC:$frS, xaddr:$dst)]>; } @@ -964,7 +969,7 @@ def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), [(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>; } -let isReMaterializable = 1 in { +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm), "li $rD, $imm", IntSimple, [(set GPRC:$rD, immSExt16:$imm)]>; @@ -1143,6 +1148,16 @@ def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins), "crxor $dst, $dst, $dst", BrCR, []>; +let Defs = [CR1EQ], CRD = 6 in { +def CR6SET : XLForm_1_ext<19, 289, (outs), (ins), + "creqv 6, 6, 6", BrCR, + [(PPCcr6set)]>; + +def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins), + "crxor 6, 6, 6", BrCR, + [(PPCcr6unset)]>; +} + // XFX-Form instructions. Instructions that deal with SPRs. // let Uses = [CTR] in { @@ -1233,7 +1248,7 @@ let Uses = [RM] in { PPC970_DGroup_Single, PPC970_Unit_FPU; def FADDrtz: AForm_2<63, 21, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fadd $FRT, $FRA, $FRB", FPGeneral, + "fadd $FRT, $FRA, $FRB", FPAddSub, [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>, PPC970_DGroup_Single, PPC970_Unit_FPU; } @@ -1364,7 +1379,7 @@ def FSELS : AForm_1<63, 23, let Uses = [RM] in { def FADD : AForm_2<63, 21, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fadd $FRT, $FRA, $FRB", FPGeneral, + "fadd $FRT, $FRA, $FRB", FPAddSub, [(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>; def FADDS : AForm_2<59, 21, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), @@ -1388,7 +1403,7 @@ let Uses = [RM] in { [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRB))]>; def FSUB : AForm_2<63, 20, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fsub $FRT, $FRA, $FRB", FPGeneral, + "fsub $FRT, $FRA, $FRB", FPAddSub, [(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>; def FSUBS : AForm_2<59, 20, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td index 6a6ccb9..660c0c3 100644 --- a/lib/Target/PowerPC/PPCSchedule.td +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -40,6 +40,7 @@ def IntMulHWU : InstrItinClass; def IntMulLI : InstrItinClass; def IntRFID : InstrItinClass; def IntRotateD : InstrItinClass; +def IntRotateDI : InstrItinClass; def IntRotate : InstrItinClass; def IntShift : InstrItinClass; def IntTrapD : InstrItinClass; @@ -52,15 +53,18 @@ def LdStDCBA : InstrItinClass; def LdStDCBF : InstrItinClass; def LdStDCBI : InstrItinClass; def LdStLoad : InstrItinClass; +def LdStLoadUpd : InstrItinClass; def LdStStore : InstrItinClass; +def LdStStoreUpd : InstrItinClass; def LdStDSS : InstrItinClass; def LdStICBI : InstrItinClass; -def LdStUX : InstrItinClass; def LdStLD : InstrItinClass; +def LdStLDU : InstrItinClass; def LdStLDARX : 
InstrItinClass; def LdStLFD : InstrItinClass; def LdStLFDU : InstrItinClass; def LdStLHA : InstrItinClass; +def LdStLHAU : InstrItinClass; def LdStLMW : InstrItinClass; def LdStLVecX : InstrItinClass; def LdStLWA : InstrItinClass; @@ -69,6 +73,9 @@ def LdStSLBIA : InstrItinClass; def LdStSLBIE : InstrItinClass; def LdStSTD : InstrItinClass; def LdStSTDCX : InstrItinClass; +def LdStSTDU : InstrItinClass; +def LdStSTFD : InstrItinClass; +def LdStSTFDU : InstrItinClass; def LdStSTVEBX : InstrItinClass; def LdStSTWCX : InstrItinClass; def LdStSync : InstrItinClass; @@ -86,6 +93,7 @@ def SprMTSRIN : InstrItinClass; def SprRFI : InstrItinClass; def SprSC : InstrItinClass; def FPGeneral : InstrItinClass; +def FPAddSub : InstrItinClass; def FPCompare : InstrItinClass; def FPDivD : InstrItinClass; def FPDivS : InstrItinClass; @@ -110,6 +118,8 @@ include "PPCScheduleG4.td" include "PPCScheduleG4Plus.td" include "PPCScheduleG5.td" include "PPCScheduleA2.td" +include "PPCScheduleE500mc.td" +include "PPCScheduleE5500.td" //===----------------------------------------------------------------------===// // Instruction to itinerary class map - When add new opcodes to the supported @@ -171,7 +181,7 @@ include "PPCScheduleA2.td" // extsh IntSimple // extsw IntSimple // fabs FPGeneral -// fadd FPGeneral +// fadd FPAddSub // fadds FPGeneral // fcfid FPGeneral // fcmpo FPCompare @@ -201,35 +211,35 @@ include "PPCScheduleA2.td" // fsel FPGeneral // fsqrt FPSqrt // fsqrts FPSqrt -// fsub FPGeneral +// fsub FPAddSub // fsubs FPGeneral // icbi LdStICBI // isync SprISYNC // lbz LdStLoad -// lbzu LdStLoad -// lbzux LdStUX +// lbzu LdStLoadUpd +// lbzux LdStLoadUpd // lbzx LdStLoad // ld LdStLD // ldarx LdStLDARX -// ldu LdStLD -// ldux LdStLD +// ldu LdStLDU +// ldux LdStLDU // ldx LdStLD // lfd LdStLFD // lfdu LdStLFDU // lfdux LdStLFDU -// lfdx LdStLFDU -// lfs LdStLFDU +// lfdx LdStLFD +// lfs LdStLFD // lfsu LdStLFDU // lfsux LdStLFDU -// lfsx LdStLFDU +// lfsx LdStLFD // lha LdStLHA -// lhau LdStLHA -// lhaux LdStLHA +// lhau LdStLHAU +// lhaux LdStLHAU // lhax LdStLHA // lhbrx LdStLoad // lhz LdStLoad -// lhzu LdStLoad -// lhzux LdStUX +// lhzu LdStLoadUpd +// lhzux LdStLoadUpd // lhzx LdStLoad // lmw LdStLMW // lswi LdStLMW @@ -243,12 +253,12 @@ include "PPCScheduleA2.td" // lvxl LdStLVecX // lwa LdStLWA // lwarx LdStLWARX -// lwaux LdStLHA +// lwaux LdStLHAU // lwax LdStLHA // lwbrx LdStLoad // lwz LdStLoad -// lwzu LdStLoad -// lwzux LdStUX +// lwzu LdStLoadUpd +// lwzux LdStLoadUpd // lwzx LdStLoad // mcrf BrMCR // mcrfs FPGeneral @@ -292,10 +302,10 @@ include "PPCScheduleA2.td" // rfid IntRFID // rldcl IntRotateD // rldcr IntRotateD -// rldic IntRotateD -// rldicl IntRotateD -// rldicr IntRotateD -// rldimi IntRotateD +// rldic IntRotateDI +// rldicl IntRotateDI +// rldicr IntRotateDI +// rldimi IntRotateDI // rlwimi IntRotate // rlwinm IntGeneral // rlwnm IntGeneral @@ -305,33 +315,33 @@ include "PPCScheduleA2.td" // sld IntRotateD // slw IntGeneral // srad IntRotateD -// sradi IntRotateD +// sradi IntRotateDI // sraw IntShift // srawi IntShift // srd IntRotateD // srw IntGeneral // stb LdStStore -// stbu LdStStore -// stbux LdStStore +// stbu LdStStoreUpd +// stbux LdStStoreUpd // stbx LdStStore // std LdStSTD // stdcx. 
LdStSTDCX -// stdu LdStSTD -// stdux LdStSTD +// stdu LdStSTDU +// stdux LdStSTDU // stdx LdStSTD -// stfd LdStUX -// stfdu LdStUX -// stfdux LdStUX -// stfdx LdStUX -// stfiwx LdStUX -// stfs LdStUX -// stfsu LdStUX -// stfsux LdStUX -// stfsx LdStUX +// stfd LdStSTFD +// stfdu LdStSTFDU +// stfdux LdStSTFDU +// stfdx LdStSTFD +// stfiwx LdStSTFD +// stfs LdStSTFD +// stfsu LdStSTFDU +// stfsux LdStSTFDU +// stfsx LdStSTFD // sth LdStStore // sthbrx LdStStore -// sthu LdStStore -// sthux LdStStore +// sthu LdStStoreUpd +// sthux LdStStoreUpd // sthx LdStStore // stmw LdStLMW // stswi LdStLMW @@ -344,8 +354,8 @@ include "PPCScheduleA2.td" // stw LdStStore // stwbrx LdStStore // stwcx. LdStSTWCX -// stwu LdStStore -// stwux LdStStore +// stwu LdStStoreUpd +// stwux LdStStoreUpd // stwx LdStStore // subf IntGeneral // subfc IntGeneral diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td index cd0fb70..37b6eac 100644 --- a/lib/Target/PowerPC/PPCSchedule440.td +++ b/lib/Target/PowerPC/PPCSchedule440.td @@ -288,6 +288,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<2, [LWB]>], [9, 5], [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoadUpd , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [9, 5], + [GPR_Bypass, GPR_Bypass]>, InstrItinData<LdStStore , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, @@ -297,6 +306,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<2, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, + InstrItinData<LdStStoreUpd, [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStICBI , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, @@ -306,7 +324,7 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, - InstrItinData<LdStUX , [InstrStage<1, [IFTH1, IFTH2]>, + InstrItinData<LdStSTFD , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, InstrStage<1, [LRACC]>, @@ -315,6 +333,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [LWB]>], [8, 5, 5], [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTFDU , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5, 5], + [NoBypass, GPR_Bypass, GPR_Bypass]>, InstrItinData<LdStLFD , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, @@ -342,6 +369,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLHAU , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStLMW , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, @@ -371,6 +407,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<2, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, + 
InstrItinData<LdStSTDU , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1]>, @@ -537,6 +582,19 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [FWB]>], [10, 4, 4], [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPAddSub , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [FRACC]>, + InstrStage<1, [FEXE1]>, + InstrStage<1, [FEXE2]>, + InstrStage<1, [FEXE3]>, + InstrStage<1, [FEXE4]>, + InstrStage<1, [FEXE5]>, + InstrStage<1, [FEXE6]>, + InstrStage<1, [FWB]>], + [10, 4, 4], + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, InstrItinData<FPCompare , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td index 4d4a5d0..ba63b5c 100644 --- a/lib/Target/PowerPC/PPCScheduleA2.td +++ b/lib/Target/PowerPC/PPCScheduleA2.td @@ -181,6 +181,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [10, 7, 7], [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotateDI , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, InstrItinData<IntShift , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -302,7 +313,18 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [14, 7], [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLD , [InstrStage<4, + InstrItinData<LdStLoadUpd , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLDU , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, IU4_4, IU4_5, IU4_6, IU4_7]>, @@ -324,6 +346,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [13, 7], [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStStoreUpd, [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 7], + [GPR_Bypass, GPR_Bypass]>, InstrItinData<LdStICBI , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -335,7 +368,7 @@ def PPCA2Itineraries : ProcessorItineraries< 
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [14, 7], [NoBypass, GPR_Bypass]>, - InstrItinData<LdStUX , [InstrStage<4, + InstrItinData<LdStSTFD , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, IU4_4, IU4_5, IU4_6, IU4_7]>, @@ -346,6 +379,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [14, 7, 7], [NoBypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<LdStSTFDU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [NoBypass, FPR_Bypass, FPR_Bypass]>, InstrItinData<LdStLFD , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -379,6 +423,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [14, 7], [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLHAU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStLMW , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -412,6 +467,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [13, 7], [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTDU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 7], + [GPR_Bypass, GPR_Bypass]>, InstrItinData<LdStSTDCX , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -593,6 +659,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], [15, 7, 7], [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPAddSub , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, + InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, + InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, + InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], + [15, 7, 7], + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, InstrItinData<FPCompare , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, diff --git a/lib/Target/PowerPC/PPCScheduleE500mc.td b/lib/Target/PowerPC/PPCScheduleE500mc.td new file mode 100644 index 0000000..9bb779a --- /dev/null +++ b/lib/Target/PowerPC/PPCScheduleE500mc.td @@ -0,0 +1,265 @@ +//===-- PPCScheduleE500mc.td - e500mc Scheduling Defs ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This 
file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the Freescale e500mc 32-bit +// Power processor. +// +// All information is derived from the "e500mc Core Reference Manual", +// Freescale Document Number E500MCRM, Rev. 1, 03/2012. +// +//===----------------------------------------------------------------------===// +// Relevant functional units in the Freescale e500mc core: +// +// * Decode & Dispatch +// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue +// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ). +def DIS0 : FuncUnit; // Dispatch stage - insn 1 +def DIS1 : FuncUnit; // Dispatch stage - insn 2 + +// * Execute +// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX. +// Some instructions can only execute in SFX0 but not SFX1. +// The CFX has a bypass path, allowing non-divide instructions to execute +// while a divide instruction is executed. +def SFX0 : FuncUnit; // Simple unit 0 +def SFX1 : FuncUnit; // Simple unit 1 +def BU : FuncUnit; // Branch unit +def CFX_DivBypass + : FuncUnit; // CFX divide bypass path +def CFX_0 : FuncUnit; // CFX pipeline +def LSU_0 : FuncUnit; // LSU pipeline +def FPU_0 : FuncUnit; // FPU pipeline + +def PPCE500mcItineraries : ProcessorItineraries< + [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, LSU_0, FPU_0], + [CR_Bypass, GPR_Bypass, FPR_Bypass], [ + InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 1, 1], // Latency = 1 or 2 + [CR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<14, [CFX_DivBypass]>], + [17, 1, 1], // Latency=4..35, Repeat= 4..35 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<8, [FPU_0]>], + [11], // Latency = 8 + [FPR_Bypass]>, + InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<8, [FPU_0]>], + [11, 1, 1], // Latency = 8 + [NoBypass, NoBypass, NoBypass]>, + InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0]>], + [5, 1], // Latency = 2, Repeat rate = 2 + [GPR_Bypass, GPR_Bypass]>, + 
InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [4, 1], // Latency = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [4, 1, 1], // Latency = 1 + [CR_Bypass, CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [4, 1], // Latency = 1 + [CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [CR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 1, 1], // Latency = 4 + [FPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 1, 1], // Latency = 4 + [FPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 1], // Latency = r+3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<3, [LSU_0]>], + [6, 1, 1], // Latency = 3, Repeat rate = 3 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>]>, + InstrItinData<SprMFSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [SFX0]>], + [7, 1], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprMTMSR , 
[InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0, SFX1]>], + [5, 1], // Latency = 2, Repeat rate = 4 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprMTSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0]>], + [5, 1], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0], 0>]>, + InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<5, [SFX0]>], + [8, 1], + [GPR_Bypass, CR_Bypass]>, + InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [SFX0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [GPR_Bypass, CR_Bypass]>, + InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [SFX0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [CR_Bypass, GPR_Bypass]>, + InstrItinData<SprMTSRIN , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0]>], + [4, 1], + [NoBypass, GPR_Bypass]>, + InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [FPU_0]>], + [11, 1, 1], // Latency = 8, Repeat rate = 2 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [FPU_0]>], + [13, 1, 1], // Latency = 10, Repeat rate = 4 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [FPU_0]>], + [11, 1, 1], // Latency = 8, Repeat rate = 2 + [CR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<68, [FPU_0]>], + [71, 1, 1], // Latency = 68, Repeat rate = 68 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<38, [FPU_0]>], + [41, 1, 1], // Latency = 38, Repeat rate = 38 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [FPU_0]>], + [13, 1, 1, 1], // Latency = 10, Repeat rate = 4 + [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<38, [FPU_0]>], + [41, 1], // Latency = 38, Repeat rate = 38 + [FPR_Bypass, FPR_Bypass]> +]>; + +// ===---------------------------------------------------------------------===// +// e500mc machine model for scheduling and other instruction cost heuristics. + +def PPCE500mcModel : SchedMachineModel { + let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. + let MinLatency = -1; // OperandCycles are interpreted as MinLatency. + let LoadLatency = 5; // Optimistic load latency assuming bypass. + // This is overriden by OperandCycles if the + // Itineraries are queried instead. + + let Itineraries = PPCE500mcItineraries; +} diff --git a/lib/Target/PowerPC/PPCScheduleE5500.td b/lib/Target/PowerPC/PPCScheduleE5500.td new file mode 100644 index 0000000..d7e11ac --- /dev/null +++ b/lib/Target/PowerPC/PPCScheduleE5500.td @@ -0,0 +1,309 @@ +//===-- PPCScheduleE500mc.td - e5500 Scheduling Defs -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the Freescale e5500 64-bit +// Power processor. +// +// All information is derived from the "e5500 Core Reference Manual", +// Freescale Document Number e5500RM, Rev. 1, 03/2012. +// +//===----------------------------------------------------------------------===// +// Relevant functional units in the Freescale e5500 core +// (These are the same as for the e500mc) +// +// * Decode & Dispatch +// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue +// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ). +// def DIS0 : FuncUnit; +// def DIS1 : FuncUnit; + +// * Execute +// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX. +// The CFX has a bypass path, allowing non-divide instructions to execute +// while a divide instruction is being executed. +// def SFX0 : FuncUnit; // Simple unit 0 +// def SFX1 : FuncUnit; // Simple unit 1 +// def BU : FuncUnit; // Branch unit +// def CFX_DivBypass +// : FuncUnit; // CFX divide bypass path +// def CFX_0 : FuncUnit; // CFX pipeline stage 0 + +def CFX_1 : FuncUnit; // CFX pipeline stage 1 + +// def LSU_0 : FuncUnit; // LSU pipeline +// def FPU_0 : FuncUnit; // FPU pipeline + + +def PPCE5500Itineraries : ProcessorItineraries< + [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, CFX_1, + LSU_0, FPU_0], + [CR_Bypass, GPR_Bypass, FPR_Bypass], [ + InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 2, 2], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 2, 2], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [6, 2, 2], // Latency = 1 or 2 + [CR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntDivD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<26, [CFX_DivBypass]>], + [30, 2, 2], // Latency= 4..26, Repeat rate= 4..26 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<16, [CFX_DivBypass]>], + [20, 2, 2], // Latency= 4..16, Repeat rate= 4..16 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11], // Latency = 7, Repeat rate = 1 + [FPR_Bypass]>, + InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<7, [FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 7 + [NoBypass, NoBypass, NoBypass]>, + InstrItinData<IntMulHD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<2, [CFX_1]>], + [9, 2, 2], // Latency = 4..7, Repeat rate = 2..4 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<1, [CFX_1]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<1, [CFX_1]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<2, [CFX_1]>], + [8, 2, 2], // Latency = 4 or 5, Repeat = 2 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + 
InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 2, 2], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotateD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0, SFX1]>], + [6, 2, 2], // Latency = 2, Repeat rate = 2 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotateDI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 2, 2], // Latency = 1, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0, SFX1]>], + [6, 2, 2], // Latency = 2, Repeat rate = 2 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0]>], + [6, 2], // Latency = 2, Repeat rate = 2 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [5, 2], // Latency = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [5, 2, 2], // Latency = 1 + [CR_Bypass, CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [5, 2], // Latency = 1 + [CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [5, 2, 2], // Latency = 1 + [CR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLDARX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<3, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 
0>, + InstrStage<1, [LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [FPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [FPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [LSU_0]>], + [8, 2], // Latency = r+3, Repeat rate = r+3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<3, [LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 3 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTDCX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>]>, + InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [CFX_0]>], + [6, 2], // Latency = 2, Repeat rate = 4 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0], 0>]>, + InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<5, [CFX_0]>], + [9, 2], // Latency = 5, Repeat rate = 5 + [GPR_Bypass, CR_Bypass]>, + InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [SFX0]>], + [8, 2], // Latency = 4, Repeat rate = 4 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [5], // Latency = 1, Repeat rate = 1 + [GPR_Bypass]>, + InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [CFX_0]>], + [8, 2], // Latency = 4, Repeat rate = 4 + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5], // Latency = 1, Repeat rate = 1 + [GPR_Bypass]>, + InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [CR_Bypass, FPR_Bypass, FPR_Bypass]>, + 
InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<31, [FPU_0]>], + [39, 2, 2], // Latency = 35, Repeat rate = 31 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<16, [FPU_0]>], + [24, 2, 2], // Latency = 20, Repeat rate = 16 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11, 2, 2, 2], // Latency = 7, Repeat rate = 1 + [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [FPU_0]>], + [12, 2], // Latency = 8, Repeat rate = 2 + [FPR_Bypass, FPR_Bypass]> +]>; + +// ===---------------------------------------------------------------------===// +// e5500 machine model for scheduling and other instruction cost heuristics. + +def PPCE5500Model : SchedMachineModel { + let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. + let MinLatency = -1; // OperandCycles are interpreted as MinLatency. + let LoadLatency = 6; // Optimistic load latency assuming bypass. + // This is overriden by OperandCycles if the + // Itineraries are queried instead. + + let Itineraries = PPCE5500Itineraries; +} diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td index 61e89ed..72a0a39 100644 --- a/lib/Target/PowerPC/PPCScheduleG3.td +++ b/lib/Target/PowerPC/PPCScheduleG3.td @@ -34,12 +34,16 @@ def G3Itineraries : ProcessorItineraries< InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>, InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>, InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTWCX , [InstrStage<8, [SLU]>]>, @@ -58,6 +62,7 @@ def G3Itineraries : ProcessorItineraries< InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>, InstrItinData<SprSC , [InstrStage<2, [SRU]>]>, InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>, + InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>, InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>, InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>, InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td index e19ddfa..fc9120d 100644 --- a/lib/Target/PowerPC/PPCScheduleG4.td +++ b/lib/Target/PowerPC/PPCScheduleG4.td @@ -33,13 +33,17 @@ def G4Itineraries : ProcessorItineraries< InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>, InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>, + 
InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>, InstrItinData<LdStLVecX , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>, @@ -60,6 +64,7 @@ def G4Itineraries : ProcessorItineraries< InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>, InstrItinData<SprSC , [InstrStage<2, [SRU]>]>, InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>, + InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>, InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>, InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>, InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td index e7446cb..a4e82ce 100644 --- a/lib/Target/PowerPC/PPCScheduleG4Plus.td +++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -36,19 +36,24 @@ def G4PlusItineraries : ProcessorItineraries< InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>, InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>, - InstrItinData<LdStUX , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTFD , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTFDU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<4, [SLU]>]>, InstrItinData<LdStLFDU , [InstrStage<4, [SLU]>]>, InstrItinData<LdStLHA , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLHAU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLMW , [InstrStage<37, [SLU]>]>, InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLWA , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTDCX , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTVEBX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTWCX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>, @@ -66,6 +71,7 @@ def G4PlusItineraries : ProcessorItineraries< InstrItinData<SprRFI , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>, InstrItinData<SprSC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>, InstrItinData<FPGeneral , [InstrStage<5, [FPU1]>]>, + InstrItinData<FPAddSub , [InstrStage<5, [FPU1]>]>, InstrItinData<FPCompare , [InstrStage<5, [FPU1]>]>, InstrItinData<FPDivD , [InstrStage<35, [FPU1]>]>, InstrItinData<FPDivS , [InstrStage<21, [FPU1]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td index 1371499..7c02ea0 100644 --- a/lib/Target/PowerPC/PPCScheduleG5.td +++ b/lib/Target/PowerPC/PPCScheduleG5.td @@ -27,6 +27,7 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<IntMulLI , [InstrStage<4, [IU1, IU2]>]>, InstrItinData<IntRFID , [InstrStage<1, [IU2]>]>, InstrItinData<IntRotateD , [InstrStage<2, [IU1, IU2]>]>, + InstrItinData<IntRotateDI , [InstrStage<2, [IU1, IU2]>]>, InstrItinData<IntRotate , [InstrStage<4, [IU1, IU2]>]>, InstrItinData<IntShift , [InstrStage<2, [IU1, IU2]>]>, InstrItinData<IntTrapD , [InstrStage<1, [IU1, IU2]>]>, @@ -37,15 +38,20 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<BrMCRX , 
[InstrStage<3, [BPU]>]>, InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>, InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>, - InstrItinData<LdStUX , [InstrStage<4, [SLU]>]>, + InstrItinData<LdStSTFD , [InstrStage<4, [SLU]>]>, + InstrItinData<LdStSTFDU , [InstrStage<4, [SLU]>]>, InstrItinData<LdStLD , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLDU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLDARX , [InstrStage<11, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLFDU , [InstrStage<5, [SLU]>]>, InstrItinData<LdStLHA , [InstrStage<5, [SLU]>]>, + InstrItinData<LdStLHAU , [InstrStage<5, [SLU]>]>, InstrItinData<LdStLMW , [InstrStage<64, [SLU]>]>, InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLWA , [InstrStage<5, [SLU]>]>, @@ -53,6 +59,7 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<LdStSLBIA , [InstrStage<40, [SLU]>]>, // needs work InstrItinData<LdStSLBIE , [InstrStage<2, [SLU]>]>, InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTDCX , [InstrStage<11, [SLU]>]>, InstrItinData<LdStSTVEBX , [InstrStage<5, [SLU]>]>, InstrItinData<LdStSTWCX , [InstrStage<11, [SLU]>]>, @@ -69,6 +76,7 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<SprMTSPR , [InstrStage<8, [IU2]>]>, InstrItinData<SprSC , [InstrStage<1, [IU2]>]>, InstrItinData<FPGeneral , [InstrStage<6, [FPU1, FPU2]>]>, + InstrItinData<FPAddSub , [InstrStage<6, [FPU1, FPU2]>]>, InstrItinData<FPCompare , [InstrStage<8, [FPU1, FPU2]>]>, InstrItinData<FPDivD , [InstrStage<33, [FPU1, FPU2]>]>, InstrItinData<FPDivS , [InstrStage<33, [FPU1, FPU2]>]>, diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 0207c83..b8b1614 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -41,6 +41,8 @@ namespace PPC { DIR_750, DIR_970, DIR_A2, + DIR_E500mc, + DIR_E5500, DIR_PWR6, DIR_PWR7, DIR_64 diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index 15541ef..e64c140 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -129,7 +129,7 @@ def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet, [SDNPHasChain, SDNPOptInGlue]>; def flushw : SDNode<"SPISD::FLUSHW", SDTNone, - [SDNPHasChain]>; + [SDNPHasChain, SDNPSideEffect, SDNPMayStore]>; def getPCX : Operand<i32> { let PrintMethod = "printGetPCX"; diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index 8e215a7..62f973e 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -24,6 +24,16 @@ void TargetLibraryInfo::anchor() { } const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = { + "_ZdaPv", + "_ZdlPv", + "_Znaj", + "_ZnajRKSt9nothrow_t", + "_Znam", + "_ZnamRKSt9nothrow_t", + "_Znwj", + "_ZnwjRKSt9nothrow_t", + "_Znwm", + "_ZnwmRKSt9nothrow_t", "__cxa_atexit", "__cxa_guard_abort", "__cxa_guard_acquire", @@ -31,16 +41,29 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "__memcpy_chk", "acos", "acosf", + "acosh", + "acoshf", + "acoshl", "acosl", "asin", "asinf", + "asinh", + "asinhf", + "asinhl", "asinl", "atan", "atan2", "atan2f", "atan2l", "atanf", + "atanh", + "atanhf", + "atanhl", 
"atanl", + "calloc", + "cbrt", + "cbrtf", + "cbrtl", "ceil", "ceilf", "ceill", @@ -54,6 +77,9 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "coshl", "cosl", "exp", + "exp10", + "exp10f", + "exp10l", "exp2", "exp2f", "exp2l", @@ -74,6 +100,7 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "fmodl", "fputc", "fputs", + "free", "fwrite", "iprintf", "log", @@ -86,8 +113,12 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "log2", "log2f", "log2l", + "logb", + "logbf", + "logbl", "logf", "logl", + "malloc", "memchr", "memcmp", "memcpy", @@ -97,11 +128,14 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "nearbyint", "nearbyintf", "nearbyintl", + "posix_memalign", "pow", "powf", "powl", "putchar", "puts", + "realloc", + "reallocf", "rint", "rintf", "rintl", @@ -121,10 +155,12 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "strcat", "strchr", "strcpy", + "strdup", "strlen", "strncat", "strncmp", "strncpy", + "strndup", "strnlen", "tan", "tanf", @@ -134,7 +170,8 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "tanl", "trunc", "truncf", - "truncl" + "truncl", + "valloc" }; /// initialize - Initialize the set of available library functions based on the @@ -205,6 +242,21 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T, TLI.setUnavailable(LibFunc::tanhl); // Win32 only has C89 math + TLI.setUnavailable(LibFunc::acosh); + TLI.setUnavailable(LibFunc::acoshf); + TLI.setUnavailable(LibFunc::acoshl); + TLI.setUnavailable(LibFunc::asinh); + TLI.setUnavailable(LibFunc::asinhf); + TLI.setUnavailable(LibFunc::asinhl); + TLI.setUnavailable(LibFunc::atanh); + TLI.setUnavailable(LibFunc::atanhf); + TLI.setUnavailable(LibFunc::atanhl); + TLI.setUnavailable(LibFunc::cbrt); + TLI.setUnavailable(LibFunc::cbrtf); + TLI.setUnavailable(LibFunc::cbrtl); + TLI.setUnavailable(LibFunc::exp10); + TLI.setUnavailable(LibFunc::exp10f); + TLI.setUnavailable(LibFunc::exp10l); TLI.setUnavailable(LibFunc::exp2); TLI.setUnavailable(LibFunc::exp2f); TLI.setUnavailable(LibFunc::exp2l); @@ -217,6 +269,9 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T, TLI.setUnavailable(LibFunc::log1p); TLI.setUnavailable(LibFunc::log1pf); TLI.setUnavailable(LibFunc::log1pl); + TLI.setUnavailable(LibFunc::logb); + TLI.setUnavailable(LibFunc::logbf); + TLI.setUnavailable(LibFunc::logbl); TLI.setUnavailable(LibFunc::nearbyint); TLI.setUnavailable(LibFunc::nearbyintf); TLI.setUnavailable(LibFunc::nearbyintl); diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 73a0095..c89e738 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -67,12 +67,19 @@ private: SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out); - bool MatchInstruction(SMLoc IDLoc, + bool MatchInstruction(SMLoc IDLoc, unsigned &Kind, SmallVectorImpl<MCParsedAsmOperand*> &Operands, SmallVectorImpl<MCInst> &MCInsts, unsigned &OrigErrorInfo, bool matchingInlineAsm = false); + unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands, + unsigned OperandNum, unsigned &NumMCOperands) { + return getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum, + NumMCOperands); + } + /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi) /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode. 
bool isSrcOp(X86Operand &Op); @@ -514,12 +521,13 @@ bool X86AsmParser::isDstOp(X86Operand &Op) { bool X86AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { RegNo = 0; - if (!isParsingIntelSyntax()) { - const AsmToken &TokPercent = Parser.getTok(); - assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!"); - StartLoc = TokPercent.getLoc(); + const AsmToken &PercentTok = Parser.getTok(); + StartLoc = PercentTok.getLoc(); + + // If we encounter a %, ignore it. This code handles registers with and + // without the prefix, unprefixed registers can occur in cfi directives. + if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) Parser.Lex(); // Eat percent token. - } const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) { @@ -1516,9 +1524,12 @@ bool X86AsmParser:: MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out) { - SmallVector<MCInst, 2> Insts; + unsigned Kind; unsigned ErrorInfo; - bool Error = MatchInstruction(IDLoc, Operands, Insts, ErrorInfo); + SmallVector<MCInst, 2> Insts; + + bool Error = MatchInstruction(IDLoc, Kind, Operands, Insts, + ErrorInfo); if (!Error) for (unsigned i = 0, e = Insts.size(); i != e; ++i) Out.EmitInstruction(Insts[i]); @@ -1526,7 +1537,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, } bool X86AsmParser:: -MatchInstruction(SMLoc IDLoc, +MatchInstruction(SMLoc IDLoc, unsigned &Kind, SmallVectorImpl<MCParsedAsmOperand*> &Operands, SmallVectorImpl<MCInst> &MCInsts, unsigned &OrigErrorInfo, bool matchingInlineAsm) { @@ -1537,7 +1548,7 @@ MatchInstruction(SMLoc IDLoc, // First, handle aliases that expand to multiple instructions. // FIXME: This should be replaced with a real .td file alias mechanism. - // Also, MatchInstructionImpl should do actually *do* the EmitInstruction + // Also, MatchInstructionImpl should actually *do* the EmitInstruction // call. if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" || Op->getToken() == "fstsww" || Op->getToken() == "fstcww" || @@ -1568,7 +1579,7 @@ MatchInstruction(SMLoc IDLoc, MCInst Inst; // First, try a direct match. 
- switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo, + switch (MatchInstructionImpl(Operands, Kind, Inst, OrigErrorInfo, isParsingIntelSyntax())) { default: break; case Match_Success: @@ -1585,9 +1596,6 @@ MatchInstruction(SMLoc IDLoc, Error(IDLoc, "instruction requires a CPU feature not currently enabled", EmptyRanges, matchingInlineAsm); return true; - case Match_ConversionFail: - return Error(IDLoc, "unable to convert operands to instruction", - EmptyRanges, matchingInlineAsm); case Match_InvalidOperand: WasOriginallyInvalidOperand = true; break; @@ -1619,14 +1627,19 @@ MatchInstruction(SMLoc IDLoc, Tmp[Base.size()] = Suffixes[0]; unsigned ErrorInfoIgnore; unsigned Match1, Match2, Match3, Match4; + unsigned tKind; - Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); + Match1 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore); + if (Match1 == Match_Success) Kind = tKind; Tmp[Base.size()] = Suffixes[1]; - Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); + Match2 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore); + if (Match2 == Match_Success) Kind = tKind; Tmp[Base.size()] = Suffixes[2]; - Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); + Match3 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore); + if (Match3 == Match_Success) Kind = tKind; Tmp[Base.size()] = Suffixes[3]; - Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); + Match4 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore); + if (Match4 == Match_Success) Kind = tKind; // Restore the old token. Op->setTokenValue(Base); @@ -1677,8 +1690,10 @@ MatchInstruction(SMLoc IDLoc, if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) && (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) { if (!WasOriginallyInvalidOperand) { + ArrayRef<SMRange> Ranges = matchingInlineAsm ? EmptyRanges : + Op->getLocRange(); return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'", - Op->getLocRange(), matchingInlineAsm); + Ranges, matchingInlineAsm); } // Recover location info for the operand if we know which was the problem. @@ -1730,7 +1745,10 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { return ParseDirectiveWord(2, DirectiveID.getLoc()); else if (IDVal.startswith(".code")) return ParseDirectiveCode(IDVal, DirectiveID.getLoc()); - else if (IDVal.startswith(".intel_syntax")) { + else if (IDVal.startswith(".att_syntax")) { + getParser().setAssemblerDialect(0); + return false; + } else if (IDVal.startswith(".intel_syntax")) { getParser().setAssemblerDialect(1); if (getLexer().isNot(AsmToken::EndOfStatement)) { if(Parser.getTok().getString() == "noprefix") { diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 5039887..f136927 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -44,7 +44,7 @@ void x86DisassemblerDebug(const char *file, dbgs() << file << ":" << line << ": " << s; } -const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii) { +const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii) { const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii); return MII->getName(Opcode); } @@ -95,8 +95,8 @@ const EDInstInfo *X86GenericDisassembler::getEDInfo() const { /// be a pointer to a MemoryObject. /// @param byte - A pointer to the byte to be read. /// @param address - The address to be read. 
-static int regionReader(void* arg, uint8_t* byte, uint64_t address) { - MemoryObject* region = static_cast<MemoryObject*>(arg); +static int regionReader(const void* arg, uint8_t* byte, uint64_t address) { + const MemoryObject* region = static_cast<const MemoryObject*>(arg); return region->readByte(address, byte); } @@ -135,10 +135,10 @@ X86GenericDisassembler::getInstruction(MCInst &instr, int ret = decodeInstruction(&internalInstr, regionReader, - (void*)®ion, + (const void*)®ion, loggerFn, (void*)&vStream, - (void*)MII, + (const void*)MII, address, fMode); @@ -379,6 +379,8 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, } switch (type) { + case TYPE_XMM32: + case TYPE_XMM64: case TYPE_XMM128: mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); return; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index 0c92912..af444d1 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -200,7 +200,7 @@ static void unconsumeByte(struct InternalInstruction* insn) { insn->readerCursor + offset); \ if (ret) \ return ret; \ - combined = combined | ((type)byte << ((type)offset * 8)); \ + combined = combined | ((uint64_t)byte << (offset * 8)); \ } \ *ptr = combined; \ insn->readerCursor += sizeof(type); \ @@ -719,7 +719,7 @@ static BOOL is16BitEquvalent(const char* orig, const char* equiv) { * @return - 0 if the ModR/M could be read when needed or was not needed; * nonzero otherwise. */ -static int getID(struct InternalInstruction* insn, void *miiArg) { +static int getID(struct InternalInstruction* insn, const void *miiArg) { uint8_t attrMask; uint16_t instructionID; @@ -1621,10 +1621,10 @@ static int readOperands(struct InternalInstruction* insn) { */ int decodeInstruction(struct InternalInstruction* insn, byteReader_t reader, - void* readerArg, + const void* readerArg, dlog_t logger, void* loggerArg, - void* miiArg, + const void* miiArg, uint64_t startLoc, DisassemblerMode mode) { memset(insn, 0, sizeof(struct InternalInstruction)); diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index 797703f..05cbb4c 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -403,7 +403,7 @@ typedef uint8_t BOOL; * be read from. * @return - -1 if the byte cannot be read for any reason; 0 otherwise. 
*/ -typedef int (*byteReader_t)(void* arg, uint8_t* byte, uint64_t address); +typedef int (*byteReader_t)(const void* arg, uint8_t* byte, uint64_t address); /* * dlog_t - Type for the logging function that the consumer can provide to @@ -422,7 +422,7 @@ struct InternalInstruction { /* Reader interface (C) */ byteReader_t reader; /* Opaque value passed to the reader */ - void* readerArg; + const void* readerArg; /* The address of the next byte to read via the reader */ uint64_t readerCursor; @@ -561,10 +561,10 @@ struct InternalInstruction { */ int decodeInstruction(struct InternalInstruction* insn, byteReader_t reader, - void* readerArg, + const void* readerArg, dlog_t logger, void* loggerArg, - void* miiArg, + const void* miiArg, uint64_t startLoc, DisassemblerMode mode); @@ -579,7 +579,7 @@ void x86DisassemblerDebug(const char *file, unsigned line, const char *s); -const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii); +const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii); #ifdef __cplusplus } diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 624e56f..4011035 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -941,3 +941,15 @@ and inversion with an rsqrtss instruction, which computes 1/sqrt faster at the cost of reduced accuracy. //===---------------------------------------------------------------------===// + +This function should be matched to haddpd when the appropriate CPU is enabled: + +#include <x86intrin.h> +double f (__m128d p) { + return p[0] + p[1]; +} + +similarly, v[0]-v[1] should match to hsubpd, and {v[0]-v[1], w[0]-w[1]} should +turn into hsubpd also. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 18e6b7c..d078a7b 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -120,6 +120,9 @@ def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true", "Support BMI2 instructions">; def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; +def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb", + "HasSlowDivide", "true", + "Use small divide for positive values less than 256">; //===----------------------------------------------------------------------===// // X86 processors supported. 
@@ -160,7 +163,8 @@ def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B, def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>; def : AtomProc<"atom", [ProcIntelAtom, FeatureSSE3, FeatureCMPXCHG16B, - FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP]>; + FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP, + FeatureSlowDivide]>; // "Arrandale" along with corei3 and corei5 def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem, FeatureFastUAMem, diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index db71e27..a4785c9 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -233,12 +233,14 @@ void X86AsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo, void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O, const char *Modifier) { + raw_ostream &O, const char *Modifier, + unsigned AsmVariant) { const MachineOperand &MO = MI->getOperand(OpNo); switch (MO.getType()) { default: llvm_unreachable("unknown operand type!"); case MachineOperand::MO_Register: { - O << '%'; + // FIXME: Enumerating AsmVariant, so we can remove magic number. + if (AsmVariant == 0) O << '%'; unsigned Reg = MO.getReg(); if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) { EVT VT = (strcmp(Modifier+6,"64") == 0) ? @@ -471,7 +473,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, } } - printOperand(MI, OpNo, O); + printOperand(MI, OpNo, O, /*Modifier*/ 0, AsmVariant); return false; } diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index 35386cd..0062387 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -50,7 +50,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { // These methods are used by the tablegen'erated instruction printer. void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O, - const char *Modifier = 0); + const char *Modifier = 0, unsigned AsmVariant = 0); void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O); bool printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O); diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index d705049..e202321 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -26,7 +26,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Function.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" @@ -134,8 +133,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { IsPIC = TM.getRelocationModel() == Reloc::PIC_; do { - DEBUG(dbgs() << "JITTing function '" - << MF.getFunction()->getName() << "'\n"); + DEBUG(dbgs() << "JITTing function '" << MF.getName() << "'\n"); MCE.startFunction(MF); for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index e5952aa..54704d8 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -2014,13 +2014,17 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) { unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { MVT VT; if (!isTypeLegal(C->getType(), VT)) - return false; + return 0; + + // Can't handle alternate code models yet. 
+ if (TM.getCodeModel() != CodeModel::Small) + return 0; // Get opcode and regclass of the output for the given load instruction. unsigned Opc = 0; const TargetRegisterClass *RC = NULL; switch (VT.SimpleTy) { - default: return false; + default: return 0; case MVT::i8: Opc = X86::MOV8rm; RC = &X86::GR8RegClass; @@ -2058,7 +2062,7 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { break; case MVT::f80: // No f80 support yet. - return false; + return 0; } // Materialize addresses with LEA instructions. diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 955c75a..9d5de81 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -171,6 +171,7 @@ namespace { // Shuffle live registers to match the expectations of successor blocks. void finishBlockStack(); +#ifndef NDEBUG void dumpStack() const { dbgs() << "Stack contents:"; for (unsigned i = 0; i != StackTop; ++i) { @@ -181,6 +182,7 @@ namespace { dbgs() << ", ST" << i << " in FP" << unsigned(PendingST[i]); dbgs() << "\n"; } +#endif /// getSlot - Return the stack slot number a particular register number is /// in. diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 27195b4..5fdc61e 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -100,6 +100,7 @@ namespace { Base_Reg = Reg; } +#ifndef NDEBUG void dump() { dbgs() << "X86ISelAddressMode " << this << '\n'; dbgs() << "Base_Reg "; @@ -133,6 +134,7 @@ namespace { dbgs() << "nul"; dbgs() << " JT" << JT << " Align" << Align << '\n'; } +#endif }; } @@ -1011,7 +1013,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, AM.IndexReg = ShVal.getNode()->getOperand(0); ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getNode()->getOperand(1)); - uint64_t Disp = AddVal->getSExtValue() << Val; + uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val; if (!FoldOffsetIntoAddress(Disp, AM)) return false; } @@ -2116,7 +2118,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Make sure that we don't change the operation by removing bits. // This only matters for OR and XOR, AND is unaffected. 
- if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val) + uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1; + if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0) break; unsigned ShlOp, Op; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 7954170..5c525ae 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -85,7 +85,7 @@ static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128) * ElemsPerChunk); - SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32); + SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal); SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx); @@ -118,7 +118,7 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/128) * ElemsPerChunk); - SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32); + SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx); } @@ -182,6 +182,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(X86StackPtr); + // Bypass i32 with i8 on Atom when compiling with O2 + if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) + addBypassSlowDivType(Type::getInt32Ty(getGlobalContext()), Type::getInt8Ty(getGlobalContext())); + if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) { // Setup Windows compiler runtime calls. setLibcallName(RTLIB::SDIV_I64, "_alldiv"); @@ -735,6 +739,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FFLOOR, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::SDIVREM, (MVT::SimpleValueType)VT, Expand); @@ -824,6 +829,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FDIV, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); setOperationAction(ISD::FNEG, MVT::v4f32, Custom); + setOperationAction(ISD::FABS, MVT::v4f32, Custom); setOperationAction(ISD::LOAD, MVT::v4f32, Legal); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); @@ -857,6 +863,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FDIV, MVT::v2f64, Legal); setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::FNEG, MVT::v2f64, Custom); + setOperationAction(ISD::FABS, MVT::v2f64, Custom); setOperationAction(ISD::SETCC, MVT::v2i64, Custom); setOperationAction(ISD::SETCC, MVT::v16i8, Custom); @@ -925,6 +932,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); + + setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal); } if (Subtarget->hasSSE41()) { @@ -939,6 +948,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FRINT, MVT::f64, Legal); setOperationAction(ISD::FNEARBYINT, 
MVT::f64, Legal); + setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); + // FIXME: Do we need to handle scalar-to-vector here? setOperationAction(ISD::MUL, MVT::v4i32, Legal); @@ -1016,19 +1028,25 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FMUL, MVT::v8f32, Legal); setOperationAction(ISD::FDIV, MVT::v8f32, Legal); setOperationAction(ISD::FSQRT, MVT::v8f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal); setOperationAction(ISD::FNEG, MVT::v8f32, Custom); + setOperationAction(ISD::FABS, MVT::v8f32, Custom); setOperationAction(ISD::FADD, MVT::v4f64, Legal); setOperationAction(ISD::FSUB, MVT::v4f64, Legal); setOperationAction(ISD::FMUL, MVT::v4f64, Legal); setOperationAction(ISD::FDIV, MVT::v4f64, Legal); setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); + setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal); setOperationAction(ISD::FNEG, MVT::v4f64, Custom); + setOperationAction(ISD::FABS, MVT::v4f64, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, Legal); + setOperationAction(ISD::SRL, MVT::v16i16, Custom); setOperationAction(ISD::SRL, MVT::v32i8, Custom); @@ -1052,7 +1070,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSELECT, MVT::v8i32, Legal); setOperationAction(ISD::VSELECT, MVT::v8f32, Legal); - if (Subtarget->hasFMA()) { + if (Subtarget->hasFMA() || Subtarget->hasFMA4()) { setOperationAction(ISD::FMA, MVT::v8f32, Custom); setOperationAction(ISD::FMA, MVT::v4f64, Custom); setOperationAction(ISD::FMA, MVT::v4f32, Custom); @@ -2832,7 +2850,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); const X86InstrInfo *TII = - ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); + ((const X86TargetMachine&)getTargetMachine()).getInstrInfo(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[i]; @@ -3506,25 +3524,26 @@ SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp, if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i])) MatchOddMask = false; } - static const int CompactionMaskEven[] = {0, 2, -1, -1, 4, 6, -1, -1}; - static const int CompactionMaskOdd [] = {1, 3, -1, -1, 5, 7, -1, -1}; - const int *CompactionMask; - if (MatchEvenMask) - CompactionMask = CompactionMaskEven; - else if (MatchOddMask) - CompactionMask = CompactionMaskOdd; - else + if (!MatchEvenMask && !MatchOddMask) return SDValue(); - + SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT); - SDValue Op0 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(0), - UndefNode, CompactionMask); - SDValue Op1 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(1), - UndefNode, CompactionMask); - static const int UnpackMask[] = {0, 8, 1, 9, 4, 12, 5, 13}; - return DAG.getVectorShuffle(VT, dl, Op0, Op1, UnpackMask); + SDValue Op0 = SVOp->getOperand(0); + SDValue Op1 = SVOp->getOperand(1); + + if (MatchEvenMask) { + // Shift the second operand right to 32 bits. + static const int ShiftRightMask[] = {-1, 0, -1, 2, -1, 4, -1, 6 }; + Op1 = DAG.getVectorShuffle(VT, dl, Op1, UndefNode, ShiftRightMask); + } else { + // Shift the first operand left to 32 bits. 
+ static const int ShiftLeftMask[] = {1, -1, 3, -1, 5, -1, 7, -1 }; + Op0 = DAG.getVectorShuffle(VT, dl, Op0, UndefNode, ShiftLeftMask); + } + static const int BlendMask[] = {0, 9, 2, 11, 4, 13, 6, 15}; + return DAG.getVectorShuffle(VT, dl, Op0, Op1, BlendMask); } /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand @@ -4977,6 +4996,18 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, LDBase->getAlignment(), false/*isVolatile*/, true/*ReadMem*/, false/*WriteMem*/); + + // Make sure the newly-created LOAD is in the same position as LDBase in + // terms of dependency. We create a TokenFactor for LDBase and ResNode, and + // update uses of LDBase's output chain to use the TokenFactor. + if (LDBase->hasAnyUseOfValue(1)) { + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + SDValue(LDBase, 1), SDValue(ResNode.getNode(), 1)); + DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain); + DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1), + SDValue(ResNode.getNode(), 1)); + } + return DAG.getNode(ISD::BITCAST, DL, VT, ResNode); } return SDValue(); @@ -5881,8 +5912,6 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, DebugLoc dl = SVOp->getDebugLoc(); ArrayRef<int> MaskVals = SVOp->getMask(); - bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; - // If we have SSSE3, case 1 is generated when all result bytes come from // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is // present, fall back to case 3. @@ -5906,7 +5935,11 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, &pshufbMask[0], 16)); - if (V2IsUndef) + + // As PSHUFB will zero elements with negative indices, it's safe to ignore + // the 2nd operand if it's undefined or zero. + if (V2.getOpcode() == ISD::UNDEF || + ISD::isBuildVectorAllZeros(V2.getNode())) return V1; // Calculate the shuffle mask for the second input, shuffle it, and @@ -5992,6 +6025,51 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV); } +// v32i8 shuffles - Translate to VPSHUFB if possible. +static +SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG, + const X86TargetLowering &TLI) { + EVT VT = SVOp->getValueType(0); + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + DebugLoc dl = SVOp->getDebugLoc(); + SmallVector<int, 32> MaskVals(SVOp->getMask().begin(), SVOp->getMask().end()); + + bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; + bool V1IsAllZero = ISD::isBuildVectorAllZeros(V1.getNode()); + bool V2IsAllZero = ISD::isBuildVectorAllZeros(V2.getNode()); + + // VPSHUFB may be generated if + // (1) one of input vector is undefined or zeroinitializer. + // The mask value 0x80 puts 0 in the corresponding slot of the vector. + // And (2) the mask indexes don't cross the 128-bit lane. + if (VT != MVT::v32i8 || !TLI.getSubtarget()->hasAVX2() || + (!V2IsUndef && !V2IsAllZero && !V1IsAllZero)) + return SDValue(); + + if (V1IsAllZero && !V2IsAllZero) { + CommuteVectorShuffleMask(MaskVals, 32); + V1 = V2; + } + SmallVector<SDValue, 32> pshufbMask; + for (unsigned i = 0; i != 32; i++) { + int EltIdx = MaskVals[i]; + if (EltIdx < 0 || EltIdx >= 32) + EltIdx = 0x80; + else { + if ((EltIdx >= 16 && i < 16) || (EltIdx < 16 && i >= 16)) + // Cross lane is not allowed. 
+ return SDValue(); + EltIdx &= 0xf; + } + pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8)); + } + return DAG.getNode(X86ISD::PSHUFB, dl, MVT::v32i8, V1, + DAG.getNode(ISD::BUILD_VECTOR, dl, + MVT::v32i8, &pshufbMask[0], 32)); +} + /// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide /// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be /// done when every pair / quad of shuffle mask elements point to elements in @@ -6818,6 +6896,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return NewOp; } + if (VT == MVT::v32i8) { + SDValue NewOp = LowerVECTOR_SHUFFLEv32i8(SVOp, DAG, *this); + if (NewOp.getNode()) + return NewOp; + } + // Handle all 128-bit wide vectors with 4 elements, and match them with // several different shuffle types. if (NumElems == 4 && VT.is128BitVector()) @@ -8115,26 +8199,35 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, return FIST; } -SDValue X86TargetLowering::LowerFABS(SDValue Op, - SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const { LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); EVT EltVT = VT; - if (VT.isVector()) + unsigned NumElts = VT == MVT::f64 ? 2 : 4; + if (VT.isVector()) { EltVT = VT.getVectorElementType(); - Constant *C; - if (EltVT == MVT::f64) { - C = ConstantVector::getSplat(2, - ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))))); - } else { - C = ConstantVector::getSplat(4, - ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))))); + NumElts = VT.getVectorNumElements(); } - SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); + Constant *C; + if (EltVT == MVT::f64) + C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))); + else + C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))); + C = ConstantVector::getSplat(NumElts, C); + SDValue CPIdx = DAG.getConstantPool(C, getPointerTy()); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - false, false, false, 16); + false, false, false, Alignment); + if (VT.isVector()) { + MVT ANDVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64; + return DAG.getNode(ISD::BITCAST, dl, VT, + DAG.getNode(ISD::AND, dl, ANDVT, + DAG.getNode(ISD::BITCAST, dl, ANDVT, + Op.getOperand(0)), + DAG.getNode(ISD::BITCAST, dl, ANDVT, Mask))); + } return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask); } @@ -8154,10 +8247,11 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { else C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))); C = ConstantVector::getSplat(NumElts, C); - SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); + SDValue CPIdx = DAG.getConstantPool(C, getPointerTy()); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - false, false, false, 16); + false, false, false, Alignment); if (VT.isVector()) { MVT XORVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64; return DAG.getNode(ISD::BITCAST, dl, VT, @@ -9943,62 +10037,6 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const Op.getOperand(1), Op.getOperand(2), DAG); } - // Fix vector shift instructions where the last operand is a non-immediate - // i32 value. 
- case Intrinsic::x86_mmx_pslli_w: - case Intrinsic::x86_mmx_pslli_d: - case Intrinsic::x86_mmx_pslli_q: - case Intrinsic::x86_mmx_psrli_w: - case Intrinsic::x86_mmx_psrli_d: - case Intrinsic::x86_mmx_psrli_q: - case Intrinsic::x86_mmx_psrai_w: - case Intrinsic::x86_mmx_psrai_d: { - SDValue ShAmt = Op.getOperand(2); - if (isa<ConstantSDNode>(ShAmt)) - return SDValue(); - - unsigned NewIntNo; - switch (IntNo) { - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. - case Intrinsic::x86_mmx_pslli_w: - NewIntNo = Intrinsic::x86_mmx_psll_w; - break; - case Intrinsic::x86_mmx_pslli_d: - NewIntNo = Intrinsic::x86_mmx_psll_d; - break; - case Intrinsic::x86_mmx_pslli_q: - NewIntNo = Intrinsic::x86_mmx_psll_q; - break; - case Intrinsic::x86_mmx_psrli_w: - NewIntNo = Intrinsic::x86_mmx_psrl_w; - break; - case Intrinsic::x86_mmx_psrli_d: - NewIntNo = Intrinsic::x86_mmx_psrl_d; - break; - case Intrinsic::x86_mmx_psrli_q: - NewIntNo = Intrinsic::x86_mmx_psrl_q; - break; - case Intrinsic::x86_mmx_psrai_w: - NewIntNo = Intrinsic::x86_mmx_psra_w; - break; - case Intrinsic::x86_mmx_psrai_d: - NewIntNo = Intrinsic::x86_mmx_psra_d; - break; - } - - // The vector shift intrinsics with scalars uses 32b shift amounts but - // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits - // to be zero. - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, ShAmt, - DAG.getConstant(0, MVT::i32)); -// FIXME this must be lowered to get rid of the invalid type. - - EVT VT = Op.getValueType(); - ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt); - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(NewIntNo, MVT::i32), - Op.getOperand(1), ShAmt); - } case Intrinsic::x86_sse42_pcmpistria128: case Intrinsic::x86_sse42_pcmpestria128: case Intrinsic::x86_sse42_pcmpistric128: @@ -10077,6 +10115,74 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); return DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size()); } + case Intrinsic::x86_fma_vfmadd_ps: + case Intrinsic::x86_fma_vfmadd_pd: + case Intrinsic::x86_fma_vfmsub_ps: + case Intrinsic::x86_fma_vfmsub_pd: + case Intrinsic::x86_fma_vfnmadd_ps: + case Intrinsic::x86_fma_vfnmadd_pd: + case Intrinsic::x86_fma_vfnmsub_ps: + case Intrinsic::x86_fma_vfnmsub_pd: + case Intrinsic::x86_fma_vfmaddsub_ps: + case Intrinsic::x86_fma_vfmaddsub_pd: + case Intrinsic::x86_fma_vfmsubadd_ps: + case Intrinsic::x86_fma_vfmsubadd_pd: + case Intrinsic::x86_fma_vfmadd_ps_256: + case Intrinsic::x86_fma_vfmadd_pd_256: + case Intrinsic::x86_fma_vfmsub_ps_256: + case Intrinsic::x86_fma_vfmsub_pd_256: + case Intrinsic::x86_fma_vfnmadd_ps_256: + case Intrinsic::x86_fma_vfnmadd_pd_256: + case Intrinsic::x86_fma_vfnmsub_ps_256: + case Intrinsic::x86_fma_vfnmsub_pd_256: + case Intrinsic::x86_fma_vfmaddsub_ps_256: + case Intrinsic::x86_fma_vfmaddsub_pd_256: + case Intrinsic::x86_fma_vfmsubadd_ps_256: + case Intrinsic::x86_fma_vfmsubadd_pd_256: { + unsigned Opc; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
+ case Intrinsic::x86_fma_vfmadd_ps: + case Intrinsic::x86_fma_vfmadd_pd: + case Intrinsic::x86_fma_vfmadd_ps_256: + case Intrinsic::x86_fma_vfmadd_pd_256: + Opc = X86ISD::FMADD; + break; + case Intrinsic::x86_fma_vfmsub_ps: + case Intrinsic::x86_fma_vfmsub_pd: + case Intrinsic::x86_fma_vfmsub_ps_256: + case Intrinsic::x86_fma_vfmsub_pd_256: + Opc = X86ISD::FMSUB; + break; + case Intrinsic::x86_fma_vfnmadd_ps: + case Intrinsic::x86_fma_vfnmadd_pd: + case Intrinsic::x86_fma_vfnmadd_ps_256: + case Intrinsic::x86_fma_vfnmadd_pd_256: + Opc = X86ISD::FNMADD; + break; + case Intrinsic::x86_fma_vfnmsub_ps: + case Intrinsic::x86_fma_vfnmsub_pd: + case Intrinsic::x86_fma_vfnmsub_ps_256: + case Intrinsic::x86_fma_vfnmsub_pd_256: + Opc = X86ISD::FNMSUB; + break; + case Intrinsic::x86_fma_vfmaddsub_ps: + case Intrinsic::x86_fma_vfmaddsub_pd: + case Intrinsic::x86_fma_vfmaddsub_ps_256: + case Intrinsic::x86_fma_vfmaddsub_pd_256: + Opc = X86ISD::FMADDSUB; + break; + case Intrinsic::x86_fma_vfmsubadd_ps: + case Intrinsic::x86_fma_vfmsubadd_pd: + case Intrinsic::x86_fma_vfmsubadd_ps_256: + case Intrinsic::x86_fma_vfmsubadd_pd_256: + Opc = X86ISD::FMSUBADD; + break; + } + + return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + } } } @@ -10918,7 +11024,7 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, LHS1 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, Extra); LHS2 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, Extra); - return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);; + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2); } // fall through case MVT::v4i32: @@ -14020,7 +14126,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // // where Op could be BRCOND or CMOV. // -static SDValue BoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { +static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { // Quit if not CMP and SUB with its value result used. if (Cmp.getOpcode() != X86ISD::CMP && (Cmp.getOpcode() != X86ISD::SUB || Cmp.getNode()->hasAnyUseOfValue(0))) @@ -14056,40 +14162,133 @@ static SDValue BoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { if (SetCC.getOpcode() == ISD::ZERO_EXTEND) SetCC = SetCC.getOperand(0); - // Quit if not SETCC. - // FIXME: So far we only handle the boolean value generated from SETCC. If - // there is other ways to generate boolean values, we need handle them here - // as well. - if (SetCC.getOpcode() != X86ISD::SETCC) + switch (SetCC.getOpcode()) { + case X86ISD::SETCC: + // Set the condition code or opposite one if necessary. + CC = X86::CondCode(SetCC.getConstantOperandVal(0)); + if (needOppositeCond) + CC = X86::GetOppositeBranchCondition(CC); + return SetCC.getOperand(1); + case X86ISD::CMOV: { + // Check whether false/true value has canonical one, i.e. 0 or 1. + ConstantSDNode *FVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(0)); + ConstantSDNode *TVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(1)); + // Quit if true value is not a constant. + if (!TVal) + return SDValue(); + // Quit if false value is not a constant. + if (!FVal) { + // A special case for rdrand, where 0 is set if false cond is found. + SDValue Op = SetCC.getOperand(0); + if (Op.getOpcode() != X86ISD::RDRAND) + return SDValue(); + } + // Quit if false value is not the constant 0 or 1. + bool FValIsFalse = true; + if (FVal && FVal->getZExtValue() != 0) { + if (FVal->getZExtValue() != 1) + return SDValue(); + // If FVal is 1, opposite cond is needed. 
+ needOppositeCond = !needOppositeCond; + FValIsFalse = false; + } + // Quit if TVal is not the constant opposite of FVal. + if (FValIsFalse && TVal->getZExtValue() != 1) + return SDValue(); + if (!FValIsFalse && TVal->getZExtValue() != 0) + return SDValue(); + CC = X86::CondCode(SetCC.getConstantOperandVal(2)); + if (needOppositeCond) + CC = X86::GetOppositeBranchCondition(CC); + return SetCC.getOperand(3); + } + } + + return SDValue(); +} + +/// checkFlaggedOrCombine - DAG combination on X86ISD::OR, i.e. with EFLAGS +/// updated. If only flag result is used and the result is evaluated from a +/// series of element extraction, try to combine it into a PTEST. +static SDValue checkFlaggedOrCombine(SDValue Or, X86::CondCode &CC, + SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + SDNode *N = Or.getNode(); + DebugLoc DL = N->getDebugLoc(); + + // Only SSE4.1 and beyond supports PTEST or like. + if (!Subtarget->hasSSE41()) return SDValue(); - // Set the condition code or opposite one if necessary. - CC = X86::CondCode(SetCC.getConstantOperandVal(0)); - if (needOppositeCond) - CC = X86::GetOppositeBranchCondition(CC); + if (N->getOpcode() != X86ISD::OR) + return SDValue(); - return SetCC.getOperand(1); -} + // Quit if the value result of OR is used. + if (N->hasAnyUseOfValue(0)) + return SDValue(); -static bool IsValidFCMOVCondition(X86::CondCode CC) { - switch (CC) { - default: - return false; - case X86::COND_B: - case X86::COND_BE: - case X86::COND_E: - case X86::COND_P: - case X86::COND_AE: - case X86::COND_A: - case X86::COND_NE: - case X86::COND_NP: - return true; + // Quit if not used as a boolean value. + if (CC != X86::COND_E && CC != X86::COND_NE) + return SDValue(); + + SmallVector<SDValue, 8> Opnds; + SDValue VecIn; + EVT VT = MVT::Other; + unsigned Mask = 0; + + // Recognize a special case where a vector is casted into wide integer to + // test all 0s. + Opnds.push_back(N->getOperand(0)); + Opnds.push_back(N->getOperand(1)); + + for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) { + SmallVector<SDValue, 8>::const_iterator I = Opnds.begin() + Slot; + // BFS traverse all OR'd operands. + if (I->getOpcode() == ISD::OR) { + Opnds.push_back(I->getOperand(0)); + Opnds.push_back(I->getOperand(1)); + // Re-evaluate the number of nodes to be traversed. + e += 2; // 2 more nodes (LHS and RHS) are pushed. + continue; + } + + // Quit if a non-EXTRACT_VECTOR_ELT + if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return SDValue(); + + // Quit if without a constant index. + SDValue Idx = I->getOperand(1); + if (!isa<ConstantSDNode>(Idx)) + return SDValue(); + + // Check if all elements are extracted from the same vector. + SDValue ExtractedFromVec = I->getOperand(0); + if (VecIn.getNode() == 0) { + VT = ExtractedFromVec.getValueType(); + // FIXME: only 128-bit vector is supported so far. + if (!VT.is128BitVector()) + return SDValue(); + VecIn = ExtractedFromVec; + } else if (VecIn != ExtractedFromVec) + return SDValue(); + + // Record the constant index. + Mask |= 1U << cast<ConstantSDNode>(Idx)->getZExtValue(); } + + assert(VT.is128BitVector() && "Only 128-bit vector PTEST is supported so far."); + + // Quit if not all elements are used. + if (Mask != (1U << VT.getVectorNumElements()) - 1U) + return SDValue(); + + return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, VecIn, VecIn); } /// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. 
X86::COND_NE), CONDVAL] static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget) { DebugLoc DL = N->getDebugLoc(); // If the flag operand isn't dead, don't touch this CMOV. @@ -14114,10 +14313,18 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, SDValue Flags; - Flags = BoolTestSetCCCombine(Cond, CC); + Flags = checkBoolTestSetCCCombine(Cond, CC); if (Flags.getNode() && // Extra check as FCMOV only supports a subset of X86 cond. - (FalseOp.getValueType() != MVT::f80 || IsValidFCMOVCondition(CC))) { + (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC))) { + SDValue Ops[] = { FalseOp, TrueOp, + DAG.getConstant(CC, MVT::i8), Flags }; + return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), + Ops, array_lengthof(Ops)); + } + + Flags = checkFlaggedOrCombine(Cond, CC, DAG, Subtarget); + if (Flags.getNode()) { SDValue Ops[] = { FalseOp, TrueOp, DAG.getConstant(CC, MVT::i8), Flags }; return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), @@ -15384,7 +15591,7 @@ static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); // If we run in unsafe-math mode, then convert the FMAX and FMIN nodes - // into FMINC and MMAXC, which are Commutative operations. + // into FMINC and FMAXC, which are Commutative operations. unsigned NewOp = 0; switch (N->getOpcode()) { default: llvm_unreachable("unknown opcode"); @@ -15502,8 +15709,13 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG, DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); + // Let legalize expand this if it isn't a legal type yet. + if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + EVT ScalarVT = VT.getScalarType(); - if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget->hasFMA()) + if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || + (!Subtarget->hasFMA() && !Subtarget->hasFMA4())) return SDValue(); SDValue A = N->getOperand(0); @@ -15525,9 +15737,10 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG, unsigned Opcode; if (!NegMul) - Opcode = (!NegC)? X86ISD::FMADD : X86ISD::FMSUB; + Opcode = (!NegC) ? X86ISD::FMADD : X86ISD::FMSUB; else - Opcode = (!NegC)? X86ISD::FNMADD : X86ISD::FNMSUB; + Opcode = (!NegC) ? 
X86ISD::FNMADD : X86ISD::FNMSUB; + return DAG.getNode(Opcode, dl, VT, A, B, C); } @@ -15625,7 +15838,9 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) { } // Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT -static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget) { DebugLoc DL = N->getDebugLoc(); X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0)); SDValue EFLAGS = N->getOperand(1); @@ -15641,7 +15856,13 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) { SDValue Flags; - Flags = BoolTestSetCCCombine(EFLAGS, CC); + Flags = checkBoolTestSetCCCombine(EFLAGS, CC); + if (Flags.getNode()) { + SDValue Cond = DAG.getConstant(CC, MVT::i8); + return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags); + } + + Flags = checkFlaggedOrCombine(EFLAGS, CC, DAG, Subtarget); if (Flags.getNode()) { SDValue Cond = DAG.getConstant(CC, MVT::i8); return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags); @@ -15663,7 +15884,14 @@ static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG, SDValue Flags; - Flags = BoolTestSetCCCombine(EFLAGS, CC); + Flags = checkBoolTestSetCCCombine(EFLAGS, CC); + if (Flags.getNode()) { + SDValue Cond = DAG.getConstant(CC, MVT::i8); + return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond, + Flags); + } + + Flags = checkFlaggedOrCombine(EFLAGS, CC, DAG, Subtarget); if (Flags.getNode()) { SDValue Cond = DAG.getConstant(CC, MVT::i8); return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond, @@ -15858,7 +16086,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI); case ISD::VSELECT: case ISD::SELECT: return PerformSELECTCombine(N, DAG, DCI, Subtarget); - case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); + case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget); case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget); case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget); case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI); @@ -15888,7 +16116,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget); case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI); case ISD::SETCC: return PerformISDSETCCCombine(N, DAG); - case X86ISD::SETCC: return PerformSETCCCombine(N, DAG); + case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget); case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget); case X86ISD::SHUFP: // Handle all target specific shuffles case X86ISD::PALIGN: diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index b0c27c8..bfe9541 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -16,15 +16,18 @@ // // Return instructions. +// +// The X86retflag return instructions are variadic because we may add ST0 and +// ST1 arguments when returning values on the x87 stack. 
let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, FPForm = SpecialFP in { - def RET : I <0xC3, RawFrm, (outs), (ins), + def RET : I <0xC3, RawFrm, (outs), (ins variable_ops), "ret", [(X86retflag 0)], IIC_RET>; def RETW : I <0xC3, RawFrm, (outs), (ins), "ret{w}", [], IIC_RET>, OpSize; - def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt), + def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), "ret\t$amt", [(X86retflag timm:$amt)], IIC_RET_IMM>; def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt), diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index 95ee7e5..5663800 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -19,7 +19,8 @@ let Constraints = "$src1 = $dst" in { multiclass fma3p_rm<bits<8> opc, string OpcodeStr, PatFrag MemFrag128, PatFrag MemFrag256, ValueType OpVT128, ValueType OpVT256, - SDPatternOperator Op = null_frag, bit MayLoad = 1> { + SDPatternOperator Op = null_frag> { + let isCommutable = 1 in def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, @@ -27,7 +28,7 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr, [(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1, VR128:$src3)))]>; - let mayLoad = MayLoad in + let mayLoad = 1 in def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, f128mem:$src3), !strconcat(OpcodeStr, @@ -35,6 +36,7 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr, [(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1, (MemFrag128 addr:$src3))))]>; + let isCommutable = 1 in def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, VR256:$src3), !strconcat(OpcodeStr, @@ -42,7 +44,7 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr, [(set VR256:$dst, (OpVT256 (Op VR256:$src2, VR256:$src1, VR256:$src3)))]>; - let mayLoad = MayLoad in + let mayLoad = 1 in def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, f256mem:$src3), !strconcat(OpcodeStr, @@ -59,7 +61,7 @@ multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, SDNode Op, ValueType OpTy128, ValueType OpTy256> { defm r213 : fma3p_rm<opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy)), - MemFrag128, MemFrag256, OpTy128, OpTy256, Op, 0>; + MemFrag128, MemFrag256, OpTy128, OpTy256, Op>; let neverHasSideEffects = 1 in { defm r132 : fma3p_rm<opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy)), @@ -112,148 +114,18 @@ let ExeDomain = SSEPackedDouble in { v4f64>, VEX_W; } -let Predicates = [HasFMA] in { - def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1, VR128:$src3), - (VFMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1, - (memopv4f32 addr:$src3)), - (VFMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1, VR128:$src3), - (VFMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1, - (memopv4f32 addr:$src3)), - (VFMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1, VR128:$src3), - (VFMADDSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1, - (memopv4f32 addr:$src3)), - (VFMADDSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1, VR128:$src3), - (VFMSUBADDPSr213r 
VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1, - (memopv4f32 addr:$src3)), - (VFMSUBADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; - - def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1, - (memopv8f32 addr:$src3)), - (VFMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1, - (memopv8f32 addr:$src3)), - (VFMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMADDSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1, - (memopv8f32 addr:$src3)), - (VFMADDSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMSUBADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1, - (memopv8f32 addr:$src3)), - (VFMSUBADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - - def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1, VR128:$src3), - (VFMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1, - (memopv2f64 addr:$src3)), - (VFMADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1, VR128:$src3), - (VFMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1, - (memopv2f64 addr:$src3)), - (VFMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1, VR128:$src3), - (VFMADDSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1, - (memopv2f64 addr:$src3)), - (VFMADDSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1, VR128:$src3), - (VFMSUBADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1, - (memopv2f64 addr:$src3)), - (VFMSUBADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; - - def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1, - (memopv4f64 addr:$src3)), - (VFMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1, - (memopv4f64 addr:$src3)), - (VFMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMADDSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1, - (memopv4f64 addr:$src3)), - (VFMADDSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMSUBADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : 
Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1, - (memopv4f64 addr:$src3)), - (VFMSUBADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - - def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1, VR128:$src3), - (VFNMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1, - (memopv4f32 addr:$src3)), - (VFNMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1, VR128:$src3), - (VFNMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1, - (memopv4f32 addr:$src3)), - (VFNMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; - - def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFNMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1, - (memopv8f32 addr:$src3)), - (VFNMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFNMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1, - (memopv8f32 addr:$src3)), - (VFNMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - - def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1, VR128:$src3), - (VFNMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1, - (memopv2f64 addr:$src3)), - (VFNMADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1, VR128:$src3), - (VFNMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1, - (memopv2f64 addr:$src3)), - (VFNMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; - - def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFNMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1, - (memopv4f64 addr:$src3)), - (VFNMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFNMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1, - (memopv4f64 addr:$src3)), - (VFNMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - -} // Predicates = [HasFMA] - let Constraints = "$src1 = $dst" in { multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop, RegisterClass RC, ValueType OpVT, PatFrag mem_frag, - SDPatternOperator OpNode = null_frag, bit MayLoad = 1> { + SDPatternOperator OpNode = null_frag> { + let isCommutable = 1 in def r : FMA3<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>; - let mayLoad = MayLoad in + let mayLoad = 1 in def m : FMA3<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, RC:$src2, x86memop:$src3), !strconcat(OpcodeStr, @@ -266,6 +138,7 @@ multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop, multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr, Operand memop, ComplexPattern mem_cpat, Intrinsic IntId, RegisterClass RC> { + let isCommutable = 1 in def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, @@ -294,7 +167,7 @@ let 
neverHasSideEffects = 1 in { } defm r213 : fma3s_rm<opc213, !strconcat(OpStr, !strconcat("213", PackTy)), - x86memop, RC, OpVT, mem_frag, OpNode, 0>, + x86memop, RC, OpVT, mem_frag, OpNode>, fma3s_rm_int<opc213, !strconcat(OpStr, !strconcat("213", PackTy)), memop, mem_cpat, Int, RC>; } @@ -324,73 +197,102 @@ defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss, //===----------------------------------------------------------------------===// -multiclass fma4s<bits<8> opc, string OpcodeStr, Operand memop, - ComplexPattern mem_cpat, Intrinsic Int> { - def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3), +multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC, + X86MemOperand x86memop, ValueType OpVT, SDNode OpNode, + PatFrag mem_frag> { + let isCommutable = 1 in + def rr : FMA4<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR128:$dst, - (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4; - def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, memop:$src3), + [(set RC:$dst, + (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, MemOp4; + def rm : FMA4<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, RC:$src2, x86memop:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR128:$dst, - (Int VR128:$src1, VR128:$src2, mem_cpat:$src3))]>, VEX_W, MemOp4; - def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, memop:$src2, VR128:$src3), + [(set RC:$dst, (OpNode RC:$src1, RC:$src2, + (mem_frag addr:$src3)))]>, VEX_W, MemOp4; + def mr : FMA4<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR128:$dst, - (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>; + [(set RC:$dst, + (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>; // For disassembler let isCodeGenOnly = 1 in - def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3), + def rr_REV : FMA4<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>; } -multiclass fma4p<bits<8> opc, string OpcodeStr, - Intrinsic Int128, Intrinsic Int256, +multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop, + ComplexPattern mem_cpat, Intrinsic Int> { + let isCommutable = 1 in + def rr_Int : FMA4<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [(set VR128:$dst, + (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4; + def rm_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, memop:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [(set VR128:$dst, (Int VR128:$src1, VR128:$src2, + mem_cpat:$src3))]>, VEX_W, MemOp4; + def mr_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, memop:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [(set VR128:$dst, + (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>; +} + +multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT128, ValueType OpVT256, PatFrag ld_frag128, PatFrag 
ld_frag256> { + let isCommutable = 1 in def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, - (Int128 VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4; + (OpVT128 (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>, + VEX_W, MemOp4; def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, f128mem:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2, + [(set VR128:$dst, (OpNode VR128:$src1, VR128:$src2, (ld_frag128 addr:$src3)))]>, VEX_W, MemOp4; def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, - (Int128 VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>; + (OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>; + let isCommutable = 1 in def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, VR256:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR256:$dst, - (Int256 VR256:$src1, VR256:$src2, VR256:$src3))]>, VEX_W, MemOp4; + (OpVT256 (OpNode VR256:$src1, VR256:$src2, VR256:$src3)))]>, + VEX_W, MemOp4; def rmY : FMA4<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, f256mem:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2, + [(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2, (ld_frag256 addr:$src3)))]>, VEX_W, MemOp4; def mrY : FMA4<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, VR256:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR256:$dst, - (Int256 VR256:$src1, (ld_frag256 addr:$src2), VR256:$src3))]>; + (OpNode VR256:$src1, (ld_frag256 addr:$src2), VR256:$src3))]>; // For disassembler let isCodeGenOnly = 1 in { def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst), @@ -406,45 +308,58 @@ let isCodeGenOnly = 1 in { let Predicates = [HasFMA4] in { -defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", ssmem, sse_load_f32, - int_x86_fma_vfmadd_ss>; -defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", sdmem, sse_load_f64, - int_x86_fma_vfmadd_sd>; -defm VFMADDPS4 : fma4p<0x68, "vfmaddps", int_x86_fma_vfmadd_ps, - int_x86_fma_vfmadd_ps_256, memopv4f32, memopv8f32>; -defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", int_x86_fma_vfmadd_pd, - int_x86_fma_vfmadd_pd_256, memopv2f64, memopv4f64>; -defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", ssmem, sse_load_f32, - int_x86_fma_vfmsub_ss>; -defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", sdmem, sse_load_f64, - int_x86_fma_vfmsub_sd>; -defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", int_x86_fma_vfmsub_ps, - int_x86_fma_vfmsub_ps_256, memopv4f32, memopv8f32>; -defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", int_x86_fma_vfmsub_pd, - int_x86_fma_vfmsub_pd_256, memopv2f64, memopv4f64>; -defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", ssmem, sse_load_f32, - int_x86_fma_vfnmadd_ss>; -defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", sdmem, sse_load_f64, - int_x86_fma_vfnmadd_sd>; -defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", int_x86_fma_vfnmadd_ps, - int_x86_fma_vfnmadd_ps_256, memopv4f32, memopv8f32>; -defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", int_x86_fma_vfnmadd_pd, - int_x86_fma_vfnmadd_pd_256, memopv2f64, memopv4f64>; -defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", 
ssmem, sse_load_f32, - int_x86_fma_vfnmsub_ss>; -defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", sdmem, sse_load_f64, - int_x86_fma_vfnmsub_sd>; -defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", int_x86_fma_vfnmsub_ps, - int_x86_fma_vfnmsub_ps_256, memopv4f32, memopv8f32>; -defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", int_x86_fma_vfnmsub_pd, - int_x86_fma_vfnmsub_pd_256, memopv2f64, memopv4f64>; -defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", int_x86_fma_vfmaddsub_ps, - int_x86_fma_vfmaddsub_ps_256, memopv4f32, memopv8f32>; -defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", int_x86_fma_vfmaddsub_pd, - int_x86_fma_vfmaddsub_pd_256, memopv2f64, memopv4f64>; -defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", int_x86_fma_vfmsubadd_ps, - int_x86_fma_vfmsubadd_ps_256, memopv4f32, memopv8f32>; -defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", int_x86_fma_vfmsubadd_pd, - int_x86_fma_vfmsubadd_pd_256, memopv2f64, memopv4f64>; +defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>, + fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32, + int_x86_fma_vfmadd_ss>; +defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>, + fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64, + int_x86_fma_vfmadd_sd>; +defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>, + fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32, + int_x86_fma_vfmsub_ss>; +defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>, + fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64, + int_x86_fma_vfmsub_sd>; +defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32, + X86Fnmadd, loadf32>, + fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32, + int_x86_fma_vfnmadd_ss>; +defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64, + X86Fnmadd, loadf64>, + fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64, + int_x86_fma_vfnmadd_sd>; +defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32, + X86Fnmsub, loadf32>, + fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32, + int_x86_fma_vfnmsub_ss>; +defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64, + X86Fnmsub, loadf64>, + fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64, + int_x86_fma_vfnmsub_sd>; + +defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32, + memopv4f32, memopv8f32>; +defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64, + memopv2f64, memopv4f64>; +defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32, + memopv4f32, memopv8f32>; +defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64, + memopv2f64, memopv4f64>; +defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32, + memopv4f32, memopv8f32>; +defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64, + memopv2f64, memopv4f64>; +defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32, + memopv4f32, memopv8f32>; +defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64, + memopv2f64, memopv4f64>; +defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32, + memopv4f32, memopv8f32>; +defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64, + memopv2f64, memopv4f64>; +defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32, + memopv4f32, memopv8f32>; +defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64, + memopv2f64, memopv4f64>; } // HasFMA4 diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 81b4f81..55ad2ec 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ 
b/lib/Target/X86/X86InstrFormats.td @@ -287,12 +287,14 @@ class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm, let CodeSize = 3; } +def __xs : XS; + // SI - SSE 1 & 2 scalar instructions class SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin> { let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], - !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2])); + !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2])); // AVX instructions have a 'v' prefix in the mnemonic let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); @@ -303,7 +305,7 @@ class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern, itin> { let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], - !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2])); + !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2])); // AVX instructions have a 'v' prefix in the mnemonic let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); @@ -314,18 +316,25 @@ class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin, Domain d> : I<o, F, outs, ins, asm, pattern, itin, d> { let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], - !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1])); + !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1])); // AVX instructions have a 'v' prefix in the mnemonic let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); } +// MMXPI - SSE 1 & 2 packed instructions with MMX operands +class MMXPI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, + InstrItinClass itin, Domain d> + : I<o, F, outs, ins, asm, pattern, itin, d> { + let Predicates = !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]); +} + // PIi8 - SSE 1 & 2 packed instructions with immediate class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin, Domain d> : Ii8<o, F, outs, ins, asm, pattern, itin, d> { let Predicates = !if(hasVEX_4VPrefix /* VEX */, [HasAVX], - !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1])); + !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1])); // AVX instructions have a 'v' prefix in the mnemonic let AsmString = !if(hasVEX_4VPrefix, !strconcat("v", asm), asm); @@ -341,18 +350,18 @@ class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm, class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> - : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE1]>; + : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>; class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> - : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE1]>; + : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>; class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB, - Requires<[HasSSE1]>; + Requires<[UseSSE1]>; class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB, - Requires<[HasSSE1]>; + Requires<[UseSSE1]>; class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = 
IIC_DEFAULT> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS, @@ -372,27 +381,31 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm, // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes. // VSDI - SSE2 instructions with XD prefix in AVX form. // VPDI - SSE2 instructions with TB and OpSize prefixes in AVX form. +// MMXSDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix as well as +// MMX operands. +// MMXSSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix as well as +// MMX operands. class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> - : I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>; + : I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>; class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> - : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>; + : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>; class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> - : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE2]>; + : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE2]>; class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> - : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>; + : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>; class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize, - Requires<[HasSSE2]>; + Requires<[UseSSE2]>; class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize, - Requires<[HasSSE2]>; + Requires<[UseSSE2]>; class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD, @@ -405,6 +418,12 @@ class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>, TB, OpSize, Requires<[HasAVX]>; +class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>; +class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>; // SSE3 Instruction Templates: // @@ -415,21 +434,23 @@ class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm, class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, XS, - Requires<[HasSSE3]>; + Requires<[UseSSE3]>; class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XD, - Requires<[HasSSE3]>; + Requires<[UseSSE3]>; class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, 
ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize, - Requires<[HasSSE3]>; + Requires<[UseSSE3]>; // SSSE3 Instruction Templates: // // SS38I - SSSE3 instructions with T8 prefix. // SS3AI - SSSE3 instructions with TA prefix. +// MMXSS38I - SSSE3 instructions with T8 prefix and MMX operands. +// MMXSS3AI - SSSE3 instructions with TA prefix and MMX operands. // // Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version // uses the MMX registers. The 64-bit versions are grouped with the MMX @@ -438,10 +459,18 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, - Requires<[HasSSSE3]>; + Requires<[UseSSSE3]>; class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, + Requires<[UseSSSE3]>; +class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, + Requires<[HasSSSE3]>; +class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, Requires<[HasSSSE3]>; // SSE4.1 Instruction Templates: @@ -452,11 +481,11 @@ class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, - Requires<[HasSSE41]>; + Requires<[UseSSE41]>; class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, - Requires<[HasSSE41]>; + Requires<[UseSSE41]>; // SSE4.2 Instruction Templates: // @@ -464,9 +493,10 @@ class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, - Requires<[HasSSE42]>; + Requires<[UseSSE42]>; // SS42FI - SSE 4.2 instructions with T8XD prefix. +// NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns. 
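A quick self-contained illustration of the HasSSE*/UseSSE* split applied throughout these templates, and of why HasSSE42 is kept for CRC32 per the note above: a plain struct stands in for X86Subtarget, and the helper names are hypothetical, mirroring the predicate strings in the patch.

#include <cassert>

// Minimal stand-in for the relevant X86Subtarget feature bits.
struct SubtargetBits { bool SSE42; bool AVX; };

// Mirrors "Subtarget->hasSSE42()" - legacy-encoded CRC32 stays on this form.
static bool hasSSE42(const SubtargetBits &ST) { return ST.SSE42; }

// Mirrors "Subtarget->hasSSE42() && Subtarget->hasNoAVX()" - the legacy SSE
// encodings are only selected when no VEX-encoded (AVX) form is available.
static bool useSSE42(const SubtargetBits &ST) { return ST.SSE42 && !ST.AVX; }

int main() {
  SubtargetBits AVXMachine{true, true};
  assert(hasSSE42(AVXMachine) && !useSSE42(AVXMachine)); // AVX forms win
  SubtargetBits SSEOnly{true, false};
  assert(hasSSE42(SSEOnly) && useSSE42(SSEOnly));        // legacy path
  return 0;
}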
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin>, T8XD, Requires<[HasSSE42]>; @@ -475,7 +505,7 @@ class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm, class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, - Requires<[HasSSE42]>; + Requires<[UseSSE42]>; // AVX Instruction Templates: // Instructions introduced in AVX (no SSE equivalent forms) diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index ee2d3c4..9035435 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -183,8 +183,8 @@ def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>; def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>; def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>; def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFma>; -def X86Fmaddsub : SDNode<"X86ISD::FMSUBADD", SDTFma>; -def X86Fmsubadd : SDNode<"X86ISD::FMADDSUB", SDTFma>; +def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFma>; +def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>; def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>, @@ -240,6 +240,10 @@ def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>; def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>; def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>; +// 128-/256-bit extload pattern fragments +def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>; +def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>; + // Like 'store', but always requires 128-bit vector alignment. 
def alignedstore : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{ diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 459f01a..4f3d824 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1110,6 +1110,36 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, TB_ALIGN_32 }, { X86::VPXORYrr, X86::VPXORYrm, TB_ALIGN_32 }, // FIXME: add AVX 256-bit foldable instructions + + // FMA4 foldable patterns + { X86::VFMADDSS4rr, X86::VFMADDSS4mr, TB_ALIGN_16 }, + { X86::VFMADDSD4rr, X86::VFMADDSD4mr, TB_ALIGN_16 }, + { X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_16 }, + { X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_16 }, + { X86::VFMADDPS4rrY, X86::VFMADDPS4mrY, TB_ALIGN_32 }, + { X86::VFMADDPD4rrY, X86::VFMADDPD4mrY, TB_ALIGN_32 }, + { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_16 }, + { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_16 }, + { X86::VFNMADDPS4rrY, X86::VFNMADDPS4mrY, TB_ALIGN_32 }, + { X86::VFNMADDPD4rrY, X86::VFNMADDPD4mrY, TB_ALIGN_32 }, + { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, TB_ALIGN_16 }, + { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, TB_ALIGN_16 }, + { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_16 }, + { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_16 }, + { X86::VFMSUBPS4rrY, X86::VFMSUBPS4mrY, TB_ALIGN_32 }, + { X86::VFMSUBPD4rrY, X86::VFMSUBPD4mrY, TB_ALIGN_32 }, + { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_16 }, + { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_16 }, + { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4mrY, TB_ALIGN_32 }, + { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4mrY, TB_ALIGN_32 }, + { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_16 }, + { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_16 }, + { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4mrY, TB_ALIGN_32 }, + { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4mrY, TB_ALIGN_32 }, + { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_16 }, + { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_16 }, + { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, TB_ALIGN_32 }, + { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_32 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { @@ -1237,6 +1267,36 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_32 }, { X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_32 }, { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 }, + + // FMA4 foldable patterns + { X86::VFMADDSS4rr, X86::VFMADDSS4rm, TB_ALIGN_16 }, + { X86::VFMADDSD4rr, X86::VFMADDSD4rm, TB_ALIGN_16 }, + { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_16 }, + { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_16 }, + { X86::VFMADDPS4rrY, X86::VFMADDPS4rmY, TB_ALIGN_32 }, + { X86::VFMADDPD4rrY, X86::VFMADDPD4rmY, TB_ALIGN_32 }, + { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_16 }, + { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_16 }, + { X86::VFNMADDPS4rrY, X86::VFNMADDPS4rmY, TB_ALIGN_32 }, + { X86::VFNMADDPD4rrY, X86::VFNMADDPD4rmY, TB_ALIGN_32 }, + { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, TB_ALIGN_16 }, + { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, TB_ALIGN_16 }, + { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_16 }, + { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_16 }, + { X86::VFMSUBPS4rrY, X86::VFMSUBPS4rmY, TB_ALIGN_32 }, + { X86::VFMSUBPD4rrY, X86::VFMSUBPD4rmY, TB_ALIGN_32 }, + { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_16 }, + { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_16 }, + { 
X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4rmY, TB_ALIGN_32 }, + { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4rmY, TB_ALIGN_32 }, + { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_16 }, + { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_16 }, + { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4rmY, TB_ALIGN_32 }, + { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4rmY, TB_ALIGN_32 }, + { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_16 }, + { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_16 }, + { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_32 }, + { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_32 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) { @@ -1786,10 +1846,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineInstr *MI = MBBI; MachineFunction &MF = *MI->getParent()->getParent(); // All instructions input are two-addr instructions. Get the known operands. - unsigned Dest = MI->getOperand(0).getReg(); - unsigned Src = MI->getOperand(1).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isKill = MI->getOperand(1).isKill(); + const MachineOperand &Dest = MI->getOperand(0); + const MachineOperand &Src = MI->getOperand(1); MachineInstr *NewMI = NULL; // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When @@ -1807,11 +1865,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned B = MI->getOperand(1).getReg(); unsigned C = MI->getOperand(2).getReg(); if (B != C) return 0; - unsigned A = MI->getOperand(0).getReg(); unsigned M = MI->getOperand(3).getImm(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri)) - .addReg(A, RegState::Define | getDeadRegState(isDead)) - .addReg(B, getKillRegState(isKill)).addImm(M); + .addOperand(Dest).addOperand(Src).addImm(M); break; } case X86::SHUFPDrri: { @@ -1821,15 +1877,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned B = MI->getOperand(1).getReg(); unsigned C = MI->getOperand(2).getReg(); if (B != C) return 0; - unsigned A = MI->getOperand(0).getReg(); unsigned M = MI->getOperand(3).getImm(); // Convert to PSHUFD mask. M = ((M & 1) << 1) | ((M & 1) << 3) | ((M & 2) << 4) | ((M & 2) << 6)| 0x44; NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri)) - .addReg(A, RegState::Define | getDeadRegState(isDead)) - .addReg(B, getKillRegState(isKill)).addImm(M); + .addOperand(Dest).addOperand(Src).addImm(M); break; } case X86::SHL64ri: { @@ -1840,15 +1894,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (ShAmt == 0 || ShAmt >= 4) return 0; // LEA can't handle RSP. - if (TargetRegisterInfo::isVirtualRegister(Src) && - !MF.getRegInfo().constrainRegClass(Src, &X86::GR64_NOSPRegClass)) + if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) && + !MF.getRegInfo().constrainRegClass(Src.getReg(), + &X86::GR64_NOSPRegClass)) return 0; NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) - .addReg(Dest, RegState::Define | getDeadRegState(isDead)) - .addReg(0).addImm(1 << ShAmt) - .addReg(Src, getKillRegState(isKill)) - .addImm(0).addReg(0); + .addOperand(Dest) + .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0); break; } case X86::SHL32ri: { @@ -1859,15 +1912,15 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (ShAmt == 0 || ShAmt >= 4) return 0; // LEA can't handle ESP. 
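The surrounding convertToThreeAddress hunks turn small left shifts into LEA; a tiny self-contained sketch of the legality check and the scale it produces follows. The helper names are made up for illustration only.

#include <cassert>

// A shift left by 1-3 can become an LEA with scale 1 << ShAmt (2, 4 or 8);
// a shift by 0 or by 4 and more has no matching LEA scale, as checked above.
static bool shiftFitsLEA(unsigned ShAmt) { return ShAmt != 0 && ShAmt < 4; }
static unsigned leaScale(unsigned ShAmt) { return 1u << ShAmt; }

int main() {
  assert(shiftFitsLEA(3) && leaScale(3) == 8);   // shl $3 -> lea (,%reg,8)
  assert(!shiftFitsLEA(0) && !shiftFitsLEA(4));  // keep the plain shift
  return 0;
}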
- if (TargetRegisterInfo::isVirtualRegister(Src) && - !MF.getRegInfo().constrainRegClass(Src, &X86::GR32_NOSPRegClass)) + if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) && + !MF.getRegInfo().constrainRegClass(Src.getReg(), + &X86::GR32_NOSPRegClass)) return 0; unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(Dest, RegState::Define | getDeadRegState(isDead)) - .addReg(0).addImm(1 << ShAmt) - .addReg(Src, getKillRegState(isKill)).addImm(0).addReg(0); + .addOperand(Dest) + .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0); break; } case X86::SHL16ri: { @@ -1880,10 +1933,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (DisableLEA16) return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) - .addReg(Dest, RegState::Define | getDeadRegState(isDead)) - .addReg(0).addImm(1 << ShAmt) - .addReg(Src, getKillRegState(isKill)) - .addImm(0).addReg(0); + .addOperand(Dest) + .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0); break; } default: { @@ -1906,14 +1957,12 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, (const TargetRegisterClass*)&X86::GR32_NOSPRegClass; // LEA can't handle RSP. - if (TargetRegisterInfo::isVirtualRegister(Src) && - !MF.getRegInfo().constrainRegClass(Src, RC)) + if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) && + !MF.getRegInfo().constrainRegClass(Src.getReg(), RC)) return 0; - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, 1); + NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addOperand(Dest).addOperand(Src), 1); break; } case X86::INC16r: @@ -1921,10 +1970,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (DisableLEA16) return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, 1); + NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + .addOperand(Dest).addOperand(Src), 1); break; case X86::DEC64r: case X86::DEC32r: @@ -1936,14 +1983,12 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, (const TargetRegisterClass*)&X86::GR64_NOSPRegClass : (const TargetRegisterClass*)&X86::GR32_NOSPRegClass; // LEA can't handle RSP. - if (TargetRegisterInfo::isVirtualRegister(Src) && - !MF.getRegInfo().constrainRegClass(Src, RC)) + if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) && + !MF.getRegInfo().constrainRegClass(Src.getReg(), RC)) return 0; - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, -1); + NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addOperand(Dest).addOperand(Src), -1); break; } case X86::DEC16r: @@ -1951,10 +1996,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (DisableLEA16) return is64Bit ? 
convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, -1); + NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + .addOperand(Dest).addOperand(Src), -1); break; case X86::ADD64rr: case X86::ADD64rr_DB: @@ -1981,9 +2024,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return 0; NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, Src2, isKill2); + .addOperand(Dest), + Src.getReg(), Src.isKill(), Src2, isKill2); // Preserve undefness of the operands. bool isUndef = MI->getOperand(1).isUndef(); @@ -2003,9 +2045,15 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned Src2 = MI->getOperand(2).getReg(); bool isKill2 = MI->getOperand(2).isKill(); NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, Src2, isKill2); + .addOperand(Dest), + Src.getReg(), Src.isKill(), Src2, isKill2); + + // Preserve undefness of the operands. + bool isUndef = MI->getOperand(1).isUndef(); + bool isUndef2 = MI->getOperand(2).isUndef(); + NewMI->getOperand(1).setIsUndef(isUndef); + NewMI->getOperand(3).setIsUndef(isUndef2); + if (LV && isKill2) LV->replaceKillInstruction(Src2, MI, NewMI); break; @@ -2015,10 +2063,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::ADD64ri32_DB: case X86::ADD64ri8_DB: assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, MI->getOperand(2).getImm()); + NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) + .addOperand(Dest).addOperand(Src), + MI->getOperand(2).getImm()); break; case X86::ADD32ri: case X86::ADD32ri8: @@ -2026,10 +2073,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::ADD32ri8_DB: { assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, MI->getOperand(2).getImm()); + NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addOperand(Dest).addOperand(Src), + MI->getOperand(2).getImm()); break; } case X86::ADD16ri: @@ -2039,10 +2085,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (DisableLEA16) return is64Bit ? 
convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, MI->getOperand(2).getImm()); + NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + .addOperand(Dest).addOperand(Src), + MI->getOperand(2).getImm()); break; } } @@ -2051,10 +2096,10 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (!NewMI) return 0; if (LV) { // Update live variables - if (isKill) - LV->replaceKillInstruction(Src, MI, NewMI); - if (isDead) - LV->replaceKillInstruction(Dest, MI, NewMI); + if (Src.isKill()) + LV->replaceKillInstruction(Src.getReg(), MI, NewMI); + if (Dest.isDead()) + LV->replaceKillInstruction(Dest.getReg(), MI, NewMI); } MFI->insert(MBBI, NewMI); // Insert the new inst @@ -3444,6 +3489,13 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case X86::FsFLD0SS: case X86::FsFLD0SD: return Expand2AddrUndef(MI, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr)); + case X86::AVX_SET0: + assert(HasAVX && "AVX not supported"); + return Expand2AddrUndef(MI, get(X86::VXORPSYrr)); + case X86::V_SETALLONES: + return Expand2AddrUndef(MI, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr)); + case X86::AVX2_SETALLONES: + return Expand2AddrUndef(MI, get(X86::VPCMPEQDYrr)); case X86::TEST8ri_NOREX: MI->setDesc(get(X86::TEST8ri)); return true; @@ -3557,14 +3609,16 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, OpcodeTablePtr = &RegOp2MemOpTable2Addr; isTwoAddrFold = true; } else if (i == 0) { // If operand 0 - if (MI->getOpcode() == X86::MOV64r0) - NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI); - else if (MI->getOpcode() == X86::MOV32r0) - NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI); - else if (MI->getOpcode() == X86::MOV16r0) - NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI); - else if (MI->getOpcode() == X86::MOV8r0) - NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI); + unsigned Opc = 0; + switch (MI->getOpcode()) { + default: break; + case X86::MOV64r0: Opc = X86::MOV64mi32; break; + case X86::MOV32r0: Opc = X86::MOV32mi; break; + case X86::MOV16r0: Opc = X86::MOV16mi; break; + case X86::MOV8r0: Opc = X86::MOV8mi; break; + } + if (Opc) + NewMI = MakeM0Inst(*this, Opc, MOs, MI); if (NewMI) return NewMI; @@ -3793,15 +3847,12 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Alignment = (*LoadMI->memoperands_begin())->getAlignment(); else switch (LoadMI->getOpcode()) { - case X86::AVX_SET0PSY: - case X86::AVX_SET0PDY: case X86::AVX2_SETALLONES: - case X86::AVX2_SET0: + case X86::AVX_SET0: Alignment = 32; break; case X86::V_SET0: case X86::V_SETALLONES: - case X86::AVX_SETALLONES: Alignment = 16; break; case X86::FsFLD0SD: @@ -3837,11 +3888,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, switch (LoadMI->getOpcode()) { case X86::V_SET0: case X86::V_SETALLONES: - case X86::AVX_SET0PSY: - case X86::AVX_SET0PDY: - case X86::AVX_SETALLONES: case X86::AVX2_SETALLONES: - case X86::AVX2_SET0: + case X86::AVX_SET0: case X86::FsFLD0SD: case X86::FsFLD0SS: { // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. 
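A compact stand-alone sketch of the zero / all-ones idioms that the expandPostRAPseudo cases added above lower to; string mnemonics stand in for the real instruction opcodes, and the helper is purely illustrative.

#include <cassert>
#include <string>

// Which pseudo is being expanded after register allocation.
enum class Pseudo { FsFLD0, AVX_SET0, V_SETALLONES, AVX2_SETALLONES };

// Zeros come from xor-with-self, all-ones from pcmpeq-with-self; the VEX
// (AVX) forms are used when available, matching the cases in the patch.
static std::string expandIdiom(Pseudo P, bool HasAVX) {
  switch (P) {
  case Pseudo::FsFLD0:          return HasAVX ? "vxorps xmm,xmm,xmm"
                                              : "xorps xmm,xmm";
  case Pseudo::AVX_SET0:        return "vxorps ymm,ymm,ymm";     // 256-bit zero
  case Pseudo::V_SETALLONES:    return HasAVX ? "vpcmpeqd xmm,xmm,xmm"
                                              : "pcmpeqd xmm,xmm";
  case Pseudo::AVX2_SETALLONES: return "vpcmpeqd ymm,ymm,ymm";   // 256-bit ones
  }
  return "";
}

int main() {
  assert(expandIdiom(Pseudo::V_SETALLONES, false) == "pcmpeqd xmm,xmm");
  assert(expandIdiom(Pseudo::AVX_SET0, true) == "vxorps ymm,ymm,ymm");
  return 0;
}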
@@ -3873,15 +3921,12 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Ty = Type::getFloatTy(MF.getFunction()->getContext()); else if (Opc == X86::FsFLD0SD) Ty = Type::getDoubleTy(MF.getFunction()->getContext()); - else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY) - Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8); - else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX2_SET0) + else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0) Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8); else Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); - bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES || - Opc == X86::AVX2_SETALLONES); + bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES); const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) : Constant::getNullValue(Ty); unsigned CPI = MCP.getConstantPoolIndex(C, Alignment); @@ -3956,6 +4001,8 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, OpcodeTablePtr = &RegOp2MemOpTable1; } else if (OpNum == 2) { OpcodeTablePtr = &RegOp2MemOpTable2; + } else if (OpNum == 3) { + OpcodeTablePtr = &RegOp2MemOpTable3; } if (OpcodeTablePtr && OpcodeTablePtr->count(Opc)) diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index d293156..304676d 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -114,7 +114,7 @@ def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>; def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>; def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER, - [SDNPHasChain]>; + [SDNPHasChain,SDNPSideEffect]>; def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER, [SDNPHasChain]>; def X86SFence : SDNode<"X86ISD::SFENCE", SDT_X86MEMBARRIER, @@ -552,14 +552,21 @@ def HasMMX : Predicate<"Subtarget->hasMMX()">; def Has3DNow : Predicate<"Subtarget->has3DNow()">; def Has3DNowA : Predicate<"Subtarget->has3DNowA()">; def HasSSE1 : Predicate<"Subtarget->hasSSE1()">; +def UseSSE1 : Predicate<"Subtarget->hasSSE1() && Subtarget->hasNoAVX()">; def HasSSE2 : Predicate<"Subtarget->hasSSE2()">; +def UseSSE2 : Predicate<"Subtarget->hasSSE2() && Subtarget->hasNoAVX()">; def HasSSE3 : Predicate<"Subtarget->hasSSE3()">; +def UseSSE3 : Predicate<"Subtarget->hasSSE3() && Subtarget->hasNoAVX()">; def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">; +def UseSSSE3 : Predicate<"Subtarget->hasSSSE3() && Subtarget->hasNoAVX()">; def HasSSE41 : Predicate<"Subtarget->hasSSE41()">; +def UseSSE41 : Predicate<"Subtarget->hasSSE41() && Subtarget->hasNoAVX()">; def HasSSE42 : Predicate<"Subtarget->hasSSE42()">; +def UseSSE42 : Predicate<"Subtarget->hasSSE42() && Subtarget->hasNoAVX()">; def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">; def HasAVX : Predicate<"Subtarget->hasAVX()">; def HasAVX2 : Predicate<"Subtarget->hasAVX2()">; +def HasAVX1Only : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; def HasAES : Predicate<"Subtarget->hasAES()">; diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index c8f40bb..bd54858 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -118,11 +118,11 @@ let Constraints = "$src1 = $dst" in { /// Unary MMX instructions requiring SSSE3. 
multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr, Intrinsic IntId64, OpndItins itins> { - def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), + def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR64:$dst, (IntId64 VR64:$src))], itins.rr>; - def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), + def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR64:$dst, (IntId64 (bitconvert (memopmmx addr:$src))))], @@ -134,11 +134,11 @@ let ImmT = NoImm, Constraints = "$src1 = $dst" in { multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr, Intrinsic IntId64, OpndItins itins> { let isCommutable = 0 in - def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), + def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))], itins.rr>; - def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), + def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, @@ -149,11 +149,11 @@ multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr, /// PALIGN MMX instructions (require SSSE3). multiclass ssse3_palign_mm<string asm, Intrinsic IntId> { - def R64irr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), + def R64irr : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>; - def R64irm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), + def R64irm : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR64:$dst, (IntId VR64:$src1, @@ -163,12 +163,10 @@ multiclass ssse3_palign_mm<string asm, Intrinsic IntId> { multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, string asm, OpndItins itins, Domain d> { - def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [(set DstRC:$dst, (Int SrcRC:$src))], - itins.rr, d>; - def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [(set DstRC:$dst, (Int (ld_frag addr:$src)))], - itins.rm, d>; + def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, + [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr, d>; + def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, + [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm, d>; } multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC, @@ -243,29 +241,30 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), [(store (x86mmx VR64:$src), addr:$dst)], IIC_MMX_MOVQ_RM>; -def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), - (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, - (x86mmx (bitconvert - (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))))))], - IIC_MMX_MOVQ_RR>; - -def MMX_MOVQ2DQrr : S2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst), - (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v2i64 (scalar_to_vector - (i64 (bitconvert (x86mmx VR64:$src))))))], - IIC_MMX_MOVQ_RR>; +def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, 
MRMSrcReg, (outs VR64:$dst), + (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}", + [(set VR64:$dst, + (x86mmx (bitconvert + (i64 (vector_extract (v2i64 VR128:$src), + (iPTR 0))))))], + IIC_MMX_MOVQ_RR>; + +def MMX_MOVQ2DQrr : MMXS2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst), + (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v2i64 + (scalar_to_vector + (i64 (bitconvert (x86mmx VR64:$src))))))], + IIC_MMX_MOVQ_RR>; let neverHasSideEffects = 1 in -def MMX_MOVQ2FR64rr: S2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst), - (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [], - IIC_MMX_MOVQ_RR>; +def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst), + (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", + [], IIC_MMX_MOVQ_RR>; -def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), - (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", [], - IIC_MMX_MOVQ_RR>; +def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), + (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", + [], IIC_MMX_MOVQ_RR>; def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movntq\t{$src, $dst|$dst, $src}", @@ -577,6 +576,7 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), IIC_MMX_MASKMOV>; // 64-bit bit convert. +let Predicates = [HasSSE2] in { def : Pat<(x86mmx (bitconvert (i64 GR64:$src))), (MMX_MOVD64to64rr GR64:$src)>; def : Pat<(i64 (bitconvert (x86mmx VR64:$src))), @@ -585,5 +585,6 @@ def : Pat<(f64 (bitconvert (x86mmx VR64:$src))), (MMX_MOVQ2FR64rr VR64:$src)>; def : Pat<(x86mmx (bitconvert (f64 FR64:$src))), (MMX_MOVFR642Qrr FR64:$src)>; +} diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 220c06d..17e91a6 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -251,35 +251,37 @@ def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), // A 128-bit subvector extract from the first 256-bit vector position // is a subregister copy that needs no instruction. -def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (i32 0))), +def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (iPTR 0))), (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm))>; -def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (i32 0))), +def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (iPTR 0))), (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm))>; -def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (i32 0))), +def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (iPTR 0))), (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm))>; -def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (i32 0))), +def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (iPTR 0))), (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm))>; -def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (i32 0))), +def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (iPTR 0))), (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), sub_xmm))>; -def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (i32 0))), +def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (iPTR 0))), (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), sub_xmm))>; // A 128-bit subvector insert to the first 256-bit vector position // is a subregister copy that needs no instruction. 
-def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (i32 0)), +let AddedComplexity = 25 in { // to give priority over vinsertf128rm +def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)), (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; -def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (i32 0)), +def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)), (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; -def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (i32 0)), +def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)), (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; -def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (i32 0)), +def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)), (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; -def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (i32 0)), +def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (iPTR 0)), (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; -def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (i32 0)), +def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (iPTR 0)), (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; +} // Implicitly promote a 32-bit scalar to a vector. def : Pat<(v4f32 (scalar_to_vector FR32:$src)), @@ -362,7 +364,7 @@ let Predicates = [HasAVX] in { def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>; } -// Alias instructions that map fld0 to pxor for sse. +// Alias instructions that map fld0 to xorps for sse or vxorps for avx. // This is expanded by ExpandPostRAPseudos. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isPseudo = 1 in { @@ -382,11 +384,11 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1, neverHasSideEffects = 1 in { -def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", []>; + isPseudo = 1 in { +def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4f32 immAllZerosV))]>; } -def : Pat<(v4f32 immAllZerosV), (V_SET0)>; def : Pat<(v2f64 immAllZerosV), (V_SET0)>; def : Pat<(v4i32 immAllZerosV), (V_SET0)>; def : Pat<(v2i64 immAllZerosV), (V_SET0)>; @@ -394,35 +396,29 @@ def : Pat<(v8i16 immAllZerosV), (V_SET0)>; def : Pat<(v16i8 immAllZerosV), (V_SET0)>; -// The same as done above but for AVX. The 256-bit ISA does not support PI, +// The same as done above but for AVX. The 256-bit AVX1 ISA doesn't support PI, // and doesn't need it because on sandy bridge the register is set to zero // at the rename stage without using any execution unit, so SET0PSY // and SET0PDY can be used for vector int instructions without penalty -// FIXME: Change encoding to pseudo! This is blocked right now by the x86 -// JIT implementatioan, it does not expand the instructions below like -// X86MCInstLower does. 
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1 in { -let Predicates = [HasAVX] in { -def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", - [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V; -def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", - [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V; -} -let Predicates = [HasAVX2], neverHasSideEffects = 1 in -def AVX2_SET0 : PDI<0xef, MRMInitReg, (outs VR256:$dst), (ins), "", - []>, VEX_4V; + isPseudo = 1, Predicates = [HasAVX] in { +def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "", + [(set VR256:$dst, (v8f32 immAllZerosV))]>; } -let Predicates = [HasAVX2], AddedComplexity = 5 in { - def : Pat<(v4i64 immAllZerosV), (AVX2_SET0)>; - def : Pat<(v8i32 immAllZerosV), (AVX2_SET0)>; - def : Pat<(v16i16 immAllZerosV), (AVX2_SET0)>; - def : Pat<(v32i8 immAllZerosV), (AVX2_SET0)>; +let Predicates = [HasAVX] in + def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>; + +let Predicates = [HasAVX2] in { + def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>; + def : Pat<(v8i32 immAllZerosV), (AVX_SET0)>; + def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>; + def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>; } -// AVX has no support for 256-bit integer instructions, but since the 128-bit +// AVX1 has no support for 256-bit integer instructions, but since the 128-bit // VPXOR instruction writes zero to its upper part, it's safe build zeros. +let Predicates = [HasAVX1Only] in { def : Pat<(v32i8 immAllZerosV), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>; def : Pat<(bc_v32i8 (v8f32 immAllZerosV)), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>; @@ -438,22 +434,17 @@ def : Pat<(bc_v8i32 (v8f32 immAllZerosV)), def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>; def : Pat<(bc_v4i64 (v8f32 immAllZerosV)), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>; +} // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-ones value if folding it would be beneficial. -// FIXME: Change encoding to pseudo! This is blocked right now by the x86 -// JIT implementation, it does not expand the instructions below like -// X86MCInstLower does. 
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1, ExeDomain = SSEPackedInt in { - let Predicates = [HasAVX] in - def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V; - def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4i32 immAllOnesV))]>; + isPseudo = 1 in { + def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4i32 immAllOnesV))]>; let Predicates = [HasAVX2] in - def AVX2_SETALLONES : PDI<0x76, MRMInitReg, (outs VR256:$dst), (ins), "", - [(set VR256:$dst, (v8i32 immAllOnesV))]>, VEX_4V; + def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "", + [(set VR256:$dst, (v8i32 immAllOnesV))]>; } @@ -605,27 +596,27 @@ let Predicates = [HasAVX] in { // Represent the same patterns above but in the form they appear for // 256-bit types def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, - (v4i32 (scalar_to_vector (loadi32 addr:$src))), (i32 0)))), + (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, - (v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))), + (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, - (v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))), + (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>; } def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, - (v4f32 (scalar_to_vector FR32:$src)), (i32 0)))), + (v4f32 (scalar_to_vector FR32:$src)), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)), sub_xmm)>; def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, - (v2f64 (scalar_to_vector FR64:$src)), (i32 0)))), + (v2f64 (scalar_to_vector FR64:$src)), (iPTR 0)))), (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)), sub_xmm)>; def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, - (v2i64 (scalar_to_vector (loadi64 addr:$src))), (i32 0)))), + (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>; // Move low f64 and clear high bits. @@ -704,7 +695,7 @@ let Predicates = [HasAVX] in { (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; } -let Predicates = [HasSSE1] in { +let Predicates = [UseSSE1] in { let AddedComplexity = 15 in { // Move scalar to XMM zero-extended, zeroing a VR128 then do a // MOVSS to the lower bits. @@ -738,7 +729,7 @@ let Predicates = [HasSSE1] in { (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { let AddedComplexity = 15 in { // Move scalar to XMM zero-extended, zeroing a VR128 then do a // MOVSD to the lower bits. 
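Note on the zero/all-ones idioms touched in the hunks above: V_SET0, AVX_SET0, V_SETALLONES and AVX2_SETALLONES end up as encoding-less pseudo-instructions, so the fixed encodings and their hand-written MCInst lowerings are dropped and the comment's "expanded by ExpandPostRAPseudos" path takes over. The standalone C++ sketch below only models the opcode remapping such an expansion performs; the function name, the enum, and the exact SSE/AVX opcode choices are illustrative assumptions inferred from the lowering cases removed from X86MCInstLower.cpp later in this patch, not a copy of the real X86InstrInfo code.

// Standalone model (not LLVM code): how a post-RA expansion step can map the
// all-zeros / all-ones pseudos onto real dependency-breaking idioms depending
// on the available ISA. Opcode names mirror the ones appearing in the patch.
#include <cassert>
#include <cstdio>

enum Opcode { V_SET0, AVX_SET0, V_SETALLONES, AVX2_SETALLONES,
              XORPSrr, VXORPSrr, VXORPSYrr,
              PCMPEQDrr, VPCMPEQDrr, VPCMPEQDYrr };

static Opcode expandZeroOnesPseudo(Opcode Pseudo, bool HasAVX) {
  switch (Pseudo) {
  case V_SET0:          return HasAVX ? VXORPSrr  : XORPSrr;    // xmm = 0
  case AVX_SET0:        return VXORPSYrr;                       // ymm = 0 (AVX; one plausible choice)
  case V_SETALLONES:    return HasAVX ? VPCMPEQDrr : PCMPEQDrr; // xmm = ~0
  case AVX2_SETALLONES: return VPCMPEQDYrr;                     // ymm = ~0 (AVX2)
  default:              return Pseudo;  // not a pseudo this step expands
  }
}

int main() {
  assert(expandZeroOnesPseudo(V_SET0, /*HasAVX=*/false) == XORPSrr);
  assert(expandZeroOnesPseudo(V_SET0, /*HasAVX=*/true)  == VXORPSrr);
  assert(expandZeroOnesPseudo(V_SETALLONES, /*HasAVX=*/true) == VPCMPEQDrr);
  std::puts("pseudo expansion model ok");
  return 0;
}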
@@ -916,16 +907,16 @@ let isCodeGenOnly = 1 in { let Predicates = [HasAVX] in { def : Pat<(v8i32 (X86vzmovl - (insert_subvector undef, (v4i32 VR128:$src), (i32 0)))), + (insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; def : Pat<(v4i64 (X86vzmovl - (insert_subvector undef, (v2i64 VR128:$src), (i32 0)))), + (insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; def : Pat<(v8f32 (X86vzmovl - (insert_subvector undef, (v4f32 VR128:$src), (i32 0)))), + (insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; def : Pat<(v4f64 (X86vzmovl - (insert_subvector undef, (v2f64 VR128:$src), (i32 0)))), + (insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; } @@ -975,10 +966,10 @@ let Predicates = [HasAVX] in { (VMOVUPDmr addr:$dst, VR128:$src)>; } -let Predicates = [HasSSE1] in +let Predicates = [UseSSE1] in def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src), (MOVUPSmr addr:$dst, VR128:$src)>; -let Predicates = [HasSSE2] in +let Predicates = [UseSSE2] in def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src), (MOVUPDmr addr:$dst, VR128:$src)>; @@ -1028,12 +1019,52 @@ let Predicates = [HasAVX] in { (VMOVUPSYmr addr:$dst, VR256:$src)>; def : Pat<(store (v32i8 VR256:$src), addr:$dst), (VMOVUPSYmr addr:$dst, VR256:$src)>; + + // Special patterns for storing subvector extracts of lower 128-bits + // Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr + def : Pat<(alignedstore (v2f64 (extract_subvector + (v4f64 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVAPDmr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(alignedstore (v4f32 (extract_subvector + (v8f32 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVAPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(alignedstore (v2i64 (extract_subvector + (v4i64 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVAPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(alignedstore (v4i32 (extract_subvector + (v8i32 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVAPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(alignedstore (v8i16 (extract_subvector + (v16i16 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(alignedstore (v16i8 (extract_subvector + (v32i8 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVAPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + + def : Pat<(store (v2f64 (extract_subvector + (v4f64 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVUPDmr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(store (v4f32 (extract_subvector + (v8f32 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVUPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(store (v2i64 (extract_subvector + (v4i64 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVUPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(store (v4i32 (extract_subvector + (v8i32 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(store (v8i16 (extract_subvector + (v16i16 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(store (v16i8 (extract_subvector + (v32i8 VR256:$src), (iPTR 0))), addr:$dst), + 
(VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; } // Use movaps / movups for SSE integer load / store (one byte shorter). // The instructions selected below are then converted to MOVDQA/MOVDQU // during the SSE domain pass. -let Predicates = [HasSSE1] in { +let Predicates = [UseSSE1] in { def : Pat<(alignedloadv2i64 addr:$src), (MOVAPSrm addr:$src)>; def : Pat<(loadv2i64 addr:$src), @@ -1180,7 +1211,7 @@ let Predicates = [HasAVX] in { (VMOVLPDmr addr:$src1, VR128:$src2)>; } -let Predicates = [HasSSE1] in { +let Predicates = [UseSSE1] in { // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)), (iPTR 0))), addr:$src1), @@ -1205,7 +1236,7 @@ let Predicates = [HasSSE1] in { (MOVLPSmr addr:$src1, VR128:$src2)>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { // Shuffle with MOVLPD def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))), (MOVLPDrm VR128:$src1, addr:$src2)>; @@ -1279,7 +1310,7 @@ let Predicates = [HasAVX] in { (VMOVHPDrm VR128:$src1, addr:$src2)>; } -let Predicates = [HasSSE1] in { +let Predicates = [UseSSE1] in { // MOVHPS patterns def : Pat<(X86Movlhps VR128:$src1, (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))), @@ -1289,7 +1320,7 @@ let Predicates = [HasSSE1] in { (MOVHPSrm VR128:$src1, addr:$src2)>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem // is during lowering, where it's not possible to recognize the load fold // cause it has two uses through a bitcast. One use disappears at isel time @@ -1346,7 +1377,7 @@ let Predicates = [HasAVX] in { (VMOVHLPSrr VR128:$src1, VR128:$src2)>; } -let Predicates = [HasSSE1] in { +let Predicates = [UseSSE1] in { // MOVLHPS patterns def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), (MOVLHPSrr VR128:$src1, VR128:$src2)>; @@ -1456,7 +1487,7 @@ def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}", def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}", (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>; -let Predicates = [HasAVX], AddedComplexity = 1 in { +let Predicates = [HasAVX] in { def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), @@ -1633,7 +1664,7 @@ defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, i256mem, defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem, "cvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle, SSE_CVT_PS>, - TB, Requires<[HasSSE2]>; + TB, Requires<[UseSSE2]>; /// SSE 2 Only @@ -1663,7 +1694,7 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), [(set FR32:$dst, (fround (loadf64 addr:$src)))], IIC_SSE_CVT_Scalar_RM>, XD, - Requires<[HasSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>; def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -1684,13 +1715,13 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg, "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XD, Requires<[HasSSE2]>; + IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>; def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, sse_load_f64:$src2))], - IIC_SSE_CVT_Scalar_RM>, XD, 
Requires<[HasSSE2]>; + IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>; } // Convert scalar single to scalar double @@ -1709,30 +1740,28 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>; } -let AddedComplexity = 1 in { // give AVX priority - def : Pat<(f64 (fextend FR32:$src)), - (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>; - def : Pat<(fextend (loadf32 addr:$src)), - (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>; +def : Pat<(f64 (fextend FR32:$src)), + (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>; +def : Pat<(fextend (loadf32 addr:$src)), + (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>; - def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, - Requires<[HasAVX, OptForSize]>; - def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>, - Requires<[HasAVX, OptForSpeed]>; -} // AddedComplexity = 1 +def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[HasAVX, OptForSize]>; +def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>, + Requires<[HasAVX, OptForSpeed]>; def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (fextend FR32:$src))], IIC_SSE_CVT_Scalar_RR>, XS, - Requires<[HasSSE2]>; + Requires<[UseSSE2]>; def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (extloadf32 addr:$src))], IIC_SSE_CVT_Scalar_RM>, XS, - Requires<[HasSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>; // extload f32 -> f64. This matches load+fextend because we have a hack in // the isel (PreprocessForFPConvert) that can introduce loads after dag @@ -1740,9 +1769,9 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), // Since these loads aren't folded into the fextend, we have to match it // explicitly here. 
def : Pat<(fextend (loadf32 addr:$src)), - (CVTSS2SDrm addr:$src)>, Requires<[HasSSE2]>; + (CVTSS2SDrm addr:$src)>, Requires<[UseSSE2]>; def : Pat<(extloadf32 addr:$src), - (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>; + (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>; def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -1762,13 +1791,13 @@ def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, "cvtss2sd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, Requires<[HasSSE2]>; + IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>; def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], - IIC_SSE_CVT_Scalar_RM>, XS, Requires<[HasSSE2]>; + IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>; } // Convert packed single/double fp to doubleword @@ -1904,7 +1933,7 @@ let Predicates = [HasAVX] in { (VCVTTPS2DQYrm addr:$src)>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), (CVTDQ2PSrr VR128:$src)>; def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), @@ -1978,10 +2007,10 @@ def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], IIC_SSE_CVT_PD_RR>, TB, VEX; -let neverHasSideEffects = 1, mayLoad = 1 in def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RM>, TB, VEX; + "vcvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))], + IIC_SSE_CVT_PD_RM>, TB, VEX; def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, @@ -1994,15 +2023,15 @@ def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), IIC_SSE_CVT_PD_RM>, TB, VEX; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], IIC_SSE_CVT_PD_RR>, TB; -let neverHasSideEffects = 1, mayLoad = 1 in def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "cvtps2pd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RM>, TB; + "cvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))], + IIC_SSE_CVT_PD_RM>, TB; } // Convert Packed DW Integers to Packed Double FP @@ -2105,11 +2134,11 @@ let Predicates = [HasAVX] in { (VCVTPS2PDrr VR128:$src)>; def : Pat<(v4f64 (fextend (v4f32 VR128:$src))), (VCVTPS2PDYrr VR128:$src)>; - def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))), + def : Pat<(v4f64 (extloadv4f32 addr:$src)), (VCVTPS2PDYrm addr:$src)>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { // Match fextend for 128 conversions def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))), (CVTPS2PDrr VR128:$src)>; @@ -2336,14 +2365,14 @@ def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)), (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>; } -let Predicates = [HasSSE1] in { +let Predicates = [UseSSE1] in { def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), (CMPPSrri (v4f32 VR128:$src1), (v4f32 
VR128:$src2), imm:$cc)>; def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), @@ -2420,7 +2449,7 @@ let Predicates = [HasAVX] in { (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>; } -let Predicates = [HasSSE1] in { +let Predicates = [UseSSE1] in { def : Pat<(v4i32 (X86Shufp VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; @@ -2428,7 +2457,7 @@ let Predicates = [HasSSE1] in { (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { // Generic SHUFPD patterns def : Pat<(v2i64 (X86Shufp VR128:$src1, (memopv2i64 addr:$src2), (i8 imm:$imm))), @@ -2500,7 +2529,27 @@ let Constraints = "$src1 = $dst" in { SSEPackedDouble>, TB, OpSize; } // Constraints = "$src1 = $dst" -let Predicates = [HasAVX], AddedComplexity = 1 in { +let Predicates = [HasAVX1Only] in { + def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), + (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)), + (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), + (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)), + (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; + + def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))), + (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)), + (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))), + (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)), + (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; +} + +let Predicates = [HasAVX] in { // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the // problem is during lowering, where it's not possible to recognize the load // fold cause it has two uses through a bitcast. One use disappears at isel @@ -2509,7 +2558,7 @@ let Predicates = [HasAVX], AddedComplexity = 1 in { (VUNPCKLPDrr VR128:$src, VR128:$src)>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the // problem is during lowering, where it's not possible to recognize the load // fold cause it has two uses through a bitcast. 
One use disappears at isel @@ -2578,16 +2627,16 @@ defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd", def : Pat<(i32 (X86fgetsign FR32:$src)), (MOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>, - Requires<[HasSSE1]>; + Requires<[UseSSE1]>; def : Pat<(i64 (X86fgetsign FR32:$src)), (MOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>, - Requires<[HasSSE1]>; + Requires<[UseSSE1]>; def : Pat<(i32 (X86fgetsign FR64:$src)), (MOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>, - Requires<[HasSSE2]>; + Requires<[UseSSE2]>; def : Pat<(i64 (X86fgetsign FR64:$src)), (MOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>, - Requires<[HasSSE2]>; + Requires<[UseSSE2]>; //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Logical Instructions @@ -2683,14 +2732,12 @@ multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr, } // Alias bitwise logical operations using SSE logical ops on packed FP values. -let mayLoad = 0 in { - defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand, - SSE_BIT_ITINS_P>; - defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for, - SSE_BIT_ITINS_P>; - defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor, - SSE_BIT_ITINS_P>; -} +defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand, + SSE_BIT_ITINS_P>; +defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for, + SSE_BIT_ITINS_P>; +defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor, + SSE_BIT_ITINS_P>; let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef, @@ -2794,27 +2841,23 @@ multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode, SizeItins itins, bit Is2Addr = 1> { - let mayLoad = 0 in { defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128, v4f32, f128mem, memopv4f32, SSEPackedSingle, itins.s, Is2Addr>, TB; defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128, v2f64, f128mem, memopv2f64, SSEPackedDouble, itins.d, Is2Addr>, TB, OpSize; - } } multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode, SizeItins itins> { - let mayLoad = 0 in { - defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256, + defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256, v8f32, f256mem, memopv8f32, SSEPackedSingle, itins.s, 0>, TB; - defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256, + defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256, v4f64, f256mem, memopv4f64, SSEPackedDouble, itins.d, 0>, TB, OpSize; - } } multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, @@ -2924,7 +2967,7 @@ let Constraints = "$src1 = $dst" in { } } -let isCommutable = 1, isCodeGenOnly = 1 in { +let isCodeGenOnly = 1 in { defm VMAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S, 0>, VEX_4V, VEX_LIG; defm VMAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P, 0>, @@ -2978,7 +3021,7 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS, - Requires<[HasSSE1, OptForSize]>; + Requires<[UseSSE1, OptForSize]>; def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>; @@ -2992,7 +3035,7 @@ multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> { def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - let mayLoad = 1 in + let mayLoad = 1 in { def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; @@ -3000,6 +3043,7 @@ multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> { (ins VR128:$src1, ssmem:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + } } /// sse1_fp_unop_p - SSE1 unops in packed form. @@ -3062,7 +3106,7 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD, - Requires<[HasSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>; def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>; @@ -3072,20 +3116,20 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, } /// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form. +let hasSideEffects = 0 in multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> { - let neverHasSideEffects = 1 in { def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - let mayLoad = 1 in + let mayLoad = 1 in { def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - } def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + } } /// sse2_fp_unop_p - SSE2 unops in vector forms. 
@@ -3176,7 +3220,6 @@ let Predicates = [HasAVX] in { SSE_RCPP>, VEX; } -let AddedComplexity = 1 in { def : Pat<(f32 (fsqrt FR32:$src)), (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>; def : Pat<(f32 (fsqrt (load addr:$src))), @@ -3199,9 +3242,8 @@ def : Pat<(f32 (X86frcp FR32:$src)), def : Pat<(f32 (X86frcp (load addr:$src))), (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX, OptForSize]>; -} -let Predicates = [HasAVX], AddedComplexity = 1 in { +let Predicates = [HasAVX] in { def : Pat<(int_x86_sse_sqrt_ss VR128:$src), (COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS VR128:$src, FR32)), @@ -3322,7 +3364,7 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), IIC_SSE_MOVNT>; def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), - (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>; // There is no AVX form for instructions below this point def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), @@ -3482,7 +3524,7 @@ def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", - [], IIC_SSE_MOVU_P_RR>, XS, Requires<[HasSSE2]>; + [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; // For Disassembler let isCodeGenOnly = 1 in { @@ -3492,7 +3534,7 @@ def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", - [], IIC_SSE_MOVU_P_RR>, XS, Requires<[HasSSE2]>; + [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; } let canFoldAsLoad = 1, mayLoad = 1 in { @@ -3504,7 +3546,7 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqu\t{$src, $dst|$dst, $src}", [/*(set VR128:$dst, (loadv2i64 addr:$src))*/], IIC_SSE_MOVU_P_RM>, - XS, Requires<[HasSSE2]>; + XS, Requires<[UseSSE2]>; } let mayStore = 1 in { @@ -3516,7 +3558,7 @@ def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [/*(store (v2i64 VR128:$src), addr:$dst)*/], IIC_SSE_MOVU_P_MR>, - XS, Requires<[HasSSE2]>; + XS, Requires<[UseSSE2]>; } // Intrinsic forms of MOVDQU load and store @@ -3530,7 +3572,7 @@ def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)], IIC_SSE_MOVU_P_MR>, - XS, Requires<[HasSSE2]>; + XS, Requires<[UseSSE2]>; } // ExeDomain = SSEPackedInt @@ -4028,7 +4070,7 @@ let Predicates = [HasAVX2] in { (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>; def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2), @@ -4210,7 +4252,7 @@ let Predicates = [HasAVX2] in { defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>, XD, VEX; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { let AddedComplexity = 5 in defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, X86PShufd>, TB, OpSize; @@ -4325,28 +4367,6 @@ let Constraints = "$src1 = $dst" in { } } // ExeDomain = SSEPackedInt -// Patterns for using AVX1 instructions with integer vectors -// Here to give AVX2 priority -let Predicates = [HasAVX] in { - def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 
addr:$src2)))), - (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)), - (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), - (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)), - (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; - - def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))), - (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)), - (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))), - (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)), - (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; -} - //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Extract and Insert //===---------------------------------------------------------------------===// @@ -4395,7 +4415,7 @@ let Predicates = [HasAVX] in { } let Constraints = "$src1 = $dst" in - defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[HasSSE2]>; + defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[UseSSE2]>; } // ExeDomain = SSEPackedInt @@ -4556,7 +4576,7 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), // Move Packed Doubleword Int first element to Doubleword Int // def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", + "vmov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>, @@ -4672,14 +4692,14 @@ let Predicates = [HasAVX] in { } // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext. 
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, - (v4i32 (scalar_to_vector GR32:$src)),(i32 0)))), + (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVZDI2PDIrr GR32:$src), sub_xmm)>; def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, - (v2i64 (scalar_to_vector GR64:$src)),(i32 0)))), + (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))), (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>; } -let Predicates = [HasSSE2], AddedComplexity = 20 in { +let Predicates = [UseSSE2], AddedComplexity = 20 in { def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), (MOVZDI2PDIrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), @@ -4719,7 +4739,7 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), [(set VR128:$dst, (v2i64 (scalar_to_vector (loadi64 addr:$src))))], IIC_SSE_MOVDQ>, XS, - Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix + Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix //===---------------------------------------------------------------------===// // Move Packed Quadword Int to Quadword Int @@ -4762,7 +4782,7 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 addr:$src))))))], IIC_SSE_MOVDQ>, - XS, Requires<[HasSSE2]>; + XS, Requires<[UseSSE2]>; let Predicates = [HasAVX], AddedComplexity = 20 in { def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), @@ -4773,7 +4793,7 @@ let Predicates = [HasAVX], AddedComplexity = 20 in { (VMOVZQI2PQIrm addr:$src)>; } -let Predicates = [HasSSE2], AddedComplexity = 20 in { +let Predicates = [UseSSE2], AddedComplexity = 20 in { def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), (MOVZQI2PQIrm addr:$src)>; def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), @@ -4803,7 +4823,7 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))], IIC_SSE_MOVQ_RR>, - XS, Requires<[HasSSE2]>; + XS, Requires<[UseSSE2]>; let AddedComplexity = 20 in def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), @@ -4818,7 +4838,7 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), [(set VR128:$dst, (v2i64 (X86vzmovl (loadv2i64 addr:$src))))], IIC_SSE_MOVDQ>, - XS, Requires<[HasSSE2]>; + XS, Requires<[UseSSE2]>; } let AddedComplexity = 20 in { @@ -4828,7 +4848,7 @@ let AddedComplexity = 20 in { def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), (VMOVZPQILo2PQIrr VR128:$src)>; } - let Predicates = [HasSSE2] in { + let Predicates = [UseSSE2] in { def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), (MOVZPQILo2PQIrm addr:$src)>; def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), @@ -4908,7 +4928,7 @@ let Predicates = [HasAVX] in { (VMOVSLDUPYrm addr:$src)>; } -let Predicates = [HasSSE3] in { +let Predicates = [UseSSE3] in { def : Pat<(v4i32 (X86Movshdup VR128:$src)), (MOVSHDUPrr VR128:$src)>; def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))), @@ -4977,7 +4997,7 @@ let Predicates = [HasAVX] in { (VMOVDDUPYrr VR256:$src)>; } -let Predicates = [HasSSE3] in { +let Predicates = [UseSSE3] in { def : Pat<(X86Movddup (memopv2f64 addr:$src)), (MOVDDUPrm addr:$src)>; def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), @@ -5041,7 +5061,7 @@ let Predicates = [HasAVX] in { f256mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V; } } -let Constraints = "$src1 = $dst", Predicates = [HasSSE3] in { +let Constraints 
= "$src1 = $dst", Predicates = [UseSSE3] in { let ExeDomain = SSEPackedSingle in defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128, f128mem, SSE_ALU_F32P>, TB, XD; @@ -5424,7 +5444,7 @@ let Predicates = [HasAVX] in defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V; let Predicates = [HasAVX2] in defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V; -let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in +let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in defm PALIGN : ssse3_palign<"palignr">; let Predicates = [HasAVX2] in { @@ -5449,7 +5469,7 @@ def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; } -let Predicates = [HasSSSE3] in { +let Predicates = [UseSSSE3] in { def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), @@ -5583,7 +5603,7 @@ let Predicates = [HasAVX] in { (VPMOVZXDQrm addr:$src)>; } -let Predicates = [HasSSE41] in { +let Predicates = [UseSSE41] in { // Common patterns involving scalar load. def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)), (PMOVSXBWrm addr:$src)>; @@ -5633,7 +5653,7 @@ let Predicates = [HasAVX] in { def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>; } -let Predicates = [HasSSE41] in { +let Predicates = [UseSSE41] in { def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>; def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>; } @@ -5704,7 +5724,7 @@ let Predicates = [HasAVX] in { (VPMOVZXWQrm addr:$src)>; } -let Predicates = [HasSSE41] in { +let Predicates = [UseSSE41] in { // Common patterns involving scalar load def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)), (PMOVSXBDrm addr:$src)>; @@ -5772,7 +5792,7 @@ let Predicates = [HasAVX] in { (VPMOVZXBQrm addr:$src)>; } -let Predicates = [HasSSE41] in { +let Predicates = [UseSSE41] in { // Common patterns involving scalar load def : Pat<(int_x86_sse41_pmovsxbq (bitconvert (v4i32 (X86vzmovl @@ -5918,7 +5938,7 @@ def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))), addr:$dst), (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, - Requires<[HasSSE41]>; + Requires<[UseSSE41]>; //===----------------------------------------------------------------------===// // SSE4.1 - Insert Instructions @@ -6190,6 +6210,15 @@ let Predicates = [HasAVX] in { (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>; def : Pat<(f64 (ftrunc FR64:$src)), (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>; + + def : Pat<(v4f32 (ffloor VR128:$src)), + (VROUNDPSr VR128:$src, (i32 0x1))>; + def : Pat<(v2f64 (ffloor VR128:$src)), + (VROUNDPDr VR128:$src, (i32 0x1))>; + def : Pat<(v8f32 (ffloor VR256:$src)), + (VROUNDYPSr VR256:$src, (i32 0x1))>; + def : Pat<(v4f64 (ffloor VR256:$src)), + (VROUNDYPDr VR256:$src, (i32 0x1))>; } defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128, @@ -6199,26 +6228,33 @@ let Constraints = "$src1 = $dst" in defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", int_x86_sse41_round_ss, int_x86_sse41_round_sd>; -def : Pat<(ffloor FR32:$src), - (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>; -def : Pat<(f64 (ffloor FR64:$src)), - (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>; -def : Pat<(f32 (fnearbyint FR32:$src)), - (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>; -def : Pat<(f64 (fnearbyint FR64:$src)), - 
(ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>; -def : Pat<(f32 (fceil FR32:$src)), - (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>; -def : Pat<(f64 (fceil FR64:$src)), - (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>; -def : Pat<(f32 (frint FR32:$src)), - (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>; -def : Pat<(f64 (frint FR64:$src)), - (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>; -def : Pat<(f32 (ftrunc FR32:$src)), - (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>; -def : Pat<(f64 (ftrunc FR64:$src)), - (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>; +let Predicates = [UseSSE41] in { + def : Pat<(ffloor FR32:$src), + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>; + def : Pat<(f64 (ffloor FR64:$src)), + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>; + def : Pat<(f32 (fnearbyint FR32:$src)), + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>; + def : Pat<(f64 (fnearbyint FR64:$src)), + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>; + def : Pat<(f32 (fceil FR32:$src)), + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>; + def : Pat<(f64 (fceil FR64:$src)), + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>; + def : Pat<(f32 (frint FR32:$src)), + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>; + def : Pat<(f64 (frint FR64:$src)), + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>; + def : Pat<(f32 (ftrunc FR32:$src)), + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>; + def : Pat<(f64 (ftrunc FR64:$src)), + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>; + + def : Pat<(v4f32 (ffloor VR128:$src)), + (ROUNDPSr VR128:$src, (i32 0x1))>; + def : Pat<(v2f64 (ffloor VR128:$src)), + (ROUNDPDr VR128:$src, (i32 0x1))>; +} //===----------------------------------------------------------------------===// // SSE4.1 - Packed Bit Test @@ -6356,7 +6392,7 @@ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr, (bitconvert (memopv2i64 addr:$src2))))]>, OpSize; } -/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator +/// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr, Intrinsic IntId256> { let isCommutable = 1 in @@ -6705,7 +6741,7 @@ def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", (PBLENDVBrm0 VR128:$dst, i128mem:$src2)>; -let Predicates = [HasSSE41] in { +let Predicates = [UseSSE41] in { def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1), (v16i8 VR128:$src2))), (PBLENDVBrr0 VR128:$src2, VR128:$src1)>; @@ -6802,9 +6838,8 @@ multiclass pseudo_pcmpistrm<string asm> { } let Defs = [EFLAGS], usesCustomInserter = 1 in { - let AddedComplexity = 1 in - defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>; - defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[HasSSE42]>; + defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>; + defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[UseSSE42]>; } let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1, Predicates = [HasAVX] in { @@ -6840,9 +6875,8 @@ multiclass pseudo_pcmpestrm<string asm> { } let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { - let AddedComplexity = 1 in - defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>; - defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[HasSSE42]>; + defm VPCMPESTRM128 : 
pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>; + defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[UseSSE42]>; } let Predicates = [HasAVX], @@ -7237,40 +7271,59 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst), let Predicates = [HasAVX] in { def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; + +def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (memopv4f32 addr:$src2), + (iPTR imm)), + (VINSERTF128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (memopv2f64 addr:$src2), + (iPTR imm)), + (VINSERTF128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +} + +let Predicates = [HasAVX1Only] in { def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2), - (i32 imm)), +def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2), + (iPTR imm)), (VINSERTF128rm VR256:$src1, addr:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2), - (i32 imm)), +def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), + (bc_v4i32 (memopv2i64 addr:$src2)), + (iPTR imm)), (VINSERTF128rm VR256:$src1, addr:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2), - (i32 imm)), +def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), + (bc_v16i8 (memopv2i64 addr:$src2)), + (iPTR imm)), + (VINSERTF128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), + (bc_v8i16 (memopv2i64 addr:$src2)), + (iPTR imm)), (VINSERTF128rm VR256:$src1, addr:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; } @@ -7290,56 +7343,61 @@ def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs), []>, VEX; } -// Extract and store. 
-let Predicates = [HasAVX] in { - def : Pat<(alignedstore (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), addr:$dst), - (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; - def : Pat<(alignedstore (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), addr:$dst), - (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; - def : Pat<(alignedstore (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), addr:$dst), - (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; - - def : Pat<(int_x86_sse_storeu_ps addr:$dst, (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2)), - (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; - def : Pat<(int_x86_sse2_storeu_pd addr:$dst, (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2)), - (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; - def : Pat<(int_x86_sse2_storeu_dq addr:$dst, (bc_v16i8 (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2))), - (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; -} - // AVX1 patterns let Predicates = [HasAVX] in { -def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), - (VEXTRACTF128rr VR256:$src1, imm:$src2)>; -def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), - (VEXTRACTF128rr VR256:$src1, imm:$src2)>; -def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), - (VEXTRACTF128rr VR256:$src1, imm:$src2)>; - -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v4f32 (VEXTRACTF128rr (v8f32 VR256:$src1), (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v2f64 (VEXTRACTF128rr (v4f64 VR256:$src1), (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + +def : Pat<(alignedstore (v4f32 (vextractf128_extract:$ext (v8f32 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTF128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +def : Pat<(alignedstore (v2f64 (vextractf128_extract:$ext (v4f64 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTF128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +} + +let Predicates = [HasAVX1Only] in { +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v2i64 (VEXTRACTF128rr - (v4i64 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v4i64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v4i32 (VEXTRACTF128rr - (v8i32 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v8i32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v8i16 (VEXTRACTF128rr - (v16i16 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v16i16 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v16i8 (VEXTRACTF128rr - (v32i8 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; + (v32i8 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; + +def : Pat<(alignedstore (v2i64 (vextractf128_extract:$ext (v4i64 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTF128mr addr:$dst, 
VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +def : Pat<(alignedstore (v4i32 (vextractf128_extract:$ext (v8i32 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTF128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +def : Pat<(alignedstore (v8i16 (vextractf128_extract:$ext (v16i16 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTF128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +def : Pat<(alignedstore (v16i8 (vextractf128_extract:$ext (v32i8 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTF128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; } //===----------------------------------------------------------------------===// @@ -7456,29 +7514,29 @@ def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst), } let Predicates = [HasAVX] in { +def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, + (memopv4f64 addr:$src2), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +} + +let Predicates = [HasAVX1Only] in { def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1, - (memopv8f32 addr:$src2), (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2), (i8 imm:$imm))), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; -def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, - (memopv4f64 addr:$src2), (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; @@ -7665,19 +7723,22 @@ let Predicates = [HasAVX2] in { } // AVX1 broadcast patterns -let Predicates = [HasAVX] in { +let Predicates = [HasAVX1Only] in { def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), (VBROADCASTSSYrm addr:$src)>; def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), (VBROADCASTSDYrm addr:$src)>; +def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), + (VBROADCASTSSrm addr:$src)>; +} + +let Predicates = [HasAVX] in { def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))), (VBROADCASTSSYrm addr:$src)>; def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))), (VBROADCASTSDYrm addr:$src)>; def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))), (VBROADCASTSSrm addr:$src)>; -def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), - (VBROADCASTSSrm addr:$src)>; // Provide fallback in case the load node that is used in the patterns above // is used by additional users, which prevents the pattern selection. 
@@ -7757,7 +7818,6 @@ defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, v4f64>, VEX_W; //===----------------------------------------------------------------------===// // VPERM2I128 - Permute Floating-Point Values in 128-bit chunks // -let AddedComplexity = 1 in { def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", @@ -7768,9 +7828,8 @@ def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst), "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2), (i8 imm:$src3)))]>, VEX_4V; -} -let Predicates = [HasAVX2], AddedComplexity = 1 in { +let Predicates = [HasAVX2] in { def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), @@ -7805,23 +7864,43 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst), []>, VEX_4V; } -let Predicates = [HasAVX2], AddedComplexity = 1 in { +let Predicates = [HasAVX2] in { def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTI128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTI128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTI128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTI128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; + +def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2), + (iPTR imm)), + (VINSERTI128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), + (bc_v4i32 (memopv2i64 addr:$src2)), + (iPTR imm)), + (VINSERTI128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), + (bc_v16i8 (memopv2i64 addr:$src2)), + (iPTR imm)), + (VINSERTI128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), + (bc_v8i16 (memopv2i64 addr:$src2)), + (iPTR imm)), + (VINSERTI128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; } //===----------------------------------------------------------------------===// @@ -7838,23 +7917,40 @@ def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs), (ins i128mem:$dst, VR256:$src1, i8imm:$src2), "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX; -let Predicates = [HasAVX2], AddedComplexity = 1 in { -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), +let Predicates = [HasAVX2] in { +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v2i64 (VEXTRACTI128rr (v4i64 VR256:$src1), (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v4i32 (VEXTRACTI128rr (v8i32 VR256:$src1), (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : 
Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v8i16 (VEXTRACTI128rr (v16i16 VR256:$src1), (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v16i8 (VEXTRACTI128rr (v32i8 VR256:$src1), (EXTRACT_get_vextractf128_imm VR128:$ext)))>; + +def : Pat<(alignedstore (v2i64 (vextractf128_extract:$ext (v4i64 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTI128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +def : Pat<(alignedstore (v4i32 (vextractf128_extract:$ext (v8i32 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTI128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +def : Pat<(alignedstore (v8i16 (vextractf128_extract:$ext (v16i16 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTI128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +def : Pat<(alignedstore (v16i8 (vextractf128_extract:$ext (v32i8 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTI128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 7ac4cec..764aa5d 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -532,7 +532,7 @@ uintptr_t X86JITInfo::getPICJumpTableEntry(uintptr_t BB, uintptr_t Entry) { #endif } -template<typename T> void addUnaligned(void *Pos, T Delta) { +template<typename T> static void addUnaligned(void *Pos, T Delta) { T Value; std::memcpy(reinterpret_cast<char*>(&Value), reinterpret_cast<char*>(Pos), sizeof(T)); diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 9c0ce4e..1c2ef25 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -377,12 +377,6 @@ ReSimplify: case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break; case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break; case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break; - case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break; - case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break; - case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break; - case X86::AVX_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break; - case X86::AVX2_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDYrr);break; - case X86::AVX2_SET0: LowerUnaryToTwoAddr(OutMI, X86::VPXORYrr); break; case X86::MOV16r0: LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0 diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 877b8f6..3b4cfc4 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -522,7 +522,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const{ + int SPAdj, RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); unsigned i = 0; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 9087852..0d7b664 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -346,6 +346,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string 
&CPU, , HasVectorUAMem(false) , HasCmpxchg16b(false) , UseLeaForSP(false) + , HasSlowDivide(false) , PostRAScheduler(false) , stackAlignment(4) // FIXME: this is a known good value for Yonah. How about others? diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 6841c5b..dde7e24 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -136,6 +136,10 @@ protected: /// the stack pointer. This is an optimization for Intel Atom processors. bool UseLeaForSP; + /// HasSlowDivide - True if smaller divides are significantly faster than + /// full divides and should be used when possible. + bool HasSlowDivide; + /// PostRAScheduler - True if using post-register-allocation scheduler. bool PostRAScheduler; @@ -198,6 +202,7 @@ public: bool hasSSE42() const { return X86SSELevel >= SSE42; } bool hasAVX() const { return X86SSELevel >= AVX; } bool hasAVX2() const { return X86SSELevel >= AVX2; } + bool hasNoAVX() const { return X86SSELevel < AVX; } bool hasSSE4A() const { return HasSSE4A; } bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } @@ -205,7 +210,8 @@ public: bool hasAES() const { return HasAES; } bool hasPCLMUL() const { return HasPCLMUL; } bool hasFMA() const { return HasFMA; } - bool hasFMA4() const { return HasFMA4; } + // FIXME: Favor FMA when both are enabled. Is this the right thing to do? + bool hasFMA4() const { return HasFMA4 && !HasFMA; } bool hasXOP() const { return HasXOP; } bool hasMOVBE() const { return HasMOVBE; } bool hasRDRAND() const { return HasRDRAND; } @@ -219,6 +225,7 @@ public: bool hasVectorUAMem() const { return HasVectorUAMem; } bool hasCmpxchg16b() const { return HasCmpxchg16b; } bool useLeaForSP() const { return UseLeaForSP; } + bool hasSlowDivide() const { return HasSlowDivide; } bool isAtom() const { return X86ProcFamily == IntelAtom; } diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index 80b75dc..449eed3 100644 --- a/lib/Target/X86/X86VZeroUpper.cpp +++ b/lib/Target/X86/X86VZeroUpper.cpp @@ -42,7 +42,6 @@ namespace { private: const TargetInstrInfo *TII; // Machine instruction info. - MachineBasicBlock *MBB; // Current basic block // Any YMM register live-in to this function? 
bool FnHasLiveInYmm; @@ -84,7 +83,7 @@ namespace { // 2) All states must be clean for the result to be clean // 3) If none above and one unknown, the result state is also unknown // - unsigned computeState(unsigned PrevState, unsigned CurState) { + static unsigned computeState(unsigned PrevState, unsigned CurState) { if (PrevState == ST_INIT) return CurState; @@ -122,7 +121,7 @@ static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) { } static bool hasYmmReg(MachineInstr *MI) { - for (int i = 0, e = MI->getNumOperands(); i != e; ++i) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; @@ -189,7 +188,6 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { bool Changed = false; unsigned BBNum = BB.getNumber(); - MBB = &BB; // Don't process already solved BBs if (BBSolved[BBNum]) @@ -207,7 +205,7 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, // The entry MBB for the function may set the initial state to dirty if // the function receives any YMM incoming arguments - if (MBB == MF.begin()) { + if (&BB == MF.begin()) { EntryState = ST_CLEAN; if (FnHasLiveInYmm) EntryState = ST_DIRTY; @@ -253,7 +251,7 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, // When unknown, only compute the information within the block to have // it available in the exit if possible, but don't change the block. if (EntryState != ST_UNKNOWN) { - BuildMI(*MBB, I, dl, TII->get(X86::VZEROUPPER)); + BuildMI(BB, I, dl, TII->get(X86::VZEROUPPER)); ++NumVZU; } diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index ae646a2..3e7666b 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -33,7 +33,7 @@ def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink, SDNPVariadic]>; def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind, - [SDNPHasChain, SDNPOptInGlue]>; + [SDNPHasChain, SDNPOptInGlue, SDNPMayLoad]>; def SDT_XCoreBR_JT : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; @@ -58,7 +58,7 @@ def cprelwrapper : SDNode<"XCoreISD::CPRelativeWrapper", SDT_XCoreAddress, def SDT_XCoreStwsp : SDTypeProfile<0, 2, [SDTCisInt<1>]>; def XCoreStwsp : SDNode<"XCoreISD::STWSP", SDT_XCoreStwsp, - [SDNPHasChain]>; + [SDNPHasChain, SDNPMayStore]>; // These are target-independent nodes, but have target-specific formats. def SDT_XCoreCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index cdd0a08..be5855a 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -176,7 +176,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, #ifndef NDEBUG DEBUG(errs() << "\nFunction : " - << MF.getFunction()->getName() << "\n"); + << MF.getName() << "\n"); DEBUG(errs() << "<--------->\n"); DEBUG(MI.print(errs())); DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"); diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 6d950d2..b888e95 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -346,7 +346,7 @@ static bool isLeakCheckerRoot(GlobalVariable *GV) { /// Given a value that is stored to a global but never read, determine whether /// it's safe to remove the store and the chain of computation that feeds the /// store. 
-static bool IsSafeComputationToRemove(Value *V) { +static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) { do { if (isa<Constant>(V)) return true; @@ -355,7 +355,7 @@ static bool IsSafeComputationToRemove(Value *V) { if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) || isa<GlobalValue>(V)) return false; - if (isAllocationFn(V)) + if (isAllocationFn(V, TLI)) return true; Instruction *I = cast<Instruction>(V); @@ -376,7 +376,8 @@ static bool IsSafeComputationToRemove(Value *V) { /// of the global and clean up any that obviously don't assign the global a /// value that isn't dynamically allocated. /// -static bool CleanupPointerRootUsers(GlobalVariable *GV) { +static bool CleanupPointerRootUsers(GlobalVariable *GV, + const TargetLibraryInfo *TLI) { // A brief explanation of leak checkers. The goal is to find bugs where // pointers are forgotten, causing an accumulating growth in memory // usage over time. The common strategy for leak checkers is to whitelist the @@ -432,18 +433,18 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV) { C->destroyConstant(); // This could have invalidated UI, start over from scratch. Dead.clear(); - CleanupPointerRootUsers(GV); + CleanupPointerRootUsers(GV, TLI); return true; } } } for (int i = 0, e = Dead.size(); i != e; ++i) { - if (IsSafeComputationToRemove(Dead[i].first)) { + if (IsSafeComputationToRemove(Dead[i].first, TLI)) { Dead[i].second->eraseFromParent(); Instruction *I = Dead[i].first; do { - if (isAllocationFn(I)) + if (isAllocationFn(I, TLI)) break; Instruction *J = dyn_cast<Instruction>(I->getOperand(0)); if (!J) @@ -975,7 +976,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, // nor is the global. if (AllNonStoreUsesGone) { if (isLeakCheckerRoot(GV)) { - Changed |= CleanupPointerRootUsers(GV); + Changed |= CleanupPointerRootUsers(GV, TLI); } else { Changed = true; CleanupConstantGlobalUsers(GV, 0, TD, TLI); @@ -1465,9 +1466,10 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, /// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break /// it up into multiple allocations of arrays of the fields. static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, - Value *NElems, TargetData *TD) { + Value *NElems, TargetData *TD, + const TargetLibraryInfo *TLI) { DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); - Type *MAT = getMallocAllocatedType(CI); + Type *MAT = getMallocAllocatedType(CI, TLI); StructType *STy = cast<StructType>(MAT); // There is guaranteed to be at least one use of the malloc (storing @@ -1688,7 +1690,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // This eliminates dynamic allocation, avoids an indirection accessing the // data, and exposes the resultant global to further GlobalOpt. // We cannot optimize the malloc if we cannot determine malloc array size. - Value *NElems = getMallocArraySize(CI, TD, true); + Value *NElems = getMallocArraySize(CI, TD, TLI, true); if (!NElems) return false; @@ -1725,7 +1727,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // If this is a fixed size array, transform the Malloc to be an alloc of // structs. 
malloc [100 x struct],1 -> malloc struct, 100 - if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { + if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) { Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); @@ -1742,7 +1744,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CI = cast<CallInst>(Malloc); } - GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true), TD); + GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, TLI, true), + TD, TLI); return true; } @@ -1771,8 +1774,8 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, // Optimize away any trapping uses of the loaded value. if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, TD, TLI)) return true; - } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { - Type *MallocType = getMallocAllocatedType(CI); + } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) { + Type *MallocType = getMallocAllocatedType(CI, TLI); if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI, TD, TLI)) @@ -1964,7 +1967,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, bool Changed; if (isLeakCheckerRoot(GV)) { // Delete any constant stores to the global. - Changed = CleanupPointerRootUsers(GV); + Changed = CleanupPointerRootUsers(GV, TLI); } else { // Delete any stores we can find to the global. We may not be able to // make it completely dead though. diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 712888a..69a22fb 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/IPO/InlinerPass.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" @@ -339,6 +340,7 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, bool Inliner::runOnSCC(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis<CallGraph>(); const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); SmallPtrSet<Function*, 8> SCCFunctions; DEBUG(dbgs() << "Inliner visiting SCC:"); @@ -417,7 +419,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // just delete the call instead of trying to inline it, regardless of // size. This happens because IPSCCP propagates the result out of the // call and then we're left with the dead call. - if (isInstructionTriviallyDead(CS.getInstruction())) { + if (isInstructionTriviallyDead(CS.getInstruction(), TLI)) { DEBUG(dbgs() << " -> Deleting dead call: " << *CS.getInstruction() << "\n"); // Update the call graph by deleting the edge from Callee to Caller. diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index cbe1ca4..b12fc01 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -168,7 +168,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { /// the heavy lifting. 
/// Instruction *InstCombiner::visitCallInst(CallInst &CI) { - if (isFreeCall(&CI)) + if (isFreeCall(&CI, TLI)) return visitFree(CI); // If the caller function is nounwind, mark the call as nounwind, even if the @@ -243,7 +243,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { default: break; case Intrinsic::objectsize: { uint64_t Size; - if (getObjectSize(II->getArgOperand(0), Size, TD)) + if (getObjectSize(II->getArgOperand(0), Size, TD, TLI)) return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size)); return 0; } @@ -877,7 +877,7 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) { // visitCallSite - Improvements for call and invoke instructions. // Instruction *InstCombiner::visitCallSite(CallSite CS) { - if (isAllocLikeFn(CS.getInstruction())) + if (isAllocLikeFn(CS.getInstruction(), TLI)) return visitAllocSite(*CS.getInstruction()); bool Changed = false; diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 35a0bbb..2a7182f 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -462,6 +462,16 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { } } + // (x lshr C1) udiv C2 --> x udiv (C2 << C1) + if (ConstantInt *C2 = dyn_cast<ConstantInt>(Op1)) { + Value *X; + ConstantInt *C1; + if (match(Op0, m_LShr(m_Value(X), m_ConstantInt(C1)))) { + APInt NC = C2->getValue().shl(C1->getLimitedValue(C1->getBitWidth()-1)); + return BinaryOperator::CreateUDiv(X, Builder->getInt(NC)); + } + } + // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2) { const APInt *CI; Value *N; if (match(Op1, m_Shl(m_Power2(CI), m_Value(N))) || diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 68ecd51..ff758c4 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1068,7 +1068,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // If the bitcast is of an allocation, and the allocation will be // converted to match the type of the cast, don't touch this. if (isa<AllocaInst>(BCI->getOperand(0)) || - isAllocationFn(BCI->getOperand(0))) { + isAllocationFn(BCI->getOperand(0), TLI)) { // See if the bitcast simplifies, if so, don't nuke this GEP yet. if (Instruction *I = visitBitCast(*BCI)) { if (I != BCI) { @@ -1107,7 +1107,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { static bool -isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users) { +isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users, + const TargetLibraryInfo *TLI) { SmallVector<Instruction*, 4> Worklist; Worklist.push_back(AI); @@ -1163,7 +1164,7 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users) { } } - if (isFreeCall(I)) { + if (isFreeCall(I, TLI)) { Users.push_back(I); continue; } @@ -1188,7 +1189,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { // to null and free calls, delete the calls and replace the comparisons with // true or false as appropriate. 
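The new InstCombine rule above rewrites (x lshr C1) udiv C2 into x udiv (C2 << C1), collapsing a shift-then-divide into a single unsigned divide. The identity holds because floor(floor(x / 2^C1) / C2) equals floor(x / (C2 * 2^C1)) whenever C2 << C1 does not overflow; a standalone brute-force check of that identity (plain C++, not LLVM code):

  #include <cassert>
  #include <cstdint>

  int main() {
    const uint32_t C1 = 3, C2 = 5;
    // (x >> C1) / C2 == x / (C2 << C1) for unsigned x, provided the shifted
    // constant stays representable (here 5 << 3 == 40 fits easily).
    for (uint32_t X = 0; X < (1u << 20); ++X)
      assert((X >> C1) / C2 == X / (C2 << C1));
    return 0;
  }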
SmallVector<WeakVH, 64> Users; - if (isAllocSiteRemovable(&MI, Users)) { + if (isAllocSiteRemovable(&MI, Users, TLI)) { for (unsigned i = 0, e = Users.size(); i != e; ++i) { Instruction *I = cast_or_null<Instruction>(&*Users[i]); if (!I) continue; @@ -1872,7 +1873,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, Instruction *Inst = BBI++; // DCE instruction if trivially dead. - if (isInstructionTriviallyDead(Inst)) { + if (isInstructionTriviallyDead(Inst, TLI)) { ++NumDeadInst; DEBUG(errs() << "IC: DCE: " << *Inst << '\n'); Inst->eraseFromParent(); @@ -2002,7 +2003,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { if (I == 0) continue; // skip null values. // Check to see if we can DCE the instruction. - if (isInstructionTriviallyDead(I)) { + if (isInstructionTriviallyDead(I, TLI)) { DEBUG(errs() << "IC: DCE: " << *I << '\n'); EraseInstFromFunction(*I); ++NumDeadInst; @@ -2102,7 +2103,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // If the instruction was modified, it's possible that it is now dead. // if so, remove it. - if (isInstructionTriviallyDead(I)) { + if (isInstructionTriviallyDead(I, TLI)) { EraseInstFromFunction(*I); } else { Worklist.Add(I); diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 06f4d2f..0775cf4 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -15,7 +15,7 @@ #define DEBUG_TYPE "asan" -#include "FunctionBlackList.h" +#include "BlackList.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" #include "llvm/InlineAsm.h" @@ -217,7 +217,7 @@ struct AddressSanitizer : public ModulePass { Function *AsanCtorFunction; Function *AsanInitFunction; Instruction *CtorInsertBefore; - OwningPtr<FunctionBlackList> BL; + OwningPtr<BlackList> BL; // This array is indexed by AccessIsWrite and log2(AccessSize). Function *AsanErrorCallback[2][kNumberOfAccessSizes]; InlineAsm *EmptyAsm; @@ -544,6 +544,7 @@ bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) { Type *Ty = cast<PointerType>(G->getType())->getElementType(); DEBUG(dbgs() << "GLOBAL: " << *G); + if (BL->isIn(*G)) return false; if (!Ty->isSized()) return false; if (!G->hasInitializer()) return false; // Touch only those globals that will not be defined in other modules. @@ -643,6 +644,8 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); // Determine whether this global should be poisoned in initialization. bool GlobalHasDynamicInitializer = HasDynamicInitializer(G); + // Don't check initialization order if this global is blacklisted. + GlobalHasDynamicInitializer &= !BL->isInInit(*G); StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL); Constant *NewInitializer = ConstantStruct::get( @@ -736,7 +739,7 @@ bool AddressSanitizer::runOnModule(Module &M) { TD = getAnalysisIfAvailable<TargetData>(); if (!TD) return false; - BL.reset(new FunctionBlackList(ClBlackListFile)); + BL.reset(new BlackList(ClBlackListFile)); C = &(M.getContext()); LongSize = TD->getPointerSizeInBits(); @@ -774,7 +777,7 @@ bool AddressSanitizer::runOnModule(Module &M) { /*hasSideEffects=*/true); llvm::Triple targetTriple(M.getTargetTriple()); - bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::ANDROIDEABI; + bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::Android; MappingOffset = isAndroid ? 
kDefaultShadowOffsetAndroid : (LongSize == 32 ? kDefaultShadowOffset32 : kDefaultShadowOffset64); diff --git a/lib/Transforms/Instrumentation/BlackList.cpp b/lib/Transforms/Instrumentation/BlackList.cpp new file mode 100644 index 0000000..2cb1199 --- /dev/null +++ b/lib/Transforms/Instrumentation/BlackList.cpp @@ -0,0 +1,102 @@ +//===-- BlackList.cpp - blacklist for sanitizers --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a utility class for instrumentation passes (like AddressSanitizer +// or ThreadSanitizer) to avoid instrumenting some functions or global +// variables based on a user-supplied blacklist. +// +//===----------------------------------------------------------------------===// + +#include <utility> +#include <string> + +#include "BlackList.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Module.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" + +namespace llvm { + +BlackList::BlackList(const StringRef Path) { + // Validate and open blacklist file. + if (!Path.size()) return; + OwningPtr<MemoryBuffer> File; + if (error_code EC = MemoryBuffer::getFile(Path, File)) { + report_fatal_error("Can't open blacklist file: " + Path + ": " + + EC.message()); + } + + // Iterate through each line in the blacklist file. + SmallVector<StringRef, 16> Lines; + SplitString(File.take()->getBuffer(), Lines, "\n\r"); + StringMap<std::string> Regexps; + for (SmallVector<StringRef, 16>::iterator I = Lines.begin(), E = Lines.end(); + I != E; ++I) { + // Get our prefix and unparsed regexp. + std::pair<StringRef, StringRef> SplitLine = I->split(":"); + StringRef Prefix = SplitLine.first; + std::string Regexp = SplitLine.second; + + // Replace * with .* + for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos; + pos += strlen(".*")) { + Regexp.replace(pos, strlen("*"), ".*"); + } + + // Check that the regexp is valid. + Regex CheckRE(Regexp); + std::string Error; + if (!CheckRE.isValid(Error)) { + report_fatal_error("malformed blacklist regex: " + SplitLine.second + + ": " + Error); + } + + // Add this regexp into the proper group by its prefix. + if (Regexps[Prefix].size()) + Regexps[Prefix] += "|"; + Regexps[Prefix] += Regexp; + } + + // Iterate through each of the prefixes, and create Regexs for them. + for (StringMap<std::string>::iterator I = Regexps.begin(), E = Regexps.end(); + I != E; ++I) { + Entries[I->getKey()] = new Regex(I->getValue()); + } +} + +bool BlackList::isIn(const Function &F) { + return isIn(*F.getParent()) || inSection("fun", F.getName()); +} + +bool BlackList::isIn(const GlobalVariable &G) { + return isIn(*G.getParent()) || inSection("global", G.getName()); +} + +bool BlackList::isIn(const Module &M) { + return inSection("src", M.getModuleIdentifier()); +} + +bool BlackList::isInInit(const GlobalVariable &G) { + return isIn(*G.getParent()) || inSection("global-init", G.getName()); +} + +bool BlackList::inSection(const StringRef Section, + const StringRef Query) { + Regex *FunctionRegex = Entries[Section]; + return FunctionRegex ? 
FunctionRegex->match(Query) : false; +} + +} // namespace llvm diff --git a/lib/Transforms/Instrumentation/BlackList.h b/lib/Transforms/Instrumentation/BlackList.h new file mode 100644 index 0000000..73977fc --- /dev/null +++ b/lib/Transforms/Instrumentation/BlackList.h @@ -0,0 +1,55 @@ +//===-- BlackList.h - blacklist for sanitizers ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +//===----------------------------------------------------------------------===// +// +// This is a utility class for instrumentation passes (like AddressSanitizer +// or ThreadSanitizer) to avoid instrumenting some functions or global +// variables based on a user-supplied blacklist. +// +// The blacklist disables instrumentation of various functions and global +// variables. Each line contains a prefix, followed by a wild card expression. +// --- +// fun:*_ZN4base6subtle* +// global:*global_with_bad_access_or_initialization* +// global-init:*global_with_initialization_issues* +// src:file_with_tricky_code.cc +// --- +// Note that the wild card is in fact an llvm::Regex, but * is automatically +// replaced with .* +// This is similar to the "ignore" feature of ThreadSanitizer. +// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores +// +//===----------------------------------------------------------------------===// +// + +#include "llvm/ADT/StringMap.h" + +namespace llvm { +class Function; +class GlobalVariable; +class Module; +class Regex; +class StringRef; + +class BlackList { + public: + BlackList(const StringRef Path); + // Returns whether either this function or it's source file are blacklisted. + bool isIn(const Function &F); + // Returns whether either this global or it's source file are blacklisted. + bool isIn(const GlobalVariable &G); + // Returns whether this module is blacklisted by filename. + bool isIn(const Module &M); + // Returns whether a global should be excluded from initialization checking. 
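The header comment above documents the blacklist file format, and the BlackList constructor converts each wildcard entry into a regex by replacing every "*" with ".*", then OR-joins all entries sharing a prefix ("fun", "global", "global-init", "src") into one Regex per section. A standalone sketch of just the wildcard conversion (not the class itself):

  #include <string>

  // "*_ZN4base6subtle*"  ->  ".*_ZN4base6subtle.*"
  static std::string wildcardToRegex(std::string Pattern) {
    for (size_t Pos = 0; (Pos = Pattern.find('*', Pos)) != std::string::npos;
         Pos += 2)                     // step over the ".*" just inserted
      Pattern.replace(Pos, 1, ".*");
    return Pattern;
  }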
+ bool isInInit(const GlobalVariable &G); + private: + StringMap<Regex*> Entries; + + bool inSection(const StringRef Section, const StringRef Query); +}; + +} // namespace llvm diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp index 09e0f14..6429081 100644 --- a/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -24,6 +24,7 @@ #include "llvm/Support/TargetFolder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Instrumentation.h" using namespace llvm; @@ -48,10 +49,12 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetData>(); + AU.addRequired<TargetLibraryInfo>(); } private: const TargetData *TD; + const TargetLibraryInfo *TLI; ObjectSizeOffsetEvaluator *ObjSizeEval; BuilderTy *Builder; Instruction *Inst; @@ -166,11 +169,12 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) { bool BoundsChecking::runOnFunction(Function &F) { TD = &getAnalysis<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); TrapBB = 0; BuilderTy TheBuilder(F.getContext(), TargetFolder(TD)); Builder = &TheBuilder; - ObjectSizeOffsetEvaluator TheObjSizeEval(TD, F.getContext()); + ObjectSizeOffsetEvaluator TheObjSizeEval(TD, TLI, F.getContext()); ObjSizeEval = &TheObjSizeEval; // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index 00de882..058f68c 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -1,8 +1,8 @@ add_llvm_library(LLVMInstrumentation AddressSanitizer.cpp + BlackList.cpp BoundsChecking.cpp EdgeProfiling.cpp - FunctionBlackList.cpp GCOVProfiling.cpp Instrumentation.cpp OptimalEdgeProfiling.cpp diff --git a/lib/Transforms/Instrumentation/FunctionBlackList.cpp b/lib/Transforms/Instrumentation/FunctionBlackList.cpp deleted file mode 100644 index 188ea4d..0000000 --- a/lib/Transforms/Instrumentation/FunctionBlackList.cpp +++ /dev/null @@ -1,79 +0,0 @@ -//===-- FunctionBlackList.cpp - blacklist of functions --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is a utility class for instrumentation passes (like AddressSanitizer -// or ThreadSanitizer) to avoid instrumenting some functions based on -// user-supplied blacklist. 
-// -//===----------------------------------------------------------------------===// - -#include "FunctionBlackList.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Function.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Regex.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" - -namespace llvm { - -FunctionBlackList::FunctionBlackList(const std::string &Path) { - Functions = NULL; - const char *kFunPrefix = "fun:"; - if (!Path.size()) return; - std::string Fun; - - OwningPtr<MemoryBuffer> File; - if (error_code EC = MemoryBuffer::getFile(Path.c_str(), File)) { - report_fatal_error("Can't open blacklist file " + Path + ": " + - EC.message()); - } - MemoryBuffer *Buff = File.take(); - const char *Data = Buff->getBufferStart(); - size_t DataLen = Buff->getBufferSize(); - SmallVector<StringRef, 16> Lines; - SplitString(StringRef(Data, DataLen), Lines, "\n\r"); - for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) { - if (Lines[i].startswith(kFunPrefix)) { - std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix)); - std::string ThisFuncRE; - // add ThisFunc replacing * with .* - for (size_t j = 0, n = ThisFunc.size(); j < n; j++) { - if (ThisFunc[j] == '*') - ThisFuncRE += '.'; - ThisFuncRE += ThisFunc[j]; - } - // Check that the regexp is valid. - Regex CheckRE(ThisFuncRE); - std::string Error; - if (!CheckRE.isValid(Error)) - report_fatal_error("malformed blacklist regex: " + ThisFunc + - ": " + Error); - // Append to the final regexp. - if (Fun.size()) - Fun += "|"; - Fun += ThisFuncRE; - } - } - if (Fun.size()) { - Functions = new Regex(Fun); - } -} - -bool FunctionBlackList::isIn(const Function &F) { - if (Functions) { - bool Res = Functions->match(F.getName()); - return Res; - } - return false; -} - -} // namespace llvm diff --git a/lib/Transforms/Instrumentation/FunctionBlackList.h b/lib/Transforms/Instrumentation/FunctionBlackList.h deleted file mode 100644 index c1239b9..0000000 --- a/lib/Transforms/Instrumentation/FunctionBlackList.h +++ /dev/null @@ -1,37 +0,0 @@ -//===-- FunctionBlackList.cpp - blacklist of functions ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -//===----------------------------------------------------------------------===// -// -// This is a utility class for instrumentation passes (like AddressSanitizer -// or ThreadSanitizer) to avoid instrumenting some functions based on -// user-supplied blacklist. -// -//===----------------------------------------------------------------------===// -// - -#include <string> - -namespace llvm { -class Function; -class Regex; - -// Blacklisted functions are not instrumented. -// The blacklist file contains one or more lines like this: -// --- -// fun:FunctionWildCard -// --- -// This is similar to the "ignore" feature of ThreadSanitizer. 
-// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores -class FunctionBlackList { - public: - FunctionBlackList(const std::string &Path); - bool isIn(const Function &F); - private: - Regex *Functions; -}; - -} // namespace llvm diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 264a6a6..9fcde31 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -88,11 +88,10 @@ namespace { // Add the function to write out all our counters to the global destructor // list. - void insertCounterWriteout(SmallVector<std::pair<GlobalVariable *, - MDNode *>, 8> &); + void insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); void insertIndirectCounterIncrement(); - std::string mangleName(DICompileUnit CU, std::string NewStem); + std::string mangleName(DICompileUnit CU, const char *NewStem); bool EmitNotes; bool EmitData; @@ -329,7 +328,7 @@ namespace { }; } -std::string GCOVProfiler::mangleName(DICompileUnit CU, std::string NewStem) { +std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) { if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) { for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) { MDNode *N = GCov->getOperand(i); @@ -630,7 +629,7 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() { } void GCOVProfiler::insertCounterWriteout( - SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> &CountersBySP) { + ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) { FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false); Function *WriteoutF = Function::Create(WriteoutFTy, @@ -652,7 +651,7 @@ void GCOVProfiler::insertCounterWriteout( std::string FilenameGcda = mangleName(compile_unit, "gcda"); Builder.CreateCall(StartFile, Builder.CreateGlobalStringPtr(FilenameGcda)); - for (SmallVector<std::pair<GlobalVariable *, MDNode *>, 8>::iterator + for (ArrayRef<std::pair<GlobalVariable *, MDNode *> >::iterator I = CountersBySP.begin(), E = CountersBySP.end(); I != E; ++I) { DISubprogram SP(I->second); diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index dc0fa71..17b7775 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -21,7 +21,7 @@ #define DEBUG_TYPE "tsan" -#include "FunctionBlackList.h" +#include "BlackList.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" #include "llvm/Intrinsics.h" @@ -50,7 +50,7 @@ static cl::opt<std::string> ClBlackListFile("tsan-blacklist", STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); -STATISTIC(NumOmittedReadsBeforeWrite, +STATISTIC(NumOmittedReadsBeforeWrite, "Number of reads ignored due to following writes"); STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size"); STATISTIC(NumInstrumentedVtableWrites, "Number of vtable ptr writes"); @@ -77,7 +77,7 @@ struct ThreadSanitizer : public FunctionPass { int getMemoryAccessFuncIndex(Value *Addr); TargetData *TD; - OwningPtr<FunctionBlackList> BL; + OwningPtr<BlackList> BL; IntegerType *OrdTy; // Callbacks to run-time library are computed in doInitialization. 
Function *TsanFuncEntry; @@ -121,7 +121,7 @@ bool ThreadSanitizer::doInitialization(Module &M) { TD = getAnalysisIfAvailable<TargetData>(); if (!TD) return false; - BL.reset(new FunctionBlackList(ClBlackListFile)); + BL.reset(new BlackList(ClBlackListFile)); // Always insert a call to __tsan_init into the module's CTORs. IRBuilder<> IRB(M.getContext()); @@ -186,7 +186,7 @@ bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) { NumOmittedReadsFromConstantGlobals++; return true; } - } else if(LoadInst *L = dyn_cast<LoadInst>(Addr)) { + } else if (LoadInst *L = dyn_cast<LoadInst>(Addr)) { if (isVtableAccess(L)) { // Reads from a vtable pointer can not race with any writes. NumOmittedReadsFromVtable++; @@ -344,7 +344,7 @@ static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) { case NotAtomic: assert(false); case Unordered: // Fall-through. case Monotonic: v = 1 << 0; break; - // case Consume: v = 1 << 1; break; // Not specified yet. + // case Consume: v = 1 << 1; break; // Not specified yet. case Acquire: v = 1 << 2; break; case Release: v = 1 << 3; break; case AcquireRelease: v = 1 << 4; break; diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index a8deda8..5912107 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -43,6 +43,7 @@ #include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/BypassSlowDivision.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -148,7 +149,19 @@ bool CodeGenPrepare::runOnFunction(Function &F) { PFI = getAnalysisIfAvailable<ProfileInfo>(); OptSize = F.hasFnAttr(Attribute::OptimizeForSize); - // First pass, eliminate blocks that contain only PHI nodes and an + /// This optimization identifies DIV instructions that can be + /// profitably bypassed and carried out with a shorter, faster divide. + if (TLI && TLI->isSlowDivBypassed()) { + const DenseMap<Type *, Type *> &BypassTypeMap = TLI->getBypassSlowDivTypes(); + + for (Function::iterator I = F.begin(); I != F.end(); I++) { + EverMadeChange |= bypassSlowDivision(F, + I, + BypassTypeMap); + } + } + + // Eliminate blocks that contain only PHI nodes and an // unconditional branch. EverMadeChange |= EliminateMostlyEmptyBlocks(F); @@ -988,7 +1001,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, WeakVH IterHandle(CurInstIterator); BasicBlock *BB = CurInstIterator->getParent(); - RecursivelyDeleteTriviallyDeadInstructions(Repl); + RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); if (IterHandle != CurInstIterator) { // If the iterator instruction was recursively deleted, start over at the @@ -1174,17 +1187,32 @@ static bool isFormingBranchFromSelectProfitable(SelectInst *SI) { } +/// If we have a SelectInst that will likely profit from branch prediction, +/// turn it into a branch. bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) { - // If we have a SelectInst that will likely profit from branch prediction, - // turn it into a branch. - if (DisableSelectToBranch || OptSize || !TLI || - !TLI->isPredictableSelectExpensive()) - return false; + bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); - if (!SI->getCondition()->getType()->isIntegerTy(1) || - !isFormingBranchFromSelectProfitable(SI)) + // Can we convert the 'select' to CF ? 
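Earlier in this CodeGenPrepare hunk, bypassSlowDivision is invoked for targets whose TargetLowering reports isSlowDivBypassed(), which ties back to the HasSlowDivide flag added to X86Subtarget. The transformation amounts to guarding a wide divide with a cheap runtime check and a narrower divide; a rough hand-written equivalent (illustration only, the actual bit widths come from getBypassSlowDivTypes()):

  #include <cstdint>

  // If both operands fit in 32 bits, use the much cheaper 32-bit divide;
  // otherwise fall back to the full 64-bit divide.
  uint64_t div64(uint64_t A, uint64_t B) {
    if (((A | B) >> 32) == 0)
      return uint64_t(uint32_t(A) / uint32_t(B));
    return A / B;
  }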
+ if (DisableSelectToBranch || OptSize || !TLI || VectorCond) return false; + TargetLowering::SelectSupportKind SelectKind; + if (VectorCond) + SelectKind = TargetLowering::VectorMaskSelect; + else if (SI->getType()->isVectorTy()) + SelectKind = TargetLowering::ScalarCondVectorVal; + else + SelectKind = TargetLowering::ScalarValSelect; + + // Do we have efficient codegen support for this kind of 'selects' ? + if (TLI->isSelectSupported(SelectKind)) { + // We have efficient codegen support for the select instruction. + // Check if it is profitable to keep this 'select'. + if (!TLI->isPredictableSelectExpensive() || + !isFormingBranchFromSelectProfitable(SI)) + return false; + } + ModifiedDT = true; // First, we split the block containing the select into 2 blocks. diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index 8dbcc23..086f0a1 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -22,6 +22,7 @@ #include "llvm/Instruction.h" #include "llvm/Pass.h" #include "llvm/Support/InstIterator.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -38,10 +39,11 @@ namespace { initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry()); } virtual bool runOnBasicBlock(BasicBlock &BB) { + TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); bool Changed = false; for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) { Instruction *Inst = DI++; - if (isInstructionTriviallyDead(Inst)) { + if (isInstructionTriviallyDead(Inst, TLI)) { Inst->eraseFromParent(); Changed = true; ++DIEEliminated; @@ -87,6 +89,8 @@ char DCE::ID = 0; INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false) bool DCE::runOnFunction(Function &F) { + TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); + // Start out with all of the instructions in the worklist... std::vector<Instruction*> WorkList; for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) @@ -101,7 +105,7 @@ bool DCE::runOnFunction(Function &F) { Instruction *I = WorkList.back(); WorkList.pop_back(); - if (isInstructionTriviallyDead(I)) { // If the instruction is dead. + if (isInstructionTriviallyDead(I, TLI)) { // If the instruction is dead. // Loop over all of the values that the instruction uses, if there are // instructions being used, add them to the worklist, because they might // go dead after this one is removed. 
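The DCE changes above pass an optional TargetLibraryInfo into isInstructionTriviallyDead, so that, for example, a call to malloc whose result is never used can be recognized as dead via the library-function check. A condensed sketch of the same sweep (simplified from the code above, assuming the 2012-era headers used elsewhere in this patch):

  #include "llvm/Function.h"
  #include "llvm/Instruction.h"
  #include "llvm/Support/InstIterator.h"
  #include "llvm/Target/TargetLibraryInfo.h"
  #include "llvm/Transforms/Utils/Local.h"
  using namespace llvm;

  // TLI may be null; when present it lets the triviality check reason about
  // known library calls (allocation functions and friends).
  static bool eraseTriviallyDead(Function &F, const TargetLibraryInfo *TLI) {
    bool Changed = false;
    for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ) {
      Instruction *Inst = &*I++;
      if (isInstructionTriviallyDead(Inst, TLI)) {
        Inst->eraseFromParent();
        Changed = true;
      }
    }
    return Changed;
  }

Unlike the worklist version in DCE::runOnFunction, this single sweep does not revisit operands that only become dead after their user is erased.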
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 8b1283f..1ff4329 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -106,6 +106,7 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } /// static void DeleteDeadInstruction(Instruction *I, MemoryDependenceAnalysis &MD, + const TargetLibraryInfo *TLI, SmallSetVector<Value*, 16> *ValueSet = 0) { SmallVector<Instruction*, 32> NowDeadInsts; @@ -130,7 +131,7 @@ static void DeleteDeadInstruction(Instruction *I, if (!Op->use_empty()) continue; if (Instruction *OpI = dyn_cast<Instruction>(Op)) - if (isInstructionTriviallyDead(OpI)) + if (isInstructionTriviallyDead(OpI, TLI)) NowDeadInsts.push_back(OpI); } @@ -276,7 +277,7 @@ static Value *getStoredPointerOperand(Instruction *I) { static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) { uint64_t Size; - if (getObjectSize(V, Size, AA.getTargetData())) + if (getObjectSize(V, Size, AA.getTargetData(), AA.getTargetLibraryInfo())) return Size; return AliasAnalysis::UnknownSize; } @@ -454,7 +455,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { Instruction *Inst = BBI++; // Handle 'free' calls specially. - if (CallInst *F = isFreeCall(Inst)) { + if (CallInst *F = isFreeCall(Inst, AA->getTargetLibraryInfo())) { MadeChange |= HandleFree(F); continue; } @@ -483,7 +484,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // in case we need it. WeakVH NextInst(BBI); - DeleteDeadInstruction(SI, *MD); + DeleteDeadInstruction(SI, *MD, AA->getTargetLibraryInfo()); if (NextInst == 0) // Next instruction deleted. BBI = BB.begin(); @@ -530,7 +531,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { << *DepWrite << "\n KILLER: " << *Inst << '\n'); // Delete the store and now-dead instructions that feed it. - DeleteDeadInstruction(DepWrite, *MD); + DeleteDeadInstruction(DepWrite, *MD, AA->getTargetLibraryInfo()); ++NumFastStores; MadeChange = true; @@ -640,7 +641,7 @@ bool DSE::HandleFree(CallInst *F) { Instruction *Next = llvm::next(BasicBlock::iterator(Dependency)); // DCE instructions only used to calculate that store - DeleteDeadInstruction(Dependency, *MD); + DeleteDeadInstruction(Dependency, *MD, AA->getTargetLibraryInfo()); ++NumFastStores; MadeChange = true; @@ -680,7 +681,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // Okay, so these are dead heap objects, but if the pointer never escapes // then it's leaked by this function anyways. - else if (isAllocLikeFn(I) && !PointerMayBeCaptured(I, true, true)) + else if (isAllocLikeFn(I, AA->getTargetLibraryInfo()) && + !PointerMayBeCaptured(I, true, true)) DeadStackObjects.insert(I); } @@ -724,7 +726,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) { dbgs() << '\n'); // DCE instructions only used to calculate that store. - DeleteDeadInstruction(Dead, *MD, &DeadStackObjects); + DeleteDeadInstruction(Dead, *MD, AA->getTargetLibraryInfo(), + &DeadStackObjects); ++NumFastStores; MadeChange = true; continue; @@ -732,9 +735,10 @@ bool DSE::handleEndBlock(BasicBlock &BB) { } // Remove any dead non-memory-mutating instructions. 
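DeadStoreElimination above now threads TargetLibraryInfo into the helpers that recognize free calls and alloc-like functions, and its end-of-block sweep deletes stores into stack objects (and non-escaping heap objects) that can never be read again. A minimal example of the kind of store that sweep removes (illustration only):

  #include <cstring>

  int sum4(const int *In) {
    int Tmp[4];
    std::memcpy(Tmp, In, sizeof(Tmp));         // still read below, so it stays
    int S = Tmp[0] + Tmp[1] + Tmp[2] + Tmp[3];
    Tmp[0] = S;    // never read again and Tmp never escapes: removable by DSE
    return S;
  }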
- if (isInstructionTriviallyDead(BBI)) { + if (isInstructionTriviallyDead(BBI, AA->getTargetLibraryInfo())) { Instruction *Inst = BBI++; - DeleteDeadInstruction(Inst, *MD, &DeadStackObjects); + DeleteDeadInstruction(Inst, *MD, AA->getTargetLibraryInfo(), + &DeadStackObjects); ++NumFastOther; MadeChange = true; continue; @@ -750,7 +754,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (CallSite CS = cast<Value>(BBI)) { // Remove allocation function calls from the list of dead stack objects; // there can't be any references before the definition. - if (isAllocLikeFn(BBI)) + if (isAllocLikeFn(BBI, AA->getTargetLibraryInfo())) DeadStackObjects.remove(BBI); // If this call does not access memory, it can't be loading any of our @@ -771,15 +775,15 @@ bool DSE::handleEndBlock(BasicBlock &BB) { LiveAllocas.push_back(*I); } - for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(), - E = LiveAllocas.end(); I != E; ++I) - DeadStackObjects.remove(*I); - // If all of the allocas were clobbered by the call then we're not going // to find anything else to process. - if (DeadStackObjects.empty()) + if (DeadStackObjects.size() == LiveAllocas.size()) break; + for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(), + E = LiveAllocas.end(); I != E; ++I) + DeadStackObjects.remove(*I); + continue; } diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 9759549..2627113 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -374,7 +374,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { Instruction *Inst = I++; // Dead instructions should just be removed. - if (isInstructionTriviallyDead(Inst)) { + if (isInstructionTriviallyDead(Inst, TLI)) { DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n'); Inst->eraseFromParent(); Changed = true; diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 4822fd0..16ae6ad 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -271,16 +271,16 @@ void ValueTable::add(Value *V, uint32_t num) { valueNumbering.insert(std::make_pair(V, num)); } -uint32_t ValueTable::lookup_or_add_call(CallInst* C) { +uint32_t ValueTable::lookup_or_add_call(CallInst *C) { if (AA->doesNotAccessMemory(C)) { Expression exp = create_expression(C); - uint32_t& e = expressionNumbering[exp]; + uint32_t &e = expressionNumbering[exp]; if (!e) e = nextValueNumber++; valueNumbering[C] = e; return e; } else if (AA->onlyReadsMemory(C)) { Expression exp = create_expression(C); - uint32_t& e = expressionNumbering[exp]; + uint32_t &e = expressionNumbering[exp]; if (!e) { e = nextValueNumber++; valueNumbering[C] = e; @@ -413,7 +413,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) { case Instruction::LShr: case Instruction::AShr: case Instruction::And: - case Instruction::Or : + case Instruction::Or: case Instruction::Xor: case Instruction::ICmp: case Instruction::FCmp: @@ -632,6 +632,7 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false) +#ifndef NDEBUG void GVN::dump(DenseMap<uint32_t, Value*>& d) { errs() << "{\n"; for (DenseMap<uint32_t, Value*>::iterator I = d.begin(), @@ -641,6 +642,7 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) { } errs() << "}\n"; } +#endif /// IsValueFullyAvailableInBlock - Return true if we can prove that the value /// we're analyzing is fully available in the specified block. 
As we go, keep @@ -1436,7 +1438,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { Instruction *DepInst = DepInfo.getInst(); // Loading the allocation -> undef. - if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst) || + if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) || // Loading immediately after lifetime begin -> undef. isLifetimeStart(DepInst)) { ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, @@ -1951,7 +1953,7 @@ bool GVN::processLoad(LoadInst *L) { // If this load really doesn't depend on anything, then we must be loading an // undef value. This can happen when loading for a fresh allocation with no // intervening stores, for example. - if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst)) { + if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI)) { L->replaceAllUsesWith(UndefValue::get(L->getType())); markInstructionForDeletion(L); ++NumGVNLoad; @@ -2231,12 +2233,20 @@ bool GVN::processInstruction(Instruction *I) { Value *SwitchCond = SI->getCondition(); BasicBlock *Parent = SI->getParent(); bool Changed = false; + + // Remember how many outgoing edges there are to every successor. + SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges; + for (unsigned i = 0, n = SI->getNumSuccessors(); i != n; ++i) + ++SwitchEdges[SI->getSuccessor(i)]; + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) { BasicBlock *Dst = i.getCaseSuccessor(); - BasicBlockEdge E(Parent, Dst); - if (E.isSingleEdge()) + // If there is only a single edge, propagate the case value into it. + if (SwitchEdges.lookup(Dst) == 1) { + BasicBlockEdge E(Parent, Dst); Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E); + } } return Changed; } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 37f8bdf..c933a17 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -44,6 +44,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -68,6 +69,7 @@ namespace { ScalarEvolution *SE; DominatorTree *DT; TargetData *TD; + TargetLibraryInfo *TLI; SmallVector<WeakVH, 16> DeadInsts; bool Changed; @@ -414,11 +416,11 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // new comparison. NewCompare->takeName(Compare); Compare->replaceAllUsesWith(NewCompare); - RecursivelyDeleteTriviallyDeadInstructions(Compare); + RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI); // Delete the old floating point increment. Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); - RecursivelyDeleteTriviallyDeadInstructions(Incr); + RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI); // If the FP induction variable still has uses, this is because something else // in the loop uses its value. 
In order to canonicalize the induction @@ -431,7 +433,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", PN->getParent()->getFirstInsertionPt()); PN->replaceAllUsesWith(Conv); - RecursivelyDeleteTriviallyDeadInstructions(PN); + RecursivelyDeleteTriviallyDeadInstructions(PN, TLI); } Changed = true; } @@ -550,14 +552,14 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { PN->setIncomingValue(i, ExitVal); // If this instruction is dead now, delete it. - RecursivelyDeleteTriviallyDeadInstructions(Inst); + RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); if (NumPreds == 1) { // Completely replace a single-pred PHI. This is safe, because the // NewVal won't be variant in the loop, so we don't need an LCSSA phi // node anymore. PN->replaceAllUsesWith(ExitVal); - RecursivelyDeleteTriviallyDeadInstructions(PN); + RecursivelyDeleteTriviallyDeadInstructions(PN, TLI); } } if (NumPreds != 1) { @@ -1697,6 +1699,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { SE = &getAnalysis<ScalarEvolution>(); DT = &getAnalysis<DominatorTree>(); TD = getAnalysisIfAvailable<TargetData>(); + TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); DeadInsts.clear(); Changed = false; @@ -1763,7 +1766,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { while (!DeadInsts.empty()) if (Instruction *Inst = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); + RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); // The Rewriter may not be used from this point on. @@ -1772,7 +1775,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { SinkUnusedInvariants(L); // Clean up dead instructions. - Changed |= DeleteDeadPHIs(L->getHeader()); + Changed |= DeleteDeadPHIs(L->getHeader(), TLI); // Check a post-condition. assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!"); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index dd42c59..20844c6 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -1455,7 +1455,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // At this point, the IR is fully up to date and consistent. Do a quick scan // over the new instructions and zap any that are constants or dead. This // frequently happens because of phi translation. - SimplifyInstructionsInBlock(NewBB, TD); + SimplifyInstructionsInBlock(NewBB, TD, TLI); // Threaded an edge! ++NumThreads; diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 0192e92..99bedce 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -108,6 +108,9 @@ namespace { BasicBlock *Preheader; // The preheader block of the current loop... Loop *CurLoop; // The current loop we are working on... AliasSetTracker *CurAST; // AliasSet information for the current loop... + bool MayThrow; // The current loop contains an instruction which + // may throw, thus preventing code motion of + // instructions with side effects. DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap; /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. @@ -240,6 +243,15 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { CurAST->add(*BB); // Incorporate the specified basic block } + MayThrow = false; + // TODO: We've already searched for instructions which may throw in subloops. 
+ // We may want to reuse this information. + for (Loop::block_iterator BB = L->block_begin(), BBE = L->block_end(); + (BB != BBE) && !MayThrow ; ++BB) + for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); + (I != E) && !MayThrow; ++I) + MayThrow |= I->mayThrow(); + // We want to visit all of the instructions in this loop... that are not parts // of our subloops (they have already had their invariants hoisted out of // their loop, into this loop, so there is no need to process the BODIES of @@ -307,7 +319,7 @@ void LICM::SinkRegion(DomTreeNode *N) { // If the instruction is dead, we would try to sink it because it isn't used // in the loop, instead, just delete it. - if (isInstructionTriviallyDead(&I)) { + if (isInstructionTriviallyDead(&I, TLI)) { DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); ++II; CurAST->deleteValue(&I); @@ -418,17 +430,22 @@ bool LICM::canSinkOrHoistInst(Instruction &I) { if (!FoundMod) return true; } - // FIXME: This should use mod/ref information to see if we can hoist or sink - // the call. + // FIXME: This should use mod/ref information to see if we can hoist or + // sink the call. return false; } - // Otherwise these instructions are hoistable/sinkable - return isa<BinaryOperator>(I) || isa<CastInst>(I) || - isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || - isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || - isa<ShuffleVectorInst>(I); + // Only these instructions are hoistable/sinkable. + bool HoistableKind = (isa<BinaryOperator>(I) || isa<CastInst>(I) || + isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || + isa<CmpInst>(I) || isa<InsertElementInst>(I) || + isa<ExtractElementInst>(I) || + isa<ShuffleVectorInst>(I)); + if (!HoistableKind) + return false; + + return isSafeToExecuteUnconditionally(I); } /// isNotUsedInLoop - Return true if the only users of this instruction are @@ -604,6 +621,12 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { } bool LICM::isGuaranteedToExecute(Instruction &Inst) { + + // Somewhere in this loop there is an instruction which may throw and make us + // exit the loop. + if (MayThrow) + return false; + // Otherwise we have to check to make sure that the instruction dominates all // of the exit blocks. If it doesn't, then there is a path out of the loop // which does not execute this instruction, so we can't hoist it. diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index ac1082c..a72e288 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -132,7 +132,8 @@ Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); } /// and zero out all the operands of this instruction. If any of them become /// dead, delete them and the computation tree that feeds them. 
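The LICM change above walks the loop once up front and records in MayThrow whether any contained instruction may throw; isGuaranteedToExecute then answers false conservatively, because an exception could leave the loop before the candidate instruction ever runs. A small source-level illustration (callee is a hypothetical external function):

  void callee(int);   // hypothetical; may throw

  int f(int N, int M, int D) {
    int S = 0;
    for (int I = 0; I < N; ++I) {
      callee(I);   // may throw, so later instructions in the body are not
                   // guaranteed to execute on every entered iteration
      S += M / D;  // loop-invariant, but hoisting it past the call could
    }              // introduce a division fault (D == 0) the program never had
    return S;
  }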
/// -static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { +static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE, + const TargetLibraryInfo *TLI) { SmallVector<Instruction*, 32> NowDeadInsts; NowDeadInsts.push_back(I); @@ -153,7 +154,7 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { if (!Op->use_empty()) continue; if (Instruction *OpI = dyn_cast<Instruction>(Op)) - if (isInstructionTriviallyDead(OpI)) + if (isInstructionTriviallyDead(OpI, TLI)) NowDeadInsts.push_back(OpI); } @@ -164,10 +165,11 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { /// deleteIfDeadInstruction - If the specified value is a dead instruction, /// delete it and any recursively used instructions. -static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) { +static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE, + const TargetLibraryInfo *TLI) { if (Instruction *I = dyn_cast<Instruction>(V)) - if (isInstructionTriviallyDead(I)) - deleteDeadInstruction(I, SE); + if (isInstructionTriviallyDead(I, TLI)) + deleteDeadInstruction(I, SE, TLI); } bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { @@ -490,7 +492,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){ Expander.clear(); // If we generated new code for the base pointer, clean up. - deleteIfDeadInstruction(BasePtr, *SE); + deleteIfDeadInstruction(BasePtr, *SE, TLI); return false; } @@ -538,7 +540,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. - deleteDeadInstruction(TheStore, *SE); + deleteDeadInstruction(TheStore, *SE, TLI); ++NumMemSet; return true; } @@ -579,7 +581,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, getAnalysis<AliasAnalysis>(), SI)) { Expander.clear(); // If we generated new code for the base pointer, clean up. - deleteIfDeadInstruction(StoreBasePtr, *SE); + deleteIfDeadInstruction(StoreBasePtr, *SE, TLI); return false; } @@ -594,8 +596,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, StoreSize, getAnalysis<AliasAnalysis>(), SI)) { Expander.clear(); // If we generated new code for the base pointer, clean up. - deleteIfDeadInstruction(LoadBasePtr, *SE); - deleteIfDeadInstruction(StoreBasePtr, *SE); + deleteIfDeadInstruction(LoadBasePtr, *SE, TLI); + deleteIfDeadInstruction(StoreBasePtr, *SE, TLI); return false; } @@ -628,7 +630,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. 
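For context on the LoopIdiomRecognize hunks above (the TLI argument itself is only plumbing), this is the kind of loop processLoopStridedStore rewrites; a hypothetical source-level sketch, not from the patch. After the memset call is emitted, the original store and any address computation feeding only it are removed through deleteDeadInstruction, which also makes ScalarEvolution forget them.

    void zero_bytes(char *p, unsigned n) {
      for (unsigned i = 0; i < n; ++i)
        p[i] = 0;                      // recognized and replaced by memset(p, 0, n)
    }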
- deleteDeadInstruction(SI, *SE); + deleteDeadInstruction(SI, *SE, TLI); ++NumMemCpy; return true; } diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 982400c..f5daa7b 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -120,7 +120,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { ++NumSimplified; } } - LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I); + LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI); if (IsSubloopHeader && !isa<PHINode>(I)) break; diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 7eeb152..abe07aa 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -24,6 +24,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -256,6 +257,7 @@ bool LoopRotate::rotateLoop(Loop *L) { return false; BasicBlock *OrigHeader = L->getHeader(); + BasicBlock *OrigLatch = L->getLoopLatch(); BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator()); if (BI == 0 || BI->isUnconditional()) @@ -267,13 +269,9 @@ bool LoopRotate::rotateLoop(Loop *L) { if (!L->isLoopExiting(OrigHeader)) return false; - // Updating PHInodes in loops with multiple exits adds complexity. - // Keep it simple, and restrict loop rotation to loops with one exit only. - // In future, lift this restriction and support for multiple exits if - // required. - SmallVector<BasicBlock*, 8> ExitBlocks; - L->getExitBlocks(ExitBlocks); - if (ExitBlocks.size() > 1) + // If the loop latch already contains a branch that leaves the loop then the + // loop is already rotated. + if (OrigLatch == 0 || L->isLoopExiting(OrigLatch)) return false; // Check size of original header and reject loop if it is very big. @@ -286,11 +284,10 @@ bool LoopRotate::rotateLoop(Loop *L) { // Now, this loop is suitable for rotation. BasicBlock *OrigPreheader = L->getLoopPreheader(); - BasicBlock *OrigLatch = L->getLoopLatch(); // If the loop could not be converted to canonical form, it must have an // indirectbr in it, just give up. - if (OrigPreheader == 0 || OrigLatch == 0) + if (OrigPreheader == 0) return false; // Anything ScalarEvolution may know about this loop or the PHI nodes @@ -298,6 +295,8 @@ bool LoopRotate::rotateLoop(Loop *L) { if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>()) SE->forgetLoop(L); + DEBUG(dbgs() << "LoopRotation: rotating "; L->dump()); + // Find new Loop header. NewHeader is a Header's one and only successor // that is inside loop. Header's other successor is outside the // loop. Otherwise loop is not suitable for rotation. @@ -408,10 +407,19 @@ bool LoopRotate::rotateLoop(Loop *L) { // Update DominatorTree to reflect the CFG change we just made. Then split // edges as necessary to preserve LoopSimplify form. if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) { - // Since OrigPreheader now has the conditional branch to Exit block, it is - // the dominator of Exit. - DT->changeImmediateDominator(Exit, OrigPreheader); - DT->changeImmediateDominator(NewHeader, OrigPreheader); + // Everything that was dominated by the old loop header is now dominated + // by the original loop preheader. 
Conceptually the header was merged + // into the preheader, even though we reuse the actual block as a new + // loop latch. + DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader); + SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(), + OrigHeaderNode->end()); + DomTreeNode *OrigPreheaderNode = DT->getNode(OrigPreheader); + for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) + DT->changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode); + + assert(DT->getNode(Exit)->getIDom() == OrigPreheaderNode); + assert(DT->getNode(NewHeader)->getIDom() == OrigPreheaderNode); // Update OrigHeader to be dominated by the new header block. DT->changeImmediateDominator(OrigHeader, OrigLatch); @@ -440,6 +448,35 @@ bool LoopRotate::rotateLoop(Loop *L) { // Update OrigHeader to be dominated by the new header block. DT->changeImmediateDominator(NewHeader, OrigPreheader); DT->changeImmediateDominator(OrigHeader, OrigLatch); + + // Brute force incremental dominator tree update. Call + // findNearestCommonDominator on all CFG predecessors of each child of the + // original header. + DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader); + SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(), + OrigHeaderNode->end()); + bool Changed; + do { + Changed = false; + for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) { + DomTreeNode *Node = HeaderChildren[I]; + BasicBlock *BB = Node->getBlock(); + + pred_iterator PI = pred_begin(BB); + BasicBlock *NearestDom = *PI; + for (pred_iterator PE = pred_end(BB); PI != PE; ++PI) + NearestDom = DT->findNearestCommonDominator(NearestDom, *PI); + + // Remember if this changes the DomTree. + if (Node->getIDom()->getBlock() != NearestDom) { + DT->changeImmediateDominator(BB, NearestDom); + Changed = true; + } + } + + // If the dominator changed, this may have an effect on other + // predecessors, continue until we reach a fixpoint. + } while (Changed); } } @@ -452,6 +489,8 @@ bool LoopRotate::rotateLoop(Loop *L) { // emitted code isn't too gross in this common case. MergeBlockIntoPredecessor(OrigHeader, this); + DEBUG(dbgs() << "LoopRotation: into "; L->dump()); + ++NumRotated; return true; } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 0ae7a51..d7495da 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -121,9 +121,11 @@ void RegSortData::print(raw_ostream &OS) const { OS << "[NumUses=" << UsedByIndices.count() << ']'; } +#ifndef NDEBUG void RegSortData::dump() const { print(errs()); errs() << '\n'; } +#endif namespace { @@ -414,9 +416,11 @@ void Formula::print(raw_ostream &OS) const { } } +#ifndef NDEBUG void Formula::dump() const { print(errs()); errs() << '\n'; } +#endif /// isAddRecSExtable - Return true if the given addrec can be sign-extended /// without changing its value. 
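An illustrative sketch of the LoopRotation change above, not part of the patch: the new early exit simply detects an already-rotated loop, where the latch carries the exiting branch. Hypothetical source-level shape:

    void body(int);
    void rotated(int i, int n) {
      // Before rotation the header carries the exit test:
      //   while (i < n) { body(i); ++i; }
      // After rotation the test is duplicated into the old preheader as a guard and
      // the latch ends in the exiting branch (do-while form), which is exactly what
      // L->isLoopExiting(OrigLatch) detects on an already-rotated loop:
      if (i < n) {
        do {
          body(i);
          ++i;
        } while (i < n);
      }
    }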
@@ -974,9 +978,11 @@ void Cost::print(raw_ostream &OS) const { OS << ", plus " << SetupCost << " setup cost"; } +#ifndef NDEBUG void Cost::dump() const { print(errs()); errs() << '\n'; } +#endif namespace { @@ -1060,9 +1066,11 @@ void LSRFixup::print(raw_ostream &OS) const { OS << ", Offset=" << Offset; } +#ifndef NDEBUG void LSRFixup::dump() const { print(errs()); errs() << '\n'; } +#endif namespace { @@ -1252,9 +1260,11 @@ void LSRUse::print(raw_ostream &OS) const { OS << ", widest fixup type: " << *WidestFixupType; } +#ifndef NDEBUG void LSRUse::dump() const { print(errs()); errs() << '\n'; } +#endif /// isLegalUse - Test whether the use described by AM is "legal", meaning it can /// be completely folded into the user instruction at isel time. This includes @@ -3436,9 +3446,11 @@ void WorkItem::print(raw_ostream &OS) const { << " , add offset " << Imm; } +#ifndef NDEBUG void WorkItem::dump() const { print(errs()); errs() << '\n'; } +#endif /// GenerateCrossUseConstantOffsets - Look for registers which are a constant /// distance apart and try to form reuse opportunities between them. @@ -4731,9 +4743,11 @@ void LSRInstance::print(raw_ostream &OS) const { print_uses(OS); } +#ifndef NDEBUG void LSRInstance::dump() const { print(errs()); errs() << '\n'; } +#endif namespace { diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 3222f20..dce8e8b 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -1236,16 +1236,19 @@ bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) { // An ObjC-Identified object can't alias a load if it is never locally stored. if (AIsIdentified) { + // Check for an obvious escape. + if (isa<LoadInst>(B)) + return isStoredObjCPointer(A); if (BIsIdentified) { - // If both pointers have provenance, they can be directly compared. - if (A != B) - return false; - } else { - if (isa<LoadInst>(B)) - return isStoredObjCPointer(A); + // Check for an obvious escape. + if (isa<LoadInst>(A)) + return isStoredObjCPointer(B); + // Both pointers are identified and escapes aren't an evident problem. + return false; } - } else { - if (BIsIdentified && isa<LoadInst>(A)) + } else if (BIsIdentified) { + // Check for an obvious escape. + if (isa<LoadInst>(A)) return isStoredObjCPointer(B); } @@ -1381,9 +1384,6 @@ namespace { /// PtrState - This class summarizes several per-pointer runtime properties /// which are propogated through the flow graph. class PtrState { - /// NestCount - The known minimum level of retain+release nesting. - unsigned NestCount; - /// KnownPositiveRefCount - True if the reference count is known to /// be incremented. bool KnownPositiveRefCount; @@ -1401,7 +1401,7 @@ namespace { /// TODO: Encapsulate this better. 
RRInfo RRI; - PtrState() : NestCount(0), KnownPositiveRefCount(false), Partial(false), + PtrState() : KnownPositiveRefCount(false), Partial(false), Seq(S_None) {} void SetKnownPositiveRefCount() { @@ -1416,18 +1416,6 @@ namespace { return KnownPositiveRefCount; } - void IncrementNestCount() { - if (NestCount != UINT_MAX) ++NestCount; - } - - void DecrementNestCount() { - if (NestCount != 0) --NestCount; - } - - bool IsKnownNested() const { - return NestCount > 0; - } - void SetSeq(Sequence NewSeq) { Seq = NewSeq; } @@ -1454,7 +1442,6 @@ void PtrState::Merge(const PtrState &Other, bool TopDown) { Seq = MergeSeqs(Seq, Other.Seq, TopDown); KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount; - NestCount = std::min(NestCount, Other.NestCount); // We can't merge a plain objc_retain with an objc_retainBlock. if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock) @@ -1868,6 +1855,26 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { return AutoreleaseCallee; } +/// IsPotentialUse - Test whether the given value is possible a +/// reference-counted pointer, including tests which utilize AliasAnalysis. +static bool IsPotentialUse(const Value *Op, AliasAnalysis &AA) { + // First make the rudimentary check. + if (!IsPotentialUse(Op)) + return false; + + // Objects in constant memory are not reference-counted. + if (AA.pointsToConstantMemory(Op)) + return false; + + // Pointers in constant memory are not pointing to reference-counted objects. + if (const LoadInst *LI = dyn_cast<LoadInst>(Op)) + if (AA.pointsToConstantMemory(LI->getPointerOperand())) + return false; + + // Otherwise assume the worst. + return true; +} + /// CanAlterRefCount - Test whether the given instruction can result in a /// reference count modification (positive or negative) for the pointer's /// object. @@ -1894,7 +1901,7 @@ CanAlterRefCount(const Instruction *Inst, const Value *Ptr, for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; ++I) { const Value *Op = *I; - if (IsPotentialUse(Op) && PA.related(Ptr, Op)) + if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) return true; } return false; @@ -1919,14 +1926,14 @@ CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, // Comparing a pointer with null, or any other constant, isn't really a use, // because we don't care what the pointer points to, or about the values // of any other dynamic reference-counted pointers. - if (!IsPotentialUse(ICI->getOperand(1))) + if (!IsPotentialUse(ICI->getOperand(1), *PA.getAA())) return false; } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) { // For calls, just check the arguments (and not the callee operand). for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(), OE = CS.arg_end(); OI != OE; ++OI) { const Value *Op = *OI; - if (IsPotentialUse(Op) && PA.related(Ptr, Op)) + if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) return true; } return false; @@ -1936,14 +1943,14 @@ CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand()); // If we can't tell what the underlying object was, assume there is a // dependence. - return IsPotentialUse(Op) && PA.related(Op, Ptr); + return IsPotentialUse(Op, *PA.getAA()) && PA.related(Op, Ptr); } // Check each operand for a match. 
for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end(); OI != OE; ++OI) { const Value *Op = *OI; - if (IsPotentialUse(Op) && PA.related(Ptr, Op)) + if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) return true; } return false; @@ -2612,11 +2619,11 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release); S.RRI.ReleaseMetadata = ReleaseMetadata; - S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented(); + S.RRI.KnownSafe = S.IsKnownIncremented(); S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); S.RRI.Calls.insert(Inst); - S.IncrementNestCount(); + S.SetKnownPositiveRefCount(); break; } case IC_RetainBlock: @@ -2631,7 +2638,6 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, PtrState &S = MyStates.getPtrBottomUpState(Arg); S.SetKnownPositiveRefCount(); - S.DecrementNestCount(); switch (S.GetSeq()) { case S_Stop: @@ -2747,8 +2753,9 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, // Merge the states from each successor to compute the initial state // for the current block. - for (BBState::edge_iterator SI(MyStates.succ_begin()), - SE(MyStates.succ_end()); SI != SE; ++SI) { + BBState::edge_iterator SI(MyStates.succ_begin()), + SE(MyStates.succ_end()); + if (SI != SE) { const BasicBlock *Succ = *SI; DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ); assert(I != BBStates.end()); @@ -2760,7 +2767,6 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, assert(I != BBStates.end()); MyStates.MergeSucc(I->second); } - break; } // Visit all the instructions, bottom-up. @@ -2823,12 +2829,11 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, S.ResetSequenceProgress(S_Retain); S.RRI.IsRetainBlock = Class == IC_RetainBlock; - // Don't check S.IsKnownIncremented() here because it's not sufficient. - S.RRI.KnownSafe = S.IsKnownNested(); + S.RRI.KnownSafe = S.IsKnownIncremented(); S.RRI.Calls.insert(Inst); } - S.IncrementNestCount(); + S.SetKnownPositiveRefCount(); // A retain can be a potential use; procede to the generic checking // code below. @@ -2838,7 +2843,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, Arg = GetObjCArg(Inst); PtrState &S = MyStates.getPtrTopDownState(Arg); - S.DecrementNestCount(); + S.ClearRefCount(); switch (S.GetSeq()) { case S_Retain: @@ -2935,8 +2940,9 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, // Merge the states from each predecessor to compute the initial state // for the current block. - for (BBState::edge_iterator PI(MyStates.pred_begin()), - PE(MyStates.pred_end()); PI != PE; ++PI) { + BBState::edge_iterator PI(MyStates.pred_begin()), + PE(MyStates.pred_end()); + if (PI != PE) { const BasicBlock *Pred = *PI; DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred); assert(I != BBStates.end()); @@ -2948,7 +2954,6 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, assert(I != BBStates.end()); MyStates.MergePred(I->second); } - break; } // Visit all the instructions, top-down. 
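A minimal restatement of the ObjCARC reasoning above, not part of the patch: a pointer that provably lives in constant memory can never be a dynamically reference-counted object, so it cannot alter or use a tracked retain count. The helper name below is hypothetical and LLVM 3.x-era headers are assumed.

    #include "llvm/Instructions.h"
    #include "llvm/Analysis/AliasAnalysis.h"

    static bool definitelyNotRefCounted(const llvm::Value *Op, llvm::AliasAnalysis &AA) {
      if (AA.pointsToConstantMemory(Op))
        return true;   // objects in constant memory are never reference counted
      if (const llvm::LoadInst *LI = llvm::dyn_cast<llvm::LoadInst>(Op))
        // ...and constant memory cannot hold a pointer that ARC code stored there.
        return AA.pointsToConstantMemory(LI->getPointerOperand());
      return false;
    }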
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index d13e4ab..6d27db1 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -59,9 +59,9 @@ FunctionPass *llvm::createCFGSimplificationPass() { return new CFGSimplifyPass(); } -/// ChangeToUnreachable - Insert an unreachable instruction before the specified +/// changeToUnreachable - Insert an unreachable instruction before the specified /// instruction, making it and the rest of the code in the block dead. -static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { +static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { BasicBlock *BB = I->getParent(); // Loop over all of the successors, removing BB's entry from any PHI // nodes. @@ -87,8 +87,8 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { } } -/// ChangeToCall - Convert the specified invoke into a normal call. -static void ChangeToCall(InvokeInst *II) { +/// changeToCall - Convert the specified invoke into a normal call. +static void changeToCall(InvokeInst *II) { SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II); NewCall->takeName(II); @@ -105,7 +105,7 @@ static void ChangeToCall(InvokeInst *II) { II->eraseFromParent(); } -static bool MarkAliveBlocks(BasicBlock *BB, +static bool markAliveBlocks(BasicBlock *BB, SmallPtrSet<BasicBlock*, 128> &Reachable) { SmallVector<BasicBlock*, 128> Worklist; @@ -129,7 +129,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, ++BBI; if (!isa<UnreachableInst>(BBI)) { // Don't insert a call to llvm.trap right before the unreachable. - ChangeToUnreachable(BBI, false); + changeToUnreachable(BBI, false); Changed = true; } break; @@ -148,7 +148,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, if (isa<UndefValue>(Ptr) || (isa<ConstantPointerNull>(Ptr) && SI->getPointerAddressSpace() == 0)) { - ChangeToUnreachable(SI, true); + changeToUnreachable(SI, true); Changed = true; break; } @@ -159,7 +159,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { Value *Callee = II->getCalledValue(); if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { - ChangeToUnreachable(II, true); + changeToUnreachable(II, true); Changed = true; } else if (II->doesNotThrow()) { if (II->use_empty() && II->onlyReadsMemory()) { @@ -168,7 +168,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, II->getUnwindDest()->removePredecessor(II->getParent()); II->eraseFromParent(); } else - ChangeToCall(II); + changeToCall(II); Changed = true; } } @@ -180,12 +180,12 @@ static bool MarkAliveBlocks(BasicBlock *BB, return Changed; } -/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even +/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even /// if they are in a dead cycle. Return true if a change was made, false /// otherwise. -static bool RemoveUnreachableBlocksFromFn(Function &F) { +static bool removeUnreachableBlocksFromFn(Function &F) { SmallPtrSet<BasicBlock*, 128> Reachable; - bool Changed = MarkAliveBlocks(F.begin(), Reachable); + bool Changed = markAliveBlocks(F.begin(), Reachable); // If there are unreachable blocks in the CFG... 
if (Reachable.size() == F.size()) @@ -215,9 +215,9 @@ static bool RemoveUnreachableBlocksFromFn(Function &F) { return true; } -/// MergeEmptyReturnBlocks - If we have more than one empty (other than phi +/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi /// node) return blocks, merge them together to promote recursive block merging. -static bool MergeEmptyReturnBlocks(Function &F) { +static bool mergeEmptyReturnBlocks(Function &F) { bool Changed = false; BasicBlock *RetBlock = 0; @@ -291,9 +291,9 @@ static bool MergeEmptyReturnBlocks(Function &F) { return Changed; } -/// IterativeSimplifyCFG - Call SimplifyCFG on all the blocks in the function, +/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function, /// iterating until no more changes are made. -static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) { +static bool iterativelySimplifyCFG(Function &F, const TargetData *TD) { bool Changed = false; bool LocalChange = true; while (LocalChange) { @@ -317,24 +317,24 @@ static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) { // bool CFGSimplifyPass::runOnFunction(Function &F) { const TargetData *TD = getAnalysisIfAvailable<TargetData>(); - bool EverChanged = RemoveUnreachableBlocksFromFn(F); - EverChanged |= MergeEmptyReturnBlocks(F); - EverChanged |= IterativeSimplifyCFG(F, TD); + bool EverChanged = removeUnreachableBlocksFromFn(F); + EverChanged |= mergeEmptyReturnBlocks(F); + EverChanged |= iterativelySimplifyCFG(F, TD); // If neither pass changed anything, we're done. if (!EverChanged) return false; - // IterativeSimplifyCFG can (rarely) make some loops dead. If this happens, - // RemoveUnreachableBlocksFromFn is needed to nuke them, which means we should + // iterativelySimplifyCFG can (rarely) make some loops dead. If this happens, + // removeUnreachableBlocksFromFn is needed to nuke them, which means we should // iterate between the two optimizations. We structure the code like this to - // avoid reruning IterativeSimplifyCFG if the second pass of - // RemoveUnreachableBlocksFromFn doesn't do anything. - if (!RemoveUnreachableBlocksFromFn(F)) + // avoid reruning iterativelySimplifyCFG if the second pass of + // removeUnreachableBlocksFromFn doesn't do anything. 
+ if (!removeUnreachableBlocksFromFn(F)) return true; do { - EverChanged = IterativeSimplifyCFG(F, TD); - EverChanged |= RemoveUnreachableBlocksFromFn(F); + EverChanged = iterativelySimplifyCFG(F, TD); + EverChanged |= removeUnreachableBlocksFromFn(F); } while (EverChanged); return true; diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 3904419..65311fe 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringMap.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" @@ -38,6 +39,10 @@ using namespace llvm; STATISTIC(NumSimplified, "Number of library calls simplified"); STATISTIC(NumAnnotated, "Number of attributes added to library functions"); +static cl::opt<bool> UnsafeFPShrink("enable-double-float-shrink", cl::Hidden, + cl::init(false), + cl::desc("Enable unsafe double to float " + "shrinking for math lib calls")); //===----------------------------------------------------------------------===// // Optimizer Base Class //===----------------------------------------------------------------------===// @@ -893,16 +898,56 @@ struct MemSetOpt : public LibCallOptimization { //===----------------------------------------------------------------------===// //===---------------------------------------===// -// 'cos*' Optimizations +// Double -> Float Shrinking Optimizations for Unary Functions like 'floor' + +struct UnaryDoubleFPOpt : public LibCallOptimization { + bool CheckRetType; + UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {} + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || + !FT->getParamType(0)->isDoubleTy()) + return 0; + + if (CheckRetType) { + // Check if all the uses for function like 'sin' are converted to float. + for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end(); + ++UseI) { + FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI); + if (Cast == 0 || !Cast->getType()->isFloatTy()) + return 0; + } + } + + // If this is something like 'floor((double)floatval)', convert to floorf. + FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0)); + if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) + return 0; + + // floor((double)floatval) -> (double)floorf(floatval) + Value *V = Cast->getOperand(0); + V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes()); + return B.CreateFPExt(V, B.getDoubleTy()); + } +}; +//===---------------------------------------===// +// 'cos*' Optimizations struct CosOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "cos" && + TLI->has(LibFunc::cosf)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B); + } + FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 1 argument of FP type, which matches the // result type. 
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isFloatingPointTy()) - return 0; + return Ret; // cos(-x) -> cos(x) Value *Op1 = CI->getArgOperand(0); @@ -910,7 +955,7 @@ struct CosOpt : public LibCallOptimization { BinaryOperator *BinExpr = cast<BinaryOperator>(Op1); return B.CreateCall(Callee, BinExpr->getOperand(1), "cos"); } - return 0; + return Ret; } }; @@ -919,13 +964,20 @@ struct CosOpt : public LibCallOptimization { struct PowOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "pow" && + TLI->has(LibFunc::powf)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B); + } + FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 2 arguments of the same FP type, which match the // result type. if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || !FT->getParamType(0)->isFloatingPointTy()) - return 0; + return Ret; Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { @@ -936,7 +988,7 @@ struct PowOpt : public LibCallOptimization { } ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); - if (Op2C == 0) return 0; + if (Op2C == 0) return Ret; if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 return ConstantFP::get(CI->getType(), 1.0); @@ -974,12 +1026,19 @@ struct PowOpt : public LibCallOptimization { struct Exp2Opt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "exp2" && + TLI->has(LibFunc::exp2)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B); + } + FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 1 argument of FP type, which matches the // result type. if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isFloatingPointTy()) - return 0; + return Ret; Value *Op = CI->getArgOperand(0); // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 @@ -1016,29 +1075,7 @@ struct Exp2Opt : public LibCallOptimization { return CI; } - return 0; - } -}; - -//===---------------------------------------===// -// Double -> Float Shrinking Optimizations for Unary Functions like 'floor' - -struct UnaryDoubleFPOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || - !FT->getParamType(0)->isDoubleTy()) - return 0; - - // If this is something like 'floor((double)floatval)', convert to floorf. 
- FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0)); - if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) - return 0; - - // floor((double)floatval) -> (double)floorf(floatval) - Value *V = Cast->getOperand(0); - V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes()); - return B.CreateFPExt(V, B.getDoubleTy()); + return Ret; } }; @@ -1534,7 +1571,8 @@ namespace { StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr; MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet; // Math Library Optimizations - CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP; + CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; + UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP; // Integer Optimizations FFSOpt FFS; AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii; ToAsciiOpt ToAscii; @@ -1547,10 +1585,13 @@ namespace { public: static char ID; // Pass identification SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true), - StpCpy(false), StpCpyChk(true) { + StpCpy(false), StpCpyChk(true), + UnaryDoubleFP(false), UnsafeUnaryDoubleFP(true) { initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry()); } void AddOpt(LibFunc::Func F, LibCallOptimization* Opt); + void AddOpt(LibFunc::Func F1, LibFunc::Func F2, LibCallOptimization* Opt); + void InitOptimizations(); bool runOnFunction(Function &F); @@ -1586,6 +1627,12 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F, LibCallOptimization* Opt) { Optimizations[TLI->getName(F)] = Opt; } +void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2, + LibCallOptimization* Opt) { + if (TLI->has(F1) && TLI->has(F2)) + Optimizations[TLI->getName(F1)] = Opt; +} + /// Optimizations - Populate the Optimizations map with all the optimizations /// we know. 
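A source-level sketch of what the SimplifyLibCalls changes above permit, not part of the patch (hypothetical code, <cmath> assumed). Both shrink variants require the double argument to be an fpext of a float; the unsafe variant, gated by -enable-double-float-shrink, extends the rewrite to calls like sin or cos whose float counterpart is not an exact match, and only when every use of the double result is truncated back to float, which is what the CheckRetType scan verifies.

    #include <cmath>

    float shrink_example(float x) {
      // Safe shrink (always enabled): floor((double)x) -> (double)floorf(x).
      double d = std::floor(static_cast<double>(x));
      // Unsafe shrink (-enable-double-float-shrink): the only use of sin()'s double
      // result is a truncation back to float, so the call may become sinf(x).
      float s = static_cast<float>(std::sin(static_cast<double>(x)));
      return static_cast<float>(d) + s;
    }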
void SimplifyLibCalls::InitOptimizations() { @@ -1641,20 +1688,37 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["llvm.exp2.f64"] = &Exp2; Optimizations["llvm.exp2.f32"] = &Exp2; - if (TLI->has(LibFunc::fabs) && TLI->has(LibFunc::fabsf)) - Optimizations["fabs"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::floor) && TLI->has(LibFunc::floorf)) - Optimizations["floor"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::ceil) && TLI->has(LibFunc::ceilf)) - Optimizations["ceil"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::round) && TLI->has(LibFunc::roundf)) - Optimizations["round"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::rint) && TLI->has(LibFunc::rintf)) - Optimizations["rint"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::nearbyint) && TLI->has(LibFunc::nearbyintf)) - Optimizations["nearbyint"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::trunc) && TLI->has(LibFunc::truncf)) - Optimizations["trunc"] = &UnaryDoubleFP; + AddOpt(LibFunc::ceil, LibFunc::ceilf, &UnaryDoubleFP); + AddOpt(LibFunc::fabs, LibFunc::fabsf, &UnaryDoubleFP); + AddOpt(LibFunc::floor, LibFunc::floorf, &UnaryDoubleFP); + AddOpt(LibFunc::rint, LibFunc::rintf, &UnaryDoubleFP); + AddOpt(LibFunc::round, LibFunc::roundf, &UnaryDoubleFP); + AddOpt(LibFunc::nearbyint, LibFunc::nearbyintf, &UnaryDoubleFP); + AddOpt(LibFunc::trunc, LibFunc::truncf, &UnaryDoubleFP); + + if(UnsafeFPShrink) { + AddOpt(LibFunc::acos, LibFunc::acosf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::acosh, LibFunc::acoshf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::asin, LibFunc::asinf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::asinh, LibFunc::asinhf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::atan, LibFunc::atanf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::atanh, LibFunc::atanhf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::cbrt, LibFunc::cbrtf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::cosh, LibFunc::coshf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::exp, LibFunc::expf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::exp10, LibFunc::exp10f, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::expm1, LibFunc::expm1f, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::log, LibFunc::logf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::log10, LibFunc::log10f, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::log1p, LibFunc::log1pf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::log2, LibFunc::log2f, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::logb, LibFunc::logbf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::sin, LibFunc::sinf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::sinh, LibFunc::sinhf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::sqrt, LibFunc::sqrtf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::tan, LibFunc::tanf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::tanh, LibFunc::tanhf, &UnsafeUnaryDoubleFP); + } // Integer Optimizations Optimizations["ffs"] = &FFS; diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index d831452..1e6586b 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -55,10 +55,12 @@ void ExtAddrMode::print(raw_ostream &OS) const { OS << ']'; } +#ifndef NDEBUG void ExtAddrMode::dump() const { print(dbgs()); dbgs() << '\n'; } +#endif /// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode. diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 2679b93..75a7817 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -94,7 +94,7 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) { /// is dead. 
Also recursively delete any operands that become dead as /// a result. This includes tracing the def-use list from the PHI to see if /// it is ultimately unused or if it reaches an unused cycle. -bool llvm::DeleteDeadPHIs(BasicBlock *BB) { +bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { // Recursively deleting a PHI may cause multiple PHIs to be deleted // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete. SmallVector<WeakVH, 8> PHIs; @@ -105,7 +105,7 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB) { bool Changed = false; for (unsigned i = 0, e = PHIs.size(); i != e; ++i) if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*())) - Changed |= RecursivelyDeleteDeadPHINode(PN); + Changed |= RecursivelyDeleteDeadPHINode(PN, TLI); return Changed; } diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp new file mode 100644 index 0000000..30d60be --- /dev/null +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -0,0 +1,253 @@ +//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an optimization for div and rem on architectures that +// execute short instructions significantly faster than longer instructions. +// For example, on Intel Atom 32-bit divides are slow enough that during +// runtime it is profitable to check the value of the operands, and if they are +// positive and less than 256 use an unsigned 8-bit divide. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "bypass-slow-division" +#include "llvm/Instructions.h" +#include "llvm/Function.h" +#include "llvm/IRBuilder.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Transforms/Utils/BypassSlowDivision.h" + +using namespace llvm; + +namespace { + struct DivOpInfo { + bool SignedOp; + Value *Dividend; + Value *Divisor; + + DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor) + : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} + }; + + struct DivPhiNodes { + PHINode *Quotient; + PHINode *Remainder; + + DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder) + : Quotient(InQuotient), Remainder(InRemainder) {} + }; +} + +namespace llvm { + template<> + struct DenseMapInfo<DivOpInfo> { + static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) { + return Val1.SignedOp == Val2.SignedOp && + Val1.Dividend == Val2.Dividend && + Val1.Divisor == Val2.Divisor; + } + + static DivOpInfo getEmptyKey() { + return DivOpInfo(false, 0, 0); + } + + static DivOpInfo getTombstoneKey() { + return DivOpInfo(true, 0, 0); + } + + static unsigned getHashValue(const DivOpInfo &Val) { + return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^ + reinterpret_cast<uintptr_t>(Val.Divisor)) ^ + (unsigned)Val.SignedOp; + } + }; + + typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy; +} + +// insertFastDiv - Substitutes the div/rem instruction with code that checks the +// value of the operands and uses a shorter-faster div/rem instruction when +// possible and the longer-slower div/rem instruction otherwise. 
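A sketch of the control flow the new BypassSlowDivision utility emits, written as hypothetical source for an i8 bypass type (the 32-bit Atom case mentioned in the file header); not part of the patch. The operands are OR'd together and masked with the inverted bypass-type bit mask; if no high bits are set, the narrow divide is taken.

    unsigned bypassed_udiv(unsigned a, unsigned b) {
      // MainBB: test whether both operands fit in the 8-bit bypass type.
      if (((a | b) & ~0xFFu) == 0)
        // FastBB: truncate, divide narrow, zero-extend the result back.
        return (unsigned char)a / (unsigned char)b;
      // SlowBB: full-width divide.
      return a / b;
    }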
+static bool insertFastDiv(Function &F, + Function::iterator &I, + BasicBlock::iterator &J, + IntegerType *BypassType, + bool UseDivOp, + bool UseSignedOp, + DivCacheTy &PerBBDivCache) { + // Get instruction operands + Instruction *Instr = J; + Value *Dividend = Instr->getOperand(0); + Value *Divisor = Instr->getOperand(1); + + if (isa<ConstantInt>(Divisor) || + (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) { + // Operations with immediate values should have + // been solved and replaced during compile time. + return false; + } + + // Basic Block is split before divide + BasicBlock *MainBB = I; + BasicBlock *SuccessorBB = I->splitBasicBlock(J); + ++I; //advance iterator I to successorBB + + // Add new basic block for slow divide operation + BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "", + MainBB->getParent(), SuccessorBB); + SlowBB->moveBefore(SuccessorBB); + IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); + Value *SlowQuotientV; + Value *SlowRemainderV; + if (UseSignedOp) { + SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); + SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); + } else { + SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); + SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); + } + SlowBuilder.CreateBr(SuccessorBB); + + // Add new basic block for fast divide operation + BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "", + MainBB->getParent(), SuccessorBB); + FastBB->moveBefore(SlowBB); + IRBuilder<> FastBuilder(FastBB, FastBB->begin()); + Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, + BypassType); + Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, + BypassType); + + // udiv/urem because optimization only handles positive numbers + Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV, + ShortDivisorV); + Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV, + ShortDivisorV); + Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt, + ShortQuotientV, + Dividend->getType()); + Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt, + ShortRemainderV, + Dividend->getType()); + FastBuilder.CreateBr(SuccessorBB); + + // Phi nodes for result of div and rem + IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); + PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); + QuoPhi->addIncoming(SlowQuotientV, SlowBB); + QuoPhi->addIncoming(FastQuotientV, FastBB); + PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); + RemPhi->addIncoming(SlowRemainderV, SlowBB); + RemPhi->addIncoming(FastRemainderV, FastBB); + + // Replace Instr with appropriate phi node + if (UseDivOp) + Instr->replaceAllUsesWith(QuoPhi); + else + Instr->replaceAllUsesWith(RemPhi); + Instr->eraseFromParent(); + + // Combine operands into a single value with OR for value testing below + MainBB->getInstList().back().eraseFromParent(); + IRBuilder<> MainBuilder(MainBB, MainBB->end()); + Value *OrV = MainBuilder.CreateOr(Dividend, Divisor); + + // BitMask is inverted to check if the operands are + // larger than the bypass type + uint64_t BitMask = ~BypassType->getBitMask(); + Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); + + // Compare operand values and branch + Value *ZeroV = MainBuilder.getInt32(0); + Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); + MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); + + // point iterator J at first instruction of successorBB + J = I->begin(); + + // Cache phi nodes to 
be used later in place of other instances + // of div or rem with the same sign, dividend, and divisor + DivOpInfo Key(UseSignedOp, Dividend, Divisor); + DivPhiNodes Value(QuoPhi, RemPhi); + PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value)); + return true; +} + +// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if +// operands and operation are identical. Otherwise call insertFastDiv to perform +// the optimization and cache the resulting dividend and remainder. +static bool reuseOrInsertFastDiv(Function &F, + Function::iterator &I, + BasicBlock::iterator &J, + IntegerType *BypassType, + bool UseDivOp, + bool UseSignedOp, + DivCacheTy &PerBBDivCache) { + // Get instruction operands + Instruction *Instr = J; + DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1)); + DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); + + if (CacheI == PerBBDivCache.end()) { + // If previous instance does not exist, insert fast div + return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, + PerBBDivCache); + } + + // Replace operation value with previously generated phi node + DivPhiNodes &Value = CacheI->second; + if (UseDivOp) { + // Replace all uses of div instruction with quotient phi node + J->replaceAllUsesWith(Value.Quotient); + } else { + // Replace all uses of rem instruction with remainder phi node + J->replaceAllUsesWith(Value.Remainder); + } + + // Advance to next operation + ++J; + + // Remove redundant operation + Instr->eraseFromParent(); + return true; +} + +// bypassSlowDivision - This optimization identifies DIV instructions that can +// be profitably bypassed and carried out with a shorter, faster divide. +bool llvm::bypassSlowDivision(Function &F, + Function::iterator &I, + const DenseMap<Type *, Type *> &BypassTypeMap) { + DivCacheTy DivCache; + + bool MadeChange = false; + for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) { + + // Get instruction details + unsigned Opcode = J->getOpcode(); + bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; + bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; + bool UseSignedOp = Opcode == Instruction::SDiv || + Opcode == Instruction::SRem; + + // Only optimize div or rem ops + if (!UseDivOp && !UseRemOp) + continue; + + // Continue if div/rem type is not bypassed + DenseMap<Type *, Type *>::const_iterator BT = + BypassTypeMap.find(J->getType()); + if (BT == BypassTypeMap.end()) + continue; + + IntegerType *BypassType = cast<IntegerType>(BT->second); + MadeChange |= reuseOrInsertFastDiv(F, I, J, BypassType, UseDivOp, + UseSignedOp, DivCache); + } + + return MadeChange; +} diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 4ff31ca..215a16f 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMTransformUtils BasicBlockUtils.cpp BreakCriticalEdges.cpp BuildLibCalls.cpp + BypassSlowDivision.cpp CloneFunction.cpp CloneModule.cpp CmpInstAnalysis.cpp diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index bed7d72..0601433 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -52,7 +52,8 @@ using namespace llvm; /// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch /// conditions and indirectbr addresses this might make dead if /// DeleteDeadConditions is true. 
-bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { +bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, + const TargetLibraryInfo *TLI) { TerminatorInst *T = BB->getTerminator(); IRBuilder<> Builder(T); @@ -96,7 +97,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { Value *Cond = BI->getCondition(); BI->eraseFromParent(); if (DeleteDeadConditions) - RecursivelyDeleteTriviallyDeadInstructions(Cond); + RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); return true; } return false; @@ -161,7 +162,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { Value *Cond = SI->getCondition(); SI->eraseFromParent(); if (DeleteDeadConditions) - RecursivelyDeleteTriviallyDeadInstructions(Cond); + RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); return true; } @@ -205,7 +206,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { Value *Address = IBI->getAddress(); IBI->eraseFromParent(); if (DeleteDeadConditions) - RecursivelyDeleteTriviallyDeadInstructions(Address); + RecursivelyDeleteTriviallyDeadInstructions(Address, TLI); // If we didn't find our destination in the IBI successor list, then we // have undefined behavior. Replace the unconditional branch with an @@ -230,7 +231,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { /// isInstructionTriviallyDead - Return true if the result produced by the /// instruction is not used, and the instruction has no side effects. /// -bool llvm::isInstructionTriviallyDead(Instruction *I) { +bool llvm::isInstructionTriviallyDead(Instruction *I, + const TargetLibraryInfo *TLI) { if (!I->use_empty() || isa<TerminatorInst>(I)) return false; // We don't want the landingpad instruction removed by anything this general. @@ -265,9 +267,9 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { return isa<UndefValue>(II->getArgOperand(1)); } - if (isAllocLikeFn(I)) return true; + if (isAllocLikeFn(I, TLI)) return true; - if (CallInst *CI = isFreeCall(I)) + if (CallInst *CI = isFreeCall(I, TLI)) if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0))) return C->isNullValue() || isa<UndefValue>(C); @@ -278,9 +280,11 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { /// trivially dead instruction, delete it. If that makes any of its operands /// trivially dead, delete them too, recursively. Return true if any /// instructions were deleted. -bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) { +bool +llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V, + const TargetLibraryInfo *TLI) { Instruction *I = dyn_cast<Instruction>(V); - if (!I || !I->use_empty() || !isInstructionTriviallyDead(I)) + if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI)) return false; SmallVector<Instruction*, 16> DeadInsts; @@ -301,7 +305,7 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) { // operand, and if it is 'trivially' dead, delete it in a future loop // iteration. if (Instruction *OpI = dyn_cast<Instruction>(OpV)) - if (isInstructionTriviallyDead(OpI)) + if (isInstructionTriviallyDead(OpI, TLI)) DeadInsts.push_back(OpI); } @@ -334,19 +338,20 @@ static bool areAllUsesEqual(Instruction *I) { /// either forms a cycle or is terminated by a trivially dead instruction, /// delete it. If that makes any of its operands trivially dead, delete them /// too, recursively. Return true if a change was made. 
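For context on the Local.cpp changes above: isInstructionTriviallyDead and its callers now take TargetLibraryInfo because deleting unused allocation or free calls is only valid when TLI vouches for the library semantics (for example, not under -fno-builtin-malloc). A hedged caller-side sketch with a hypothetical helper name, assuming LLVM 3.x-era headers and the isAllocLikeFn overload used in this patch:

    #include "llvm/Instructions.h"
    #include "llvm/Analysis/MemoryBuiltins.h"
    #include "llvm/Target/TargetLibraryInfo.h"

    // An unused malloc-like call is removable only if TLI confirms the callee's semantics.
    static bool unusedAllocIsRemovable(const llvm::CallInst *CI,
                                       const llvm::TargetLibraryInfo *TLI) {
      return CI->use_empty() && llvm::isAllocLikeFn(CI, TLI);
    }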
-bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { +bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, + const TargetLibraryInfo *TLI) { SmallPtrSet<Instruction*, 4> Visited; for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects(); I = cast<Instruction>(*I->use_begin())) { if (I->use_empty()) - return RecursivelyDeleteTriviallyDeadInstructions(I); + return RecursivelyDeleteTriviallyDeadInstructions(I, TLI); // If we find an instruction more than once, we're on a cycle that // won't prove fruitful. if (!Visited.insert(I)) { // Break the cycle and delete the instruction and its operands. I->replaceAllUsesWith(UndefValue::get(I->getType())); - (void)RecursivelyDeleteTriviallyDeadInstructions(I); + (void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI); return true; } } @@ -358,7 +363,8 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { /// /// This returns true if it changed the code, note that it can delete /// instructions in other blocks as well in this block. -bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) { +bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD, + const TargetLibraryInfo *TLI) { bool MadeChange = false; #ifndef NDEBUG @@ -381,7 +387,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) { continue; } - MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst); + MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); if (BIHandle != BI) BI = BB->begin(); } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 518df7c..32d7fa1 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -22,6 +22,7 @@ #include "llvm/LLVMContext.h" #include "llvm/MDBuilder.h" #include "llvm/Metadata.h" +#include "llvm/Module.h" #include "llvm/Operator.h" #include "llvm/Type.h" #include "llvm/ADT/DenseMap.h" @@ -54,6 +55,7 @@ DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false), cl::desc("Duplicate return instructions into unconditional branches")); STATISTIC(NumSpeculations, "Number of speculative executed instructions"); +STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); namespace { /// ValueEqualityComparisonCase - Represents a case of a switch. @@ -101,14 +103,14 @@ public: /// static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { if (SI1 == SI2) return false; // Can't merge with self! - + // It is not safe to merge these two switch instructions if they have a common // successor, and if that successor has a PHI node, and if *that* PHI node has // conflicting incoming values from the two switch blocks. 
BasicBlock *SI1BB = SI1->getParent(); BasicBlock *SI2BB = SI2->getParent(); SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); - + for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I) if (SI1Succs.count(*I)) for (BasicBlock::iterator BBI = (*I)->begin(); @@ -118,7 +120,7 @@ static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { PN->getIncomingValueForBlock(SI2BB)) return false; } - + return true; } @@ -135,7 +137,7 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1, assert(SI1->isUnconditional() && SI2->isConditional()); // We fold the unconditional branch if we can easily update all PHI nodes in - // common successors: + // common successors: // 1> We have a constant incoming value for the conditional branch; // 2> We have "Cond" as the incoming value for the unconditional branch; // 3> SI2->getCondition() and Cond have same operands. @@ -170,7 +172,7 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1, static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred) { if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do - + PHINode *PN; for (BasicBlock::iterator I = Succ->begin(); (PN = dyn_cast<PHINode>(I)); ++I) @@ -222,7 +224,7 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // doesn't dominate BB. if (Pred2->getSinglePredecessor() == 0) return 0; - + // If we found a conditional branch predecessor, make sure that it branches // to BB and Pred2Br. If it doesn't, this isn't an "if statement". if (Pred1Br->getSuccessor(0) == BB && @@ -252,7 +254,7 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // Otherwise, if this is a conditional branch, then we can use it! BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator()); if (BI == 0) return 0; - + assert(BI->isConditional() && "Two successors but not conditional?"); if (BI->getSuccessor(0) == Pred1) { IfTrue = Pred1; @@ -345,7 +347,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // If we aren't allowing aggressive promotion anymore, then don't consider // instructions in the 'if region'. if (AggressiveInsts == 0) return false; - + // If we have seen this instruction before, don't count it again. if (AggressiveInsts->count(I)) return true; @@ -411,7 +413,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, const TargetData *TD, bool isEQ, unsigned &UsedICmps) { Instruction *I = dyn_cast<Instruction>(V); if (I == 0) return 0; - + // If this is an icmp against a constant, handle this as one of the cases. if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) { if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) { @@ -420,21 +422,21 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, Vals.push_back(C); return I->getOperand(0); } - + // If we have "x ult 3" comparison, for example, then we can add 0,1,2 to // the set. ConstantRange Span = ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue()); - + // If this is an and/!= check then we want to optimize "x ugt 2" into // x != 0 && x != 1. if (!isEQ) Span = Span.inverse(); - + // If there are a ton of values, we don't want to make a ginormous switch. 
if (Span.getSetSize().ugt(8) || Span.isEmptySet()) return 0; - + for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) Vals.push_back(ConstantInt::get(V->getContext(), Tmp)); UsedICmps++; @@ -442,11 +444,11 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, } return 0; } - + // Otherwise, we can only handle an | or &, depending on isEQ. if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And)) return 0; - + unsigned NumValsBeforeLHS = Vals.size(); unsigned UsedICmpsBeforeLHS = UsedICmps; if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD, @@ -467,12 +469,12 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, Extra = I->getOperand(1); return LHS; } - + Vals.resize(NumValsBeforeLHS); UsedICmps = UsedICmpsBeforeLHS; return 0; } - + // If the LHS can't be folded in, but Extra is available and RHS can, try to // use LHS as Extra. if (Extra == 0 || Extra == I->getOperand(0)) { @@ -484,7 +486,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, assert(Vals.size() == NumValsBeforeLHS); Extra = OldExtra; } - + return 0; } @@ -615,6 +617,9 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, assert(ThisVal && "This isn't a value comparison!!"); if (ThisVal != PredVal) return false; // Different predicates. + // TODO: Preserve branch weight metadata, similarly to how + // FoldValueComparisonIntoPredecessors preserves it. + // Find out information about when control will move from Pred to TI's block. std::vector<ValueEqualityComparisonCase> PredCases; BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(), @@ -634,7 +639,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // can simplify TI. if (!ValuesOverlap(PredCases, ThisCases)) return false; - + if (isa<BranchInst>(TI)) { // Okay, one of the successors of this condbr is dead. Convert it to a // uncond br. @@ -652,7 +657,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, EraseTerminatorInstAndDCECond(TI); return true; } - + SwitchInst *SI = cast<SwitchInst>(TI); // Okay, TI has cases that are statically dead, prune them away. SmallPtrSet<Constant*, 16> DeadCases; @@ -673,7 +678,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; } - + // Otherwise, TI's block must correspond to some matched value. Find out // which value (or set of values) this is. ConstantInt *TIV = 0; @@ -729,8 +734,8 @@ namespace { } static int ConstantIntSortPredicate(const void *P1, const void *P2) { - const ConstantInt *LHS = *(const ConstantInt**)P1; - const ConstantInt *RHS = *(const ConstantInt**)P2; + const ConstantInt *LHS = *(const ConstantInt*const*)P1; + const ConstantInt *RHS = *(const ConstantInt*const*)P2; if (LHS->getValue().ult(RHS->getValue())) return 1; if (LHS->getValue() == RHS->getValue()) @@ -738,6 +743,67 @@ static int ConstantIntSortPredicate(const void *P1, const void *P2) { return -1; } +static inline bool HasBranchWeights(const Instruction* I) { + MDNode* ProfMD = I->getMetadata(LLVMContext::MD_prof); + if (ProfMD && ProfMD->getOperand(0)) + if (MDString* MDS = dyn_cast<MDString>(ProfMD->getOperand(0))) + return MDS->getString().equals("branch_weights"); + + return false; +} + +/// Tries to get a branch weight for the given instruction, returns NULL if it +/// can't. Pos starts at 0. 
+static ConstantInt* GetWeight(Instruction* I, int Pos) { + MDNode* ProfMD = I->getMetadata(LLVMContext::MD_prof); + if (ProfMD && ProfMD->getOperand(0)) { + if (MDString* MDS = dyn_cast<MDString>(ProfMD->getOperand(0))) { + if (MDS->getString().equals("branch_weights")) { + assert(ProfMD->getNumOperands() >= 3); + return dyn_cast<ConstantInt>(ProfMD->getOperand(1 + Pos)); + } + } + } + + return 0; +} + +/// Scale the given weights based on the successor TI's metadata. Scaling is +/// done by multiplying every weight by the sum of the successor's weights. +static void ScaleWeights(Instruction* STI, MutableArrayRef<uint64_t> Weights) { + // Sum the successor's weights + assert(HasBranchWeights(STI)); + unsigned Scale = 0; + MDNode* ProfMD = STI->getMetadata(LLVMContext::MD_prof); + for (unsigned i = 1; i < ProfMD->getNumOperands(); ++i) { + ConstantInt* CI = dyn_cast<ConstantInt>(ProfMD->getOperand(i)); + assert(CI); + Scale += CI->getValue().getZExtValue(); + } + + // Skip default, as it's replaced during the folding + for (unsigned i = 1; i < Weights.size(); ++i) { + Weights[i] *= Scale; + } +} + +/// Sees if any of the weights are too big for a uint32_t, and halves all the +/// weights if any are. +static void FitWeights(MutableArrayRef<uint64_t> Weights) { + bool Halve = false; + for (unsigned i = 0; i < Weights.size(); ++i) + if (Weights[i] > UINT_MAX) { + Halve = true; + break; + } + + if (! Halve) + return; + + for (unsigned i = 0; i < Weights.size(); ++i) + Weights[i] /= 2; +} + /// FoldValueComparisonIntoPredecessors - The specified terminator is a value /// equality comparison instruction (either a switch or a branch on "X == c"). /// See if any of the predecessors of the terminator block are value comparisons @@ -770,6 +836,55 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // build. SmallVector<BasicBlock*, 8> NewSuccessors; + // Update the branch weight metadata along the way + SmallVector<uint64_t, 8> Weights; + uint64_t PredDefaultWeight = 0; + bool PredHasWeights = HasBranchWeights(PTI); + bool SuccHasWeights = HasBranchWeights(TI); + + if (PredHasWeights) { + MDNode* MD = PTI->getMetadata(LLVMContext::MD_prof); + assert(MD); + for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) { + ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(i)); + assert(CI); + Weights.push_back(CI->getValue().getZExtValue()); + } + + // If the predecessor is a conditional eq, then swap the default weight + // to be the first entry. 
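
The helpers above (HasBranchWeights, GetWeight, ScaleWeights, FitWeights) and the default-weight swap that follows all depend on the same layout of "branch_weights" profile metadata: operand 0 of the !prof node is the MDString tag, and operands 1..N carry one 32-bit weight per successor, in successor order. The folding code wants the default destination's weight first, which is why an equality compare needs the swap below. A minimal sketch of writing and re-reading such a node with the APIs the patch itself uses; the function and variable names are mine, and the MDBuilder header path is assumed for a tree of this vintage:

#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MDBuilder.h" // moved to llvm/MDBuilder.h in later trees

using namespace llvm;

// Tag a conditional branch as 90% taken, then read the weights back the same
// way HasBranchWeights/GetWeight do above.
static void tagAndReadWeights(BranchInst *BI) {
  LLVMContext &Ctx = BI->getContext();
  BI->setMetadata(LLVMContext::MD_prof,
                  MDBuilder(Ctx).createBranchWeights(90, 10));

  MDNode *Prof = BI->getMetadata(LLVMContext::MD_prof);
  MDString *Tag = cast<MDString>(Prof->getOperand(0));      // "branch_weights"
  ConstantInt *W0 = cast<ConstantInt>(Prof->getOperand(1));  // successor 0
  ConstantInt *W1 = cast<ConstantInt>(Prof->getOperand(2));  // successor 1
  (void)Tag; (void)W0; (void)W1;
}
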
+ if (BranchInst* BI = dyn_cast<BranchInst>(PTI)) { + assert(Weights.size() == 2); + ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); + + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) { + std::swap(Weights.front(), Weights.back()); + } + } + + PredDefaultWeight = Weights.front(); + } else if (SuccHasWeights) { + // If there are no predecessor weights but there are successor weights, + // populate Weights with 1, which will later be scaled to the sum of + // successor's weights + Weights.assign(1 + PredCases.size(), 1); + PredDefaultWeight = 1; + } + + uint64_t SuccDefaultWeight = 0; + if (SuccHasWeights) { + int Index = 0; + if (BranchInst* BI = dyn_cast<BranchInst>(TI)) { + ICmpInst* ICI = dyn_cast<ICmpInst>(BI->getCondition()); + assert(ICI); + + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + Index = 1; + } + + SuccDefaultWeight = GetWeight(TI, Index)->getValue().getZExtValue(); + } + if (PredDefault == BB) { // If this is the default destination from PTI, only the edges in TI // that don't occur in PTI, or that branch to BB will be activated. @@ -780,6 +895,12 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, else { // The default destination is BB, we don't need explicit targets. std::swap(PredCases[i], PredCases.back()); + + if (PredHasWeights) { + std::swap(Weights[i+1], Weights.back()); + Weights.pop_back(); + } + PredCases.pop_back(); --i; --e; } @@ -790,14 +911,34 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, PredDefault = BBDefault; NewSuccessors.push_back(BBDefault); } + + if (SuccHasWeights) { + ScaleWeights(TI, Weights); + Weights.front() *= SuccDefaultWeight; + } else if (PredHasWeights) { + Weights.front() /= (1 + BBCases.size()); + } + for (unsigned i = 0, e = BBCases.size(); i != e; ++i) if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) { PredCases.push_back(BBCases[i]); NewSuccessors.push_back(BBCases[i].Dest); + if (SuccHasWeights) { + Weights.push_back(PredDefaultWeight * + GetWeight(TI, i)->getValue().getZExtValue()); + } else if (PredHasWeights) { + // Split the old default's weight amongst the children + Weights.push_back(PredDefaultWeight / (1 + BBCases.size())); + } } } else { + // FIXME: preserve branch weight metadata, similarly to the 'then' + // above. For now, drop it. + PredHasWeights = false; + SuccHasWeights = false; + // If this is not the default destination from PSI, only the edges // in SI that occur in PSI with a destination of BB will be // activated. @@ -822,7 +963,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // If there are any constants vectored to BB that TI doesn't handle, // they must go to the default destination of TI. - for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I = + for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I = PTIHandled.begin(), E = PTIHandled.end(); I != E; ++I) { PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault)); @@ -851,6 +992,17 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, for (unsigned i = 0, e = PredCases.size(); i != e; ++i) NewSI->addCase(PredCases[i].Value, PredCases[i].Dest); + if (PredHasWeights || SuccHasWeights) { + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(Weights); + + SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); + + NewSI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BB->getContext()). 
+ createBranchWeights(MDWeights)); + } + EraseTerminatorInstAndDCECond(PTI); // Okay, last check. If BB is still a successor of PSI, then we must @@ -984,11 +1136,11 @@ HoistTerminator: Value *BB1V = PN->getIncomingValueForBlock(BB1); Value *BB2V = PN->getIncomingValueForBlock(BB2); if (BB1V == BB2V) continue; - + // These values do not agree. Insert a select instruction before NT // that determines the right value. SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; - if (SI == 0) + if (SI == 0) SI = cast<SelectInst> (Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, BB1V->getName()+"."+BB2V->getName())); @@ -1056,7 +1208,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { // Do not hoist the instruction if any of its operands are defined but not // used in this BB. The transformation will prevent the operand from // being sunk into the use block. - for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end(); + for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end(); i != e; ++i) { Instruction *OpI = dyn_cast<Instruction>(*i); if (OpI && OpI->getParent() == BIParent && @@ -1112,7 +1264,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { // as well. if (PHIs.empty()) return false; - + // If we get here, we can hoist the instruction and if-convert. DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *BB1 << "\n";); @@ -1162,13 +1314,13 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { BranchInst *BI = cast<BranchInst>(BB->getTerminator()); unsigned Size = 0; - + for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { if (isa<DbgInfoIntrinsic>(BBI)) continue; if (Size > 10) return false; // Don't clone large BB's. ++Size; - + // We can only support instructions that do not define values that are // live outside of the current basic block. for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end(); @@ -1176,7 +1328,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { Instruction *U = cast<Instruction>(*UI); if (U->getParent() != BB || isa<PHINode>(U)) return false; } - + // Looks ok, continue checking. } @@ -1194,31 +1346,31 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { // outside of the block. if (!PN || PN->getParent() != BB || !PN->hasOneUse()) return false; - + // Degenerate case of a single entry PHI. if (PN->getNumIncomingValues() == 1) { FoldSingleEntryPHINodes(PN->getParent()); - return true; + return true; } // Now we know that this block has multiple preds and two succs. if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false; - + // Okay, this is a simple enough basic block. See if any phi values are // constants. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i)); if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue; - + // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. BasicBlock *PredBB = PN->getIncomingBlock(i); BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); - + if (RealDest == BB) continue; // Skip self loops. // Skip if the predecessor's terminator is an indirect branch. if (isa<IndirectBrInst>(PredBB->getTerminator())) continue; - + // The dest block might have PHI nodes, other predecessors and other // difficult cases. 
Instead of being smart about this, just insert a new // block that jumps to the destination block, effectively splitting @@ -1227,7 +1379,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { RealDest->getName()+".critedge", RealDest->getParent(), RealDest); BranchInst::Create(RealDest, EdgeBB); - + // Update PHI nodes. AddPredecessorToBlock(RealDest, EdgeBB, BB); @@ -1244,7 +1396,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { // Clone the instruction. Instruction *N = BBI->clone(); if (BBI->hasName()) N->setName(BBI->getName()+".c"); - + // Update operands due to translation. for (User::op_iterator i = N->op_begin(), e = N->op_end(); i != e; ++i) { @@ -1252,7 +1404,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { if (PI != TranslateMap.end()) *i = PI->second; } - + // Check for trivial simplification. if (Value *V = SimplifyInstruction(N, TD)) { TranslateMap[BBI] = V; @@ -1297,7 +1449,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { // Don't bother if the branch will be constant folded trivially. isa<ConstantInt>(IfCond)) return false; - + // Okay, we found that we can merge this two-entry phi node into a select. // Doing so would require us to fold *all* two entry phi nodes in this block. // At some point this becomes non-profitable (particularly if the target @@ -1307,14 +1459,14 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I) if (NumPhis > 2) return false; - + // Loop over the PHI's seeing if we can promote them all to select // instructions. While we are at it, keep track of the instructions // that need to be moved to the dominating block. SmallPtrSet<Instruction*, 4> AggressiveInsts; unsigned MaxCostVal0 = PHINodeFoldingThreshold, MaxCostVal1 = PHINodeFoldingThreshold; - + for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { PHINode *PN = cast<PHINode>(II++); if (Value *V = SimplifyInstruction(PN, TD)) { @@ -1322,19 +1474,19 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { PN->eraseFromParent(); continue; } - + if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts, MaxCostVal0) || !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts, MaxCostVal1)) return false; } - + // If we folded the first phi, PN dangles at this point. Refresh it. If // we ran out of PHIs then we simplified them all. PN = dyn_cast<PHINode>(BB->begin()); if (PN == 0) return true; - + // Don't fold i1 branches on PHIs which contain binary operators. These can // often be turned into switches and other things. if (PN->getType()->isIntegerTy(1) && @@ -1342,7 +1494,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { isa<BinaryOperator>(PN->getIncomingValue(1)) || isa<BinaryOperator>(IfCond))) return false; - + // If we all PHI nodes are promotable, check to make sure that all // instructions in the predecessor blocks can be promoted as well. If // not, we won't be able to get rid of the control flow, so it's not @@ -1362,7 +1514,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { return false; } } - + if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) { IfBlock2 = 0; } else { @@ -1375,15 +1527,15 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { return false; } } - + DEBUG(dbgs() << "FOUND IF CONDITION! 
" << *IfCond << " T: " << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"); - + // If we can still promote the PHI nodes after this gauntlet of tests, // do all of the PHI's now. Instruction *InsertPt = DomBlock->getTerminator(); IRBuilder<true, NoFolder> Builder(InsertPt); - + // Move all 'aggressive' instructions, which are defined in the // conditional parts of the if's up to the dominating block. if (IfBlock1) @@ -1394,19 +1546,19 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { DomBlock->getInstList().splice(InsertPt, IfBlock2->getInstList(), IfBlock2->begin(), IfBlock2->getTerminator()); - + while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { // Change the PHI node into a select instruction. Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse); Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue); - - SelectInst *NV = + + SelectInst *NV = cast<SelectInst>(Builder.CreateSelect(IfCond, TrueVal, FalseVal, "")); PN->replaceAllUsesWith(NV); NV->takeName(PN); PN->eraseFromParent(); } - + // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement // has been flattened. Change DomBlock to jump directly to our new block to // avoid other simplifycfg's kicking in on the diamond. @@ -1420,14 +1572,14 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { /// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes /// to two returning blocks, try to merge them together into one return, /// introducing a select if the return values disagree. -static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, +static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, IRBuilder<> &Builder) { assert(BI->isConditional() && "Must be a conditional branch"); BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator()); ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator()); - + // Check to ensure both blocks are empty (just a return) or optionally empty // with PHI nodes. If there are other instructions, merging would cause extra // computation on one path or the other. @@ -1447,12 +1599,12 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, EraseTerminatorInstAndDCECond(BI); return true; } - + // Otherwise, figure out what the true and false return values are // so we can insert a new select instruction. Value *TrueValue = TrueRet->getReturnValue(); Value *FalseValue = FalseRet->getReturnValue(); - + // Unwrap any PHI nodes in the return blocks. if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue)) if (TVPN->getParent() == TrueSucc) @@ -1460,7 +1612,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue)) if (FVPN->getParent() == FalseSucc) FalseValue = FVPN->getIncomingValueForBlock(BI->getParent()); - + // In order for this transformation to be safe, we must be able to // unconditionally execute both operands to the return. This is // normally the case, but we could have a potentially-trapping @@ -1472,12 +1624,12 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue)) if (FCV->canTrap()) return false; - + // Okay, we collected all the mapped values and checked them for sanity, and // defined to really do this transformation. First, update the CFG. 
TrueSucc->removePredecessor(BI->getParent()); FalseSucc->removePredecessor(BI->getParent()); - + // Insert select instructions where needed. Value *BrCond = BI->getCondition(); if (TrueValue) { @@ -1491,15 +1643,15 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, } } - Value *RI = !TrueValue ? + Value *RI = !TrueValue ? Builder.CreateRetVoid() : Builder.CreateRet(TrueValue); (void) RI; - + DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" << "\n " << *BI << "NewRet = " << *RI << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc); - + EraseTerminatorInstAndDCECond(BI); return true; @@ -1600,7 +1752,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { if (Cond == 0) return false; } - + if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || Cond->getParent() != BB || !Cond->hasOneUse()) return false; @@ -1623,7 +1775,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { isSafeToSpeculativelyExecute(FrontIt)) { BonusInst = &*FrontIt; ++FrontIt; - + // Ignore dbg intrinsics. while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt; } @@ -1631,13 +1783,13 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Only a single bonus inst is allowed. if (&*FrontIt != Cond) return false; - + // Make sure the instruction after the condition is the cond branch. BasicBlock::iterator CondIt = Cond; ++CondIt; // Ingore dbg intrinsics. while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt; - + if (&*CondIt != BI) return false; @@ -1649,7 +1801,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1))) if (CE->canTrap()) return false; - + // Finally, don't infinitely unroll conditional loops. BasicBlock *TrueDest = BI->getSuccessor(0); BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : 0; @@ -1659,22 +1811,22 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *PredBlock = *PI; BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator()); - + // Check that we have two conditional branches. If there is a PHI node in // the common successor, verify that the same value flows in from both // blocks. SmallVector<PHINode*, 4> PHIs; if (PBI == 0 || PBI->isUnconditional() || - (BI->isConditional() && + (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) || (!BI->isConditional() && !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs))) continue; - + // Determine if the two branches share a common destination. Instruction::BinaryOps Opc; bool InvertPredCond = false; - + if (BI->isConditional()) { if (PBI->getSuccessor(0) == TrueDest) Opc = Instruction::Or; @@ -1693,7 +1845,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Ensure that any values used in the bonus instruction are also used // by the terminator of the predecessor. This means that those values - // must already have been resolved, so we won't be inhibiting the + // must already have been resolved, so we won't be inhibiting the // out-of-order core by speculating them earlier. if (BonusInst) { // Collect the values used by the bonus inst @@ -1707,47 +1859,47 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { SmallVector<std::pair<Value*, unsigned>, 4> Worklist; Worklist.push_back(std::make_pair(PBI->getOperand(0), 0)); - + // Walk up to four levels back up the use-def chain of the predecessor's // terminator to see if all those values were used. 
The choice of four // levels is arbitrary, to provide a compile-time-cost bound. while (!Worklist.empty()) { std::pair<Value*, unsigned> Pair = Worklist.back(); Worklist.pop_back(); - + if (Pair.second >= 4) continue; UsedValues.erase(Pair.first); if (UsedValues.empty()) break; - + if (Instruction *I = dyn_cast<Instruction>(Pair.first)) { for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; ++OI) Worklist.push_back(std::make_pair(OI->get(), Pair.second+1)); - } + } } - + if (!UsedValues.empty()) return false; } DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); - IRBuilder<> Builder(PBI); + IRBuilder<> Builder(PBI); // If we need to invert the condition in the pred block to match, do so now. if (InvertPredCond) { Value *NewCond = PBI->getCondition(); - + if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) { CmpInst *CI = cast<CmpInst>(NewCond); CI->setPredicate(CI->getInversePredicate()); } else { - NewCond = Builder.CreateNot(NewCond, + NewCond = Builder.CreateNot(NewCond, PBI->getCondition()->getName()+".not"); } - + PBI->setCondition(NewCond); PBI->swapSuccessors(); } - + // If we have a bonus inst, clone it into the predecessor block. Instruction *NewBonus = 0; if (BonusInst) { @@ -1756,7 +1908,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { NewBonus->takeName(BonusInst); BonusInst->setName(BonusInst->getName()+".old"); } - + // Clone Cond into the predecessor basic block, and or/and the // two conditions together. Instruction *New = Cond->clone(); @@ -1764,9 +1916,9 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { PredBlock->getInstList().insert(PBI, New); New->takeName(Cond); Cond->setName(New->getName()+".old"); - + if (BI->isConditional()) { - Instruction *NewCond = + Instruction *NewCond = cast<Instruction>(Builder.CreateBinOp(Opc, PBI->getCondition(), New, "or.cond")); PBI->setCondition(NewCond); @@ -1806,7 +1958,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C) // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond) // is false: PBI_Cond and BI_Value - MergedCond = + MergedCond = cast<Instruction>(Builder.CreateBinOp(Instruction::And, PBI->getCondition(), New, "and.cond")); @@ -1814,7 +1966,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { Instruction *NotCond = cast<Instruction>(Builder.CreateNot(PBI->getCondition(), "not.cond")); - MergedCond = + MergedCond = cast<Instruction>(Builder.CreateBinOp(Instruction::Or, NotCond, MergedCond, "or.cond")); @@ -1921,7 +2073,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (isa<DbgInfoIntrinsic>(*I)) I->clone()->insertBefore(PBI); - + return true; } return false; @@ -1936,7 +2088,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { BasicBlock *BB = BI->getParent(); // If this block ends with a branch instruction, and if there is a - // predecessor that ends on a branch of the same condition, make + // predecessor that ends on a branch of the same condition, make // this conditional branch redundant. if (PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { @@ -1945,11 +2097,11 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (BB->getSinglePredecessor()) { // Turn this into a branch on constant. 
bool CondIsTrue = PBI->getSuccessor(0) == BB; - BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), + BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue)); return true; // Nuke the branch on constant. } - + // Otherwise, if there are multiple predecessors, insert a PHI that merges // in the constant and simplify the block result. Subsequent passes of // simplifycfg will thread the block. @@ -1969,18 +2121,18 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { bool CondIsTrue = PBI->getSuccessor(0) == BB; - NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()), + NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue), P); } else { NewPN->addIncoming(BI->getCondition(), P); } } - + BI->setCondition(NewPN); return true; } } - + // If this is a conditional branch in an empty block, and if any // predecessors is a conditional branch to one of our destinations, // fold the conditions into logical ops and one cond br. @@ -1991,11 +2143,11 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (&*BBI != BI) return false; - + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BI->getCondition())) if (CE->canTrap()) return false; - + int PBIOp, BIOp; if (PBI->getSuccessor(0) == BI->getSuccessor(0)) PBIOp = BIOp = 0; @@ -2007,31 +2159,31 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PBIOp = BIOp = 1; else return false; - + // Check to make sure that the other destination of this branch // isn't BB itself. If so, this is an infinite loop that will // keep getting unwound. if (PBI->getSuccessor(PBIOp) == BB) return false; - - // Do not perform this transformation if it would require + + // Do not perform this transformation if it would require // insertion of a large number of select instructions. For targets // without predication/cmovs, this is a big pessimization. BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); - + unsigned NumPhis = 0; for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II); ++II, ++NumPhis) if (NumPhis > 2) // Disable this xform. return false; - + // Finally, if everything is ok, fold the branches to logical ops. BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); - + DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() << "AND: " << *BI->getParent()); - - + + // If OtherDest *is* BB, then BB is a basic block with a single conditional // branch in it, where one edge (OtherDest) goes back to itself but the other // exits. We don't *know* that the program avoids the infinite loop @@ -2046,13 +2198,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { "infloop", BB->getParent()); BranchInst::Create(InfLoopBlock, InfLoopBlock); OtherDest = InfLoopBlock; - } - + } + DEBUG(dbgs() << *PBI->getParent()->getParent()); // BI may have other predecessors. Because of this, we leave // it alone, but modify PBI. - + // Make sure we get to CommonDest on True&True directions. Value *PBICond = PBI->getCondition(); IRBuilder<true, NoFolder> Builder(PBI); @@ -2065,16 +2217,16 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // Merge the conditions. Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge"); - + // Modify PBI to branch on the new condition to the new dests. 
PBI->setCondition(Cond); PBI->setSuccessor(0, CommonDest); PBI->setSuccessor(1, OtherDest); - + // OtherDest may have phi nodes. If so, add an entry from PBI's // block that are identical to the entries for BI's block. AddPredecessorToBlock(OtherDest, PBI->getParent(), BB); - + // We know that the CommonDest already had an edge from PBI to // it. If it has PHIs though, the PHIs may have different // entries for BB and PBI's BB. If so, insert a select to make @@ -2092,10 +2244,10 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PN->setIncomingValue(PBBIdx, NV); } } - + DEBUG(dbgs() << "INTO: " << *PBI->getParent()); DEBUG(dbgs() << *PBI->getParent()->getParent()); - + // This basic block is probably dead. We know it has at least // one fewer predecessor. return true; @@ -2214,7 +2366,7 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { /// br label %end /// end: /// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ] -/// +/// /// We prefer to split the edge to 'end' so that there is a true/false entry to /// the PHI, merging the third icmp into the switch. static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, @@ -2228,17 +2380,17 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, Value *V = ICI->getOperand(0); ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1)); - + // The pattern we're looking for is where our only predecessor is a switch on // 'V' and this block is the default case for the switch. In this case we can // fold the compared value into the switch to simplify things. BasicBlock *Pred = BB->getSinglePredecessor(); if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false; - + SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator()); if (SI->getCondition() != V) return false; - + // If BB is reachable on a non-default case, then we simply know the value of // V in this block. Substitute it and constant fold the icmp instruction // away. @@ -2246,7 +2398,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, ConstantInt *VVal = SI->findCaseDest(BB); assert(VVal && "Should have a unique destination value"); ICI->setOperand(0, VVal); - + if (Value *V = SimplifyInstruction(ICI, TD)) { ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); @@ -2254,7 +2406,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, // BB is now empty, so it is likely to simplify away. return SimplifyCFG(BB) | true; } - + // Ok, the block is reachable from the default dest. If the constant we're // comparing exists in one of the other edges, then we can constant fold ICI // and zap it. @@ -2264,13 +2416,13 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, V = ConstantInt::getFalse(BB->getContext()); else V = ConstantInt::getTrue(BB->getContext()); - + ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); // BB is now empty, so it is likely to simplify away. return SimplifyCFG(BB) | true; } - + // The use of the icmp has to be in the 'end' block, by the only PHI node in // the block. BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0); @@ -2297,7 +2449,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB); SI->addCase(Cst, NewBB); - + // NewBB branches to the phi block, add the uncond branch and the phi entry. 
Builder.SetInsertPoint(NewBB); Builder.SetCurrentDebugLocation(SI->getDebugLoc()); @@ -2313,8 +2465,8 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, IRBuilder<> &Builder) { Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); if (Cond == 0) return false; - - + + // Change br (X == 0 | X == 1), T, F into a switch instruction. // If this is a bunch of seteq's or'd together, or if it's a bunch of // 'setne's and'ed together, collect them. @@ -2323,7 +2475,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, bool TrueWhenEqual = true; Value *ExtraCase = 0; unsigned UsedICmps = 0; - + if (Cond->getOpcode() == Instruction::Or) { CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true, UsedICmps); @@ -2332,7 +2484,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, UsedICmps); TrueWhenEqual = false; } - + // If we didn't have a multiply compared value, fail. if (CompVal == 0) return false; @@ -2344,21 +2496,24 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, // instruction can't handle, remove them now. array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate); Values.erase(std::unique(Values.begin(), Values.end()), Values.end()); - + // If Extra was used, we require at least two switch values to do the // transformation. A switch with one value is just an cond branch. if (ExtraCase && Values.size() < 2) return false; - + + // TODO: Preserve branch weight metadata, similarly to how + // FoldValueComparisonIntoPredecessors preserves it. + // Figure out which block is which destination. BasicBlock *DefaultBB = BI->getSuccessor(1); BasicBlock *EdgeBB = BI->getSuccessor(0); if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB); - + BasicBlock *BB = BI->getParent(); - + DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() << " cases into SWITCH. BB is:\n" << *BB); - + // If there are any extra values that couldn't be folded into the switch // then we evaluate them with an explicit branch first. Split the block // right before the condbr to handle it. @@ -2372,13 +2527,13 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB); else Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB); - + OldTI->eraseFromParent(); - + // If there are PHI nodes in EdgeBB, then we need to add a new entry to them // for the edge we just added. AddPredecessorToBlock(EdgeBB, BB, NewBB); - + DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase << "\nEXTRABB = " << *BB); BB = NewBB; @@ -2392,14 +2547,14 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, TD->getIntPtrType(CompVal->getContext()), "magicptr"); } - + // Create the new switch instruction now. SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size()); // Add all of the 'cases' to the switch instruction. for (unsigned i = 0, e = Values.size(); i != e; ++i) New->addCase(Values[i], EdgeBB); - + // We added edges from PI to the EdgeBB. As such, if there were any // PHI nodes in EdgeBB, they need entries to be added corresponding to // the number of edges added. @@ -2410,10 +2565,10 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, for (unsigned i = 0, e = Values.size()-1; i != e; ++i) PN->addIncoming(InVal, BB); } - + // Erase the old branch instruction. 
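
SimplifyBranchOnICmpChain, which finishes just above (the old branch is erased right below), rewrites a branch on a chain of or'd equality tests into a single switch. At the source level the effect is roughly the following; a hand-written illustration, not code from the tree:

// Before: one conditional branch on an or'd chain of equality compares.
int before(unsigned x) {
  if (x == 0 || x == 1 || x == 7)
    return 10;   // EdgeBB
  return 20;     // DefaultBB
}

// After: a switch with one explicit case per gathered constant; an extra
// value that cannot be folded in (ExtraCase) would keep its own branch.
int after(unsigned x) {
  switch (x) {
  case 0:
  case 1:
  case 7:
    return 10;
  default:
    return 20;
  }
}
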
EraseTerminatorInstAndDCECond(BI); - + DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n'); return true; } @@ -2467,7 +2622,7 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { BasicBlock *BB = RI->getParent(); if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false; - + // Find predecessors that end with branches. SmallVector<BasicBlock*, 8> UncondBranchPreds; SmallVector<BranchInst*, 8> CondBranchPreds; @@ -2481,7 +2636,7 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { CondBranchPreds.push_back(BI); } } - + // If we found some, do the transformation! if (!UncondBranchPreds.empty() && DupRet) { while (!UncondBranchPreds.empty()) { @@ -2490,21 +2645,21 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { << "INTO UNCOND BRANCH PRED: " << *Pred); (void)FoldReturnIntoUncondBranch(RI, BB, Pred); } - + // If we eliminated all predecessors of the block, delete the block now. if (pred_begin(BB) == pred_end(BB)) // We know there are no successors, so just nuke the block. BB->eraseFromParent(); - + return true; } - + // Check out all of the conditional branches going to this return // instruction. If any of them just select between returns, change the // branch itself into a select/return pair. while (!CondBranchPreds.empty()) { BranchInst *BI = CondBranchPreds.pop_back_val(); - + // Check to see if the non-BB successor is also a return block. if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) && isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) && @@ -2516,9 +2671,9 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BasicBlock *BB = UI->getParent(); - + bool Changed = false; - + // If there are any instructions immediately before the unreachable that can // be removed, do so. while (UI != BB->begin()) { @@ -2558,11 +2713,11 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BBI->eraseFromParent(); Changed = true; } - + // If the unreachable instruction is the first in the block, take a gander // at all of the predecessors of this instruction, and simplify them. if (&BB->front() != UI) return Changed; - + SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB)); for (unsigned i = 0, e = Preds.size(); i != e; ++i) { TerminatorInst *TI = Preds[i]->getTerminator(); @@ -2615,7 +2770,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BasicBlock *MaxBlock = 0; for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator I = Popularity.begin(), E = Popularity.end(); I != E; ++I) { - if (I->second.first > MaxPop || + if (I->second.first > MaxPop || (I->second.first == MaxPop && MaxIndex > I->second.second)) { MaxPop = I->second.first; MaxIndex = I->second.second; @@ -2627,13 +2782,13 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // edges to it. SI->setDefaultDest(MaxBlock); Changed = true; - + // If MaxBlock has phinodes in it, remove MaxPop-1 entries from // it. if (isa<PHINode>(MaxBlock->begin())) for (unsigned i = 0; i != MaxPop-1; ++i) MaxBlock->removePredecessor(SI->getParent()); - + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) if (i.getCaseSuccessor() == MaxBlock) { @@ -2648,7 +2803,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // place to note that the call does not throw though. 
BranchInst *BI = Builder.CreateBr(II->getNormalDest()); II->removeFromParent(); // Take out of symbol table - + // Insert the call now... SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3); Builder.SetInsertPoint(BI); @@ -2663,7 +2818,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { } } } - + // If this block is now dead, remove it. if (pred_begin(BB) == pred_end(BB) && BB != &BB->getParent()->getEntryBlock()) { @@ -2823,6 +2978,285 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { return Changed; } +/// ValidLookupTableConstant - Return true if the backend will be able to handle +/// initializing an array of constants like C. +static bool ValidLookupTableConstant(Constant *C) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + return CE->isGEPWithNoNotionalOverIndexing(); + + return isa<ConstantFP>(C) || + isa<ConstantInt>(C) || + isa<ConstantPointerNull>(C) || + isa<GlobalValue>(C) || + isa<UndefValue>(C); +} + +/// GetCaseResulsts - Try to determine the resulting constant values in phi +/// nodes at the common destination basic block for one of the case +/// destinations of a switch instruction. +static bool GetCaseResults(SwitchInst *SI, + BasicBlock *CaseDest, + BasicBlock **CommonDest, + SmallVector<std::pair<PHINode*,Constant*>, 4> &Res) { + // The block from which we enter the common destination. + BasicBlock *Pred = SI->getParent(); + + // If CaseDest is empty, continue to its successor. + if (CaseDest->getFirstNonPHIOrDbg() == CaseDest->getTerminator() && + !isa<PHINode>(CaseDest->begin())) { + + TerminatorInst *Terminator = CaseDest->getTerminator(); + if (Terminator->getNumSuccessors() != 1) + return false; + + Pred = CaseDest; + CaseDest = Terminator->getSuccessor(0); + } + + // If we did not have a CommonDest before, use the current one. + if (!*CommonDest) + *CommonDest = CaseDest; + // If the destination isn't the common one, abort. + if (CaseDest != *CommonDest) + return false; + + // Get the values for this case from phi nodes in the destination block. + BasicBlock::iterator I = (*CommonDest)->begin(); + while (PHINode *PHI = dyn_cast<PHINode>(I++)) { + int Idx = PHI->getBasicBlockIndex(Pred); + if (Idx == -1) + continue; + + Constant *ConstVal = dyn_cast<Constant>(PHI->getIncomingValue(Idx)); + if (!ConstVal) + return false; + + // Be conservative about which kinds of constants we support. + if (!ValidLookupTableConstant(ConstVal)) + return false; + + Res.push_back(std::make_pair(PHI, ConstVal)); + } + + return true; +} + +/// BuildLookupTable - Build a lookup table with the contents of Results, using +/// DefaultResult to fill the holes in the table. If the table ends up +/// containing the same result in each element, set *SingleResult to that value +/// and return NULL. +static GlobalVariable *BuildLookupTable(Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Results, + Constant *DefaultResult, + Constant **SingleResult) { + assert(Results.size() && "Need values to build lookup table"); + assert(TableSize >= Results.size() && "Table needs to hold all values"); + + // If all values in the table are equal, this is that value. + Constant *SameResult = Results.begin()->second; + + // Build up the table contents. 
+ std::vector<Constant*> TableContents(TableSize); + for (size_t I = 0, E = Results.size(); I != E; ++I) { + ConstantInt *CaseVal = Results[I].first; + Constant *CaseRes = Results[I].second; + + uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue(); + TableContents[Idx] = CaseRes; + + if (CaseRes != SameResult) + SameResult = NULL; + } + + // Fill in any holes in the table with the default result. + if (Results.size() < TableSize) { + for (unsigned i = 0; i < TableSize; ++i) { + if (!TableContents[i]) + TableContents[i] = DefaultResult; + } + + if (DefaultResult != SameResult) + SameResult = NULL; + } + + // Same result was used in the entire table; just return that. + if (SameResult) { + *SingleResult = SameResult; + return NULL; + } + + ArrayType *ArrayTy = ArrayType::get(DefaultResult->getType(), TableSize); + Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); + + GlobalVariable *GV = new GlobalVariable(M, ArrayTy, /*constant=*/ true, + GlobalVariable::PrivateLinkage, + Initializer, + "switch.table"); + GV->setUnnamedAddr(true); + return GV; +} + +/// SwitchToLookupTable - If the switch is only used to initialize one or more +/// phi nodes in a common successor block with different constant values, +/// replace the switch with lookup tables. +static bool SwitchToLookupTable(SwitchInst *SI, + IRBuilder<> &Builder) { + assert(SI->getNumCases() > 1 && "Degenerate switch?"); + // FIXME: Handle unreachable cases. + + // FIXME: If the switch is too sparse for a lookup table, perhaps we could + // split off a dense part and build a lookup table for that. + + // FIXME: If the results are all integers and the lookup table would fit in a + // target-legal register, we should store them as a bitmap and use shift/mask + // to look up the result. + + // FIXME: This creates arrays of GEPs to constant strings, which means each + // GEP needs a runtime relocation in PIC code. We should just build one big + // string and lookup indices into that. + + // Ignore the switch if the number of cases are too small. + // This is similar to the check when building jump tables in + // SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Determine the best cut-off. + if (SI->getNumCases() < 4) + return false; + + // Figure out the corresponding result for each case value and phi node in the + // common destination, as well as the the min and max case values. + assert(SI->case_begin() != SI->case_end()); + SwitchInst::CaseIt CI = SI->case_begin(); + ConstantInt *MinCaseVal = CI.getCaseValue(); + ConstantInt *MaxCaseVal = CI.getCaseValue(); + + BasicBlock *CommonDest = NULL; + typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultListTy; + SmallDenseMap<PHINode*, ResultListTy> ResultLists; + SmallDenseMap<PHINode*, Constant*> DefaultResults; + SmallDenseMap<PHINode*, Type*> ResultTypes; + SmallVector<PHINode*, 4> PHIs; + + for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) { + ConstantInt *CaseVal = CI.getCaseValue(); + if (CaseVal->getValue().slt(MinCaseVal->getValue())) + MinCaseVal = CaseVal; + if (CaseVal->getValue().sgt(MaxCaseVal->getValue())) + MaxCaseVal = CaseVal; + + // Resulting value at phi nodes for this case value. + typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy; + ResultsTy Results; + if (!GetCaseResults(SI, CI.getCaseSuccessor(), &CommonDest, Results)) + return false; + + // Append the result from this case to the list for each phi. 
+ for (ResultsTy::iterator I = Results.begin(), E = Results.end(); I!=E; ++I) { + if (!ResultLists.count(I->first)) + PHIs.push_back(I->first); + ResultLists[I->first].push_back(std::make_pair(CaseVal, I->second)); + } + } + + // Get the resulting values for the default case. + SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList; + if (!GetCaseResults(SI, SI->getDefaultDest(), &CommonDest, DefaultResultsList)) + return false; + for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) { + PHINode *PHI = DefaultResultsList[I].first; + Constant *Result = DefaultResultsList[I].second; + DefaultResults[PHI] = Result; + ResultTypes[PHI] = Result->getType(); + } + + APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); + // The table density should be at lest 40%. This is the same criterion as for + // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Find the best cut-off. + // Be careful to avoid overlow in the density computation. + if (RangeSpread.zextOrSelf(64).ugt(UINT64_MAX / 4 - 1)) + return false; + uint64_t TableSize = RangeSpread.getLimitedValue() + 1; + if (SI->getNumCases() * 10 < TableSize * 4) + return false; + + // Build the lookup tables. + SmallDenseMap<PHINode*, GlobalVariable*> LookupTables; + SmallDenseMap<PHINode*, Constant*> SingleResults; + + Module &Mod = *CommonDest->getParent()->getParent(); + for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end(); + I != E; ++I) { + PHINode *PHI = *I; + + Constant *SingleResult = NULL; + LookupTables[PHI] = BuildLookupTable(Mod, TableSize, MinCaseVal, + ResultLists[PHI], DefaultResults[PHI], + &SingleResult); + SingleResults[PHI] = SingleResult; + } + + // Create the BB that does the lookups. + BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(), + "switch.lookup", + CommonDest->getParent(), + CommonDest); + + // Check whether the condition value is within the case range, and branch to + // the new BB. + Builder.SetInsertPoint(SI); + Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal, + "switch.tableidx"); + Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( + MinCaseVal->getType(), TableSize)); + Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + + // Populate the BB that does the lookups. + Builder.SetInsertPoint(LookupBB); + bool ReturnedEarly = false; + for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end(); + I != E; ++I) { + PHINode *PHI = *I; + // There was a single result for this phi; just use that. + if (Constant *SingleResult = SingleResults[PHI]) { + PHI->addIncoming(SingleResult, LookupBB); + continue; + } + + Value *GEPIndices[] = { Builder.getInt32(0), TableIndex }; + Value *GEP = Builder.CreateInBoundsGEP(LookupTables[PHI], GEPIndices, + "switch.gep"); + Value *Result = Builder.CreateLoad(GEP, "switch.load"); + + // If the result is only going to be used to return from the function, + // we want to do that right here. + if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin())) { + if (CommonDest->getFirstNonPHIOrDbg() == CommonDest->getTerminator()) { + Builder.CreateRet(Result); + ReturnedEarly = true; + } + } + + if (!ReturnedEarly) + PHI->addIncoming(Result, LookupBB); + } + + if (!ReturnedEarly) + Builder.CreateBr(CommonDest); + + // Remove the switch. 
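
Taken together, the code above replaces a sufficiently dense switch that only selects constant phi values with a bounds check plus a load from a private constant table (holes, if any, are filled with the default result, and a table whose entries are all identical degenerates to that single value). A hand-written source-level picture of the result, with made-up numbers:

// Before: a dense switch whose only job is to pick a constant.
int before(unsigned x) {
  switch (x) {
  case 2: return 11;
  case 3: return 22;
  case 4: return 33;
  case 5: return 44;
  default: return -1;
  }
}

// After: subtract the minimum case value, guard the range, and load.
static const int Table[4] = { 11, 22, 33, 44 }; // the "switch.table" global
int after(unsigned x) {
  unsigned Idx = x - 2;   // switch.tableidx
  if (Idx < 4)            // the ICmpULT range check
    return Table[Idx];    // switch.gep + switch.load
  return -1;              // original default destination
}

The patch also requires at least four cases and roughly 40% table density, so the table never grows wildly larger than the number of real cases.
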
+ for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) { + BasicBlock *Succ = SI->getSuccessor(i); + if (Succ == SI->getDefaultDest()) continue; + Succ->removePredecessor(SI->getParent()); + } + SI->eraseFromParent(); + + ++NumLookupTables; + return true; +} + bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { // If this switch is too complex to want to look at, ignore it. if (!isValueEqualityComparison(SI)) @@ -2862,13 +3296,16 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { if (ForwardSwitchConditionToPHI(SI)) return SimplifyCFG(BB) | true; + if (SwitchToLookupTable(SI, Builder)) + return SimplifyCFG(BB) | true; + return false; } bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { BasicBlock *BB = IBI->getParent(); bool Changed = false; - + // Eliminate redundant destinations. SmallPtrSet<Value *, 8> Succs; for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { @@ -2879,7 +3316,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { --i; --e; Changed = true; } - } + } if (IBI->getNumDestinations() == 0) { // If the indirectbr has no successors, change it to unreachable. @@ -2887,14 +3324,14 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { EraseTerminatorInstAndDCECond(IBI); return true; } - + if (IBI->getNumDestinations() == 1) { // If the indirectbr has one successor, change it to a direct branch. BranchInst::Create(IBI->getDestination(0), IBI); EraseTerminatorInstAndDCECond(IBI); return true; } - + if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) { if (SimplifyIndirectBrOnSelect(IBI, SI)) return SimplifyCFG(BB) | true; @@ -2904,13 +3341,13 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ BasicBlock *BB = BI->getParent(); - + // If the Terminator is the only non-phi instruction, simplify the block. BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; - + // If the only instruction in the block is a seteq/setne comparison // against a constant, try to simplify the block. if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) @@ -2921,7 +3358,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder)) return true; } - + // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value @@ -2934,7 +3371,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { BasicBlock *BB = BI->getParent(); - + // Conditional branch if (isValueEqualityComparison(BI)) { // If we only have one predecessor, and if it is a branch on this value, @@ -2943,7 +3380,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) return SimplifyCFG(BB) | true; - + // This block must be empty, except for the setcond inst, if it exists. // Ignore dbg intrinsics. 
BasicBlock::iterator I = BB->begin(); @@ -2962,17 +3399,17 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { return SimplifyCFG(BB) | true; } } - + // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction. if (SimplifyBranchOnICmpChain(BI, TD, Builder)) return true; - + // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. if (FoldBranchToCommonDest(BI)) return SimplifyCFG(BB) | true; - + // We have a conditional branch to two blocks that are only reachable // from BI. We know that the condbr dominates the two blocks, so see if // there is any identical code in the "then" and "else" blocks. If so, we @@ -2999,14 +3436,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1))) return SimplifyCFG(BB) | true; } - + // If this is a branch on a phi node in the current block, thread control // through this block if any PHI node entries are constants. if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) if (PN->getParent() == BI->getParent()) if (FoldCondBranchOnPHI(BI, TD)) return SimplifyCFG(BB) | true; - + // Scan predecessor blocks for conditional branches. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) @@ -3114,7 +3551,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // if (MergeBlockIntoPredecessor(BB)) return true; - + IRBuilder<> Builder(BB); // If there is a trivial two-entry PHI node in this basic block, and we can diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index 81eb9e0..528e6a1 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -72,7 +72,7 @@ namespace { ++NumSimplified; Changed = true; } - Changed |= RecursivelyDeleteTriviallyDeadInstructions(I); + Changed |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI); } // Place the list of instructions to simplify on the next loop iteration diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 62d23cb..c09dcd2 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -601,7 +601,7 @@ namespace { // It is important to cleanup here so that future iterations of this // function have less work to do. - (void) SimplifyInstructionsInBlock(&BB, TD); + (void) SimplifyInstructionsInBlock(&BB, TD, AA->getTargetLibraryInfo()); return true; } diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index c09c69b..f3f24ae 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -1029,6 +1029,9 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V, Out << "sideeffect "; if (IA->isAlignStack()) Out << "alignstack "; + // We don't emit the AD_ATT dialect as it's the assumed default. 
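
This AsmWriter change pairs with the AsmDialect plumbing added to VMCore further down (InlineAsmKeyType and InlineAsm::get now carry the dialect). A minimal sketch of constructing an Intel-dialect asm value with the extended signature; the helper below is mine, and the printed form in the comment is what the check just below emits:

#include "llvm/DerivedTypes.h"
#include "llvm/InlineAsm.h"
#include "llvm/LLVMContext.h"

using namespace llvm;

static InlineAsm *makeIntelNop(LLVMContext &Ctx) {
  FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
                                        /*isVarArg=*/false);
  // With the writer change here this value prints as:
  //   asm sideeffect inteldialect "nop", ""
  return InlineAsm::get(FTy, "nop", "", /*hasSideEffects=*/true,
                        /*isAlignStack=*/false, InlineAsm::AD_Intel);
}
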
+ if (IA->getDialect() == InlineAsm::AD_Intel) + Out << "inteldialect "; Out << '"'; PrintEscapedString(IA->getAsmString(), Out); Out << "\", \""; diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp index c8219eb..d466ac6 100644 --- a/lib/VMCore/Attributes.cpp +++ b/lib/VMCore/Attributes.cpp @@ -88,9 +88,6 @@ std::string Attribute::getAsString(Attributes Attrs) { Result += utostr(Attribute::getAlignmentFromAttrs(Attrs)); Result += " "; } - if (Attrs & Attribute::IANSDialect) - Result += "ia_nsdialect "; - // Trim the trailing space. assert(!Result.empty() && "Unknown attribute!"); Result.erase(Result.end()-1); diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt index 6a20be6..c17e794 100644 --- a/lib/VMCore/CMakeLists.txt +++ b/lib/VMCore/CMakeLists.txt @@ -42,7 +42,7 @@ add_llvm_library(LLVMCore # Workaround: It takes over 20 minutes to compile with msvc10. # FIXME: Suppressing optimizations to core libraries would not be good thing. -if( MSVC_VERSION EQUAL 1600 ) +if( MSVC_VERSION LESS 1700 ) set_property( SOURCE Function.cpp PROPERTY COMPILE_FLAGS "/Og-" diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h index 8903a8f..0f81b3e 100644 --- a/lib/VMCore/ConstantsContext.h +++ b/lib/VMCore/ConstantsContext.h @@ -352,18 +352,21 @@ struct ExprMapKeyType { struct InlineAsmKeyType { InlineAsmKeyType(StringRef AsmString, StringRef Constraints, bool hasSideEffects, - bool isAlignStack) + bool isAlignStack, InlineAsm::AsmDialect asmDialect) : asm_string(AsmString), constraints(Constraints), - has_side_effects(hasSideEffects), is_align_stack(isAlignStack) {} + has_side_effects(hasSideEffects), is_align_stack(isAlignStack), + asm_dialect(asmDialect) {} std::string asm_string; std::string constraints; bool has_side_effects; bool is_align_stack; + InlineAsm::AsmDialect asm_dialect; bool operator==(const InlineAsmKeyType& that) const { return this->asm_string == that.asm_string && this->constraints == that.constraints && this->has_side_effects == that.has_side_effects && - this->is_align_stack == that.is_align_stack; + this->is_align_stack == that.is_align_stack && + this->asm_dialect == that.asm_dialect; } bool operator<(const InlineAsmKeyType& that) const { if (this->asm_string != that.asm_string) @@ -374,6 +377,8 @@ struct InlineAsmKeyType { return this->has_side_effects < that.has_side_effects; if (this->is_align_stack != that.is_align_stack) return this->is_align_stack < that.is_align_stack; + if (this->asm_dialect != that.asm_dialect) + return this->asm_dialect < that.asm_dialect; return false; } @@ -490,7 +495,8 @@ template<> struct ConstantCreator<InlineAsm, PointerType, InlineAsmKeyType> { static InlineAsm *create(PointerType *Ty, const InlineAsmKeyType &Key) { return new InlineAsm(Ty, Key.asm_string, Key.constraints, - Key.has_side_effects, Key.is_align_stack); + Key.has_side_effects, Key.is_align_stack, + Key.asm_dialect); } }; @@ -499,7 +505,8 @@ struct ConstantKeyData<InlineAsm> { typedef InlineAsmKeyType ValType; static ValType getValType(InlineAsm *Asm) { return InlineAsmKeyType(Asm->getAsmString(), Asm->getConstraintString(), - Asm->hasSideEffects(), Asm->isAlignStack()); + Asm->hasSideEffects(), Asm->isAlignStack(), + Asm->getDialect()); } }; diff --git a/lib/VMCore/GCOV.cpp b/lib/VMCore/GCOV.cpp index 003a5d4..5bc1ac9 100644 --- a/lib/VMCore/GCOV.cpp +++ b/lib/VMCore/GCOV.cpp @@ -28,19 +28,19 @@ GCOVFile::~GCOVFile() { } /// isGCDAFile - Return true if Format identifies a .gcda file. 
-static bool isGCDAFile(GCOVFormat Format) { - return Format == GCDA_402 || Format == GCDA_404; +static bool isGCDAFile(GCOV::GCOVFormat Format) { + return Format == GCOV::GCDA_402 || Format == GCOV::GCDA_404; } /// isGCNOFile - Return true if Format identifies a .gcno file. -static bool isGCNOFile(GCOVFormat Format) { - return Format == GCNO_402 || Format == GCNO_404; +static bool isGCNOFile(GCOV::GCOVFormat Format) { + return Format == GCOV::GCNO_402 || Format == GCOV::GCNO_404; } /// read - Read GCOV buffer. bool GCOVFile::read(GCOVBuffer &Buffer) { - GCOVFormat Format = Buffer.readGCOVFormat(); - if (Format == InvalidGCOV) + GCOV::GCOVFormat Format = Buffer.readGCOVFormat(); + if (Format == GCOV::InvalidGCOV) return false; unsigned i = 0; @@ -87,21 +87,21 @@ GCOVFunction::~GCOVFunction() { /// read - Read a aunction from the buffer. Return false if buffer cursor /// does not point to a function tag. -bool GCOVFunction::read(GCOVBuffer &Buff, GCOVFormat Format) { +bool GCOVFunction::read(GCOVBuffer &Buff, GCOV::GCOVFormat Format) { if (!Buff.readFunctionTag()) return false; Buff.readInt(); // Function header length Ident = Buff.readInt(); Buff.readInt(); // Checksum #1 - if (Format != GCNO_402) + if (Format != GCOV::GCNO_402) Buff.readInt(); // Checksum #2 Name = Buff.readString(); - if (Format == GCNO_402 || Format == GCNO_404) + if (Format == GCOV::GCNO_402 || Format == GCOV::GCNO_404) Filename = Buff.readString(); - if (Format == GCDA_402 || Format == GCDA_404) { + if (Format == GCOV::GCDA_402 || Format == GCOV::GCDA_404) { Buff.readArcTag(); uint32_t Count = Buff.readInt() / 2; for (unsigned i = 0, e = Count; i != e; ++i) { diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp index 736e370..2e636aa 100644 --- a/lib/VMCore/InlineAsm.cpp +++ b/lib/VMCore/InlineAsm.cpp @@ -27,19 +27,20 @@ InlineAsm::~InlineAsm() { InlineAsm *InlineAsm::get(FunctionType *Ty, StringRef AsmString, StringRef Constraints, bool hasSideEffects, - bool isAlignStack) { - InlineAsmKeyType Key(AsmString, Constraints, hasSideEffects, isAlignStack); + bool isAlignStack, AsmDialect asmDialect) { + InlineAsmKeyType Key(AsmString, Constraints, hasSideEffects, isAlignStack, + asmDialect); LLVMContextImpl *pImpl = Ty->getContext().pImpl; return pImpl->InlineAsms.getOrCreate(PointerType::getUnqual(Ty), Key); } InlineAsm::InlineAsm(PointerType *Ty, const std::string &asmString, const std::string &constraints, bool hasSideEffects, - bool isAlignStack) + bool isAlignStack, AsmDialect asmDialect) : Value(Ty, Value::InlineAsmVal), - AsmString(asmString), - Constraints(constraints), HasSideEffects(hasSideEffects), - IsAlignStack(isAlignStack) { + AsmString(asmString), Constraints(constraints), + HasSideEffects(hasSideEffects), IsAlignStack(isAlignStack), + Dialect(asmDialect) { // Do various checks on the constraint string and type. assert(Verify(getFunctionType(), constraints) && diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index 4530c04..53f1149 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -1189,7 +1189,7 @@ void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P, assert(PassDebugging >= Details); if (Set.empty()) return; - dbgs() << (void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:"; + dbgs() << (const void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:"; for (unsigned i = 0; i != Set.size(); ++i) { if (i) dbgs() << ','; const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(Set[i]); |