author     Stephen Hines <srhines@google.com>    2013-08-07 15:07:10 -0700
committer  Stephen Hines <srhines@google.com>    2013-08-07 15:07:10 -0700
commit     fab2daa4a1127ecb217abe2b07c1769122b6fee1
tree       268ebfd1963fd98ba412e76819afdf95a7d4267b /lib/CodeGen
parent     8197ac1c1a0a91baa70c4dea8cb488f254ef974c
parent     10251753b6897adcd22cc981c0cc42f348c109de
Merge commit '10251753b6897adcd22cc981c0cc42f348c109de' into merge-20130807
Conflicts:
	lib/Archive/ArchiveReader.cpp
	lib/Support/Unix/PathV2.inc
Change-Id: I29d8c1e321a4a380b6013f00bac6a8e4b593cc4e
Diffstat (limited to 'lib/CodeGen')
95 files changed, 3767 insertions, 2544 deletions
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 4731af5..ca08b5b 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -202,161 +202,271 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) { } static bool isNoopBitcast(Type *T1, Type *T2, - const TargetLowering& TLI) { + const TargetLoweringBase& TLI) { return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) || (isa<VectorType>(T1) && isa<VectorType>(T2) && TLI.isTypeLegal(EVT::getEVT(T1)) && TLI.isTypeLegal(EVT::getEVT(T2))); } -/// sameNoopInput - Return true if V1 == V2, else if either V1 or V2 is a noop -/// (i.e., lowers to no machine code), look through it (and any transitive noop -/// operands to it) and check if it has the same noop input value. This is -/// used to determine if a tail call can be formed. -static bool sameNoopInput(const Value *V1, const Value *V2, - SmallVectorImpl<unsigned> &Els1, - SmallVectorImpl<unsigned> &Els2, - const TargetLowering &TLI) { - using std::swap; - bool swapParity = false; - bool equalEls = Els1 == Els2; +/// Look through operations that will be free to find the earliest source of +/// this value. +/// +/// @param ValLoc If V has aggegate type, we will be interested in a particular +/// scalar component. This records its address; the reverse of this list gives a +/// sequence of indices appropriate for an extractvalue to locate the important +/// value. This value is updated during the function and on exit will indicate +/// similar information for the Value returned. +/// +/// @param DataBits If this function looks through truncate instructions, this +/// will record the smallest size attained. +static const Value *getNoopInput(const Value *V, + SmallVectorImpl<unsigned> &ValLoc, + unsigned &DataBits, + const TargetLoweringBase &TLI) { while (true) { - if ((equalEls && V1 == V2) || isa<UndefValue>(V1) || isa<UndefValue>(V2)) { - if (swapParity) - // Revert to original Els1 and Els2 to avoid confusing recursive calls - swap(Els1, Els2); - return true; - } - // Try to look through V1; if V1 is not an instruction, it can't be looked // through. - const Instruction *I = dyn_cast<Instruction>(V1); + const Instruction *I = dyn_cast<Instruction>(V); + if (!I || I->getNumOperands() == 0) return V; const Value *NoopInput = 0; - if (I != 0 && I->getNumOperands() > 0) { - Value *Op = I->getOperand(0); - if (isa<TruncInst>(I)) { - // Look through truly no-op truncates. - if (TLI.isTruncateFree(Op->getType(), I->getType())) - NoopInput = Op; - } else if (isa<BitCastInst>(I)) { - // Look through truly no-op bitcasts. - if (isNoopBitcast(Op->getType(), I->getType(), TLI)) - NoopInput = Op; - } else if (isa<GetElementPtrInst>(I)) { - // Look through getelementptr - if (cast<GetElementPtrInst>(I)->hasAllZeroIndices()) - NoopInput = Op; - } else if (isa<IntToPtrInst>(I)) { - // Look through inttoptr. - // Make sure this isn't a truncating or extending cast. We could - // support this eventually, but don't bother for now. - if (!isa<VectorType>(I->getType()) && - TLI.getPointerTy().getSizeInBits() == - cast<IntegerType>(Op->getType())->getBitWidth()) - NoopInput = Op; - } else if (isa<PtrToIntInst>(I)) { - // Look through ptrtoint. - // Make sure this isn't a truncating or extending cast. We could - // support this eventually, but don't bother for now. 
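
The new getNoopInput above replaces the old two-sided sameNoopInput walk with a single forward trace: keep stripping operations that lower to no machine code (no-op bitcasts, all-zero GEPs, width-preserving inttoptr/ptrtoint, free truncates) and remember the narrowest width seen along the way. A standalone sketch of that loop, using toy node types rather than LLVM IR (Node, Kind, and getNoopInput here are illustrative stand-ins):

#include <algorithm>
#include <climits>
#include <cstdio>

// Toy stand-ins for IR values; not the LLVM API.
enum Kind { Leaf, NoopBitcast, ZeroGEP, FreeTrunc };

struct Node {
  Kind kind;
  unsigned bits;    // result width in bits
  const Node *op;   // single operand, if any
};

// Mirror of the look-through loop: walk operands until the node is not a
// known no-op, tracking the smallest width seen through truncates.
const Node *getNoopInput(const Node *v, unsigned &dataBits) {
  while (true) {
    const Node *noopInput = nullptr;
    switch (v->kind) {
    case NoopBitcast:
    case ZeroGEP:
      noopInput = v->op;                         // free: lowers to no code
      break;
    case FreeTrunc:
      dataBits = std::min(dataBits, v->bits);    // remember surviving width
      noopInput = v->op;
      break;
    case Leaf:
      break;
    }
    if (!noopInput)
      return v;                                  // nothing left to look through
    v = noopInput;
  }
}

int main() {
  Node src = {Leaf, 64, nullptr};
  Node tr  = {FreeTrunc, 32, &src};
  Node bc  = {NoopBitcast, 32, &tr};

  unsigned bits = UINT_MAX;
  const Node *found = getNoopInput(&bc, bits);

  // The bitcast and truncate are looked through; 32 bits survive the path.
  std::printf("found source: %s, surviving bits: %u\n",
              found == &src ? "yes" : "no", bits);
  return 0;
}
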
- if (!isa<VectorType>(I->getType()) && - TLI.getPointerTy().getSizeInBits() == - cast<IntegerType>(I->getType())->getBitWidth()) - NoopInput = Op; - } else if (isa<CallInst>(I)) { - // Look through call - for (User::const_op_iterator i = I->op_begin(), - // Skip Callee - e = I->op_end() - 1; - i != e; ++i) { - unsigned attrInd = i - I->op_begin() + 1; - if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) && - isNoopBitcast((*i)->getType(), I->getType(), TLI)) { - NoopInput = *i; - break; - } + + Value *Op = I->getOperand(0); + if (isa<BitCastInst>(I)) { + // Look through truly no-op bitcasts. + if (isNoopBitcast(Op->getType(), I->getType(), TLI)) + NoopInput = Op; + } else if (isa<GetElementPtrInst>(I)) { + // Look through getelementptr + if (cast<GetElementPtrInst>(I)->hasAllZeroIndices()) + NoopInput = Op; + } else if (isa<IntToPtrInst>(I)) { + // Look through inttoptr. + // Make sure this isn't a truncating or extending cast. We could + // support this eventually, but don't bother for now. + if (!isa<VectorType>(I->getType()) && + TLI.getPointerTy().getSizeInBits() == + cast<IntegerType>(Op->getType())->getBitWidth()) + NoopInput = Op; + } else if (isa<PtrToIntInst>(I)) { + // Look through ptrtoint. + // Make sure this isn't a truncating or extending cast. We could + // support this eventually, but don't bother for now. + if (!isa<VectorType>(I->getType()) && + TLI.getPointerTy().getSizeInBits() == + cast<IntegerType>(I->getType())->getBitWidth()) + NoopInput = Op; + } else if (isa<TruncInst>(I) && + TLI.allowTruncateForTailCall(Op->getType(), I->getType())) { + DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits()); + NoopInput = Op; + } else if (isa<CallInst>(I)) { + // Look through call (skipping callee) + for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 1; + i != e; ++i) { + unsigned attrInd = i - I->op_begin() + 1; + if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) && + isNoopBitcast((*i)->getType(), I->getType(), TLI)) { + NoopInput = *i; + break; } - } else if (isa<InvokeInst>(I)) { - // Look through invoke - for (User::const_op_iterator i = I->op_begin(), - // Skip BB, BB, Callee - e = I->op_end() - 3; - i != e; ++i) { - unsigned attrInd = i - I->op_begin() + 1; - if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) && - isNoopBitcast((*i)->getType(), I->getType(), TLI)) { - NoopInput = *i; - break; - } + } + } else if (isa<InvokeInst>(I)) { + // Look through invoke (skipping BB, BB, Callee) + for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 3; + i != e; ++i) { + unsigned attrInd = i - I->op_begin() + 1; + if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) && + isNoopBitcast((*i)->getType(), I->getType(), TLI)) { + NoopInput = *i; + break; } } + } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) { + // Value may come from either the aggregate or the scalar + ArrayRef<unsigned> InsertLoc = IVI->getIndices(); + if (std::equal(InsertLoc.rbegin(), InsertLoc.rend(), + ValLoc.rbegin())) { + // The type being inserted is a nested sub-type of the aggregate; we + // have to remove those initial indices to get the location we're + // interested in for the operand. + ValLoc.resize(ValLoc.size() - InsertLoc.size()); + NoopInput = IVI->getInsertedValueOperand(); + } else { + // The struct we're inserting into has the value we're interested in, no + // change of address. 
+ NoopInput = Op; + } + } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) { + // The part we're interested in will inevitably be some sub-section of the + // previous aggregate. Combine the two paths to obtain the true address of + // our element. + ArrayRef<unsigned> ExtractLoc = EVI->getIndices(); + std::copy(ExtractLoc.rbegin(), ExtractLoc.rend(), + std::back_inserter(ValLoc)); + NoopInput = Op; } + // Terminate if we couldn't find anything to look through. + if (!NoopInput) + return V; - if (NoopInput) { - V1 = NoopInput; - continue; - } + V = NoopInput; + } +} + +/// Return true if this scalar return value only has bits discarded on its path +/// from the "tail call" to the "ret". This includes the obvious noop +/// instructions handled by getNoopInput above as well as free truncations (or +/// extensions prior to the call). +static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal, + SmallVectorImpl<unsigned> &RetIndices, + SmallVectorImpl<unsigned> &CallIndices, + const TargetLoweringBase &TLI) { - // If we already swapped, avoid infinite loop - if (swapParity) - break; + // Trace the sub-value needed by the return value as far back up the graph as + // possible, in the hope that it will intersect with the value produced by the + // call. In the simple case with no "returned" attribute, the hope is actually + // that we end up back at the tail call instruction itself. + unsigned BitsRequired = UINT_MAX; + RetVal = getNoopInput(RetVal, RetIndices, BitsRequired, TLI); + + // If this slot in the value returned is undef, it doesn't matter what the + // call puts there, it'll be fine. + if (isa<UndefValue>(RetVal)) + return true; + + // Now do a similar search up through the graph to find where the value + // actually returned by the "tail call" comes from. In the simple case without + // a "returned" attribute, the search will be blocked immediately and the loop + // a Noop. + unsigned BitsProvided = UINT_MAX; + CallVal = getNoopInput(CallVal, CallIndices, BitsProvided, TLI); + + // There's no hope if we can't actually trace them to (the same part of!) the + // same value. + if (CallVal != RetVal || CallIndices != RetIndices) + return false; - // Otherwise, swap V1<->V2, Els1<->Els2 - swap(V1, V2); - swap(Els1, Els2); - swapParity = !swapParity; + // However, intervening truncates may have made the call non-tail. Make sure + // all the bits that are needed by the "ret" have been provided by the "tail + // call". FIXME: with sufficiently cunning bit-tracking, we could look through + // extensions too. + if (BitsProvided < BitsRequired) + return false; + + return true; +} + +/// For an aggregate type, determine whether a given index is within bounds or +/// not. +static bool indexReallyValid(CompositeType *T, unsigned Idx) { + if (ArrayType *AT = dyn_cast<ArrayType>(T)) + return Idx < AT->getNumElements(); + + return Idx < cast<StructType>(T)->getNumElements(); +} + +/// Move the given iterators to the next leaf type in depth first traversal. +/// +/// Performs a depth-first traversal of the type as specified by its arguments, +/// stopping at the next leaf node (which may be a legitimate scalar type or an +/// empty struct or array). +/// +/// @param SubTypes List of the partial components making up the type from +/// outermost to innermost non-empty aggregate. The element currently +/// represented is SubTypes.back()->getTypeAtIndex(Path.back() - 1). 
+/// +/// @param Path Set of extractvalue indices leading from the outermost type +/// (SubTypes[0]) to the leaf node currently represented. +/// +/// @returns true if a new type was found, false otherwise. Calling this +/// function again on a finished iterator will repeatedly return +/// false. SubTypes.back()->getTypeAtIndex(Path.back()) is either an empty +/// aggregate or a non-aggregate +static bool +advanceToNextLeafType(SmallVectorImpl<CompositeType *> &SubTypes, + SmallVectorImpl<unsigned> &Path) { + // First march back up the tree until we can successfully increment one of the + // coordinates in Path. + while (!Path.empty() && !indexReallyValid(SubTypes.back(), Path.back() + 1)) { + Path.pop_back(); + SubTypes.pop_back(); } - for (unsigned n = 0; n < 2; ++n) { - if (isa<InsertValueInst>(V1)) { - if (isa<StructType>(V1->getType())) { - // Look through insertvalue - unsigned i, e; - for (i = 0, e = cast<StructType>(V1->getType())->getNumElements(); - i != e; ++i) { - const Value *InScalar = FindInsertedValue(const_cast<Value*>(V1), i); - if (InScalar == 0) - break; - Els1.push_back(i); - if (!sameNoopInput(InScalar, V2, Els1, Els2, TLI)) { - Els1.pop_back(); - break; - } - Els1.pop_back(); - } - if (i == e) { - if (swapParity) - swap(Els1, Els2); - return true; - } - } - } else if (!Els1.empty() && isa<ExtractValueInst>(V1)) { - const ExtractValueInst *EVI = cast<ExtractValueInst>(V1); - unsigned i = Els1.back(); - // If the scalar value being inserted is an extractvalue of the right - // index from the call, then everything is good. - if (isa<StructType>(EVI->getOperand(0)->getType()) && - EVI->getNumIndices() == 1 && EVI->getIndices()[0] == i) { - // Look through extractvalue - Els1.pop_back(); - if (sameNoopInput(EVI->getOperand(0), V2, Els1, Els2, TLI)) { - Els1.push_back(i); - if (swapParity) - swap(Els1, Els2); - return true; - } - Els1.push_back(i); - } - } + // If we reached the top, then the iterator is done. + if (Path.empty()) + return false; + + // We know there's *some* valid leaf now, so march back down the tree picking + // out the left-most element at each node. + ++Path.back(); + Type *DeeperType = SubTypes.back()->getTypeAtIndex(Path.back()); + while (DeeperType->isAggregateType()) { + CompositeType *CT = cast<CompositeType>(DeeperType); + if (!indexReallyValid(CT, 0)) + return true; + + SubTypes.push_back(CT); + Path.push_back(0); - swap(V1, V2); - swap(Els1, Els2); - swapParity = !swapParity; + DeeperType = CT->getTypeAtIndex(0U); } - if (swapParity) - swap(Els1, Els2); - return false; + return true; +} + +/// Find the first non-empty, scalar-like type in Next and setup the iterator +/// components. +/// +/// Assuming Next is an aggregate of some kind, this function will traverse the +/// tree from left to right (i.e. depth-first) looking for the first +/// non-aggregate type which will play a role in function return. +/// +/// For example, if Next was {[0 x i64], {{}, i32, {}}, i32} then we would setup +/// Path as [1, 1] and SubTypes as [Next, {{}, i32, {}}] to represent the first +/// i32 in that type. +static bool firstRealType(Type *Next, + SmallVectorImpl<CompositeType *> &SubTypes, + SmallVectorImpl<unsigned> &Path) { + // First initialise the iterator components to the first "leaf" node + // (i.e. node with no valid sub-type at any index, so {} does count as a leaf + // despite nominally being an aggregate). 
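
advanceToNextLeafType above is a cursor-style depth-first traversal: back out of exhausted aggregates, bump the last index, then dive left-most to the next leaf. The same two phases in a self-contained sketch over a toy type tree (Ty and advance are illustrative stand-ins, not the CompositeType API):

#include <cstdio>
#include <vector>

// Toy aggregate model: a node with no children plays the role of a scalar
// when aggregate is false, and of "{}" when aggregate is true.
struct Ty {
  bool aggregate;
  std::vector<const Ty *> elems;
  bool isAggregate() const { return aggregate; }
};

static bool indexValid(const Ty *t, unsigned idx) {
  return idx < t->elems.size();
}

// Same shape as advanceToNextLeafType: retreat until an index can be bumped,
// then descend left-most until a non-aggregate (or empty aggregate) is found.
static bool advance(std::vector<const Ty *> &subTypes,
                    std::vector<unsigned> &path) {
  while (!path.empty() && !indexValid(subTypes.back(), path.back() + 1)) {
    path.pop_back();
    subTypes.pop_back();
  }
  if (path.empty())
    return false;                 // iterator exhausted

  ++path.back();
  const Ty *deeper = subTypes.back()->elems[path.back()];
  while (deeper->isAggregate()) {
    if (!indexValid(deeper, 0))
      return true;                // empty aggregate counts as a leaf
    subTypes.push_back(deeper);
    path.push_back(0);
    deeper = deeper->elems[0];
  }
  return true;
}

int main() {
  Ty i32   = {false, {}};
  Ty empty = {true, {}};
  Ty inner = {true, {&empty, &i32, &empty}};
  Ty outer = {true, {&inner, &i32}};

  // Prime the cursor at index 0 and descend to the first leaf by hand
  // (firstRealType does this, and additionally skips empty leaves).
  std::vector<const Ty *> subTypes{&outer};
  std::vector<unsigned> path{0};
  while (subTypes.back()->elems[path.back()]->isAggregate() &&
         indexValid(subTypes.back()->elems[path.back()], 0)) {
    subTypes.push_back(subTypes.back()->elems[path.back()]);
    path.push_back(0);
  }
  do {
    std::printf("leaf at depth %zu, index %u\n", path.size(), path.back());
  } while (advance(subTypes, path));
  return 0;
}
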
+ while (Next->isAggregateType() && + indexReallyValid(cast<CompositeType>(Next), 0)) { + SubTypes.push_back(cast<CompositeType>(Next)); + Path.push_back(0); + Next = cast<CompositeType>(Next)->getTypeAtIndex(0U); + } + + // If there's no Path now, Next was originally scalar already (or empty + // leaf). We're done. + if (Path.empty()) + return true; + + // Otherwise, use normal iteration to keep looking through the tree until we + // find a non-aggregate type. + while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType()) { + if (!advanceToNextLeafType(SubTypes, Path)) + return false; + } + + return true; +} + +/// Set the iterator data-structures to the next non-empty, non-aggregate +/// subtype. +bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes, + SmallVectorImpl<unsigned> &Path) { + do { + if (!advanceToNextLeafType(SubTypes, Path)) + return false; + + assert(!Path.empty() && "found a leaf but didn't set the path?"); + } while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType()); + + return true; } + /// Test if the given instruction is in a position to be optimized /// with a tail-call. This roughly means that it's in a block with /// a return and there's nothing that needs to be scheduled @@ -422,7 +532,50 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) return false; - // Otherwise, make sure the return value and I have the same value - SmallVector<unsigned, 4> Els1, Els2; - return sameNoopInput(Ret->getOperand(0), I, Els1, Els2, TLI); + const Value *RetVal = Ret->getOperand(0), *CallVal = I; + SmallVector<unsigned, 4> RetPath, CallPath; + SmallVector<CompositeType *, 4> RetSubTypes, CallSubTypes; + + bool RetEmpty = !firstRealType(RetVal->getType(), RetSubTypes, RetPath); + bool CallEmpty = !firstRealType(CallVal->getType(), CallSubTypes, CallPath); + + // Nothing's actually returned, it doesn't matter what the callee put there + // it's a valid tail call. + if (RetEmpty) + return true; + + // Iterate pairwise through each of the value types making up the tail call + // and the corresponding return. For each one we want to know whether it's + // essentially going directly from the tail call to the ret, via operations + // that end up not generating any code. + // + // We allow a certain amount of covariance here. For example it's permitted + // for the tail call to define more bits than the ret actually cares about + // (e.g. via a truncate). + do { + if (CallEmpty) { + // We've exhausted the values produced by the tail call instruction, the + // rest are essentially undef. The type doesn't really matter, but we need + // *something*. + Type *SlotType = RetSubTypes.back()->getTypeAtIndex(RetPath.back()); + CallVal = UndefValue::get(SlotType); + } + + // The manipulations performed when we're looking through an insertvalue or + // an extractvalue would happen at the front of the RetPath list, so since + // we have to copy it anyway it's more efficient to create a reversed copy. + using std::copy; + SmallVector<unsigned, 4> TmpRetPath, TmpCallPath; + copy(RetPath.rbegin(), RetPath.rend(), std::back_inserter(TmpRetPath)); + copy(CallPath.rbegin(), CallPath.rend(), std::back_inserter(TmpCallPath)); + + // Finally, we can check whether the value produced by the tail call at this + // index is compatible with the value we return. 
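
Putting the pieces together, each slot passes only if both traces converge on the same part of the same value and no bits the ret needs were lost on the call's side. A compact standalone restatement of that check (Traced and slotCompatible are hypothetical names):

#include <cassert>
#include <vector>

// Sketch of the final per-slot test: both traces must land on (the same part
// of) the same value, and the call must provide at least as many bits as the
// ret consumes; truncation along the path is fine, widening is not.
struct Traced {
  const void *source;           // result of the look-through walk
  std::vector<unsigned> path;   // reversed extractvalue indices
  unsigned bits;                // narrowest width seen along the walk
};

static bool slotCompatible(const Traced &ret, const Traced &call) {
  if (ret.source != call.source || ret.path != call.path)
    return false;
  return call.bits >= ret.bits;
}

int main() {
  int dummy;
  Traced ret  = {&dummy, {0, 1}, 32};
  Traced call = {&dummy, {0, 1}, 64};
  assert(slotCompatible(ret, call));   // 64 bits provided, 32 needed
  return 0;
}
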
+ if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath, TLI)) + return false; + + CallEmpty = !nextRealType(CallSubTypes, CallPath); + } while(nextRealType(RetSubTypes, RetPath)); + + return true; } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 69ff329..12c3574 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -42,6 +42,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" #include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -49,9 +50,9 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -static const char *DWARFGroupName = "DWARF Emission"; -static const char *DbgTimerName = "DWARF Debug Writer"; -static const char *EHTimerName = "DWARF Exception Writer"; +static const char *const DWARFGroupName = "DWARF Emission"; +static const char *const DbgTimerName = "DWARF Debug Writer"; +static const char *const EHTimerName = "DWARF Exception Writer"; STATISTIC(EmittedInsts, "Number of machine instrs printed"); @@ -97,7 +98,7 @@ AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) OutContext(Streamer.getContext()), OutStreamer(Streamer), LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) { - DD = 0; DE = 0; MMI = 0; LI = 0; + DD = 0; DE = 0; MMI = 0; LI = 0; MF = 0; CurrentFnSym = CurrentFnSymForSize = 0; GCMetadataPrinters = 0; VerboseAsm = Streamer.isVerboseAsm(); @@ -154,8 +155,6 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { } bool AsmPrinter::doInitialization(Module &M) { - OutStreamer.InitStreamer(); - MMI = getAnalysisIfAvailable<MachineModuleInfo>(); MMI->AnalyzeModule(M); @@ -163,6 +162,8 @@ bool AsmPrinter::doInitialization(Module &M) { const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) .Initialize(OutContext, TM); + OutStreamer.InitStreamer(); + Mang = new Mangler(OutContext, &TM); // Allow the target to emit any magic that it wants at the start of the file. @@ -367,9 +368,10 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { MCSymbol *MangSym = OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); - if (GVKind.isThreadBSS()) + if (GVKind.isThreadBSS()) { + TheSection = getObjFileLowering().getTLSBSSSection(); OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog); - else if (GVKind.isThreadData()) { + } else if (GVKind.isThreadData()) { OutStreamer.SwitchSection(TheSection); EmitAlignment(AlignLog, GV); @@ -395,7 +397,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // - pointer to mangled symbol above with initializer unsigned PtrSize = TD->getPointerSizeInBits()/8; OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), - PtrSize); + PtrSize); OutStreamer.EmitIntValue(0, PtrSize); OutStreamer.EmitSymbolValue(MangSym, PtrSize); @@ -562,10 +564,17 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { // cast away const; DIetc do not take const operands for some reason. 
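
The `static const char *const` change near the top of the AsmPrinter.cpp hunk is a small const-correctness fix: with only the first const, the characters are protected but the pointer variable itself remains writable (and lands in a writable data section).

// Why the patch adds the second const:
static const char *Name1 = "DWARF Emission";        // pointer still mutable
static const char *const Name2 = "DWARF Emission";  // pointer frozen too

int main() {
  Name1 = "oops";    // legal: only the pointee was const
  // Name2 = "oops"; // error: assignment of read-only variable
  return 0;
}
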
DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata())); - if (V.getContext().isSubprogram()) - OS << DISubprogram(V.getContext()).getDisplayName() << ":"; + if (V.getContext().isSubprogram()) { + StringRef Name = DISubprogram(V.getContext()).getDisplayName(); + if (!Name.empty()) + OS << Name << ":"; + } OS << V.getName() << " <- "; + // The second operand is only an offset if it's an immediate. + bool Deref = MI->getOperand(0).isReg() && MI->getOperand(1).isImm(); + int64_t Offset = Deref ? MI->getOperand(1).getImm() : 0; + // Register or immediate value. Register 0 means undef. if (MI->getOperand(0).isFPImm()) { APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); @@ -586,18 +595,31 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { } else if (MI->getOperand(0).isCImm()) { MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/); } else { - assert(MI->getOperand(0).isReg() && "Unknown operand type"); - if (MI->getOperand(0).getReg() == 0) { + unsigned Reg; + if (MI->getOperand(0).isReg()) { + Reg = MI->getOperand(0).getReg(); + } else { + assert(MI->getOperand(0).isFI() && "Unknown operand type"); + const TargetFrameLowering *TFI = AP.TM.getFrameLowering(); + Offset += TFI->getFrameIndexReference(*AP.MF, + MI->getOperand(0).getIndex(), Reg); + Deref = true; + } + if (Reg == 0) { // Suppress offset, it is not meaningful here. OS << "undef"; // NOTE: Want this comment at start of line, don't emit with AddComment. AP.OutStreamer.EmitRawText(OS.str()); return true; } - OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg()); + if (Deref) + OS << '['; + OS << AP.TM.getRegisterInfo()->getName(Reg); } - OS << '+' << MI->getOperand(1).getImm(); + if (Deref) + OS << '+' << Offset << ']'; + // NOTE: Want this comment at start of line, don't emit with AddComment. AP.OutStreamer.EmitRawText(OS.str()); return true; @@ -790,16 +812,9 @@ void AsmPrinter::EmitFunctionBody() { OutStreamer.AddBlankLine(); } -/// getDebugValueLocation - Get location information encoded by DBG_VALUE -/// operands. -MachineLocation AsmPrinter:: -getDebugValueLocation(const MachineInstr *MI) const { - // Target specific DBG_VALUE instructions are handled by each target. - return MachineLocation(); -} - /// EmitDwarfRegOp - Emit dwarf register operation. -void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { +void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc, + bool Indirect) const { const TargetRegisterInfo *TRI = TM.getRegisterInfo(); int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); @@ -817,7 +832,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { // caller might be in the middle of an dwarf expression. We should // probably assert that Reg >= 0 once debug info generation is more mature. - if (MLoc.isIndirect()) { + if (MLoc.isIndirect() || Indirect) { if (Reg < 32) { OutStreamer.AddComment( dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg)); @@ -828,7 +843,9 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { OutStreamer.AddComment(Twine(Reg)); EmitULEB128(Reg); } - EmitSLEB128(MLoc.getOffset()); + EmitSLEB128(!MLoc.isIndirect() ? 
0 : MLoc.getOffset()); + if (MLoc.isIndirect() && Indirect) + EmitInt8(dwarf::DW_OP_deref); } else { if (Reg < 32) { OutStreamer.AddComment( @@ -1400,6 +1417,10 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, unsigned Size) const { + if (MAI->needsDwarfSectionOffsetDirective() && Size == 4) { // secrel32 ONLY works for 32bits. + OutStreamer.EmitCOFFSecRel32(Label); + return; + } // Emit Label+Offset (or just Label if Offset is zero) const MCExpr *Expr = MCSymbolRefExpr::Create(Label, OutContext); @@ -1561,8 +1582,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { } } -static void emitGlobalConstantImpl(const Constant *C, unsigned AddrSpace, - AsmPrinter &AP); +static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP); /// isRepeatedByteSequence - Determine whether the given value is /// composed of a repeated sequence of identical bytes and return the @@ -1624,7 +1644,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { } static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, - unsigned AddrSpace,AsmPrinter &AP){ + AsmPrinter &AP){ // See if we can aggregate this into a .fill, if so, emit it as such. int Value = isRepeatedByteSequence(CDS, AP.TM); @@ -1632,12 +1652,12 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CDS->getType()); // Don't emit a 1-byte object as a .fill. if (Bytes > 1) - return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); + return AP.OutStreamer.EmitFill(Bytes, Value); } // If this can be emitted with .ascii/.asciz, emit it as such. if (CDS->isString()) - return AP.OutStreamer.EmitBytes(CDS->getAsString(), AddrSpace); + return AP.OutStreamer.EmitBytes(CDS->getAsString()); // Otherwise, emit the values in successive locations. 
unsigned ElementByteSize = CDS->getElementByteSize(); @@ -1647,7 +1667,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n", CDS->getElementAsInteger(i)); AP.OutStreamer.EmitIntValue(CDS->getElementAsInteger(i), - ElementByteSize, AddrSpace); + ElementByteSize); } } else if (ElementByteSize == 4) { // FP Constants are printed as integer constants to avoid losing @@ -1662,7 +1682,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, F = CDS->getElementAsFloat(i); if (AP.isVerbose()) AP.OutStreamer.GetCommentOS() << "float " << F << '\n'; - AP.OutStreamer.EmitIntValue(I, 4, AddrSpace); + AP.OutStreamer.EmitIntValue(I, 4); } } else { assert(CDS->getElementType()->isDoubleTy()); @@ -1675,7 +1695,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, F = CDS->getElementAsDouble(i); if (AP.isVerbose()) AP.OutStreamer.GetCommentOS() << "double " << F << '\n'; - AP.OutStreamer.EmitIntValue(I, 8, AddrSpace); + AP.OutStreamer.EmitIntValue(I, 8); } } @@ -1684,41 +1704,38 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, unsigned EmittedSize = TD.getTypeAllocSize(CDS->getType()->getElementType()) * CDS->getNumElements(); if (unsigned Padding = Size - EmittedSize) - AP.OutStreamer.EmitZeros(Padding, AddrSpace); + AP.OutStreamer.EmitZeros(Padding); } -static void emitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, - AsmPrinter &AP) { +static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP) { // See if we can aggregate some values. Make sure it can be // represented as a series of bytes of the constant value. int Value = isRepeatedByteSequence(CA, AP.TM); if (Value != -1) { uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CA->getType()); - AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); + AP.OutStreamer.EmitFill(Bytes, Value); } else { for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) - emitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); + emitGlobalConstantImpl(CA->getOperand(i), AP); } } -static void emitGlobalConstantVector(const ConstantVector *CV, - unsigned AddrSpace, AsmPrinter &AP) { +static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) { for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) - emitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP); + emitGlobalConstantImpl(CV->getOperand(i), AP); const DataLayout &TD = *AP.TM.getDataLayout(); unsigned Size = TD.getTypeAllocSize(CV->getType()); unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) * CV->getType()->getNumElements(); if (unsigned Padding = Size - EmittedSize) - AP.OutStreamer.EmitZeros(Padding, AddrSpace); + AP.OutStreamer.EmitZeros(Padding); } -static void emitGlobalConstantStruct(const ConstantStruct *CS, - unsigned AddrSpace, AsmPrinter &AP) { +static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP) { // Print the fields in successive locations. Pad to align if needed! const DataLayout *TD = AP.TM.getDataLayout(); unsigned Size = TD->getTypeAllocSize(CS->getType()); @@ -1734,19 +1751,18 @@ static void emitGlobalConstantStruct(const ConstantStruct *CS, SizeSoFar += FieldSize + PadSize; // Now print the actual field value. 
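
emitGlobalConstantStruct keeps a running SizeSoFar and pads each field out to the next field's offset, so the bytes emitted always agree with the DataLayout's idea of the struct size. The arithmetic in a standalone sketch, with made-up offsets and sizes:

#include <cstdio>

int main() {
  const unsigned offsets[] = {0, 4, 16};  // field offsets from a layout
  const unsigned sizes[]   = {1, 8, 4};   // field store sizes
  const unsigned total     = 24;          // alloc size of the whole struct

  unsigned sizeSoFar = 0;
  for (unsigned i = 0; i < 3; ++i) {
    unsigned next = (i + 1 < 3) ? offsets[i + 1] : total;
    unsigned pad  = next - offsets[i] - sizes[i];
    sizeSoFar += sizes[i] + pad;
    std::printf("field %u: emit %u bytes, then %u zero pad bytes\n",
                i, sizes[i], pad);
  }
  // Mirrors the assert in the hunk: we must land exactly on the struct size.
  return sizeSoFar == total ? 0 : 1;
}
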
- emitGlobalConstantImpl(Field, AddrSpace, AP); + emitGlobalConstantImpl(Field, AP); // Insert padding - this may include padding to increase the size of the // current field up to the ABI size (if the struct is not packed) as well // as padding to ensure that the next field starts at the right offset. - AP.OutStreamer.EmitZeros(PadSize, AddrSpace); + AP.OutStreamer.EmitZeros(PadSize); } assert(SizeSoFar == Layout->getSizeInBytes() && "Layout of constant struct may be incorrect!"); } -static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, - AsmPrinter &AP) { +static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { APInt API = CFP->getValueAPF().bitcastToAPInt(); // First print a comment with what we think the original floating-point value @@ -1772,27 +1788,26 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, int Chunk = API.getNumWords() - 1; if (TrailingBytes) - AP.OutStreamer.EmitIntValue(p[Chunk--], TrailingBytes, AddrSpace); + AP.OutStreamer.EmitIntValue(p[Chunk--], TrailingBytes); for (; Chunk >= 0; --Chunk) - AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace); + AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t)); } else { unsigned Chunk; for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk) - AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace); + AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t)); if (TrailingBytes) - AP.OutStreamer.EmitIntValue(p[Chunk], TrailingBytes, AddrSpace); + AP.OutStreamer.EmitIntValue(p[Chunk], TrailingBytes); } // Emit the tail padding for the long double. const DataLayout &TD = *AP.TM.getDataLayout(); AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) - - TD.getTypeStoreSize(CFP->getType()), AddrSpace); + TD.getTypeStoreSize(CFP->getType())); } -static void emitGlobalConstantLargeInt(const ConstantInt *CI, - unsigned AddrSpace, AsmPrinter &AP) { +static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { const DataLayout *TD = AP.TM.getDataLayout(); unsigned BitWidth = CI->getBitWidth(); @@ -1832,7 +1847,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, const uint64_t *RawData = Realigned.getRawData(); for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { uint64_t Val = TD->isBigEndian() ? 
RawData[e - i - 1] : RawData[i]; - AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace); + AP.OutStreamer.EmitIntValue(Val, 8); } if (ExtraBitsSize) { @@ -1844,16 +1859,15 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, assert(Size && Size * 8 >= ExtraBitsSize && (ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize))) == ExtraBits && "Directive too small for extra bits."); - AP.OutStreamer.EmitIntValue(ExtraBits, Size, AddrSpace); + AP.OutStreamer.EmitIntValue(ExtraBits, Size); } } -static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, - AsmPrinter &AP) { +static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { const DataLayout *TD = AP.TM.getDataLayout(); uint64_t Size = TD->getTypeAllocSize(CV->getType()); if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) - return AP.OutStreamer.EmitZeros(Size, AddrSpace); + return AP.OutStreamer.EmitZeros(Size); if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { switch (Size) { @@ -1864,36 +1878,36 @@ static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, if (AP.isVerbose()) AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n", CI->getZExtValue()); - AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace); + AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size); return; default: - emitGlobalConstantLargeInt(CI, AddrSpace, AP); + emitGlobalConstantLargeInt(CI, AP); return; } } if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) - return emitGlobalConstantFP(CFP, AddrSpace, AP); + return emitGlobalConstantFP(CFP, AP); if (isa<ConstantPointerNull>(CV)) { - AP.OutStreamer.EmitIntValue(0, Size, AddrSpace); + AP.OutStreamer.EmitIntValue(0, Size); return; } if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV)) - return emitGlobalConstantDataSequential(CDS, AddrSpace, AP); + return emitGlobalConstantDataSequential(CDS, AP); if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) - return emitGlobalConstantArray(CVA, AddrSpace, AP); + return emitGlobalConstantArray(CVA, AP); if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) - return emitGlobalConstantStruct(CVS, AddrSpace, AP); + return emitGlobalConstantStruct(CVS, AP); if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of // vectors). if (CE->getOpcode() == Instruction::BitCast) - return emitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP); + return emitGlobalConstantImpl(CE->getOperand(0), AP); if (Size > 8) { // If the constant expression's size is greater than 64-bits, then we have @@ -1901,27 +1915,27 @@ static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, // that way. Constant *New = ConstantFoldConstantExpression(CE, TD); if (New && New != CE) - return emitGlobalConstantImpl(New, AddrSpace, AP); + return emitGlobalConstantImpl(New, AP); } } if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) - return emitGlobalConstantVector(V, AddrSpace, AP); + return emitGlobalConstantVector(V, AP); // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. - AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size, AddrSpace); + AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size); } /// EmitGlobalConstant - Print a general LLVM constant to the .s file. 
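
emitGlobalConstantLargeInt walks the APInt's 64-bit words in storage order on little-endian targets and in reverse on big-endian ones, since the words are held little-endian in memory but must be emitted lowest-address-first. A minimal sketch of that loop (emitWords stands in for the streamer calls):

#include <cstdint>
#include <cstdio>

static void emitWords(const uint64_t *raw, unsigned words, bool bigEndian) {
  for (unsigned i = 0; i != words; ++i) {
    // On big-endian targets the most significant word goes out first.
    uint64_t val = bigEndian ? raw[words - i - 1] : raw[i];
    std::printf("  .quad 0x%016llx\n", (unsigned long long)val);
  }
}

int main() {
  const uint64_t v128[] = {0x1122334455667788ULL, 0x99aabbccddeeff00ULL};
  std::puts("little-endian:"); emitWords(v128, 2, false);
  std::puts("big-endian:");    emitWords(v128, 2, true);
  return 0;
}
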
-void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { +void AsmPrinter::EmitGlobalConstant(const Constant *CV) { uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType()); if (Size) - emitGlobalConstantImpl(CV, AddrSpace, *this); + emitGlobalConstantImpl(CV, *this); else if (MAI->hasSubsectionsViaSymbols()) { // If the global has zero size, emit a single byte so that two labels don't // look like they are at the same location. - OutStreamer.EmitIntValue(0, 1, AddrSpace); + OutStreamer.EmitIntValue(0, 1); } } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index e6d67e8..c141d60 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -33,7 +33,7 @@ using namespace llvm; //===----------------------------------------------------------------------===// /// EmitSLEB128 - emit the specified signed leb128 value. -void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const { +void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const { if (isVerbose() && Desc) OutStreamer.AddComment(Desc); @@ -41,7 +41,7 @@ void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const { } /// EmitULEB128 - emit the specified signed leb128 value. -void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc, +void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, unsigned PadTo) const { if (isVerbose() && Desc) OutStreamer.AddComment(Desc); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index abfa330..d8e9c95 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -213,7 +213,7 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, } else { unsigned OpFlags = MI->getOperand(OpNo).getImm(); ++OpNo; // Skip over the ID number. - + if (InlineAsm::isMemKind(OpFlags)) { Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant, /*Modifier*/ 0, OS); diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index 8d15c06..65e7bee 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -13,3 +13,5 @@ add_llvm_library(LLVMAsmPrinter OcamlGCPrinter.cpp Win64Exception.cpp ) + +add_dependencies(LLVMAsmPrinter intrinsics_gen) diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 5f451a7..ab03861 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "DIE.h" +#include "DwarfDebug.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/IR/DataLayout.h" @@ -23,6 +24,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/MD5.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -120,6 +122,18 @@ DIE *DIE::getCompileUnit() { llvm_unreachable("We should not have orphaned DIEs."); } +DIEValue *DIE::findAttribute(unsigned Attribute) { + const SmallVectorImpl<DIEValue *> &Values = getValues(); + const DIEAbbrev &Abbrevs = getAbbrev(); + + // Iterate through all the attributes until we find the one we're + // looking for, if we can't find it return NULL. 
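
The AsmPrinterDwarf.cpp hunk widens EmitSLEB128/EmitULEB128 to 64-bit parameters; LEB128 is variable-length, so the wider type only changes how many 7-bit groups can be produced. Standalone encoders for both forms, independent of LLVM's streamer:

#include <cstdint>
#include <cstdio>
#include <vector>

static void encodeULEB128(uint64_t value, std::vector<uint8_t> &out) {
  do {
    uint8_t byte = value & 0x7f;
    value >>= 7;
    if (value != 0)
      byte |= 0x80;              // continuation bit: more bytes follow
    out.push_back(byte);
  } while (value != 0);
}

static void encodeSLEB128(int64_t value, std::vector<uint8_t> &out) {
  bool more = true;
  while (more) {
    uint8_t byte = value & 0x7f;
    value >>= 7;                 // arithmetic shift preserves the sign
    // Done once the remaining bits are pure sign extension of the payload.
    if ((value == 0 && !(byte & 0x40)) || (value == -1 && (byte & 0x40)))
      more = false;
    else
      byte |= 0x80;
    out.push_back(byte);
  }
}

int main() {
  std::vector<uint8_t> buf;
  encodeULEB128(624485, buf);    // classic example: e5 8e 26
  encodeSLEB128(-123456, buf);   // c0 bb 78
  for (uint8_t b : buf)
    std::printf("%02x ", b);
  std::printf("\n");
  return 0;
}
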
+ for (size_t i = 0; i < Values.size(); ++i) + if (Abbrevs.getData()[i].getAttribute() == Attribute) + return Values[i]; + return NULL; +} + #ifndef NDEBUG void DIE::print(raw_ostream &O, unsigned IndentCount) const { const std::string Indent(IndentCount, ' '); @@ -247,13 +261,39 @@ void DIEInteger::print(raw_ostream &O) const { #endif //===----------------------------------------------------------------------===// +// DIEExpr Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit expression value. +/// +void DIEExpr::EmitValue(AsmPrinter *AP, unsigned Form) const { + AP->OutStreamer.EmitValue(Expr, SizeOf(AP, Form)); +} + +/// SizeOf - Determine size of expression value in bytes. +/// +unsigned DIEExpr::SizeOf(AsmPrinter *AP, unsigned Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_sec_offset) return 4; + if (Form == dwarf::DW_FORM_strp) return 4; + return AP->getDataLayout().getPointerSize(); +} + +#ifndef NDEBUG +void DIEExpr::print(raw_ostream &O) const { + O << "Expr: "; + Expr->print(O); +} +#endif + +//===----------------------------------------------------------------------===// // DIELabel Implementation //===----------------------------------------------------------------------===// /// EmitValue - Emit label value. /// void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const { - AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form)); + AP->EmitLabelReference(Label, SizeOf(AP, Form)); } /// SizeOf - Determine size of label value in bytes. @@ -296,6 +336,29 @@ void DIEDelta::print(raw_ostream &O) const { #endif //===----------------------------------------------------------------------===// +// DIEString Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit string value. +/// +void DIEString::EmitValue(AsmPrinter *AP, unsigned Form) const { + Access->EmitValue(AP, Form); +} + +/// SizeOf - Determine size of delta value in bytes. +/// +unsigned DIEString::SizeOf(AsmPrinter *AP, unsigned Form) const { + return Access->SizeOf(AP, Form); +} + +#ifndef NDEBUG +void DIEString::print(raw_ostream &O) const { + O << "String: " << Str << "\tSymbol: "; + Access->print(O); +} +#endif + +//===----------------------------------------------------------------------===// // DIEEntry Implementation //===----------------------------------------------------------------------===// @@ -305,6 +368,16 @@ void DIEEntry::EmitValue(AsmPrinter *AP, unsigned Form) const { AP->EmitInt32(Entry->getOffset()); } +unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) { + // DWARF4: References that use the attribute form DW_FORM_ref_addr are + // specified to be four bytes in the DWARF 32-bit format and eight bytes + // in the DWARF 64-bit format, while DWARF Version 2 specifies that such + // references have the same size as an address on the target system. 
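
getRefAddrSize encodes a version-dependent rule from the DWARF spec: version 2 made DW_FORM_ref_addr address-sized, while later versions tie it to the DWARF format instead (4 bytes in 32-bit DWARF, which is all this hunk handles). Reduced to a pure function (refAddrSize is a hypothetical name):

#include <cassert>

static unsigned refAddrSize(unsigned dwarfVersion, unsigned pointerSize) {
  return dwarfVersion == 2 ? pointerSize : 4u;
}

int main() {
  assert(refAddrSize(2, 8) == 8);  // DWARF 2: pointer-sized
  assert(refAddrSize(4, 8) == 4);  // DWARF 4, 32-bit format: 4 bytes
  return 0;
}
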
+ if (AP->getDwarfDebug()->getDwarfVersion() == 2) + return AP->getDataLayout().getPointerSize(); + return sizeof(int32_t); +} + #ifndef NDEBUG void DIEEntry::print(raw_ostream &O) const { O << format("Die: 0x%lx", (long)(intptr_t)Entry); diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index c29144d..bfd7d1d 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -18,11 +18,13 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Dwarf.h" +#include "llvm/MC/MCExpr.h" #include <vector> namespace llvm { class AsmPrinter; class MCSymbol; + class MCSymbolRefExpr; class raw_ostream; //===--------------------------------------------------------------------===// @@ -173,6 +175,10 @@ namespace llvm { Child->Parent = this; } + /// findAttribute - Find a value in the DIE with the attribute given, returns NULL + /// if no such attribute exists. + DIEValue *findAttribute(unsigned Attribute); + #ifndef NDEBUG void print(raw_ostream &O, unsigned IndentCount = 0) const; void dump(); @@ -188,6 +194,7 @@ namespace llvm { enum { isInteger, isString, + isExpr, isLabel, isDelta, isEntry, @@ -261,7 +268,35 @@ namespace llvm { }; //===--------------------------------------------------------------------===// - /// DIELabel - A label expression DIE. + /// DIEExpr - An expression DIE. + // + class DIEExpr : public DIEValue { + const MCExpr *Expr; + public: + explicit DIEExpr(const MCExpr *E) : DIEValue(isExpr), Expr(E) {} + + /// EmitValue - Emit expression value. + /// + virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + + /// getValue - Get MCExpr. + /// + const MCExpr *getValue() const { return Expr; } + + /// SizeOf - Determine size of expression value in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *E) { return E->getType() == isExpr; } + +#ifndef NDEBUG + virtual void print(raw_ostream &O) const; +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIELabel - A label DIE. // class DIELabel : public DIEValue { const MCSymbol *Label; @@ -315,6 +350,36 @@ namespace llvm { }; //===--------------------------------------------------------------------===// + /// DIEString - A container for string values. + /// + class DIEString : public DIEValue { + const DIEValue *Access; + const StringRef Str; + + public: + DIEString(const DIEValue *Acc, const StringRef S) + : DIEValue(isString), Access(Acc), Str(S) {} + + /// getString - Grab the string out of the object. + StringRef getString() const { return Str; } + + /// EmitValue - Emit delta value. + /// + virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + + /// SizeOf - Determine size of delta value in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *D) { return D->getType() == isString; } + + #ifndef NDEBUG + virtual void print(raw_ostream &O) const; + #endif + }; + + //===--------------------------------------------------------------------===// /// DIEEntry - A pointer to another debug information entry. An instance of /// this class can also be used as a proxy for a debug information entry not /// yet defined (ie. types.) @@ -334,9 +399,13 @@ namespace llvm { /// SizeOf - Determine size of debug information entry in bytes. 
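
The new DIEExpr and DIEString classes in DIE.h plug into LLVM's hand-rolled RTTI: each subclass stamps a kind tag in its constructor and exposes a static classof, which isa<> and dyn_cast<> consult. The pattern in miniature, with a toy dynCast standing in for llvm::dyn_cast:

#include <cassert>

struct Value {
  enum Kind { IntegerKind, ExprKind, StringKind };
  explicit Value(Kind k) : kind(k) {}
  Kind getKind() const { return kind; }
private:
  const Kind kind;    // set once at construction, drives all casts
};

struct Expr : Value {
  Expr() : Value(ExprKind) {}
  static bool classof(const Value *v) { return v->getKind() == ExprKind; }
};

// A minimal dyn_cast built purely on classof.
template <typename To, typename From> To *dynCast(From *v) {
  return To::classof(v) ? static_cast<To *>(v) : nullptr;
}

int main() {
  Expr e;
  Value *v = &e;
  assert(dynCast<Expr>(v) != nullptr);
  return 0;
}
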
/// virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const { - return sizeof(int32_t); + return Form == dwarf::DW_FORM_ref_addr ? getRefAddrSize(AP) : + sizeof(int32_t); } + /// Returns size of a ref_addr entry. + static unsigned getRefAddrSize(AsmPrinter *AP); + // Implement isa/cast/dyncast. static bool classof(const DIEValue *E) { return E->getType() == isEntry; } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index c3b6c10..df8ca17 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -27,6 +27,7 @@ #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; @@ -131,21 +132,24 @@ void CompileUnit::addSInt(DIE *Die, unsigned Attribute, /// into another table which gets us the static offset into the string /// table. void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) { + DIEValue *Value; + unsigned Form; if (!DD->useSplitDwarf()) { MCSymbol *Symb = DU->getStringPoolEntry(String); - DIEValue *Value; if (Asm->needsRelocationsForDwarfStringPool()) Value = new (DIEValueAllocator) DIELabel(Symb); else { MCSymbol *StringPool = DU->getStringPoolSym(); Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool); } - Die->addValue(Attribute, dwarf::DW_FORM_strp, Value); + Form = dwarf::DW_FORM_strp; } else { unsigned idx = DU->getStringPoolIndex(String); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); - Die->addValue(Attribute, dwarf::DW_FORM_GNU_str_index, Value); + Value = new (DIEValueAllocator) DIEInteger(idx); + Form = dwarf::DW_FORM_GNU_str_index; } + DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String); + Die->addValue(Attribute, Form, Str); } /// addLocalString - Add a string attribute data and value. This is guaranteed @@ -163,6 +167,14 @@ void CompileUnit::addLocalString(DIE *Die, unsigned Attribute, Die->addValue(Attribute, dwarf::DW_FORM_strp, Value); } +/// addExpr - Add a Dwarf expression attribute data and value. +/// +void CompileUnit::addExpr(DIE *Die, unsigned Attribute, unsigned Form, + const MCExpr *Expr) { + DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr); + Die->addValue(Attribute, Form, Value); +} + /// addLabel - Add a Dwarf label attribute data and value. /// void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form, @@ -194,16 +206,13 @@ void CompileUnit::addLabelAddress(DIE *Die, unsigned Attribute, /// addOpAddress - Add a dwarf op address data and value using the /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. /// -void CompileUnit::addOpAddress(DIE *Die, MCSymbol *Sym) { - +void CompileUnit::addOpAddress(DIE *Die, const MCSymbol *Sym) { if (!DD->useSplitDwarf()) { addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); addLabel(Die, 0, dwarf::DW_FORM_udata, Sym); } else { - unsigned idx = DU->getAddrPoolIndex(Sym); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index); - Die->addValue(0, dwarf::DW_FORM_GNU_addr_index, Value); + addUInt(Die, 0, dwarf::DW_FORM_GNU_addr_index, DU->getAddrPoolIndex(Sym)); } } @@ -235,7 +244,7 @@ void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form, /// entry. void CompileUnit::addSourceLine(DIE *Die, DIVariable V) { // Verify variable. 
- if (!V.Verify()) + if (!V.isVariable()) return; unsigned Line = V.getLineNumber(); @@ -253,7 +262,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIVariable V) { /// entry. void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { // Verify global variable. - if (!G.Verify()) + if (!G.isGlobalVariable()) return; unsigned Line = G.getLineNumber(); @@ -270,7 +279,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { /// entry. void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { // Verify subprogram. - if (!SP.Verify()) + if (!SP.isSubprogram()) return; // If the line number is 0, don't add it. @@ -289,7 +298,7 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { /// entry. void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { // Verify type. - if (!Ty.Verify()) + if (!Ty.isType()) return; unsigned Line = Ty.getLineNumber(); @@ -306,7 +315,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { /// entry. void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { // Verify type. - if (!Ty.Verify()) + if (!Ty.isObjCProperty()) return; unsigned Line = Ty.getLineNumber(); @@ -341,14 +350,15 @@ void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) { /// addVariableAddress - Add DW_AT_location attribute for a /// DbgVariable based on provided MachineLocation. -void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die, +void CompileUnit::addVariableAddress(const DbgVariable &DV, DIE *Die, MachineLocation Location) { - if (DV->variableHasComplexAddress()) + if (DV.variableHasComplexAddress()) addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); - else if (DV->isBlockByrefVariable()) + else if (DV.isBlockByrefVariable()) addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); else - addAddress(Die, dwarf::DW_AT_location, Location); + addAddress(Die, dwarf::DW_AT_location, Location, + DV.getVariable().isIndirect()); } /// addRegisterOp - Add register operand. @@ -384,13 +394,17 @@ void CompileUnit::addRegisterOffset(DIE *TheDie, unsigned Reg, /// addAddress - Add an address attribute to a die based on the location /// provided. void CompileUnit::addAddress(DIE *Die, unsigned Attribute, - const MachineLocation &Location) { + const MachineLocation &Location, bool Indirect) { DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - if (Location.isReg()) + if (Location.isReg() && !Indirect) addRegisterOp(Block, Location.getReg()); - else + else { addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + if (Indirect && !Location.isReg()) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + } + } // Now attach the location information to the DIE. addBlock(Die, Attribute, 0, Block); @@ -401,17 +415,17 @@ void CompileUnit::addAddress(DIE *Die, unsigned Attribute, /// given the extra address information encoded in the DIVariable, starting from /// the starting location. Add the DWARF information to the die. /// -void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die, +void CompileUnit::addComplexAddress(const DbgVariable &DV, DIE *Die, unsigned Attribute, const MachineLocation &Location) { DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - unsigned N = DV->getNumAddrElements(); + unsigned N = DV.getNumAddrElements(); unsigned i = 0; if (Location.isReg()) { - if (N >= 2 && DV->getAddrElement(0) == DIBuilder::OpPlus) { + if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { // If first address element is OpPlus then emit // DW_OP_breg + Offset instead of DW_OP_reg + Offset. 
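
addRegisterOffset/addAddress build DWARF location expressions: DW_OP_breg0+reg (or DW_OP_bregx for large register numbers) followed by an SLEB128 offset, with a trailing DW_OP_deref when Indirect asks for a load through the computed address. A self-contained sketch that emits the raw bytes (the opcode values come from the DWARF spec; appendRegOffset is an illustrative name):

#include <cstdint>
#include <cstdio>
#include <vector>

enum : uint8_t { DW_OP_deref = 0x06, DW_OP_breg0 = 0x70, DW_OP_bregx = 0x92 };

static void appendSLEB128(int64_t v, std::vector<uint8_t> &out) {
  bool more = true;
  while (more) {
    uint8_t byte = v & 0x7f;
    v >>= 7;
    more = !((v == 0 && !(byte & 0x40)) || (v == -1 && (byte & 0x40)));
    if (more) byte |= 0x80;
    out.push_back(byte);
  }
}

static void appendRegOffset(unsigned reg, int64_t offset, bool deref,
                            std::vector<uint8_t> &expr) {
  if (reg < 32) {
    expr.push_back(DW_OP_breg0 + reg);   // dedicated opcode per register
  } else {
    expr.push_back(DW_OP_bregx);
    // (the register number would follow as ULEB128; elided in this sketch)
  }
  appendSLEB128(offset, expr);
  if (deref)
    expr.push_back(DW_OP_deref);         // load through reg + offset
}

int main() {
  std::vector<uint8_t> expr;
  appendRegOffset(6, -8, true, expr);    // e.g. [frame reg - 8], then deref
  for (uint8_t b : expr) std::printf("%02x ", b);
  std::printf("\n");                     // prints: 76 78 06
  return 0;
}
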
- addRegisterOffset(Block, Location.getReg(), DV->getAddrElement(1)); + addRegisterOffset(Block, Location.getReg(), DV.getAddrElement(1)); i = 2; } else addRegisterOp(Block, Location.getReg()); @@ -420,10 +434,10 @@ void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die, addRegisterOffset(Block, Location.getReg(), Location.getOffset()); for (;i < N; ++i) { - uint64_t Element = DV->getAddrElement(i); + uint64_t Element = DV.getAddrElement(i); if (Element == DIBuilder::OpPlus) { addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i)); + addUInt(Block, 0, dwarf::DW_FORM_udata, DV.getAddrElement(++i)); } else if (Element == DIBuilder::OpDeref) { if (!Location.isReg()) addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); @@ -494,15 +508,15 @@ void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die, /// starting location. Add the DWARF information to the die. For /// more information, read large comment just above here. /// -void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, +void CompileUnit::addBlockByrefAddress(const DbgVariable &DV, DIE *Die, unsigned Attribute, const MachineLocation &Location) { - DIType Ty = DV->getType(); + DIType Ty = DV.getType(); DIType TmpTy = Ty; unsigned Tag = Ty.getTag(); bool isPointer = false; - StringRef varName = DV->getName(); + StringRef varName = DV.getName(); if (Tag == dwarf::DW_TAG_pointer_type) { DIDerivedType DTy = DIDerivedType(Ty); @@ -586,7 +600,7 @@ static bool isTypeSigned(DIType Ty, int *SizeInBits) { } /// addConstantValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, +void CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty) { // FIXME: This is a bit conservative/simple - it emits negative values at // their maximum bit width which is a bit unfortunate (& doesn't prefer @@ -607,11 +621,10 @@ bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, : addUInt(Block, 0, Form, MO.getImm()); addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; } /// addConstantFPValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { +void CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { assert (MO.isFPImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); APFloat FPImm = MO.getFPImm()->getValueAPF(); @@ -632,23 +645,21 @@ bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { (unsigned char)0xFF & FltPtr[Start]); addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; } /// addConstantFPValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) { - return addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), false); +void CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) { + addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), false); } /// addConstantValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI, +void CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned) { - return addConstantValue(Die, CI->getValue(), Unsigned); + addConstantValue(Die, CI->getValue(), Unsigned); } // addConstantValue - Add constant value entry in variable DIE. 
-bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val, - bool Unsigned) { +void CompileUnit::addConstantValue(DIE *Die, const APInt &Val, bool Unsigned) { unsigned CIBitWidth = Val.getBitWidth(); if (CIBitWidth <= 64) { unsigned form = 0; @@ -664,7 +675,7 @@ bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val, addUInt(Die, dwarf::DW_AT_const_value, form, Val.getZExtValue()); else addSInt(Die, dwarf::DW_AT_const_value, form, Val.getSExtValue()); - return true; + return; } DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); @@ -686,7 +697,6 @@ bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val, } addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; } /// addTemplateParams - Add template parameters into buffer. @@ -727,7 +737,7 @@ void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) { /// given DIType. DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { DIType Ty(TyNode); - if (!Ty.Verify()) + if (!Ty.isType()) return NULL; DIE *TyDIE = getDIE(Ty); if (TyDIE) @@ -766,7 +776,7 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { /// addType - Add a new type attribute to the specified entity. void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) { - if (!Ty.Verify()) + if (!Ty.isType()) return; // Check for pre-existence. @@ -811,7 +821,7 @@ void CompileUnit::addPubTypes(DISubprogram SP) { DIArray Args = SPTy.getTypeArray(); for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) { DIType ATy(Args.getElement(i)); - if (!ATy.Verify()) + if (!ATy.isType()) continue; addGlobalType(ATy); } @@ -827,7 +837,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) { Buffer.setTag(dwarf::DW_TAG_unspecified_type); - // Unspecified types has only name, nothing else. + // An unspecified type only has a name attribute. return; } @@ -871,6 +881,39 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { addSourceLine(&Buffer, DTy); } +/// Return true if the type is appropriately scoped to be contained inside +/// its own type unit. +static bool isTypeUnitScoped(DIType Ty) { + DIScope Parent = Ty.getContext(); + while (Parent) { + // Don't generate a hash for anything scoped inside a function. + if (Parent.isSubprogram()) + return false; + Parent = Parent.getContext(); + } + return true; +} + +/// Return true if the type should be split out into a type unit. +static bool shouldCreateTypeUnit(DICompositeType CTy) { + unsigned Tag = CTy.getTag(); + + switch (Tag) { + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_enumeration_type: + case dwarf::DW_TAG_class_type: + // If this is a class, structure, union, or enumeration type + // that is not a declaration, is a type definition, and not scoped + // inside a function then separate this out as a type unit. + if (CTy.isForwardDecl() || !isTypeUnitScoped(CTy)) + return 0; + return 1; + default: + return 0; + } +} + /// constructTypeDIE - Construct type DIE from DICompositeType. void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Get core information. 
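
isTypeUnitScoped above is a plain parent-chain walk: any enclosing subprogram disqualifies the type from getting its own type unit. The same walk over a toy scope type (Scope here is a stand-in, not the DIScope API):

struct Scope {
  bool isFunction;
  const Scope *parent;
};

static bool typeUnitScoped(const Scope *s) {
  for (; s; s = s->parent)
    if (s->isFunction)
      return false;   // function-local types never get their own type unit
  return true;
}

int main() {
  Scope tu   = {false, nullptr};   // translation unit
  Scope func = {true, &tu};        // a subprogram
  Scope cls  = {false, &func};     // class defined inside the function
  return typeUnitScoped(&cls) ? 1 : 0;  // expect 0: not eligible
}
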
@@ -897,7 +940,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
       }
     }
     DIType DTy = CTy.getTypeDerivedFrom();
-    if (DTy.Verify()) {
+    if (DTy.isType()) {
       addType(&Buffer, DTy);
       addUInt(&Buffer, dwarf::DW_AT_enum_class, dwarf::DW_FORM_flag, 1);
     }
@@ -937,16 +980,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
   case dwarf::DW_TAG_structure_type:
   case dwarf::DW_TAG_union_type:
   case dwarf::DW_TAG_class_type: {
-    // Add elements to structure type.
-    DIArray Elements = CTy.getTypeArray();
-
-    // A forward struct declared type may not have elements available.
-    unsigned N = Elements.getNumElements();
-    if (N == 0)
+    if (CTy.isForwardDecl())
       break;

     // Add elements to structure type.
-    for (unsigned i = 0; i < N; ++i) {
+    DIArray Elements = CTy.getTypeArray();
+    for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
       DIDescriptor Element = Elements.getElement(i);
       DIE *ElemDie = NULL;
       if (Element.isSubprogram()) {
@@ -1019,10 +1058,8 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
     if (DIDescriptor(ContainingType).isCompositeType())
       addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
                   getOrCreateTypeDIE(DIType(ContainingType)));
-    else {
-      DIDescriptor Context = CTy.getContext();
-      addToContextOwner(&Buffer, Context);
-    }
+    else
+      addToContextOwner(&Buffer, CTy.getContext());

     if (CTy.isObjcClassComplete())
       addFlag(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type);
@@ -1070,6 +1107,10 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
       addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
               dwarf::DW_FORM_data1, RLang);
   }
+  // If this type qualifies for a type unit, add it to the list of types
+  // we'll compute a hash for later.
+  if (shouldCreateTypeUnit(CTy))
+    DD->addTypeUnitType(&Buffer);
 }

 /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
@@ -1082,7 +1123,8 @@ CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
   ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter);

   addType(ParamDIE, TP.getType());
-  addString(ParamDIE, dwarf::DW_AT_name, TP.getName());
+  if (!TP.getName().empty())
+    addString(ParamDIE, dwarf::DW_AT_name, TP.getName());
   return ParamDIE;
 }

@@ -1094,7 +1136,7 @@ CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){
   if (ParamDIE)
     return ParamDIE;

-  ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter);
+  ParamDIE = new DIE(TPV.getTag());

   addType(ParamDIE, TPV.getType());
   if (!TPV.getName().empty())
     addString(ParamDIE, dwarf::DW_AT_name, TPV.getName());
@@ -1110,6 +1152,14 @@ CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){
       // parameter, rather than a pointer to it.
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); addBlock(ParamDIE, dwarf::DW_AT_location, 0, Block); + } else if (TPV.getTag() == dwarf::DW_TAG_GNU_template_template_param) { + assert(isa<MDString>(Val)); + addString(ParamDIE, dwarf::DW_AT_GNU_template_name, + cast<MDString>(Val)->getString()); + } else if (TPV.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) { + assert(isa<MDNode>(Val)); + DIArray A(cast<MDNode>(Val)); + addTemplateParams(*ParamDIE, A); } } @@ -1280,7 +1330,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { return; DIGlobalVariable GV(N); - if (!GV.Verify()) + if (!GV.isGlobalVariable()) return; DIDescriptor GVContext = GV.getContext(); @@ -1332,7 +1382,29 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { if (isGlobalVariable) { addToAccelTable = true; DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addOpAddress(Block, Asm->Mang->getSymbol(GV.getGlobal())); + const MCSymbol *Sym = Asm->Mang->getSymbol(GV.getGlobal()); + if (GV.getGlobal()->isThreadLocal()) { + // FIXME: Make this work with -gsplit-dwarf. + unsigned PointerSize = Asm->getDataLayout().getPointerSize(); + assert((PointerSize == 4 || PointerSize == 8) && + "Add support for other sizes if necessary"); + const MCExpr *Expr = + Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym); + // Based on GCC's support for TLS: + if (!DD->useSplitDwarf()) { + // 1) Start with a constNu of the appropriate pointer size + addUInt(Block, 0, dwarf::DW_FORM_data1, + PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u); + // 2) containing the (relocated) address of the TLS variable + addExpr(Block, 0, dwarf::DW_FORM_udata, Expr); + } else { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); + addUInt(Block, 0, dwarf::DW_FORM_udata, DU->getAddrPoolIndex(Expr)); + } + // 3) followed by a custom OP to tell the debugger about TLS (presumably) + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_lo_user); + } else + addOpAddress(Block, Sym); // Do not create specification DIE if context is either compile unit // or a subprogram. if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() && @@ -1394,8 +1466,6 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName()) addAccelName(GV.getLinkageName(), AddrDIE); } - - return; } /// constructSubrangeDIE - Construct subrange DIE from DISubrange. @@ -1484,7 +1554,8 @@ void CompileUnit::constructContainingTypeDIEs() { } /// constructVariableDIE - Construct a DIE for the given DbgVariable. -DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { +DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, + bool isScopeAbstract) { StringRef Name = DV->getName(); // Translate tag to proper Dwarf tag. @@ -1515,59 +1586,31 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { unsigned Offset = DV->getDotDebugLocOffset(); if (Offset != ~0U) { - addLabel(VariableDie, dwarf::DW_AT_location, - dwarf::DW_FORM_data4, - Asm->GetTempSymbol("debug_loc", Offset)); + addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4, + Asm->GetTempSymbol("debug_loc", Offset)); DV->setDIE(VariableDie); return VariableDie; } // Check if variable is described by a DBG_VALUE instruction. 
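Stepping back to the thread-local storage handling in createGlobalVariableDIE above: the location block it builds is a DWARF expression of the form DW_OP_constNu (const4u or const8u, matching the pointer size), followed by the relocated address of the TLS symbol, followed by a vendor opcode the debugger interprets as a TLS lookup. The following standalone sketch (not part of this commit) shows the resulting byte layout for the non-split-dwarf path on a 64-bit target; it writes a literal address where the real code emits a relocation through addExpr:

#include <cstdint>
#include <vector>

// Illustrative only; opcode values are taken from the DWARF standard.
std::vector<uint8_t> sketchTLSLocationExpr(uint64_t RelocatedAddr) {
  std::vector<uint8_t> Expr;
  Expr.push_back(0x0e);              // DW_OP_const8u (0x0c is DW_OP_const4u)
  for (unsigned i = 0; i != 8; ++i)  // 8-byte little-endian operand
    Expr.push_back(uint8_t(RelocatedAddr >> (8 * i)));
  Expr.push_back(0xe0);              // DW_OP_lo_user: custom TLS-lookup opcode
  return Expr;
}

In the split-dwarf branch the constant operand is replaced by DW_OP_GNU_const_index plus an index into the address pool, but the trailing custom opcode is the same.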
if (const MachineInstr *DVInsn = DV->getMInsn()) { - bool updated = false; - if (DVInsn->getNumOperands() == 3) { - if (DVInsn->getOperand(0).isReg()) { - const MachineOperand RegOp = DVInsn->getOperand(0); - const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); - if (DVInsn->getOperand(1).isImm() && - TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) { - unsigned FrameReg = 0; - const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - int Offset = - TFI->getFrameIndexReference(*Asm->MF, - DVInsn->getOperand(1).getImm(), - FrameReg); - MachineLocation Location(FrameReg, Offset); - addVariableAddress(DV, VariableDie, Location); - - } else if (RegOp.getReg()) - addVariableAddress(DV, VariableDie, - MachineLocation(RegOp.getReg())); - updated = true; - } - else if (DVInsn->getOperand(0).isImm()) - updated = - addConstantValue(VariableDie, DVInsn->getOperand(0), - DV->getType()); - else if (DVInsn->getOperand(0).isFPImm()) - updated = - addConstantFPValue(VariableDie, DVInsn->getOperand(0)); - else if (DVInsn->getOperand(0).isCImm()) - updated = - addConstantValue(VariableDie, - DVInsn->getOperand(0).getCImm(), - DV->getType().isUnsignedDIType()); - } else { - addVariableAddress(DV, VariableDie, - Asm->getDebugValueLocation(DVInsn)); - updated = true; - } - if (!updated) { - // If variableDie is not updated then DBG_VALUE instruction does not - // have valid variable info. - delete VariableDie; - return NULL; - } + assert(DVInsn->getNumOperands() == 3); + if (DVInsn->getOperand(0).isReg()) { + const MachineOperand RegOp = DVInsn->getOperand(0); + // If the second operand is an immediate, this is an indirect value. + if (DVInsn->getOperand(1).isImm()) { + MachineLocation Location(RegOp.getReg(), DVInsn->getOperand(1).getImm()); + addVariableAddress(*DV, VariableDie, Location); + } else if (RegOp.getReg()) + addVariableAddress(*DV, VariableDie, MachineLocation(RegOp.getReg())); + } else if (DVInsn->getOperand(0).isImm()) + addConstantValue(VariableDie, DVInsn->getOperand(0), DV->getType()); + else if (DVInsn->getOperand(0).isFPImm()) + addConstantFPValue(VariableDie, DVInsn->getOperand(0)); + else if (DVInsn->getOperand(0).isCImm()) + addConstantValue(VariableDie, DVInsn->getOperand(0).getCImm(), + DV->getType().isUnsignedDIType()); + DV->setDIE(VariableDie); return VariableDie; } else { @@ -1579,7 +1622,7 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); MachineLocation Location(FrameReg, Offset); - addVariableAddress(DV, VariableDie, Location); + addVariableAddress(*DV, VariableDie, Location); } } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index e1af572..3908b37 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -19,6 +19,7 @@ #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/StringMap.h" #include "llvm/DebugInfo.h" +#include "llvm/MC/MCExpr.h" namespace llvm { @@ -96,7 +97,7 @@ class CompileUnit { public: CompileUnit(unsigned UID, unsigned L, DIE *D, const MDNode *N, AsmPrinter *A, - DwarfDebug *DW, DwarfUnits *DWU); + DwarfDebug *DW, DwarfUnits *DWU); ~CompileUnit(); // Accessors. 
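The DBG_VALUE handling at the top of this hunk now asserts exactly three operands and distinguishes a direct register value from a register-indirect one: when the second operand is an immediate, the value lives in memory at register plus offset. A rough sketch of that classification, using a hypothetical DebugValue stand-in for the MachineInstr operands (the names here are illustrative, not LLVM API):

#include <cstdint>

// Hypothetical, simplified model of a DBG_VALUE's first two operands.
struct DebugValue {
  bool IsReg;      // operand 0 is a register (otherwise some constant form)
  unsigned Reg;    // register number; 0 means "no register"
  bool HasOffset;  // operand 1 is an immediate offset
  int64_t Offset;  // the offset, meaningful when HasOffset is true
};

enum class LocKind { RegisterDirect, RegisterIndirect, Constant, None };

LocKind classifyDebugValue(const DebugValue &DV) {
  if (!DV.IsReg)
    return LocKind::Constant;         // the imm/fpimm/cimm paths above
  if (DV.HasOffset)
    return LocKind::RegisterIndirect; // value is in memory at Reg + Offset
  if (DV.Reg)
    return LocKind::RegisterDirect;   // value is in the register itself
  return LocKind::None;
}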
@@ -149,13 +150,13 @@ public: DIEs.push_back(Die); } void addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die) { - std::vector<std::pair<DIE*, unsigned > > &DIEs = AccelTypes[Name]; + std::vector<std::pair<DIE *, unsigned> > &DIEs = AccelTypes[Name]; DIEs.push_back(Die); } /// getDIE - Returns the debug information entry map slot for the /// specified debug variable. - DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); } + DIE *getDIE(const MDNode *N) const { return MDNodeToDieMap.lookup(N); } DIEBlock *getDIEBlock() { return new (DIEValueAllocator) DIEBlock(); @@ -168,12 +169,8 @@ public: /// getDIEEntry - Returns the debug information entry for the specified /// debug variable. - DIEEntry *getDIEEntry(const MDNode *N) { - DenseMap<const MDNode *, DIEEntry *>::iterator I = - MDNodeToDIEEntryMap.find(N); - if (I == MDNodeToDIEEntryMap.end()) - return NULL; - return I->second; + DIEEntry *getDIEEntry(const MDNode *N) const { + return MDNodeToDIEEntryMap.lookup(N); } /// insertDIEEntry - Insert debug information entry into the map. @@ -217,6 +214,11 @@ public: /// void addLocalString(DIE *Die, unsigned Attribute, const StringRef Str); + /// addExpr - Add a Dwarf expression attribute data and value. + /// + void addExpr(DIE *Die, unsigned Attribute, unsigned Form, + const MCExpr *Expr); + /// addLabel - Add a Dwarf label attribute data and value. /// void addLabel(DIE *Die, unsigned Attribute, unsigned Form, @@ -230,7 +232,8 @@ public: /// addOpAddress - Add a dwarf op address data and value using the /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. /// - void addOpAddress(DIE *Die, MCSymbol *Label); + void addOpAddress(DIE *Die, const MCSymbol *Label); + void addOpAddress(DIE *Die, const MCSymbolRefExpr *Label); /// addDelta - Add a label delta attribute data and value. /// @@ -257,16 +260,16 @@ public: /// addAddress - Add an address attribute to a die based on the location /// provided. void addAddress(DIE *Die, unsigned Attribute, - const MachineLocation &Location); + const MachineLocation &Location, bool Indirect = false); /// addConstantValue - Add constant value entry in variable DIE. - bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty); - bool addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned); - bool addConstantValue(DIE *Die, const APInt &Val, bool Unsigned); + void addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty); + void addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned); + void addConstantValue(DIE *Die, const APInt &Val, bool Unsigned); /// addConstantFPValue - Add constant value entry in variable DIE. - bool addConstantFPValue(DIE *Die, const MachineOperand &MO); - bool addConstantFPValue(DIE *Die, const ConstantFP *CFP); + void addConstantFPValue(DIE *Die, const MachineOperand &MO); + void addConstantFPValue(DIE *Die, const ConstantFP *CFP); /// addTemplateParams - Add template parameters in buffer. void addTemplateParams(DIE &Buffer, DIArray TParams); @@ -282,7 +285,7 @@ public: /// (navigating the extra location information encoded in the type) based on /// the starting location. Add the DWARF information to the die. /// - void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + void addComplexAddress(const DbgVariable &DV, DIE *Die, unsigned Attribute, const MachineLocation &Location); // FIXME: Should be reformulated in terms of addComplexAddress. @@ -292,12 +295,13 @@ public: /// starting location. Add the DWARF information to the die. 
Obsolete, /// please use addComplexAddress instead. /// - void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + void addBlockByrefAddress(const DbgVariable &DV, DIE *Die, unsigned Attribute, const MachineLocation &Location); /// addVariableAddress - Add DW_AT_location attribute for a /// DbgVariable based on provided MachineLocation. - void addVariableAddress(DbgVariable *&DV, DIE *Die, MachineLocation Location); + void addVariableAddress(const DbgVariable &DV, DIE *Die, + MachineLocation Location); /// addToContextOwner - Add Die into the list of its context owner's children. void addToContextOwner(DIE *Die, DIDescriptor Context); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 7915e2f..979c0c3 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -36,6 +36,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/MD5.h" #include "llvm/Support/Path.h" #include "llvm/Support/Timer.h" #include "llvm/Support/ValueHandle.h" @@ -46,54 +47,60 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", - cl::Hidden, - cl::desc("Disable debug info printing")); +static cl::opt<bool> +DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, + cl::desc("Disable debug info printing")); -static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden, - cl::desc("Make an absence of debug location information explicit."), - cl::init(false)); +static cl::opt<bool> UnknownLocations( + "use-unknown-locations", cl::Hidden, + cl::desc("Make an absence of debug location information explicit."), + cl::init(false)); -static cl::opt<bool> GenerateDwarfPubNamesSection("generate-dwarf-pubnames", - cl::Hidden, cl::init(false), - cl::desc("Generate DWARF pubnames section")); +static cl::opt<bool> +GenerateDwarfPubNamesSection("generate-dwarf-pubnames", cl::Hidden, + cl::init(false), + cl::desc("Generate DWARF pubnames section")); -namespace { - enum DefaultOnOff { - Default, Enable, Disable - }; -} +static cl::opt<bool> +GenerateODRHash("generate-odr-hash", cl::Hidden, + cl::desc("Add an ODR hash to external type DIEs."), + cl::init(false)); -static cl::opt<DefaultOnOff> DwarfAccelTables("dwarf-accel-tables", cl::Hidden, - cl::desc("Output prototype dwarf accelerator tables."), - cl::values( - clEnumVal(Default, "Default for platform"), - clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), - clEnumValEnd), - cl::init(Default)); - -static cl::opt<DefaultOnOff> DarwinGDBCompat("darwin-gdb-compat", cl::Hidden, - cl::desc("Compatibility with Darwin gdb."), - cl::values( - clEnumVal(Default, "Default for platform"), - clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), - clEnumValEnd), - cl::init(Default)); - -static cl::opt<DefaultOnOff> SplitDwarf("split-dwarf", cl::Hidden, - cl::desc("Output prototype dwarf split debug info."), - cl::values( - clEnumVal(Default, "Default for platform"), - clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), - clEnumValEnd), - cl::init(Default)); +namespace { +enum DefaultOnOff { + Default, + Enable, + Disable +}; +} + +static cl::opt<DefaultOnOff> +DwarfAccelTables("dwarf-accel-tables", cl::Hidden, + cl::desc("Output prototype dwarf accelerator tables."), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + 
clEnumVal(Disable, "Disabled"), clEnumValEnd), + cl::init(Default)); + +static cl::opt<DefaultOnOff> +DarwinGDBCompat("darwin-gdb-compat", cl::Hidden, + cl::desc("Compatibility with Darwin gdb."), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), clEnumValEnd), + cl::init(Default)); + +static cl::opt<DefaultOnOff> +SplitDwarf("split-dwarf", cl::Hidden, + cl::desc("Output prototype dwarf split debug info."), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), clEnumValEnd), + cl::init(Default)); namespace { - const char *DWARFGroupName = "DWARF Emission"; - const char *DbgTimerName = "DWARF Debug Writer"; + const char *const DWARFGroupName = "DWARF Emission"; + const char *const DbgTimerName = "DWARF Debug Writer"; struct CompareFirst { template <typename T> bool operator()(const T &lhs, const T &rhs) const { @@ -162,6 +169,14 @@ DIType DbgVariable::getType() const { } // end llvm namespace +/// Return Dwarf Version by checking module flags. +static unsigned getDwarfVersionFromModule(const Module *M) { + Value *Val = M->getModuleFlag("Dwarf Version"); + if (!Val) + return dwarf::DWARF_VERSION; + return cast<ConstantInt>(Val)->getZExtValue(); +} + DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) : Asm(A), MMI(Asm->MMI), FirstCU(0), AbbreviationsSet(InitAbbreviationsSetSize), @@ -204,6 +219,8 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) else HasSplitDwarf = SplitDwarf == Enable ? true : false; + DwarfVersion = getDwarfVersionFromModule(MMI->getModule()); + { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); beginModule(); @@ -247,22 +264,21 @@ unsigned DwarfUnits::getStringPoolIndex(StringRef Str) { return Entry.second; } -unsigned DwarfUnits::getAddrPoolIndex(MCSymbol *Sym) { - std::pair<MCSymbol*, unsigned> &Entry = AddressPool[Sym]; - if (Entry.first) return Entry.second; +unsigned DwarfUnits::getAddrPoolIndex(const MCSymbol *Sym) { + return getAddrPoolIndex(MCSymbolRefExpr::Create(Sym, Asm->OutContext)); +} - Entry.second = NextAddrPoolNumber++; - Entry.first = Sym; - return Entry.second; +unsigned DwarfUnits::getAddrPoolIndex(const MCExpr *Sym) { + std::pair<DenseMap<const MCExpr *, unsigned>::iterator, bool> P = + AddressPool.insert(std::make_pair(Sym, NextAddrPoolNumber)); + if (P.second) + ++NextAddrPoolNumber; + return P.first->second; } // Define a unique number for the abbreviation. // void DwarfUnits::assignAbbrevNumber(DIEAbbrev &Abbrev) { - // Profile the node so that we can make it unique. - FoldingSetNodeID ID; - Abbrev.Profile(ID); - // Check the set for priors. DIEAbbrev *InSet = AbbreviationsSet->GetOrInsertNode(&Abbrev); @@ -423,11 +439,11 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, if (Scope->isAbstractScope()) return ScopeDIE; - const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges(); + const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges(); if (Ranges.empty()) return 0; - SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(); + // If we have multiple ranges, emit them into the range section. if (Ranges.size() > 1) { // .debug_range section has not been laid out yet. Emit offset in // .debug_range as a uint, size 4, for now. 
emitDIE will handle @@ -435,16 +451,20 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, DebugRangeSymbols.size() * Asm->getDataLayout().getPointerSize()); - for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(), + for (SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second)); } + + // Terminate the range list. DebugRangeSymbols.push_back(NULL); DebugRangeSymbols.push_back(NULL); return ScopeDIE; } + // Construct the address range for this DIE. + SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(); MCSymbol *Start = getLabelBeforeInsn(RI->first); MCSymbol *End = getLabelAfterInsn(RI->second); @@ -463,7 +483,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, // represent this concrete inlined copy of the function. DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { - const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges(); + const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges(); assert(Ranges.empty() == false && "LexicalScope does not have instruction markers!"); @@ -477,18 +497,6 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, return NULL; } - SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(); - MCSymbol *StartLabel = getLabelBeforeInsn(RI->first); - MCSymbol *EndLabel = getLabelAfterInsn(RI->second); - - if (StartLabel == 0 || EndLabel == 0) { - llvm_unreachable("Unexpected Start and End labels for an inlined scope!"); - } - assert(StartLabel->isDefined() && - "Invalid starting label for an inlined scope!"); - assert(EndLabel->isDefined() && - "Invalid end label for an inlined scope!"); - DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine); TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, dwarf::DW_FORM_ref4, OriginDIE); @@ -500,7 +508,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, DebugRangeSymbols.size() * Asm->getDataLayout().getPointerSize()); - for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(), + for (SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second)); @@ -508,32 +516,46 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, DebugRangeSymbols.push_back(NULL); DebugRangeSymbols.push_back(NULL); } else { + SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(); + MCSymbol *StartLabel = getLabelBeforeInsn(RI->first); + MCSymbol *EndLabel = getLabelAfterInsn(RI->second); + + if (StartLabel == 0 || EndLabel == 0) + llvm_unreachable("Unexpected Start and End labels for an inlined scope!"); + + assert(StartLabel->isDefined() && + "Invalid starting label for an inlined scope!"); + assert(EndLabel->isDefined() && "Invalid end label for an inlined scope!"); + TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, StartLabel); TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, EndLabel); } InlinedSubprogramDIEs.insert(OriginDIE); - // Track the start label for this inlined function. - //.debug_inlined section specification does not clearly state how - // to emit inlined scope that is split into multiple instruction ranges. 
-  // For now, use first instruction range and emit low_pc/high_pc pair and
-  // corresponding .debug_inlined section entry for this pair.
-  DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
-    I = InlineInfo.find(InlinedSP);
-
-  if (I == InlineInfo.end()) {
-    InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, ScopeDIE));
-    InlinedSPNodes.push_back(InlinedSP);
-  } else
-    I->second.push_back(std::make_pair(StartLabel, ScopeDIE));
-
+  // Add the call site information to the DIE.
   DILocation DL(Scope->getInlinedAt());
   TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0,
                  getOrCreateSourceID(DL.getFilename(), DL.getDirectory(),
                                      TheCU->getUniqueID()));
   TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());

+  // Track the start label for this inlined function. The .debug_inlined
+  // section specification does not clearly state how to emit inlined scopes
+  // that are split into multiple instruction ranges. For now, use the first
+  // instruction range and emit the low_pc/high_pc pair and the corresponding
+  // .debug_inlined section entry for this pair.
+  if (Asm->MAI->doesDwarfUseInlineInfoSection()) {
+    MCSymbol *StartLabel = getLabelBeforeInsn(Ranges.begin()->first);
+    InlineInfoMap::iterator I = InlineInfo.find(InlinedSP);
+
+    if (I == InlineInfo.end()) {
+      InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, ScopeDIE));
+      InlinedSPNodes.push_back(InlinedSP);
+    } else
+      I->second.push_back(std::make_pair(StartLabel, ScopeDIE));
+  }
+
   // Add name to the name table; we do this here because we're guaranteed
   // to have concrete versions of our DW_TAG_inlined_subroutine nodes.
   addSubprogramNames(TheCU, InlinedSP, ScopeDIE);
@@ -566,14 +588,14 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
   }

   // Collect lexical scope children first.
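In both the lexical-scope and inlined-scope paths above, a multi-range scope pushes two NULL symbols after its begin/end label pairs. When .debug_ranges is emitted, those become a pair of zero values, which is DWARF's end-of-list marker. A minimal sketch of the resulting layout, assuming a plain (low_pc, high_pc) address-pair model of the section:

#include <cstdint>
#include <utility>
#include <vector>

using AddressRange = std::pair<uint64_t, uint64_t>; // (low_pc, high_pc)

// Terminate a range list the way the NULL,NULL pushes above do.
std::vector<AddressRange> finishRangeList(std::vector<AddressRange> Ranges) {
  Ranges.emplace_back(0, 0); // end-of-list entry: two zero addresses
  return Ranges;
}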
- const SmallVector<DbgVariable *, 8> &Variables = ScopeVariables.lookup(Scope); + const SmallVectorImpl<DbgVariable *> &Variables =ScopeVariables.lookup(Scope); for (unsigned i = 0, N = Variables.size(); i < N; ++i) if (DIE *Variable = TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) { Children.push_back(Variable); if (Variables[i]->isObjectPointer()) ObjectPointer = Variable; } - const SmallVector<LexicalScope *, 4> &Scopes = Scope->getChildren(); + const SmallVectorImpl<LexicalScope *> &Scopes = Scope->getChildren(); for (unsigned j = 0, M = Scopes.size(); j < M; ++j) if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j])) Children.push_back(Nested); @@ -601,14 +623,15 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { if (Children.empty() && Range.first == Range.second) return NULL; ScopeDIE = constructLexicalScopeDIE(TheCU, Scope); - for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second; ++i) + for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second; + ++i) constructImportedEntityDIE(TheCU, i->second, ScopeDIE); } if (!ScopeDIE) return NULL; // Add children - for (SmallVector<DIE *, 8>::iterator I = Children.begin(), + for (SmallVectorImpl<DIE *>::iterator I = Children.begin(), E = Children.end(); I != E; ++I) ScopeDIE->addChild(*I); @@ -812,7 +835,8 @@ void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, TheCU->getUniqueID()); TheCU->addUInt(IMDie, dwarf::DW_AT_decl_file, 0, FileID); TheCU->addUInt(IMDie, dwarf::DW_AT_decl_line, 0, Module.getLineNumber()); - TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, dwarf::DW_FORM_ref4, EntityDie); + TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, dwarf::DW_FORM_ref4, + EntityDie); StringRef Name = Module.getName(); if (!Name.empty()) TheCU->addString(IMDie, dwarf::DW_AT_name, Name); @@ -910,7 +934,7 @@ void DwarfDebug::collectDeadVariables() { for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) { DISubprogram SP(Subprograms.getElement(i)); if (ProcessedSPNodes.count(SP) != 0) continue; - if (!SP.Verify()) continue; + if (!SP.isSubprogram()) continue; if (!SP.isDefinition()) continue; DIArray Variables = SP.getVariables(); if (Variables.getNumElements() == 0) continue; @@ -926,10 +950,10 @@ void DwarfDebug::collectDeadVariables() { DIE *ScopeDIE = SPCU->getDIE(SP); for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { DIVariable DV(Variables.getElement(vi)); - if (!DV.Verify()) continue; - DbgVariable *NewVar = new DbgVariable(DV, NULL); + if (!DV.isVariable()) continue; + DbgVariable NewVar(DV, NULL); if (DIE *VariableDIE = - SPCU->constructVariableDIE(NewVar, Scope->isAbstractScope())) + SPCU->constructVariableDIE(&NewVar, Scope->isAbstractScope())) ScopeDIE->addChild(VariableDIE); } } @@ -938,6 +962,136 @@ void DwarfDebug::collectDeadVariables() { DeleteContainerSeconds(DeadFnScopeMap); } +// Type Signature [7.27] computation code. +typedef ArrayRef<uint8_t> HashValue; + +/// \brief Grabs the string in whichever attribute is passed in and returns +/// a reference to it. Returns "" if the attribute doesn't exist. +static StringRef getDIEStringAttr(DIE *Die, unsigned Attr) { + DIEValue *V = Die->findAttribute(Attr); + + if (DIEString *S = dyn_cast_or_null<DIEString>(V)) + return S->getString(); + + return StringRef(""); +} + +/// \brief Adds the string in \p Str to the hash in \p Hash. This also hashes +/// a trailing NULL with the string. 
+static void addStringToHash(MD5 &Hash, StringRef Str) { + DEBUG(dbgs() << "Adding string " << Str << " to hash.\n"); + Hash.update(Str); + Hash.update(makeArrayRef((uint8_t)'\0')); +} + +// FIXME: These are copied and only slightly modified out of LEB128.h. + +/// \brief Adds the unsigned in \p N to the hash in \p Hash. This also encodes +/// the unsigned as a ULEB128. +static void addULEB128ToHash(MD5 &Hash, uint64_t Value) { + DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n"); + do { + uint8_t Byte = Value & 0x7f; + Value >>= 7; + if (Value != 0) + Byte |= 0x80; // Mark this byte to show that more bytes will follow. + Hash.update(Byte); + } while (Value != 0); +} + +/// \brief Including \p Parent adds the context of Parent to \p Hash. +static void addParentContextToHash(MD5 &Hash, DIE *Parent) { + + DEBUG(dbgs() << "Adding parent context to hash...\n"); + + // [7.27.2] For each surrounding type or namespace beginning with the + // outermost such construct... + SmallVector<DIE *, 1> Parents; + while (Parent->getTag() != dwarf::DW_TAG_compile_unit) { + Parents.push_back(Parent); + Parent = Parent->getParent(); + } + + // Reverse iterate over our list to go from the outermost construct to the + // innermost. + for (SmallVectorImpl<DIE *>::reverse_iterator I = Parents.rbegin(), + E = Parents.rend(); + I != E; ++I) { + DIE *Die = *I; + + // ... Append the letter "C" to the sequence... + addULEB128ToHash(Hash, 'C'); + + // ... Followed by the DWARF tag of the construct... + addULEB128ToHash(Hash, Die->getTag()); + + // ... Then the name, taken from the DW_AT_name attribute. + StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name); + DEBUG(dbgs() << "... adding context: " << Name << "\n"); + if (!Name.empty()) + addStringToHash(Hash, Name); + } +} + +/// This is based on the type signature computation given in section 7.27 of the +/// DWARF4 standard. It is the md5 hash of a flattened description of the DIE with +/// the exception that we are hashing only the context and the name of the type. +static void addDIEODRSignature(MD5 &Hash, CompileUnit *CU, DIE *Die) { + + // Add the contexts to the hash. We won't be computing the ODR hash for + // function local types so it's safe to use the generic context hashing + // algorithm here. + // FIXME: If we figure out how to account for linkage in some way we could + // actually do this with a slight modification to the parent hash algorithm. + DIE *Parent = Die->getParent(); + if (Parent) + addParentContextToHash(Hash, Parent); + + // Add the current DIE information. + + // Add the DWARF tag of the DIE. + addULEB128ToHash(Hash, Die->getTag()); + + // Add the name of the type to the hash. + addStringToHash(Hash, getDIEStringAttr(Die, dwarf::DW_AT_name)); + + // Now get the result. + MD5::MD5Result Result; + Hash.final(Result); + + // ... take the least significant 8 bytes and store those as the attribute. + // Our MD5 implementation always returns its results in little endian, swap + // bytes appropriately. + uint64_t Signature = *reinterpret_cast<support::ulittle64_t *>(Result + 8); + + // FIXME: This should be added onto the type unit, not the type, but this + // works as an intermediate stage. + CU->addUInt(Die, dwarf::DW_AT_GNU_odr_signature, dwarf::DW_FORM_data8, + Signature); +} + +/// Return true if the current DIE is contained within an anonymous namespace. 
+static bool isContainedInAnonNamespace(DIE *Die) { + DIE *Parent = Die->getParent(); + + while (Parent) { + if (Parent->getTag() == dwarf::DW_TAG_namespace && + getDIEStringAttr(Parent, dwarf::DW_AT_name) == "") + return true; + Parent = Parent->getParent(); + } + + return false; +} + +/// Test if the current CU language is C++ and that we have +/// a named type that is not contained in an anonymous namespace. +static bool shouldAddODRHash(CompileUnit *CU, DIE *Die) { + return CU->getLanguage() == dwarf::DW_LANG_C_plus_plus && + getDIEStringAttr(Die, dwarf::DW_AT_name) != "" && + !isContainedInAnonNamespace(Die); + } + void DwarfDebug::finalizeModuleInfo() { // Collect info for variables that were optimized out. collectDeadVariables(); @@ -953,6 +1107,18 @@ void DwarfDebug::finalizeModuleInfo() { TheCU->constructContainingTypeDIEs(); } + // Split out type units and conditionally add an ODR tag to the split + // out type. + // FIXME: Do type splitting. + for (unsigned i = 0, e = TypeUnits.size(); i != e; ++i) { + MD5 Hash; + DIE *Die = TypeUnits[i]; + // If we've requested ODR hashes and it's applicable for an ODR hash then + // add the ODR signature now. + if (GenerateODRHash && shouldAddODRHash(CUMap.begin()->second, Die)) + addDIEODRSignature(Hash, CUMap.begin()->second, Die); + } + // Compute DIE offsets and sizes. InfoHolder.computeSizeAndOffsets(); if (useSplitDwarf()) @@ -1074,7 +1240,7 @@ void DwarfDebug::endModule() { E = CUMap.end(); I != E; ++I) delete I->second; - for (SmallVector<CompileUnit *, 1>::iterator I = SkeletonCUs.begin(), + for (SmallVectorImpl<CompileUnit *>::iterator I = SkeletonCUs.begin(), E = SkeletonCUs.end(); I != E; ++I) delete *I; @@ -1160,7 +1326,8 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) { assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); return MI->getNumOperands() == 3 && MI->getOperand(0).isReg() && MI->getOperand(0).getReg() && - MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0; + (MI->getOperand(1).isImm() || + (MI->getOperand(1).isReg() && MI->getOperand(1).getReg() == 0U)); } // Get .debug_loc entry for the instruction range starting at MI. @@ -1170,16 +1337,12 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, const MachineInstr *MI) { const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata(); - if (MI->getNumOperands() != 3) { - MachineLocation MLoc = Asm->getDebugValueLocation(MI); - return DotDebugLocEntry(FLabel, SLabel, MLoc, Var); - } - if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) { + assert(MI->getNumOperands() == 3); + if (MI->getOperand(0).isReg()) { MachineLocation MLoc; - // TODO: Currently an offset of 0 in a DBG_VALUE means - // we need to generate a direct register value. - // There is no way to specify an indirect value with offset 0. - if (MI->getOperand(1).getImm() == 0) + // If the second operand is an immediate, this is a + // register-indirect address. + if (!MI->getOperand(1).isImm()) MLoc.set(MI->getOperand(0).getReg()); else MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); @@ -1200,7 +1363,7 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF, SmallPtrSet<const MDNode *, 16> &Processed) { - // collection info from MMI table. + // Grab the variable info that was squirreled away in the MMI side-table. 
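Taken together, the hashing helpers above flatten a type DIE into a byte stream of context letters ('C'), ULEB128-encoded tags, and NUL-terminated names, feed that stream to MD5, and keep the trailing 8 bytes of the digest, read little-endian, as the ODR signature. The two encoding details can be sketched without any LLVM dependencies (the MD5 step itself is elided; this is an illustration, not the committed code):

#include <cstdint>
#include <vector>

// ULEB128-encode Value; the same byte sequence addULEB128ToHash feeds to MD5.
void appendULEB128(std::vector<uint8_t> &Out, uint64_t Value) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // high bit set: more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
}

// Read digest bytes [8, 16) as a little-endian 64-bit value, mirroring
// "*reinterpret_cast<support::ulittle64_t *>(Result + 8)" above.
uint64_t signatureFromDigest(const uint8_t Digest[16]) {
  uint64_t Sig = 0;
  for (unsigned i = 0; i != 8; ++i)
    Sig |= uint64_t(Digest[8 + i]) << (8 * i);
  return Sig;
}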
collectVariableInfoFromMMITable(MF, Processed); for (SmallVectorImpl<const MDNode*>::const_iterator @@ -1293,7 +1456,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, DIArray Variables = DISubprogram(FnScope->getScopeNode()).getVariables(); for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { DIVariable DV(Variables.getElement(i)); - if (!DV || !DV.Verify() || !Processed.insert(DV)) + if (!DV || !DV.isVariable() || !Processed.insert(DV)) continue; if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) addScopeVariable(Scope, new DbgVariable(DV, NULL)); @@ -1390,19 +1553,19 @@ void DwarfDebug::identifyScopeMarkers() { while (!WorkList.empty()) { LexicalScope *S = WorkList.pop_back_val(); - const SmallVector<LexicalScope *, 4> &Children = S->getChildren(); + const SmallVectorImpl<LexicalScope *> &Children = S->getChildren(); if (!Children.empty()) - for (SmallVector<LexicalScope *, 4>::const_iterator SI = Children.begin(), + for (SmallVectorImpl<LexicalScope *>::const_iterator SI = Children.begin(), SE = Children.end(); SI != SE; ++SI) WorkList.push_back(*SI); if (S->isAbstractScope()) continue; - const SmallVector<InsnRange, 4> &Ranges = S->getRanges(); + const SmallVectorImpl<InsnRange> &Ranges = S->getRanges(); if (Ranges.empty()) continue; - for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(), + for (SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { assert(RI->first && "InsnRange does not have first instruction!"); assert(RI->second && "InsnRange does not have second instruction!"); @@ -1424,7 +1587,7 @@ static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) { static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) { const MDNode *Scope = getScopeNode(DL, Ctx); DISubprogram SP = getDISubprogram(Scope); - if (SP.Verify()) { + if (SP.isSubprogram()) { // Check for number of operands since the compatibility is // cheap here. if (SP->getNumOperands() > 19) @@ -1492,7 +1655,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // The first mention of a function argument gets the FunctionBeginSym // label, so arguments are visible when breaking at function entry. DIVariable DV(Var); - if (DV.Verify() && DV.getTag() == dwarf::DW_TAG_arg_variable && + if (DV.isVariable() && DV.getTag() == dwarf::DW_TAG_arg_variable && DISubprogram(getDISubprogram(DV.getContext())) .describes(MF->getFunction())) LabelsBeforeInsn[MI] = FunctionBeginSym; @@ -1522,11 +1685,9 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { DEBUG(dbgs() << "Dropping DBG_VALUE for empty range:\n" << "\t" << *Prev << "\n"); History.pop_back(); - } - else { + } else if (llvm::next(PrevMBB) != PrevMBB->getParent()->end()) // Terminate after LastMI. History.push_back(LastMI); - } } } } @@ -1595,7 +1756,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (LastMI == PrevMBB->end()) // Drop DBG_VALUE for empty range. History.pop_back(); - else { + else if (PrevMBB != &PrevMBB->getParent()->back()) { // Terminate after LastMI. History.push_back(LastMI); } @@ -1680,12 +1841,12 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { for (unsigned i = 0, e = AList.size(); i != e; ++i) { LexicalScope *AScope = AList[i]; DISubprogram SP(AScope->getScopeNode()); - if (SP.Verify()) { + if (SP.isSubprogram()) { // Collect info for variables that were optimized out. 
DIArray Variables = SP.getVariables(); for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { DIVariable DV(Variables.getElement(i)); - if (!DV || !DV.Verify() || !ProcessedVars.insert(DV)) + if (!DV || !DV.isVariable() || !ProcessedVars.insert(DV)) continue; // Check that DbgVariable for DV wasn't created earlier, when // findAbstractVariable() was called for inlined instance of DV. @@ -1707,7 +1868,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr); // Clear debug info - for (DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> >::iterator + for (ScopeVariablesMap::iterator I = ScopeVariables.begin(), E = ScopeVariables.end(); I != E; ++I) DeleteContainerPointers(I->second); ScopeVariables.clear(); @@ -1904,7 +2065,8 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) { DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; Addr += Holder.getCUOffset(Origin->getCompileUnit()); } - Asm->EmitInt32(Addr); + Asm->OutStreamer.EmitIntValue(Addr, + Form == dwarf::DW_FORM_ref_addr ? DIEEntry::getRefAddrSize(Asm) : 4); break; } case dwarf::DW_AT_ranges: { @@ -1988,7 +2150,7 @@ void DwarfUnits::emitUnits(DwarfDebug *DD, Asm->OutStreamer.AddComment("Length of Compilation Unit Info"); Asm->EmitInt32(ContentSize); Asm->OutStreamer.AddComment("DWARF version number"); - Asm->EmitInt16(dwarf::DWARF_VERSION); + Asm->EmitInt16(DD->getDwarfVersion()); Asm->OutStreamer.AddComment("Offset Into Abbrev. Section"); Asm->EmitSectionOffset(Asm->GetTempSymbol(ASection->getLabelBeginName()), ASectionSym); @@ -2229,7 +2391,7 @@ void DwarfDebug::emitDebugPubnames() { Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", ID)); Asm->OutStreamer.AddComment("DWARF Version"); - Asm->EmitInt16(dwarf::DWARF_VERSION); + Asm->EmitInt16(DwarfVersion); Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), ID), @@ -2251,7 +2413,7 @@ void DwarfDebug::emitDebugPubnames() { if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name"); - Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0); + Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1)); } Asm->OutStreamer.AddComment("End Mark"); @@ -2276,7 +2438,7 @@ void DwarfDebug::emitDebugPubTypes() { TheCU->getUniqueID())); if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version"); - Asm->EmitInt16(dwarf::DWARF_VERSION); + Asm->EmitInt16(DwarfVersion); Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); @@ -2363,24 +2525,18 @@ void DwarfUnits::emitAddresses(const MCSection *AddrSection) { // Start the dwarf addr section. Asm->OutStreamer.SwitchSection(AddrSection); - // Get all of the string pool entries and put them in an array by their ID so - // we can sort them. 
- SmallVector<std::pair<unsigned, - std::pair<MCSymbol*, unsigned>* >, 64> Entries; + // Order the address pool entries by ID + SmallVector<const MCExpr *, 64> Entries(AddressPool.size()); - for (DenseMap<MCSymbol*, std::pair<MCSymbol*, unsigned> >::iterator - I = AddressPool.begin(), E = AddressPool.end(); + for (DenseMap<const MCExpr *, unsigned>::iterator I = AddressPool.begin(), + E = AddressPool.end(); I != E; ++I) - Entries.push_back(std::make_pair(I->second.second, &(I->second))); - - array_pod_sort(Entries.begin(), Entries.end()); + Entries[I->second] = I->first; for (unsigned i = 0, e = Entries.size(); i != e; ++i) { - // Emit a label for reference from debug information entries. - MCSymbol *Sym = Entries[i].second->first; - if (Sym) - Asm->EmitLabelReference(Entries[i].second->first, - Asm->getDataLayout().getPointerSize()); + // Emit an expression for reference from debug information entries. + if (const MCExpr *Expr = Entries[i]) + Asm->OutStreamer.EmitValue(Expr, Asm->getDataLayout().getPointerSize()); else Asm->OutStreamer.EmitIntValue(0, Asm->getDataLayout().getPointerSize()); } @@ -2393,7 +2549,7 @@ void DwarfDebug::emitDebugStr() { Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection()); } -// Emit visible names into a debug loc section. +// Emit locations into the debug loc section. void DwarfDebug::emitDebugLoc() { if (DotDebugLocEntries.empty()) return; @@ -2422,9 +2578,9 @@ void DwarfDebug::emitDebugLoc() { Asm->OutStreamer.EmitIntValue(0, Size); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index)); } else { - Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size); - Asm->OutStreamer.EmitSymbolValue(Entry.End, Size); - DIVariable DV(Entry.Variable); + Asm->OutStreamer.EmitSymbolValue(Entry.getBeginSym(), Size); + Asm->OutStreamer.EmitSymbolValue(Entry.getEndSym(), Size); + DIVariable DV(Entry.getVariable()); Asm->OutStreamer.AddComment("Loc expr size"); MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol(); MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol(); @@ -2444,17 +2600,18 @@ void DwarfDebug::emitDebugLoc() { Asm->EmitULEB128(Entry.getInt()); } } else if (Entry.isLocation()) { + MachineLocation Loc = Entry.getLoc(); if (!DV.hasComplexAddress()) // Regular entry. - Asm->EmitDwarfRegOp(Entry.Loc); + Asm->EmitDwarfRegOp(Loc, DV.isIndirect()); else { // Complex address entry. unsigned N = DV.getNumAddrElements(); unsigned i = 0; if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { - if (Entry.Loc.getOffset()) { + if (Loc.getOffset()) { i = 2; - Asm->EmitDwarfRegOp(Entry.Loc); + Asm->EmitDwarfRegOp(Loc, DV.isIndirect()); Asm->OutStreamer.AddComment("DW_OP_deref"); Asm->EmitInt8(dwarf::DW_OP_deref); Asm->OutStreamer.AddComment("DW_OP_plus_uconst"); @@ -2463,12 +2620,12 @@ void DwarfDebug::emitDebugLoc() { } else { // If first address element is OpPlus then emit // DW_OP_breg + Offset instead of DW_OP_reg + Offset. - MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1)); - Asm->EmitDwarfRegOp(Loc); + MachineLocation TLoc(Loc.getReg(), DV.getAddrElement(1)); + Asm->EmitDwarfRegOp(TLoc, DV.isIndirect()); i = 2; } } else { - Asm->EmitDwarfRegOp(Entry.Loc); + Asm->EmitDwarfRegOp(Loc, DV.isIndirect()); } // Emit remaining complex address elements. 
@@ -2478,7 +2635,7 @@ void DwarfDebug::emitDebugLoc() {
             Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
             Asm->EmitULEB128(DV.getAddrElement(++i));
           } else if (Element == DIBuilder::OpDeref) {
-            if (!Entry.Loc.isReg())
+            if (!Loc.isReg())
               Asm->EmitInt8(dwarf::DW_OP_deref);
           } else
             llvm_unreachable("unknown Opcode found in complex address");
@@ -2559,7 +2716,7 @@ void DwarfDebug::emitDebugInlineInfo() {
   Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_begin", 1));

   Asm->OutStreamer.AddComment("Dwarf Version");
-  Asm->EmitInt16(dwarf::DWARF_VERSION);
+  Asm->EmitInt16(DwarfVersion);
   Asm->OutStreamer.AddComment("Address Size (in bytes)");
   Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
@@ -2567,8 +2724,7 @@ void DwarfDebug::emitDebugInlineInfo() {
          E = InlinedSPNodes.end(); I != E; ++I) {

     const MDNode *Node = *I;
-    DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
-      = InlineInfo.find(Node);
+    InlineInfoMap::iterator II = InlineInfo.find(Node);
     SmallVectorImpl<InlineInfoLabels> &Labels = II->second;
     DISubprogram SP(Node);
     StringRef LName = SP.getLinkageName();
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 6cc792b..e14f9b1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -41,7 +40,6 @@ class DIEAbbrev;
 class DIE;
 class DIEBlock;
 class DIEEntry;
-class DwarfDebug;

 //===----------------------------------------------------------------------===//
 /// \brief This class is used to record source line correspondence.
@@ -63,13 +62,12 @@ public:

 /// \brief This struct describes location entries emitted in the .debug_loc
 /// section.
-typedef struct DotDebugLocEntry {
+class DotDebugLocEntry {
+  // Begin and end symbols for the address range over which this location
+  // is valid.
   const MCSymbol *Begin;
   const MCSymbol *End;
-  MachineLocation Loc;
-  const MDNode *Variable;
-  bool Merged;
-  bool Constant;
+
+  // Type of entry that this represents.
   enum EntryType {
     E_Location,
     E_Integer,
@@ -83,23 +81,42 @@ typedef struct DotDebugLocEntry {
     const ConstantFP *CFP;
     const ConstantInt *CIP;
   } Constants;
-  DotDebugLocEntry()
-    : Begin(0), End(0), Variable(0), Merged(false),
-      Constant(false) { Constants.Int = 0;}
+
+  // The location in the machine frame.
+  MachineLocation Loc;
+
+  // The variable to which this location entry corresponds.
+  const MDNode *Variable;
+
+  // Whether this location has been merged.
+  bool Merged;
+
+public:
+  DotDebugLocEntry() : Begin(0), End(0), Variable(0), Merged(false) {
+    Constants.Int = 0;
+  }
   DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L,
                    const MDNode *V)
-    : Begin(B), End(E), Loc(L), Variable(V), Merged(false),
-      Constant(false) { Constants.Int = 0; EntryKind = E_Location; }
+      : Begin(B), End(E), Loc(L), Variable(V), Merged(false) {
+    Constants.Int = 0;
+    EntryKind = E_Location;
+  }
   DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i)
-    : Begin(B), End(E), Variable(0), Merged(false),
-      Constant(true) { Constants.Int = i; EntryKind = E_Integer; }
+      : Begin(B), End(E), Variable(0), Merged(false) {
+    Constants.Int = i;
+    EntryKind = E_Integer;
+  }
   DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr)
-    : Begin(B), End(E), Variable(0), Merged(false),
-      Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; }
+      : Begin(B), End(E), Variable(0), Merged(false) {
+    Constants.CFP = FPtr;
+    EntryKind = E_ConstantFP;
+  }
   DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E,
                    const ConstantInt *IPtr)
-    : Begin(B), End(E), Variable(0), Merged(false),
-      Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; }
+      : Begin(B), End(E), Variable(0), Merged(false) {
+    Constants.CIP = IPtr;
+    EntryKind = E_ConstantInt;
+  }

   /// \brief Empty entries are also used as a trigger to emit a temp label. Such
   /// labels are referenced to find the debug_loc offset for a given DIE.
@@ -115,10 +132,14 @@ typedef struct DotDebugLocEntry {
   bool isInt() const { return EntryKind == E_Integer; }
   bool isConstantFP() const { return EntryKind == E_ConstantFP; }
   bool isConstantInt() const { return EntryKind == E_ConstantInt; }
-  int64_t getInt() { return Constants.Int; }
-  const ConstantFP *getConstantFP() { return Constants.CFP; }
-  const ConstantInt *getConstantInt() { return Constants.CIP; }
-} DotDebugLocEntry;
+  int64_t getInt() const { return Constants.Int; }
+  const ConstantFP *getConstantFP() const { return Constants.CFP; }
+  const ConstantInt *getConstantInt() const { return Constants.CIP; }
+  const MDNode *getVariable() const { return Variable; }
+  const MCSymbol *getBeginSym() const { return Begin; }
+  const MCSymbol *getEndSym() const { return End; }
+  MachineLocation getLoc() const { return Loc; }
+};

 //===----------------------------------------------------------------------===//
 /// \brief This class is used to track local variable information.
@@ -172,15 +193,15 @@ public:
   }

   bool variableHasComplexAddress() const {
-    assert(Var.Verify() && "Invalid complex DbgVariable!");
+    assert(Var.isVariable() && "Invalid complex DbgVariable!");
     return Var.hasComplexAddress();
   }
   bool isBlockByrefVariable() const {
-    assert(Var.Verify() && "Invalid complex DbgVariable!");
+    assert(Var.isVariable() && "Invalid complex DbgVariable!");
     return Var.isBlockByrefVariable();
   }
   unsigned getNumAddrElements() const {
-    assert(Var.Verify() && "Invalid complex DbgVariable!");
+    assert(Var.isVariable() && "Invalid complex DbgVariable!");
     return Var.getNumAddrElements();
   }
   uint64_t getAddrElement(unsigned i) const {
@@ -189,16 +210,6 @@ public:
   DIType getType() const;
 };

-
-// A String->Symbol mapping of strings used by indirect
-// references.
-typedef StringMap<std::pair<MCSymbol*, unsigned>,
-                  BumpPtrAllocator&> StrPool;
-
-// A Symbol->pair<Symbol, unsigned> mapping of addresses used by indirect
-// references.
-typedef DenseMap<MCSymbol *, std::pair<MCSymbol *, unsigned> > AddrPool;
-
 /// \brief Collects and handles information specific to a particular
 /// collection of units.
 class DwarfUnits {
@@ -215,21 +226,28 @@ class DwarfUnits {
   SmallVector<CompileUnit *, 1> CUs;

   // Collection of strings for this unit and assorted symbols.
+  // A String->Symbol mapping of strings used by indirect
+  // references.
+  typedef StringMap<std::pair<MCSymbol*, unsigned>,
+                    BumpPtrAllocator&> StrPool;
   StrPool StringPool;
   unsigned NextStringPoolNumber;
   std::string StringPref;

   // Collection of addresses for this unit and assorted labels.
+  // A Symbol->unsigned mapping of addresses used by indirect
+  // references.
+  typedef DenseMap<const MCExpr *, unsigned> AddrPool;
   AddrPool AddressPool;
   unsigned NextAddrPoolNumber;

 public:
   DwarfUnits(AsmPrinter *AP, FoldingSet<DIEAbbrev> *AS,
              std::vector<DIEAbbrev *> *A, const char *Pref,
-             BumpPtrAllocator &DA) :
-    Asm(AP), AbbreviationsSet(AS), Abbreviations(A),
-    StringPool(DA), NextStringPoolNumber(0), StringPref(Pref),
-    AddressPool(), NextAddrPoolNumber(0) {}
+             BumpPtrAllocator &DA)
+      : Asm(AP), AbbreviationsSet(AS), Abbreviations(A), StringPool(DA),
+        NextStringPoolNumber(0), StringPref(Pref), AddressPool(),
+        NextAddrPoolNumber(0) {}

   /// \brief Compute the size and offset of a DIE given an incoming Offset.
   unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
@@ -245,14 +263,15 @@ public:

   /// \brief Emit all of the units to the section listed with the given
   /// abbreviation section.
-  void emitUnits(DwarfDebug *, const MCSection *, const MCSection *,
-                 const MCSymbol *);
+  void emitUnits(DwarfDebug *DD, const MCSection *USection,
+                 const MCSection *ASection, const MCSymbol *ASectionSym);

   /// \brief Emit all of the strings to the section given.
-  void emitStrings(const MCSection *, const MCSection *, const MCSymbol *);
+  void emitStrings(const MCSection *StrSection, const MCSection *OffsetSection,
+                   const MCSymbol *StrSecSym);

   /// \brief Emit all of the addresses to the section given.
-  void emitAddresses(const MCSection *);
+  void emitAddresses(const MCSection *AddrSection);

   /// \brief Returns the entry into the start of the pool.
   MCSymbol *getStringPoolSym();
@@ -270,7 +289,8 @@ public:

   /// \brief Returns the index into the address pool with the given
   /// label/symbol.
-  unsigned getAddrPoolIndex(MCSymbol *);
+  unsigned getAddrPoolIndex(const MCExpr *Sym);
+  unsigned getAddrPoolIndex(const MCSymbol *Sym);

   /// \brief Returns the address pool.
   AddrPool *getAddrPool() { return &AddressPool; }
@@ -291,10 +311,7 @@ class DwarfDebug {
   // All DIEValues are allocated through this allocator.
   BumpPtrAllocator DIEValueAllocator;

-  //===--------------------------------------------------------------------===//
-  // Attribute used to construct specific Dwarf sections.
-  //
-
+  // Handle to the compile unit used for the inline extension handling.
   CompileUnit *FirstCU;

   // Maps MDNode with its corresponding CompileUnit.
@@ -318,7 +335,7 @@ class DwarfDebug {
   // Provides a unique id per text section.
   SetVector<const MCSection*> SectionMap;

-  // List of Arguments (DbgValues) for current function.
+  // List of arguments for the current function.
   SmallVector<DbgVariable *, 8> CurrentFnArguments;

   LexicalScopes LScopes;
@@ -327,7 +344,9 @@ class DwarfDebug {
   DenseMap<const MDNode *, DIE *> AbstractSPDies;

   // Collection of dbg variables of a scope.
- DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> > ScopeVariables; + typedef DenseMap<LexicalScope *, + SmallVector<DbgVariable *, 8> > ScopeVariablesMap; + ScopeVariablesMap ScopeVariables; // Collection of abstract variables. DenseMap<const MDNode *, DbgVariable *> AbstractVariables; @@ -342,7 +361,9 @@ class DwarfDebug { // Keep track of inlined functions and their location. This // information is used to populate the debug_inlined section. typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels; - DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo; + typedef DenseMap<const MDNode *, + SmallVector<InlineInfoLabels, 4> > InlineInfoMap; + InlineInfoMap InlineInfo; SmallVector<const MDNode *, 4> InlinedSPNodes; // This is a collection of subprogram MDNodes that are processed to @@ -387,7 +408,7 @@ class DwarfDebug { MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym; // As an optimization, there is no need to emit an entry in the directory - // table for the same directory as DW_at_comp_dir. + // table for the same directory as DW_AT_comp_dir. StringRef CompilationDir; // Counter for assigning globally unique IDs for CUs. @@ -402,10 +423,20 @@ class DwarfDebug { // Whether or not we're emitting info for older versions of gdb on darwin. bool IsDarwinGDBCompat; + // Holder for imported entities. + typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32> + ImportedEntityMap; + ImportedEntityMap ScopesWithImportedEntities; + + // Holder for types that are going to be extracted out into a type unit. + std::vector<DIE *> TypeUnits; + // DWARF5 Experimental Options bool HasDwarfAccelTables; bool HasSplitDwarf; + unsigned DwarfVersion; + // Separated Dwarf Variables // In general these will all be for bits that are left in the // original object file, rather than things that are meant @@ -423,10 +454,6 @@ class DwarfDebug { // Holder for the skeleton information. DwarfUnits SkeletonHolder; - typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32> - ImportedEntityMap; - ImportedEntityMap ScopesWithImportedEntities; - private: void addScopeVariable(LexicalScope *LS, DbgVariable *Var); @@ -627,6 +654,10 @@ public: /// \brief Process end of an instruction. void endInstruction(const MachineInstr *MI); + /// \brief Add a DIE to the set of types that we're going to pull into + /// type units. + void addTypeUnitType(DIE *Die) { TypeUnits.push_back(Die); } + /// \brief Look up the source id with the given directory and source file /// names. If none currently exists, create a new id and insert it in the /// SourceIds map. @@ -649,6 +680,9 @@ public: /// \brief Returns whether or not to change the current debug info for the /// split dwarf proposal support. bool useSplitDwarf() { return HasSplitDwarf; } + + /// Returns the Dwarf Version. + unsigned getDwarfVersion() const { return DwarfVersion; } }; } // End of namespace llvm diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index 92a5bb7..b48b817 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -26,18 +26,20 @@ using namespace llvm; namespace { class BasicTTI : public ImmutablePass, public TargetTransformInfo { - const TargetLoweringBase *TLI; + const TargetMachine *TM; /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the result needs to be inserted and/or extracted from vectors. 
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); } + public: - BasicTTI() : ImmutablePass(ID), TLI(0) { + BasicTTI() : ImmutablePass(ID), TM(0) { llvm_unreachable("This pass cannot be directly constructed"); } - BasicTTI(const TargetLoweringBase *TLI) : ImmutablePass(ID), TLI(TLI) { + BasicTTI(const TargetMachine *TM) : ImmutablePass(ID), TM(TM) { initializeBasicTTIPass(*PassRegistry::getPassRegistry()); } @@ -63,6 +65,8 @@ public: return this; } + virtual bool hasBranchDivergence() const; + /// \name Scalar TTI Implementations /// @{ @@ -106,7 +110,7 @@ public: virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, ArrayRef<Type*> Tys) const; virtual unsigned getNumberOfParts(Type *Tp) const; - virtual unsigned getAddressComputationCost(Type *Ty) const; + virtual unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const; /// @} }; @@ -118,17 +122,18 @@ INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti", char BasicTTI::ID = 0; ImmutablePass * -llvm::createBasicTargetTransformInfoPass(const TargetLoweringBase *TLI) { - return new BasicTTI(TLI); +llvm::createBasicTargetTransformInfoPass(const TargetMachine *TM) { + return new BasicTTI(TM); } +bool BasicTTI::hasBranchDivergence() const { return false; } bool BasicTTI::isLegalAddImmediate(int64_t imm) const { - return TLI->isLegalAddImmediate(imm); + return getTLI()->isLegalAddImmediate(imm); } bool BasicTTI::isLegalICmpImmediate(int64_t imm) const { - return TLI->isLegalICmpImmediate(imm); + return getTLI()->isLegalICmpImmediate(imm); } bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, @@ -139,7 +144,7 @@ bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, AM.BaseOffs = BaseOffset; AM.HasBaseReg = HasBaseReg; AM.Scale = Scale; - return TLI->isLegalAddressingMode(AM, Ty); + return getTLI()->isLegalAddressingMode(AM, Ty); } int BasicTTI::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, @@ -150,27 +155,28 @@ int BasicTTI::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, AM.BaseOffs = BaseOffset; AM.HasBaseReg = HasBaseReg; AM.Scale = Scale; - return TLI->getScalingFactorCost(AM, Ty); + return getTLI()->getScalingFactorCost(AM, Ty); } bool BasicTTI::isTruncateFree(Type *Ty1, Type *Ty2) const { - return TLI->isTruncateFree(Ty1, Ty2); + return getTLI()->isTruncateFree(Ty1, Ty2); } bool BasicTTI::isTypeLegal(Type *Ty) const { - EVT T = TLI->getValueType(Ty); - return TLI->isTypeLegal(T); + EVT T = getTLI()->getValueType(Ty); + return getTLI()->isTypeLegal(T); } unsigned BasicTTI::getJumpBufAlignment() const { - return TLI->getJumpBufAlignment(); + return getTLI()->getJumpBufAlignment(); } unsigned BasicTTI::getJumpBufSize() const { - return TLI->getJumpBufSize(); + return getTLI()->getJumpBufSize(); } bool BasicTTI::shouldBuildLookupTables() const { + const TargetLoweringBase *TLI = getTLI(); return TLI->supportJumpTables() && (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); @@ -213,6 +219,7 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, OperandValueKind) const { // Check if any of the operands are vector operands. 
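// Editor's aside (illustration only, not part of this patch): the cost hooks
// in this file price an IR operation by first asking how its type legalizes.
// getTypeLegalizationCost() returns a (#parts, legal MVT) pair; for example,
// on an assumed target whose widest legal vector is v4i32, a v8i32 operation
// legalizes to two v4i32 pieces:
//
//   std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Ty);
//   // LT.first == 2, LT.second == MVT::v4i32 on that target
//
// and LT.first is the factor by which the per-part cost is then scaled.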
+ const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -259,6 +266,7 @@ unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { + const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -352,6 +360,7 @@ unsigned BasicTTI::getCFInstrCost(unsigned Opcode) const { unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const { + const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -396,7 +405,7 @@ unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const { assert(!Src->isVoidTy() && "Invalid type"); - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Src); // Assume that all loads of legal types cost 1. return LT.first; @@ -437,12 +446,18 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, case Intrinsic::floor: ISD = ISD::FFLOOR; break; case Intrinsic::ceil: ISD = ISD::FCEIL; break; case Intrinsic::trunc: ISD = ISD::FTRUNC; break; + case Intrinsic::nearbyint: + ISD = ISD::FNEARBYINT; break; case Intrinsic::rint: ISD = ISD::FRINT; break; case Intrinsic::pow: ISD = ISD::FPOW; break; case Intrinsic::fma: ISD = ISD::FMA; break; case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add? + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return 0; } + const TargetLoweringBase *TLI = getTLI(); std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy); if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { @@ -476,10 +491,10 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, } unsigned BasicTTI::getNumberOfParts(Type *Tp) const { - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Tp); return LT.first; } -unsigned BasicTTI::getAddressComputationCost(Type *Ty) const { +unsigned BasicTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { return 0; } diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index bf0ea5a..9cd4208 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -406,7 +406,8 @@ void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, /// MBB so that the part before the iterator falls into the part starting at the /// iterator. This returns the new MBB. MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, - MachineBasicBlock::iterator BBI1) { + MachineBasicBlock::iterator BBI1, + const BasicBlock *BB) { if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1)) return 0; @@ -414,7 +415,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, // Create the fall-through block. MachineFunction::iterator MBBI = &CurMBB; - MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(CurMBB.getBasicBlock()); + MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(BB); CurMBB.getParent()->insert(++MBBI, NewMBB); // Move all the successors of this block to the specified block. @@ -647,6 +648,7 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, /// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist /// only of the common tail. 
Create a block that does by splitting one. bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + MachineBasicBlock *SuccBB, unsigned maxCommonTailLength, unsigned &commonTailIndex) { commonTailIndex = 0; @@ -676,7 +678,12 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size " << maxCommonTailLength); - MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI); + // If the split block unconditionally falls-thru to SuccBB, it will be + // merged. In control flow terms it should then take SuccBB's name. e.g. If + // SuccBB is an inner loop, the common tail is still part of the inner loop. + const BasicBlock *BB = (SuccBB && MBB->succ_size() == 1) ? + SuccBB->getBasicBlock() : MBB->getBasicBlock(); + MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI, BB); if (!newMBB) { DEBUG(dbgs() << "... failed!"); return false; @@ -784,7 +791,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, !SameTails[commonTailIndex].tailIsWholeBlock())) { // None of the blocks consist entirely of the common tail. // Split a block so that one does. - if (!CreateCommonTailOnlyBlock(PredBB, + if (!CreateCommonTailOnlyBlock(PredBB, SuccBB, maxCommonTailLength, commonTailIndex)) { RemoveBlocksWithHash(CurHash, SuccBB, PredBB); continue; diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h index df795df..26bdca9 100644 --- a/lib/CodeGen/BranchFolding.h +++ b/lib/CodeGen/BranchFolding.h @@ -100,13 +100,15 @@ namespace llvm { void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest); MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB, - MachineBasicBlock::iterator BBI1); + MachineBasicBlock::iterator BBI1, + const BasicBlock *BB); unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength, MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB); void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB, MachineBasicBlock* PredBB); bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + MachineBasicBlock *SuccBB, unsigned maxCommonTailLength, unsigned &commonTailIndex); diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 38ae17d..b03c325 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -33,6 +34,7 @@ INITIALIZE_PASS_END(CalculateSpillWeights, "calcspillweights", void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const { au.addRequired<LiveIntervals>(); + au.addRequired<MachineBlockFrequencyInfo>(); au.addRequired<MachineLoopInfo>(); au.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(au); @@ -45,7 +47,8 @@ bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &MF) { LiveIntervals &LIS = getAnalysis<LiveIntervals>(); MachineRegisterInfo &MRI = MF.getRegInfo(); - VirtRegAuxInfo VRAI(MF, LIS, getAnalysis<MachineLoopInfo>()); + VirtRegAuxInfo VRAI(MF, LIS, getAnalysis<MachineLoopInfo>(), + getAnalysis<MachineBlockFrequencyInfo>()); for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (MRI.reg_nodbg_empty(Reg)) @@ -107,12 +110,12 @@ static bool 
isRematerializable(const LiveInterval &LI, return true; } -void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { +void +VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { MachineRegisterInfo &mri = MF.getRegInfo(); const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo(); MachineBasicBlock *mbb = 0; MachineLoop *loop = 0; - unsigned loopDepth = 0; bool isExiting = false; float totalWeight = 0; SmallPtrSet<MachineInstr*, 8> visited; @@ -140,14 +143,14 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { if (mi->getParent() != mbb) { mbb = mi->getParent(); loop = Loops.getLoopFor(mbb); - loopDepth = loop ? loop->getLoopDepth() : 0; isExiting = loop ? loop->isLoopExiting(mbb) : false; } // Calculate instr weight. bool reads, writes; tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg); - weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth); + weight = LiveIntervals::getSpillWeight( + writes, reads, MBFI.getBlockFreq(mi->getParent())); // Give extra weight to what looks like a loop induction variable update. if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb)) diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index 75f4b96..fcfc9dc 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -24,7 +24,7 @@ using namespace llvm; CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, - const TargetMachine &tm, SmallVector<CCValAssign, 16> &locs, + const TargetMachine &tm, SmallVectorImpl<CCValAssign> &locs, LLVMContext &C) : CallingConv(CC), IsVarArg(isVarArg), MF(mf), TM(tm), TRI(*TM.getRegisterInfo()), Locs(locs), Context(C), diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index d4955b3..18c8e0a 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -361,7 +361,7 @@ findSuitableFreeRegister(RegRefIter RegRefBegin, unsigned AntiDepReg, unsigned LastNewReg, const TargetRegisterClass *RC, - SmallVector<unsigned, 2> &Forbid) + SmallVectorImpl<unsigned> &Forbid) { ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC); for (unsigned i = 0; i != Order.size(); ++i) { @@ -388,7 +388,7 @@ findSuitableFreeRegister(RegRefIter RegRefBegin, continue; // If NewReg overlaps any of the forbidden registers, we can't use it. bool Forbidden = false; - for (SmallVector<unsigned, 2>::iterator it = Forbid.begin(), + for (SmallVectorImpl<unsigned>::iterator it = Forbid.begin(), ite = Forbid.end(); it != ite; ++it) if (TRI->regsOverlap(NewReg, *it)) { Forbidden = true; diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index df13dd3..565d20b 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -103,7 +103,7 @@ class TargetRegisterInfo; unsigned AntiDepReg, unsigned LastNewReg, const TargetRegisterClass *RC, - SmallVector<unsigned, 2> &Forbid); + SmallVectorImpl<unsigned> &Forbid); }; } diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 05ac58a..c7c1752 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -32,7 +32,7 @@ STATISTIC(NumResumesLowered, "Number of resume calls lowered"); namespace { class DwarfEHPrepare : public FunctionPass { - const TargetLoweringBase *TLI; + const TargetMachine *TM; // RewindFunction - _Unwind_Resume or the target equivalent. 
Constant *RewindFunction; @@ -42,8 +42,8 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. - DwarfEHPrepare(const TargetLoweringBase *TLI) : - FunctionPass(ID), TLI(TLI), RewindFunction(0) { + DwarfEHPrepare(const TargetMachine *TM) : + FunctionPass(ID), TM(TM), RewindFunction(0) { initializeDominatorTreePass(*PassRegistry::getPassRegistry()); } @@ -59,8 +59,8 @@ namespace { char DwarfEHPrepare::ID = 0; -FunctionPass *llvm::createDwarfEHPass(const TargetLoweringBase *TLI) { - return new DwarfEHPrepare(TLI); +FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) { + return new DwarfEHPrepare(TM); } /// GetExceptionObject - Return the exception object from the value passed into @@ -117,6 +117,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { return false; // Find the rewind function if we didn't already. + const TargetLowering *TLI = TM->getTargetLowering(); if (!RewindFunction) { LLVMContext &Ctx = Resumes[0]->getContext(); FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 562a610..e277f5c 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -573,7 +573,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // Kill off any remaining uses that don't match available, and build a list of // incoming DomainValues that we want to merge. SmallVector<LiveReg, 4> Regs; - for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { + for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { int rx = *i; const LiveReg &LR = LiveRegs[rx]; // This useless DomainValue could have been missed above. @@ -583,7 +583,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { } // Sorted insertion. bool Inserted = false; - for (SmallVector<LiveReg, 4>::iterator i = Regs.begin(), e = Regs.end(); + for (SmallVectorImpl<LiveReg>::iterator i = Regs.begin(), e = Regs.end(); i != e && !Inserted; ++i) { if (LR.Def < i->Def) { Inserted = true; @@ -614,7 +614,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { continue; // If latest didn't merge, it is useless now. Kill all registers using it. - for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i) + for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i) if (LiveRegs[*i].Value == Latest) kill(*i); } diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index f4485d5..1ae7e3b 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -720,9 +720,9 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, if (BBI.IsDone || BBI.IsUnpredicable) return false; - // If it is already predicated, check if its predicate subsumes the new - // predicate. - if (BBI.Predicate.size() && !TII->SubsumesPredicate(BBI.Predicate, Pred)) + // If it is already predicated, check if the new predicate subsumes + // its predicate. 
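// Editor's aside (not part of this patch): TargetInstrInfo documents
// SubsumesPredicate(A, B) as returning true when the first predicate
// subsumes the second, so the argument order carries the whole meaning
// of this check:
//
//   TII->SubsumesPredicate(BBI.Predicate, Pred);  // deleted form: existing
//                                                 //   subsumes incoming
//   TII->SubsumesPredicate(Pred, BBI.Predicate);  // added form: incoming
//                                                 //   subsumes existing
//
// Only the added form matches the reworded comment above.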
+ if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate)) return false; if (BBI.BrCond.size()) { diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 3bdc30d..8910652 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -22,8 +22,10 @@ #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -64,6 +66,7 @@ class InlineSpiller : public Spiller { MachineRegisterInfo &MRI; const TargetInstrInfo &TII; const TargetRegisterInfo &TRI; + const MachineBlockFrequencyInfo &MBFI; // Variables that are valid during spill(), but used by multiple methods. LiveRangeEdit *Edit; @@ -147,7 +150,8 @@ public: MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()), TII(*mf.getTarget().getInstrInfo()), - TRI(*mf.getTarget().getRegisterInfo()) {} + TRI(*mf.getTarget().getRegisterInfo()), + MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {} void spill(LiveRangeEdit &); @@ -1050,6 +1054,34 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, : TII.foldMemoryOperand(MI, FoldOps, StackSlot); if (!FoldMI) return false; + + // Remove LIS for any dead defs in the original MI not in FoldMI. + for (MIBundleOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || + MRI.isReserved(Reg)) { + continue; + } + MIBundleOperands::PhysRegInfo RI = + MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI); + if (MO->readsReg()) { + assert(RI.Reads && "Cannot fold physreg reader"); + continue; + } + if (RI.Defines) + continue; + // FoldMI does not define this physreg. Remove the LI segment. + assert(MO->isDead() && "Cannot fold physreg def"); + for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) { + if (LiveInterval *LI = LIS.getCachedRegUnit(*Units)) { + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); + if (VNInfo *VNI = LI->getVNInfoAt(Idx)) + LI->removeValNo(VNI); + } + } + } LIS.ReplaceMachineInstrInMaps(MI, FoldMI); MI->eraseFromParent(); @@ -1120,18 +1152,14 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Debug values are not allowed to affect codegen. if (MI->isDebugValue()) { // Modify DBG_VALUE now that the value is in a spill slot. - uint64_t Offset = MI->getOperand(1).getImm(); + bool IsIndirect = MI->getOperand(1).isImm(); + uint64_t Offset = IsIndirect ? 
MI->getOperand(1).getImm() : 0; const MDNode *MDPtr = MI->getOperand(2).getMetadata(); DebugLoc DL = MI->getDebugLoc(); - if (MachineInstr *NewDV = TII.emitFrameIndexDebugValue(MF, StackSlot, - Offset, MDPtr, DL)) { - DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); - MachineBasicBlock *MBB = MI->getParent(); - MBB->insert(MBB->erase(MI), NewDV); - } else { - DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); - MI->eraseFromParent(); - } + DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); + MachineBasicBlock *MBB = MI->getParent(); + BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(StackSlot).addImm(Offset).addMetadata(MDPtr); continue; } @@ -1294,5 +1322,5 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { if (!RegsToSpill.empty()) spillAll(); - Edit->calculateRegClassAndHint(MF, Loops); + Edit->calculateRegClassAndHint(MF, Loops, MBFI); } diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index f647fd6..6c9b2e5 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -83,7 +83,7 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, } void LLVMTargetMachine::addAnalysisPasses(PassManagerBase &PM) { - PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); + PM.add(createBasicTargetTransformInfoPass(this)); } /// addPassesToX helper drives creation and initialization of TargetPassConfig. @@ -115,7 +115,6 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, new MachineModuleInfo(*TM->getMCAsmInfo(), *TM->getRegisterInfo(), &TM->getTargetLowering()->getObjFileLowering()); PM.add(MMI); - MCContext *Context = &MMI->getContext(); // Return the MCContext by-ref. // Set up a MachineFunction for the rest of CodeGen to work on. PM.add(new MachineFunctionAnalysis(*TM)); @@ -134,7 +133,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, PassConfig->setInitialized(); - return Context; + return &MMI->getContext(); } bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, @@ -164,6 +163,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, const MCAsmInfo &MAI = *getMCAsmInfo(); const MCRegisterInfo &MRI = *getRegisterInfo(); + const MCInstrInfo &MII = *getInstrInfo(); const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); OwningPtr<MCStreamer> AsmStreamer; @@ -171,16 +171,13 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_AssemblyFile: { MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, - *getInstrInfo(), - Context->getRegisterInfo(), STI); + MII, MRI, STI); // Create a code emitter if asked to show the encoding. MCCodeEmitter *MCE = 0; MCAsmBackend *MAB = 0; if (ShowMCEncoding) { - const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); - MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI, - *Context); + MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, *Context); MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU); } @@ -198,8 +195,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. 
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, - STI, *Context); + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, + *Context); MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU); if (MCE == 0 || MAB == 0) diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index 8172154..ffe407a 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -212,15 +212,15 @@ LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) { /// constructScopeNest void LexicalScopes::constructScopeNest(LexicalScope *Scope) { - assert (Scope && "Unable to calculate scop edominance graph!"); + assert (Scope && "Unable to calculate scope dominance graph!"); SmallVector<LexicalScope *, 4> WorkStack; WorkStack.push_back(Scope); unsigned Counter = 0; while (!WorkStack.empty()) { LexicalScope *WS = WorkStack.back(); - const SmallVector<LexicalScope *, 4> &Children = WS->getChildren(); + const SmallVectorImpl<LexicalScope *> &Children = WS->getChildren(); bool visitedChildren = false; - for (SmallVector<LexicalScope *, 4>::const_iterator SI = Children.begin(), + for (SmallVectorImpl<LexicalScope *>::const_iterator SI = Children.begin(), SE = Children.end(); SI != SE; ++SI) { LexicalScope *ChildScope = *SI; if (!ChildScope->getDFSOut()) { @@ -279,8 +279,8 @@ getMachineBasicBlocks(DebugLoc DL, return; } - SmallVector<InsnRange, 4> &InsnRanges = Scope->getRanges(); - for (SmallVector<InsnRange, 4>::iterator I = InsnRanges.begin(), + SmallVectorImpl<InsnRange> &InsnRanges = Scope->getRanges(); + for (SmallVectorImpl<InsnRange>::iterator I = InsnRanges.begin(), E = InsnRanges.end(); I != E; ++I) { InsnRange &R = *I; MBBs.insert(R.first->getParent()); diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 0b117ac..85bed46 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -108,6 +108,7 @@ class LDVImpl; class UserValue { const MDNode *variable; ///< The debug info variable we are part of. unsigned offset; ///< Byte offset into variable. + bool IsIndirect; ///< true if this is a register-indirect+offset value. DebugLoc dl; ///< The debug location for the variable. This is ///< used by dwarf writer to find lexical scope. UserValue *leader; ///< Equivalence class leader. @@ -134,9 +135,10 @@ class UserValue { public: /// UserValue - Create a new UserValue. - UserValue(const MDNode *var, unsigned o, DebugLoc L, + UserValue(const MDNode *var, unsigned o, bool i, DebugLoc L, LocMap::Allocator &alloc) - : variable(var), offset(o), dl(L), leader(this), next(0), locInts(alloc) + : variable(var), offset(o), IsIndirect(i), dl(L), leader(this), + next(0), locInts(alloc) {} /// getLeader - Get the leader of this value's equivalence class. @@ -299,7 +301,8 @@ class LDVImpl { UVMap userVarMap; /// getUserValue - Find or create a UserValue. - UserValue *getUserValue(const MDNode *Var, unsigned Offset, DebugLoc DL); + UserValue *getUserValue(const MDNode *Var, unsigned Offset, + bool IsIndirect, DebugLoc DL); /// lookupVirtReg - Find the EC leader for VirtReg or null. 
UserValue *lookupVirtReg(unsigned VirtReg); @@ -414,7 +417,7 @@ void UserValue::mapVirtRegs(LDVImpl *LDV) { } UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset, - DebugLoc DL) { + bool IsIndirect, DebugLoc DL) { UserValue *&Leader = userVarMap[Var]; if (Leader) { UserValue *UV = Leader->getLeader(); @@ -424,7 +427,7 @@ UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset, return UV; } - UserValue *UV = new UserValue(Var, Offset, DL, allocator); + UserValue *UV = new UserValue(Var, Offset, IsIndirect, DL, allocator); userValues.push_back(UV); Leader = UserValue::merge(Leader, UV); return UV; @@ -445,15 +448,17 @@ UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) { bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) { // DBG_VALUE loc, offset, variable if (MI->getNumOperands() != 3 || - !MI->getOperand(1).isImm() || !MI->getOperand(2).isMetadata()) { + !(MI->getOperand(1).isReg() || MI->getOperand(1).isImm()) || + !MI->getOperand(2).isMetadata()) { DEBUG(dbgs() << "Can't handle " << *MI); return false; } // Get or create the UserValue for (variable,offset). - unsigned Offset = MI->getOperand(1).getImm(); + bool IsIndirect = MI->getOperand(1).isImm(); + unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; const MDNode *Var = MI->getOperand(2).getMetadata(); - UserValue *UV = getUserValue(Var, Offset, MI->getDebugLoc()); + UserValue *UV = getUserValue(Var, Offset, IsIndirect, MI->getDebugLoc()); UV->addDef(Idx, MI->getOperand(0)); return true; } @@ -921,19 +926,12 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, MachineOperand &Loc = locations[LocNo]; ++NumInsertedDebugValues; - // Frame index locations may require a target callback. - if (Loc.isFI()) { - MachineInstr *MI = TII.emitFrameIndexDebugValue(*MBB->getParent(), - Loc.getIndex(), offset, variable, - findDebugLoc()); - if (MI) { - MBB->insert(I, MI); - return; - } - } - // This is not a frame index, or the target is happy with a standard FI. 
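// Editor's sketch (not part of this patch): per the operand test in
// handleDebugValue() above, a DBG_VALUE now comes in two shapes, told apart
// by its second operand:
//
//   DBG_VALUE <loc>, <reg>, !var   ; direct: the location holds the value
//   DBG_VALUE <loc>, <imm>, !var   ; indirect: value lives at [loc + imm]
//
// The replacement code below uses the BuildMI() overload that takes the
// IsIndirect flag for register locations, and falls back to the generic
// operand/offset form for everything else.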
- BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) - .addOperand(Loc).addImm(offset).addMetadata(variable); + if (Loc.isReg()) + BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE), + IsIndirect, Loc.getReg(), offset, variable); + else + BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) + .addOperand(Loc).addImm(offset).addMetadata(variable); } void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, @@ -992,4 +990,3 @@ void LiveDebugVariables::dump() { static_cast<LDVImpl*>(pImpl)->print(dbgs()); } #endif - diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 861e99b..6be6bf3 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -415,7 +415,7 @@ void LiveInterval::removeValNo(VNInfo *ValNo) { void LiveInterval::join(LiveInterval &Other, const int *LHSValNoAssignments, const int *RHSValNoAssignments, - SmallVector<VNInfo*, 16> &NewVNInfo, + SmallVectorImpl<VNInfo *> &NewVNInfo, MachineRegisterInfo *MRI) { verify(); diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 1ca2d46..3680943 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/Value.h" +#include "llvm/Support/BlockFrequency.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -51,6 +52,14 @@ INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_END(LiveIntervals, "liveintervals", "Live Interval Analysis", false, false) +#ifndef NDEBUG +static cl::opt<bool> EnablePrecomputePhysRegs( + "precompute-phys-liveness", cl::Hidden, + cl::desc("Eagerly compute live intervals for all physreg units.")); +#else +static bool EnablePrecomputePhysRegs = false; +#endif // NDEBUG + void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); @@ -115,6 +124,12 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { computeRegMasks(); computeLiveInRegUnits(); + if (EnablePrecomputePhysRegs) { + // For stress testing, precompute live ranges of all physical register + // units, including reserved registers. + for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i) + getRegUnit(i); + } DEBUG(dump()); return true; } @@ -605,21 +620,9 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const { } float -LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { - // Limit the loop depth ridiculousness. - if (loopDepth > 200) - loopDepth = 200; - - // The loop depth is used to roughly estimate the number of times the - // instruction is executed. Something like 10^d is simple, but will quickly - // overflow a float. This expression behaves like 10^d for small d, but is - // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of - // headroom before overflow. - // By the way, powf() might be unavailable here. For consistency, - // We may take pow(double,double). 
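// Editor's aside (not part of this patch): plugging numbers into the deleted
// expression below shows the taper the old comment describes: the factor
// (1 + 100/(d+10))^d is ~6.6e2 at depth d=3 (same order as 10^3) but only
// ~6.7e33 at d=200, instead of an unrepresentable 10^200. The replacement
// drops the depth heuristic entirely and scales each def/use by the block's
// frequency relative to the function entry:
//
//   weight = (isDef + isUse) * freq.getFrequency()
//                            / BlockFrequency::getEntryFrequency();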
- float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth); - - return (isDef + isUse) * lc; +LiveIntervals::getSpillWeight(bool isDef, bool isUse, BlockFrequency freq) { + const float Scale = 1.0f / BlockFrequency::getEntryFrequency(); + return (isDef + isUse) * (freq.getFrequency() * Scale); } LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 7793e96..792ef54 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -13,7 +13,6 @@ #define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/LiveRangeEdit.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -216,108 +215,122 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, return true; } -void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, - ArrayRef<unsigned> RegsBeingSpilled) { - SetVector<LiveInterval*, - SmallVector<LiveInterval*, 8>, - SmallPtrSet<LiveInterval*, 8> > ToShrink; +/// Find all live intervals that need to shrink, then remove the instruction. +void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { + assert(MI->allDefsAreDead() && "Def isn't really dead"); + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); - for (;;) { - // Erase all dead defs. - while (!Dead.empty()) { - MachineInstr *MI = Dead.pop_back_val(); - assert(MI->allDefsAreDead() && "Def isn't really dead"); - SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); - - // Never delete inline asm. - if (MI->isInlineAsm()) { - DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI); - continue; - } + // Never delete a bundled instruction. + if (MI->isBundled()) { + return; + } + // Never delete inline asm. + if (MI->isInlineAsm()) { + DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI); + return; + } - // Use the same criteria as DeadMachineInstructionElim. - bool SawStore = false; - if (!MI->isSafeToMove(&TII, 0, SawStore)) { - DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI); - continue; - } + // Use the same criteria as DeadMachineInstructionElim. + bool SawStore = false; + if (!MI->isSafeToMove(&TII, 0, SawStore)) { + DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI); + return; + } - DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI); - - // Collect virtual registers to be erased after MI is gone. - SmallVector<unsigned, 8> RegsToErase; - bool ReadsPhysRegs = false; - - // Check for live intervals that may shrink - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { - if (!MOI->isReg()) - continue; - unsigned Reg = MOI->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) { - // Check if MI reads any unreserved physregs. - if (Reg && MOI->readsReg() && !MRI.isReserved(Reg)) - ReadsPhysRegs = true; - continue; - } - LiveInterval &LI = LIS.getInterval(Reg); - - // Shrink read registers, unless it is likely to be expensive and - // unlikely to change anything. We typically don't want to shrink the - // PIC base register that has lots of uses everywhere. - // Always shrink COPY uses that probably come from live range splitting. - if (MI->readsVirtualRegister(Reg) && - (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) || - LI.killedAt(Idx))) - ToShrink.insert(&LI); - - // Remove defined value. 
- if (MOI->isDef()) { - if (VNInfo *VNI = LI.getVNInfoAt(Idx)) { - if (TheDelegate) - TheDelegate->LRE_WillShrinkVirtReg(LI.reg); - LI.removeValNo(VNI); - if (LI.empty()) - RegsToErase.push_back(Reg); + DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI); + + // Collect virtual registers to be erased after MI is gone. + SmallVector<unsigned, 8> RegsToErase; + bool ReadsPhysRegs = false; + + // Check for live intervals that may shrink + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + if (!MOI->isReg()) + continue; + unsigned Reg = MOI->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + // Check if MI reads any unreserved physregs. + if (Reg && MOI->readsReg() && !MRI.isReserved(Reg)) + ReadsPhysRegs = true; + else if (MOI->isDef()) { + for (MCRegUnitIterator Units(Reg, MRI.getTargetRegisterInfo()); + Units.isValid(); ++Units) { + if (LiveInterval *LI = LIS.getCachedRegUnit(*Units)) { + if (VNInfo *VNI = LI->getVNInfoAt(Idx)) + LI->removeValNo(VNI); } } } - - // Currently, we don't support DCE of physreg live ranges. If MI reads - // any unreserved physregs, don't erase the instruction, but turn it into - // a KILL instead. This way, the physreg live ranges don't end up - // dangling. - // FIXME: It would be better to have something like shrinkToUses() for - // physregs. That could potentially enable more DCE and it would free up - // the physreg. It would not happen often, though. - if (ReadsPhysRegs) { - MI->setDesc(TII.get(TargetOpcode::KILL)); - // Remove all operands that aren't physregs. - for (unsigned i = MI->getNumOperands(); i; --i) { - const MachineOperand &MO = MI->getOperand(i-1); - if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) - continue; - MI->RemoveOperand(i-1); - } - DEBUG(dbgs() << "Converted physregs to:\t" << *MI); - } else { + continue; + } + LiveInterval &LI = LIS.getInterval(Reg); + + // Shrink read registers, unless it is likely to be expensive and + // unlikely to change anything. We typically don't want to shrink the + // PIC base register that has lots of uses everywhere. + // Always shrink COPY uses that probably come from live range splitting. + if (MI->readsVirtualRegister(Reg) && + (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) || + LI.killedAt(Idx))) + ToShrink.insert(&LI); + + // Remove defined value. + if (MOI->isDef()) { + if (VNInfo *VNI = LI.getVNInfoAt(Idx)) { if (TheDelegate) - TheDelegate->LRE_WillEraseInstruction(MI); - LIS.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - ++NumDCEDeleted; + TheDelegate->LRE_WillShrinkVirtReg(LI.reg); + LI.removeValNo(VNI); + if (LI.empty()) + RegsToErase.push_back(Reg); } + } + } - // Erase any virtregs that are now empty and unused. There may be <undef> - // uses around. Keep the empty live range in that case. - for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) { - unsigned Reg = RegsToErase[i]; - if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) { - ToShrink.remove(&LIS.getInterval(Reg)); - eraseVirtReg(Reg); - } - } + // Currently, we don't support DCE of physreg live ranges. If MI reads + // any unreserved physregs, don't erase the instruction, but turn it into + // a KILL instead. This way, the physreg live ranges don't end up + // dangling. + // FIXME: It would be better to have something like shrinkToUses() for + // physregs. That could potentially enable more DCE and it would free up + // the physreg. It would not happen often, though. 
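// Editor's sketch (not part of this patch): the KILL rewrite below keeps only
// the physical-register operands, so a dead def that still reads a physreg,
// e.g.
//
//   %vreg5<def,dead> = COPY %EAX
//
// is turned into
//
//   KILL %EAX
//
// which keeps %EAX's live range terminated at this point instead of deleting
// its last use outright.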
+ if (ReadsPhysRegs) { + MI->setDesc(TII.get(TargetOpcode::KILL)); + // Remove all operands that aren't physregs. + for (unsigned i = MI->getNumOperands(); i; --i) { + const MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + continue; + MI->RemoveOperand(i-1); } + DEBUG(dbgs() << "Converted physregs to:\t" << *MI); + } else { + if (TheDelegate) + TheDelegate->LRE_WillEraseInstruction(MI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + ++NumDCEDeleted; + } + + // Erase any virtregs that are now empty and unused. There may be <undef> + // uses around. Keep the empty live range in that case. + for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) { + unsigned Reg = RegsToErase[i]; + if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) { + ToShrink.remove(&LIS.getInterval(Reg)); + eraseVirtReg(Reg); + } + } +} + +void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, + ArrayRef<unsigned> RegsBeingSpilled) { + ToShrinkSet ToShrink; + + for (;;) { + // Erase all dead defs. + while (!Dead.empty()) + eliminateDeadDef(Dead.pop_back_val(), ToShrink); if (ToShrink.empty()) break; @@ -331,7 +344,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, TheDelegate->LRE_WillShrinkVirtReg(LI->reg); if (!LIS.shrinkToUses(LI, &Dead)) continue; - + // Don't create new intervals for a register being spilled. // The new intervals would have to be spilled anyway so its not worth it. // Also they currently aren't spilled so creating them and not spilling @@ -343,7 +356,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, break; } } - + if (BeingSpilled) continue; // LI may have been separated, create new intervals. @@ -374,9 +387,11 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, } } -void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, - const MachineLoopInfo &Loops) { - VirtRegAuxInfo VRAI(MF, LIS, Loops); +void +LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, + const MachineLoopInfo &Loops, + const MachineBlockFrequencyInfo &MBFI) { + VirtRegAuxInfo VRAI(MF, LIS, Loops, MBFI); for (iterator I = begin(), E = end(); I != E; ++I) { LiveInterval &LI = **I; if (MRI.recomputeRegClass(LI.reg, MF.getTarget())) diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 4e83fc8..ed55d7a 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -441,7 +441,7 @@ void LiveVariables::HandleRegMask(const MachineOperand &MO) { } void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, - SmallVector<unsigned, 4> &Defs) { + SmallVectorImpl<unsigned> &Defs) { // What parts of the register are previously defined? SmallSet<unsigned, 32> Live; if (PhysRegDef[Reg] || PhysRegUse[Reg]) { @@ -484,7 +484,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, } void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, - SmallVector<unsigned, 4> &Defs) { + SmallVectorImpl<unsigned> &Defs) { while (!Defs.empty()) { unsigned Reg = Defs.back(); Defs.pop_back(); @@ -609,9 +609,9 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // if they have PHI nodes, and if so, we simulate an assignment at the end // of the current block. 
if (!PHIVarInfo[MBB->getNumber()].empty()) { - SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()]; + SmallVectorImpl<unsigned> &VarInfoVec = PHIVarInfo[MBB->getNumber()]; - for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(), + for (SmallVectorImpl<unsigned>::iterator I = VarInfoVec.begin(), E = VarInfoVec.end(); I != E; ++I) // Mark it alive only in the block we are representing. MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(), diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 78e9950..5633271 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" @@ -51,7 +52,7 @@ MCSymbol *MachineBasicBlock::getSymbol() const { if (!CachedMCSymbol) { const MachineFunction *MF = getParent(); MCContext &Ctx = MF->getContext(); - const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix(); + const char *Prefix = Ctx.getAsmInfo()->getPrivateGlobalPrefix(); CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber())); @@ -341,6 +342,38 @@ bool MachineBasicBlock::isLiveIn(unsigned Reg) const { return I != livein_end(); } +unsigned +MachineBasicBlock::addLiveIn(unsigned PhysReg, const TargetRegisterClass *RC) { + assert(getParent() && "MBB must be inserted in function"); + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Expected physreg"); + assert(RC && "Register class is required"); + assert((isLandingPad() || this == &getParent()->front()) && + "Only the entry block and landing pads can have physreg live ins"); + + bool LiveIn = isLiveIn(PhysReg); + iterator I = SkipPHIsAndLabels(begin()), E = end(); + MachineRegisterInfo &MRI = getParent()->getRegInfo(); + const TargetInstrInfo &TII = *getParent()->getTarget().getInstrInfo(); + + // Look for an existing copy. + if (LiveIn) + for (;I != E && I->isCopy(); ++I) + if (I->getOperand(1).getReg() == PhysReg) { + unsigned VirtReg = I->getOperand(0).getReg(); + if (!MRI.constrainRegClass(VirtReg, RC)) + llvm_unreachable("Incompatible live-in register class."); + return VirtReg; + } + + // No luck, create a virtual register. + unsigned VirtReg = MRI.createVirtualRegister(RC); + BuildMI(*this, I, DebugLoc(), TII.get(TargetOpcode::COPY), VirtReg) + .addReg(PhysReg, RegState::Kill); + if (!LiveIn) + addLiveIn(PhysReg); + return VirtReg; +} + void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) { getParent()->splice(NewAfter, this); } diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 070daf2..e269d24 100644 --- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -50,11 +50,6 @@ bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) { return false; } -/// getblockFreq - Return block frequency. Return 0 if we don't have the -/// information. Please note that initial frequency is equal to 1024. It means -/// that we should not rely on the value itself, but only on the comparison to -/// the other block frequencies. We do this to avoid using of floating points. 
-/// BlockFrequency MachineBlockFrequencyInfo:: getBlockFreq(const MachineBasicBlock *MBB) const { return MBFI->getBlockFreq(MBB); diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 61d8d38..d228286 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -84,11 +84,11 @@ namespace { bool hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet<unsigned,8> &PhysRefs, - SmallVector<unsigned,2> &PhysDefs, + SmallVectorImpl<unsigned> &PhysDefs, bool &PhysUseDef) const; bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, SmallSet<unsigned,8> &PhysRefs, - SmallVector<unsigned,2> &PhysDefs, + SmallVectorImpl<unsigned> &PhysDefs, bool &NonLocal) const; bool isCSECandidate(MachineInstr *MI); bool isProfitableToCSE(unsigned CSReg, unsigned Reg, @@ -193,7 +193,7 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet<unsigned,8> &PhysRefs, - SmallVector<unsigned,2> &PhysDefs, + SmallVectorImpl<unsigned> &PhysDefs, bool &PhysUseDef) const{ // First, add all uses to PhysRefs. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -244,7 +244,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, SmallSet<unsigned,8> &PhysRefs, - SmallVector<unsigned,2> &PhysDefs, + SmallVectorImpl<unsigned> &PhysDefs, bool &NonLocal) const { // For now conservatively returns false if the common subexpression is // not in the same basic block as the given instruction. The only exception diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 04321f3..0703df0 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -54,23 +55,28 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, GCModuleInfo* gmi) : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi), GMI(gmi) { if (TM.getRegisterInfo()) - RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo()); + RegInfo = new (Allocator) MachineRegisterInfo(TM); else RegInfo = 0; + MFInfo = 0; - FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering(), - TM.Options.RealignStack); + FrameInfo = + new (Allocator) MachineFrameInfo(TM,!F->hasFnAttribute("no-realign-stack")); + if (Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::StackAlignment)) FrameInfo->ensureMaxAlignment(Fn->getAttributes(). getStackAlignment(AttributeSet::FunctionIndex)); - ConstantPool = new (Allocator) MachineConstantPool(TM.getDataLayout()); + + ConstantPool = new (Allocator) MachineConstantPool(TM); Alignment = TM.getTargetLowering()->getMinFunctionAlignment(); + // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. 
if (!Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) Alignment = std::max(Alignment, TM.getTargetLowering()->getPrefFunctionAlignment()); + FunctionNumber = FunctionNum; JumpTableInfo = 0; } @@ -456,11 +462,15 @@ MCSymbol *MachineFunction::getPICBaseSymbol() const { // MachineFrameInfo implementation //===----------------------------------------------------------------------===// +const TargetFrameLowering *MachineFrameInfo::getFrameLowering() const { + return TM.getFrameLowering(); +} + /// ensureMaxAlignment - Make sure the function is at least Align bytes /// aligned. void MachineFrameInfo::ensureMaxAlignment(unsigned Align) { - if (!TFI.isStackRealignable() || !RealignOption) - assert(Align <= TFI.getStackAlignment() && + if (!getFrameLowering()->isStackRealignable() || !RealignOption) + assert(Align <= getFrameLowering()->getStackAlignment() && "For targets without stack realignment, Align is out of limit!"); if (MaxAlignment < Align) MaxAlignment = Align; } @@ -482,8 +492,10 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS, bool MayNeedSP, const AllocaInst *Alloca) { assert(Size != 0 && "Cannot allocate zero size stack objects!"); - Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Alignment, TFI.getStackAlignment()); + Alignment = + clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Alignment, getFrameLowering()->getStackAlignment()); Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP, Alloca)); int Index = (int)Objects.size() - NumFixedObjects - 1; @@ -498,8 +510,10 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, /// int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, unsigned Alignment) { - Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Alignment, TFI.getStackAlignment()); + Alignment = + clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Alignment, getFrameLowering()->getStackAlignment()); CreateStackObject(Size, Alignment, true, false); int Index = (int)Objects.size() - NumFixedObjects - 1; ensureMaxAlignment(Alignment); @@ -513,8 +527,10 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, /// int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment) { HasVarSizedObjects = true; - Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Alignment, TFI.getStackAlignment()); + Alignment = + clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Alignment, getFrameLowering()->getStackAlignment()); Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0)); ensureMaxAlignment(Alignment); return (int)Objects.size()-NumFixedObjects-1; @@ -532,10 +548,12 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, // the incoming frame position. If the frame object is at offset 32 and // the stack is guaranteed to be 16-byte aligned, then we know that the // object is 16-byte aligned. 
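// Editor's aside (not part of this patch): MinAlign(A, B) evaluates to the
// lowest set bit of (A | B), i.e. the largest power of two dividing both.
// For the example in the comment above, MinAlign(32, 16) == 16, so the fixed
// object inherits the full stack alignment; an object at SPOffset 40 on the
// same 16-byte-aligned stack would only get MinAlign(40, 16) == 8.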
- unsigned StackAlign = TFI.getStackAlignment(); + unsigned StackAlign = getFrameLowering()->getStackAlignment(); unsigned Align = MinAlign(SPOffset, StackAlign); - Align = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Align, TFI.getStackAlignment()); + Align = + clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Align, getFrameLowering()->getStackAlignment()); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, /*isSS*/ false, /*NeedSP*/ false, @@ -769,6 +787,10 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); } void MachineConstantPoolValue::anchor() { } +const DataLayout *MachineConstantPool::getDataLayout() const { + return TM.getDataLayout(); +} + Type *MachineConstantPoolEntry::getType() const { if (isMachineConstantPoolEntry()) return Val.MachineCPVal->getType(); @@ -850,7 +872,8 @@ unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, // FIXME, this could be made much more efficient for large constant pools. for (unsigned i = 0, e = Constants.size(); i != e; ++i) if (!Constants[i].isMachineConstantPoolEntry() && - CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, TD)) { + CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, + getDataLayout())) { if ((unsigned)Constants[i].getAlignment() < Alignment) Constants[i].Alignment = Alignment; return i; @@ -887,7 +910,7 @@ void MachineConstantPool::print(raw_ostream &OS) const { if (Constants[i].isMachineConstantPoolEntry()) Constants[i].Val.MachineCPVal->print(OS); else - OS << *(const Value*)Constants[i].Val.ConstVal; + WriteAsOperand(OS, Constants[i].Val.ConstVal, /*PrintType=*/false); OS << ", align=" << Constants[i].getAlignment(); OS << "\n"; } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 32d0668..06bb80a 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -1253,32 +1253,6 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, return true; } -/// isSafeToReMat - Return true if it's safe to rematerialize the specified -/// instruction which defined the specified register instead of copying it. -bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII, - AliasAnalysis *AA, - unsigned DstReg) const { - bool SawStore = false; - if (!TII->isTriviallyReMaterializable(this, AA) || - !isSafeToMove(TII, AA, SawStore)) - return false; - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - const MachineOperand &MO = getOperand(i); - if (!MO.isReg()) - continue; - // FIXME: For now, do not remat any instruction with register operands. - // Later on, we can loosen the restriction is the register operands have - // not been modified between the def and use. Note, this is different from - // MachineSink because the code is no longer in two-address form (at least - // partially). - if (MO.isUse()) - return false; - else if (!MO.isDead() && MO.getReg() != DstReg) - return false; - } - return true; -} - /// hasOrderedMemoryRef - Return true if this instruction may have an ordered /// or volatile memory reference, or if the information describing the memory /// reference is not available. Return false if it is known to have no ordered @@ -1411,8 +1385,10 @@ static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, const LLVMContext &Ctx = MF->getFunction()->getContext(); if (!DL.isUnknown()) { // Print source line info. 
DIScope Scope(DL.getScope(Ctx)); + assert((!Scope || Scope.isScope()) && + "Scope of a DebugLoc should be null or a DIScope."); // Omit the directory, because it's likely to be long and uninteresting. - if (Scope.Verify()) + if (Scope) CommentOS << Scope.getFilename(); else CommentOS << "<unknown>"; diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index ed3ed4d..6ad4e39 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -172,7 +172,7 @@ namespace { BitVector &PhysRegDefs, BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs, - SmallVector<CandidateInfo, 32> &Candidates); + SmallVectorImpl<CandidateInfo> &Candidates); /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the /// current loop. @@ -404,7 +404,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs, - SmallVector<CandidateInfo, 32> &Candidates) { + SmallVectorImpl<CandidateInfo> &Candidates) { bool RuledOut = false; bool HasNonInvariantUse = false; unsigned Def = 0; @@ -1084,7 +1084,7 @@ bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, return true; for (unsigned i = BackTrace.size(); i != 0; --i) { - SmallVector<unsigned, 8> &RP = BackTrace[i-1]; + SmallVectorImpl<unsigned> &RP = BackTrace[i-1]; if (RP[RCId] + Cost >= Limit) return true; } @@ -1130,7 +1130,7 @@ void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { // Update register pressure of blocks from loop header to current block. for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) { - SmallVector<unsigned, 8> &RP = BackTrace[i]; + SmallVectorImpl<unsigned> &RP = BackTrace[i]; for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); CI != CE; ++CI) { unsigned RCId = CI->first; diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index a916bd6..bb54284 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -253,13 +253,12 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, const MCObjectFileInfo *MOFI) - : ImmutablePass(ID), Context(MAI, MRI, MOFI, 0, false) { + : ImmutablePass(ID), Context(&MAI, &MRI, MOFI, 0, false) { initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); } MachineModuleInfo::MachineModuleInfo() - : ImmutablePass(ID), - Context(*(MCAsmInfo*)0, *(MCRegisterInfo*)0, (MCObjectFileInfo*)0) { + : ImmutablePass(ID), Context(0, 0, 0) { llvm_unreachable("This MachineModuleInfo constructor should never be called, " "MMI should always be explicitly constructed by " "LLVMTargetMachine"); diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 68372f6..7f2c0ca 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -19,16 +19,18 @@ using namespace llvm; -MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) - : TRI(&TRI), IsSSA(true), TracksLiveness(true) { +MachineRegisterInfo::MachineRegisterInfo(const TargetMachine &TM) + : TM(TM), IsSSA(true), TracksLiveness(true) { VRegInfo.reserve(256); RegAllocHints.reserve(256); - UsedRegUnits.resize(TRI.getNumRegUnits()); - UsedPhysRegMask.resize(TRI.getNumRegs()); + UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits()); + UsedPhysRegMask.resize(getTargetRegisterInfo()->getNumRegs()); // Create the physreg use/def lists. 
- PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()]; - memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs()); + PhysRegUseDefLists = + new MachineOperand*[getTargetRegisterInfo()->getNumRegs()]; + memset(PhysRegUseDefLists, 0, + sizeof(MachineOperand*)*getTargetRegisterInfo()->getNumRegs()); } MachineRegisterInfo::~MachineRegisterInfo() { @@ -50,7 +52,8 @@ MachineRegisterInfo::constrainRegClass(unsigned Reg, const TargetRegisterClass *OldRC = getRegClass(Reg); if (OldRC == RC) return RC; - const TargetRegisterClass *NewRC = TRI->getCommonSubClass(OldRC, RC); + const TargetRegisterClass *NewRC = + getTargetRegisterInfo()->getCommonSubClass(OldRC, RC); if (!NewRC || NewRC == OldRC) return NewRC; if (NewRC->getNumRegs() < MinNumRegs) @@ -63,7 +66,8 @@ bool MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterClass *OldRC = getRegClass(Reg); - const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC); + const TargetRegisterClass *NewRC = + getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC); // Stop early if there is no room to grow. if (NewRC == OldRC) @@ -73,14 +77,16 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { for (reg_nodbg_iterator I = reg_nodbg_begin(Reg), E = reg_nodbg_end(); I != E; ++I) { const TargetRegisterClass *OpRC = - I->getRegClassConstraint(I.getOperandNo(), TII, TRI); + I->getRegClassConstraint(I.getOperandNo(), TII, + getTargetRegisterInfo()); if (unsigned SubIdx = I.getOperand().getSubReg()) { if (OpRC) - NewRC = TRI->getMatchingSuperRegClass(NewRC, OpRC, SubIdx); + NewRC = getTargetRegisterInfo()->getMatchingSuperRegClass(NewRC, OpRC, + SubIdx); else - NewRC = TRI->getSubClassWithSubReg(NewRC, SubIdx); + NewRC = getTargetRegisterInfo()->getSubClassWithSubReg(NewRC, SubIdx); } else if (OpRC) - NewRC = TRI->getCommonSubClass(NewRC, OpRC); + NewRC = getTargetRegisterInfo()->getCommonSubClass(NewRC, OpRC); if (!NewRC || NewRC == OldRC) return false; } @@ -126,24 +132,28 @@ void MachineRegisterInfo::verifyUseList(unsigned Reg) const { MachineOperand *MO = &I.getOperand(); MachineInstr *MI = MO->getParent(); if (!MI) { - errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " use list MachineOperand " << MO << " has no parent instruction.\n"; Valid = false; } MachineOperand *MO0 = &MI->getOperand(0); unsigned NumOps = MI->getNumOperands(); if (!(MO >= MO0 && MO < MO0+NumOps)) { - errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " use list MachineOperand " << MO << " doesn't belong to parent MI: " << *MI; Valid = false; } if (!MO->isReg()) { - errs() << PrintReg(Reg, TRI) << " MachineOperand " << MO << ": " << *MO + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " MachineOperand " << MO << ": " << *MO << " is not a register\n"; Valid = false; } if (MO->getReg() != Reg) { - errs() << PrintReg(Reg, TRI) << " use-list MachineOperand " << MO << ": " + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " use-list MachineOperand " << MO << ": " << *MO << " is the wrong register\n"; Valid = false; } @@ -156,7 +166,7 @@ void MachineRegisterInfo::verifyUseLists() const { #ifndef NDEBUG for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) verifyUseList(TargetRegisterInfo::index2VirtReg(i)); - for (unsigned i = 1, e = TRI->getNumRegs(); i != e; ++i) + for 
(unsigned i = 1, e = getTargetRegisterInfo()->getNumRegs(); i != e; ++i) verifyUseList(i); #endif } @@ -390,8 +400,8 @@ void MachineRegisterInfo::dumpUses(unsigned Reg) const { #endif void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) { - ReservedRegs = TRI->getReservedRegs(MF); - assert(ReservedRegs.size() == TRI->getNumRegs() && + ReservedRegs = getTargetRegisterInfo()->getReservedRegs(MF); + assert(ReservedRegs.size() == getTargetRegisterInfo()->getNumRegs() && "Invalid ReservedRegs vector from target"); } @@ -401,7 +411,8 @@ bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg, // Check if any overlapping register is modified, or allocatable so it may be // used later. - for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) + for (MCRegAliasIterator AI(PhysReg, getTargetRegisterInfo(), true); + AI.isValid(); ++AI) if (!def_empty(*AI) || isAllocatable(*AI)) return false; return true; diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index bb6aad7..17f0af8 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -77,7 +77,7 @@ unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) { static unsigned LookForIdenticalPHI(MachineBasicBlock *BB, - SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> &PredValues) { + SmallVectorImpl<std::pair<MachineBasicBlock*, unsigned> > &PredValues) { if (BB->empty()) return 0; diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index fff6b2b..a6c5a9f 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDFS.h" @@ -30,6 +31,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" #include <queue> using namespace llvm; @@ -51,11 +53,6 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, static bool ViewMISchedDAGs = false; #endif // NDEBUG -// FIXME: remove this flag after initial testing. It should always be a good -// thing. 
-static cl::opt<bool> EnableCopyConstrain("misched-vcopy", cl::Hidden, - cl::desc("Constrain vreg copies."), cl::init(true)); - static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden, cl::desc("Enable load clustering."), cl::init(true)); @@ -207,7 +204,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); if (VerifyScheduling) { - DEBUG(LIS->print(dbgs())); + DEBUG(LIS->dump()); MF->verify(this, "Before machine scheduling."); } RegClassInfo->runOnMachineFunction(*MF); @@ -297,7 +294,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { Scheduler->finishBlock(); } Scheduler->finalizeSchedule(); - DEBUG(LIS->print(dbgs())); + DEBUG(LIS->dump()); if (VerifyScheduling) MF->verify(this, "After machine scheduling."); return true; @@ -309,7 +306,7 @@ void MachineScheduler::print(raw_ostream &O, const Module* m) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void ReadyQueue::dump() { - dbgs() << " " << Name << ": "; + dbgs() << Name << ": "; for (unsigned i = 0, e = Queue.size(); i < e; ++i) dbgs() << Queue[i]->NodeNum << " "; dbgs() << "\n"; @@ -467,7 +464,7 @@ void ScheduleDAGMI::initRegPressure() { // Close the RPTracker to finalize live ins. RPTracker.closeRegion(); - DEBUG(RPTracker.getPressure().dump(TRI)); + DEBUG(RPTracker.dump()); // Initialize the live ins and live outs. TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs); @@ -479,6 +476,13 @@ void ScheduleDAGMI::initRegPressure() { TopRPTracker.closeTop(); BotRPTracker.closeBottom(); + BotRPTracker.initLiveThru(RPTracker); + if (!BotRPTracker.getLiveThru().empty()) { + TopRPTracker.initLiveThru(BotRPTracker.getLiveThru()); + DEBUG(dbgs() << "Live Thru: "; + dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI)); + }; + // Account for liveness generated by the region boundary. if (LiveRegionEnd != RegionEnd) BotRPTracker.recede(); @@ -491,12 +495,13 @@ void ScheduleDAGMI::initRegPressure() { const std::vector<unsigned> &RegionPressure = RPTracker.getPressure().MaxSetPressure; for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) { - unsigned Limit = TRI->getRegPressureSetLimit(i); - DEBUG(dbgs() << TRI->getRegPressureSetName(i) - << "Limit " << Limit - << " Actual " << RegionPressure[i] << "\n"); - if (RegionPressure[i] > Limit) + unsigned Limit = RegClassInfo->getRegPressureSetLimit(i); + if (RegionPressure[i] > Limit) { + DEBUG(dbgs() << TRI->getRegPressureSetName(i) + << " Limit " << Limit + << " Actual " << RegionPressure[i] << "\n"); RegionCriticalPSets.push_back(PressureElement(i, 0)); + } } DEBUG(dbgs() << "Excess PSets: "; for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i) @@ -517,7 +522,7 @@ updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) { } DEBUG( for (unsigned i = 0, e = NewMaxPressure.size(); i < e; ++i) { - unsigned Limit = TRI->getRegPressureSetLimit(i); + unsigned Limit = RegClassInfo->getRegPressureSetLimit(i); if (NewMaxPressure[i] > Limit ) { dbgs() << " " << TRI->getRegPressureSetName(i) << ": " << NewMaxPressure[i] << " > " << Limit << "\n"; @@ -581,7 +586,8 @@ void ScheduleDAGMI::schedule() { /// Build the DAG and setup three register pressure trackers. void ScheduleDAGMI::buildDAGWithRegPressure() { // Initialize the register pressure tracker used by buildSchedGraph. 
-  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
+                 /*TrackUntiedDefs=*/true);
 
   // Account for liveness generated by the region boundary.
   if (LiveRegionEnd != RegionEnd)
@@ -1019,6 +1025,12 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) {
                                 GlobalSegment->start)) {
     return;
   }
+  // If the prior global segment may be defined by the same two-address
+  // instruction that also defines LocalLI, then we can't make a hole here.
+  if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->start,
+                             LocalLI->beginIndex())) {
+    return;
+  }
   // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
   // it would be a disconnected component in the live range.
   assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() &&
@@ -1101,7 +1113,7 @@ void CopyConstrain::apply(ScheduleDAGMI *DAG) {
 }
 
 //===----------------------------------------------------------------------===//
-// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
+// ConvergingScheduler - Implementation of the generic MachineSchedStrategy.
 //===----------------------------------------------------------------------===//
 
 namespace {
@@ -1112,10 +1124,9 @@ public:
   /// Represent the type of SchedCandidate found within a single queue.
   /// pickNodeBidirectional depends on these listed by decreasing priority.
   enum CandReason {
-    NoCand, PhysRegCopy, SingleExcess, SingleCritical, Cluster, Weak,
+    NoCand, PhysRegCopy, RegExcess, RegCritical, Cluster, Weak, RegMax,
     ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
-    TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
-    NodeOrder};
+    TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
 
 #ifndef NDEBUG
   static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
@@ -1160,6 +1171,9 @@ public:
     // The reason for this candidate.
     CandReason Reason;
 
+    // Set of reasons that apply to multiple candidates.
+    uint32_t RepeatReasonSet;
+
     // Register pressure values for the best candidate.
     RegPressureDelta RPDelta;
 
@@ -1167,7 +1181,7 @@ public:
     SchedResourceDelta ResDelta;
 
     SchedCandidate(const CandPolicy &policy)
-      : Policy(policy), SU(NULL), Reason(NoCand) {}
+      : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {}
 
     bool isValid() const { return SU; }
 
@@ -1180,6 +1194,9 @@ public:
       ResDelta = Best.ResDelta;
     }
 
+    bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); }
+    void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); }
+
     void initResourceDelta(const ScheduleDAGMI *DAG,
                            const TargetSchedModel *SchedModel);
   };
@@ -1189,32 +1206,21 @@ public:
     // Critical path through the DAG in expected latency.
    unsigned CriticalPath;
 
+    // Scaled count of micro-ops left to schedule.
+    unsigned RemIssueCount;
+
    // Unscheduled resources
    SmallVector<unsigned, 16> RemainingCounts;
-    // Critical resource for the unscheduled zone.
-    unsigned CritResIdx;
-    // Number of micro-ops left to schedule.
- unsigned RemainingMicroOps; void reset() { CriticalPath = 0; + RemIssueCount = 0; RemainingCounts.clear(); - CritResIdx = 0; - RemainingMicroOps = 0; } SchedRemainder() { reset(); } void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); - - unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const { - if (!SchedModel->hasInstrSchedModel()) - return 0; - - return std::max( - RemainingMicroOps * SchedModel->getMicroOpFactor(), - RemainingCounts[CritResIdx]); - } }; /// Each Scheduling boundary is associated with ready queues. It tracks the @@ -1235,8 +1241,13 @@ public: ScheduleHazardRecognizer *HazardRec; + /// Number of cycles it takes to issue the instructions scheduled in this + /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls. + /// See getStalls(). unsigned CurrCycle; - unsigned IssueCount; + + /// Micro-ops issued in the current cycle + unsigned CurrMOps; /// MinReadyCycle - Cycle of the soonest available instruction. unsigned MinReadyCycle; @@ -1244,20 +1255,35 @@ public: // The expected latency of the critical path in this scheduled zone. unsigned ExpectedLatency; - // Resources used in the scheduled zone beyond this boundary. - SmallVector<unsigned, 16> ResourceCounts; + // The latency of dependence chains leading into this zone. + // For each node scheduled bottom-up: DLat = max DLat, N.Depth. + // For each cycle scheduled: DLat -= 1. + unsigned DependentLatency; + + /// Count the scheduled (issued) micro-ops that can be retired by + /// time=CurrCycle assuming the first scheduled instr is retired at time=0. + unsigned RetiredMOps; + + // Count scheduled resources that have been executed. Resources are + // considered executed if they become ready in the time that it takes to + // saturate any resource including the one in question. Counts are scaled + // for direct comparison with other resources. Counts can be compared with + // MOps * getMicroOpFactor and Latency * getLatencyFactor. + SmallVector<unsigned, 16> ExecutedResCounts; + + /// Cache the max count for a single resource. + unsigned MaxExecutedResCount; // Cache the critical resources ID in this scheduled zone. - unsigned CritResIdx; + unsigned ZoneCritResIdx; // Is the scheduled region resource limited vs. latency limited. bool IsResourceLimited; - unsigned ExpectedCount; - #ifndef NDEBUG - // Remember the greatest min operand latency. - unsigned MaxMinLatency; + // Remember the greatest operand latency as an upper bound on the number of + // times we should retry the pending queue because of a hazard. + unsigned MaxObservedLatency; #endif void reset() { @@ -1270,19 +1296,20 @@ public: NextSUs.clear(); HazardRec = 0; CurrCycle = 0; - IssueCount = 0; + CurrMOps = 0; MinReadyCycle = UINT_MAX; ExpectedLatency = 0; - ResourceCounts.resize(1); - assert(!ResourceCounts[0] && "nonzero count for bad resource"); - CritResIdx = 0; + DependentLatency = 0; + RetiredMOps = 0; + MaxExecutedResCount = 0; + ZoneCritResIdx = 0; IsResourceLimited = false; - ExpectedCount = 0; #ifndef NDEBUG - MaxMinLatency = 0; + MaxObservedLatency = 0; #endif // Reserve a zero-count for invalid CritResIdx. 
- ResourceCounts.resize(1); + ExecutedResCounts.resize(1); + assert(!ExecutedResCounts[0] && "nonzero count for bad resource"); } /// Pending queues extend the ready queues with the same ID and the @@ -1303,25 +1330,60 @@ public: return Available.getID() == ConvergingScheduler::TopQID; } +#ifndef NDEBUG + const char *getResourceName(unsigned PIdx) { + if (!PIdx) + return "MOps"; + return SchedModel->getProcResource(PIdx)->Name; + } +#endif + + /// Get the number of latency cycles "covered" by the scheduled + /// instructions. This is the larger of the critical path within the zone + /// and the number of cycles required to issue the instructions. + unsigned getScheduledLatency() const { + return std::max(ExpectedLatency, CurrCycle); + } + unsigned getUnscheduledLatency(SUnit *SU) const { - if (isTop()) - return SU->getHeight(); - return SU->getDepth() + SU->Latency; + return isTop() ? SU->getHeight() : SU->getDepth(); + } + + unsigned getResourceCount(unsigned ResIdx) const { + return ExecutedResCounts[ResIdx]; } + /// Get the scaled count of scheduled micro-ops and resources, including + /// executed resources. unsigned getCriticalCount() const { - return ResourceCounts[CritResIdx]; + if (!ZoneCritResIdx) + return RetiredMOps * SchedModel->getMicroOpFactor(); + return getResourceCount(ZoneCritResIdx); + } + + /// Get a scaled count for the minimum execution time of the scheduled + /// micro-ops that are ready to execute by getExecutedCount. Notice the + /// feedback loop. + unsigned getExecutedCount() const { + return std::max(CurrCycle * SchedModel->getLatencyFactor(), + MaxExecutedResCount); } bool checkHazard(SUnit *SU); - void setLatencyPolicy(CandPolicy &Policy); + unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs); + + unsigned getOtherResourceCount(unsigned &OtherCritIdx); + + void setPolicy(CandPolicy &Policy, SchedBoundary &OtherZone); void releaseNode(SUnit *SU, unsigned ReadyCycle); - void bumpCycle(); + void bumpCycle(unsigned NextCycle); + + void incExecutedResources(unsigned PIdx, unsigned Count); - void countResource(unsigned PIdx, unsigned Cycles); + unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle); void bumpNode(SUnit *SU); @@ -1330,6 +1392,10 @@ public: void removeReady(SUnit *SU); SUnit *pickOnlyChoice(); + +#ifndef NDEBUG + void dumpScheduledState(); +#endif }; private: @@ -1366,15 +1432,6 @@ public: virtual void registerRoots(); protected: - void balanceZones( - ConvergingScheduler::SchedBoundary &CriticalZone, - ConvergingScheduler::SchedCandidate &CriticalCand, - ConvergingScheduler::SchedBoundary &OppositeZone, - ConvergingScheduler::SchedCandidate &OppositeCand); - - void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand, - ConvergingScheduler::SchedCandidate &BotCand); - void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary &Zone, @@ -1404,7 +1461,8 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { for (std::vector<SUnit>::iterator I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) { const MCSchedClassDesc *SC = DAG->getSchedClass(&*I); - RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC); + RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC) + * SchedModel->getMicroOpFactor(); for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { @@ -1413,13 +1471,6 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { RemainingCounts[PIdx] += (Factor * PI->Cycles); } 
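
The scaled counts used above (getCriticalCount, ExecutedResCounts, RemIssueCount) make micro-ops and heterogeneous resource cycles directly comparable: each kind is multiplied by a factor chosen so that saturating it for one cycle yields the same number of units, and dividing a scaled count by the latency factor converts back to cycles. A standalone toy of that arithmetic, with invented machine parameters rather than any real scheduling model:

    #include <algorithm>
    #include <cstdio>

    int main() {
      // Invented machine: 4-wide issue, two ALU pipes, one load/store pipe.
      // LCM(4, 2, 1) = 4 scaled units per fully used cycle.
      const unsigned LCM           = 4;
      const unsigned MicroOpFactor = LCM / 4; // 1 unit per micro-op
      const unsigned ALUFactor     = LCM / 2; // 2 units per ALU cycle
      const unsigned LSFactor      = LCM / 1; // 4 units per LS cycle

      // Suppose a zone issued 12 micro-ops, 8 ALU cycles, and 5 LS cycles.
      unsigned MOps = 12 * MicroOpFactor; // 12 units = 3 cycles at width 4
      unsigned ALU  = 8 * ALUFactor;      // 16 units = 4 cycles on 2 pipes
      unsigned LS   = 5 * LSFactor;       // 20 units = 5 cycles on 1 pipe

      // The largest scaled count identifies the critical resource; dividing
      // by the latency factor (here equal to LCM) converts back to cycles.
      unsigned Crit = std::max(MOps, std::max(ALU, LS));
      std::printf("critical count %u = %u cycles\n", Crit, Crit / LCM);
      return 0;
    }
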
} - for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds(); - PIdx != PEnd; ++PIdx) { - if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx]) - >= (int)SchedModel->getLatencyFactor()) { - CritResIdx = PIdx; - } - } } void ConvergingScheduler::SchedBoundary:: @@ -1429,7 +1480,7 @@ init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { SchedModel = smodel; Rem = rem; if (SchedModel->hasInstrSchedModel()) - ResourceCounts.resize(SchedModel->getNumProcResourceKinds()); + ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds()); } void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { @@ -1460,13 +1511,15 @@ void ConvergingScheduler::releaseTopNode(SUnit *SU) { for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { + if (I->isWeak()) + continue; unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; - unsigned MinLatency = I->getMinLatency(); + unsigned Latency = I->getLatency(); #ifndef NDEBUG - Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); + Top.MaxObservedLatency = std::max(Latency, Top.MaxObservedLatency); #endif - if (SU->TopReadyCycle < PredReadyCycle + MinLatency) - SU->TopReadyCycle = PredReadyCycle + MinLatency; + if (SU->TopReadyCycle < PredReadyCycle + Latency) + SU->TopReadyCycle = PredReadyCycle + Latency; } Top.releaseNode(SU, SU->TopReadyCycle); } @@ -1482,12 +1535,12 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) { if (I->isWeak()) continue; unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; - unsigned MinLatency = I->getMinLatency(); + unsigned Latency = I->getLatency(); #ifndef NDEBUG - Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); + Bot.MaxObservedLatency = std::max(Latency, Bot.MaxObservedLatency); #endif - if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) - SU->BotReadyCycle = SuccReadyCycle + MinLatency; + if (SU->BotReadyCycle < SuccReadyCycle + Latency) + SU->BotReadyCycle = SuccReadyCycle + Latency; } Bot.releaseNode(SU, SU->BotReadyCycle); } @@ -1521,7 +1574,7 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) { return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); - if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) { + if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) { DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops=" << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); return true; @@ -1529,45 +1582,125 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) { return false; } -/// Compute the remaining latency to determine whether ILP should be increased. -void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) { - // FIXME: compile time. In all, we visit four queues here one we should only - // need to visit the one that was last popped if we cache the result. +// Find the unscheduled node in ReadySUs with the highest latency. 
+unsigned ConvergingScheduler::SchedBoundary::
+findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
+  SUnit *LateSU = 0;
   unsigned RemLatency = 0;
-  for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
+  for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
        I != E; ++I) {
     unsigned L = getUnscheduledLatency(*I);
-    DEBUG(dbgs() << "  " << Available.getName()
-          << " RemLatency SU(" << (*I)->NodeNum << ") " << L << '\n');
-    if (L > RemLatency)
+    if (L > RemLatency) {
       RemLatency = L;
+      LateSU = *I;
+    }
   }
-  for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end();
-       I != E; ++I) {
-    unsigned L = getUnscheduledLatency(*I);
-    if (L > RemLatency)
-      RemLatency = L;
+  if (LateSU) {
+    DEBUG(dbgs() << Available.getName() << " RemLatency SU("
+          << LateSU->NodeNum << ") " << RemLatency << "c\n");
+  }
+  return RemLatency;
+}
+
+// Count resources in this zone and the remaining unscheduled
+// instructions. Return the max count, scaled. Set OtherCritIdx to the critical
+// resource index, or zero if the zone is issue limited.
+unsigned ConvergingScheduler::SchedBoundary::
+getOtherResourceCount(unsigned &OtherCritIdx) {
+  OtherCritIdx = 0;
+  if (!SchedModel->hasInstrSchedModel())
+    return 0;
+
+  unsigned OtherCritCount = Rem->RemIssueCount
+    + (RetiredMOps * SchedModel->getMicroOpFactor());
+  DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
+        << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
+  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
+       PIdx != PEnd; ++PIdx) {
+    unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
+    if (OtherCount > OtherCritCount) {
+      OtherCritCount = OtherCount;
+      OtherCritIdx = PIdx;
+    }
+  }
+  if (OtherCritIdx) {
+    DEBUG(dbgs() << "  " << Available.getName() << " + Remain CritRes: "
+          << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
+          << " " << getResourceName(OtherCritIdx) << "\n");
+  }
+  return OtherCritCount;
+}
+
+/// Set the CandPolicy for this zone given the current resources and latencies
+/// inside and outside the zone.
+void ConvergingScheduler::SchedBoundary::setPolicy(CandPolicy &Policy,
+                                                   SchedBoundary &OtherZone) {
+  // Now that potential stalls have been considered, apply preemptive
+  // heuristics based on the total latency and resources inside and outside
+  // this zone.
+
+  // Compute remaining latency. We need this both to determine whether the
+  // overall schedule has become latency-limited and whether the instructions
+  // outside this zone are resource or latency limited.
+  //
+  // The "dependent" latency is updated incrementally during scheduling as the
+  // max height/depth of scheduled nodes minus the cycles since it was
+  // scheduled:
+  //   DLat = max(N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
+  //
+  // The "independent" latency is the max ready queue depth:
+  //   ILat = max N.depth for N in Available|Pending
+  //
+  // RemainingLatency is the greater of independent and dependent latency.
+  unsigned RemLatency = DependentLatency;
+  RemLatency = std::max(RemLatency, findMaxLatency(Available.elements()));
+  RemLatency = std::max(RemLatency, findMaxLatency(Pending.elements()));
+
+  // Compute the critical resource outside the zone.
+ unsigned OtherCritIdx; + unsigned OtherCount = OtherZone.getOtherResourceCount(OtherCritIdx); + + bool OtherResLimited = false; + if (SchedModel->hasInstrSchedModel()) { + unsigned LFactor = SchedModel->getLatencyFactor(); + OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor; } - unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow(); - DEBUG(dbgs() << " " << Available.getName() - << " ExpectedLatency " << ExpectedLatency - << " CP Limit " << CriticalPathLimit << '\n'); - if (RemLatency + ExpectedLatency >= CriticalPathLimit - && RemLatency > Rem->getMaxRemainingCount(SchedModel)) { - Policy.ReduceLatency = true; - DEBUG(dbgs() << " Increase ILP: " << Available.getName() << '\n'); + if (!OtherResLimited && (RemLatency + CurrCycle > Rem->CriticalPath)) { + Policy.ReduceLatency |= true; + DEBUG(dbgs() << " " << Available.getName() << " RemainingLatency " + << RemLatency << " + " << CurrCycle << "c > CritPath " + << Rem->CriticalPath << "\n"); } + // If the same resource is limiting inside and outside the zone, do nothing. + if (ZoneCritResIdx == OtherCritIdx) + return; + + DEBUG( + if (IsResourceLimited) { + dbgs() << " " << Available.getName() << " ResourceLimited: " + << getResourceName(ZoneCritResIdx) << "\n"; + } + if (OtherResLimited) + dbgs() << " RemainingLimit: " << getResourceName(OtherCritIdx) << "\n"; + if (!IsResourceLimited && !OtherResLimited) + dbgs() << " Latency limited both directions.\n"); + + if (IsResourceLimited && !Policy.ReduceResIdx) + Policy.ReduceResIdx = ZoneCritResIdx; + + if (OtherResLimited) + Policy.DemandResIdx = OtherCritIdx; } void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { - if (ReadyCycle < MinReadyCycle) MinReadyCycle = ReadyCycle; // Check for interlocks first. For the purpose of other heuristics, an // instruction that cannot issue appears as if it's not in the ReadyQueue. - if (ReadyCycle > CurrCycle || checkHazard(SU)) + bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; + if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU)) Pending.push(SU); else Available.push(SU); @@ -1577,16 +1710,21 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, } /// Move the boundary of scheduled code by one cycle. -void ConvergingScheduler::SchedBoundary::bumpCycle() { - unsigned Width = SchedModel->getIssueWidth(); - IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; - - unsigned NextCycle = CurrCycle + 1; - assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); - if (MinReadyCycle > NextCycle) { - IssueCount = 0; - NextCycle = MinReadyCycle; - } +void ConvergingScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) { + if (SchedModel->getMicroOpBufferSize() == 0) { + assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); + if (MinReadyCycle > NextCycle) + NextCycle = MinReadyCycle; + } + // Update the current micro-ops, which will issue in the next cycle. + unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle); + CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps; + + // Decrement DependentLatency based on the next cycle. + if ((NextCycle - CurrCycle) > DependentLatency) + DependentLatency = 0; + else + DependentLatency -= (NextCycle - CurrCycle); if (!HazardRec->isEnabled()) { // Bypass HazardRec virtual calls. 
@@ -1602,34 +1740,50 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() {
     }
   }
   CheckPending = true;
-  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
+  unsigned LFactor = SchedModel->getLatencyFactor();
+  IsResourceLimited =
+    (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
+    > (int)LFactor;
+
+  DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
+}
 
-  DEBUG(dbgs() << "  " << Available.getName()
-        << " Cycle: " << CurrCycle << '\n');
+void ConvergingScheduler::SchedBoundary::incExecutedResources(unsigned PIdx,
+                                                              unsigned Count) {
+  ExecutedResCounts[PIdx] += Count;
+  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
+    MaxExecutedResCount = ExecutedResCounts[PIdx];
 }
 
 /// Add the given processor resource to this scheduled zone.
-void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
-                                                       unsigned Cycles) {
+///
+/// \param Cycles indicates the number of consecutive (non-pipelined) cycles
+/// during which this resource is consumed.
+///
+/// \return the next cycle at which the instruction may execute without
+/// oversubscribing resources.
+unsigned ConvergingScheduler::SchedBoundary::
+countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
   unsigned Factor = SchedModel->getResourceFactor(PIdx);
-  DEBUG(dbgs() << "  " << SchedModel->getProcResource(PIdx)->Name
-        << " +(" << Cycles << "x" << Factor
-        << ") / " << SchedModel->getLatencyFactor() << '\n');
-  unsigned Count = Factor * Cycles;
-  ResourceCounts[PIdx] += Count;
+  unsigned Count = Factor * Cycles;
+  DEBUG(dbgs() << "  " << getResourceName(PIdx)
+        << " +" << Cycles << "x" << Factor << "u\n");
+
+  // Update Executed resources counts.
+  incExecutedResources(PIdx, Count);
   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
   Rem->RemainingCounts[PIdx] -= Count;
 
-  // Check if this resource exceeds the current critical resource by a full
-  // cycle. If so, it becomes the critical resource.
-  if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
-      >= (int)SchedModel->getLatencyFactor()) {
-    CritResIdx = PIdx;
+  // Check if this resource exceeds the current critical resource. If so, it
+  // becomes the critical resource.
+  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
+    ZoneCritResIdx = PIdx;
     DEBUG(dbgs() << "  *** Critical resource "
-          << SchedModel->getProcResource(PIdx)->Name << " x"
-          << ResourceCounts[PIdx] << '\n');
+          << getResourceName(PIdx) << ": "
+          << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
   }
+  // TODO: We don't yet model reserved resources. It's not hard though.
+  return CurrCycle;
 }
 
 /// Move the boundary of scheduled code by one SUnit.
@@ -1643,40 +1797,96 @@ void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
     }
     HazardRec->EmitInstruction(SU);
   }
+  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
+  CurrMOps += IncMOps;
+
+  // checkHazard prevents scheduling multiple instructions per cycle that exceed
+  // issue width. However, we commonly reach the maximum. In this case
+  // opportunistically bump the cycle to avoid uselessly checking everything in
+  // the readyQ. Furthermore, a single instruction may produce more than one
+  // cycle's worth of micro-ops.
+  //
+  // TODO: Also check if this SU must end a dispatch group.
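
The IsResourceLimited computation in bumpCycle above compares the two sides of that scaling: the critical resource count against the scheduled latency multiplied by the latency factor, with a margin of one full cycle of units before resources are declared the bottleneck. A sketch of just that comparison, with invented numbers:

    #include <cstdio>

    // Resource limited only when the critical scaled resource count exceeds
    // the scaled latency by more than one cycle's worth of units.
    static bool isResourceLimited(unsigned CritCount, unsigned LatencyCycles,
                                  unsigned LFactor) {
      return (int)(CritCount - LatencyCycles * LFactor) > (int)LFactor;
    }

    int main() {
      const unsigned LFactor = 4; // scaled units per cycle (invented)
      // 24 units vs. 4 cycles: 24 - 16 = 8 > 4, so resource limited.
      std::printf("%d\n", isResourceLimited(24, 4, LFactor));
      // 18 units vs. 4 cycles: 18 - 16 = 2 <= 4, so latency limited.
      std::printf("%d\n", isResourceLimited(18, 4, LFactor));
      return 0;
    }
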
+ unsigned NextCycle = CurrCycle; + if (CurrMOps >= SchedModel->getIssueWidth()) { + ++NextCycle; + DEBUG(dbgs() << " *** Max MOps " << CurrMOps + << " at cycle " << CurrCycle << '\n'); + } + unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); + DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n"); + + switch (SchedModel->getMicroOpBufferSize()) { + case 0: + assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); + break; + case 1: + if (ReadyCycle > NextCycle) { + NextCycle = ReadyCycle; + DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n"); + } + break; + default: + // We don't currently model the OOO reorder buffer, so consider all + // scheduled MOps to be "retired". + break; + } + RetiredMOps += IncMOps; + // Update resource counts and critical resource. if (SchedModel->hasInstrSchedModel()) { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); - Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC); + unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor(); + assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted"); + Rem->RemIssueCount -= DecRemIssue; + if (ZoneCritResIdx) { + // Scale scheduled micro-ops for comparing with the critical resource. + unsigned ScaledMOps = + RetiredMOps * SchedModel->getMicroOpFactor(); + + // If scaled micro-ops are now more than the previous critical resource by + // a full cycle, then micro-ops issue becomes critical. + if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx)) + >= (int)SchedModel->getLatencyFactor()) { + ZoneCritResIdx = 0; + DEBUG(dbgs() << " *** Critical resource NumMicroOps: " + << ScaledMOps / SchedModel->getLatencyFactor() << "c\n"); + } + } for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { - countResource(PI->ProcResourceIdx, PI->Cycles); + unsigned RCycle = + countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle); + if (RCycle > NextCycle) + NextCycle = RCycle; } } - if (isTop()) { - if (SU->getDepth() > ExpectedLatency) - ExpectedLatency = SU->getDepth(); + // Update ExpectedLatency and DependentLatency. + unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency; + unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency; + if (SU->getDepth() > TopLatency) { + TopLatency = SU->getDepth(); + DEBUG(dbgs() << " " << Available.getName() + << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n"); } - else { - if (SU->getHeight() > ExpectedLatency) - ExpectedLatency = SU->getHeight(); + if (SU->getHeight() > BotLatency) { + BotLatency = SU->getHeight(); + DEBUG(dbgs() << " " << Available.getName() + << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n"); } - - IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); - - // Check the instruction group dispatch limit. - // TODO: Check if this SU must end a dispatch group. - IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); - - // checkHazard prevents scheduling multiple instructions per cycle that exceed - // issue width. However, we commonly reach the maximum. In this case - // opportunistically bump the cycle to avoid uselessly checking everything in - // the readyQ. Furthermore, a single instruction may produce more than one - // cycle's worth of micro-ops. - if (IssueCount >= SchedModel->getIssueWidth()) { - DEBUG(dbgs() << " *** Max instrs at cycle " << CurrCycle << '\n'); - bumpCycle(); + // If we stall for any reason, bump the cycle. 
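
The switch on getMicroOpBufferSize above distinguishes three machine models: 0 is fully in-order (the pending queue already held the instruction until its ready cycle), 1 issues in order but stalls in place until operands are ready, and larger values model a buffer that hides the wait, so scheduled micro-ops are simply treated as retired. A toy next-cycle computation following the same convention, with invented values rather than the real TargetSchedModel API:

    #include <algorithm>
    #include <cstdio>

    static unsigned issueCycle(unsigned BufferSize, unsigned CurrCycle,
                               unsigned ReadyCycle) {
      switch (BufferSize) {
      case 0:  return CurrCycle;                       // held back until ready
      case 1:  return std::max(CurrCycle, ReadyCycle); // explicit stall
      default: return CurrCycle;                       // buffered, no stall
      }
    }

    int main() {
      std::printf("one-deep buffer: %u\n", issueCycle(1, 10, 13)); // 13
      std::printf("out-of-order:    %u\n", issueCycle(8, 10, 13)); // 10
      return 0;
    }
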
+  if (NextCycle > CurrCycle) {
+    bumpCycle(NextCycle);
+  }
+  else {
+    // After updating ZoneCritResIdx and ExpectedLatency, check if we're
+    // resource limited. If a stall occurred, bumpCycle does this.
+    unsigned LFactor = SchedModel->getLatencyFactor();
+    IsResourceLimited =
+      (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
+      > (int)LFactor;
+  }
+  DEBUG(dumpScheduledState());
 }
 
 /// Release pending ready nodes into the available queue. This makes them
@@ -1688,6 +1898,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() {
 
   // Check to see if any of the pending instructions are ready to issue. If
   // so, add them to the available queue.
+  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
     SUnit *SU = *(Pending.begin()+i);
     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
@@ -1695,7 +1906,7 @@
     if (ReadyCycle < MinReadyCycle)
       MinReadyCycle = ReadyCycle;
 
-    if (ReadyCycle > CurrCycle)
+    if (!IsBuffered && ReadyCycle > CurrCycle)
       continue;
 
     if (checkHazard(SU))
@@ -1726,7 +1937,7 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
   if (CheckPending)
     releasePending();
 
-  if (IssueCount > 0) {
+  if (CurrMOps > 0) {
     // Defer any ready instrs that now have a hazard.
     for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
       if (checkHazard(*I)) {
@@ -1738,9 +1949,9 @@
     }
   }
   for (unsigned i = 0; Available.empty(); ++i) {
-    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
+    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) &&
            "permanent hazard"); (void)i;
-    bumpCycle();
+    bumpCycle(CurrCycle + 1);
     releasePending();
   }
   if (Available.size() == 1)
@@ -1748,104 +1959,31 @@
   return NULL;
 }
 
-/// Record the candidate policy for opposite zones with different critical
-/// resources.
-///
-/// If the CriticalZone is latency limited, don't force a policy for the
-/// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed.
-void ConvergingScheduler::balanceZones(
-  ConvergingScheduler::SchedBoundary &CriticalZone,
-  ConvergingScheduler::SchedCandidate &CriticalCand,
-  ConvergingScheduler::SchedBoundary &OppositeZone,
-  ConvergingScheduler::SchedCandidate &OppositeCand) {
-
-  if (!CriticalZone.IsResourceLimited)
-    return;
-  assert(SchedModel->hasInstrSchedModel() && "required schedmodel");
-
-  SchedRemainder *Rem = CriticalZone.Rem;
-
-  // If the critical zone is overconsuming a resource relative to the
-  // remainder, try to reduce it.
-  unsigned RemainingCritCount =
-    Rem->RemainingCounts[CriticalZone.CritResIdx];
-  if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
-      > (int)SchedModel->getLatencyFactor()) {
-    CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
-    DEBUG(dbgs() << "  Balance " << CriticalZone.Available.getName()
-          << " reduce "
-          << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
-          << '\n');
-  }
-  // If the other zone is underconsuming a resource relative to the full zone,
-  // try to increase it.
- unsigned OppositeCount = - OppositeZone.ResourceCounts[CriticalZone.CritResIdx]; - if ((int)(OppositeZone.ExpectedCount - OppositeCount) - > (int)SchedModel->getLatencyFactor()) { - OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx; - DEBUG(dbgs() << " Balance " << OppositeZone.Available.getName() - << " demand " - << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name - << '\n'); - } -} - -/// Determine if the scheduled zones exceed resource limits or critical path and -/// set each candidate's ReduceHeight policy accordingly. -void ConvergingScheduler::checkResourceLimits( - ConvergingScheduler::SchedCandidate &TopCand, - ConvergingScheduler::SchedCandidate &BotCand) { - - // Set ReduceLatency to true if needed. - Bot.setLatencyPolicy(BotCand.Policy); - Top.setLatencyPolicy(TopCand.Policy); - - // Handle resource-limited regions. - if (Top.IsResourceLimited && Bot.IsResourceLimited - && Top.CritResIdx == Bot.CritResIdx) { - // If the scheduled critical resource in both zones is no longer the - // critical remaining resource, attempt to reduce resource height both ways. - if (Top.CritResIdx != Rem.CritResIdx) { - TopCand.Policy.ReduceResIdx = Top.CritResIdx; - BotCand.Policy.ReduceResIdx = Bot.CritResIdx; - DEBUG(dbgs() << " Reduce scheduled " - << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n'); - } - return; - } - // Handle latency-limited regions. - if (!Top.IsResourceLimited && !Bot.IsResourceLimited) { - // If the total scheduled expected latency exceeds the region's critical - // path then reduce latency both ways. - // - // Just because a zone is not resource limited does not mean it is latency - // limited. Unbuffered resource, such as max micro-ops may cause CurrCycle - // to exceed expected latency. - if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath) - && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) { - TopCand.Policy.ReduceLatency = true; - BotCand.Policy.ReduceLatency = true; - DEBUG(dbgs() << " Reduce scheduled latency " << Top.ExpectedLatency - << " + " << Bot.ExpectedLatency << '\n'); - } - return; +#ifndef NDEBUG +// This is useful information to dump after bumpNode. +// Note that the Queue contents are more useful before pickNodeFromQueue. +void ConvergingScheduler::SchedBoundary::dumpScheduledState() { + unsigned ResFactor; + unsigned ResCount; + if (ZoneCritResIdx) { + ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx); + ResCount = getResourceCount(ZoneCritResIdx); } - // The critical resource is different in each zone, so request balancing. - - // Compute the cost of each zone. 
- Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle); - Top.ExpectedCount = std::max( - Top.getCriticalCount(), - Top.ExpectedCount * SchedModel->getLatencyFactor()); - Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle); - Bot.ExpectedCount = std::max( - Bot.getCriticalCount(), - Bot.ExpectedCount * SchedModel->getLatencyFactor()); - - balanceZones(Top, TopCand, Bot, BotCand); - balanceZones(Bot, BotCand, Top, TopCand); + else { + ResFactor = SchedModel->getMicroOpFactor(); + ResCount = RetiredMOps * SchedModel->getMicroOpFactor(); + } + unsigned LFactor = SchedModel->getLatencyFactor(); + dbgs() << Available.getName() << " @" << CurrCycle << "c\n" + << " Retired: " << RetiredMOps; + dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c"; + dbgs() << "\n Critical: " << ResCount / LFactor << "c, " + << ResCount / ResFactor << " " << getResourceName(ZoneCritResIdx) + << "\n ExpectedLatency: " << ExpectedLatency << "c\n" + << (IsResourceLimited ? " - Resource" : " - Latency") + << " limited.\n"; } +#endif void ConvergingScheduler::SchedCandidate:: initResourceDelta(const ScheduleDAGMI *DAG, @@ -1864,6 +2002,7 @@ initResourceDelta(const ScheduleDAGMI *DAG, } } + /// Return true if this heuristic determines order. static bool tryLess(int TryVal, int CandVal, ConvergingScheduler::SchedCandidate &TryCand, @@ -1878,6 +2017,7 @@ static bool tryLess(int TryVal, int CandVal, Cand.Reason = Reason; return true; } + Cand.setRepeat(Reason); return false; } @@ -1894,9 +2034,34 @@ static bool tryGreater(int TryVal, int CandVal, Cand.Reason = Reason; return true; } + Cand.setRepeat(Reason); return false; } +static bool tryPressure(const PressureElement &TryP, + const PressureElement &CandP, + ConvergingScheduler::SchedCandidate &TryCand, + ConvergingScheduler::SchedCandidate &Cand, + ConvergingScheduler::CandReason Reason) { + // If both candidates affect the same set, go with the smallest increase. + if (TryP.PSetID == CandP.PSetID) { + return tryLess(TryP.UnitIncrease, CandP.UnitIncrease, TryCand, Cand, + Reason); + } + // If one candidate decreases and the other increases, go with it. + if (tryLess(TryP.UnitIncrease < 0, CandP.UnitIncrease < 0, TryCand, Cand, + Reason)) { + return true; + } + // If TryP has lower Rank, it has a higher priority. + int TryRank = TryP.PSetRank(); + int CandRank = CandP.PSetRank(); + // If the candidates are decreasing pressure, reverse priority. + if (TryP.UnitIncrease < 0) + std::swap(TryRank, CandRank); + return tryGreater(TryRank, CandRank, TryCand, Cand, Reason); +} + static unsigned getWeakLeft(const SUnit *SU, bool isTop) { return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft; } @@ -1962,20 +2127,16 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, TryCand, Cand, PhysRegCopy)) return; - // Avoid exceeding the target's limit. - if (tryLess(TryCand.RPDelta.Excess.UnitIncrease, - Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess)) + // Avoid exceeding the target's limit. If signed PSetID is negative, it is + // invalid; convert it to INT_MAX to give it lowest priority. + if (tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand, + RegExcess)) return; - if (Cand.Reason == SingleExcess) - Cand.Reason = MultiPressure; // Avoid increasing the max critical pressure in the scheduled region. 
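
tryLess, tryGreater, and tryPressure above all follow one pattern: heuristics run in decreasing priority order, the first one that distinguishes the two candidates decides the comparison and records its reason, and exact ties fall through to the next, weaker heuristic. A much-reduced standalone model of that cascade, with enum values and pressure fields invented for illustration:

    #include <cstdio>

    enum Reason { NoCand, RegExcess, RegCritical, NodeOrder };

    struct Candidate {
      unsigned Node;     // original DAG order
      int ExcessDelta;   // pressure above the target's limit
      int CriticalDelta; // pressure above the region's critical sets
      Reason Why;
    };

    // Prefer the smaller value; true means this heuristic decided the order.
    static bool tryLess(int TryVal, int CandVal, Candidate &TryCand,
                        Candidate &Cand, Reason R) {
      if (TryVal < CandVal) { TryCand.Why = R; return true; }
      if (TryVal > CandVal) { if (Cand.Why > R) Cand.Why = R; return true; }
      return false; // tie: defer to the next heuristic
    }

    // Returns true if TryCand should replace Cand.
    static bool tryCandidate(Candidate &Cand, Candidate &TryCand) {
      if (tryLess(TryCand.ExcessDelta, Cand.ExcessDelta, TryCand, Cand,
                  RegExcess))
        return TryCand.Why == RegExcess;
      if (tryLess(TryCand.CriticalDelta, Cand.CriticalDelta, TryCand, Cand,
                  RegCritical))
        return TryCand.Why == RegCritical;
      TryCand.Why = NodeOrder; // all else equal: keep original order
      return TryCand.Node < Cand.Node;
    }

    int main() {
      Candidate A = { 0, 2, 1, NoCand };
      Candidate B = { 1, 2, 0, NoCand }; // ties on excess, wins on critical
      std::printf("prefer B: %d, reason %d\n", tryCandidate(A, B), (int)B.Why);
      return 0;
    }
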
- if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease, - Cand.RPDelta.CriticalMax.UnitIncrease, - TryCand, Cand, SingleCritical)) + if (tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax, + TryCand, Cand, RegCritical)) return; - if (Cand.Reason == SingleCritical) - Cand.Reason = MultiPressure; // Keep clustered nodes together to encourage downstream peephole // optimizations which may reduce resource requirements. @@ -1990,17 +2151,16 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, return; // Weak edges are for clustering and other constraints. - // - // Deferring TryCand here does not change Cand's reason. This is good in the - // sense that a bad candidate shouldn't affect a previous candidate's - // goodness, but bad in that it is assymetric and depends on queue order. - CandReason OrigReason = Cand.Reason; if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()), getWeakLeft(Cand.SU, Zone.isTop()), TryCand, Cand, Weak)) { - Cand.Reason = OrigReason; return; } + // Avoid increasing the max pressure of the entire region. + if (tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, + TryCand, Cand, RegMax)) + return; + // Avoid critical resource consumption and balance the schedule. TryCand.initResourceDelta(DAG, SchedModel); if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, @@ -2014,8 +2174,7 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, // Avoid serializing long latency dependence chains. if (Cand.Policy.ReduceLatency) { if (Zone.isTop()) { - if (Cand.SU->getDepth() * SchedModel->getLatencyFactor() - > Zone.ExpectedCount) { + if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), TryCand, Cand, TopDepthReduce)) return; @@ -2025,8 +2184,7 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, return; } else { - if (Cand.SU->getHeight() * SchedModel->getLatencyFactor() - > Zone.ExpectedCount) { + if (Cand.SU->getHeight() > Zone.getScheduledLatency()) { if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), TryCand, Cand, BotHeightReduce)) return; @@ -2037,16 +2195,9 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, } } - // Avoid increasing the max pressure of the entire region. - if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease, - Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax)) - return; - if (Cand.Reason == SingleMax) - Cand.Reason = MultiPressure; - // Prefer immediate defs/users of the last scheduled instruction. This is a - // nice pressure avoidance strategy that also conserves the processor's - // register renaming resources and keeps the machine code readable. + // local pressure avoidance strategy that also makes the machine code + // readable. if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU), TryCand, Cand, NextDefUse)) return; @@ -2058,49 +2209,17 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, } } -/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is -/// more desirable than RHS from scheduling standpoint. -static bool compareRPDelta(const RegPressureDelta &LHS, - const RegPressureDelta &RHS) { - // Compare each component of pressure in decreasing order of importance - // without checking if any are valid. Invalid PressureElements are assumed to - // have UnitIncrease==0, so are neutral. - - // Avoid increasing the max critical pressure in the scheduled region. 
- if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) { - DEBUG(dbgs() << " RP excess top - bot: " - << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n'); - return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease; - } - // Avoid increasing the max critical pressure in the scheduled region. - if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) { - DEBUG(dbgs() << " RP critical top - bot: " - << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease) - << '\n'); - return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease; - } - // Avoid increasing the max pressure of the entire region. - if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) { - DEBUG(dbgs() << " RP current top - bot: " - << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease) - << '\n'); - return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease; - } - return false; -} - #ifndef NDEBUG const char *ConvergingScheduler::getReasonStr( ConvergingScheduler::CandReason Reason) { switch (Reason) { case NoCand: return "NOCAND "; case PhysRegCopy: return "PREG-COPY"; - case SingleExcess: return "REG-EXCESS"; - case SingleCritical: return "REG-CRIT "; + case RegExcess: return "REG-EXCESS"; + case RegCritical: return "REG-CRIT "; case Cluster: return "CLUSTER "; case Weak: return "WEAK "; - case SingleMax: return "REG-MAX "; - case MultiPressure: return "REG-MULTI "; + case RegMax: return "REG-MAX "; case ResourceReduce: return "RES-REDUCE"; case ResourceDemand: return "RES-DEMAND"; case TopDepthReduce: return "TOP-DEPTH "; @@ -2120,13 +2239,13 @@ void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) { switch (Cand.Reason) { default: break; - case SingleExcess: + case RegExcess: P = Cand.RPDelta.Excess; break; - case SingleCritical: + case RegCritical: P = Cand.RPDelta.CriticalMax; break; - case SingleMax: + case RegMax: P = Cand.RPDelta.CurrentMax; break; case ResourceReduce: @@ -2208,18 +2327,19 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) { // efficient, but also provides the best heuristics for CriticalPSets. if (SUnit *SU = Bot.pickOnlyChoice()) { IsTopNode = false; - DEBUG(dbgs() << "Pick Top NOCAND\n"); + DEBUG(dbgs() << "Pick Bot NOCAND\n"); return SU; } if (SUnit *SU = Top.pickOnlyChoice()) { IsTopNode = true; - DEBUG(dbgs() << "Pick Bot NOCAND\n"); + DEBUG(dbgs() << "Pick Top NOCAND\n"); return SU; } CandPolicy NoPolicy; SchedCandidate BotCand(NoPolicy); SchedCandidate TopCand(NoPolicy); - checkResourceLimits(TopCand, BotCand); + Bot.setPolicy(BotCand.Policy, Top); + Top.setPolicy(TopCand.Policy, Bot); // Prefer bottom scheduling when heuristics are silent. pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); @@ -2232,7 +2352,10 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) { // affects picking from either Q. If scheduling in one direction must // increase pressure for one of the excess PSets, then schedule in that // direction first to provide more freedom in the other direction. 
-  if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
+  if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess))
+      || (BotCand.Reason == RegCritical
+          && !BotCand.isRepeat(RegCritical)))
+  {
     IsTopNode = false;
     tracePick(BotCand, IsTopNode);
     return BotCand.SU;
@@ -2241,30 +2364,13 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
   pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
   assert(TopCand.Reason != NoCand && "failed to find the first candidate");
 
-  // If either Q has a single candidate that minimizes pressure above the
-  // original region's pressure pick it.
-  if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
-    if (TopCand.Reason < BotCand.Reason) {
-      IsTopNode = true;
-      tracePick(TopCand, IsTopNode);
-      return TopCand.SU;
-    }
-    IsTopNode = false;
-    tracePick(BotCand, IsTopNode);
-    return BotCand.SU;
-  }
-  // Check for a salient pressure difference and pick the best from either side.
-  if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
-    IsTopNode = true;
-    tracePick(TopCand, IsTopNode);
-    return TopCand.SU;
-  }
-  // Otherwise prefer the bottom candidate, in node order if all else failed.
+  // Choose the queue with the most important (lowest enum) reason.
   if (TopCand.Reason < BotCand.Reason) {
     IsTopNode = true;
     tracePick(TopCand, IsTopNode);
     return TopCand.SU;
   }
+  // Otherwise prefer the bottom candidate, in node order if all else failed.
   IsTopNode = false;
   tracePick(BotCand, IsTopNode);
   return BotCand.SU;
@@ -2348,13 +2454,13 @@ void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
 /// them here. See comments in biasPhysRegCopy.
 void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
   if (IsTopNode) {
-    SU->TopReadyCycle = Top.CurrCycle;
+    SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.CurrCycle);
     Top.bumpNode(SU);
     if (SU->hasPhysRegUses)
       reschedulePhysRegCopies(SU, true);
   }
   else {
-    SU->BotReadyCycle = Bot.CurrCycle;
+    SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.CurrCycle);
     Bot.bumpNode(SU);
     if (SU->hasPhysRegDefs)
       reschedulePhysRegCopies(SU, false);
@@ -2372,8 +2478,7 @@ static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
   // FIXME: extend the mutation API to allow earlier mutations to instantiate
   // data and pass it to later mutations. Have a single mutation that gathers
   // the interesting nodes in one pass.
-  if (EnableCopyConstrain)
-    DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
+  DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
   if (EnableLoadCluster)
     DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
   if (EnableMacroFusion)
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 4dafbe5..dacdbdd 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -394,7 +394,7 @@ static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) {
 /// collectDebugValues - Scan instructions following MI and collect any
 /// matching DBG_VALUEs.
 static void collectDebugValues(MachineInstr *MI,
-                               SmallVector<MachineInstr *, 2> & DbgValues) {
+                               SmallVectorImpl<MachineInstr *> &DbgValues) {
   DbgValues.clear();
   if (!MI->getOperand(0).isReg())
     return;
@@ -537,8 +537,8 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
   // We give successors with smaller loop depth higher priority.
SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end()); std::stable_sort(Succs.begin(), Succs.end(), SuccessorSorter(LI)); - for (SmallVector<MachineBasicBlock*, 4>::iterator SI = Succs.begin(), - E = Succs.end(); SI != E; ++SI) { + for (SmallVectorImpl<MachineBasicBlock *>::iterator SI = Succs.begin(), + E = Succs.end(); SI != E; ++SI) { MachineBasicBlock *SuccBlock = *SI; bool LocalUse = false; if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB, @@ -697,7 +697,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { ++MachineBasicBlock::iterator(MI)); // Move debug values. - for (SmallVector<MachineInstr *, 2>::iterator DBI = DbgValuesToSink.begin(), + for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(), DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) { MachineInstr *DbgMI = *DBI; SuccToSinkTo->splice(InsertPos, ParentBlock, DbgMI, diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index 00f702c..6aa3f67 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -853,8 +853,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) { // Add latency if DefMI is a real instruction. Transients get latency 0. if (!Dep.DefMI->isTransient()) DepCycle += MTM.SchedModel - .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp, - /* FindMin = */ false); + .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp); Cycle = std::max(Cycle, DepCycle); } // Remember the instruction depth. @@ -902,8 +901,7 @@ static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height, // We may not know the UseMI of this dependency, if it came from the // live-in list. SchedModel can handle a NULL UseMI. DepHeight += SchedModel - .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op, - /* FindMin = */ false); + .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op); } Height = std::max(Height, DepHeight); // This regunit is dead above MI. @@ -941,7 +939,7 @@ static bool pushDepHeight(const DataDep &Dep, // Adjust height by Dep.DefMI latency. if (!Dep.DefMI->isTransient()) UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, - UseMI, Dep.UseOp, false); + UseMI, Dep.UseOp); // Update Heights[DefMI] to be the maximum height seen. MIHeightMap::iterator I; @@ -1171,7 +1169,7 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const { // Add latency if DefMI is a real instruction. Transients get latency 0. 
if (!Dep.DefMI->isTransient()) DepCycle += TE.MTM.SchedModel - .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp, false); + .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp); return DepCycle; } diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index a7f8112..e74bfc8 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -227,6 +228,8 @@ namespace { void verifyLiveIntervalValue(const LiveInterval&, VNInfo*); void verifyLiveIntervalSegment(const LiveInterval&, LiveInterval::const_iterator); + + void verifyStackFrame(); }; struct MachineVerifierPass : public MachineFunctionPass { @@ -268,8 +271,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { raw_ostream *OutFile = 0; if (OutFileName) { std::string ErrorInfo; - OutFile = new raw_fd_ostream(OutFileName, ErrorInfo, - raw_fd_ostream::F_Append); + OutFile = new raw_fd_ostream(OutFileName, ErrorInfo, sys::fs::F_Append); if (!ErrorInfo.empty()) { errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n'; exit(1); @@ -475,6 +477,8 @@ void MachineVerifier::visitMachineFunctionBefore() { // Check that the register use lists are sane. MRI->verifyUseLists(); + + verifyStackFrame(); } // Does iterator point to a and b as the first two elements? @@ -1606,3 +1610,130 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { } } } + +namespace { + // FrameSetup and FrameDestroy can have zero adjustment, so using a single + // integer, we can't tell whether it is a FrameSetup or FrameDestroy if the + // value is zero. + // We use a bool plus an integer to capture the stack state. + struct StackStateOfBB { + StackStateOfBB() : EntryValue(0), ExitValue(0), EntryIsSetup(false), + ExitIsSetup(false) { } + StackStateOfBB(int EntryVal, int ExitVal, bool EntrySetup, bool ExitSetup) : + EntryValue(EntryVal), ExitValue(ExitVal), EntryIsSetup(EntrySetup), + ExitIsSetup(ExitSetup) { } + // Can be negative, which means we are setting up a frame. + int EntryValue; + int ExitValue; + bool EntryIsSetup; + bool ExitIsSetup; + }; +} + +/// Make sure on every path through the CFG, a FrameSetup <n> is always followed +/// by a FrameDestroy <n>, stack adjustments are identical on all +/// CFG edges to a merge point, and frame is destroyed at end of a return block. +void MachineVerifier::verifyStackFrame() { + int FrameSetupOpcode = TII->getCallFrameSetupOpcode(); + int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); + + SmallVector<StackStateOfBB, 8> SPState; + SPState.resize(MF->getNumBlockIDs()); + SmallPtrSet<const MachineBasicBlock*, 8> Reachable; + + // Visit the MBBs in DFS order. + for (df_ext_iterator<const MachineFunction*, + SmallPtrSet<const MachineBasicBlock*, 8> > + DFI = df_ext_begin(MF, Reachable), DFE = df_ext_end(MF, Reachable); + DFI != DFE; ++DFI) { + const MachineBasicBlock *MBB = *DFI; + + StackStateOfBB BBState; + // Check the exit state of the DFS stack predecessor. 
+ if (DFI.getPathLength() >= 2) { + const MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2); + assert(Reachable.count(StackPred) && + "DFS stack predecessor is already visited.\n"); + BBState.EntryValue = SPState[StackPred->getNumber()].ExitValue; + BBState.EntryIsSetup = SPState[StackPred->getNumber()].ExitIsSetup; + BBState.ExitValue = BBState.EntryValue; + BBState.ExitIsSetup = BBState.EntryIsSetup; + } + + // Update stack state by checking contents of MBB. + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + if (I->getOpcode() == FrameSetupOpcode) { + // The first operand of a FrameOpcode should be i32. + int Size = I->getOperand(0).getImm(); + assert(Size >= 0 && + "Value should be non-negative in FrameSetup and FrameDestroy.\n"); + + if (BBState.ExitIsSetup) + report("FrameSetup is after another FrameSetup", I); + BBState.ExitValue -= Size; + BBState.ExitIsSetup = true; + } + + if (I->getOpcode() == FrameDestroyOpcode) { + // The first operand of a FrameOpcode should be i32. + int Size = I->getOperand(0).getImm(); + assert(Size >= 0 && + "Value should be non-negative in FrameSetup and FrameDestroy.\n"); + + if (!BBState.ExitIsSetup) + report("FrameDestroy is not after a FrameSetup", I); + int AbsSPAdj = BBState.ExitValue < 0 ? -BBState.ExitValue : + BBState.ExitValue; + if (BBState.ExitIsSetup && AbsSPAdj != Size) { + report("FrameDestroy <n> is after FrameSetup <m>", I); + *OS << "FrameDestroy <" << Size << "> is after FrameSetup <" + << AbsSPAdj << ">.\n"; + } + BBState.ExitValue += Size; + BBState.ExitIsSetup = false; + } + } + SPState[MBB->getNumber()] = BBState; + + // Make sure the exit state of any predecessor is consistent with the entry + // state. + for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(), + E = MBB->pred_end(); I != E; ++I) { + if (Reachable.count(*I) && + (SPState[(*I)->getNumber()].ExitValue != BBState.EntryValue || + SPState[(*I)->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) { + report("The exit stack state of a predecessor is inconsistent.", MBB); + *OS << "Predecessor BB#" << (*I)->getNumber() << " has exit state (" + << SPState[(*I)->getNumber()].ExitValue << ", " + << SPState[(*I)->getNumber()].ExitIsSetup + << "), while BB#" << MBB->getNumber() << " has entry state (" + << BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n"; + } + } + + // Make sure the entry state of any successor is consistent with the exit + // state. + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + if (Reachable.count(*I) && + (SPState[(*I)->getNumber()].EntryValue != BBState.ExitValue || + SPState[(*I)->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) { + report("The entry stack state of a successor is inconsistent.", MBB); + *OS << "Successor BB#" << (*I)->getNumber() << " has entry state (" + << SPState[(*I)->getNumber()].EntryValue << ", " + << SPState[(*I)->getNumber()].EntryIsSetup + << "), while BB#" << MBB->getNumber() << " has exit state (" + << BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n"; + } + } + + // Make sure a basic block with return ends with zero stack adjustment. 
+ if (!MBB->empty() && MBB->back().isReturn()) { + if (BBState.ExitIsSetup) + report("A return block ends with a FrameSetup.", MBB); + if (BBState.ExitValue) + report("A return block ends with a nonzero stack adjustment.", MBB); + } + } +} diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 5584708..bf23eca 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -66,7 +66,7 @@ namespace { /// bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB); void LowerPHINode(MachineBasicBlock &MBB, - MachineBasicBlock::iterator AfterPHIsIt); + MachineBasicBlock::iterator LastPHIIt); /// analyzePHINodes - Gather information about the PHI nodes in /// here. In particular, we want to map the number of uses of a virtual @@ -185,10 +185,11 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF, // Get an iterator to the first instruction after the last PHI node (this may // also be the end of the basic block). - MachineBasicBlock::iterator AfterPHIsIt = MBB.SkipPHIsAndLabels(MBB.begin()); + MachineBasicBlock::iterator LastPHIIt = + prior(MBB.SkipPHIsAndLabels(MBB.begin())); while (MBB.front().isPHI()) - LowerPHINode(MBB, AfterPHIsIt); + LowerPHINode(MBB, LastPHIIt); return true; } @@ -218,8 +219,11 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, /// LowerPHINode - Lower the PHI node at the top of the specified block, /// void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, - MachineBasicBlock::iterator AfterPHIsIt) { + MachineBasicBlock::iterator LastPHIIt) { ++NumLowered; + + MachineBasicBlock::iterator AfterPHIsIt = llvm::next(LastPHIIt); + // Unlink the PHI node from the basic block, but don't delete the PHI yet. MachineInstr *MPhi = MBB.remove(MBB.begin()); diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 1a6b62b..c0861c5 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -300,6 +300,8 @@ void TargetPassConfig::addPass(Pass *P) { if (Started && !Stopped) PM->add(P); + else + delete P; if (StopAfter == PassID) Stopped = true; if (StartAfter == PassID) @@ -331,7 +333,7 @@ AnalysisID TargetPassConfig::addPass(AnalysisID PassID) { addPass(P); // Ends the lifetime of P. // Add the passes after the pass P if there is any. - for (SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4>::iterator + for (SmallVectorImpl<std::pair<AnalysisID, IdentifyingPassPtr> >::iterator I = Impl->InsertedPasses.begin(), E = Impl->InsertedPasses.end(); I != E; ++I) { if ((*I).first == PassID) { @@ -396,15 +398,15 @@ void TargetPassConfig::addPassesToHandleExceptions() { // removed from the parent invoke(s). This could happen when a landing // pad is shared by multiple invokes and is also a target of a normal // edge from elsewhere. - addPass(createSjLjEHPreparePass(TM->getTargetLowering())); + addPass(createSjLjEHPreparePass(TM)); // FALLTHROUGH case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: case ExceptionHandling::Win64: - addPass(createDwarfEHPass(TM->getTargetLowering())); + addPass(createDwarfEHPass(TM)); break; case ExceptionHandling::None: - addPass(createLowerInvokePass(TM->getTargetLowering())); + addPass(createLowerInvokePass(TM)); // The lower invoke pass may create unreachable code. Remove it. addPass(createUnreachableBlockEliminationPass()); @@ -416,13 +418,13 @@ void TargetPassConfig::addPassesToHandleExceptions() { /// before exception handling preparation passes. 
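
Returning to MachineVerifier's verifyStackFrame, now complete above: a standalone sketch of the invariant it enforces, with a toy CFG standing in for MachineFunction. Each block inherits its entry state from its DFS predecessor, adjusts it for FrameSetup/FrameDestroy, and every edge must connect matching exit and entry states; the names and the adjustment encoding below are illustrative.

    #include <cstdio>
    #include <vector>

    struct Block {
      std::vector<int> Succs;
      std::vector<int> Adjust; // +N = FrameSetup <N>, -N = FrameDestroy <N>
    };

    static bool verifyStack(const std::vector<Block> &CFG) {
      std::vector<int> Entry(CFG.size(), 0), Exit(CFG.size(), 0);
      std::vector<bool> Seen(CFG.size(), false);
      std::vector<int> Stack(1, 0);
      Seen[0] = true;
      while (!Stack.empty()) {
        int B = Stack.back();
        Stack.pop_back();
        int State = Entry[B];
        for (unsigned i = 0; i < CFG[B].Adjust.size(); ++i)
          State -= CFG[B].Adjust[i]; // setup lowers SP, destroy restores it
        Exit[B] = State;
        for (unsigned i = 0; i < CFG[B].Succs.size(); ++i) {
          int S = CFG[B].Succs[i];
          if (!Seen[S]) {
            Seen[S] = true;
            Entry[S] = State; // the first edge in defines the entry state
            Stack.push_back(S);
          } else if (Entry[S] != State) {
            std::fprintf(stderr, "inconsistent stack state on edge %d -> %d\n",
                         B, S);
            return false;
          }
        }
      }
      return true;
    }

    int main() {
      // Diamond CFG: block 1 destroys the frame, block 2 forgets to, so the
      // two paths reach the join block 3 with different adjustments.
      std::vector<Block> CFG(4);
      CFG[0].Succs.push_back(1); CFG[0].Succs.push_back(2);
      CFG[0].Adjust.push_back(16);  // FrameSetup <16>
      CFG[1].Succs.push_back(3);
      CFG[1].Adjust.push_back(-16); // FrameDestroy <16>
      CFG[2].Succs.push_back(3);
      return verifyStack(CFG) ? 0 : 1; // reports the mismatch, exits 1
    }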
void TargetPassConfig::addCodeGenPrepare() { if (getOptLevel() != CodeGenOpt::None && !DisableCGP) - addPass(createCodeGenPreparePass(getTargetLowering())); + addPass(createCodeGenPreparePass(TM)); } /// Add common passes that perform LLVM IR to IR transforms in preparation for /// instruction selection. void TargetPassConfig::addISelPrepare() { - addPass(createStackProtectorPass(getTargetLowering())); + addPass(createStackProtectorPass(TM)); addPreISel(); diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 4b301d8..1965188 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/IR/InlineAsm.h" @@ -227,7 +228,8 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) { std::vector<CalleeSavedInfo> CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; - if (F.getRegInfo().isPhysRegUsed(Reg)) { + // Functions which call __builtin_unwind_init get all their registers saved. + if (F.getRegInfo().isPhysRegUsed(Reg) || F.getMMI().callsUnwindInit()) { // If the reg is modified, save it! CSI.push_back(CalleeSavedInfo(Reg)); } @@ -556,14 +558,18 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { unsigned MaxAlign = MFI->getMaxAlignment(); // Make sure the special register scavenging spill slot is closest to the - // frame pointer if a frame pointer is required. + // incoming stack pointer if a frame pointer is required and is closer + // to the incoming rather than the final stack pointer. const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) && - !RegInfo->needsStackRealignment(Fn)) { + bool EarlyScavengingSlots = (TFI.hasFP(Fn) && + TFI.isFPCloseToIncomingSP() && + RegInfo->useFPForScavengingIndex(Fn) && + !RegInfo->needsStackRealignment(Fn)); + if (RS && EarlyScavengingSlots) { SmallVector<int, 2> SFIs; RS->getScavengingFrameIndices(SFIs); - for (SmallVector<int, 2>::iterator I = SFIs.begin(), - IE = SFIs.end(); I != IE; ++I) + for (SmallVectorImpl<int>::iterator I = SFIs.begin(), + IE = SFIs.end(); I != IE; ++I) AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); } @@ -643,12 +649,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure the special register scavenging spill slot is closest to the // stack pointer. - if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) || - !RegInfo->useFPForScavengingIndex(Fn))) { + if (RS && !EarlyScavengingSlots) { SmallVector<int, 2> SFIs; RS->getScavengingFrameIndices(SFIs); - for (SmallVector<int, 2>::iterator I = SFIs.begin(), - IE = SFIs.end(); I != IE; ++I) + for (SmallVectorImpl<int>::iterator I = SFIs.begin(), + IE = SFIs.end(); I != IE; ++I) AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); } @@ -723,6 +728,40 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { void PEI::replaceFrameIndices(MachineFunction &Fn) { if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do? + // Store SPAdj at exit of a basic block. + SmallVector<int, 8> SPState; + SPState.resize(Fn.getNumBlockIDs()); + SmallPtrSet<MachineBasicBlock*, 8> Reachable; + + // Iterate over the reachable blocks in DFS order. 
+ for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> > + DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable); + DFI != DFE; ++DFI) { + int SPAdj = 0; + // Check the exit state of the DFS stack predecessor. + if (DFI.getPathLength() >= 2) { + MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2); + assert(Reachable.count(StackPred) && + "DFS stack predecessor is already visited.\n"); + SPAdj = SPState[StackPred->getNumber()]; + } + MachineBasicBlock *BB = *DFI; + replaceFrameIndices(BB, Fn, SPAdj); + SPState[BB->getNumber()] = SPAdj; + } + + // Handle the unreachable blocks. + for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { + if (Reachable.count(BB)) + // Already handled in DFS traversal. + continue; + int SPAdj = 0; + replaceFrameIndices(BB, Fn, SPAdj); + } +} + +void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, + int &SPAdj) { const TargetMachine &TM = Fn.getTarget(); assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); @@ -733,89 +772,85 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); - for (MachineFunction::iterator BB = Fn.begin(), - E = Fn.end(); BB != E; ++BB) { -#ifndef NDEBUG - int SPAdjCount = 0; // frame setup / destroy count. -#endif - int SPAdj = 0; // SP offset due to call frame setup / destroy. - if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); + if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); - for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { - if (I->getOpcode() == FrameSetupOpcode || - I->getOpcode() == FrameDestroyOpcode) { -#ifndef NDEBUG - // Track whether we see even pairs of them - SPAdjCount += I->getOpcode() == FrameSetupOpcode ? 1 : -1; -#endif - // Remember how much SP has been adjusted to create the call - // frame. - int Size = I->getOperand(0).getImm(); - - if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) || - (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode)) - Size = -Size; - - SPAdj += Size; - - MachineBasicBlock::iterator PrevI = BB->end(); - if (I != BB->begin()) PrevI = prior(I); - TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); - - // Visit the instructions created by eliminateCallFramePseudoInstr(). - if (PrevI == BB->end()) - I = BB->begin(); // The replaced instr was the first in the block. - else - I = llvm::next(PrevI); - continue; - } + if (I->getOpcode() == FrameSetupOpcode || + I->getOpcode() == FrameDestroyOpcode) { + // Remember how much SP has been adjusted to create the call + // frame. + int Size = I->getOperand(0).getImm(); - MachineInstr *MI = I; - bool DoIncr = true; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - if (!MI->getOperand(i).isFI()) - continue; + if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) || + (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode)) + Size = -Size; - // Some instructions (e.g. inline asm instructions) can have - // multiple frame indices and/or cause eliminateFrameIndex - // to insert more than one instruction. We need the register - // scavenger to go through all of these instructions so that - // it can update its register information. 
We keep the - // iterator at the point before insertion so that we can - // revisit them in full. - bool AtBeginning = (I == BB->begin()); - if (!AtBeginning) --I; - - // If this instruction has a FrameIndex operand, we need to - // use that target machine register info object to eliminate - // it. - TRI.eliminateFrameIndex(MI, SPAdj, i, - FrameIndexVirtualScavenging ? NULL : RS); - - // Reset the iterator if we were at the beginning of the BB. - if (AtBeginning) { - I = BB->begin(); - DoIncr = false; - } + SPAdj += Size; + + MachineBasicBlock::iterator PrevI = BB->end(); + if (I != BB->begin()) PrevI = prior(I); + TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); - MI = 0; - break; + // Visit the instructions created by eliminateCallFramePseudoInstr(). + if (PrevI == BB->end()) + I = BB->begin(); // The replaced instr was the first in the block. + else + I = llvm::next(PrevI); + continue; + } + + MachineInstr *MI = I; + bool DoIncr = true; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (!MI->getOperand(i).isFI()) + continue; + + // Frame indices in debug values are encoded in a target independent + // way with simply the frame index and offset rather than any + // target-specific addressing mode. + if (MI->isDebugValue()) { + assert(i == 0 && "Frame indices can only appear as the first " + "operand of a DBG_VALUE machine instruction"); + unsigned Reg; + MachineOperand &Offset = MI->getOperand(1); + Offset.setImm(Offset.getImm() + + TFI->getFrameIndexReference( + Fn, MI->getOperand(0).getIndex(), Reg)); + MI->getOperand(0).ChangeToRegister(Reg, false /*isDef*/); + continue; } - if (DoIncr && I != BB->end()) ++I; + // Some instructions (e.g. inline asm instructions) can have + // multiple frame indices and/or cause eliminateFrameIndex + // to insert more than one instruction. We need the register + // scavenger to go through all of these instructions so that + // it can update its register information. We keep the + // iterator at the point before insertion so that we can + // revisit them in full. + bool AtBeginning = (I == BB->begin()); + if (!AtBeginning) --I; + + // If this instruction has a FrameIndex operand, we need to + // use that target machine register info object to eliminate + // it. + TRI.eliminateFrameIndex(MI, SPAdj, i, + FrameIndexVirtualScavenging ? NULL : RS); + + // Reset the iterator if we were at the beginning of the BB. + if (AtBeginning) { + I = BB->begin(); + DoIncr = false; + } - // Update register states. - if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); + MI = 0; + break; } - // If we have evenly matched pairs of frame setup / destroy instructions, - // make sure the adjustments come out to zero. If we don't have matched - // pairs, we can't be sure the missing bit isn't in another basic block - // due to a custom inserter playing tricks, so just asserting SPAdj==0 - // isn't sufficient. See tMOVCC on Thumb1, for example. - assert((SPAdjCount || SPAdj == 0) && - "Unbalanced call frame setup / destroy pairs?"); + if (DoIncr && I != BB->end()) ++I; + + // Update register states.
+ if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); } } diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h index 87fff9a..50f4daf 100644 --- a/lib/CodeGen/PrologEpilogInserter.h +++ b/lib/CodeGen/PrologEpilogInserter.h @@ -1,4 +1,4 @@ -//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -* --===// +//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -*---===// // // The LLVM Compiler Infrastructure // @@ -112,13 +112,13 @@ namespace llvm { bool calcAvailInOut(MachineBasicBlock* MBB); void calculateAnticAvail(MachineFunction &Fn); bool addUsesForMEMERegion(MachineBasicBlock* MBB, - SmallVector<MachineBasicBlock*, 4>& blks); - bool addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks); + SmallVectorImpl<MachineBasicBlock *> &blks); + bool addUsesForTopLevelLoops(SmallVectorImpl<MachineBasicBlock *> &blks); bool calcSpillPlacements(MachineBasicBlock* MBB, - SmallVector<MachineBasicBlock*, 4> &blks, + SmallVectorImpl<MachineBasicBlock *> &blks, CSRegBlockMap &prevSpills); bool calcRestorePlacements(MachineBasicBlock* MBB, - SmallVector<MachineBasicBlock*, 4> &blks, + SmallVectorImpl<MachineBasicBlock *> &blks, CSRegBlockMap &prevRestores); void placeSpillsAndRestores(MachineFunction &Fn); void placeCSRSpillsAndRestores(MachineFunction &Fn); @@ -127,6 +127,8 @@ namespace llvm { void insertCSRSpillsAndRestores(MachineFunction &Fn); void calculateFrameObjectOffsets(MachineFunction &Fn); void replaceFrameIndices(MachineFunction &Fn); + void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, + int &SPAdj); void scavengeFrameVirtualRegs(MachineFunction &Fn); void insertPrologEpilogCode(MachineFunction &Fn); diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp index c035590..df3e12a 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -43,7 +43,7 @@ static cl::opt<bool, true> VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), cl::desc("Verify during register allocation")); -const char *RegAllocBase::TimerGroupName = "Register Allocation"; +const char RegAllocBase::TimerGroupName[] = "Register Allocation"; bool RegAllocBase::VerifyEnabled = false; //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index 064e40f..ccaabba 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -93,7 +93,7 @@ protected: SmallVectorImpl<LiveInterval*> &splitLVRs) = 0; // Use this group name for NamedRegionTimer. - static const char *TimerGroupName; + static const char TimerGroupName[]; public: /// VerifyEnabled - True when -verify-regalloc is given. 
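
Backing up to the replaceFrameIndices hunk above: DBG_VALUEs are now rewritten target-independently by folding the frame object's offset into the existing offset operand and replacing the frame index with the base register that getFrameIndexReference selects. A standalone sketch of that fold with invented operand types and an invented frame layout; this is not the real MachineOperand API.

    #include <cassert>
    #include <cstdio>

    enum Kind { FrameIndex, Register, Immediate };
    struct Operand { Kind K; long Val; };

    static const int FrameBaseReg = 6; // stand-in for the target's frame base

    // Resolve a frame index to an offset from some base register.
    static long frameIndexReference(long FI, int &Reg) {
      Reg = FrameBaseReg;
      return -8 * (FI + 1); // toy downward-growing frame layout
    }

    // DBG_VALUE <fi>, <off>, <md>  becomes  DBG_VALUE <reg>, <off + fiOff>, <md>
    static void rewriteDebugValue(Operand Ops[2]) {
      assert(Ops[0].K == FrameIndex && "frame index must be the first operand");
      int Reg;
      Ops[1].Val += frameIndexReference(Ops[0].Val, Reg);
      Ops[0].K = Register;
      Ops[0].Val = Reg;
    }

    int main() {
      Operand Ops[2] = { {FrameIndex, 2}, {Immediate, 4} };
      rewriteDebugValue(Ops);
      std::printf("reg %ld, offset %ld\n", Ops[0].Val, Ops[1].Val); // reg 6, -20
      return 0;
    }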
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 7fcfe9e..d6a7d6f 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -145,6 +146,8 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<CalculateSpillWeights>(); AU.addRequired<LiveStacks>(); AU.addPreserved<LiveStacks>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); AU.addRequiredID(MachineDominatorsID); AU.addPreservedID(MachineDominatorsID); AU.addRequired<MachineLoopInfo>(); diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index bb9c05c..6617e50 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -293,29 +293,26 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, // If this register is used by DBG_VALUE then insert new DBG_VALUE to // identify spilled location as the place to find corresponding variable's // value. - SmallVector<MachineInstr *, 4> &LRIDbgValues = + SmallVectorImpl<MachineInstr *> &LRIDbgValues = LiveDbgValueMap[LRI->VirtReg]; for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) { MachineInstr *DBG = LRIDbgValues[li]; - const MDNode *MDPtr = - DBG->getOperand(DBG->getNumOperands()-1).getMetadata(); - int64_t Offset = 0; - if (DBG->getOperand(1).isImm()) - Offset = DBG->getOperand(1).getImm(); + const MDNode *MDPtr = DBG->getOperand(2).getMetadata(); + bool IsIndirect = DBG->getOperand(1).isImm(); // Register-indirect value? + uint64_t Offset = IsIndirect ? DBG->getOperand(1).getImm() : 0; DebugLoc DL; if (MI == MBB->end()) { // If MI is at basic block end then use last instruction's location. MachineBasicBlock::iterator EI = MI; DL = (--EI)->getDebugLoc(); - } - else + } else DL = MI->getDebugLoc(); - if (MachineInstr *NewDV = - TII->emitFrameIndexDebugValue(*MF, FI, Offset, MDPtr, DL)) { - MachineBasicBlock *MBB = DBG->getParent(); - MBB->insert(MI, NewDV); - DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); - } + MachineBasicBlock *MBB = DBG->getParent(); + MachineInstr *NewDV = + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(FI).addImm(Offset).addMetadata(MDPtr); + (void)NewDV; + DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); } // Now that this register is spilled, there should not be any DBG_VALUE // pointing to this register because they are all pointing to the spilled value @@ -859,25 +856,21 @@ void RAFast::AllocateBasicBlock() { } else { // Modify DBG_VALUE now that the value is in a spill slot. - int64_t Offset = MI->getOperand(1).getImm(); + bool IsIndirect = MI->getOperand(1).isImm(); + uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; const MDNode *MDPtr = MI->getOperand(MI->getNumOperands()-1).getMetadata(); DebugLoc DL = MI->getDebugLoc(); - if (MachineInstr *NewDV = - TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) { - DEBUG(dbgs() << "Modifying debug info due to spill:" << - "\t" << *MI); - MachineBasicBlock *MBB = MI->getParent(); - MBB->insert(MBB->erase(MI), NewDV); - // Scan NewDV operands from the beginning.
- MI = NewDV; - ScanDbgValue = true; - break; - } else { - // We can't allocate a physreg for a DebugValue; sorry! - DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); - MO.setReg(0); - } + MachineBasicBlock *MBB = MI->getParent(); + MachineInstr *NewDV = BuildMI(*MBB, MBB->erase(MI), DL, + TII->get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(SS).addImm(Offset).addMetadata(MDPtr); + DEBUG(dbgs() << "Modifying debug info due to spill:" + << "\t" << *NewDV); + // Scan NewDV operands from the beginning. + MI = NewDV; + ScanDbgValue = true; + break; } } LiveDbgValueMap[Reg].push_back(MI); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 4974828..f9e363b 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -71,6 +72,7 @@ class RAGreedy : public MachineFunctionPass, // analyses SlotIndexes *Indexes; + MachineBlockFrequencyInfo *MBFI; MachineDominatorTree *DomTree; MachineLoopInfo *Loops; EdgeBundles *Bundles; @@ -158,6 +160,8 @@ class RAGreedy : public MachineFunctionPass, EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {} + bool isMax() const { return BrokenHints == ~0u; } + bool operator<(const EvictionCost &O) const { if (BrokenHints != O.BrokenHints) return BrokenHints < O.BrokenHints; @@ -249,14 +253,15 @@ private: void LRE_WillShrinkVirtReg(unsigned); void LRE_DidCloneVirtReg(unsigned, unsigned); - float calcSpillCost(); - bool addSplitConstraints(InterferenceCache::Cursor, float&); + BlockFrequency calcSpillCost(); + bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); void growRegion(GlobalSplitCandidate &Cand); - float calcGlobalSplitCost(GlobalSplitCandidate&); + BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&); bool calcCompactRegion(GlobalSplitCandidate&); void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>); void calcGapWeights(unsigned, SmallVectorImpl<float>&); + unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg); bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); void evictInterference(LiveInterval&, unsigned, @@ -320,6 +325,8 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) { void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); AU.addRequired<AliasAnalysis>(); AU.addPreserved<AliasAnalysis>(); AU.addRequired<LiveIntervals>(); @@ -407,15 +414,28 @@ void RAGreedy::enqueue(LiveInterval *LI) { // everything else has been allocated. Prio = Size; } else { - // Everything is allocated in long->short order. Long ranges that don't fit - // should be spilled (or split) ASAP so they don't create interference. - Prio = (1u << 31) + Size; + if (ExtraRegInfo[Reg].Stage == RS_Assign && !LI->empty() && + LIS->intervalIsInOneMBB(*LI)) { + // Allocate original local ranges in linear instruction order. Since they + // are singly defined, this produces optimal coloring in the absence of + // global interference and other constraints. 
+ Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex()); + } + else { + // Allocate global and split ranges in long->short order. Long ranges that + // don't fit should be spilled (or split) ASAP so they don't create + // interference. Mark a bit to prioritize global above local ranges. + Prio = (1u << 29) + Size; + } + // Mark a higher bit to prioritize global and local above RS_Split. + Prio |= (1u << 31); // Boost ranges that have a physical register hint. if (VRM->hasKnownPreference(Reg)) Prio |= (1u << 30); } - + // The virtual register number is a tie breaker for same-sized ranges. + // Give lower vreg numbers higher priority to assign them first. Queue.push(std::make_pair(Prio, ~Reg)); } @@ -476,6 +496,31 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, // Interference eviction //===----------------------------------------------------------------------===// +unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) { + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); + unsigned PhysReg; + while ((PhysReg = Order.next())) { + if (PhysReg == PrevReg) + continue; + + MCRegUnitIterator Units(PhysReg, TRI); + for (; Units.isValid(); ++Units) { + // Instantiate a "subquery", not to be confused with the Queries array. + LiveIntervalUnion::Query subQ(&VirtReg, &Matrix->getLiveUnions()[*Units]); + if (subQ.checkInterference()) + break; + } + // If no units have interference, break out with the current PhysReg. + if (!Units.isValid()) + break; + } + if (PhysReg) + DEBUG(dbgs() << "can reassign: " << VirtReg << " from " + << PrintReg(PrevReg, TRI) << " to " << PrintReg(PhysReg, TRI) + << '\n'); + return PhysReg; +} + /// shouldEvict - determine if A should evict the assigned live range B. The /// eviction policy defined by this function together with the allocation order /// defined by enqueue() decides which registers ultimately end up being split @@ -516,6 +561,8 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) return false; + bool IsLocal = LIS->intervalIsInOneMBB(VirtReg); + // Find VirtReg's cascade number. This will be unassigned if VirtReg was never // involved in an eviction before. If a cascade number was assigned, deny // evicting anything with the same or a newer cascade number. This prevents @@ -569,8 +616,17 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, // Abort if this would be too expensive. if (!(Cost < MaxCost)) return false; + if (Urgent) + continue; + // If !MaxCost.isMax(), then we're just looking for a cheap register. + // Evicting another local live range in this case could lead to suboptimal + // coloring. + if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) && + !canReassign(*Intf, PhysReg)) { + return false; + } // Finally, apply the eviction policy for non-urgent evictions. - if (!Urgent && !shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) + if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) return false; } } @@ -699,12 +755,12 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, /// that all preferences in SplitConstraints are met. /// Return false if there are no bundles with positive bias. bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, - float &Cost) { + BlockFrequency &Cost) { ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); // Reset interference dependent info. 
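
The enqueue change above packs the whole allocation policy into one unsigned priority: bit 31 lifts ranges past RS_Split, bit 30 boosts ranges with a known register hint, bit 29 ranks global above local ranges, and the low bits carry the size or instruction distance. A sketch of that encoding; the helper is invented, and it masks the low field for safety, which the patch itself does not need to do:

    #include <cstdio>

    static unsigned makePrio(bool PastSplit, bool HasHint, bool IsGlobal,
                             unsigned SizeOrDistance) {
      unsigned Prio = SizeOrDistance & ((1u << 29) - 1); // low 29 bits
      if (IsGlobal)  Prio |= 1u << 29; // global above local
      if (HasHint)   Prio |= 1u << 30; // hinted above unhinted
      if (PastSplit) Prio |= 1u << 31; // everything above RS_Split ranges
      return Prio;
    }

    int main() {
      // A hinted local range still outranks an unhinted global one.
      unsigned HintedLocal    = makePrio(true, true,  false, 100);
      unsigned UnhintedGlobal = makePrio(true, false, true,  100);
      std::printf("%d\n", HintedLocal > UnhintedGlobal); // prints 1
      return 0;
    }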
SplitConstraints.resize(UseBlocks.size()); - float StaticCost = 0; + BlockFrequency StaticCost = 0; for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; @@ -742,8 +798,8 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, } // Accumulate the total frequency of inserted spill code. - if (Ins) - StaticCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); + while (Ins--) + StaticCost += SpillPlacer->getBlockFrequency(BC.Number); } Cost = StaticCost; @@ -876,7 +932,7 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { SpillPlacer->prepare(Cand.LiveBundles); // The static split cost will be zero since Cand.Intf reports no interference. - float Cost; + BlockFrequency Cost; if (!addSplitConstraints(Cand.Intf, Cost)) { DEBUG(dbgs() << ", none.\n"); return false; @@ -901,8 +957,8 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { /// calcSpillCost - Compute how expensive it would be to split the live range in /// SA around all use blocks instead of forming bundle regions. -float RAGreedy::calcSpillCost() { - float Cost = 0; +BlockFrequency RAGreedy::calcSpillCost() { + BlockFrequency Cost = 0; ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; @@ -921,8 +977,8 @@ float RAGreedy::calcSpillCost() { /// pattern in LiveBundles. This cost should be added to the local cost of the /// interference pattern in SplitConstraints. /// -float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { - float GlobalCost = 0; +BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { + BlockFrequency GlobalCost = 0; const BitVector &LiveBundles = Cand.LiveBundles; ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { @@ -936,8 +992,8 @@ float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg); if (BI.LiveOut) Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg); - if (Ins) - GlobalCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); + while (Ins--) + GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); } for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { @@ -949,8 +1005,10 @@ float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { if (RegIn && RegOut) { // We need double spill code if this block has interference. Cand.Intf.moveToBlock(Number); - if (Cand.Intf.hasInterference()) - GlobalCost += 2*SpillPlacer->getBlockFrequency(Number); + if (Cand.Intf.hasInterference()) { + GlobalCost += SpillPlacer->getBlockFrequency(Number); + GlobalCost += SpillPlacer->getBlockFrequency(Number); + } continue; } // live-in / stack-out or stack-in live-out. @@ -1115,7 +1173,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<LiveInterval*> &NewVRegs) { unsigned NumCands = 0; unsigned BestCand = NoCand; - float BestCost; + BlockFrequency BestCost; SmallVector<unsigned, 8> UsedCands; // Check if we can split this live range around a compact region. @@ -1123,11 +1181,11 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, if (HasCompact) { // Yes, keep GlobalCand[0] as the compact region candidate. 
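
In the hunks above, float spill costs become BlockFrequency, and `Ins * frequency` becomes a `while (Ins--)` chain of additions because the fixed-point type supplies saturating addition rather than scalar multiplication. A minimal sketch of such a saturating accumulator, assuming nothing from the real llvm/Support/BlockFrequency.h:

    #include <cstdint>
    #include <cstdio>

    struct Freq {
      uint64_t F;
      Freq &operator+=(Freq RHS) { // saturate instead of wrapping on overflow
        uint64_t Before = F;
        F += RHS.F;
        if (F < Before)
          F = UINT64_MAX;
        return *this;
      }
    };

    int main() {
      Freq Cost = {0};
      Freq BlockFreq = {UINT64_MAX / 2 + 1}; // a very hot block
      unsigned Ins = 3;                      // three spills placed in it
      while (Ins--)                          // the patch's accumulation pattern
        Cost += BlockFreq;
      std::printf("%llu\n", (unsigned long long)Cost.F); // saturated at max
      return 0;
    }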
NumCands = 1; - BestCost = HUGE_VALF; + BestCost = BlockFrequency::getMaxFrequency(); } else { // No benefit from the compact region, our fallback will be per-block // splitting. Make sure we find a solution that is cheaper than spilling. - BestCost = Hysteresis * calcSpillCost(); + BestCost = calcSpillCost(); DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n'); } @@ -1157,7 +1215,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, Cand.reset(IntfCache, PhysReg); SpillPlacer->prepare(Cand.LiveBundles); - float Cost; + BlockFrequency Cost; if (!addSplitConstraints(Cand.Intf, Cost)) { DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n"); continue; @@ -1193,7 +1251,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, }); if (Cost < BestCost) { BestCand = NumCands; - BestCost = Hysteresis * Cost; // Prevent rounding effects. + BestCost = Cost; } ++NumCands; } @@ -1511,7 +1569,9 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, unsigned BestAfter = 0; float BestDiff = 0; - const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB->getNumber()); + const float blockFreq = + SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() * + (1.0f / BlockFrequency::getEntryFrequency()); SmallVector<float, 8> GapWeight; Order.rewind(); @@ -1770,6 +1830,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { getAnalysis<LiveIntervals>(), getAnalysis<LiveRegMatrix>()); Indexes = &getAnalysis<SlotIndexes>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); DomTree = &getAnalysis<MachineDominatorTree>(); SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); Loops = &getAnalysis<MachineLoopInfo>(); @@ -1777,8 +1838,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { SpillPlacer = &getAnalysis<SpillPlacement>(); DebugVars = &getAnalysis<LiveDebugVariables>(); + DEBUG(LIS->dump()); + SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); - SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree)); + SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI)); ExtraRegInfo.clear(); ExtraRegInfo.resize(MRI->getNumVirtRegs()); NextCascade = 1; diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 15a88e2..81ecca1 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -40,6 +40,7 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -96,7 +97,6 @@ public: initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); - initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); } @@ -130,8 +130,8 @@ private: const TargetMachine *tm; const TargetRegisterInfo *tri; const TargetInstrInfo *tii; - const MachineLoopInfo *loopInfo; MachineRegisterInfo *mri; + const MachineBlockFrequencyInfo *mbfi; OwningPtr<Spiller> spiller; LiveIntervals *lis; @@ -188,7 +188,7 @@ unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const { } PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, - const MachineLoopInfo 
*loopInfo, + const MachineBlockFrequencyInfo *mbfi, const RegSet &vregs) { LiveIntervals *LIS = const_cast<LiveIntervals*>(lis); @@ -313,10 +313,10 @@ void PBQPBuilder::addInterferenceCosts( PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, const LiveIntervals *lis, - const MachineLoopInfo *loopInfo, + const MachineBlockFrequencyInfo *mbfi, const RegSet &vregs) { - OwningPtr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, loopInfo, vregs)); + OwningPtr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, mbfi, vregs)); PBQP::Graph &g = p->getGraph(); const TargetMachine &tm = mf->getTarget(); @@ -350,7 +350,7 @@ PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, PBQP::PBQPNum cBenefit = copyFactor * LiveIntervals::getSpillWeight(false, true, - loopInfo->getLoopDepth(mbb)); + mbfi->getBlockFreq(mbb)); if (cp.isPhys()) { if (!mf->getRegInfo().isAllocatable(dst)) { @@ -435,10 +435,12 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { au.addRequired<CalculateSpillWeights>(); au.addRequired<LiveStacks>(); au.addPreserved<LiveStacks>(); - au.addRequired<MachineDominatorTree>(); - au.addPreserved<MachineDominatorTree>(); + au.addRequired<MachineBlockFrequencyInfo>(); + au.addPreserved<MachineBlockFrequencyInfo>(); au.addRequired<MachineLoopInfo>(); au.addPreserved<MachineLoopInfo>(); + au.addRequired<MachineDominatorTree>(); + au.addPreserved<MachineDominatorTree>(); au.addRequired<VirtRegMap>(); au.addPreserved<VirtRegMap>(); MachineFunctionPass::getAnalysisUsage(au); @@ -546,7 +548,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { lis = &getAnalysis<LiveIntervals>(); lss = &getAnalysis<LiveStacks>(); - loopInfo = &getAnalysis<MachineLoopInfo>(); + mbfi = &getAnalysis<MachineBlockFrequencyInfo>(); vrm = &getAnalysis<VirtRegMap>(); spiller.reset(createInlineSpiller(*this, MF, *vrm)); @@ -584,7 +586,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n"); OwningPtr<PBQPRAProblem> problem( - builder->build(mf, lis, loopInfo, vregsToAlloc)); + builder->build(mf, lis, mbfi, vregsToAlloc)); #ifndef NDEBUG if (pbqpDumpGraphs) { diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index 87382d8..cacd7de 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -40,6 +40,9 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { if (MF->getTarget().getRegisterInfo() != TRI) { TRI = MF->getTarget().getRegisterInfo(); RegClass.reset(new RCInfo[TRI->getNumRegClasses()]); + unsigned NumPSets = TRI->getNumRegPressureSets(); + PSetLimits.reset(new unsigned[NumPSets]); + std::fill(&PSetLimits[0], &PSetLimits[NumPSets], 0); Update = true; } @@ -144,3 +147,32 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { RCI.Tag = Tag; } +/// This is not accurate because two overlapping register sets may have some +/// nonoverlapping reserved registers. However, computing the allocation order +/// for all register classes would be too expensive. 
+unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const { + const TargetRegisterClass *RC = 0; + unsigned NumRCUnits = 0; + for (TargetRegisterInfo::regclass_iterator + RI = TRI->regclass_begin(), RE = TRI->regclass_end(); RI != RE; ++RI) { + const int *PSetID = TRI->getRegClassPressureSets(*RI); + for (; *PSetID != -1; ++PSetID) { + if ((unsigned)*PSetID == Idx) + break; + } + if (*PSetID == -1) + continue; + + // Found a register class that counts against this pressure set. + // For efficiency, only compute the set order for the largest set. + unsigned NUnits = TRI->getRegClassWeight(*RI).WeightLimit; + if (!RC || NUnits > NumRCUnits) { + RC = *RI; + NumRCUnits = NUnits; + } + } + compute(RC); + unsigned NReserved = RC->getNumRegs() - getNumAllocatableRegs(RC); + return TRI->getRegPressureSetLimit(Idx) + - TRI->getRegClassWeight(RC).RegWeight * NReserved; +} diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 82043c2..f99f1a3 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -166,7 +166,8 @@ namespace { /// reMaterializeTrivialDef - If the source of a copy is defined by a /// trivial computation, replace the copy by rematerializing the definition. - bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI); + bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI, + bool &IsDefCopy); /// canJoinPhys - Return true if a physreg copy should be joined. bool canJoinPhys(const CoalescerPair &CP); @@ -731,7 +732,9 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, /// reMaterializeTrivialDef - If the source of a copy is defined by a trivial /// computation, replace the copy by rematerializing the definition. bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, - MachineInstr *CopyMI) { + MachineInstr *CopyMI, + bool &IsDefCopy) { + IsDefCopy = false; unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg(); unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx(); unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg(); @@ -740,16 +743,18 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, return false; LiveInterval &SrcInt = LIS->getInterval(SrcReg); - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true); - LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); - assert(SrcLR != SrcInt.end() && "Live range not found!"); - VNInfo *ValNo = SrcLR->valno; + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI); + VNInfo *ValNo = LiveRangeQuery(SrcInt, CopyIdx).valueIn(); + assert(ValNo && "CopyMI input register not live"); if (ValNo->isPHIDef() || ValNo->isUnused()) return false; MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def); if (!DefMI) return false; - assert(DefMI && "Defining instruction disappeared"); + if (DefMI->isCopyLike()) { + IsDefCopy = true; + return false; + } if (!DefMI->isAsCheapAsAMove()) return false; if (!TII->isTriviallyReMaterializable(DefMI, AA)) @@ -843,6 +848,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, // been asked for. If so it must implicitly define the whole thing.
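
computePSetLimit above estimates a pressure-set limit by finding the heaviest register class that counts against the set and deducting the weight of that class's reserved registers. A standalone sketch of the same calculation over plain arrays; the class table and numbers are invented:

    #include <cstdio>

    struct RegClass {
      const int *PSets; // -1 terminated pressure sets this class counts against
      unsigned WeightLimit, RegWeight, NumReserved;
    };

    static unsigned computePSetLimit(const RegClass *Classes, unsigned NumClasses,
                                     unsigned Idx, unsigned RawLimit) {
      const RegClass *Best = 0;
      for (unsigned c = 0; c != NumClasses; ++c) {
        const int *P = Classes[c].PSets;
        while (*P != -1 && (unsigned)*P != Idx)
          ++P;
        if (*P == -1)
          continue; // this class does not touch the set
        if (!Best || Classes[c].WeightLimit > Best->WeightLimit)
          Best = &Classes[c]; // keep the largest contributing class
      }
      if (!Best)
        return RawLimit;
      // Reserved registers can never be allocated, so discount their weight.
      return RawLimit - Best->RegWeight * Best->NumReserved;
    }

    int main() {
      static const int GPRSets[] = {0, -1};
      static const int FPRSets[] = {1, -1};
      RegClass Classes[2] = { {GPRSets, 32, 1, 2}, {FPRSets, 16, 1, 0} };
      std::printf("%u\n", computePSetLimit(Classes, 2, 0, 32)); // 32 - 1*2 = 30
      return 0;
    }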
assert(TargetRegisterInfo::isPhysicalRegister(DstReg) && "Only expect virtual or physical registers in remat"); + NewMI->getOperand(0).setIsDead(true); NewMI->addOperand(MachineOperand::CreateReg(CopyDstReg, true /*IsDef*/, true /*IsImp*/, @@ -1063,8 +1069,11 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { if (!canJoinPhys(CP)) { // Before giving up coalescing, if definition of source is defined by // trivial computation, try rematerializing it. - if (reMaterializeTrivialDef(CP, CopyMI)) + bool IsDefCopy; + if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy)) return true; + if (IsDefCopy) + Again = true; // May be possible to coalesce later. return false; } } else { @@ -1096,7 +1105,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { // If definition of source is defined by trivial computation, try // rematerializing it. - if (reMaterializeTrivialDef(CP, CopyMI)) + bool IsDefCopy; + if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy)) return true; // If we can eliminate the copy without merging the live ranges, do so now. @@ -2060,6 +2070,9 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) { if (!Copy->isCopy()) return false; + if (Copy->getOperand(1).isUndef()) + return false; + unsigned SrcReg = Copy->getOperand(1).getReg(); unsigned DstReg = Copy->getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(SrcReg) @@ -2105,8 +2118,8 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { // are not inherently easier to resolve, but slightly preferable until we // have local live range splitting. In particular this is required by // cmp+jmp macro fusion. - for (MachineBasicBlock::reverse_iterator - MII = MBB->rbegin(), E = MBB->rend(); MII != E; ++MII) { + for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); + MII != E; ++MII) { if (!MII->isCopyLike()) continue; if (isLocalCopy(&(*MII), LIS)) diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 97f22e1..b7ab138 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -76,17 +76,22 @@ void RegisterPressure::decrease(unsigned Reg, const TargetRegisterInfo *TRI, } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -static void dumpSetPressure(const std::vector<unsigned> &SetPressure, - const TargetRegisterInfo *TRI) { +void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure, + const TargetRegisterInfo *TRI) { + bool Empty = true; for (unsigned i = 0, e = SetPressure.size(); i < e; ++i) { - if (SetPressure[i] != 0) + if (SetPressure[i] != 0) { dbgs() << TRI->getRegPressureSetName(i) << "=" << SetPressure[i] << '\n'; + Empty = false; + } } + if (Empty) + dbgs() << "\n"; } void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { dbgs() << "Max Pressure: "; - dumpSetPressure(MaxSetPressure, TRI); + dumpRegSetPressure(MaxSetPressure, TRI); dbgs() << "Live In: "; for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i) dbgs() << PrintReg(LiveInRegs[i], TRI) << " "; @@ -98,8 +103,10 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { } void RegPressureTracker::dump() const { - dbgs() << "Curr Pressure: "; - dumpSetPressure(CurrSetPressure, TRI); + if (!isTopClosed() || !isBottomClosed()) { + dbgs() << "Curr Pressure: "; + dumpRegSetPressure(CurrSetPressure, TRI); + } P.dump(TRI); } #endif @@ -200,13 +207,15 @@ void RegPressureTracker::init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, - 
MachineBasicBlock::const_iterator pos) + MachineBasicBlock::const_iterator pos, + bool ShouldTrackUntiedDefs) { MF = mf; TRI = MF->getTarget().getRegisterInfo(); RCI = rci; MRI = &MF->getRegInfo(); MBB = mbb; + TrackUntiedDefs = ShouldTrackUntiedDefs; if (RequireIntervals) { assert(lis && "IntervalPressure requires LiveIntervals"); @@ -215,6 +224,7 @@ void RegPressureTracker::init(const MachineFunction *mf, CurrPos = pos; CurrSetPressure.assign(TRI->getNumRegPressureSets(), 0); + LiveThruPressure.clear(); if (RequireIntervals) static_cast<IntervalPressure&>(P).reset(); @@ -226,6 +236,9 @@ void RegPressureTracker::init(const MachineFunction *mf, LiveRegs.PhysRegs.setUniverse(TRI->getNumRegs()); LiveRegs.VirtRegs.clear(); LiveRegs.VirtRegs.setUniverse(MRI->getNumVirtRegs()); + UntiedDefs.clear(); + if (TrackUntiedDefs) + UntiedDefs.setUniverse(MRI->getNumVirtRegs()); } /// Does this pressure result have a valid top position and live ins. @@ -304,6 +317,25 @@ void RegPressureTracker::closeRegion() { // If both top and bottom are closed, do nothing. } +/// The register tracker is unaware of global liveness so ignores normal +/// live-thru ranges. However, two-address or coalesced chains can also lead +/// to live ranges with no holes. Count these to inform heuristics that we +/// can never drop below this pressure. +void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) { + LiveThruPressure.assign(TRI->getNumRegPressureSets(), 0); + assert(isBottomClosed() && "need bottom-up tracking to initialize."); + for (unsigned i = 0, e = P.LiveOutRegs.size(); i < e; ++i) { + unsigned Reg = P.LiveOutRegs[i]; + if (TargetRegisterInfo::isVirtualRegister(Reg) + && !RPTracker.hasUntiedDef(Reg)) { + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + increaseSetPressure(LiveThruPressure, LiveThruPressure, + TRI->getRegClassPressureSets(RC), + TRI->getRegClassWeight(RC).RegWeight); + } + } +} + /// \brief Convenient wrapper for checking membership in RegisterOperands. static bool containsReg(ArrayRef<unsigned> Regs, unsigned Reg) { return std::find(Regs.begin(), Regs.end(), Reg) != Regs.end(); } @@ -459,11 +491,20 @@ bool RegPressureTracker::recede() { LiveRegs.insert(Reg); } } + if (TrackUntiedDefs) { + for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { + unsigned Reg = RegOpers.Defs[i]; + if (TargetRegisterInfo::isVirtualRegister(Reg) && !LiveRegs.contains(Reg)) + UntiedDefs.insert(Reg); + } + } return true; } /// Advance across the current instruction. bool RegPressureTracker::advance() { + assert(!TrackUntiedDefs && "unsupported mode"); + // Check for the bottom of the analyzable region. if (CurrPos == MBB->end()) { closeRegion(); @@ -533,7 +574,8 @@ bool RegPressureTracker::advance() { static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec, ArrayRef<unsigned> NewPressureVec, RegPressureDelta &Delta, - const TargetRegisterInfo *TRI) { + const RegisterClassInfo *RCI, + ArrayRef<unsigned> LiveThruPressureVec) { int ExcessUnits = 0; unsigned PSetID = ~0U; for (unsigned i = 0, e = OldPressureVec.size(); i < e; ++i) { @@ -543,7 +585,10 @@ static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec, if (!PDiff) // No change in this set in the common case. continue; // Only consider change beyond the limit.
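
initLiveThru above gives the pressure heuristics a floor: any virtual register that is live out of the scheduling region without an untied def inside it is counted as live-through, and pressure can never fall below the combined weight of those registers. A standalone sketch of the accumulation over an invented register table:

    #include <cstdio>
    #include <vector>

    struct VReg { unsigned Weight; bool LiveOut; bool HasUntiedDef; };

    // Sum the weights of registers that merely flow through the region.
    static unsigned liveThruPressure(const std::vector<VReg> &Regs) {
      unsigned P = 0;
      for (unsigned i = 0; i < Regs.size(); ++i)
        if (Regs[i].LiveOut && !Regs[i].HasUntiedDef)
          P += Regs[i].Weight;
      return P;
    }

    int main() {
      std::vector<VReg> Regs;
      VReg A = {1, true, false};  Regs.push_back(A); // live-through: counts
      VReg B = {1, true, true};   Regs.push_back(B); // defined here: skipped
      VReg C = {2, false, false}; Regs.push_back(C); // dies here: skipped
      std::printf("live-thru pressure = %u\n", liveThruPressure(Regs)); // 1
      return 0;
    }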
- unsigned Limit = TRI->getRegPressureSetLimit(i); + unsigned Limit = RCI->getRegPressureSetLimit(i); + if (!LiveThruPressureVec.empty()) + Limit += LiveThruPressureVec[i]; + if (Limit > POld) { if (Limit > PNew) PDiff = 0; // Under the limit @@ -553,9 +598,10 @@ static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec, else if (Limit > PNew) PDiff = Limit - POld; // Just obeyed limit. - if (std::abs(PDiff) > std::abs(ExcessUnits)) { + if (PDiff) { ExcessUnits = PDiff; PSetID = i; + break; } } Delta.Excess.PSetID = PSetID; @@ -583,23 +629,28 @@ static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec, if (PNew == POld) // No change in this set in the common case. continue; - while (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID < i) - ++CritIdx; + if (!Delta.CriticalMax.isValid()) { + while (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID < i) + ++CritIdx; - if (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID == i) { - int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].UnitIncrease; - if (PDiff > Delta.CriticalMax.UnitIncrease) { - Delta.CriticalMax.PSetID = i; - Delta.CriticalMax.UnitIncrease = PDiff; + if (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID == i) { + int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].UnitIncrease; + if (PDiff > 0) { + Delta.CriticalMax.PSetID = i; + Delta.CriticalMax.UnitIncrease = PDiff; + } } } - - // Find the greatest increase above MaxPressureLimit. + // Find the first increase above MaxPressureLimit. // (Ignores negative MDiff). - int MDiff = (int)PNew - (int)MaxPressureLimit[i]; - if (MDiff > Delta.CurrentMax.UnitIncrease) { - Delta.CurrentMax.PSetID = i; - Delta.CurrentMax.UnitIncrease = PNew; + if (!Delta.CurrentMax.isValid()) { + int MDiff = (int)PNew - (int)MaxPressureLimit[i]; + if (MDiff > 0) { + Delta.CurrentMax.PSetID = i; + Delta.CurrentMax.UnitIncrease = MDiff; + if (CritIdx == CritEnd || Delta.CriticalMax.isValid()) + break; + } } } } @@ -659,7 +710,8 @@ getMaxUpwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, bumpUpwardPressure(MI); - computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI); + computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, RCI, + LiveThruPressure); computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets, MaxPressureLimit, Delta); assert(Delta.CriticalMax.UnitIncrease >= 0 && @@ -749,7 +801,8 @@ getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, bumpDownwardPressure(MI); - computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI); + computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, RCI, + LiveThruPressure); computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets, MaxPressureLimit, Delta); assert(Delta.CriticalMax.UnitIncrease >= 0 && diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index b8ef6a4..75ebdaa 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -44,8 +44,8 @@ bool RegScavenger::isAliasUsed(unsigned Reg) const { } void RegScavenger::initRegState() { - for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(), - IE = Scavenged.end(); I != IE; ++I) { + for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(), + IE = Scavenged.end(); I != IE; ++I) { I->Reg = 0; I->Restore = NULL; } @@ -181,8 +181,8 @@ void RegScavenger::forward() { MachineInstr *MI = MBBI; - for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(), - IE = 
Scavenged.end(); I != IE; ++I) { + for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(), + IE = Scavenged.end(); I != IE; ++I) { if (I->Restore != MI) continue; @@ -368,7 +368,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // Exclude all the registers being used by the instruction. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { MachineOperand &MO = I->getOperand(i); - if (MO.isReg() && MO.getReg() != 0 && + if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) && !TargetRegisterInfo::isVirtualRegister(MO.getReg())) Candidates.reset(MO.getReg()); } diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 07e5b47..75e3790 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -64,8 +64,8 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { /// specified node. bool SUnit::addPred(const SDep &D, bool Required) { // If this node already has this depenence, don't add a redundant one. - for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end(); - I != E; ++I) { + for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end(); + I != E; ++I) { // Zero-latency weak edges may be added purely for heuristic ordering. Don't // add them if another kind of edge already exists. if (!Required && I->getSUnit() == D.getSUnit()) @@ -77,7 +77,7 @@ bool SUnit::addPred(const SDep &D, bool Required) { // Find the corresponding successor in N. SDep ForwardD = *I; ForwardD.setSUnit(this); - for (SmallVector<SDep, 4>::iterator II = PredSU->Succs.begin(), + for (SmallVectorImpl<SDep>::iterator II = PredSU->Succs.begin(), EE = PredSU->Succs.end(); II != EE; ++II) { if (*II == ForwardD) { II->setLatency(D.getLatency()); @@ -132,8 +132,8 @@ bool SUnit::addPred(const SDep &D, bool Required) { /// the specified node. void SUnit::removePred(const SDep &D) { // Find the matching predecessor. - for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end(); - I != E; ++I) + for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end(); + I != E; ++I) if (*I == D) { // Find the corresponding successor in N. SDep P = D; diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index e4da6a4..892903c 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -98,7 +98,7 @@ static void getUnderlyingObjects(const Value *V, SmallVector<Value *, 4> Objs; GetUnderlyingObjects(const_cast<Value *>(V), Objs); - for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end(); + for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); I != IE; ++I) { V = *I; if (!Visited.insert(V)) @@ -116,12 +116,15 @@ static void getUnderlyingObjects(const Value *V, } while (!Working.empty()); } +typedef SmallVector<PointerIntPair<const Value *, 1, bool>, 4> +UnderlyingObjectsVector; + /// getUnderlyingObjectsForInstr - If this machine instr has memory reference /// information and it can be tracked to a normal reference to a known /// object, return the Value for that object. 
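[Editor's note] The SmallVector to SmallVectorImpl changes in the RegisterScavenging and ScheduleDAG hunks above are all one idiom: SmallVectorImpl<T> is the size-erased base class of every SmallVector<T, N>, so iterator types and reference parameters written against it no longer bake a particular inline capacity into the code. A minimal sketch, assuming the LLVM ADT headers are on the include path:

#include "llvm/ADT/SmallVector.h"
#include <cstdio>

// Accepts a SmallVector<int, N> for any N; a SmallVector<int, 4>&
// parameter would bind only to that one instantiation.
static int sum(const llvm::SmallVectorImpl<int> &V) {
  int S = 0;
  for (llvm::SmallVectorImpl<int>::const_iterator I = V.begin(), E = V.end();
       I != E; ++I)
    S += *I;
  return S;
}

int main() {
  llvm::SmallVector<int, 4> A;
  llvm::SmallVector<int, 16> B;
  for (int i = 1; i <= 3; ++i) {
    A.push_back(i);
    B.push_back(10 * i);
  }
  std::printf("%d %d\n", sum(A), sum(B)); // 6 60
}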
static void getUnderlyingObjectsForInstr(const MachineInstr *MI, - const MachineFrameInfo *MFI, - SmallVectorImpl<std::pair<const Value *, bool> > &Objects) { + const MachineFrameInfo *MFI, + UnderlyingObjectsVector &Objects) { if (!MI->hasOneMemOperand() || !(*MI->memoperands_begin())->getValue() || (*MI->memoperands_begin())->isVolatile()) @@ -134,8 +137,8 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, SmallVector<Value *, 4> Objs; getUnderlyingObjects(V, Objs); - for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end(); - I != IE; ++I) { + for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); + I != IE; ++I) { bool MayAlias = true; V = *I; @@ -155,7 +158,7 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, return; } - Objects.push_back(std::make_pair(V, MayAlias)); + Objects.push_back(UnderlyingObjectsVector::value_type(V, MayAlias)); } } @@ -267,13 +270,10 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { SU->hasPhysRegDefs = true; Dep = SDep(SU, SDep::Data, *Alias); RegUse = UseSU->getInstr(); - Dep.setMinLatency( - SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, - RegUse, UseOp, /*FindMin=*/true)); } Dep.setLatency( - SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, - RegUse, UseOp, /*FindMin=*/false)); + SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse, + UseOp)); ST.adjustSchedDependency(SU, UseSU, Dep); UseSU->addPred(Dep); @@ -310,10 +310,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias)); else { SDep Dep(SU, Kind, /*Reg=*/*Alias); - unsigned OutLatency = - SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()); - Dep.setMinLatency(OutLatency); - Dep.setLatency(OutLatency); + Dep.setLatency( + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); DefSU->addPred(Dep); } } @@ -389,10 +387,8 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { SUnit *DefSU = DefI->SU; if (DefSU != SU && DefSU != &ExitSU) { SDep Dep(SU, SDep::Output, Reg); - unsigned OutLatency = - SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()); - Dep.setMinLatency(OutLatency); - Dep.setLatency(OutLatency); + Dep.setLatency( + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); DefSU->addPred(Dep); } DefI->SU = SU; @@ -427,10 +423,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { // Adjust the dependence latency using operand def/use information, then // allow the target to perform its own adjustments. 
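[Editor's note] The UnderlyingObjectsVector typedef above swaps std::pair<const Value *, bool> for PointerIntPair<const Value *, 1, bool>, which hides the flag in the low alignment bit of the pointer so each entry stays one word; that is why the later hunks read K->getPointer() and K->getInt() instead of K->first and K->second. A toy version of the packing trick (the real llvm::PointerIntPair is trait-driven and supports more bits):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Pack one bool into the low bit of a sufficiently aligned pointer.
template <typename T> class ToyPointerBoolPair {
  uintptr_t Bits;
public:
  ToyPointerBoolPair(T *P, bool B) {
    uintptr_t V = reinterpret_cast<uintptr_t>(P);
    assert((V & 1) == 0 && "pointer must be at least 2-byte aligned");
    Bits = V | static_cast<uintptr_t>(B);
  }
  T *getPointer() const { return reinterpret_cast<T *>(Bits & ~uintptr_t(1)); }
  bool getInt() const { return Bits & 1; }
};

int main() {
  int X = 42;
  ToyPointerBoolPair<int> P(&X, true);
  // Same footprint as a bare pointer, unlike std::pair<int *, bool>.
  std::printf("%d %d %zu\n", *P.getPointer(), (int)P.getInt(), sizeof(P));
}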
int DefOp = Def->findRegisterDefOperandIdx(Reg); - dep.setLatency( - SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, false)); - dep.setMinLatency( - SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, true)); + dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx)); const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); @@ -472,8 +465,8 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, SmallVector<Value *, 4> Objs; getUnderlyingObjects(V, Objs); - for (SmallVector<Value *, 4>::iterator I = Objs.begin(), - IE = Objs.end(); I != IE; ++I) { + for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), + IE = Objs.end(); I != IE; ++I) { V = *I; if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { @@ -855,7 +848,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, AliasMemDefs.clear(); AliasMemUses.clear(); } else if (MI->mayStore()) { - SmallVector<std::pair<const Value *, bool>, 4> Objs; + UnderlyingObjectsVector Objs; getUnderlyingObjectsForInstr(MI, MFI, Objs); if (Objs.empty()) { @@ -864,10 +857,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, } bool MayAlias = false; - for (SmallVector<std::pair<const Value *, bool>, 4>::iterator - K = Objs.begin(), KE = Objs.end(); K != KE; ++K) { - const Value *V = K->first; - bool ThisMayAlias = K->second; + for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end(); + K != KE; ++K) { + const Value *V = K->getPointer(); + bool ThisMayAlias = K->getInt(); if (ThisMayAlias) MayAlias = true; @@ -929,7 +922,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (MI->isInvariantLoad(AA)) { // Invariant load, no chain dependencies needed! } else { - SmallVector<std::pair<const Value *, bool>, 4> Objs; + UnderlyingObjectsVector Objs; getUnderlyingObjectsForInstr(MI, MFI, Objs); if (Objs.empty()) { @@ -945,10 +938,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, MayAlias = false; } - for (SmallVector<std::pair<const Value *, bool>, 4>::iterator + for (UnderlyingObjectsVector::iterator J = Objs.begin(), JE = Objs.end(); J != JE; ++J) { - const Value *V = J->first; - bool ThisMayAlias = J->second; + const Value *V = J->getPointer(); + bool ThisMayAlias = J->getInt(); if (ThisMayAlias) MayAlias = true; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 046dd41..cb88941 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -154,7 +154,7 @@ namespace { SDValue PromoteExtend(SDValue Op); bool PromoteLoad(SDValue Op); - void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, + void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc, SDValue ExtLoad, SDLoc DL, ISD::NodeType ExtType); @@ -279,7 +279,7 @@ namespace { /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void GatherAllAliases(SDNode *N, SDValue OriginalChain, - SmallVector<SDValue, 8> &Aliases); + SmallVectorImpl<SDValue> &Aliases); /// isAlias - Return true if there is any possibility that the two addresses /// overlap. @@ -326,7 +326,10 @@ namespace { /// getShiftAmountTy - Returns a type large enough to hold any valid /// shift amount - before type legalization these can be huge. EVT getShiftAmountTy(EVT LHSTy) { - return LegalTypes ? 
TLI.getShiftAmountTy(LHSTy) : TLI.getPointerTy(); + assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); + if (LHSTy.isVector()) + return LHSTy; + return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy) : TLI.getPointerTy(); } /// isTypeLegal - This method returns true if we are running before type @@ -1251,7 +1254,7 @@ static SDValue getInputChainForNode(SDNode *N) { if (unsigned NumOps = N->getNumOperands()) { if (N->getOperand(0).getValueType() == MVT::Other) return N->getOperand(0); - else if (N->getOperand(NumOps-1).getValueType() == MVT::Other) + if (N->getOperand(NumOps-1).getValueType() == MVT::Other) return N->getOperand(NumOps-1); for (unsigned i = 1; i < NumOps-1; ++i) if (N->getOperand(i).getValueType() == MVT::Other) @@ -1610,13 +1613,19 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { // Since it may not be valid to emit a fold to zero for vector initializers // check if we can before folding. static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, - SelectionDAG &DAG, bool LegalOperations) { - if (!VT.isVector()) { + SelectionDAG &DAG, + bool LegalOperations, bool LegalTypes) { + if (!VT.isVector()) return DAG.getConstant(0, VT); - } if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { // Produce a vector of zeros. - SDValue El = DAG.getConstant(0, VT.getVectorElementType()); + EVT ElemTy = VT.getVectorElementType(); + if (LegalTypes && TLI.getTypeAction(*DAG.getContext(), ElemTy) == + TargetLowering::TypePromoteInteger) + ElemTy = TLI.getTypeToTransformTo(*DAG.getContext(), ElemTy); + assert((!LegalTypes || TLI.isTypeLegal(ElemTy)) && + "Type for zero vector elements is not legal"); + SDValue El = DAG.getConstant(0, ElemTy); std::vector<SDValue> Ops(VT.getVectorNumElements(), El); return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, &Ops[0], Ops.size()); @@ -1646,7 +1655,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // fold (sub x, x) -> 0 // FIXME: Refactor this and xor and other similar operations together. if (N0 == N1) - return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations); + return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); // fold (sub c1, c2) -> c1-c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); @@ -1762,43 +1771,73 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) { return SDValue(); } +/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are +/// all the same constant or undefined. 
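[Editor's note] The new getShiftAmountTy body above encodes a three-way rule: vector shifts take per-lane amounts of the shifted vector's own type, legalized scalar shifts use the target's scalar shift-amount type, and before type legalization a pointer-wide integer is used because amounts can be huge. A toy model of the selection, with made-up type and target parameters rather than EVT/TargetLowering:

#include <cstdio>
#include <string>

struct ToyType {
  bool IsVector;
  unsigned Bits;
};

// Illustrative only: ScalarShiftBits and PtrBits stand in for the
// target's getScalarShiftAmountTy and getPointerTy results.
static std::string shiftAmountTy(ToyType LHS, bool LegalTypes,
                                 unsigned ScalarShiftBits, unsigned PtrBits) {
  if (LHS.IsVector)
    return "v" + std::to_string(LHS.Bits); // lane-wise amounts, same type
  return "i" + std::to_string(LegalTypes ? ScalarShiftBits : PtrBits);
}

int main() {
  ToyType V128 = {true, 128}, I32 = {false, 32};
  std::printf("%s %s %s\n",
              shiftAmountTy(V128, true, 8, 64).c_str(),  // v128
              shiftAmountTy(I32, true, 8, 64).c_str(),   // i8
              shiftAmountTy(I32, false, 8, 64).c_str()); // i64
}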
+static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { + BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); + if (!C) + return false; + + APInt SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + EVT EltVT = N->getValueType(0).getVectorElementType(); + return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs) && + EltVT.getSizeInBits() >= SplatBitSize); +} + SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); + // fold (mul x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + + bool N0IsConst = false; + bool N1IsConst = false; + APInt ConstValue0, ConstValue1; // fold vector ops if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0); + N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); + } else { + N0IsConst = dyn_cast<ConstantSDNode>(N0) != 0; + ConstValue0 = N0IsConst? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue() : APInt(); + N1IsConst = dyn_cast<ConstantSDNode>(N1) != 0; + ConstValue1 = N1IsConst? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue() : APInt(); } - // fold (mul x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); // fold (mul c1, c2) -> c1*c2 - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C); + if (N0IsConst && N1IsConst) + return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode()); + // canonicalize constant to RHS - if (N0C && !N1C) + if (N0IsConst && !N1IsConst) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 - if (N1C && N1C->isNullValue()) + if (N1IsConst && ConstValue1 == 0) return N1; + // fold (mul x, 1) -> x + if (N1IsConst && ConstValue1 == 1) + return N0; // fold (mul x, -1) -> 0-x - if (N1C && N1C->isAllOnesValue()) + if (N1IsConst && ConstValue1.isAllOnesValue()) return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), N0); // fold (mul x, (1 << c)) -> x << c - if (N1C && N1C->getAPIntValue().isPowerOf2()) + if (N1IsConst && ConstValue1.isPowerOf2()) return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, - DAG.getConstant(N1C->getAPIntValue().logBase2(), + DAG.getConstant(ConstValue1.logBase2(), getShiftAmountTy(N0.getValueType()))); // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c - if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) { - unsigned Log2Val = (-N1C->getAPIntValue()).logBase2(); + if (N1IsConst && (-ConstValue1).isPowerOf2()) { + unsigned Log2Val = (-ConstValue1).logBase2(); // FIXME: If the input is something that is easily negated (e.g. a // single-use add), we should put the negate there. 
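[Editor's note] visitMUL above is reworked so the scalar constant folds also fire when an operand is a splat BUILD_VECTOR, with isConstantSplatVector feeding the same APInt-based paths. Two of those folds in standalone form, splat detection over plain integers plus the power-of-two strength reduction (mul x, 1<<c becomes shl x, c, and mul x, -(1<<c) becomes a negated shift); this is a sketch of the arithmetic, not the DAG plumbing:

#include <cstdint>
#include <cstdio>
#include <vector>

static bool isConstantSplat(const std::vector<int64_t> &Lanes, int64_t &Splat) {
  if (Lanes.empty())
    return false;
  Splat = Lanes[0];
  for (int64_t L : Lanes)
    if (L != Splat)
      return false;
  return true;
}

static unsigned exactLog2(uint64_t U) { // precondition: U is a power of two
  unsigned L = 0;
  while ((U >> L) != 1)
    ++L;
  return L;
}

static int64_t mulByConst(int64_t X, int64_t C) {
  uint64_t U = (uint64_t)C, NU = (uint64_t)-C;
  if (U && (U & (U - 1)) == 0)
    return X << exactLog2(U);     // mul x, (1 << c) -> shl x, c
  if (NU && (NU & (NU - 1)) == 0)
    return -(X << exactLog2(NU)); // mul x, -(1 << c) -> neg (shl x, c)
  return X * C;
}

int main() {
  std::vector<int64_t> V = {8, 8, 8, 8};
  int64_t C;
  if (isConstantSplat(V, C)) // the splat makes the fold legal per lane
    std::printf("5 * %lld = %lld\n", (long long)C, (long long)mulByConst(5, C));
}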
return DAG.getNode(ISD::SUB, SDLoc(N), VT, @@ -1807,9 +1846,12 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { DAG.getConstant(Log2Val, getShiftAmountTy(N0.getValueType())))); } + + APInt Val; // (mul (shl X, c1), c2) -> (mul X, c2 << c1) - if (N1C && N0.getOpcode() == ISD::SHL && - isa<ConstantSDNode>(N0.getOperand(1))) { + if (N1IsConst && N0.getOpcode() == ISD::SHL && + (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1)))) { SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); AddToWorkList(C3.getNode()); @@ -1822,7 +1864,9 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { { SDValue Sh(0,0), Y(0,0); // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). - if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) && + if (N0.getOpcode() == ISD::SHL && + (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1))) && N0.getNode()->hasOneUse()) { Sh = N0; Y = N1; } else if (N1.getOpcode() == ISD::SHL && @@ -1840,8 +1884,9 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) - if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && - isa<ConstantSDNode>(N0.getOperand(1))) + if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && + (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1)))) return DAG.getNode(ISD::ADD, SDLoc(N), VT, DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1), @@ -2502,7 +2547,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } } - // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> + // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must // already be zero by virtue of the width of the base type of the load. // @@ -2716,7 +2761,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0); if (LN0->getExtensionType() != ISD::SEXTLOAD && - LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) { + LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); @@ -2905,7 +2950,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, /// isBSwapHWordElement - Return true if the specified node is an element /// that makes up a 32-bit packed halfword byteswap. i.e. /// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) -static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) { +static bool isBSwapHWordElement(SDValue N, SmallVectorImpl<SDNode *> &Parts) { if (!N.getNode()->hasOneUse()) return false; @@ -3304,25 +3349,21 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { LHSShiftAmt == RHSShiftAmt.getOperand(1)) { if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) { - if (SUBC->getAPIntValue() == OpSizeInBits) { + if (SUBC->getAPIntValue() == OpSizeInBits) return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, HasROTL ? 
LHSShiftAmt : RHSShiftAmt).getNode(); - } } } // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y) // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y)) if (LHSShiftAmt.getOpcode() == ISD::SUB && - RHSShiftAmt == LHSShiftAmt.getOperand(1)) { + RHSShiftAmt == LHSShiftAmt.getOperand(1)) if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) { - if (SUBC->getAPIntValue() == OpSizeInBits) { + dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) + if (SUBC->getAPIntValue() == OpSizeInBits) return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); - } - } - } // Look for sign/zext/any-extended or truncate cases: if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || @@ -3342,13 +3383,11 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> // (rotr x, (sub 32, y)) if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) { - if (SUBC->getAPIntValue() == OpSizeInBits) { + dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) + if (SUBC->getAPIntValue() == OpSizeInBits) return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); - } - } } else if (LExtOp0.getOpcode() == ISD::SUB && RExtOp0 == LExtOp0.getOperand(1)) { // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> @@ -3356,13 +3395,11 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> // (rotl x, (sub 32, y)) if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) { - if (SUBC->getAPIntValue() == OpSizeInBits) { + dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) + if (SUBC->getAPIntValue() == OpSizeInBits) return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); - } - } } } @@ -3489,7 +3526,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (xor x, x) -> 0 if (N0 == N1) - return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations); + return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) if (N0.getOpcode() == N1.getOpcode()) { @@ -3915,8 +3952,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { DAG.getConstant(~0ULL >> ShAmt, VT)); } - - // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) + // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask) if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { // Shifting in all undef bits? 
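[Editor's note] The MatchRotate cleanups above all serve one fold: (or (shl x, y), (srl x, w-y)) and its sign/zero/any-extended-amount variants become a single rotate, guarded by the subtraction constant equaling the full bit width w. That guard matters because the shift pair misbehaves at y == 0 (a shift by w is undefined), while a true rotate is total. A standalone check of the identity:

#include <cstdint>
#include <cstdio>

// Rotate with a masked amount, total for every y, matching the hardware
// semantics the combiner is targeting.
static uint32_t rotl32(uint32_t X, unsigned Y) {
  Y &= 31;
  return Y ? (X << Y) | (X >> (32 - Y)) : X;
}

int main() {
  uint32_t X = 0x12345678;
  for (unsigned Y = 1; Y < 32; ++Y)
    if (((X << Y) | (X >> (32 - Y))) != rotl32(X, Y)) {
      std::puts("mismatch");
      return 1;
    }
  std::puts("shl|srl pair equals rotl for 0 < y < 32");
}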
EVT SmallVT = N0.getOperand(0).getValueType(); @@ -3929,7 +3965,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { N0.getOperand(0), DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); AddToWorkList(SmallShift.getNode()); - return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift); + APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()).lshr(ShiftAmt); + return DAG.getNode(ISD::AND, SDLoc(N), VT, + DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift), + DAG.getConstant(Mask, VT)); } } @@ -4233,20 +4272,22 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { // Determine if the condition we're dealing with is constant SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, CC, SDLoc(N), false); - if (SCC.getNode()) AddToWorkList(SCC.getNode()); + if (SCC.getNode()) { + AddToWorkList(SCC.getNode()); - if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) { - if (!SCCC->isNullValue()) - return N2; // cond always true -> true val - else - return N3; // cond always false -> false val - } + if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { + if (!SCCC->isNullValue()) + return N2; // cond always true -> true val + else + return N3; // cond always false -> false val + } - // Fold to a simpler select_cc - if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC) - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(), - SCC.getOperand(0), SCC.getOperand(1), N2, N3, - SCC.getOperand(2)); + // Fold to a simpler select_cc + if (SCC.getOpcode() == ISD::SETCC) + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(), + SCC.getOperand(0), SCC.getOperand(1), N2, N3, + SCC.getOperand(2)); + } // If we can fold this based on the true/false value, do so. if (SimplifySelectOps(N, N2, N3)) @@ -4268,7 +4309,7 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { // mentioned transformation is profitable. static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, unsigned ExtOpc, - SmallVector<SDNode*, 4> &ExtendNodes, + SmallVectorImpl<SDNode *> &ExtendNodes, const TargetLowering &TLI) { bool HasCopyToRegUses = false; bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); @@ -4326,7 +4367,7 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, return true; } -void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, +void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc, SDValue ExtLoad, SDLoc DL, ISD::NodeType ExtType) { // Extend SetCC uses if necessary. @@ -4508,7 +4549,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. 
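[Editor's note] The visitSRL change above (the "(srl (anyextend x), c)" fold gaining an AND) reads as a correctness fix: any_extend leaves the high bits undefined, and narrowing the shift then re-extending with another any_extend would leave garbage in bits the original srl guaranteed to be zero. The new mask, all-ones shifted right by the amount, restores that guarantee. A standalone demonstration with explicit garbage standing in for the undefined bits:

#include <cstdint>
#include <cstdio>

int main() {
  uint16_t X = 0xABCD;
  unsigned C = 4;

  // (srl (any_extend x), c): the extend's top 16 bits are undefined
  // (modeled as 0xDEAD), but the shift still forces the top c bits to 0.
  uint32_t Original = (0xDEAD0000u | X) >> C;

  // Narrow shift re-extended with another any_extend: fresh garbage
  // (0xBEEF) now sits where the original guaranteed zeros.
  uint32_t NoMask = 0xBEEF0000u | (uint32_t)(uint16_t)(X >> C);

  uint32_t Mask = 0xFFFFFFFFu >> C; // the combiner's lshr'd all-ones mask
  std::printf("top bits: original=%x unmasked=%x masked=%x\n",
              Original >> 28, NoMask >> 28, (NoMask & Mask) >> 28); // 0 b 0
}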
if (VT.isVector() && !LegalOperations && - TLI.getBooleanContents(true) == + TLI.getBooleanContents(true) == TargetLowering::ZeroOrNegativeOneBooleanContent) { EVT N0VT = N0.getOperand(0).getValueType(); // On some architectures (such as SSE/NEON/etc) the SETCC result type is @@ -4547,14 +4588,16 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { NegOne, DAG.getConstant(0, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; - if (!VT.isVector() && (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) - return DAG.getNode(ISD::SELECT, SDLoc(N), VT, - DAG.getSetCC(SDLoc(N), - getSetCCResultType(VT), - N0.getOperand(0), N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()), - NegOne, DAG.getConstant(0, VT)); + if (!VT.isVector() && + (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) { + return DAG.getSelect(SDLoc(N), VT, + DAG.getSetCC(SDLoc(N), + getSetCCResultType(VT), + N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()), + NegOne, DAG.getConstant(0, VT)); + } } // fold (sext x) -> (zext x) if the sign bit is known zero. @@ -5039,9 +5082,8 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { assert(CV != 0 && "Const value should be ConstSDNode."); const APInt &CVal = CV->getAPIntValue(); APInt NewVal = CVal & Mask; - if (NewVal != CVal) { + if (NewVal != CVal) return DAG.getConstant(NewVal, V.getValueType()); - } break; } case ISD::OR: @@ -5169,12 +5211,19 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // For the transform to be legal, the load must produce only two values // (the value loaded and the chain). Don't transform a pre-increment - // load, for example, which produces an extra value. Otherwise the + // load, for example, which produces an extra value. Otherwise the // transformation is not equivalent, and the downstream logic to replace // uses gets things wrong. if (LN0->getNumValues() > 2) return SDValue(); + // If the load that we're shrinking is an extload and we're not just + // discarding the extension we can't simply shrink the load. Bail. + // TODO: It would be possible to merge the extensions in some cases. + if (LN0->getExtensionType() != ISD::NON_EXTLOAD && + LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt) + return SDValue(); + EVT PtrType = N0.getOperand(1).getValueType(); if (PtrType == MVT::Untyped || PtrType.isExtended()) @@ -5251,10 +5300,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && - EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) { + EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0), N1); - } // fold (sext_in_reg (sext x)) -> (sext x) // fold (sext_in_reg (aext x)) -> (sext x) @@ -5400,7 +5448,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue EltNo = N0->getOperand(1); if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); - EVT IndexTy = N0->getOperand(1).getValueType(); + EVT IndexTy = TLI.getVectorIdxTy(); int Index = isLE ? 
(Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N), @@ -5632,8 +5680,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) // This often reduces constant pool loads. - if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) || - (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) && + if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || + (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector() && !N0.getValueType().isVector()) { SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, @@ -5892,22 +5940,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // We don't need test this condition for transformation like following, as // the DAG being transformed implies it is legal to take FP constant as // operand. - // + // // (fadd (fmul c, x), x) -> (fmul c+1, x) - // + // bool AllowNewFpConst = (Level < AfterLegalizeDAG); // If allow, fold (fadd (fneg x), x) -> 0.0 if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && - N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) { + N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) return DAG.getConstantFP(0.0, VT); - } // If allow, fold (fadd x, (fneg x)) -> 0.0 if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && - N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) { + N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) return DAG.getConstantFP(0.0, VT); - } // In unsafe math mode, we can fold chains of FADD's of the same value // into multiplications. This transform is not safe in general because @@ -5919,7 +5965,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); - // (fadd (fmul c, x), x) -> (fmul c+1, x) + // (fadd (fmul c, x), x) -> (fmul x, c+1) if (CFP00 && !CFP01 && N0.getOperand(1) == N1) { SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP00, 0), @@ -5928,7 +5974,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N1, NewCFP); } - // (fadd (fmul x, c), x) -> (fmul c+1, x) + // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP01, 0), @@ -5937,7 +5983,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N1, NewCFP); } - // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x) + // (fadd (fmul c, x), (fadd x, x)) -> (fmul x, c+2) if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(1) == N1.getOperand(0)) { @@ -5948,7 +5994,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N0.getOperand(1), NewCFP); } - // (fadd (fmul x, c), (fadd x, x)) -> (fmul c+2, x) + // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { @@ -5964,7 +6010,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1)); - // (fadd x, (fmul c, x)) -> (fmul c+1, x) + // (fadd x, (fmul c, x)) -> (fmul x, c+1) if (CFP10 && !CFP11 && N1.getOperand(1) == N0) { SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, 
SDValue(CFP10, 0), @@ -5973,7 +6019,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N0, NewCFP); } - // (fadd x, (fmul x, c)) -> (fmul c+1, x) + // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP11, 0), @@ -5983,79 +6029,74 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } - // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x) - if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD && - N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(1) == N1.getOperand(0)) { + // (fadd (fadd x, x), (fmul c, x)) -> (fmul x, c+2) + if (CFP10 && !CFP11 && N0.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(1) == N0.getOperand(0)) { SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP10, 0), DAG.getConstantFP(2.0, VT)); return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(1), NewCFP); + N1.getOperand(1), NewCFP); } - // (fadd (fadd x, x), (fmul x, c)) -> (fmul c+2, x) - if (CFP11 && !CFP10 && N1.getOpcode() == ISD::FADD && - N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) { + // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) + if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(0) == N0.getOperand(0)) { SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP11, 0), DAG.getConstantFP(2.0, VT)); return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(0), NewCFP); + N1.getOperand(0), NewCFP); } } if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) { ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); - // (fadd (fadd x, x), x) -> (fmul 3.0, x) + // (fadd (fadd x, x), x) -> (fmul x, 3.0) if (!CFP && N0.getOperand(0) == N0.getOperand(1) && - (N0.getOperand(0) == N1)) { + (N0.getOperand(0) == N1)) return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, DAG.getConstantFP(3.0, VT)); - } } if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) { ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); - // (fadd x, (fadd x, x)) -> (fmul 3.0, x) + // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && - N1.getOperand(0) == N0) { + N1.getOperand(0) == N0) return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, DAG.getConstantFP(3.0, VT)); - } } - // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x) + // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) if (AllowNewFpConst && N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) { + N0.getOperand(0) == N1.getOperand(0)) return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), DAG.getConstantFP(4.0, VT)); - } } // FADD -> FMA combines: if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && - DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && - TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { + DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { // fold (fadd (fmul x, y), z) -> (fma x, y, z) - if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { + if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1); - } // fold (fadd x, (fmul y, z)) -> (fma y, z, x) // Note: Commutes FADD operands. 
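[Editor's note] The FADD-to-FMA combines above fire only when the target says a fused multiply-add is faster (the renamed isFMAFasterThanFMulAndFAdd hook) and FMA is legal or we are still pre-legalization. Fusion is gated behind fast/unsafe FP because it is not bit-identical: the fused form rounds once where mul-then-add rounds twice. A standalone illustration via the C library's fused primitive:

#include <cmath>
#include <cstdio>

int main() {
  // Chosen so that x*y = 1 - 2^-60 exactly; rounding the product to
  // double loses the 2^-60 term, the fused form keeps it.
  double x = 1.0 + 0x1p-30, y = 1.0 - 0x1p-30, z = -1.0;
  double separate = x * y + z;      // two roundings: prints 0x0p+0
  double fused = std::fma(x, y, z); // one rounding: prints -0x1p-60
  std::printf("separate=%a fused=%a\n", separate, fused);
}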
- if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { + if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1), N0); - } } return SDValue(); @@ -6110,8 +6151,9 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &DAG.getTarget().Options)) return GetNegatedExpression(N11, DAG, LegalOperations); - else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, - &DAG.getTarget().Options)) + + if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, + &DAG.getTarget().Options)) return GetNegatedExpression(N10, DAG, LegalOperations); } } @@ -6119,27 +6161,25 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // FSUB -> FMA combines: if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && - DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && - TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { + DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) - if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { + if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) return DAG.getNode(ISD::FMA, dl, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(ISD::FNEG, dl, VT, N1)); - } // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) // Note: Commutes FSUB operands. - if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { + if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) return DAG.getNode(ISD::FMA, dl, VT, DAG.getNode(ISD::FNEG, dl, VT, N1.getOperand(0)), N1.getOperand(1), N0); - } - // fold (fsub (-(fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) - if (N0.getOpcode() == ISD::FNEG && + // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0).getOpcode() == ISD::FMUL && N0->hasOneUse() && N0.getOperand(0).hasOneUse()) { SDValue N00 = N0.getOperand(0).getOperand(0); @@ -6195,7 +6235,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. 
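[Editor's note] The FSUB combines above are the same fusion with a negation folded in, and the identities are exact in real arithmetic: a*b - c = fma(a, b, -c), and x - y*z = fma(-y, z, x) after commuting; floating-point rounding differences are again what the fast/unsafe gates cover. A quick standalone check with values that are exact in double:

#include <cmath>
#include <cstdio>

int main() {
  double x = 1.5, y = 2.5, z = 0.25;
  // fsub (fmul x, y), z -> fma x, y, (fneg z)
  std::printf("%g %g\n", x * y - z, std::fma(x, y, -z));
  // fsub x, (fmul y, z) -> fma (fneg y), z, x (commutes the FSUB)
  std::printf("%g %g\n", x - y * z, std::fma(-y, z, x));
}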
@@ -6276,21 +6316,17 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { } // (fma x, c, x) -> (fmul x, (c+1)) - if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) { - return DAG.getNode(ISD::FMUL, dl, VT, - N0, + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) + return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, N1, DAG.getConstantFP(1.0, VT))); - } // (fma x, c, (fneg x)) -> (fmul x, (c-1)) if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && - N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { - return DAG.getNode(ISD::FMUL, dl, VT, - N0, + N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) + return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, N1, DAG.getConstantFP(-1.0, VT))); - } return SDValue(); @@ -6670,12 +6706,11 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // (fneg (fmul c, x)) -> (fmul -c, x) if (N0.getOpcode() == ISD::FMUL) { ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); - if (CFP1) { + if (CFP1) return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1))); - } } return SDValue(); @@ -6740,7 +6775,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading // constant pool values. - if (!TLI.isFAbsFree(VT) && + if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { @@ -7165,7 +7200,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // x0 * offset0 + y0 * ptr0 = t0 // knowing that // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store) - // + // // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the // indexed load/store and the expresion that needs to be re-written. // @@ -7287,7 +7322,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { for (SDNode::use_iterator III = Use->use_begin(), EEE = Use->use_end(); III != EEE; ++III) { SDNode *UseUse = *III; - if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) + if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) RealUse = true; } @@ -8577,7 +8612,9 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { // be converted to a BUILD_VECTOR). Fill in the Ops vector with the // vector elements. SmallVector<SDValue, 8> Ops; - if (InVec.getOpcode() == ISD::BUILD_VECTOR) { + // Do not combine these two vectors if the output vector will not replace + // the input vector. + if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) { Ops.append(InVec.getNode()->op_begin(), InVec.getNode()->op_end()); } else if (InVec.getOpcode() == ISD::UNDEF) { @@ -8650,7 +8687,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { OrigElt -= NumElem; } - EVT IndexTy = N->getOperand(1).getValueType(); + EVT IndexTy = TLI.getVectorIdxTy(); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, InVec, DAG.getConstant(OrigElt, IndexTy)); } @@ -8789,7 +8826,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } else { Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(), Align); Chain = Load.getValue(1); if (NVT.bitsLT(LVT)) @@ -9167,7 +9204,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // The extract index must be constant. 
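[Editor's note] The visitFMA folds at the top of this hunk, (fma x, c, x) to (fmul x, c+1) and (fma x, c, (fneg x)) to (fmul x, c-1), are plain distributivity, but they sit behind UnsafeFPMath because the two sides can round differently. A standalone spot-check with values where both sides are exact:

#include <cmath>
#include <cstdio>

int main() {
  double x = 3.0, c = 4.0;
  std::printf("%g %g\n", std::fma(x, c, x), x * (c + 1.0));  // 15 15
  std::printf("%g %g\n", std::fma(x, c, -x), x * (c - 1.0)); // 9 9
}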
if (!CS) return SDValue(); - + // Check that we are reading from the identity index. if (CS->getZExtValue() != IdentityIndex) return SDValue(); @@ -9175,7 +9212,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (SingleSource.getNode()) return SingleSource; - + return SDValue(); } @@ -9605,8 +9642,8 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, SCC.getOperand(0), SCC.getOperand(1), SCC.getOperand(4)); AddToWorkList(SETCC.getNode()); - return DAG.getNode(ISD::SELECT, SDLoc(SCC), SCC.getValueType(), - SCC.getOperand(2), SCC.getOperand(3), SETCC); + return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), + SCC.getOperand(2), SCC.getOperand(3), SETCC); } return SCC; @@ -9675,10 +9712,10 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD)) return false; - Addr = DAG.getNode(ISD::SELECT, SDLoc(TheSelect), - LLD->getBasePtr().getValueType(), - TheSelect->getOperand(0), LLD->getBasePtr(), - RLD->getBasePtr()); + Addr = DAG.getSelect(SDLoc(TheSelect), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), LLD->getBasePtr(), + RLD->getBasePtr()); } else { // Otherwise SELECT_CC SDNode *CondLHS = TheSelect->getOperand(0).getNode(); SDNode *CondRHS = TheSelect->getOperand(1).getNode(); @@ -9812,8 +9849,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, getSetCCResultType(N0.getValueType()), N0, N1, CC); AddToWorkList(Cond.getNode()); - SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(), - Cond, One, Zero); + SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), + Cond, One, Zero); AddToWorkList(CstOffset.getNode()); CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx, CstOffset); @@ -10205,7 +10242,7 @@ bool DAGCombiner::FindAliasInfo(SDNode *N, /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, - SmallVector<SDValue, 8> &Aliases) { + SmallVectorImpl<SDValue> &Aliases) { SmallVector<SDValue, 8> Chains; // List of chains to visit. SmallPtrSet<SDNode *, 16> Visited; // Visited node set. diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index eb80c64..b4ac948f 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -41,6 +41,7 @@ #define DEBUG_TYPE "isel" #include "llvm/CodeGen/FastISel.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Loads.h" #include "llvm/CodeGen/Analysis.h" @@ -75,15 +76,12 @@ STATISTIC(NumFastIselDead, "Number of dead insts removed on failure"); void FastISel::startNewBlock() { LocalValueMap.clear(); + // Instructions are appended to FuncInfo.MBB. If the basic block already + // contains labels or copies, use the last instruction as the last local + // value. EmitStartPt = 0; - - // Advance the emit start point past any EH_LABEL instructions. - MachineBasicBlock::iterator - I = FuncInfo.MBB->begin(), E = FuncInfo.MBB->end(); - while (I != E && I->getOpcode() == TargetOpcode::EH_LABEL) { - EmitStartPt = I; - ++I; - } + if (!FuncInfo.MBB->empty()) + EmitStartPt = &FuncInfo.MBB->back(); LastLocalValue = EmitStartPt; } @@ -92,18 +90,16 @@ bool FastISel::LowerArguments() { // Fallback to SDISel argument lowering code to deal with sret pointer // parameter. 
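[Editor's note] SimplifySelectOps, whose address computation above now goes through DAG.getSelect, turns a select of two loads into one load of a selected address. The shape of the rewrite in plain C++; the surrounding LLVM code supplies the safety conditions (matching chains, neither load a predecessor of the other), and the C++ version below is safe only because the original form dereferences both pointers unconditionally anyway:

#include <cstdio>

// Before: two loads, select the values.
static int selectOfLoads(bool Cond, const int *P, const int *Q) {
  int A = *P, B = *Q;
  return Cond ? A : B;
}

// After: select the address, load once.
static int loadOfSelect(bool Cond, const int *P, const int *Q) {
  const int *Addr = Cond ? P : Q;
  return *Addr;
}

int main() {
  int X = 1, Y = 2;
  std::printf("%d %d\n", selectOfLoads(false, &X, &Y),
              loadOfSelect(false, &X, &Y)); // 2 2
}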
return false; - + if (!FastLowerArguments()) return false; - // Enter non-dead arguments into ValueMap for uses in non-entry BBs. + // Enter arguments into ValueMap for uses in non-entry BBs. for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(), E = FuncInfo.Fn->arg_end(); I != E; ++I) { - if (!I->use_empty()) { - DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I); - assert(VI != LocalValueMap.end() && "Missed an argument?"); - FuncInfo.ValueMap[I] = VI->second; - } + DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I); + assert(VI != LocalValueMap.end() && "Missed an argument?"); + FuncInfo.ValueMap[I] = VI->second; } return true; } @@ -601,7 +597,10 @@ bool FastISel::SelectCall(const User *I) { case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call); - if (!DIVariable(DI->getVariable()).Verify() || + DIVariable DIVar(DI->getVariable()); + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgDeclareInst should be either null or a DIVariable."); + if (!DIVar || !FuncInfo.MF->getMMI().hasDebugInfo()) { DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; @@ -613,16 +612,16 @@ bool FastISel::SelectCall(const User *I) { return true; } - unsigned Reg = 0; unsigned Offset = 0; - if (const Argument *Arg = dyn_cast<Argument>(Address)) { + Optional<MachineOperand> Op; + if (const Argument *Arg = dyn_cast<Argument>(Address)) // Some arguments' frame index is recorded during argument lowering. Offset = FuncInfo.getArgumentFrameIndex(Arg); - if (Offset) - Reg = TRI.getFrameRegister(*FuncInfo.MF); - } - if (!Reg) - Reg = lookUpRegForValue(Address); + if (Offset) + Op = MachineOperand::CreateFI(Offset); + if (!Op) + if (unsigned Reg = lookUpRegForValue(Address)) + Op = MachineOperand::CreateReg(Reg, false); // If we have a VLA that has a "use" in a metadata node that's then used // here but it has no other uses, then we have a problem. E.g., @@ -635,16 +634,29 @@ bool FastISel::SelectCall(const User *I) { // If we assign 'a' a vreg and fast isel later on has to use the selection // DAG isel, it will want to copy the value to the vreg. However, there are // no uses, which goes counter to what selection DAG isel expects. - if (!Reg && !Address->use_empty() && isa<Instruction>(Address) && + if (!Op && !Address->use_empty() && isa<Instruction>(Address) && (!isa<AllocaInst>(Address) || !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address)))) - Reg = FuncInfo.InitializeRegForValue(Address); - - if (Reg) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(TargetOpcode::DBG_VALUE)) - .addReg(Reg, RegState::Debug).addImm(Offset) - .addMetadata(DI->getVariable()); + Op = MachineOperand::CreateReg(FuncInfo.InitializeRegForValue(Address), + false); + + if (Op) + if (Op->isReg()) { + // Set the indirect flag if the type and the DIVariable's + // indirect field are in disagreement: Indirectly-addressed + // variables that are nonpointer types should be marked as + // indirect, and VLAs should be marked as indirect eventhough + // they are a pointer type. 
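[Editor's note] The dbg_declare rewrite above replaces the Reg/Offset juggling with an Optional<MachineOperand> filled by a preference cascade: a recorded frame index first, then an already-assigned vreg, then (for the VLA corner case) a freshly initialized one; if nothing sticks, the debug info is dropped. A sketch of the cascade using std::optional as a stand-in for llvm::Optional, with a toy operand type instead of MachineOperand:

#include <cstdio>
#include <optional>

struct ToyOperand {
  bool IsReg;     // register operand vs. frame-index operand
  unsigned Value;
};

static std::optional<ToyOperand> pickLocation(unsigned FrameIndex,
                                              unsigned KnownReg) {
  std::optional<ToyOperand> Op;
  if (FrameIndex)      // argument already lowered to a stack slot
    Op = ToyOperand{false, FrameIndex};
  if (!Op && KnownReg) // otherwise an existing virtual register
    Op = ToyOperand{true, KnownReg};
  return Op;           // empty means: drop the DBG_VALUE
}

int main() {
  if (std::optional<ToyOperand> Op = pickLocation(0, 7))
    std::printf("%s %u\n", Op->IsReg ? "reg" : "fi", Op->Value); // reg 7
  if (!pickLocation(0, 0))
    std::puts("no location, debug info dropped");
}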
+ bool IsIndirect = DI->getAddress()->getType()->isPointerTy() + ^ DIVar.isIndirect(); + Op->setIsDebug(true); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::DBG_VALUE), + IsIndirect, Op->getReg(), Offset, DI->getVariable()); + } else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::DBG_VALUE)).addOperand(*Op).addImm(0) + .addMetadata(DI->getVariable()); else // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. @@ -676,9 +688,9 @@ bool FastISel::SelectCall(const User *I) { .addFPImm(CF).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (unsigned Reg = lookUpRegForValue(V)) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) - .addReg(Reg, RegState::Debug).addImm(DI->getOffset()) - .addMetadata(DI->getVariable()); + bool IsIndirect = DI->getOffset() != 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, IsIndirect, + Reg, DI->getOffset(), DI->getVariable()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 86e188a..4309dc1 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -55,15 +55,12 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { return false; } -FunctionLoweringInfo::FunctionLoweringInfo(const TargetMachine &TM) - : TM(TM), TLI(0) { -} - void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { + const TargetLowering *TLI = TM.getTargetLowering(); + Fn = &fn; MF = &mf; RegInfo = &MF->getRegInfo(); - TLI = TM.getTargetLowering(); // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; @@ -115,8 +112,11 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { // in a predictable order. if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) { MachineModuleInfo &MMI = MF->getMMI(); + DIVariable DIVar(DI->getVariable()); + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgDeclareInst should be either null or a DIVariable."); if (MMI.hasDebugInfo() && - DIVariable(DI->getVariable()).Verify() && + DIVar && !DI->getDebugLoc().isUnknown()) { // Don't handle byval struct arguments or VLAs, for example. // Non-byval arguments are handled here (they refer to the stack @@ -209,7 +209,8 @@ void FunctionLoweringInfo::clear() { /// CreateReg - Allocate a single virtual register for the given type. unsigned FunctionLoweringInfo::CreateReg(MVT VT) { - return RegInfo->createVirtualRegister(TLI->getRegClassFor(VT)); + return RegInfo-> + createVirtualRegister(TM.getTargetLowering()->getRegClassFor(VT)); } /// CreateRegs - Allocate the appropriate number of virtual registers of @@ -220,6 +221,8 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT) { /// will assign registers for each member or element. 
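[Editor's note] The IsIndirect computation at the top of this hunk is the comment's rule reduced to one XOR: flag the DBG_VALUE as indirect exactly when the address's pointer-ness and the DIVariable's indirect bit disagree. A table of the four cases as a reading aid (the row interpretations follow the hunk's comment, not any spec):

#include <cstdio>

int main() {
  // IsIndirect = isPointerTy ^ metadataIndirect: only the two
  // "disagreement" rows get the indirect flag.
  for (int Ptr = 0; Ptr <= 1; ++Ptr)
    for (int Meta = 0; Meta <= 1; ++Meta)
      std::printf("isPointerTy=%d metaIndirect=%d -> IsIndirect=%d\n",
                  Ptr, Meta, Ptr ^ Meta);
}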
/// unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) { + const TargetLowering *TLI = TM.getTargetLowering(); + SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*TLI, Ty, ValueVTs); @@ -267,6 +270,8 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { if (!Ty->isIntegerTy() || Ty->isVectorTy()) return; + const TargetLowering *TLI = TM.getTargetLowering(); + SmallVector<EVT, 1> ValueVTs; ComputeValueVTs(*TLI, Ty, ValueVTs); assert(ValueVTs.size() == 1 && diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 3b1abd7..e107276 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -639,8 +639,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, if (SD->getKind() == SDDbgValue::FRAMEIX) { // Stack address; this needs to be lowered in target-dependent fashion. // EmitTargetCodeForFrameDebugValue is responsible for allocation. - unsigned FrameIx = SD->getFrameIx(); - return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL); + return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(SD->getFrameIx()).addImm(Offset).addMetadata(MDPtr); } // Otherwise, we're going to create an instruction here. const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); @@ -678,7 +678,13 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, MIB.addReg(0U); } - MIB.addImm(Offset).addMetadata(MDPtr); + if (Offset != 0) // Indirect addressing. + MIB.addImm(Offset); + else + MIB.addReg(0U, RegState::Debug); + + MIB.addMetadata(MDPtr); + return &*MIB; } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index a9c2203..920dda8 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -1,4 +1,4 @@ -//===---- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG class ---==// +//===- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG -*- C++ -*--==// // // The LLVM Compiler Infrastructure // diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 224fa5f..bd844e5 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -610,7 +610,7 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, false, false, 0); // Load the updated vector. return DAG.getLoad(VT, dl, Ch, StackPtr, - MachinePointerInfo::getFixedStack(SPFI), false, false, + MachinePointerInfo::getFixedStack(SPFI), false, false, false, 0); } @@ -1493,7 +1493,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { StoreChain = DAG.getEntryNode(); // Result is a load from the stack slot. - return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, + return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, false, 0); } @@ -1553,9 +1553,9 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1); // Select between the nabs and abs value based on the sign bit of // the input. 
- return DAG.getNode(ISD::SELECT, dl, AbsVal.getValueType(), SignBit, - DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), - AbsVal); + return DAG.getSelect(dl, AbsVal.getValueType(), SignBit, + DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), + AbsVal); } void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, @@ -1614,12 +1614,12 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, unsigned Opc = 0; switch (CCCode) { default: llvm_unreachable("Don't know how to expand this condition!"); - case ISD::SETO: + case ISD::SETO: assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT) == TargetLowering::Legal && "If SETO is expanded, SETOEQ must be legal!"); CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break; - case ISD::SETUO: + case ISD::SETUO: assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT) == TargetLowering::Legal && "If SETUO is expanded, SETUNE must be legal!"); @@ -1629,12 +1629,12 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, case ISD::SETOGE: case ISD::SETOLT: case ISD::SETOLE: - case ISD::SETONE: - case ISD::SETUEQ: - case ISD::SETUNE: - case ISD::SETUGT: - case ISD::SETUGE: - case ISD::SETULT: + case ISD::SETONE: + case ISD::SETUEQ: + case ISD::SETUNE: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULT: case ISD::SETULE: // If we are floating point, assign and break, otherwise fall through. if (!OpVT.isInteger()) { @@ -1663,7 +1663,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, CC = SDValue(); return; } - + SDValue SetCC1, SetCC2; if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { // If we aren't the ordered or unorder operation, @@ -2136,7 +2136,7 @@ static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, static bool useSinCos(SDNode *Node) { unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN ? ISD::FCOS : ISD::FSIN; - + SDValue Op0 = Node->getOperand(0); for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), UE = Op0.getNode()->use_end(); UI != UE; ++UI) { @@ -2164,25 +2164,25 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, case MVT::f128: LC = RTLIB::SINCOS_F128; break; case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break; } - + // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. SDValue InChain = DAG.getEntryNode(); - + EVT RetVT = Node->getValueType(0); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); - + TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - + // Pass the argument. Entry.Node = Node->getOperand(0); Entry.Ty = RetTy; Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); - + // Pass the return address of sin. SDValue SinPtr = DAG.CreateStackTemporary(RetVT); Entry.Node = SinPtr; @@ -2190,7 +2190,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); - + // Also pass the return address of the cos. SDValue CosPtr = DAG.CreateStackTemporary(RetVT); Entry.Node = CosPtr; @@ -2198,10 +2198,10 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); - + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - + SDLoc dl(Node); TargetLowering:: CallLoweringInfo CLI(InChain, Type::getVoidTy(*DAG.getContext()), @@ -2335,7 +2335,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, //pseudo-op, or, even better, for whole-function isel. 
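[Editor's note] useSinCos and ExpandSinCosLibCall above spot a sin and a cos of the same operand and merge them into one sincos libcall, passing two stack temporaries as out-parameters (the three ArgListEntry pushes in the hunk). The calling convention being targeted, sketched with a portable stand-in since sincos itself is a GNU/BSD libm extension:

#include <cmath>
#include <cstdio>

// Same signature shape as the sincos libcall: one input, two
// out-pointers, void return. A real sincos shares the (expensive)
// argument reduction between the two results.
static void toy_sincos(double X, double *Sin, double *Cos) {
  *Sin = std::sin(X);
  *Cos = std::cos(X);
}

int main() {
  double S, C; // the legalizer's two stack temporaries
  toy_sincos(1.0, &S, &C);
  std::printf("sin=%f cos=%f\n", S, C);
}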
SDValue SignBitTest = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT); - return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast); + return DAG.getSelect(dl, MVT::f32, SignBitTest, Slow, Fast); } // Otherwise, implement the fully general conversion. @@ -2348,11 +2348,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, DAG.getConstant(UINT64_C(0x7ff), MVT::i64)); SDValue Ne = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE); - SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0); + SDValue Sel = DAG.getSelect(dl, MVT::i64, Ne, Or, Op0); SDValue Ge = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64), ISD::SETUGE); - SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0); + SDValue Sel2 = DAG.getSelect(dl, MVT::i64, Ge, Sel, Op0); EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType()); SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, @@ -2375,7 +2375,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, Op0, DAG.getConstant(0, Op0.getValueType()), ISD::SETLT); SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4); - SDValue CstOffset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), + SDValue CstOffset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Four, Zero); // If the sign bit of the integer is set, the large number will be treated @@ -2928,7 +2928,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(0), Tmp1)); False = DAG.getNode(ISD::XOR, dl, NVT, False, DAG.getConstant(x, NVT)); - Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, True, False); + Tmp1 = DAG.getSelect(dl, NVT, Tmp2, True, False); Results.push_back(Tmp1); break; } @@ -2940,7 +2940,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { unsigned Align = Node->getConstantOperandVal(3); SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, - MachinePointerInfo(V), + MachinePointerInfo(V), false, false, false, 0); SDValue VAList = VAListLoad; @@ -3031,7 +3031,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // cast operands to v8i32 and re-build the mask. // Calculate new VT, the size of the new VT should be equal to original. 
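[Editor's note] The INT_TO_FP expansion above (now phrased with DAG.getSelect) picks between a fast path, a plain signed convert valid when the sign bit is clear, and a slow path that halves the value while preserving the low bit for rounding, converts, and doubles back. The same structure as scalar C++; the u64-to-i64 conversion is implementation-defined in older C++ but two's complement in practice:

#include <cstdint>
#include <cstdio>

static float u64ToF32(uint64_t V) {
  float Fast = (float)(int64_t)V;           // exact route when V < 2^63
  uint64_t Halved = (V >> 1) | (V & 1);     // keep the low bit for rounding
  float Slow = (float)(int64_t)Halved * 2.0f;
  return ((int64_t)V < 0) ? Slow : Fast;    // the select on the sign bit
}

int main() {
  uint64_t V = 0x8000000000000100ull;
  std::printf("%f %f\n", u64ToF32(V), (float)V); // both print 2^63
}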
- EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, VT.getSizeInBits()/NewEltVT.getSizeInBits()); assert(NewVT.bitsEq(VT)); @@ -3071,11 +3071,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (Idx < NumElems) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0, - DAG.getIntPtrConstant(Idx))); + DAG.getConstant(Idx, TLI.getVectorIdxTy()))); else Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1, - DAG.getIntPtrConstant(Idx - NumElems))); + DAG.getConstant(Idx - NumElems, + TLI.getVectorIdxTy()))); } Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); @@ -3140,7 +3141,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp2 = DAG.getSetCC(dl, getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, ISD::SETUGT); Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1); - Tmp1 = DAG.getNode(ISD::SELECT, dl, VT, Tmp2, Tmp1, Tmp3); + Tmp1 = DAG.getSelect(dl, VT, Tmp2, Tmp1, Tmp3); Results.push_back(Tmp1); break; } @@ -3269,22 +3270,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(ExpandConstantFP(CFP, true)); break; } - case ISD::EHSELECTION: { - unsigned Reg = TLI.getExceptionSelectorRegister(); - assert(Reg && "Can't expand to unknown register!"); - Results.push_back(DAG.getCopyFromReg(Node->getOperand(1), dl, Reg, - Node->getValueType(0))); - Results.push_back(Results[0].getValue(1)); - break; - } - case ISD::EXCEPTIONADDR: { - unsigned Reg = TLI.getExceptionPointerRegister(); - assert(Reg && "Can't expand to unknown register!"); - Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg, - Node->getValueType(0))); - Results.push_back(Results[0].getValue(1)); - break; - } case ISD::FSUB: { EVT VT = Node->getValueType(0); assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) && @@ -3704,10 +3689,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { for (unsigned Idx = 0; Idx < NumElem; Idx++) { SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), - Node->getOperand(0), DAG.getIntPtrConstant(Idx)); + Node->getOperand(0), DAG.getConstant(Idx, + TLI.getVectorIdxTy())); SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), - Node->getOperand(1), DAG.getIntPtrConstant(Idx)); + Node->getOperand(1), DAG.getConstant(Idx, + TLI.getVectorIdxTy())); Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, VT.getScalarType(), Ex, Sh)); } @@ -3762,8 +3749,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT), ISD::SETEQ); - Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, - DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); + Tmp1 = DAG.getSelect(dl, NVT, Tmp2, + DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); } else if (Node->getOpcode() == ISD::CTLZ || Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) @@ -3858,7 +3845,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); // Perform the larger operation, then round down. 
- Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp1, Tmp2, Tmp3); + Tmp1 = DAG.getSelect(dl, NVT, Tmp1, Tmp2, Tmp3); if (TruncOp != ISD::FP_ROUND) Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1); else diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index f6df211..cea0b02 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -503,7 +503,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { if (L->getExtensionType() == ISD::NON_EXTLOAD) { NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), - L->getPointerInfo(), NVT, L->isVolatile(), + L->getPointerInfo(), NVT, L->isVolatile(), L->isNonTemporal(), false, L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -526,8 +526,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(1)); SDValue RHS = GetSoftenedFloat(N->getOperand(2)); - return DAG.getNode(ISD::SELECT, SDLoc(N), - LHS.getValueType(), N->getOperand(0),LHS,RHS); + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), N->getOperand(0), LHS, RHS); } SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { @@ -855,9 +855,9 @@ void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, GetExpandedFloat(N->getOperand(0), Lo, Tmp); Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp); // Lo = Hi==fabs(Hi) ? Lo : -Lo; - Lo = DAG.getNode(ISD::SELECT_CC, dl, Lo.getValueType(), Tmp, Hi, Lo, + Lo = DAG.getSelectCC(dl, Tmp, Hi, Lo, DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo), - DAG.getCondCode(ISD::SETEQ)); + ISD::SETEQ); } void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, @@ -1216,8 +1216,8 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble, APInt(128, Parts)), MVT::ppcf128)); - Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT), - Lo, Hi, DAG.getCondCode(ISD::SETLT)); + Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, SrcVT), + Lo, Hi, ISD::SETLT); GetPairElements(Lo, Lo, Hi); } @@ -1370,17 +1370,17 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128); // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. 
- return DAG.getNode(ISD::SELECT_CC, dl, MVT::i32, N->getOperand(0), Tmp, - DAG.getNode(ISD::ADD, dl, MVT::i32, - DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, - DAG.getNode(ISD::FSUB, dl, - MVT::ppcf128, - N->getOperand(0), - Tmp)), - DAG.getConstant(0x80000000, MVT::i32)), - DAG.getNode(ISD::FP_TO_SINT, dl, - MVT::i32, N->getOperand(0)), - DAG.getCondCode(ISD::SETGE)); + return DAG.getSelectCC(dl, N->getOperand(0), Tmp, + DAG.getNode(ISD::ADD, dl, MVT::i32, + DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, + DAG.getNode(ISD::FSUB, dl, + MVT::ppcf128, + N->getOperand(0), + Tmp)), + DAG.getConstant(0x80000000, MVT::i32)), + DAG.getNode(ISD::FP_TO_SINT, dl, + MVT::i32, N->getOperand(0)), + ISD::SETGE); } RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index c281553..ff8f1f9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -483,8 +483,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); - return DAG.getNode(ISD::SELECT, SDLoc(N), - LHS.getValueType(), N->getOperand(0),LHS,RHS); + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), N->getOperand(0), LHS, RHS); } SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { @@ -966,7 +966,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, assert(OpNo == 2 && "Different operand and result vector types?"); // Promote the index. - SDValue Idx = ZExtPromotedInteger(N->getOperand(2)); + SDValue Idx = DAG.getZExtOrTrunc(N->getOperand(2), SDLoc(N), + TLI.getVectorIdxTy()); return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Idx), 0); } @@ -1138,7 +1139,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ATOMIC_LOAD_MAX: case ISD::ATOMIC_LOAD_UMIN: case ISD::ATOMIC_LOAD_UMAX: - case ISD::ATOMIC_SWAP: { + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_CMP_SWAP: { std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N); SplitInteger(Tmp.first, Lo, Hi); ReplaceValueWith(SDValue(N, 1), Tmp.second); @@ -1478,8 +1480,8 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { LoL = DAG.getConstant(0, NVT); // Lo part is zero. HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part. - Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); - Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; case ISD::SRL: // Short: ShAmt < NVTBits @@ -1494,8 +1496,8 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { HiL = DAG.getConstant(0, NVT); // Hi part is zero. LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part. - Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); - Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; case ISD::SRA: // Short: ShAmt < NVTBits @@ -1511,8 +1513,8 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { DAG.getConstant(NVTBits-1, ShTy)); LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part. 
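The getSelectCC change above also drops the separate DAG.getCondCode(ISD::SETGE) operand: the helper takes the ISD::CondCode directly. The expansion it reformats is the classic unsigned-from-signed conversion dance spelled out in the source comment: a value at or above 2^31 is shifted down into signed range, converted, and the sign bit added back. A scalar model of that trick in plain C++, assuming truncating float-to-int conversion:

#include <cstdint>
#include <cstdio>

// Model of the FP_TO_UINT expansion above:
//   X >= 2^31 ? (uint32_t)(int32_t)(X - 2^31) + 0x80000000 : (uint32_t)(int32_t)X
uint32_t fpToUint32(double X) {
  const double Two31 = 2147483648.0;
  if (X >= Two31)
    return (uint32_t)(int32_t)(X - Two31) + 0x80000000u;
  return (uint32_t)(int32_t)X;
}

int main() {
  printf("%u\n", fpToUint32(3000000000.0)); // 3000000000
  return 0;
}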
- Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); - Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; } } @@ -1558,13 +1560,13 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], ISD::SETULT); - SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1, - DAG.getConstant(1, NVT), - DAG.getConstant(0, NVT)); + SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); SDValue Cmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[1], ISD::SETULT); - SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2, - DAG.getConstant(1, NVT), Carry1); + SDValue Carry2 = DAG.getSelect(dl, NVT, Cmp2, + DAG.getConstant(1, NVT), Carry1); Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); } else { Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); @@ -1572,9 +1574,9 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, SDValue Cmp = DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()), LoOps[0], LoOps[1], ISD::SETULT); - SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, - DAG.getConstant(1, NVT), - DAG.getConstant(0, NVT)); + SDValue Borrow = DAG.getSelect(dl, NVT, Cmp, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); } } @@ -1725,9 +1727,9 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo); SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi); - Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ, - DAG.getNode(ISD::ADD, dl, NVT, LoLZ, - DAG.getConstant(NVT.getSizeInBits(), NVT))); + Lo = DAG.getSelect(dl, NVT, HiNotZero, HiLZ, + DAG.getNode(ISD::ADD, dl, NVT, LoLZ, + DAG.getConstant(NVT.getSizeInBits(), NVT))); Hi = DAG.getConstant(0, NVT); } @@ -1755,9 +1757,9 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo); SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi); - Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ, - DAG.getNode(ISD::ADD, dl, NVT, HiLZ, - DAG.getConstant(NVT.getSizeInBits(), NVT))); + Lo = DAG.getSelect(dl, NVT, LoNotZero, LoLZ, + DAG.getNode(ISD::ADD, dl, NVT, HiLZ, + DAG.getConstant(NVT.getSizeInBits(), NVT))); Hi = DAG.getConstant(0, NVT); } @@ -2289,14 +2291,14 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, // make sure we aren't using 0. 
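The ExpandIntRes_ADDSUB hunk above shows how a double-width add is expanded when no ADDC/ADDE pair is available: add the low halves, derive the carry from an unsigned compare of the sum against an input, and fold it into the high half (now built with getSelect instead of raw ISD::SELECT nodes). The DAG version compares the sum against both operands and merges the results; mathematically either compare alone detects the wrap, which is what this scalar model uses:

#include <cstdint>
#include <cstdio>

// A 64-bit add out of 32-bit halves, with the carry recovered by comparison.
void add64(uint32_t ALo, uint32_t AHi, uint32_t BLo, uint32_t BHi,
           uint32_t &Lo, uint32_t &Hi) {
  Lo = ALo + BLo;
  uint32_t Carry = Lo < ALo ? 1u : 0u; // sum wrapped iff it is below an input
  Hi = AHi + BHi + Carry;
}

int main() {
  uint32_t Lo, Hi;
  add64(0xffffffffu, 0u, 1u, 0u, Lo, Hi);
  printf("0x%x:%08x\n", Hi, Lo); // 0x1:00000000
  return 0;
}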
SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(VT), RHS, DAG.getConstant(0, VT), ISD::SETEQ); - SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero, - DAG.getConstant(1, VT), RHS); + SDValue NotZero = DAG.getSelect(dl, VT, isZero, + DAG.getConstant(1, VT), RHS); SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero); SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS, ISD::SETNE); - Overflow = DAG.getNode(ISD::SELECT, dl, N->getValueType(1), isZero, - DAG.getConstant(0, N->getValueType(1)), - Overflow); + Overflow = DAG.getSelect(dl, N->getValueType(1), isZero, + DAG.getConstant(0, N->getValueType(1)), + Overflow); ReplaceValueWith(SDValue(N, 1), Overflow); return; } @@ -2304,7 +2306,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, Type *RetTy = VT.getTypeForEVT(*DAG.getContext()); EVT PtrVT = TLI.getPointerTy(); Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); - + // Replace this with a libcall that will check overflow. RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i32) @@ -2601,8 +2603,8 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, if (!NewLHS.getNode()) NewLHS = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, ISD::SETEQ); - NewLHS = DAG.getNode(ISD::SELECT, dl, Tmp1.getValueType(), - NewLHS, Tmp1, Tmp2); + NewLHS = DAG.getSelect(dl, Tmp1.getValueType(), + NewLHS, Tmp1, Tmp2); NewRHS = SDValue(); } @@ -2830,8 +2832,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Zero = DAG.getIntPtrConstant(0); SDValue Four = DAG.getIntPtrConstant(4); if (TLI.isBigEndian()) std::swap(Zero, Four); - SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet, - Zero, Four); + SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, + Zero, Four); unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment(); FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset); Alignment = std::min(Alignment, 4u); @@ -2885,7 +2887,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { // Extract the element from the original vector. SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(), - BaseIdx, DAG.getIntPtrConstant(i)); + BaseIdx, DAG.getConstant(i, BaseIdx.getValueType())); SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InVT.getVectorElementType(), N->getOperand(0), Index); @@ -2929,7 +2931,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { SmallVector<SDValue, 8> Ops; Ops.reserve(NumElems); for (unsigned i = 0; i != NumElems; ++i) { - SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i)); + SDValue Op; + // BUILD_VECTOR integer operand types are allowed to be larger than the + // result's element type. This may still be true after the promotion. For + // example, we might be promoting (<v?i1> = BV <i32>, <i32>, ...) to + // (v?i16 = BV <i32>, <i32>, ...), and we can't any_extend <i32> to <i16>. 
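The XMULO expansion here verifies a same-width multiply by dividing the truncated product back out and comparing against the original operand, with two selects guarding the zero-divisor case (x * 0 never overflows). A scalar model of the check for the unsigned flavor:

#include <cstdint>
#include <cstdio>

// Returns true iff Lhs * Rhs wraps; Mul receives the truncated product.
bool umulOverflows(uint32_t Lhs, uint32_t Rhs, uint32_t &Mul) {
  Mul = Lhs * Rhs;                        // wrapping multiply
  uint32_t NotZero = Rhs == 0 ? 1u : Rhs; // "make sure we aren't using 0"
  bool Overflow = (Mul / NotZero) != Lhs;
  return Rhs == 0 ? false : Overflow;
}

int main() {
  uint32_t Mul;
  printf("%d\n", (int)umulOverflows(0x10000u, 0x10000u, Mul)); // 1: 2^32 wraps
  return 0;
}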
+ if (N->getOperand(i).getValueType().bitsLT(NOutVTElem)) + Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i)); + else + Op = N->getOperand(i); Ops.push_back(Op); } @@ -2975,7 +2985,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { SDValue Op = N->getOperand(i); for (unsigned j = 0; j < NumElem; ++j) { SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - InElemTy, Op, DAG.getIntPtrConstant(j)); + InElemTy, Op, DAG.getConstant(j, + TLI.getVectorIdxTy())); Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); } } @@ -3002,7 +3013,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDLoc dl(N); SDValue V0 = GetPromotedInteger(N->getOperand(0)); - SDValue V1 = N->getOperand(1); + SDValue V1 = DAG.getZExtOrTrunc(N->getOperand(1), dl, TLI.getVectorIdxTy()); SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, V0->getValueType(0).getScalarType(), V0, V1); @@ -3030,7 +3041,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { for (unsigned i=0; i<NumElem; ++i) { // Extract element from incoming vector SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, - Incoming, DAG.getIntPtrConstant(i)); + Incoming, DAG.getConstant(i, TLI.getVectorIdxTy())); SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex); NewOps.push_back(Tr); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index ef79662..fd770d1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -615,7 +615,10 @@ void DAGTypeLegalizer::RemapValue(SDValue &N) { // replaced with other values. RemapValue(I->second); N = I->second; - assert(N.getNode()->getNodeId() != NewNode && "Mapped to new node!"); + + // Note that it is possible to have N.getNode()->getNodeId() == NewNode at + // this point because it is possible for a node to be put in the map before + // being processed. } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index e2597d6..63e9af3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -653,7 +653,7 @@ private: /// loads to load a vector with a resulting wider type. It takes /// LdChain: list of chains for the load to be generated. /// Ld: load to widen - SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, + SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, LoadSDNode *LD); /// GenWidenVectorExtLoads - Helper function to generate a set of extension @@ -661,20 +661,20 @@ private: /// LdChain: list of chains for the load to be generated. 
/// Ld: load to widen /// ExtType: extension element type - SDValue GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, + SDValue GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, LoadSDNode *LD, ISD::LoadExtType ExtType); /// Helper genWidenVectorStores - Helper function to generate a set of /// stores to store a widen vector into non widen memory /// StChain: list of chains for the stores we have generated /// ST: store of a widen value - void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, StoreSDNode *ST); + void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST); /// Helper genWidenVectorTruncStores - Helper function to generate a set of /// stores to store a truncate widen vector into non widen memory /// StChain: list of chains for the stores we have generated /// ST: store of a widen value - void GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, + void GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST); /// Modifies a vector input (widen or narrows) to a vector of NVT. The diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 1ede3dc..96f6143 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -80,9 +80,10 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), InVT.getVectorNumElements()/2); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + DAG.getConstant(InNVT.getVectorNumElements(), + TLI.getVectorIdxTy())); if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); @@ -115,7 +116,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { SmallVector<SDValue, 8> Vals; for (unsigned i = 0; i < NumElems; ++i) Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, - CastInOp, DAG.getIntPtrConstant(i))); + CastInOp, DAG.getConstant(i, + TLI.getVectorIdxTy()))); // Build Lo, Hi pair by pairing extracted elements if needed. unsigned Slot = 0; @@ -161,7 +163,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { false, false, 0); // Load the first half from the stack slot. - Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, false, 0); // Increment the pointer to the other half. @@ -227,10 +229,6 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector. SDValue Idx = N->getOperand(1); - // Make sure the type of Idx is big enough to hold the new values. 
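A substitution that recurs through the rest of the patch: vector element indices move from DAG.getIntPtrConstant(i) to DAG.getConstant(i, TLI.getVectorIdxTy()), so EXTRACT/INSERT_VECTOR_ELT indices carry one target-chosen type. That is also why the block deleted just above — hand-widening Idx to the pointer type — goes away: callers now normalize the index once, as the getZExtOrTrunc hunks earlier do. A toy version of converting at the boundary rather than at every use; VecIdxT is a stand-in name, not an LLVM type:

#include <cstdint>
#include <cstdio>

typedef uint64_t VecIdxT; // whatever the target designates as its index type

int extractElt(const int *Vec, VecIdxT Idx) { return Vec[Idx]; }

int main() {
  int V[4] = {10, 20, 30, 40};
  uint16_t Narrow = 2;                            // narrower caller-side index
  printf("%d\n", extractElt(V, (VecIdxT)Narrow)); // converted once, at entry
  return 0;
}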
- if (Idx.getValueType().bitsLT(TLI.getPointerTy())) - Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); - Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); @@ -406,7 +404,8 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx); Idx = DAG.getNode(ISD::ADD, dl, - Idx.getValueType(), Idx, DAG.getIntPtrConstant(1)); + Idx.getValueType(), Idx, + DAG.getConstant(1, Idx.getValueType())); NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx); // Convert the new vector to the old vector type. @@ -495,9 +494,9 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, unsigned NumElements = Cond.getValueType().getVectorNumElements(); EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElements / 2); CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); CH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond, - DAG.getIntPtrConstant(NumElements / 2)); + DAG.getConstant(NumElements / 2, TLI.getVectorIdxTy())); } Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 071a0b8..bbe11b8 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -551,7 +551,7 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { SmallVector<SDValue, 8> Stores; for (unsigned Idx = 0; Idx < NumElem; Idx++) { SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - RegSclVT, Value, DAG.getIntPtrConstant(Idx)); + RegSclVT, Value, DAG.getConstant(Idx, TLI.getVectorIdxTy())); // This scalar TruncStore may be illegal, but we legalize it later. SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, @@ -572,7 +572,7 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { // Lower a select instruction where the condition is a scalar and the // operands are vectors. Lower this select to VSELECT and implement it - // using XOR AND OR. The selector bit is broadcasted. + // using XOR AND OR. The selector bit is broadcasted. EVT VT = Op.getValueType(); SDLoc DL(Op); @@ -605,7 +605,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { // What is the size of each element in the vector mask. 
EVT BitTy = MaskTy.getScalarType(); - Mask = DAG.getNode(ISD::SELECT, DL, BitTy, Mask, + Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy), DAG.getConstant(0, BitTy)); @@ -755,16 +755,16 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { SmallVector<SDValue, 8> Ops(NumElems); for (unsigned i = 0; i < NumElems; ++i) { SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, - DAG.getIntPtrConstant(i)); + DAG.getConstant(i, TLI.getVectorIdxTy())); SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, - DAG.getIntPtrConstant(i)); + DAG.getConstant(i, TLI.getVectorIdxTy())); Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(*DAG.getContext(), TmpEltVT), LHSElem, RHSElem, CC); - Ops[i] = DAG.getNode(ISD::SELECT, dl, EltVT, Ops[i], - DAG.getConstant(APInt::getAllOnesValue - (EltVT.getSizeInBits()), EltVT), - DAG.getConstant(0, EltVT)); + Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], + DAG.getConstant(APInt::getAllOnesValue + (EltVT.getSizeInBits()), EltVT), + DAG.getConstant(0, EltVT)); } return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index a36dca6..54380ec 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -274,16 +274,17 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { break; } } - return DAG.getNode(ISD::SELECT, SDLoc(N), - LHS.getValueType(), Cond, LHS, - GetScalarizedVector(N->getOperand(2))); + + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), Cond, LHS, + GetScalarizedVector(N->getOperand(2))); } SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(1)); - return DAG.getNode(ISD::SELECT, SDLoc(N), - LHS.getValueType(), N->getOperand(0), LHS, - GetScalarizedVector(N->getOperand(2))); + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), N->getOperand(0), LHS, + GetScalarizedVector(N->getOperand(2))); } SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { @@ -711,7 +712,8 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, - DAG.getIntPtrConstant(IdxVal + LoVT.getVectorNumElements())); + DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), + TLI.getVectorIdxTy())); } void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, @@ -753,7 +755,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, Lo.getValueType(), Lo, Elt, Idx); else Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, - DAG.getIntPtrConstant(IdxVal - LoNumElts)); + DAG.getConstant(IdxVal - LoNumElts, + TLI.getVectorIdxTy())); return; } @@ -852,14 +855,16 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); LL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); LH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + DAG.getConstant(InNVT.getVectorNumElements(), + TLI.getVectorIdxTy())); 
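ExpandSELECT and UnrollVSETCC above avoid branches by materializing the condition as an all-ones or all-zeros value and blending with the "XOR AND OR" pattern the comment names: keep A's bits where the mask is set, B's where it is clear. A scalar sketch of that blend:

#include <cstdint>
#include <cstdio>

uint32_t blend(bool Cond, uint32_t A, uint32_t B) {
  uint32_t Mask = Cond ? 0xffffffffu : 0u; // broadcast the selector bit
  uint32_t NotMask = Mask ^ 0xffffffffu;   // the XOR of "XOR AND OR"
  return (A & Mask) | (B & NotMask);
}

int main() {
  printf("0x%x 0x%x\n", blend(true, 0xaaaaaaaau, 0x55555555u),
                        blend(false, 0xaaaaaaaau, 0x55555555u));
  return 0; // prints 0xaaaaaaaa 0x55555555
}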
RL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); RH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + DAG.getConstant(InNVT.getVectorNumElements(), + TLI.getVectorIdxTy())); Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); @@ -881,9 +886,10 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + DAG.getConstant(InNVT.getVectorNumElements(), + TLI.getVectorIdxTy())); } if (N->getOpcode() == ISD::FP_ROUND) { @@ -994,7 +1000,8 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, // Extract the vector element by hand. SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Inputs[Input], DAG.getIntPtrConstant(Idx))); + Inputs[Input], DAG.getConstant(Idx, + TLI.getVectorIdxTy()))); } // Construct the Lo/Hi output using a BUILD_VECTOR. @@ -1030,6 +1037,10 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { dbgs() << "\n"); SDValue Res = SDValue(); + // See if the target wants to custom split this node. + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + if (Res.getNode() == 0) { switch (N->getOpcode()) { default: @@ -1108,8 +1119,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) { assert(LoNumElts == HiNumElts && "Asymmetric vector split?"); LLVMContext &Ctx = *DAG.getContext(); - SDValue Zero = DAG.getIntPtrConstant(0); - SDValue LoElts = DAG.getIntPtrConstant(LoNumElts); + SDValue Zero = DAG.getConstant(0, TLI.getVectorIdxTy()); + SDValue LoElts = DAG.getConstant(LoNumElts, TLI.getVectorIdxTy()); EVT Src0VT = Src0.getValueType(); EVT Src0EltTy = Src0VT.getVectorElementType(); EVT MaskEltTy = MaskVT.getVectorElementType(); @@ -1284,7 +1295,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { for (unsigned i = 0, e = Op.getValueType().getVectorNumElements(); i != e; ++i) { Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, - Op, DAG.getIntPtrConstant(i))); + Op, DAG.getConstant(i, TLI.getVectorIdxTy()))); } } @@ -1333,9 +1344,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), NumElements/2); SDValue InLoVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); SDValue InHiVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec, - DAG.getIntPtrConstant(NumElements/2)); + DAG.getConstant(NumElements/2, + TLI.getVectorIdxTy())); // Truncate them to 1/2 the element size. 
EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, @@ -1562,9 +1574,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { while (CurNumElts != 0) { while (CurNumElts >= NumElts) { SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI.getVectorIdxTy())); SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI.getVectorIdxTy())); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); Idx += NumElts; CurNumElts -= NumElts; @@ -1577,9 +1589,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { if (NumElts == 1) { for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp1, DAG.getIntPtrConstant(Idx)); + InOp1, DAG.getConstant(Idx, + TLI.getVectorIdxTy())); SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp2, DAG.getIntPtrConstant(Idx)); + InOp2, DAG.getConstant(Idx, + TLI.getVectorIdxTy())); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2); } @@ -1617,7 +1631,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { unsigned NumToInsert = ConcatEnd - Idx - 1; for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, - ConcatOps[OpIdx], DAG.getIntPtrConstant(i)); + ConcatOps[OpIdx], DAG.getConstant(i, + TLI.getVectorIdxTy())); } ConcatOps[Idx+1] = VecOp; ConcatEnd = Idx + 2; @@ -1705,7 +1720,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { if (InVTNumElts % WidenNumElts == 0) { SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, - InOp, DAG.getIntPtrConstant(0)); + InOp, DAG.getConstant(0, + TLI.getVectorIdxTy())); // Extract the input and convert the shorten input vector. if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVal); @@ -1720,7 +1736,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { unsigned i; for (i=0; i < MinElts; ++i) { SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, - DAG.getIntPtrConstant(i)); + DAG.getConstant(i, TLI.getVectorIdxTy())); if (N->getNumOperands() == 1) Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); else @@ -1871,7 +1887,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { SDLoc dl(N); // Build a vector with undefined for the new nodes. EVT VT = N->getValueType(0); - EVT EltVT = VT.getVectorElementType(); + + // Integer BUILD_VECTOR operands may be larger than the node's vector element + // type. The UNDEFs need to have the same type as the existing operands. + EVT EltVT = N->getOperand(0).getValueType(); unsigned NumElts = VT.getVectorNumElements(); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); @@ -1945,7 +1964,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getIntPtrConstant(j)); + DAG.getConstant(j, TLI.getVectorIdxTy())); } SDValue UndefVal = DAG.getUNDEF(EltVT); for (; Idx < WidenNumElts; ++Idx) @@ -2003,7 +2022,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { if (InVTNumElts % WidenNumElts == 0) { // Extract the input and convert the shorten input vector. 
InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, SatOp, CvtCode); } @@ -2019,7 +2038,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { unsigned i; for (i=0; i < MinElts; ++i) { SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getIntPtrConstant(i)); + DAG.getConstant(i, TLI.getVectorIdxTy())); Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, SatOp, CvtCode); } @@ -2062,7 +2081,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { unsigned i; for (i=0; i < NumElts; ++i) Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getIntPtrConstant(IdxVal+i)); + DAG.getConstant(IdxVal+i, TLI.getVectorIdxTy())); SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) @@ -2289,7 +2308,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { for (unsigned i=0; i < NumElts; ++i) Ops[i] = DAG.getNode(Opcode, dl, EltVT, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getIntPtrConstant(i))); + DAG.getConstant(i, TLI.getVectorIdxTy()))); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); } @@ -2310,7 +2329,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { if (TLI.isTypeLegal(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); } } @@ -2338,7 +2357,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getIntPtrConstant(j)); + DAG.getConstant(j, TLI.getVectorIdxTy())); } return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); } @@ -2393,7 +2412,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SVT.getVectorElementType(), N->getValueType(0).getVectorNumElements()); SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, - ResVT, WideSETCC, DAG.getIntPtrConstant(0)); + ResVT, WideSETCC, DAG.getConstant(0, + TLI.getVectorIdxTy())); return PromoteTargetBoolean(CC, N->getValueType(0)); } @@ -2464,8 +2484,9 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, // LDOps: Load operators to build a vector type // [Start,End) the list of loads to use. static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, - SmallVector<SDValue, 16>& LdOps, + SmallVectorImpl<SDValue> &LdOps, unsigned Start, unsigned End) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc dl(LdOps[Start]); EVT LdTy = LdOps[Start].getValueType(); unsigned Width = VecTy.getSizeInBits(); @@ -2486,12 +2507,12 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, LdTy = NewLdTy; } VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], - DAG.getIntPtrConstant(Idx++)); + DAG.getConstant(Idx++, TLI.getVectorIdxTy())); } return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); } -SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, +SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, LoadSDNode *LD) { // The strategy assumes that we can efficiently load powers of two widths. 
// The routines chops the vector into the largest vector loads with the same @@ -2645,8 +2666,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, } SDValue -DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, - LoadSDNode * LD, +DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, + LoadSDNode *LD, ISD::LoadExtType ExtType) { // For extension loads, it may not be more efficient to chop up the vector // and then extended it. Instead, we unroll the load and build a new vector. @@ -2693,7 +2714,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, } -void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, +void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST) { // The strategy assumes that we can efficiently store powers of two widths. // The routines chops the vector into the largest vector stores with the same @@ -2725,7 +2746,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, unsigned NumVTElts = NewVT.getVectorNumElements(); do { SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI.getVectorIdxTy())); StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, @@ -2745,7 +2766,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, Idx = Idx * ValEltWidth / NewVTWidth; do { SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, - DAG.getIntPtrConstant(Idx++)); + DAG.getConstant(Idx++, TLI.getVectorIdxTy())); StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, @@ -2762,7 +2783,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, } void -DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, +DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST) { // For extension loads, it may not be more efficient to truncate the vector // and then store it. Instead, we extract each element and then store it. 
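GenWidenVectorLoads and GenWidenVectorStores, whose signatures are loosened above, both follow the strategy their comments describe: chop the access into the largest legal power-of-two pieces first, then progressively smaller pieces for the remainder. A sketch of that chopping loop, assuming 16 bytes is the widest legal access:

#include <cstdio>

void chopStores(unsigned Bytes) {
  unsigned Offset = 0;
  for (unsigned Chunk = 16; Chunk != 0; Chunk /= 2)
    while (Offset + Chunk <= Bytes) { // largest piece that still fits
      printf("store %2u bytes at offset %2u\n", Chunk, Offset);
      Offset += Chunk;
    }
}

int main() {
  chopStores(28); // 16 @ 0, 8 @ 16, 4 @ 24
  return 0;
}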
@@ -2790,7 +2811,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, unsigned Increment = ValEltVT.getSizeInBits() / 8; unsigned NumElts = StVT.getVectorNumElements(); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), StEltVT, isVolatile, isNonTemporal, Align)); @@ -2799,7 +2820,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Offset)); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset), StEltVT, isVolatile, isNonTemporal, @@ -2836,7 +2857,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); // Fall back to extract and build. SmallVector<SDValue, 16> Ops(WidenNumElts); @@ -2845,7 +2866,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { unsigned Idx; for (Idx = 0; Idx < MinNumElts; ++Idx) Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI.getVectorIdxTy())); SDValue UndefVal = DAG.getUNDEF(EltVT); for ( ; Idx < WidenNumElts; ++Idx) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index d1f36cb..6c5e0ab 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -102,8 +102,8 @@ private: void InsertCopiesAndMoveSuccs(SUnit*, unsigned, const TargetRegisterClass*, const TargetRegisterClass*, - SmallVector<SUnit*, 2>&); - bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); + SmallVectorImpl<SUnit*>&); + bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&); void ListScheduleBottomUp(); /// forceUnitLatencies - The fast scheduler doesn't care about real latencies. @@ -387,7 +387,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC, - SmallVector<SUnit*, 2> &Copies) { + SmallVectorImpl<SUnit*> &Copies) { SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL)); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; @@ -448,7 +448,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, std::vector<SUnit*> &LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, - SmallVector<unsigned, 4> &LRegs, + SmallVectorImpl<unsigned> &LRegs, const TargetRegisterInfo *TRI) { bool Added = false; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { @@ -467,7 +467,7 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, /// If the specific node is the last one that's available to schedule, do /// whatever is necessary (i.e. backtracking or cloning) to make it possible. 
bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, - SmallVector<unsigned, 4> &LRegs){ + SmallVectorImpl<unsigned> &LRegs){ if (NumLiveRegs == 0) return false; @@ -567,7 +567,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { // "expensive to copy" values to break the dependency. In case even // that doesn't work, insert cross class copies. SUnit *TrySU = NotReady[0]; - SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index a7daf87..f5fe168 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -229,8 +229,8 @@ private: void InsertCopiesAndMoveSuccs(SUnit*, unsigned, const TargetRegisterClass*, const TargetRegisterClass*, - SmallVector<SUnit*, 2>&); - bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); + SmallVectorImpl<SUnit*>&); + bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&); void releaseInterferences(unsigned Reg = 0); @@ -1133,9 +1133,9 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { /// InsertCopiesAndMoveSuccs - Insert register copies and move all /// scheduled successors of the given SUnit to the last copy. void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - SmallVector<SUnit*, 2> &Copies) { + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, + SmallVectorImpl<SUnit*> &Copies) { SUnit *CopyFromSU = CreateNewSUnit(NULL); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; @@ -1205,7 +1205,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, std::vector<SUnit*> &LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, - SmallVector<unsigned, 4> &LRegs, + SmallVectorImpl<unsigned> &LRegs, const TargetRegisterInfo *TRI) { for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) { @@ -1227,7 +1227,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, std::vector<SUnit*> &LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, - SmallVector<unsigned, 4> &LRegs) { + SmallVectorImpl<unsigned> &LRegs) { // Look at all live registers. Skip Reg0 and the special CallResource. for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) { if (!LiveRegDefs[i]) continue; @@ -1252,7 +1252,7 @@ static const uint32_t *getNodeRegMask(const SDNode *N) { /// If the specific node is the last one that's available to schedule, do /// whatever is necessary (i.e. backtracking or cloning) to make it possible. 
bool ScheduleDAGRRList:: -DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { +DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { if (NumLiveRegs == 0) return false; @@ -1331,7 +1331,7 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { SUnit *SU = Interferences[i-1]; LRegsMapT::iterator LRegsPos = LRegsMap.find(SU); if (Reg) { - SmallVector<unsigned, 4> &LRegs = LRegsPos->second; + SmallVectorImpl<unsigned> &LRegs = LRegsPos->second; if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end()) continue; } @@ -1385,7 +1385,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // to resolve it. for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { SUnit *TrySU = Interferences[i]; - SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; // Try unscheduling up to the point where it's safe to schedule // this node. @@ -1433,7 +1433,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // insert cross class copies. // If it's not too expensive, i.e. cost != -1, issue copies. SUnit *TrySU = Interferences[0]; - SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; @@ -2401,7 +2401,8 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { bool RHasPhysReg = right->hasPhysRegDefs; if (LHasPhysReg != RHasPhysReg) { #ifndef NDEBUG - const char *const PhysRegMsg[] = {" has no physreg"," defines a physreg"}; + static const char *const PhysRegMsg[] = { " has no physreg", + " defines a physreg" }; #endif DEBUG(dbgs() << " SU (" << left->NodeNum << ") " << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") " diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index ad06473..982dcc9 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -700,11 +700,10 @@ namespace { } /// ProcessSDDbgValues - Process SDDbgValues associated with this node. -static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, - InstrEmitter &Emitter, - SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders, - DenseMap<SDValue, unsigned> &VRBaseMap, - unsigned Order) { +static void +ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, + SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders, + DenseMap<SDValue, unsigned> &VRBaseMap, unsigned Order) { if (!N->getHasDebugValue()) return; @@ -731,11 +730,11 @@ static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, // ProcessSourceNode - Process nodes with source order numbers. These are added // to a vector which EmitSchedule uses to determine how to insert dbg_value // instructions in the right order. 
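The signature churn across ScheduleDAGFast.cpp and ScheduleDAGRRList.cpp is one idea applied repeatedly: helpers that took SmallVector<T, N>& now take SmallVectorImpl<T>&, the size-erased base class, so one definition serves callers with any inline capacity instead of hard-coding the 2s and 4s of the old signatures. A stripped-down analogue of that class split — growth and error handling are omitted, and this is not LLVM's implementation:

#include <cstdio>

template <typename T> class SmallVecImpl {
protected:
  T *Buf;
  unsigned Len, Cap;
  SmallVecImpl(T *B, unsigned C) : Buf(B), Len(0), Cap(C) {}
public:
  void push_back(const T &V) { if (Len < Cap) Buf[Len++] = V; }
  unsigned size() const { return Len; }
};

template <typename T, unsigned N> class SmallVec : public SmallVecImpl<T> {
  T Storage[N]; // inline capacity lives only in the derived class
public:
  SmallVec() : SmallVecImpl<T>(Storage, N) {}
};

// Written once against the base, callable with any inline capacity:
void collect(SmallVecImpl<unsigned> &LRegs) {
  LRegs.push_back(1);
  LRegs.push_back(2);
}

int main() {
  SmallVec<unsigned, 4> A;
  SmallVec<unsigned, 8> B;
  collect(A);
  collect(B);
  printf("%u %u\n", A.size(), B.size()); // 2 2
  return 0;
}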
-static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, - InstrEmitter &Emitter, - DenseMap<SDValue, unsigned> &VRBaseMap, - SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders, - SmallSet<unsigned, 8> &Seen) { +static void +ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, + DenseMap<SDValue, unsigned> &VRBaseMap, + SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders, + SmallSet<unsigned, 8> &Seen) { unsigned Order = N->getIROrder(); if (!Order || !Seen.insert(Order)) { // Process any valid SDDbgValues even if node does not have any order diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index affd9e0..bc6063c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -864,14 +864,13 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { PointerType::get(Type::getInt8Ty(*getContext()), 0) : VT.getTypeForEVT(*getContext()); - return TLI.getDataLayout()->getABITypeAlignment(Ty); + return TM.getTargetLowering()->getDataLayout()->getABITypeAlignment(Ty); } // EntryNode could meaningfully have debug info if we can find it... SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) - : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()), - TTI(0), OptLevel(OL), EntryNode(ISD::EntryToken, 0, DebugLoc(), - getVTList(MVT::Other)), + : TM(tm), TSI(*tm.getSelectionDAGInfo()), TTI(0), OptLevel(OL), + EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), UpdateListeners(0) { AllNodes.push_back(&EntryNode); DbgInfo = new SDDbgInfo(); @@ -972,13 +971,15 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { EVT EltVT = VT.getScalarType(); const ConstantInt *Elt = &Val; + const TargetLowering *TLI = TM.getTargetLowering(); + // In some cases the vector type is legal but the element type is illegal and // needs to be promoted, for example v8i8 on ARM. In this case, promote the // inserted value (the type does not need to match the vector element type). // Any extra bits introduced will be truncated away. - if (VT.isVector() && TLI.getTypeAction(*getContext(), EltVT) == + if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) == TargetLowering::TypePromoteInteger) { - EltVT = TLI.getTypeToTransformTo(*getContext(), EltVT); + EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits()); Elt = ConstantInt::get(*getContext(), NewVal); } @@ -1011,7 +1012,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { } SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) { - return getConstant(Val, TLI.getPointerTy(), isTarget); + return getConstant(Val, TM.getTargetLowering()->getPointerTy(), isTarget); } @@ -1078,7 +1079,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, "Cannot set target flags on target-independent globals"); // Truncate (with sign-extension) the offset value to the pointer size. 
- unsigned BitWidth = TLI.getPointerTy().getSizeInBits(); + unsigned BitWidth = TM.getTargetLowering()->getPointerTy().getSizeInBits(); if (BitWidth < 64) Offset = SignExtend64(Offset, BitWidth); @@ -1155,7 +1156,8 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType()); + Alignment = + TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); @@ -1182,7 +1184,8 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType()); + Alignment = + TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); @@ -1512,7 +1515,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { EVT OpTy = Op.getValueType(); - EVT ShTy = TLI.getShiftAmountTy(LHSTy); + EVT ShTy = TM.getTargetLowering()->getShiftAmountTy(LHSTy); if (OpTy == ShTy || OpTy.isVector()) return Op; ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; @@ -1525,11 +1528,12 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); unsigned ByteSize = VT.getStoreSize(); Type *Ty = VT.getTypeForEVT(*getContext()); + const TargetLowering *TLI = TM.getTargetLowering(); unsigned StackAlign = - std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), minAlign); + std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), minAlign); int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); - return getFrameIndex(FrameIdx, TLI.getPointerTy()); + return getFrameIndex(FrameIdx, TLI->getPointerTy()); } /// CreateStackTemporary - Create a stack temporary suitable for holding @@ -1539,13 +1543,14 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { VT2.getStoreSizeInBits())/8; Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); - const DataLayout *TD = TLI.getDataLayout(); + const TargetLowering *TLI = TM.getTargetLowering(); + const DataLayout *TD = TLI->getDataLayout(); unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), TD->getPrefTypeAlignment(Ty2)); MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false); - return getFrameIndex(FrameIdx, TLI.getPointerTy()); + return getFrameIndex(FrameIdx, TLI->getPointerTy()); } SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, @@ -1674,6 +1679,7 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, /// processing. void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth) const { + const TargetLowering *TLI = TM.getTargetLowering(); unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. 
@@ -1796,7 +1802,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, // The boolean result conforms to getBooleanContents. Fall through. case ISD::SETCC: // If we know the result of a setcc has the top bits zero, use this info. - if (TLI.getBooleanContents(Op.getValueType().isVector()) == + if (TLI->getBooleanContents(Op.getValueType().isVector()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); return; @@ -2108,7 +2114,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: // Allow the target to implement this method for its nodes. - TLI.computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); + TLI->computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); return; } } @@ -2119,6 +2125,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, /// information. For example, immediately after an "SRA X, 2", we know that /// the top 3 bits are all equal to each other, so we return 3. unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ + const TargetLowering *TLI = TM.getTargetLowering(); EVT VT = Op.getValueType(); assert(VT.isInteger() && "Invalid VT!"); unsigned VTBits = VT.getScalarType().getSizeInBits(); @@ -2203,7 +2210,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ // The boolean result conforms to getBooleanContents. Fall through. case ISD::SETCC: // If setcc returns 0/-1, all bits are sign bits. - if (TLI.getBooleanContents(Op.getValueType().isVector()) == + if (TLI->getBooleanContents(Op.getValueType().isVector()) == TargetLowering::ZeroOrNegativeOneBooleanContent) return VTBits; break; @@ -2304,7 +2311,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || Op.getOpcode() == ISD::INTRINSIC_VOID) { - unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth); + unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, Depth); if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits); } @@ -3639,7 +3646,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); // Don't promote to an alignment that would require dynamic stack - // realignment. + // realignment. const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) while (NewAlign > Align && @@ -3923,10 +3930,12 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, // beyond the given memory regions. But fixing this isn't easy, and most // people don't care. + const TargetLowering *TLI = TM.getTargetLowering(); + // Emit a library call. 
TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext()); + Entry.Ty = TLI->getDataLayout()->getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); @@ -3934,13 +3943,13 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, TargetLowering:: CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMCPY), + TLI->getLibcallCallingConv(RTLIB::MEMCPY), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, - getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY), - TLI.getPointerTy()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), + TLI->getPointerTy()), Args, *this, dl); - std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } @@ -3979,10 +3988,12 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, // FIXME: If the memmove is volatile, lowering it to plain libc memmove may // not be safe. See memcpy above for more details. + const TargetLowering *TLI = TM.getTargetLowering(); + // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext()); + Entry.Ty = TLI->getDataLayout()->getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); @@ -3990,13 +4001,13 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, TargetLowering:: CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMMOVE), + TLI->getLibcallCallingConv(RTLIB::MEMMOVE), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, - getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE), - TLI.getPointerTy()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), + TLI->getPointerTy()), Args, *this, dl); - std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } @@ -4032,7 +4043,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, return Result; // Emit a library call. 
- Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*getContext()); + const TargetLowering *TLI = TM.getTargetLowering(); + Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; @@ -4054,13 +4066,13 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, TargetLowering:: CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMSET), + TLI->getLibcallCallingConv(RTLIB::MEMSET), /*isTailCall=*/false, /*doesNotReturn*/false, /*isReturnValueUsed=*/false, - getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), - TLI.getPointerTy()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), + TLI->getPointerTy()), Args, *this, dl); - std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } @@ -5884,7 +5896,7 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { ClonedDVs.push_back(Clone); } } - for (SmallVector<SDDbgValue *, 2>::iterator I = ClonedDVs.begin(), + for (SmallVectorImpl<SDDbgValue *>::iterator I = ClonedDVs.begin(), E = ClonedDVs.end(); I != E; ++I) AddDbgValue(*I, ToNode, false); } @@ -6065,9 +6077,10 @@ bool SDNode::hasPredecessor(const SDNode *N) const { return hasPredecessorHelper(N, Visited, Worklist); } -bool SDNode::hasPredecessorHelper(const SDNode *N, - SmallPtrSet<const SDNode *, 32> &Visited, - SmallVector<const SDNode *, 16> &Worklist) const { +bool +SDNode::hasPredecessorHelper(const SDNode *N, + SmallPtrSet<const SDNode *, 32> &Visited, + SmallVectorImpl<const SDNode *> &Worklist) const { if (Visited.empty()) { Worklist.push_back(this); } else { @@ -6122,11 +6135,12 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { EVT OperandVT = Operand.getValueType(); if (OperandVT.isVector()) { // A vector operand; extract a single element. + const TargetLowering *TLI = TM.getTargetLowering(); EVT OperandEltVT = OperandVT.getVectorElementType(); Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, - getConstant(i, TLI.getPointerTy())); + getConstant(i, TLI->getVectorIdxTy())); } else { // A scalar operand; just use it as is. Operands[j] = Operand; @@ -6204,8 +6218,9 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, const GlobalValue *GV2 = NULL; int64_t Offset1 = 0; int64_t Offset2 = 0; - bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); - bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); + const TargetLowering *TLI = TM.getTargetLowering(); + bool isGA1 = TLI->isGAPlusOffset(Loc.getNode(), GV1, Offset1); + bool isGA2 = TLI->isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); if (isGA1 && isGA2 && GV1 == GV2) return Offset1 == (Offset2 + Dist*Bytes); return false; @@ -6218,11 +6233,12 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { // If this is a GlobalAddress + cst, return the alignment. 
const GlobalValue *GV; int64_t GVOffset = 0; - if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - unsigned PtrWidth = TLI.getPointerTy().getSizeInBits(); + const TargetLowering *TLI = TM.getTargetLowering(); + if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { + unsigned PtrWidth = TLI->getPointerTy().getSizeInBits(); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, - TLI.getDataLayout()); + TLI->getDataLayout()); unsigned AlignBits = KnownZero.countTrailingOnes(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; if (Align) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 21148ae..b9f4381 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -15,6 +15,7 @@ #include "SelectionDAGBuilder.h" #include "SDNodeDbgValue.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" @@ -280,7 +281,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && "Cannot narrow, it would be a lossy transformation"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); } // Vector/Vector bitcast. @@ -489,7 +490,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, SmallVector<SDValue, 16> Ops; for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - ElementVT, Val, DAG.getIntPtrConstant(i))); + ElementVT, Val, DAG.getConstant(i, + TLI.getVectorIdxTy()))); for (unsigned i = ValueVT.getVectorNumElements(), e = PartVT.getVectorNumElements(); i != e; ++i) @@ -515,7 +517,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, assert(ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - PartVT, Val, DAG.getIntPtrConstant(0)); + PartVT, Val, DAG.getConstant(0, TLI.getVectorIdxTy())); bool Smaller = ValueVT.bitsLE(PartVT); Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), @@ -545,10 +547,12 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, if (IntermediateVT.isVector()) Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, - DAG.getIntPtrConstant(i * (NumElements / NumIntermediates))); + DAG.getConstant(i * (NumElements / NumIntermediates), + TLI.getVectorIdxTy())); else Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - IntermediateVT, Val, DAG.getIntPtrConstant(i)); + IntermediateVT, Val, + DAG.getConstant(i, TLI.getVectorIdxTy())); } // Split the intermediate operands into legal parts. @@ -717,6 +721,14 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, unsigned NumSignBits = LOI->NumSignBits; unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes(); + if (NumZeroBits == RegSize) { + // The current value is a zero. + // Explicitly express that as it would be easier for + // optimizations to kick in. + Parts[i] = DAG.getConstant(0, RegisterVT); + continue; + } + // FIXME: We capture more information than the dag can represent. For // now, just use the tightest assertzext/assertsext possible. 
bool isSExt = true; @@ -1008,7 +1020,8 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); if (It != FuncInfo.ValueMap.end()) { unsigned InReg = It->second; - RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); + RegsForValue RFV(*DAG.getContext(), *TM.getTargetLowering(), + InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, NULL, V); resolveDanglingDebugInfo(V, N); @@ -1039,8 +1052,10 @@ SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { /// getValueImpl - Helper function for getValue and getNonRegisterValue. /// Create an SDValue for the given value. SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { + const TargetLowering *TLI = TM.getTargetLowering(); + if (const Constant *C = dyn_cast<Constant>(V)) { - EVT VT = TLI.getValueType(V->getType(), true); + EVT VT = TLI->getValueType(V->getType(), true); if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) return DAG.getConstant(*CI, VT); @@ -1049,7 +1064,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); if (isa<ConstantPointerNull>(C)) - return DAG.getConstant(0, TLI.getPointerTy()); + return DAG.getConstant(0, TLI->getPointerTy()); if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) return DAG.getConstantFP(*CFP, VT); @@ -1080,7 +1095,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { return DAG.getMergeValues(&Constants[0], Constants.size(), getCurSDLoc()); } - + if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(C)) { SmallVector<SDValue, 4> Ops; @@ -1103,7 +1118,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { "Unknown struct or array constant!"); SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, C->getType(), ValueVTs); + ComputeValueVTs(*TLI, C->getType(), ValueVTs); unsigned NumElts = ValueVTs.size(); if (NumElts == 0) return SDValue(); // empty struct @@ -1136,7 +1151,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Ops.push_back(getValue(CV->getOperand(i))); } else { assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); - EVT EltVT = TLI.getValueType(VecTy->getElementType()); + EVT EltVT = TLI->getValueType(VecTy->getElementType()); SDValue Op; if (EltVT.isFloatingPoint()) @@ -1157,13 +1172,13 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { DenseMap<const AllocaInst*, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) - return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); + return DAG.getFrameIndex(SI->second, TLI->getPointerTy()); } // If this is an instruction which fast-isel has deferred, select it now. 
if (const Instruction *Inst = dyn_cast<Instruction>(V)) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); - RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); + RegsForValue RFV(*DAG.getContext(), *TLI, InReg, Inst->getType()); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, NULL, V); } @@ -1172,6 +1187,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { + const TargetLowering *TLI = TM.getTargetLowering(); SDValue Chain = getControlRoot(); SmallVector<ISD::OutputArg, 8> Outs; SmallVector<SDValue, 8> OutVals; @@ -1184,7 +1200,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // Leave Outs empty so that LowerReturn won't try to load return // registers the usual way. SmallVector<EVT, 1> PtrValueVTs; - ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), + ComputeValueVTs(*TLI, PointerType::getUnqual(F->getReturnType()), PtrValueVTs); SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); @@ -1192,7 +1208,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); + ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); SmallVector<SDValue, 4> Chains(NumValues); @@ -1211,7 +1227,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { MVT::Other, &Chains[0], NumValues); } else if (I.getNumOperands() != 0) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs); + ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues) { SDValue RetOp = getValue(I.getOperand(0)); @@ -1229,10 +1245,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { ExtendKind = ISD::ZERO_EXTEND; if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) - VT = TLI.getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind); + VT = TLI->getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind); - unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); - MVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); + unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), VT); + MVT PartVT = TLI->getRegisterType(*DAG.getContext(), VT); SmallVector<SDValue, 4> Parts(NumParts); getCopyToParts(DAG, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), @@ -1262,8 +1278,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); CallingConv::ID CallConv = DAG.getMachineFunction().getFunction()->getCallingConv(); - Chain = TLI.LowerReturn(Chain, CallConv, isVarArg, - Outs, OutVals, getCurSDLoc(), DAG); + Chain = TM.getTargetLowering()->LowerReturn(Chain, CallConv, isVarArg, + Outs, OutVals, getCurSDLoc(), + DAG); // Verify that the target's LowerReturn behaved as expected. assert(Chain.getNode() && Chain.getValueType() == MVT::Other && @@ -1462,7 +1479,7 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, /// If we should emit this as a bunch of and/or'd together conditions, return /// false. 
bool -SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){ +SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) { if (Cases.size() != 2) return true; // If this is two comparisons of the same values or'd or and'd together, they @@ -1536,7 +1553,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // jle foo // if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { - if (!TLI.isJumpExpensive() && + if (!TM.getTargetLowering()->isJumpExpensive() && BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || BOp->getOpcode() == Instruction::Or)) { @@ -1608,7 +1625,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, SDValue CmpOp = getValue(CB.CmpMHS); EVT VT = CmpOp.getValueType(); - + if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(false)) { Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), ISD::SETULE); @@ -1659,7 +1676,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { // Emit the code for the jump table assert(JT.Reg != -1U && "Should lower JT Header first!"); - EVT PTy = TLI.getPointerTy(); + EVT PTy = TM.getTargetLowering()->getPointerTy(); SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), JT.Reg, PTy); SDValue Table = DAG.getJumpTable(JT.JTI, PTy); @@ -1687,9 +1704,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // can be used as an index into the jump table in a subsequent basic block. // This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. - SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI.getPointerTy()); + const TargetLowering *TLI = TM.getTargetLowering(); + SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI->getPointerTy()); - unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); + unsigned JumpTableReg = FuncInfo.CreateReg(TLI->getPointerTy()); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; @@ -1698,8 +1716,8 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // for the switch statement if the value being switched on exceeds the largest // case in the switch. SDValue CMP = DAG.getSetCC(getCurSDLoc(), - TLI.getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), + TLI->getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), Sub, DAG.getConstant(JTH.Last - JTH.First,VT), ISD::SETUGT); @@ -1734,15 +1752,16 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, DAG.getConstant(B.First, VT)); // Check range + const TargetLowering *TLI = TM.getTargetLowering(); SDValue RangeCmp = DAG.getSetCC(getCurSDLoc(), - TLI.getSetCCResultType(*DAG.getContext(), + TLI->getSetCCResultType(*DAG.getContext(), Sub.getValueType()), Sub, DAG.getConstant(B.Range, VT), ISD::SETUGT); // Determine the type of the test operands. 
bool UsePtrType = false; - if (!TLI.isTypeLegal(VT)) + if (!TLI->isTypeLegal(VT)) UsePtrType = true; else { for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) @@ -1754,7 +1773,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, } } if (UsePtrType) { - VT = TLI.getPointerTy(); + VT = TLI->getPointerTy(); Sub = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), VT); } @@ -1798,18 +1817,19 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, Reg, VT); SDValue Cmp; unsigned PopCount = CountPopulation_64(B.Mask); + const TargetLowering *TLI = TM.getTargetLowering(); if (PopCount == 1) { // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. Cmp = DAG.getSetCC(getCurSDLoc(), - TLI.getSetCCResultType(*DAG.getContext(), VT), + TLI->getSetCCResultType(*DAG.getContext(), VT), ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), VT), ISD::SETEQ); } else if (PopCount == BB.Range) { // There is only one zero bit in the range, test for it directly. Cmp = DAG.getSetCC(getCurSDLoc(), - TLI.getSetCCResultType(*DAG.getContext(), VT), + TLI->getSetCCResultType(*DAG.getContext(), VT), ShiftOp, DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), ISD::SETNE); @@ -1822,7 +1842,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, SDValue AndOp = DAG.getNode(ISD::AND, getCurSDLoc(), VT, SwitchVal, DAG.getConstant(B.Mask, VT)); Cmp = DAG.getSetCC(getCurSDLoc(), - TLI.getSetCCResultType(*DAG.getContext(), VT), + TLI->getSetCCResultType(*DAG.getContext(), VT), AndOp, DAG.getConstant(0, VT), ISD::SETNE); } @@ -1895,39 +1915,32 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { // If there aren't registers to copy the values into (e.g., during SjLj // exceptions), then don't bother to create these DAG nodes. - if (TLI.getExceptionPointerRegister() == 0 && - TLI.getExceptionSelectorRegister() == 0) + const TargetLowering *TLI = TM.getTargetLowering(); + if (TLI->getExceptionPointerRegister() == 0 && + TLI->getExceptionSelectorRegister() == 0) return; SmallVector<EVT, 2> ValueVTs; - ComputeValueVTs(TLI, LP.getType(), ValueVTs); + ComputeValueVTs(*TLI, LP.getType(), ValueVTs); + assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); - // Insert the EXCEPTIONADDR instruction. - assert(FuncInfo.MBB->isLandingPad() && - "Call to eh.exception not in landing pad!"); - SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); + // Get the two live-in registers as SDValues. The physregs have already been + // copied into virtual registers. SDValue Ops[2]; - Ops[0] = DAG.getRoot(); - SDValue Op1 = DAG.getNode(ISD::EXCEPTIONADDR, getCurSDLoc(), VTs, Ops, 1); - SDValue Chain = Op1.getValue(1); - - // Insert the EHSELECTION instruction. - VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); - Ops[0] = Op1; - Ops[1] = Chain; - SDValue Op2 = DAG.getNode(ISD::EHSELECTION, getCurSDLoc(), VTs, Ops, 2); - Chain = Op2.getValue(1); - Op2 = DAG.getSExtOrTrunc(Op2, getCurSDLoc(), MVT::i32); - - Ops[0] = Op1; - Ops[1] = Op2; + Ops[0] = DAG.getZExtOrTrunc( + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + FuncInfo.ExceptionPointerVirtReg, TLI->getPointerTy()), + getCurSDLoc(), ValueVTs[0]); + Ops[1] = DAG.getZExtOrTrunc( + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + FuncInfo.ExceptionSelectorVirtReg, TLI->getPointerTy()), + getCurSDLoc(), ValueVTs[1]); + + // Merge into one. 
SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(&ValueVTs[0], ValueVTs.size()), &Ops[0], 2); - - std::pair<SDValue, SDValue> RetPair = std::make_pair(Res, Chain); - setValue(&LP, RetPair.first); - DAG.setRoot(RetPair.second); + setValue(&LP, Res); } /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for @@ -2029,12 +2042,11 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, // The last case block won't fall through into 'NextBlock' if we emit the // branches in this order. See if rearranging a case value would help. // We start at the bottom as it's the case with the least weight. - for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I){ + for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I) if (I->BB == NextBlock) { std::swap(*I, BackCase); break; } - } } // Create a CaseBlock record representing a conditional branch to @@ -2061,7 +2073,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, CC = ISD::SETEQ; LHS = SV; RHS = I->High; MHS = NULL; } else { - CC = ISD::SETCC_INVALID; + CC = ISD::SETCC_INVALID; LHS = I->Low; MHS = SV; RHS = I->High; } @@ -2115,7 +2127,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) TSize += I->size(); - if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries())) + const TargetLowering *TLI = TM.getTargetLowering(); + if (!areJTsAllowed(*TLI) || TSize.ult(TLI->getMinimumJumpTableEntries())) return false; APInt Range = ComputeRange(First, Last); @@ -2176,7 +2189,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = DestWeights.find(I->BB); - if (Itr != DestWeights.end()) + if (Itr != DestWeights.end()) Itr->second += I->ExtraWeight; else DestWeights[I->BB] = I->ExtraWeight; @@ -2196,7 +2209,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, } // Create a jump table index for this jump table. - unsigned JTEncoding = TLI.getJumpTableEncoding(); + unsigned JTEncoding = TLI->getJumpTableEncoding(); unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding) ->createJumpTableIndex(DestBBs); @@ -2216,8 +2229,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, CaseRecVector& WorkList, const Value* SV, - MachineBasicBlock *Default, - MachineBasicBlock *SwitchBB) { + MachineBasicBlock* Default, + MachineBasicBlock* SwitchBB) { // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. MachineFunction *CurMF = FuncInfo.MF; @@ -2281,7 +2294,9 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, LSize += J->size(); RSize -= J->size(); } - if (areJTsAllowed(TLI)) { + + const TargetLowering *TLI = TM.getTargetLowering(); + if (areJTsAllowed(*TLI)) { // If our case is dense we *really* should handle it earlier! 
assert((FMetric > 0) && "Should handle dense range earlier!"); } else { @@ -2350,8 +2365,9 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, CaseRecVector& WorkList, const Value* SV, MachineBasicBlock* Default, - MachineBasicBlock *SwitchBB){ - EVT PTy = TLI.getPointerTy(); + MachineBasicBlock* SwitchBB) { + const TargetLowering *TLI = TM.getTargetLowering(); + EVT PTy = TLI->getPointerTy(); unsigned IntPtrBits = PTy.getSizeInBits(); Case& FrontCase = *CR.Range.first; @@ -2362,7 +2378,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, MachineFunction *CurMF = FuncInfo.MF; // If target does not have legal shift left, do not emit bit tests at all. - if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy())) + if (!TLI->isOperationLegal(ISD::SHL, TLI->getPointerTy())) return false; size_t numCmps = 0; @@ -2480,11 +2496,11 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, /// Clusterify - Transform simple list of Cases into list of CaseRange's size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, const SwitchInst& SI) { - + /// Use a shorter form of declaration, and also /// show the we want to use CRSBuilder as Clusterifier. typedef IntegersSubsetMapping<MachineBasicBlock> Clusterifier; - + Clusterifier TheClusterifier; BranchProbabilityInfo *BPI = FuncInfo.BPI; @@ -2494,12 +2510,12 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, const BasicBlock *SuccBB = i.getCaseSuccessor(); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; - TheClusterifier.add(i.getCaseValueEx(), SMBB, + TheClusterifier.add(i.getCaseValueEx(), SMBB, BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0); } - + TheClusterifier.optimize(); - + size_t numCmps = 0; for (Clusterifier::RangeIterator i = TheClusterifier.begin(), e = TheClusterifier.end(); i != e; ++i, ++numCmps) { @@ -2511,7 +2527,7 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, // Changing it to APInt based is a pretty heavy for this commit. Cases.push_back(Case(C.first.getLow().toConstantInt(), C.first.getHigh().toConstantInt(), C.second, W)); - + if (C.first.getLow() != C.first.getHigh()) // A range counts double, since it requires two compares. ++numCmps; @@ -2645,7 +2661,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - EVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType()); + EVT ShiftTy = TM.getTargetLowering()->getShiftAmountTy(Op2.getValueType()); // Coerce the shift amount to the right type if we can. 
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { @@ -2683,7 +2699,8 @@ void SelectionDAGBuilder::visitSDiv(const User &I) { if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() && !isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue()) - setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurSDLoc(), DAG)); + setValue(&I, TM.getTargetLowering()->BuildExactSDIV(Op1, Op2, + getCurSDLoc(), DAG)); else setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1, Op2)); @@ -2699,7 +2716,7 @@ void SelectionDAGBuilder::visitICmp(const User &I) { SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Opcode = getICmpCondCode(predicate); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); } @@ -2714,13 +2731,13 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { ISD::CondCode Condition = getFCmpCondCode(predicate); if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); } void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, I.getType(), ValueVTs); + ComputeValueVTs(*TM.getTargetLowering(), I.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -2748,7 +2765,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { void SelectionDAGBuilder::visitTrunc(const User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N)); } @@ -2756,7 +2773,7 @@ void SelectionDAGBuilder::visitZExt(const User &I) { // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N)); } @@ -2764,51 +2781,52 @@ void SelectionDAGBuilder::visitSExt(const User &I) { // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // SExt also can't be a cast to bool for same reason. 
So, nothing much to do SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPTrunc(const User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + const TargetLowering *TLI = TM.getTargetLowering(); + EVT DestVT = TLI->getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), DestVT, N, - DAG.getTargetConstant(0, TLI.getPointerTy()))); + DAG.getTargetConstant(0, TLI->getPointerTy()))); } -void SelectionDAGBuilder::visitFPExt(const User &I){ +void SelectionDAGBuilder::visitFPExt(const User &I) { // FPExt is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToUI(const User &I) { // FPToUI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToSI(const User &I) { // FPToSI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitUIToFP(const User &I) { // UIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitSIToFP(const User &I){ +void SelectionDAGBuilder::visitSIToFP(const User &I) { // SIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); } @@ -2816,7 +2834,7 @@ void SelectionDAGBuilder::visitPtrToInt(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } @@ -2824,13 +2842,13 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. 
SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } void SelectionDAGBuilder::visitBitCast(const User &I) { SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); // BitCast assures us that source and destination are the same size so this is // either a BITCAST or a no-op. @@ -2842,23 +2860,24 @@ void SelectionDAGBuilder::visitBitCast(const User &I) { } void SelectionDAGBuilder::visitInsertElement(const User &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); SDValue InVal = getValue(I.getOperand(1)); - SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), - TLI.getPointerTy(), - getValue(I.getOperand(2))); + SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), + getCurSDLoc(), TLI.getVectorIdxTy()); setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), - TLI.getValueType(I.getType()), + TM.getTargetLowering()->getValueType(I.getType()), InVec, InVal, InIdx)); } void SelectionDAGBuilder::visitExtractElement(const User &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); - SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), - TLI.getPointerTy(), - getValue(I.getOperand(1))); + SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), + getCurSDLoc(), TLI.getVectorIdxTy()); setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), - TLI.getValueType(I.getType()), InVec, InIdx)); + TM.getTargetLowering()->getValueType(I.getType()), + InVec, InIdx)); } // Utility for visitShuffleVector - Return true if every element in Mask, @@ -2879,8 +2898,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SmallVector<int, 8> Mask; ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); unsigned MaskNumElts = Mask.size(); - - EVT VT = TLI.getValueType(I.getType()); + + const TargetLowering *TLI = TM.getTargetLowering(); + EVT VT = TLI->getValueType(I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); @@ -3002,7 +3022,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { Src = DAG.getUNDEF(VT); else Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT, - Src, DAG.getIntPtrConstant(StartIdx[Input])); + Src, DAG.getConstant(StartIdx[Input], + TLI->getVectorIdxTy())); } // Calculate new mask. @@ -3028,7 +3049,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // replacing the shuffle with extract and build vector. // to insert and build vector. 
EVT EltVT = VT.getVectorElementType(); - EVT PtrVT = TLI.getPointerTy(); + EVT IdxVT = TLI->getVectorIdxTy(); SmallVector<SDValue,8> Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; @@ -3041,7 +3062,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), - EltVT, Src, DAG.getConstant(Idx, PtrVT)); + EltVT, Src, DAG.getConstant(Idx, IdxVT)); } Ops.push_back(Res); @@ -3061,10 +3082,11 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); + const TargetLowering *TLI = TM.getTargetLowering(); SmallVector<EVT, 4> AggValueVTs; - ComputeValueVTs(TLI, AggTy, AggValueVTs); + ComputeValueVTs(*TLI, AggTy, AggValueVTs); SmallVector<EVT, 4> ValValueVTs; - ComputeValueVTs(TLI, ValTy, ValValueVTs); + ComputeValueVTs(*TLI, ValTy, ValValueVTs); unsigned NumAggValues = AggValueVTs.size(); unsigned NumValValues = ValValueVTs.size(); @@ -3101,8 +3123,9 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); + const TargetLowering *TLI = TM.getTargetLowering(); SmallVector<EVT, 4> ValValueVTs; - ComputeValueVTs(TLI, ValTy, ValValueVTs); + ComputeValueVTs(*TLI, ValTy, ValValueVTs); unsigned NumValValues = ValValueVTs.size(); @@ -3150,16 +3173,17 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Ty = cast<SequentialType>(Ty)->getElementType(); // If this is a constant subscript, handle it quickly. + const TargetLowering *TLI = TM.getTargetLowering(); if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->isZero()) continue; uint64_t Offs = TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); SDValue OffsVal; - EVT PTy = TLI.getPointerTy(); + EVT PTy = TLI->getPointerTy(); unsigned PtrBits = PTy.getSizeInBits(); if (PtrBits < 64) OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), - TLI.getPointerTy(), + TLI->getPointerTy(), DAG.getConstant(Offs, MVT::i64)); else OffsVal = DAG.getIntPtrConstant(Offs); @@ -3170,7 +3194,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { } // N = N + Idx * ElementSize; - APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(), + APInt ElementSize = APInt(TLI->getPointerTy().getSizeInBits(), TD->getTypeAllocSize(Ty)); SDValue IdxN = getValue(Idx); @@ -3208,14 +3232,15 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { return; // getValue will auto-populate this. 
Type *Ty = I.getAllocatedType(); - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); + const TargetLowering *TLI = TM.getTargetLowering(); + uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); unsigned Align = - std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), + std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), I.getAlignment()); SDValue AllocSize = getValue(I.getArraySize()); - EVT IntPtr = TLI.getPointerTy(); + EVT IntPtr = TLI->getPointerTy(); if (AllocSize.getValueType() != IntPtr) AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurSDLoc(), IntPtr); @@ -3271,7 +3296,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); + ComputeValueVTs(*TM.getTargetLowering(), Ty, ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -3345,7 +3370,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets); + ComputeValueVTs(*TM.getTargetLowering(), SrcV->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -3419,9 +3444,10 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDValue InChain = getRoot(); - if (TLI.getInsertFencesForAtomic()) + const TargetLowering *TLI = TM.getTargetLowering(); + if (TLI->getInsertFencesForAtomic()) InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, - DAG, TLI); + DAG, *TLI); SDValue L = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, @@ -3431,14 +3457,14 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { getValue(I.getCompareOperand()), getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */, - TLI.getInsertFencesForAtomic() ? Monotonic : Order, + TLI->getInsertFencesForAtomic() ? Monotonic : Order, Scope); SDValue OutChain = L.getValue(1); - if (TLI.getInsertFencesForAtomic()) + if (TLI->getInsertFencesForAtomic()) OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, TLI); + DAG, *TLI); setValue(&I, L); DAG.setRoot(OutChain); @@ -3466,9 +3492,10 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { SDValue InChain = getRoot(); - if (TLI.getInsertFencesForAtomic()) + const TargetLowering *TLI = TM.getTargetLowering(); + if (TLI->getInsertFencesForAtomic()) InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, - DAG, TLI); + DAG, *TLI); SDValue L = DAG.getAtomic(NT, dl, @@ -3477,14 +3504,14 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { getValue(I.getPointerOperand()), getValue(I.getValOperand()), I.getPointerOperand(), 0 /* Alignment */, - TLI.getInsertFencesForAtomic() ? Monotonic : Order, + TLI->getInsertFencesForAtomic() ? 
Monotonic : Order, Scope); SDValue OutChain = L.getValue(1); - if (TLI.getInsertFencesForAtomic()) + if (TLI->getInsertFencesForAtomic()) OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, TLI); + DAG, *TLI); setValue(&I, L); DAG.setRoot(OutChain); @@ -3492,10 +3519,11 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { void SelectionDAGBuilder::visitFence(const FenceInst &I) { SDLoc dl = getCurSDLoc(); + const TargetLowering *TLI = TM.getTargetLowering(); SDValue Ops[3]; Ops[0] = getRoot(); - Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy()); - Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy()); + Ops[1] = DAG.getConstant(I.getOrdering(), TLI->getPointerTy()); + Ops[2] = DAG.getConstant(I.getSynchScope(), TLI->getPointerTy()); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3)); } @@ -3506,7 +3534,8 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDValue InChain = getRoot(); - EVT VT = TLI.getValueType(I.getType()); + const TargetLowering *TLI = TM.getTargetLowering(); + EVT VT = TLI->getValueType(I.getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); @@ -3515,14 +3544,14 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, getValue(I.getPointerOperand()), I.getPointerOperand(), I.getAlignment(), - TLI.getInsertFencesForAtomic() ? Monotonic : Order, + TLI->getInsertFencesForAtomic() ? Monotonic : Order, Scope); SDValue OutChain = L.getValue(1); - if (TLI.getInsertFencesForAtomic()) + if (TLI->getInsertFencesForAtomic()) OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, TLI); + DAG, *TLI); setValue(&I, L); DAG.setRoot(OutChain); @@ -3536,14 +3565,15 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDValue InChain = getRoot(); - EVT VT = TLI.getValueType(I.getValueOperand()->getType()); + const TargetLowering *TLI = TM.getTargetLowering(); + EVT VT = TLI->getValueType(I.getValueOperand()->getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); - if (TLI.getInsertFencesForAtomic()) + if (TLI->getInsertFencesForAtomic()) InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, - DAG, TLI); + DAG, *TLI); SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT, @@ -3551,12 +3581,12 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { getValue(I.getPointerOperand()), getValue(I.getValueOperand()), I.getPointerOperand(), I.getAlignment(), - TLI.getInsertFencesForAtomic() ? Monotonic : Order, + TLI->getInsertFencesForAtomic() ? Monotonic : Order, Scope); - if (TLI.getInsertFencesForAtomic()) + if (TLI->getInsertFencesForAtomic()) OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, TLI); + DAG, *TLI); DAG.setRoot(OutChain); } @@ -3581,12 +3611,13 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Info is set by getTgtMemInstrinsic TargetLowering::IntrinsicInfo Info; - bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); + const TargetLowering *TLI = TM.getTargetLowering(); + bool IsTgtIntrinsic = TLI->getTgtMemIntrinsic(Info, I, Intrinsic); // Add the intrinsic ID as an integer operand if it's not a target intrinsic. 
if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) - Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy())); + Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI->getPointerTy())); // Add all operands of the call to the operand list. for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { @@ -3595,7 +3626,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, I.getType(), ValueVTs); + ComputeValueVTs(*TLI, I.getType(), ValueVTs); if (HasChain) ValueVTs.push_back(MVT::Other); @@ -3633,7 +3664,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, if (!I.getType()->isVoidTy()) { if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) { - EVT VT = TLI.getValueType(PTy); + EVT VT = TLI->getValueType(PTy); Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result); } @@ -4326,7 +4357,8 @@ static unsigned getTruncatedArgReg(const SDValue &N) { return 0; const SDValue &Ext = N.getOperand(0); - if (Ext.getOpcode() == ISD::AssertZext || Ext.getOpcode() == ISD::AssertSext){ + if (Ext.getOpcode() == ISD::AssertZext || + Ext.getOpcode() == ISD::AssertSext) { const SDValue &CFR = Ext.getOperand(0); if (CFR.getOpcode() == ISD::CopyFromReg) return cast<RegisterSDNode>(CFR.getOperand(1))->getReg(); @@ -4349,20 +4381,19 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); - const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); // Ignore inlined function arguments here. DIVariable DV(Variable); if (DV.isInlinedFnArgument(MF.getFunction())) return false; - unsigned Reg = 0; + Optional<MachineOperand> Op; // Some arguments' frame index is recorded during argument lowering. - Offset = FuncInfo.getArgumentFrameIndex(Arg); - if (Offset) - Reg = TRI->getFrameRegister(MF); + if (int FI = FuncInfo.getArgumentFrameIndex(Arg)) + Op = MachineOperand::CreateFI(FI); - if (!Reg && N.getNode()) { + if (!Op && N.getNode()) { + unsigned Reg; if (N.getOpcode() == ISD::CopyFromReg) Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); else @@ -4373,32 +4404,39 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, if (PR) Reg = PR; } + if (Reg) + Op = MachineOperand::CreateReg(Reg, false); } - if (!Reg) { + if (!Op) { // Check if ValueMap has reg number. DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) - Reg = VMI->second; + Op = MachineOperand::CreateReg(VMI->second, false); } - if (!Reg && N.getNode()) { + if (!Op && N.getNode()) // Check if frame index is available. if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode())) if (FrameIndexSDNode *FINode = - dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) { - Reg = TRI->getFrameRegister(MF); - Offset = FINode->getIndex(); - } - } + dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) + Op = MachineOperand::CreateFI(FINode->getIndex()); - if (!Reg) + if (!Op) return false; - MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(), - TII->get(TargetOpcode::DBG_VALUE)) - .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable); - FuncInfo.ArgDbgValues.push_back(&*MIB); + // FIXME: This does not handle register-indirect values at offset 0. 
+ bool IsIndirect = Offset != 0; + if (Op->isReg()) + FuncInfo.ArgDbgValues.push_back(BuildMI(MF, getCurDebugLoc(), + TII->get(TargetOpcode::DBG_VALUE), + IsIndirect, + Op->getReg(), Offset, Variable)); + else + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) + .addOperand(*Op).addImm(Offset).addMetadata(Variable)); + return true; } @@ -4415,6 +4453,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, /// otherwise lower it and return null. const char * SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { + const TargetLowering *TLI = TM.getTargetLowering(); SDLoc sdl = getCurSDLoc(); DebugLoc dl = getCurDebugLoc(); SDValue Res; @@ -4428,17 +4467,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::vaend: visitVAEnd(I); return 0; case Intrinsic::vacopy: visitVACopy(I); return 0; case Intrinsic::returnaddress: - setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI.getPointerTy(), + setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI->getPointerTy(), getValue(I.getArgOperand(0)))); return 0; case Intrinsic::frameaddress: - setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), + setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI->getPointerTy(), getValue(I.getArgOperand(0)))); return 0; case Intrinsic::setjmp: - return &"_setjmp"[!TLI.usesUnderscoreSetJmp()]; + return &"_setjmp"[!TLI->usesUnderscoreSetJmp()]; case Intrinsic::longjmp: - return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; + return &"_longjmp"[!TLI->usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { // Assert for address < 256 since we support only user defined address // spaces. @@ -4500,7 +4539,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); MDNode *Variable = DI.getVariable(); const Value *Address = DI.getAddress(); - if (!Address || !DIVariable(Variable).Verify()) { + DIVariable DIVar(Variable); + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgDeclareInst should be either null or a DIVariable."); + if (!Address || !DIVar) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return 0; } @@ -4575,7 +4617,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::dbg_value: { const DbgValueInst &DI = cast<DbgValueInst>(I); - if (!DIVariable(DI.getVariable()).Verify()) + DIVariable DIVar(DI.getVariable()); + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgValueInst should be either null or a DIVariable."); + if (!DIVar) return 0; MDNode *Variable = DI.getVariable(); @@ -4658,16 +4703,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; case Intrinsic::eh_dwarf_cfa: { SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, - TLI.getPointerTy()); + TLI->getPointerTy()); SDValue Offset = DAG.getNode(ISD::ADD, sdl, - TLI.getPointerTy(), + TLI->getPointerTy(), DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl, - TLI.getPointerTy()), + TLI->getPointerTy()), CfaArg); SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, - TLI.getPointerTy(), - DAG.getConstant(0, TLI.getPointerTy())); - setValue(&I, DAG.getNode(ISD::ADD, sdl, TLI.getPointerTy(), + TLI->getPointerTy(), + DAG.getConstant(0, TLI->getPointerTy())); + setValue(&I, DAG.getNode(ISD::ADD, sdl, TLI->getPointerTy(), FA, Offset)); return 0; } @@ -4757,7 +4802,7 @@ 
SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, MVT::i32); ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, &ShOps[0], 2); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI->getValueType(I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, DAG.getConstant(NewIntrinsic, MVT::i32), @@ -4769,14 +4814,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::x86_avx_vinsertf128_ps_256: case Intrinsic::x86_avx_vinsertf128_si_256: case Intrinsic::x86_avx2_vinserti128: { - EVT DestVT = TLI.getValueType(I.getType()); - EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); + EVT DestVT = TLI->getValueType(I.getType()); + EVT ElVT = TLI->getValueType(I.getArgOperand(1)->getType()); uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) * ElVT.getVectorNumElements(); Res = DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI->getVectorIdxTy())); setValue(&I, Res); return 0; } @@ -4784,12 +4829,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::x86_avx_vextractf128_ps_256: case Intrinsic::x86_avx_vextractf128_si_256: case Intrinsic::x86_avx2_vextracti128: { - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI->getValueType(I.getType()); uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) * DestVT.getVectorNumElements(); Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT, getValue(I.getArgOperand(0)), - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI->getVectorIdxTy())); setValue(&I, Res); return 0; } @@ -4815,7 +4860,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::convertus: Code = ISD::CVT_US; break; case Intrinsic::convertuu: Code = ISD::CVT_UU; break; } - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI->getValueType(I.getType()); const Value *Op1 = I.getArgOperand(0); Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1), DAG.getValueType(DestVT), @@ -4831,23 +4876,23 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(1)), DAG)); return 0; case Intrinsic::log: - setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); return 0; case Intrinsic::log2: - setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); return 0; case Intrinsic::log10: - setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); return 0; case Intrinsic::exp: - setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); return 0; case Intrinsic::exp2: - setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); return 0; case Intrinsic::pow: setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)), DAG, TLI)); + getValue(I.getArgOperand(1)), DAG, *TLI)); return 0; case Intrinsic::sqrt: case 
Intrinsic::fabs: @@ -4885,9 +4930,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(2)))); return 0; case Intrinsic::fmuladd: { - EVT VT = TLI.getValueType(I.getType()); + EVT VT = TLI->getValueType(I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && - TLI.isFMAFasterThanMulAndAdd(VT)){ + TLI->isFMAFasterThanFMulAndFAdd(VT)) { setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), @@ -4958,7 +5003,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::stacksave: { SDValue Op = getRoot(); Res = DAG.getNode(ISD::STACKSAVE, sdl, - DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1); + DAG.getVTList(TLI->getPointerTy(), MVT::Other), &Op, 1); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return 0; @@ -4972,7 +5017,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Emit code into the DAG to store the stack guard onto the stack. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - EVT PtrTy = TLI.getPointerTy(); + EVT PtrTy = TLI->getPointerTy(); SDValue Src = getValue(I.getArgOperand(0)); // The guard's value. AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); @@ -5034,7 +5079,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::adjust_trampoline: { setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, - TLI.getPointerTy(), + TLI->getPointerTy(), getValue(I.getArgOperand(0)))); return 0; } @@ -5064,7 +5109,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::trap: { StringRef TrapFuncName = TM.Options.getTrapFunctionName(); if (TrapFuncName.empty()) { - ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? + ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? ISD::TRAP : ISD::DEBUGTRAP; DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot())); return 0; @@ -5075,9 +5120,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, /*doesNotRet=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), + DAG.getExternalSymbol(TrapFuncName.data(), + TLI->getPointerTy()), Args, DAG, sdl); - std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); + std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI); DAG.setRoot(Result.second); return 0; } @@ -5134,8 +5180,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SmallVector<Value *, 4> Allocas; GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD); - for (SmallVector<Value*, 4>::iterator Object = Allocas.begin(), - E = Allocas.end(); Object != E; ++Object) { + for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), + E = Allocas.end(); Object != E; ++Object) { AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object); // Could not find an Alloca. @@ -5146,7 +5192,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Ops[2]; Ops[0] = getRoot(); - Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true); + Ops[1] = DAG.getFrameIndex(FI, TLI->getPointerTy(), true); unsigned Opcode = (IsStart ? 
ISD::LIFETIME_START : ISD::LIFETIME_END); Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops, 2); @@ -5156,7 +5202,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::invariant_start: // Discard region information. - setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); + setValue(&I, DAG.getUNDEF(TLI->getPointerTy())); return 0; case Intrinsic::invariant_end: // Discard region information. @@ -5182,26 +5228,27 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(RetTy, CS.getAttributes(), Outs, TLI); + const TargetLowering *TLI = TM.getTargetLowering(); + GetReturnInfo(RetTy, CS.getAttributes(), Outs, *TLI); - bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), - DAG.getMachineFunction(), - FTy->isVarArg(), Outs, - FTy->getContext()); + bool CanLowerReturn = TLI->CanLowerReturn(CS.getCallingConv(), + DAG.getMachineFunction(), + FTy->isVarArg(), Outs, + FTy->getContext()); SDValue DemoteStackSlot; int DemoteStackIdx = -100; if (!CanLowerReturn) { - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize( + uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize( FTy->getReturnType()); - unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment( + unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment( FTy->getReturnType()); MachineFunction &MF = DAG.getMachineFunction(); DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); - DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy()); + DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI->getPointerTy()); Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; Entry.isSExt = false; @@ -5262,14 +5309,14 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, } // Check if target-independent constraints permit a tail call here. - // Target-dependent constraints are checked within TLI.LowerCallTo. - if (isTailCall && !isInTailCallPosition(CS, TLI)) + // Target-dependent constraints are checked within TLI->LowerCallTo. + if (isTailCall && !isInTailCallPosition(CS, *TLI)) isTailCall = false; TargetLowering:: CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG, getCurSDLoc(), CS); - std::pair<SDValue,SDValue> Result = TLI.LowerCallTo(CLI); + std::pair<SDValue,SDValue> Result = TLI->LowerCallTo(CLI); assert((isTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); assert((Result.second.getNode() || !Result.first.getNode()) && @@ -5282,14 +5329,14 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SmallVector<EVT, 1> PVTs; Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); - ComputeValueVTs(TLI, PtrRetTy, PVTs); + ComputeValueVTs(*TLI, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); EVT PtrVT = PVTs[0]; SmallVector<EVT, 4> RetTys; SmallVector<uint64_t, 4> Offsets; RetTy = FTy->getReturnType(); - ComputeValueVTs(TLI, RetTy, RetTys, &Offsets); + ComputeValueVTs(*TLI, RetTy, RetTys, &Offsets); unsigned NumValues = RetTys.size(); SmallVector<SDValue, 4> Values(NumValues); @@ -5320,6 +5367,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // As a special case, a null chain means that a tail call has been emitted and // the DAG root is already updated. 
HasTailCall = true; + + // Since there's no actual continuation from this block, nothing can be + // relying on us setting vregs for them. + PendingExports.clear(); } else { DAG.setRoot(Result.second); } @@ -5386,7 +5437,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr, MachinePointerInfo(PtrVal), false /*volatile*/, - false /*nontemporal*/, + false /*nontemporal*/, false /*isinvariant*/, 1 /* align=1 */); if (!ConstantMemory) @@ -5451,10 +5502,11 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // Require that we can find a legal MVT, and only do this if the target // supports unaligned loads of that type. Expanding into byte loads would // bloat the code. + const TargetLowering *TLI = TM.getTargetLowering(); if (ActuallyDoIt && Size->getZExtValue() > 4) { // TODO: Handle 5 byte compare as 4-byte + 1 byte. // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. - if (!TLI.isTypeLegal(LoadVT) ||!TLI.allowsUnalignedMemoryAccesses(LoadVT)) + if (!TLI->isTypeLegal(LoadVT) ||!TLI->allowsUnalignedMemoryAccesses(LoadVT)) ActuallyDoIt = false; } @@ -5464,7 +5516,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal, ISD::SETNE); - EVT CallVT = TLI.getValueType(I.getType(), true); + EVT CallVT = TLI->getValueType(I.getType(), true); setValue(&I, DAG.getZExtOrTrunc(Res, getCurSDLoc(), CallVT)); return true; } @@ -5622,7 +5674,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (!RenameFn) Callee = getValue(I.getCalledValue()); else - Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy()); + Callee = DAG.getExternalSymbol(RenameFn, + TM.getTargetLowering()->getPointerTy()); // Check if we can potentially perform a tail call. More detailed checking is // be done within LowerCallTo, after more information about the call is known. @@ -5817,8 +5870,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { /// ConstraintOperands - Information about all of the constraints. SDISelAsmOperandInfoVector ConstraintOperands; + const TargetLowering *TLI = TM.getTargetLowering(); TargetLowering::AsmOperandInfoVector - TargetConstraints = TLI.ParseConstraints(CS); + TargetConstraints = TLI->ParseConstraints(CS); bool hasMemory = false; @@ -5843,10 +5897,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // corresponding argument. assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); if (StructType *STy = dyn_cast<StructType>(CS.getType())) { - OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo)); + OpVT = TLI->getSimpleValueType(STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpVT = TLI.getSimpleValueType(CS.getType()); + OpVT = TLI->getSimpleValueType(CS.getType()); } ++ResNo; break; @@ -5867,7 +5921,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD). + OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, TD). 
getSimpleVT(); } @@ -5879,7 +5933,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { else { for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { TargetLowering::ConstraintType - CType = TLI.getConstraintType(OpInfo.Codes[j]); + CType = TLI->getConstraintType(OpInfo.Codes[j]); if (CType == TargetLowering::C_Memory) { hasMemory = true; break; @@ -5911,11 +5965,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.ConstraintVT != Input.ConstraintVT) { std::pair<unsigned, const TargetRegisterClass*> MatchRC = - TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); + TLI->getRegForInlineAsmConstraint(OpInfo.ConstraintCode, + OpInfo.ConstraintVT); std::pair<unsigned, const TargetRegisterClass*> InputRC = - TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, - Input.ConstraintVT); + TLI->getRegForInlineAsmConstraint(Input.ConstraintCode, + Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || (MatchRC.second != InputRC.second)) { @@ -5928,7 +5982,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); + TLI->ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); if (OpInfo.ConstraintType == TargetLowering::C_Memory && OpInfo.Type == InlineAsm::isClobber) @@ -5956,16 +6010,16 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), - TLI.getPointerTy()); + TLI->getPointerTy()); } else { // Otherwise, create a stack slot and emit a store to it before the // asm. Type *Ty = OpVal->getType(); - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); - unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty); + uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); + unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); + SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI->getPointerTy()); Chain = DAG.getStore(Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, MachinePointerInfo::getFixedStack(SSFI), @@ -5983,7 +6037,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this constraint is for a specific register, allocate it before // anything else. if (OpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); + GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo); } // Second pass - Loop over all of the operands, assigning virtual or physregs @@ -5994,7 +6048,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // C_Register operands have already been allocated, Other/Memory don't need // to be. if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); + GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. 
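The mechanical change running through the hunks above swaps SelectionDAGBuilder's cached TLI reference for a const TargetLowering pointer fetched from the TargetMachine at each use (the SelectionDAGBuilder.h hunk further down removes the cached member entirely). Along the way, the vinsertf128/vextracti128 hunks also switch subvector indices from DAG.getIntPtrConstant(Idx) to DAG.getConstant(Idx, TLI->getVectorIdxTy()), decoupling vector index width from pointer width. Below is a minimal standalone sketch of why re-querying beats caching; the class names are stand-ins for illustration, not LLVM's real types:

#include <iostream>
#include <memory>

// Stand-in types, not LLVM's: the point is the ownership/lookup pattern.
struct TargetLowering {
  virtual unsigned getPointerSizeInBits() const { return 64; }
  virtual ~TargetLowering() {}
};

struct TargetMachine {
  std::unique_ptr<TargetLowering> Lowering;
  // The lowering object may be replaced (e.g. per subtarget), so clients
  // should hold the stable TargetMachine and re-query on every use.
  const TargetLowering *getTargetLowering() const { return Lowering.get(); }
};

struct Builder {
  const TargetMachine &TM; // stable for the builder's lifetime
  explicit Builder(const TargetMachine &tm) : TM(tm) {}
  void visit() const {
    // Fetched per use; a reference cached at construction time would
    // dangle once TM.Lowering is replaced below.
    const TargetLowering *TLI = TM.getTargetLowering();
    std::cout << TLI->getPointerSizeInBits() << "-bit pointers\n";
  }
};

int main() {
  TargetMachine TM;
  TM.Lowering.reset(new TargetLowering());
  Builder B(TM);
  B.visit();
  TM.Lowering.reset(new TargetLowering()); // lowering object replaced
  B.visit();                               // still safe: no cached reference
  return 0;
}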
@@ -6002,7 +6056,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { AsmNodeOperands.push_back(SDValue()); // reserve space for input chain AsmNodeOperands.push_back( DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), - TLI.getPointerTy())); + TLI->getPointerTy())); // If we have a !srcloc metadata node associated with it, we want to attach // this to the ultimately generated inline asm machineinstr. To do this, we @@ -6025,7 +6079,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, SDValue()); + TLI->ComputeConstraintToUse(OpInfo, SDValue()); // Ideally, we would only check against memory constraints. However, the // meaning of an other constraint can be target-specific and we can't easily @@ -6043,7 +6097,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, - TLI.getPointerTy())); + TLI->getPointerTy())); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. @@ -6065,7 +6119,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add information to the INLINEASM node to know about this output. unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, - TLI.getPointerTy())); + TLI->getPointerTy())); AsmNodeOperands.push_back(OpInfo.CallOperand); break; } @@ -6076,10 +6130,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // we can use. if (OpInfo.AssignedRegs.Regs.empty()) { LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), + Ctx.emitError(CS.getInstruction(), "couldn't allocate output register for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); - break; + Twine(OpInfo.ConstraintCode) + "'"); + return; } // If this is an indirect operand, store through the pointer after the @@ -6096,13 +6150,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add information to the INLINEASM node to know that this register is // set. - OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ? - InlineAsm::Kind_RegDefEarlyClobber : - InlineAsm::Kind_RegDef, - false, - 0, - DAG, - AsmNodeOperands); + OpInfo.AssignedRegs + .AddInlineAsmOperands(OpInfo.isEarlyClobber + ? 
InlineAsm::Kind_RegDefEarlyClobber + : InlineAsm::Kind_RegDef, + false, 0, DAG, AsmNodeOperands); break; } case InlineAsm::isInput: { @@ -6134,10 +6186,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.isIndirect) { // This happens on gcc/testsuite/gcc.dg/pr8788-1.c LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" - " don't know how to handle tied " - "indirect register inputs"); - report_fatal_error("Cannot handle indirect register inputs!"); + Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" + " don't know how to handle tied " + "indirect register inputs"); + return; } RegsForValue MatchedRegs; @@ -6147,14 +6199,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); i != e; ++i) { - if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) + if (const TargetRegisterClass *RC = TLI->getRegClassFor(RegVT)) MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); else { LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), "inline asm error: This value" + Ctx.emitError(CS.getInstruction(), + "inline asm error: This value" " type register class is not natively supported!"); - report_fatal_error("inline asm error: This value type register " - "class is not natively supported!"); + return; } } // Use the produced MatchedRegs object to @@ -6174,7 +6226,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, OpInfo.getMatchedOperand()); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, - TLI.getPointerTy())); + TLI->getPointerTy())); AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); break; } @@ -6186,34 +6238,34 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector<SDValue> Ops; - TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, - Ops, DAG); + TLI->LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, + Ops, DAG); if (Ops.empty()) { LLVMContext &Ctx = *DAG.getContext(); Ctx.emitError(CS.getInstruction(), "invalid operand for inline asm constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); - break; + Twine(OpInfo.ConstraintCode) + "'"); + return; } // Add information to the INLINEASM node to know about this input. unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - TLI.getPointerTy())); + TLI->getPointerTy())); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; } if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); - assert(InOperandVal.getValueType() == TLI.getPointerTy() && + assert(InOperandVal.getValueType() == TLI->getPointerTy() && "Memory operands expect pointer values"); // Add information to the INLINEASM node to know about this input. 
unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - TLI.getPointerTy())); + TLI->getPointerTy())); AsmNodeOperands.push_back(InOperandVal); break; } @@ -6227,17 +6279,18 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { LLVMContext &Ctx = *DAG.getContext(); Ctx.emitError(CS.getInstruction(), "Don't know how to handle indirect register inputs yet " - "for constraint '" + Twine(OpInfo.ConstraintCode) + "'"); - break; + "for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + return; } // Copy the input into the appropriate registers. if (OpInfo.AssignedRegs.Regs.empty()) { LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), + Ctx.emitError(CS.getInstruction(), "couldn't allocate input reg for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); - break; + Twine(OpInfo.ConstraintCode) + "'"); + return; } OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), @@ -6276,7 +6329,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // FIXME: Why don't we do this for inline asms with MRVs? if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { - EVT ResultType = TLI.getValueType(CS.getType()); + EVT ResultType = TLI->getValueType(CS.getType()); // If any of the results of the inline asm is a vector, it may have the // wrong width/num elts. This can happen for register classes that can @@ -6342,8 +6395,9 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) { } void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { - const DataLayout &TD = *TLI.getDataLayout(); - SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurSDLoc(), + const TargetLowering *TLI = TM.getTargetLowering(); + const DataLayout &TD = *TLI->getDataLayout(); + SDValue V = DAG.getVAArg(TLI->getValueType(I.getType()), getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), TD.getABITypeAlignment(I.getType())); @@ -6566,7 +6620,8 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { "Copy from a reg to the same reg!"); assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); - RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); + const TargetLowering *TLI = TM.getTargetLowering(); + RegsForValue RFV(V->getContext(), *TLI, Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, 0, V); PendingExports.push_back(Chain); @@ -6596,13 +6651,15 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; SDLoc dl = SDB->getCurSDLoc(); + const TargetLowering *TLI = getTargetLowering(); const DataLayout *TD = TLI->getDataLayout(); SmallVector<ISD::InputArg, 16> Ins; if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. SmallVector<EVT, 1> ValueVTs; - ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); + ComputeValueVTs(*getTargetLowering(), + PointerType::getUnqual(F.getReturnType()), ValueVTs); // NOTE: Assuming that a pointer will never break down to more than one VT // or one register. 
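A second pattern in the inline-asm hunks above: where constraint lowering fails, the code now calls Ctx.emitError(...) and returns from the visitor, instead of falling through with break or aborting the whole process via report_fatal_error. A self-contained sketch of that error-handling shape; DiagContext and lowerOutputOperand are illustrative names, not LLVM's API:

#include <iostream>
#include <string>

// Illustrative diagnostic sink; LLVMContext::emitError plays this role
// in the patch.
struct DiagContext {
  bool HadError;
  DiagContext() : HadError(false) {}
  void emitError(const std::string &Msg) {
    HadError = true;
    std::cerr << "error: " << Msg << "\n";
  }
};

// Before: diagnose, then break out and keep building a half-formed node,
// or call report_fatal_error and kill the process.
// After: diagnose and return early, leaving the caller a clean way to
// notice the failure and stop.
bool lowerOutputOperand(DiagContext &Ctx, bool RegisterAllocated) {
  if (!RegisterAllocated) {
    Ctx.emitError("couldn't allocate output register for constraint 'r'");
    return false; // early return replaces report_fatal_error()
  }
  // ... build the INLINEASM operands here ...
  return true;
}

int main() {
  DiagContext Ctx;
  lowerOutputOperand(Ctx, false);
  return Ctx.HadError ? 1 : 0;
}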
@@ -6771,7 +6828,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { SDB->setValue(I, Res); if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { - if (LoadSDNode *LNode = + if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(Res.getOperand(0).getNode())) if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) @@ -6869,15 +6926,17 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Remember that this register needs to added to the machine PHI node as // the input for this MBB. SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, PN->getType(), ValueVTs); + const TargetLowering *TLI = TM.getTargetLowering(); + ComputeValueVTs(*TLI, PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; - unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); + unsigned NumRegisters = TLI->getNumRegisters(*DAG.getContext(), VT); for (unsigned i = 0, e = NumRegisters; i != e; ++i) FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); Reg += NumRegisters; } } } + ConstantsOut.clear(); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index bff92ca..ef73c00 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -1,4 +1,4 @@ -//===-- SelectionDAGBuilder.h - Selection-DAG building --------------------===// +//===-- SelectionDAGBuilder.h - Selection-DAG building --------*- c++ -*---===// // // The LLVM Compiler Infrastructure // @@ -278,12 +278,9 @@ private: BitTestInfo Cases; }; -public: - // TLI - This is information that describes the available target features we - // need for lowering. This indicates when operations are unavailable, - // implemented with a libcall, etc. +private: const TargetMachine &TM; - const TargetLowering &TLI; +public: SelectionDAG &DAG; const DataLayout *TD; AliasAnalysis *AA; @@ -328,7 +325,6 @@ public: SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) : CurInst(NULL), SDNodeOrder(0), TM(dag.getTarget()), - TLI(dag.getTargetLoweringInfo()), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), HasTailCall(false) { } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index b6cc7ea..d8ee221 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -92,9 +92,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::RETURNADDR: return "RETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; - case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; - case ISD::LSDAADDR: return "LSDAADDR"; - case ISD::EHSELECTION: return "EHSELECTION"; case ISD::EH_RETURN: return "EH_RETURN"; case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; @@ -500,8 +497,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { DIScope Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); OS << " dbg:"; + assert((!Scope || Scope.isScope()) && + "Scope of a DebugLoc should be null or a DIScope."); // Omit the directory, since it's usually long and uninteresting. 
- if (Scope.Verify()) + if (Scope) OS << Scope.getFilename(); else OS << "<unknown>"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 475017a..01da51c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -275,9 +276,9 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // SelectionDAGISel code //===----------------------------------------------------------------------===// -SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, +SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) : - MachineFunctionPass(ID), TM(tm), TLI(tm.getTargetLowering()), + MachineFunctionPass(ID), TM(tm), FuncInfo(new FunctionLoweringInfo(TM)), CurDAG(new SelectionDAG(tm, OL)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), @@ -401,7 +402,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Insert DBG_VALUE instructions for function arguments to the entry block. for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1]; - unsigned Reg = MI->getOperand(0).getReg(); + bool hasFI = MI->getOperand(0).isFI(); + unsigned Reg = hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) EntryMBB->insert(EntryMBB->begin(), MI); else { @@ -414,16 +416,19 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // If Reg is live-in then update debug info to track its copy in a vreg. DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg); if (LDI != LiveInMap.end()) { + assert(!hasFI && "There's no handling of frame pointer updating here yet " + "- add if needed"); MachineInstr *Def = RegInfo->getVRegDef(LDI->second); MachineBasicBlock::iterator InsertPos = Def; const MDNode *Variable = MI->getOperand(MI->getNumOperands()-1).getMetadata(); - unsigned Offset = MI->getOperand(1).getImm(); + bool IsIndirect = MI->getOperand(1).isImm(); + unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; // Def is never a terminator here, so it is ok to increment InsertPos. BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), - TII.get(TargetOpcode::DBG_VALUE)) - .addReg(LDI->second, RegState::Debug) - .addImm(Offset).addMetadata(Variable); + TII.get(TargetOpcode::DBG_VALUE), + IsIndirect, + LDI->second, Offset, Variable); // If this vreg is directly copied into an exported register then // that COPY instructions also need DBG_VALUE, if it is the only @@ -442,9 +447,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { if (CopyUseMI) { MachineInstr *NewMI = BuildMI(*MF, CopyUseMI->getDebugLoc(), - TII.get(TargetOpcode::DBG_VALUE)) - .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug) - .addImm(Offset).addMetadata(Variable); + TII.get(TargetOpcode::DBG_VALUE), + IsIndirect, + CopyUseMI->getOperand(0).getReg(), + Offset, Variable); MachineBasicBlock::iterator Pos = CopyUseMI; EntryMBB->insertAfter(Pos, NewMI); } @@ -825,12 +831,14 @@ void SelectionDAGISel::PrepareEHLandingPad() { .addSym(Label); // Mark exception register as live in. 
- unsigned Reg = TLI->getExceptionPointerRegister(); - if (Reg) MBB->addLiveIn(Reg); + const TargetLowering *TLI = getTargetLowering(); + const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy()); + if (unsigned Reg = TLI->getExceptionPointerRegister()) + FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); // Mark exception selector register as live in. - Reg = TLI->getExceptionSelectorRegister(); - if (Reg) MBB->addLiveIn(Reg); + if (unsigned Reg = TLI->getExceptionSelectorRegister()) + FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC); } /// isFoldedOrDeadInstruction - Return true if the specified instruction is @@ -929,7 +937,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; if (TM.Options.EnableFastISel) - FastIS = TLI->createFastISel(*FuncInfo, LibInfo); + FastIS = getTargetLowering()->createFastISel(*FuncInfo, LibInfo); // Iterate over all basic blocks in the function. ReversePostOrderTraversal<const Function*> RPOT(&Fn); @@ -968,6 +976,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); // Setup an EH landing-pad block. + FuncInfo->ExceptionPointerVirtReg = 0; + FuncInfo->ExceptionSelectorVirtReg = 0; if (FuncInfo->MBB->isLandingPad()) PrepareEHLandingPad(); @@ -2076,7 +2086,7 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, Result = !::CheckOpcode(Table, Index, N.getNode()); return Index; case SelectionDAGISel::OPC_CheckType: - Result = !::CheckType(Table, Index, N, SDISel.TLI); + Result = !::CheckType(Table, Index, N, SDISel.getTargetLowering()); return Index; case SelectionDAGISel::OPC_CheckChild0Type: case SelectionDAGISel::OPC_CheckChild1Type: @@ -2086,14 +2096,14 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, case SelectionDAGISel::OPC_CheckChild5Type: case SelectionDAGISel::OPC_CheckChild6Type: case SelectionDAGISel::OPC_CheckChild7Type: - Result = !::CheckChildType(Table, Index, N, SDISel.TLI, + Result = !::CheckChildType(Table, Index, N, SDISel.getTargetLowering(), Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Type); return Index; case SelectionDAGISel::OPC_CheckCondCode: Result = !::CheckCondCode(Table, Index, N); return Index; case SelectionDAGISel::OPC_CheckValueType: - Result = !::CheckValueType(Table, Index, N, SDISel.TLI); + Result = !::CheckValueType(Table, Index, N, SDISel.getTargetLowering()); return Index; case SelectionDAGISel::OPC_CheckInteger: Result = !::CheckInteger(Table, Index, N); @@ -2386,7 +2396,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, continue; case OPC_CheckType: - if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break; + if (!::CheckType(MatcherTable, MatcherIndex, N, getTargetLowering())) + break; continue; case OPC_SwitchOpcode: { @@ -2433,7 +2444,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (CaseVT == MVT::iPTR) - CaseVT = TLI->getPointerTy(); + CaseVT = getTargetLowering()->getPointerTy(); // If the VT matches, then we will execute this case. 
if (CurNodeVT == CaseVT) @@ -2455,7 +2466,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckChild2Type: case OPC_CheckChild3Type: case OPC_CheckChild4Type: case OPC_CheckChild5Type: case OPC_CheckChild6Type: case OPC_CheckChild7Type: - if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI, + if (!::CheckChildType(MatcherTable, MatcherIndex, N, getTargetLowering(), Opcode-OPC_CheckChild0Type)) break; continue; @@ -2463,7 +2474,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break; continue; case OPC_CheckValueType: - if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI)) break; + if (!::CheckValueType(MatcherTable, MatcherIndex, N, getTargetLowering())) + break; continue; case OPC_CheckInteger: if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break; @@ -2655,7 +2667,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, for (unsigned i = 0; i != NumVTs; ++i) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; - if (VT == MVT::iPTR) VT = TLI->getPointerTy().SimpleTy; + if (VT == MVT::iPTR) VT = getTargetLowering()->getPointerTy().SimpleTy; VTs.push_back(VT); } @@ -2757,8 +2769,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, bool mayStore = MCID.mayStore(); unsigned NumMemRefs = 0; - for (SmallVector<MachineMemOperand*, 2>::const_iterator I = - MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { + for (SmallVectorImpl<MachineMemOperand *>::const_iterator I = + MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { if ((*I)->isLoad()) { if (mayLoad) ++NumMemRefs; @@ -2774,8 +2786,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, MF->allocateMemRefsArray(NumMemRefs); MachineSDNode::mmo_iterator MemRefsPos = MemRefs; - for (SmallVector<MachineMemOperand*, 2>::const_iterator I = - MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { + for (SmallVectorImpl<MachineMemOperand *>::const_iterator I = + MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { if ((*I)->isLoad()) { if (mayLoad) *MemRefsPos++ = *I; diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index f02e3d6..e3c6306 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -510,7 +510,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // into an AND, as we know the bits will be cleared. // e.g. 
(X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 // NB: it is okay if more bits are known than are requested - if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side + if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side if (KnownOne == KnownOne2) { // set bits are the same on both sides EVT VT = Op.getValueType(); SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); @@ -1992,7 +1992,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { + MVT VT) const { if (Constraint[0] != '{') return std::make_pair(0u, static_cast<TargetRegisterClass*>(0)); assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index 2feea59..6c826de 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -554,7 +554,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { /// _outside_ the computed minimal placement regions have been covered. /// bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, - SmallVector<MachineBasicBlock*, 4>& blks) { + SmallVectorImpl<MachineBasicBlock *> &blks) { if (MBB->succ_size() < 2 && MBB->pred_size() < 2) { bool processThisBlock = false; for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), @@ -629,7 +629,7 @@ bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, /// addUsesForTopLevelLoops - add uses for CSRs used inside top /// level loops to the exit blocks of those loops. /// -bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) { +bool PEI::addUsesForTopLevelLoops(SmallVectorImpl<MachineBasicBlock *> &blks) { bool addedUses = false; // Place restores for top level loops where needed. @@ -674,7 +674,7 @@ bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) { /// multi-entry/exit regions. /// bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, - SmallVector<MachineBasicBlock*, 4> &blks, + SmallVectorImpl<MachineBasicBlock *> &blks, CSRegBlockMap &prevSpills) { bool placedSpills = false; // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB) @@ -736,7 +736,7 @@ bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, /// multi-entry/exit regions. 
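The ShrinkWrapping hunks above, and calcRestorePlacements continuing just below, widen helper signatures from SmallVector<MachineBasicBlock*, 4>& to SmallVectorImpl<MachineBasicBlock*>&. SmallVectorImpl<T> is the size-erased base class of SmallVector<T, N>, so one non-template signature serves callers regardless of the inline capacity they picked. A small sketch against LLVM's real ADT header:

#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Taking SmallVectorImpl<T>& keeps the inline element count N out of the
// signature, so callers with different small sizes share one function.
static int sumAll(const SmallVectorImpl<int> &Xs) {
  int Sum = 0;
  for (unsigned i = 0, e = Xs.size(); i != e; ++i)
    Sum += Xs[i];
  return Sum;
}

int main() {
  SmallVector<int, 4> A;   // small inline buffer
  SmallVector<int, 64> B;  // larger inline buffer, same element type
  A.push_back(1);
  A.push_back(2);
  B.push_back(3);
  return sumAll(A) + sumAll(B) == 6 ? 0 : 1;
}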
 ///
 bool PEI::calcRestorePlacements(MachineBasicBlock* MBB,
-                               SmallVector<MachineBasicBlock*, 4> &blks,
+                               SmallVectorImpl<MachineBasicBlock *> &blks,
                                CSRegBlockMap &prevRestores) {
   bool placedRestores = false;
   // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB)
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 23984e9..2fc8f46 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -43,7 +43,7 @@ STATISTIC(NumSpilled, "Number of registers live across unwind edges");
 namespace {
   class SjLjEHPrepare : public FunctionPass {
-    const TargetLoweringBase *TLI;
+    const TargetMachine *TM;
     Type *FunctionContextTy;
     Constant *RegisterFn;
     Constant *UnregisterFn;
@@ -58,8 +58,8 @@ namespace {
     AllocaInst *FuncCtx;
   public:
     static char ID; // Pass identification, replacement for typeid
-    explicit SjLjEHPrepare(const TargetLoweringBase *tli = NULL)
-      : FunctionPass(ID), TLI(tli) { }
+    explicit SjLjEHPrepare(const TargetMachine *TM)
+      : FunctionPass(ID), TM(TM) { }
     bool doInitialization(Module &M);
     bool runOnFunction(Function &F);
@@ -82,8 +82,8 @@ namespace {
 char SjLjEHPrepare::ID = 0;
 // Public Interface To the SjLjEHPrepare pass.
-FunctionPass *llvm::createSjLjEHPreparePass(const TargetLoweringBase *TLI) {
-  return new SjLjEHPrepare(TLI);
+FunctionPass *llvm::createSjLjEHPreparePass(const TargetMachine *TM) {
+  return new SjLjEHPrepare(TM);
 }
 // doInitialization - Set up declarations and types needed to process
 // exceptions.
@@ -190,6 +190,7 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
   // Create an alloca for the incoming jump buffer ptr and the new jump buffer
   // that needs to be restored on all exits from the function. This is an alloca
   // because the value needs to be added to the global context list.
+  const TargetLowering *TLI = TM->getTargetLowering();
   unsigned Align =
     TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy);
   FuncCtx =
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index c5bbba3..10a93b7 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -31,8 +31,8 @@
 #include "SpillPlacement.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/CodeGen/EdgeBundles.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/Passes.h"
@@ -53,11 +53,16 @@ char &llvm::SpillPlacementID = SpillPlacement::ID;
 void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
+  AU.addRequired<MachineBlockFrequencyInfo>();
   AU.addRequiredTransitive<EdgeBundles>();
   AU.addRequiredTransitive<MachineLoopInfo>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
+/// Decision threshold. A node gets the output value 0 if the weighted sum of
+/// its inputs falls in the open interval (-Threshold;Threshold).
+static const BlockFrequency Threshold = 2;
+
 /// Node - Each edge bundle corresponds to a Hopfield node.
 ///
 /// The node contains precomputed frequency data that only depends on the CFG,
@@ -68,31 +73,25 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
 /// because all weights are positive.
 ///
 struct SpillPlacement::Node {
-  /// Scale - Inverse block frequency feeding into[0] or out of[1] the bundle.
-  /// Ideally, these two numbers should be identical, but inaccuracies in the
-  /// block frequency estimates means that we need to normalize ingoing and
-  /// outgoing frequencies separately so they are commensurate.
-  float Scale[2];
-
-  /// Bias - Normalized contributions from non-transparent blocks.
-  /// A bundle connected to a MustSpill block has a huge negative bias,
-  /// otherwise it is a number in the range [-2;2].
-  float Bias;
+  /// BiasN - Sum of blocks that prefer a spill.
+  BlockFrequency BiasN;
+  /// BiasP - Sum of blocks that prefer a register.
+  BlockFrequency BiasP;
   /// Value - Output value of this node computed from the Bias and links.
-  /// This is always in the range [-1;1]. A positive number means the variable
-  /// should go in a register through this bundle.
-  float Value;
+  /// This is always one of the values {-1, 0, 1}. A positive number means the
+  /// variable should go in a register through this bundle.
+  int Value;
-  typedef SmallVector<std::pair<float, unsigned>, 4> LinkVector;
+  typedef SmallVector<std::pair<BlockFrequency, unsigned>, 4> LinkVector;
   /// Links - (Weight, BundleNo) for all transparent blocks connecting to other
-  /// bundles. The weights are all positive and add up to at most 2, weights
-  /// from ingoing and outgoing nodes separately add up to a most 1. The weight
-  /// sum can be less than 2 when the variable is not live into / out of some
-  /// connected basic blocks.
+  /// bundles. The weights are all positive block frequencies.
   LinkVector Links;
+  /// SumLinkWeights - Cached sum of the weights of all links + Threshold.
+  BlockFrequency SumLinkWeights;
+
   /// preferReg - Return true when this node prefers to be in a register.
   bool preferReg() const {
     // Undecided nodes (Value==0) go on the stack.
@@ -101,28 +100,24 @@ struct SpillPlacement::Node {
   /// mustSpill - Return True if this node is so biased that it must spill.
   bool mustSpill() const {
-    // Actually, we must spill if Bias < sum(weights).
-    // It may be worth it to compute the weight sum here?
-    return Bias < -2.0f;
-  }
-
-  /// Node - Create a blank Node.
-  Node() {
-    Scale[0] = Scale[1] = 0;
+    // We must spill if Bias < -sum(weights) or the MustSpill flag was set.
+    // BiasN is saturated when MustSpill is set, make sure this still returns
+    // true when the RHS saturates. Note that SumLinkWeights includes Threshold.
+    return BiasN >= BiasP + SumLinkWeights;
   }
   /// clear - Reset per-query data, but preserve frequencies that only depend on
   // the CFG.
   void clear() {
-    Bias = Value = 0;
+    BiasN = BiasP = Value = 0;
+    SumLinkWeights = Threshold;
     Links.clear();
   }
   /// addLink - Add a link to bundle b with weight w.
-  /// out=0 for an ingoing link, and 1 for an outgoing link.
-  void addLink(unsigned b, float w, bool out) {
-    // Normalize w relative to all connected blocks from that direction.
-    w *= Scale[out];
+  void addLink(unsigned b, BlockFrequency w) {
+    // Update cached sum.
+    SumLinkWeights += w;
     // There can be multiple links to the same bundle, add them up.
     for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
@@ -134,33 +129,48 @@ struct SpillPlacement::Node {
     Links.push_back(std::make_pair(w, b));
   }
-  /// addBias - Bias this node from an ingoing[0] or outgoing[1] link.
-  /// Return the change to the total number of positive biases.
-  void addBias(float w, bool out) {
-    // Normalize w relative to all connected blocks from that direction.
-    w *= Scale[out];
-    Bias += w;
+  /// addBias - Bias this node.
+ void addBias(BlockFrequency freq, BorderConstraint direction) { + switch (direction) { + default: + break; + case PrefReg: + BiasP += freq; + break; + case PrefSpill: + BiasN += freq; + break; + case MustSpill: + BiasN = BlockFrequency::getMaxFrequency(); + break; + } } /// update - Recompute Value from Bias and Links. Return true when node /// preference changes. bool update(const Node nodes[]) { // Compute the weighted sum of inputs. - float Sum = Bias; - for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) - Sum += I->first * nodes[I->second].Value; + BlockFrequency SumN = BiasN; + BlockFrequency SumP = BiasP; + for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) { + if (nodes[I->second].Value == -1) + SumN += I->first; + else if (nodes[I->second].Value == 1) + SumP += I->first; + } - // The weighted sum is going to be in the range [-2;2]. Ideally, we should - // simply set Value = sign(Sum), but we will add a dead zone around 0 for - // two reasons: + // Each weighted sum is going to be less than the total frequency of the + // bundle. Ideally, we should simply set Value = sign(SumP - SumN), but we + // will add a dead zone around 0 for two reasons: + // // 1. It avoids arbitrary bias when all links are 0 as is possible during // initial iterations. // 2. It helps tame rounding errors when the links nominally sum to 0. - const float Thres = 1e-4f; + // bool Before = preferReg(); - if (Sum < -Thres) + if (SumN >= SumP + Threshold) Value = -1; - else if (Sum > Thres) + else if (SumP >= SumN + Threshold) Value = 1; else Value = 0; @@ -177,22 +187,13 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { nodes = new Node[bundles->getNumBundles()]; // Compute total ingoing and outgoing block frequencies for all bundles. - BlockFrequency.resize(mf.getNumBlockIDs()); + BlockFrequencies.resize(mf.getNumBlockIDs()); + MachineBlockFrequencyInfo &MBFI = getAnalysis<MachineBlockFrequencyInfo>(); for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) { - float Freq = LiveIntervals::getSpillWeight(true, false, - loops->getLoopDepth(I)); unsigned Num = I->getNumber(); - BlockFrequency[Num] = Freq; - nodes[bundles->getBundle(Num, 1)].Scale[0] += Freq; - nodes[bundles->getBundle(Num, 0)].Scale[1] += Freq; + BlockFrequencies[Num] = MBFI.getBlockFreq(I); } - // Scales are reciprocal frequencies. - for (unsigned i = 0, e = bundles->getNumBundles(); i != e; ++i) - for (unsigned d = 0; d != 2; ++d) - if (nodes[i].Scale[d] > 0) - nodes[i].Scale[d] = 1 / nodes[i].Scale[d]; - // We never change the function. return false; } @@ -213,12 +214,15 @@ void SpillPlacement::activate(unsigned n) { // landing pads, or loops with many 'continue' statements. It is difficult to // allocate registers when so many different blocks are involved. // - // Give a small negative bias to large bundles such that 1/32 of the - // connected blocks need to be interested before we consider expanding the - // region through the bundle. This helps compile time by limiting the number - // of blocks visited and the number of links in the Hopfield network. - if (bundles->getBlocks(n).size() > 100) - nodes[n].Bias = -0.0625f; + // Give a small negative bias to large bundles such that a substantial + // fraction of the connected blocks need to be interested before we consider + // expanding the region through the bundle. This helps compile time by + // limiting the number of blocks visited and the number of links in the + // Hopfield network. 
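The Node::update() hunk above replaces float sums and a 1e-4 epsilon with two BlockFrequency accumulators, BiasN for spill and BiasP for register, decided against a fixed Threshold dead zone; the activate() hunk continues just below. A toy, self-contained model of the new decision rule, using a plain uint64_t where LLVM's BlockFrequency adds saturating arithmetic:

#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

typedef uint64_t Frequency; // stand-in for llvm::BlockFrequency (which saturates)

struct Node {
  Frequency BiasN, BiasP; // sums of spill- and register-preferring inputs
  int Value;              // -1 spill, 0 undecided, +1 register
  std::vector<std::pair<Frequency, const Node *> > Links;

  Node() : BiasN(0), BiasP(0), Value(0) {}

  // Flip only when one side wins by at least Threshold, giving a dead
  // zone around zero; returns true when the preference changed.
  bool update(Frequency Threshold) {
    Frequency SumN = BiasN, SumP = BiasP;
    for (size_t i = 0; i != Links.size(); ++i) {
      if (Links[i].second->Value == -1)
        SumN += Links[i].first;
      else if (Links[i].second->Value == 1)
        SumP += Links[i].first;
    }
    int Old = Value;
    if (SumN >= SumP + Threshold)
      Value = -1; // spill wins clearly
    else if (SumP >= SumN + Threshold)
      Value = 1;  // register wins clearly
    else
      Value = 0;  // dead zone: stay undecided
    return Value != Old;
  }
};

int main() {
  Node A, B;
  A.BiasP = 10;                                    // strongly prefers a register
  B.Links.push_back(std::make_pair(Frequency(8), (const Node *)&A));
  A.update(2);
  B.update(2);                                     // B follows A through the link
  std::cout << A.Value << " " << B.Value << "\n";  // prints: 1 1
  return 0;
}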
+ if (bundles->getBlocks(n).size() > 100) { + nodes[n].BiasP = 0; + nodes[n].BiasN = (BlockFrequency::getEntryFrequency() / 16); + } } @@ -227,27 +231,20 @@ void SpillPlacement::activate(unsigned n) { void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) { for (ArrayRef<BlockConstraint>::iterator I = LiveBlocks.begin(), E = LiveBlocks.end(); I != E; ++I) { - float Freq = getBlockFrequency(I->Number); - const float Bias[] = { - 0, // DontCare, - 1, // PrefReg, - -1, // PrefSpill - 0, // PrefBoth - -HUGE_VALF // MustSpill - }; + BlockFrequency Freq = BlockFrequencies[I->Number]; // Live-in to block? if (I->Entry != DontCare) { unsigned ib = bundles->getBundle(I->Number, 0); activate(ib); - nodes[ib].addBias(Freq * Bias[I->Entry], 1); + nodes[ib].addBias(Freq, I->Entry); } // Live-out from block? if (I->Exit != DontCare) { unsigned ob = bundles->getBundle(I->Number, 1); activate(ob); - nodes[ob].addBias(Freq * Bias[I->Exit], 0); + nodes[ob].addBias(Freq, I->Exit); } } } @@ -256,15 +253,15 @@ void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) { void SpillPlacement::addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong) { for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end(); I != E; ++I) { - float Freq = getBlockFrequency(*I); + BlockFrequency Freq = BlockFrequencies[*I]; if (Strong) Freq += Freq; unsigned ib = bundles->getBundle(*I, 0); unsigned ob = bundles->getBundle(*I, 1); activate(ib); activate(ob); - nodes[ib].addBias(-Freq, 1); - nodes[ob].addBias(-Freq, 0); + nodes[ib].addBias(Freq, PrefSpill); + nodes[ob].addBias(Freq, PrefSpill); } } @@ -284,9 +281,9 @@ void SpillPlacement::addLinks(ArrayRef<unsigned> Links) { Linked.push_back(ib); if (nodes[ob].Links.empty() && !nodes[ob].mustSpill()) Linked.push_back(ob); - float Freq = getBlockFrequency(Number); - nodes[ib].addLink(ob, Freq, 1); - nodes[ob].addLink(ib, Freq, 0); + BlockFrequency Freq = BlockFrequencies[Number]; + nodes[ib].addLink(ob, Freq); + nodes[ob].addLink(ib, Freq); } } diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h index fc412f8..105516b 100644 --- a/lib/CodeGen/SpillPlacement.h +++ b/lib/CodeGen/SpillPlacement.h @@ -30,6 +30,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Support/BlockFrequency.h" namespace llvm { @@ -57,7 +58,7 @@ class SpillPlacement : public MachineFunctionPass { SmallVector<unsigned, 8> RecentPositive; // Block frequencies are computed once. Indexed by block number. - SmallVector<float, 4> BlockFrequency; + SmallVector<BlockFrequency, 4> BlockFrequencies; public: static char ID; // Pass identification, replacement for typeid. @@ -139,8 +140,8 @@ public: /// getBlockFrequency - Return the estimated block execution frequency per /// function invocation. 
- float getBlockFrequency(unsigned Number) const { - return BlockFrequency[Number]; + BlockFrequency getBlockFrequency(unsigned Number) const { + return BlockFrequencies[Number]; } private: diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 0a3818e..e717fac 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -325,12 +325,14 @@ void SplitAnalysis::analyze(const LiveInterval *li) { SplitEditor::SplitEditor(SplitAnalysis &sa, LiveIntervals &lis, VirtRegMap &vrm, - MachineDominatorTree &mdt) + MachineDominatorTree &mdt, + MachineBlockFrequencyInfo &mbfi) : SA(sa), LIS(lis), VRM(vrm), MRI(vrm.getMachineFunction().getRegInfo()), MDT(mdt), TII(*vrm.getMachineFunction().getTarget().getInstrInfo()), TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()), + MBFI(mbfi), Edit(0), OpenIdx(0), SpillMode(SM_Partition), @@ -1119,7 +1121,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { } // Calculate spill weight and allocation hints for new intervals. - Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops); + Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops, MBFI); assert(!LRMap || LRMap->size() == Edit->size()); } diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index 4005a3d..f029c73 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -27,6 +27,7 @@ class ConnectedVNInfoEqClasses; class LiveInterval; class LiveIntervals; class LiveRangeEdit; +class MachineBlockFrequencyInfo; class MachineInstr; class MachineLoopInfo; class MachineRegisterInfo; @@ -215,6 +216,7 @@ class SplitEditor { MachineDominatorTree &MDT; const TargetInstrInfo &TII; const TargetRegisterInfo &TRI; + const MachineBlockFrequencyInfo &MBFI; public: @@ -349,7 +351,7 @@ public: /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. /// Newly created intervals will be appended to newIntervals. SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&, - MachineDominatorTree&); + MachineDominatorTree&, MachineBlockFrequencyInfo &); /// reset - Prepare for a new split. void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition); diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index 5f4c68b..faaa6e7 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -310,9 +310,9 @@ void StackColoring::calculateLocalLiveness() { SmallPtrSet<const MachineBasicBlock*, 8> NextBBSet; - for (SmallVector<const MachineBasicBlock*, 8>::iterator - PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end(); - PI != PE; ++PI) { + for (SmallVectorImpl<const MachineBasicBlock *>::iterator + PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end(); + PI != PE; ++PI) { const MachineBasicBlock *BB = *PI; if (!BBSet.count(BB)) continue; diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 389793e..4c56380 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -33,6 +33,7 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" +#include <cstdlib> using namespace llvm; STATISTIC(NumFunProtected, "Number of functions protected"); @@ -41,9 +42,11 @@ STATISTIC(NumAddrTaken, "Number of local variables that have their address" namespace { class StackProtector : public FunctionPass { + const TargetMachine *TM; + /// TLI - Keep a pointer of a TargetLowering to consult for determining /// target type sizes. 
- const TargetLoweringBase *const TLI; + const TargetLoweringBase *TLI; const Triple Trip; Function *F; @@ -51,6 +54,10 @@ namespace { DominatorTree *DT; + /// \brief The minimum size of buffers that will receive stack smashing + /// protection when -fstack-protection is used. + unsigned SSPBufferSize; + /// VisitedPHIs - The set of PHI nodes visited when determining /// if a variable's reference has been taken. This set /// is maintained to ensure we don't visit the same PHI node multiple @@ -83,12 +90,12 @@ namespace { bool RequiresStackProtector(); public: static char ID; // Pass identification, replacement for typeid. - StackProtector() : FunctionPass(ID), TLI(0) { + StackProtector() : FunctionPass(ID), TM(0), TLI(0), SSPBufferSize(0) { initializeStackProtectorPass(*PassRegistry::getPassRegistry()); } - StackProtector(const TargetLoweringBase *tli) - : FunctionPass(ID), TLI(tli), - Trip(tli->getTargetMachine().getTargetTriple()) { + StackProtector(const TargetMachine *TM) + : FunctionPass(ID), TM(TM), TLI(0), Trip(TM->getTargetTriple()), + SSPBufferSize(8) { initializeStackProtectorPass(*PassRegistry::getPassRegistry()); } @@ -104,17 +111,24 @@ char StackProtector::ID = 0; INITIALIZE_PASS(StackProtector, "stack-protector", "Insert stack protectors", false, false) -FunctionPass *llvm::createStackProtectorPass(const TargetLoweringBase *tli) { - return new StackProtector(tli); +FunctionPass *llvm::createStackProtectorPass(const TargetMachine *TM) { + return new StackProtector(TM); } bool StackProtector::runOnFunction(Function &Fn) { F = &Fn; M = F->getParent(); DT = getAnalysisIfAvailable<DominatorTree>(); + TLI = TM->getTargetLowering(); if (!RequiresStackProtector()) return false; + Attribute Attr = + Fn.getAttributes().getAttribute(AttributeSet::FunctionIndex, + "stack-protector-buffer-size"); + if (Attr.isStringAttribute()) + SSPBufferSize = atoi(Attr.getValueAsString().data()); + ++NumFunProtected; return InsertStackProtectors(); } @@ -130,7 +144,6 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool Strong, // protector if (Strong) return true; - const TargetMachine &TM = TLI->getTargetMachine(); if (!AT->getElementType()->isIntegerTy(8)) { // If we're on a non-Darwin platform or we're inside of a structure, don't // add stack protectors unless the array is a character array. @@ -140,7 +153,7 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool Strong, // If an array has more than SSPBufferSize bytes of allocated space, then we // emit stack protectors. - if (TM.Options.SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT)) + if (SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT)) return true; } @@ -228,13 +241,14 @@ bool StackProtector::RequiresStackProtector() { if (const ConstantInt *CI = dyn_cast<ConstantInt>(AI->getArraySize())) { - unsigned BufferSize = TLI->getTargetMachine().Options.SSPBufferSize; - if (CI->getLimitedValue(BufferSize) >= BufferSize) + if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) // A call to alloca with size >= SSPBufferSize requires // stack protectors. return true; - } else // A call to alloca with a variable size requires protectors. + } else { + // A call to alloca with a variable size requires protectors. 
return true; + } } if (ContainsProtectableArray(AI->getAllocatedType(), Strong)) @@ -251,6 +265,46 @@ bool StackProtector::RequiresStackProtector() { return false; } +/// Insert code into the entry block that stores the __stack_chk_guard +/// variable onto the stack: +/// +/// entry: +/// StackGuardSlot = alloca i8* +/// StackGuard = load __stack_chk_guard +/// call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) +/// +static void CreatePrologue(Function *F, Module *M, ReturnInst *RI, + const TargetLoweringBase *TLI, const Triple &Trip, + AllocaInst *&AI, Value *&StackGuardVar) { + PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); + unsigned AddressSpace, Offset; + if (TLI->getStackCookieLocation(AddressSpace, Offset)) { + Constant *OffsetVal = + ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); + + StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal, + PointerType::get(PtrTy, + AddressSpace)); + } else if (Trip.getOS() == llvm::Triple::OpenBSD) { + StackGuardVar = M->getOrInsertGlobal("__guard_local", PtrTy); + cast<GlobalValue>(StackGuardVar) + ->setVisibility(GlobalValue::HiddenVisibility); + } else { + StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); + } + + BasicBlock &Entry = F->getEntryBlock(); + Instruction *InsPt = &Entry.front(); + + AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt); + LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt); + + Value *Args[] = { LI, AI }; + CallInst:: + Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), + Args, "", InsPt); +} + /// InsertStackProtectors - Insert code into the prologue and epilogue of the /// function. /// @@ -269,41 +323,7 @@ bool StackProtector::InsertStackProtectors() { if (!RI) continue; if (!FailBB) { - // Insert code into the entry block that stores the __stack_chk_guard - // variable onto the stack: - // - // entry: - // StackGuardSlot = alloca i8* - // StackGuard = load __stack_chk_guard - // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) - // - PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); - unsigned AddressSpace, Offset; - if (TLI->getStackCookieLocation(AddressSpace, Offset)) { - Constant *OffsetVal = - ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); - - StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal, - PointerType::get(PtrTy, AddressSpace)); - } else if (Trip.getOS() == llvm::Triple::OpenBSD) { - StackGuardVar = M->getOrInsertGlobal("__guard_local", PtrTy); - cast<GlobalValue>(StackGuardVar) - ->setVisibility(GlobalValue::HiddenVisibility); - } else { - StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); - } - - BasicBlock &Entry = F->getEntryBlock(); - Instruction *InsPt = &Entry.front(); - - AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt); - LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt); - - Value *Args[] = { LI, AI }; - CallInst:: - Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), - Args, "", InsPt); - + CreatePrologue(F, M, RI, TLI, Trip, AI, StackGuardVar); // Create the basic block to jump to when the guard check fails. 
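In the StackProtector hunks above, the buffer-size threshold moves from the global TM.Options.SSPBufferSize to a per-function "stack-protector-buffer-size" string attribute, seeded with a default of 8 and parsed with atoi, and the prologue emission is split out into a static CreatePrologue helper. A stripped-down sketch of the size policy, with the LLVM attribute plumbing reduced to an optional string:

#include <cstdlib>
#include <string>

// Default mirrors the patch; a present attribute string overrides it.
static unsigned bufferSizeFor(const std::string *Attr) {
  unsigned SSPBufferSize = 8;                  // constructor default
  if (Attr)                                    // function carries the attribute?
    SSPBufferSize = std::atoi(Attr->c_str()); // the patch parses with atoi too
  return SSPBufferSize;
}

// An array (or alloca) at or above the threshold gets a protector.
static bool needsProtector(unsigned AllocSize, const std::string *Attr) {
  return AllocSize >= bufferSizeFor(Attr);
}

int main() {
  std::string Four = "4";
  bool Ok = !needsProtector(7, 0)        // 7 < default 8: no protector
            && needsProtector(7, &Four); // 7 >= 4: protector
  return Ok ? 0 : 1;
}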
FailBB = CreateFailBB(); } diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index f951561..9f44df8 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -14,20 +14,20 @@ #define DEBUG_TYPE "stackslotcoloring" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include <vector> @@ -48,13 +48,16 @@ namespace { LiveStacks* LS; MachineFrameInfo *MFI; const TargetInstrInfo *TII; - const MachineLoopInfo *loopInfo; + const MachineBlockFrequencyInfo *MBFI; // SSIntervals - Spill slot intervals. std::vector<LiveInterval*> SSIntervals; - // SSRefs - Keep a list of frame index references for each spill slot. - SmallVector<SmallVector<MachineInstr*, 8>, 16> SSRefs; + // SSRefs - Keep a list of MachineMemOperands for each spill slot. + // MachineMemOperands can be shared between instructions, so we need + // to be careful that renames like [FI0, FI1] -> [FI1, FI2] do not + // become FI0 -> FI1 -> FI2. + SmallVector<SmallVector<MachineMemOperand *, 8>, 16> SSRefs; // OrigAlignments - Alignments of stack objects before coloring. 
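// Aside: the SSRefs comment above is the subtle part of this rewrite. A toy,
// compilable illustration of the hazard it describes (types are plain
// stand-ins): under the mapping {0 -> 1, 1 -> 2}, each reference must be
// rewritten from the slot it held *before* any renaming, or a reference
// already moved to 1 would be moved again to 2.
#include <cstddef>
#include <vector>

static void applySlotMapping(std::vector<int> &SlotOfRef,
                             const std::vector<int> &SlotMapping) {
  for (std::size_t i = 0, e = SlotOfRef.size(); i != e; ++i) {
    int OldFI = SlotOfRef[i];  // always keyed on the original slot
    int NewFI = SlotMapping[OldFI];
    if (NewFI != -1 && NewFI != OldFI)
      SlotOfRef[i] = NewFI;    // a single rewrite; renames never chain
  }
}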
SmallVector<unsigned, 16> OrigAlignments; @@ -89,8 +92,8 @@ namespace { AU.addRequired<SlotIndexes>(); AU.addPreserved<SlotIndexes>(); AU.addRequired<LiveStacks>(); - AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -103,7 +106,7 @@ namespace { bool OverlapWithAssignments(LiveInterval *li, int Color) const; int ColorSlot(LiveInterval *li); bool ColorSlots(MachineFunction &MF); - void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI, + void RewriteInstruction(MachineInstr *MI, SmallVectorImpl<int> &SlotMapping, MachineFunction &MF); bool RemoveDeadStores(MachineBasicBlock* MBB); }; @@ -139,7 +142,7 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = &*MBBI; - unsigned loopDepth = loopInfo->getLoopDepth(MBB); + BlockFrequency Freq = MBFI->getBlockFreq(MBB); for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end(); MII != EE; ++MII) { MachineInstr *MI = &*MII; @@ -154,8 +157,19 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { continue; LiveInterval &li = LS->getInterval(FI); if (!MI->isDebugValue()) - li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth); - SSRefs[FI].push_back(MI); + li.weight += LiveIntervals::getSpillWeight(false, true, Freq); + } + for (MachineInstr::mmo_iterator MMOI = MI->memoperands_begin(), + EE = MI->memoperands_end(); MMOI != EE; ++MMOI) { + MachineMemOperand *MMO = *MMOI; + if (const Value *V = MMO->getValue()) { + if (const FixedStackPseudoSourceValue *FSV = + dyn_cast<FixedStackPseudoSourceValue>(V)) { + int FI = FSV->getFrameIndex(); + if (FI >= 0) + SSRefs[FI].push_back(MMO); + } + } } } } @@ -197,7 +211,7 @@ void StackSlotColoring::InitializeSlots() { /// LiveIntervals that have already been assigned to the specified color. bool StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const { - const SmallVector<LiveInterval*,4> &OtherLIs = Assignments[Color]; + const SmallVectorImpl<LiveInterval *> &OtherLIs = Assignments[Color]; for (unsigned i = 0, e = OtherLIs.size(); i != e; ++i) { LiveInterval *OtherLI = OtherLIs[i]; if (OtherLI->overlaps(*li)) @@ -291,16 +305,26 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { if (!Changed) return false; - // Rewrite all MO_FrameIndex operands. - SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs()); + // Rewrite all MachineMemOperands. for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) { int NewFI = SlotMapping[SS]; if (NewFI == -1 || (NewFI == (int)SS)) continue; - SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS]; - for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) - RewriteInstruction(RefMIs[i], SS, NewFI, MF); + const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI); + SmallVectorImpl<MachineMemOperand *> &RefMMOs = SSRefs[SS]; + for (unsigned i = 0, e = RefMMOs.size(); i != e; ++i) + RefMMOs[i]->setValue(NewSV); + } + + // Rewrite all MO_FrameIndex operands. Look for dead stores. 
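// Aside: ScanForSpillSlotRefs() above now weights spill-slot references by
// block frequency instead of loop depth. A compilable sketch of the two
// heuristics; the pow(10, depth) form reflects the old depth-based estimate
// of that era and is illustrative, not a quote of
// LiveIntervals::getSpillWeight.
#include <cmath>

static float weightFromLoopDepth(unsigned LoopDepth) {
  // Old: hotness guessed from nesting, 1, 10, 100, ...
  return std::pow(10.0f, static_cast<float>(LoopDepth));
}

static float weightFromBlockFreq(double RelativeBlockFreq) {
  // New: hotness taken from the (profile- or estimate-based) frequency.
  return static_cast<float>(RelativeBlockFreq);
}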
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = &*MBBI; + for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end(); + MII != EE; ++MII) + RewriteInstruction(MII, SlotMapping, MF); + RemoveDeadStores(MBB); } // Delete unused stack slots. @@ -315,28 +339,24 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { /// RewriteInstruction - Rewrite specified instruction by replacing references /// to old frame index with new one. -void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI, - int NewFI, MachineFunction &MF) { +void StackSlotColoring::RewriteInstruction(MachineInstr *MI, + SmallVectorImpl<int> &SlotMapping, + MachineFunction &MF) { // Update the operands. for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isFI()) continue; - int FI = MO.getIndex(); - if (FI != OldFI) + int OldFI = MO.getIndex(); + if (OldFI < 0) + continue; + int NewFI = SlotMapping[OldFI]; + if (NewFI == -1 || NewFI == OldFI) continue; MO.setIndex(NewFI); } - // Update the memory references. This changes the MachineMemOperands - // directly. They may be in use by multiple instructions, however all - // instructions using OldFI are being rewritten to use NewFI. - const Value *OldSV = PseudoSourceValue::getFixedStack(OldFI); - const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI); - for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), - E = MI->memoperands_end(); I != E; ++I) - if ((*I)->getValue() == OldSV) - (*I)->setValue(NewSV); + // The MachineMemOperands have already been updated. } @@ -357,10 +377,19 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { if (DCELimit != -1 && (int)NumDead >= DCELimit) break; + int FirstSS, SecondSS; + if (TII->isStackSlotCopy(I, FirstSS, SecondSS) && + FirstSS == SecondSS && + FirstSS != -1) { + ++NumDead; + changed = true; + toErase.push_back(I); + continue; + } + MachineBasicBlock::iterator NextMI = llvm::next(I); if (NextMI == MBB->end()) continue; - int FirstSS, SecondSS; unsigned LoadReg = 0; unsigned StoreReg = 0; if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue; @@ -379,7 +408,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { ++I; } - for (SmallVector<MachineInstr*, 4>::iterator I = toErase.begin(), + for (SmallVectorImpl<MachineInstr *>::iterator I = toErase.begin(), E = toErase.end(); I != E; ++I) (*I)->eraseFromParent(); @@ -396,7 +425,7 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { MFI = MF.getFrameInfo(); TII = MF.getTarget().getInstrInfo(); LS = &getAnalysis<LiveStacks>(); - loopInfo = &getAnalysis<MachineLoopInfo>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); bool Changed = false; @@ -430,10 +459,5 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { Assignments[i].clear(); Assignments.clear(); - if (Changed) { - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - Changed |= RemoveDeadStores(I); - } - return Changed; } diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 1ec8817..8a1d567 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -86,7 +86,7 @@ namespace { void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, DenseMap<unsigned, unsigned> &LocalVRMap, - SmallVector<std::pair<unsigned,unsigned>, 4> &Copies, + SmallVectorImpl<std::pair<unsigned,unsigned> > 
&Copies, const DenseSet<unsigned> &UsedByPhi, bool Remove); void DuplicateInstruction(MachineInstr *MI, @@ -96,7 +96,7 @@ namespace { DenseMap<unsigned, unsigned> &LocalVRMap, const DenseSet<unsigned> &UsedByPhi); void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, - SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallVectorImpl<MachineBasicBlock *> &TDBBs, SmallSetVector<MachineBasicBlock*, 8> &Succs); bool TailDuplicateBlocks(MachineFunction &MF); bool shouldTailDuplicate(const MachineFunction &MF, @@ -104,14 +104,14 @@ namespace { bool isSimpleBB(MachineBasicBlock *TailBB); bool canCompletelyDuplicateBB(MachineBasicBlock &BB); bool duplicateSimpleBB(MachineBasicBlock *TailBB, - SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallVectorImpl<MachineBasicBlock *> &TDBBs, const DenseSet<unsigned> &RegsUsedByPhi, - SmallVector<MachineInstr*, 16> &Copies); + SmallVectorImpl<MachineInstr *> &Copies); bool TailDuplicate(MachineBasicBlock *TailBB, bool IsSimple, MachineFunction &MF, - SmallVector<MachineBasicBlock*, 8> &TDBBs, - SmallVector<MachineInstr*, 16> &Copies); + SmallVectorImpl<MachineBasicBlock *> &TDBBs, + SmallVectorImpl<MachineInstr *> &Copies); bool TailDuplicateAndUpdate(MachineBasicBlock *MBB, bool IsSimple, MachineFunction &MF); @@ -382,13 +382,11 @@ void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, /// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB. /// Remember the source register that's contributed by PredBB and update SSA /// update map. -void TailDuplicatePass::ProcessPHI(MachineInstr *MI, - MachineBasicBlock *TailBB, - MachineBasicBlock *PredBB, - DenseMap<unsigned, unsigned> &LocalVRMap, - SmallVector<std::pair<unsigned,unsigned>, 4> &Copies, - const DenseSet<unsigned> &RegsUsedByPhi, - bool Remove) { +void TailDuplicatePass::ProcessPHI( + MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, + DenseMap<unsigned, unsigned> &LocalVRMap, + SmallVectorImpl<std::pair<unsigned, unsigned> > &Copies, + const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) { unsigned DefReg = MI->getOperand(0).getReg(); unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); assert(SrcOpIdx && "Unable to find matching PHI source?"); @@ -452,7 +450,7 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, /// instructions in them accordingly. 
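// Aside: every signature change in this file is one idiom. Taking
// SmallVectorImpl<T>& lets a single function body accept a SmallVector<T, N>
// of any inline size N. A minimal stand-alone mimic of why the base class
// erases N (growth logic deliberately omitted from this sketch):
template <typename T> struct VecImpl { // size-erased interface
  T *Data;
  unsigned Size;
  void push_back(const T &V) { Data[Size++] = V; } // sketch only: no growth
};

template <typename T, unsigned N> struct Vec : VecImpl<T> { // inline storage
  T Buffer[N];
  Vec() { this->Data = Buffer; this->Size = 0; }
};

static void takeAny(VecImpl<int> &Out) { Out.push_back(7); }
// usage: Vec<int, 4> A; Vec<int, 16> B; takeAny(A); takeAny(B); // both bind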
void TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, - SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallVectorImpl<MachineBasicBlock *> &TDBBs, SmallSetVector<MachineBasicBlock*,8> &Succs) { for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(), SE = Succs.end(); SI != SE; ++SI) { @@ -662,9 +660,9 @@ TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) { bool TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, - SmallVector<MachineBasicBlock*, 8> &TDBBs, - const DenseSet<unsigned> &UsedByPhi, - SmallVector<MachineInstr*, 16> &Copies) { + SmallVectorImpl<MachineBasicBlock *> &TDBBs, + const DenseSet<unsigned> &UsedByPhi, + SmallVectorImpl<MachineInstr *> &Copies) { SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(), TailBB->succ_end()); SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), @@ -742,8 +740,8 @@ bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, bool IsSimple, MachineFunction &MF, - SmallVector<MachineBasicBlock*, 8> &TDBBs, - SmallVector<MachineInstr*, 16> &Copies) { + SmallVectorImpl<MachineBasicBlock *> &TDBBs, + SmallVectorImpl<MachineInstr *> &Copies) { DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); DenseSet<unsigned> UsedByPhi; diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index 20eb918..bb8bd42 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -668,27 +668,13 @@ getOperandLatency(const InstrItineraryData *ItinData, /// lookup, do so. Otherwise return -1. int TargetInstrInfo::computeDefOperandLatency( const InstrItineraryData *ItinData, - const MachineInstr *DefMI, bool FindMin) const { + const MachineInstr *DefMI) const { // Let the target hook getInstrLatency handle missing itineraries. if (!ItinData) return getInstrLatency(ItinData, DefMI); - // Return a latency based on the itinerary properties and defining instruction - // if possible. Some common subtargets don't require per-operand latency, - // especially for minimum latencies. - if (FindMin) { - // If MinLatency is valid, call getInstrLatency. This uses Stage latency if - // it exists before defaulting to MinLatency. - if (ItinData->SchedModel->MinLatency >= 0) - return getInstrLatency(ItinData, DefMI); - - // If MinLatency is invalid, OperandLatency is interpreted as MinLatency. - // For empty itineraries, short-circuit the check and default to one cycle. - if (ItinData->isEmpty()) - return 1; - } - else if(ItinData->isEmpty()) + if(ItinData->isEmpty()) return defaultDefLatency(ItinData->SchedModel, DefMI); // ...operand lookup required @@ -709,10 +695,9 @@ int TargetInstrInfo::computeDefOperandLatency( unsigned TargetInstrInfo:: computeOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, unsigned UseIdx, - bool FindMin) const { + const MachineInstr *UseMI, unsigned UseIdx) const { - int DefLatency = computeDefOperandLatency(ItinData, DefMI, FindMin); + int DefLatency = computeDefOperandLatency(ItinData, DefMI); if (DefLatency >= 0) return DefLatency; @@ -732,8 +717,7 @@ computeOperandLatency(const InstrItineraryData *ItinData, unsigned InstrLatency = getInstrLatency(ItinData, DefMI); // Expected latency is the max of the stage latency and itinerary props.
- if (!FindMin) - InstrLatency = std::max(InstrLatency, - defaultDefLatency(ItinData->SchedModel, DefMI)); + InstrLatency = std::max(InstrLatency, + defaultDefLatency(ItinData->SchedModel, DefMI)); return InstrLatency; } diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 7e7359a..07cf871 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -726,14 +726,13 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, if (GV->isWeakForLinker()) { Selection = COFF::IMAGE_COMDAT_SELECT_ANY; Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; - MCSymbol *Sym = Mang->getSymbol(GV); Name.append("$"); - Name.append(Sym->getName().begin() + 1, Sym->getName().end()); + Mang->getNameWithPrefix(Name, GV, false, false); } return getContext().getCOFFSection(Name, Characteristics, - Selection, - Kind); + Kind, + Selection); } static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { @@ -761,15 +760,14 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, if (GV->isWeakForLinker()) { const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind); SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); - MCSymbol *Sym = Mang->getSymbol(GV); - Name.append(Sym->getName().begin() + 1, Sym->getName().end()); + Mang->getNameWithPrefix(Name, GV, false, false); unsigned Characteristics = getCOFFSectionFlags(Kind); Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; return getContext().getCOFFSection(Name.str(), Characteristics, - COFF::IMAGE_COMDAT_SELECT_ANY, Kind); + Kind, COFF::IMAGE_COMDAT_SELECT_ANY); } if (Kind.isText()) diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp index c9c88c1..7a39a4c 100644 --- a/lib/CodeGen/TargetOptionsImpl.cpp +++ b/lib/CodeGen/TargetOptionsImpl.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Function.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/Target/TargetOptions.h" @@ -21,6 +22,9 @@ using namespace llvm; bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const { // Check to see if we should eliminate non-leaf frame pointers and then // check to see if we should eliminate all frame pointers. 
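// Aside: the hunk below moves "no-frame-pointer-elim-non-leaf" from a global
// TargetOptions flag to a per-function string attribute, with "true"/"false"
// spelled out as text. A stand-alone sketch of that convention (the map is a
// hypothetical stand-in for the AttributeSet lookup):
#include <map>
#include <string>

static bool getBoolFnAttr(const std::map<std::string, std::string> &FnAttrs,
                          const char *Name) {
  std::map<std::string, std::string>::const_iterator It = FnAttrs.find(Name);
  return It != FnAttrs.end() && It->second == "true";
}
// e.g. getBoolFnAttr(Attrs, "no-frame-pointer-elim-non-leaf")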
+ bool NoFramePointerElimNonLeaf = + MF.getFunction()->getFnAttribute("no-frame-pointer-elim-non-leaf") + .getValueAsString() == "true"; if (NoFramePointerElimNonLeaf && !NoFramePointerElim) { const MachineFrameInfo *MFI = MF.getFrameInfo(); return MFI->hasCalls(); @@ -49,29 +53,3 @@ bool TargetOptions::HonorSignDependentRoundingFPMath() const { StringRef TargetOptions::getTrapFunctionName() const { return TrapFuncName; } - -bool TargetOptions::operator==(const TargetOptions &TO) { -#define ARE_EQUAL(X) X == TO.X - return - ARE_EQUAL(UnsafeFPMath) && - ARE_EQUAL(NoInfsFPMath) && - ARE_EQUAL(NoNaNsFPMath) && - ARE_EQUAL(HonorSignDependentRoundingFPMathOption) && - ARE_EQUAL(UseSoftFloat) && - ARE_EQUAL(NoZerosInBSS) && - ARE_EQUAL(JITEmitDebugInfo) && - ARE_EQUAL(JITEmitDebugInfoToDisk) && - ARE_EQUAL(GuaranteedTailCallOpt) && - ARE_EQUAL(DisableTailCalls) && - ARE_EQUAL(StackAlignmentOverride) && - ARE_EQUAL(RealignStack) && - ARE_EQUAL(SSPBufferSize) && - ARE_EQUAL(EnableFastISel) && - ARE_EQUAL(PositionIndependentExecutable) && - ARE_EQUAL(EnableSegmentedStacks) && - ARE_EQUAL(UseInitArray) && - ARE_EQUAL(TrapFuncName) && - ARE_EQUAL(FloatABIType) && - ARE_EQUAL(AllowFPOpFusion); -#undef ARE_EQUAL -} diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp index 1bf14db..64ee9d1 100644 --- a/lib/CodeGen/TargetSchedule.cpp +++ b/lib/CodeGen/TargetSchedule.cpp @@ -93,33 +93,10 @@ unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI, // effectively means infinite latency. Since users of the TargetSchedule API // don't know how to handle this, we convert it to a very large latency that is // easy to distinguish when debugging the DAG but won't induce overflow. -static unsigned convertLatency(int Cycles) { +static unsigned capLatency(int Cycles) { return Cycles >= 0 ? Cycles : 1000; } -/// If we can determine the operand latency from the def only, without machine -/// model or itinerary lookup, do so. Otherwise return -1. -int TargetSchedModel::getDefLatency(const MachineInstr *DefMI, - bool FindMin) const { - - // Return a latency based on the itinerary properties and defining instruction - // if possible. Some common subtargets don't require per-operand latency, - // especially for minimum latencies. - if (FindMin) { - // If MinLatency is invalid, then use the itinerary for MinLatency. If no - // itinerary exists either, then use single cycle latency. - if (SchedModel.MinLatency < 0 && !hasInstrItineraries()) { - return 1; - } - return SchedModel.MinLatency; - } - else if (!hasInstrSchedModel() && !hasInstrItineraries()) { - return TII->defaultDefLatency(&SchedModel, DefMI); - } - // ...operand lookup required - return -1; -} - /// Return the MCSchedClassDesc for this instruction. Some SchedClasses require /// evaluation of predicates that depend on instruction operands or flags. const MCSchedClassDesc *TargetSchedModel:: @@ -177,18 +154,16 @@ static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) { // Top-level API for clients that know the operand indices. 
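// Aside: inside computeOperandLatency() below, this patch guards the
// "write latency minus read advance" subtraction against unsigned
// wrap-around. The arithmetic in isolation (function name invented):
static unsigned effectiveOperandLatency(unsigned Latency, int Advance) {
  if (Advance > 0 && static_cast<unsigned>(Advance) > Latency)
    return 0;               // previously wrapped to a huge unsigned value
  return Latency - Advance; // Advance <= Latency, or non-positive
}
// e.g. effectiveOperandLatency(2, 3) now yields 0 rather than 4294967295.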
unsigned TargetSchedModel::computeOperandLatency( const MachineInstr *DefMI, unsigned DefOperIdx, - const MachineInstr *UseMI, unsigned UseOperIdx, - bool FindMin) const { + const MachineInstr *UseMI, unsigned UseOperIdx) const { - int DefLatency = getDefLatency(DefMI, FindMin); - if (DefLatency >= 0) - return DefLatency; + if (!hasInstrSchedModel() && !hasInstrItineraries()) + return TII->defaultDefLatency(&SchedModel, DefMI); if (hasInstrItineraries()) { int OperLatency = 0; if (UseMI) { - OperLatency = - TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx, UseMI, UseOperIdx); + OperLatency = TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx, + UseMI, UseOperIdx); } else { unsigned DefClass = DefMI->getDesc().getSchedClass(); @@ -205,13 +180,11 @@ unsigned TargetSchedModel::computeOperandLatency( // hook to allow subtargets to specialize latency. This hook is only // applicable to the InstrItins model. InstrSchedModel should model all // special cases without TII hooks. - if (!FindMin) - InstrLatency = std::max(InstrLatency, - TII->defaultDefLatency(&SchedModel, DefMI)); + InstrLatency = std::max(InstrLatency, + TII->defaultDefLatency(&SchedModel, DefMI)); return InstrLatency; } - assert(!FindMin && hasInstrSchedModel() && - "Expected a SchedModel for this cpu"); + // hasInstrSchedModel() const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI); unsigned DefIdx = findDefIdx(DefMI, DefOperIdx); if (DefIdx < SCDesc->NumWriteLatencyEntries) { @@ -219,7 +192,7 @@ unsigned TargetSchedModel::computeOperandLatency( const MCWriteLatencyEntry *WLEntry = STI->getWriteLatencyEntry(SCDesc, DefIdx); unsigned WriteID = WLEntry->WriteResourceID; - unsigned Latency = convertLatency(WLEntry->Cycles); + unsigned Latency = capLatency(WLEntry->Cycles); if (!UseMI) return Latency; @@ -228,7 +201,10 @@ unsigned TargetSchedModel::computeOperandLatency( if (UseDesc->NumReadAdvanceEntries == 0) return Latency; unsigned UseIdx = findUseIdx(UseMI, UseOperIdx); - return Latency - STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID); + int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID); + if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap + return 0; + return Latency - Advance; } // If DefIdx does not exist in the model (e.g. implicit defs), then return // unit latency (defaultDefLatency may be too conservative). @@ -263,7 +239,7 @@ unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const { // Lookup the definition's write latency in SubtargetInfo. const MCWriteLatencyEntry *WLEntry = STI->getWriteLatencyEntry(SCDesc, DefIdx); - Latency = std::max(Latency, convertLatency(WLEntry->Cycles)); + Latency = std::max(Latency, capLatency(WLEntry->Cycles)); } return Latency; } @@ -274,13 +250,10 @@ unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const { unsigned TargetSchedModel:: computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *DepMI) const { - // MinLatency == -1 is for in-order processors that always have unit - // MinLatency. MinLatency > 0 is for in-order processors with varying min - // latencies, but since this is not a RAW dep, we always use unit latency. - if (SchedModel.MinLatency != 0) + if (SchedModel.MicroOpBufferSize <= 1) return 1; - // MinLatency == 0 indicates an out-of-order processor that can dispatch + // MicroOpBufferSize > 1 indicates an out-of-order processor that can dispatch // WAW dependencies in the same cycle. // Treat predication as a data dependency for out-of-order cpus. 
In-order @@ -302,7 +275,7 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, if (SCDesc->isValid()) { for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc), *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) { - if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->IsBuffered) + if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->BufferSize) return 1; } } diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 7ca2bee..c52e675 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1539,7 +1539,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { // transformations that may either eliminate the tied operands or // improve the opportunities for coalescing away the register copy. if (TiedOperands.size() == 1) { - SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs + SmallVectorImpl<std::pair<unsigned, unsigned> > &TiedPairs = TiedOperands.begin()->second; if (TiedPairs.size() == 1) { unsigned SrcIdx = TiedPairs[0].first;
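// Aside: the TargetSchedule hunks above retire the old MinLatency and
// IsBuffered encodings. A compilable restatement of the two replacement
// tests, with invented helper names; the thresholds (at most 1 micro-op,
// buffer size 0) are the ones the patch checks:
static bool hasInOrderOutputLatency(int MicroOpBufferSize) {
  // At most one buffered micro-op: treat WAW (output) deps as unit latency.
  return MicroOpBufferSize <= 1;
}

static bool resourceStallsPipeline(int ProcResourceBufferSize) {
  // BufferSize == 0 replaces !IsBuffered: an unbuffered resource serializes.
  return ProcResourceBufferSize == 0;
}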