Diffstat (limited to 'lib/CodeGen')
40 files changed, 1325 insertions, 898 deletions
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 6a86c85..4564beb 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/TargetAsmParser.h"
 #include "llvm/Target/TargetMachine.h"
@@ -112,7 +113,16 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const {
   OwningPtr<MCAsmParser> Parser(createMCAsmParser(TM.getTarget(), SrcMgr,
                                                   OutContext, OutStreamer,
                                                   *MAI));
-  OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(*Parser, TM));
+
+  // FIXME: It would be nice if we could avoid creating a new instance of
+  // MCSubtargetInfo here given TargetSubtargetInfo is available. However,
+  // we have to watch out for asm directives which can change subtarget
+  // state. e.g. .code 16, .code 32.
+  OwningPtr<MCSubtargetInfo>
+    STI(TM.getTarget().createMCSubtargetInfo(TM.getTargetTriple(),
+                                             TM.getTargetCPU(),
+                                             TM.getTargetFeatureString()));
+  OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(*STI, *Parser));
   if (!TAP)
     report_fatal_error("Inline asm not supported by this streamer because"
                        " we don't have an asm parser for this target\n");
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index f85a82d..125e1e8 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -229,6 +229,7 @@ public:
 void DbgScope::dump() const {
   raw_ostream &err = dbgs();
   err.indent(IndentLevel);
+  err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n";
   const MDNode *N = Desc;
   N->dump();
   if (AbstractScope)
@@ -1314,7 +1315,6 @@ bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
 void
 DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
                                    SmallPtrSet<const MDNode *, 16> &Processed) {
-  const LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
   MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
   for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
          VE = VMap.end(); VI != VE; ++VI) {
@@ -1324,11 +1324,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
     DIVariable DV(Var);
     const std::pair<unsigned, DebugLoc> &VP = VI->second;

-    DbgScope *Scope = 0;
-    if (const MDNode *IA = VP.second.getInlinedAt(Ctx))
-      Scope = ConcreteScopes.lookup(IA);
-    if (Scope == 0)
-      Scope = DbgScopeMap.lookup(VP.second.getScope(Ctx));
+    DbgScope *Scope = findDbgScope(VP.second);

     // If variable scope is not found then skip this variable.
     if (Scope == 0)
@@ -1355,6 +1351,34 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
     MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0;
 }

+/// getDebugLocEntry - Get .debug_loc entry for the instruction range starting
+/// at MI.
+static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
+                                         const MCSymbol *FLabel,
+                                         const MCSymbol *SLabel,
+                                         const MachineInstr *MI) {
+  const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata();
+
+  if (MI->getNumOperands() != 3) {
+    MachineLocation MLoc = Asm->getDebugValueLocation(MI);
+    return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
+  }
+  if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) {
+    MachineLocation MLoc;
+    MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+    return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
+  }
+  if (MI->getOperand(0).isImm())
+    return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getImm());
+  if (MI->getOperand(0).isFPImm())
+    return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getFPImm());
+  if (MI->getOperand(0).isCImm())
+    return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm());
+
+  assert (0 && "Unexpected 3 operand DBG_VALUE instruction!");
+  return DotDebugLocEntry();
+}
+
 /// collectVariableInfo - Populate DbgScope entries with variables' info.
 void
 DwarfDebug::collectVariableInfo(const MachineFunction *MF,
@@ -1383,7 +1407,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
         DISubprogram(DV.getContext()).describes(MF->getFunction()))
       Scope = CurrentFnDbgScope;
     else
-      Scope = findDbgScope(MInsn);
+      Scope = findDbgScope(MInsn->getDebugLoc());

     // If variable scope is not found then skip this variable.
     if (!Scope)
       continue;
@@ -1428,6 +1452,8 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
         SLabel = FunctionEndSym;
       else {
         const MachineInstr *End = HI[1];
+        DEBUG(dbgs() << "DotDebugLoc Pair:\n"
+              << "\t" << *Begin << "\t" << *End << "\n");
         if (End->isDebugValue())
           SLabel = getLabelBeforeInsn(End);
         else {
@@ -1439,25 +1465,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
       }

       // The value is valid until the next DBG_VALUE or clobber.
-      MachineLocation MLoc;
-      if (Begin->getNumOperands() == 3) {
-        if (Begin->getOperand(0).isReg() && Begin->getOperand(1).isImm()) {
-          MLoc.set(Begin->getOperand(0).getReg(),
-                   Begin->getOperand(1).getImm());
-          DotDebugLocEntries.
-            push_back(DotDebugLocEntry(FLabel, SLabel, MLoc, Var));
-        }
-        // FIXME: Handle isFPImm also.
-        else if (Begin->getOperand(0).isImm()) {
-          DotDebugLocEntries.
-            push_back(DotDebugLocEntry(FLabel, SLabel,
-                                       Begin->getOperand(0).getImm()));
-        }
-      } else {
-        MLoc = Asm->getDebugValueLocation(Begin);
-        DotDebugLocEntries.
-          push_back(DotDebugLocEntry(FLabel, SLabel, MLoc, Var));
-      }
+      DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel, Begin));
     }
     DotDebugLocEntries.push_back(DotDebugLocEntry());
   }
@@ -1554,8 +1562,12 @@ void DwarfDebug::endInstruction(const MachineInstr *MI) {
 }

 /// getOrCreateDbgScope - Create DbgScope for the scope.
-DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
-                                          const MDNode *InlinedAt) {
+DbgScope *DwarfDebug::getOrCreateDbgScope(DebugLoc DL) {
+  LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
+  MDNode *Scope = NULL;
+  MDNode *InlinedAt = NULL;
+  DL.getScopeAndInlinedAt(Scope, InlinedAt, Ctx);
+
   if (!InlinedAt) {
     DbgScope *WScope = DbgScopeMap.lookup(Scope);
     if (WScope)
       return WScope;
@@ -1564,22 +1576,12 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
     DbgScopeMap.insert(std::make_pair(Scope, WScope));
     if (DIDescriptor(Scope).isLexicalBlock()) {
       DbgScope *Parent =
-        getOrCreateDbgScope(DILexicalBlock(Scope).getContext(), NULL);
+        getOrCreateDbgScope(DebugLoc::getFromDILexicalBlock(Scope));
       WScope->setParent(Parent);
       Parent->addScope(WScope);
-    }
-
-    if (!WScope->getParent()) {
-      StringRef SPName = DISubprogram(Scope).getLinkageName();
-      // We used to check only for a linkage name, but that fails
-      // since we began omitting the linkage name for private
-      // functions. The new way is to check for the name in metadata,
-      // but that's not supported in old .ll test cases. Ergo, we
-      // check both.
-      if (SPName == Asm->MF->getFunction()->getName() ||
-          DISubprogram(Scope).getFunction() == Asm->MF->getFunction())
-        CurrentFnDbgScope = WScope;
-    }
+    } else if (DIDescriptor(Scope).isSubprogram()
+               && DISubprogram(Scope).describes(Asm->MF->getFunction()))
+      CurrentFnDbgScope = WScope;

     return WScope;
   }
@@ -1591,37 +1593,14 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
   WScope = new DbgScope(NULL, DIDescriptor(Scope), InlinedAt);
   DbgScopeMap.insert(std::make_pair(InlinedAt, WScope));
-  DILocation DL(InlinedAt);
+  InlinedDbgScopeMap[DebugLoc::getFromDILocation(InlinedAt)] = WScope;
   DbgScope *Parent =
-    getOrCreateDbgScope(DL.getScope(), DL.getOrigLocation());
+    getOrCreateDbgScope(DebugLoc::getFromDILocation(InlinedAt));
   WScope->setParent(Parent);
   Parent->addScope(WScope);
-
-  ConcreteScopes[InlinedAt] = WScope;
-
   return WScope;
 }

-/// hasValidLocation - Return true if debug location entry attached with
-/// machine instruction encodes valid location info.
-static bool hasValidLocation(LLVMContext &Ctx,
-                             const MachineInstr *MInsn,
-                             const MDNode *&Scope, const MDNode *&InlinedAt) {
-  DebugLoc DL = MInsn->getDebugLoc();
-  if (DL.isUnknown()) return false;
-
-  const MDNode *S = DL.getScope(Ctx);
-
-  // There is no need to create another DIE for compile unit. For all
-  // other scopes, create one DbgScope now. This will be translated
-  // into a scope DIE at the end.
-  if (DIScope(S).isCompileUnit()) return false;
-
-  Scope = S;
-  InlinedAt = DL.getInlinedAt(Ctx);
-  return true;
-}
-
 /// calculateDominanceGraph - Calculate dominance graph for DbgScope
 /// hierarchy.
 static void calculateDominanceGraph(DbgScope *Scope) {
@@ -1652,21 +1631,24 @@ static void calculateDominanceGraph(DbgScope *Scope) {
 /// printDbgScopeInfo - Print DbgScope info for each machine instruction.
 static
-void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF,
+void printDbgScopeInfo(const MachineFunction *MF,
                        DenseMap<const MachineInstr *, DbgScope *> &MI2ScopeMap) {
 #ifndef NDEBUG
+  LLVMContext &Ctx = MF->getFunction()->getContext();
   unsigned PrevDFSIn = 0;
   for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
        I != E; ++I) {
     for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
          II != IE; ++II) {
       const MachineInstr *MInsn = II;
-      const MDNode *Scope = NULL;
-      const MDNode *InlinedAt = NULL;
+      MDNode *Scope = NULL;
+      MDNode *InlinedAt = NULL;
       // Check if instruction has valid location information.
-      if (hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
+      DebugLoc MIDL = MInsn->getDebugLoc();
+      if (!MIDL.isUnknown()) {
+        MIDL.getScopeAndInlinedAt(Scope, InlinedAt, Ctx);
         dbgs() << " [ ";
         if (InlinedAt)
           dbgs() << "*";
@@ -1696,11 +1678,9 @@ bool DwarfDebug::extractScopeInformation() {
     return false;

   // Scan each instruction and create scopes. First build working set of scopes.
-  LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
   SmallVector<DbgRange, 4> MIRanges;
   DenseMap<const MachineInstr *, DbgScope *> MI2ScopeMap;
-  const MDNode *PrevScope = NULL;
-  const MDNode *PrevInlinedAt = NULL;
+  DebugLoc PrevDL;
   const MachineInstr *RangeBeginMI = NULL;
   const MachineInstr *PrevMI = NULL;
   for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
@@ -1708,17 +1688,16 @@ bool DwarfDebug::extractScopeInformation() {
     for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
          II != IE; ++II) {
       const MachineInstr *MInsn = II;
-      const MDNode *Scope = NULL;
-      const MDNode *InlinedAt = NULL;

       // Check if instruction has valid location information.
-      if (!hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
+      const DebugLoc MIDL = MInsn->getDebugLoc();
+      if (MIDL.isUnknown()) {
        PrevMI = MInsn;
         continue;
       }

       // If scope has not changed then skip this instruction.
-      if (Scope == PrevScope && PrevInlinedAt == InlinedAt) {
+      if (MIDL == PrevDL) {
         PrevMI = MInsn;
         continue;
       }
@@ -1731,9 +1710,13 @@ bool DwarfDebug::extractScopeInformation() {
         // If we have already seen the beginning of an instruction range and
         // current instruction scope does not match scope of first instruction
         // in this range then create a new instruction range.
+        DEBUG(dbgs() << "Creating new instruction range :\n");
+        DEBUG(dbgs() << "Begin Range at " << *RangeBeginMI);
+        DEBUG(dbgs() << "End Range at " << *PrevMI);
+        DEBUG(dbgs() << "Next Range starting at " << *MInsn);
+        DEBUG(dbgs() << "------------------------\n");
         DbgRange R(RangeBeginMI, PrevMI);
-        MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope,
-                                                        PrevInlinedAt);
+        MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevDL);
         MIRanges.push_back(R);
       }
@@ -1742,16 +1725,15 @@
       // Reset previous markers.
       PrevMI = MInsn;
-      PrevScope = Scope;
-      PrevInlinedAt = InlinedAt;
+      PrevDL = MIDL;
     }
   }

   // Create last instruction range.
-  if (RangeBeginMI && PrevMI && PrevScope) {
+  if (RangeBeginMI && PrevMI && !PrevDL.isUnknown()) {
     DbgRange R(RangeBeginMI, PrevMI);
     MIRanges.push_back(R);
-    MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt);
+    MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevDL);
   }

   if (!CurrentFnDbgScope)
@@ -1759,7 +1741,7 @@ bool DwarfDebug::extractScopeInformation() {
   calculateDominanceGraph(CurrentFnDbgScope);
   if (PrintDbgScope)
-    printDbgScopeInfo(Ctx, Asm->MF, MI2ScopeMap);
+    printDbgScopeInfo(Asm->MF, MI2ScopeMap);

   // Find ranges of instructions covered by each DbgScope;
   DbgScope *PrevDbgScope = NULL;
@@ -1846,8 +1828,6 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
   assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned");

-  /// ProcessedArgs - Collection of arguments already processed.
-  SmallPtrSet<const MDNode *, 8> ProcessedArgs;
   const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
   /// LiveUserVar - Map physreg numbers to the MDNode they contain.
   std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs());
@@ -1887,8 +1867,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
         if (Prev->isDebugValue()) {
           // Coalesce identical entries at the end of History.
           if (History.size() >= 2 &&
-              Prev->isIdenticalTo(History[History.size() - 2]))
+              Prev->isIdenticalTo(History[History.size() - 2])) {
+            DEBUG(dbgs() << "Coalesce identical DBG_VALUE entries:\n"
+                  << "\t" << *Prev
+                  << "\t" << *History[History.size() - 2] << "\n");
             History.pop_back();
+          }

           // Terminate old register assignments that don't reach MI;
           MachineFunction::const_iterator PrevMBB = Prev->getParent();
@@ -1898,9 +1882,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
             // its basic block.
             MachineBasicBlock::const_iterator LastMI =
               PrevMBB->getLastNonDebugInstr();
-            if (LastMI == PrevMBB->end())
+            if (LastMI == PrevMBB->end()) {
               // Drop DBG_VALUE for empty range.
+              DEBUG(dbgs() << "Drop DBG_VALUE for empty range:\n"
+                    << "\t" << *Prev << "\n");
               History.pop_back();
+            }
             else {
               // Terminate after LastMI.
               History.push_back(LastMI);
@@ -2057,10 +2044,10 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
   DbgVariableToFrameIndexMap.clear();
   VarToAbstractVarMap.clear();
   DbgVariableToDbgInstMap.clear();
+  InlinedDbgScopeMap.clear();
   DeleteContainerSeconds(DbgScopeMap);
   UserVariables.clear();
   DbgValues.clear();
-  ConcreteScopes.clear();
   DeleteContainerSeconds(AbstractScopes);
   AbstractScopesList.clear();
   AbstractVariables.clear();
@@ -2087,22 +2074,17 @@ bool DwarfDebug::findVariableFrameIndex(const DbgVariable *V, int *FI) {
   return true;
 }

-/// findDbgScope - Find DbgScope for the debug loc attached with an
-/// instruction.
-DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
-  DbgScope *Scope = NULL;
-  LLVMContext &Ctx =
-    MInsn->getParent()->getParent()->getFunction()->getContext();
-  DebugLoc DL = MInsn->getDebugLoc();
-
+/// findDbgScope - Find DbgScope for the debug loc.
+DbgScope *DwarfDebug::findDbgScope(DebugLoc DL) {
   if (DL.isUnknown())
-    return Scope;
+    return NULL;

-  if (const MDNode *IA = DL.getInlinedAt(Ctx))
-    Scope = ConcreteScopes.lookup(IA);
-  if (Scope == 0)
+  DbgScope *Scope = NULL;
+  LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
+  if (MDNode *IA = DL.getInlinedAt(Ctx))
+    Scope = InlinedDbgScopeMap.lookup(DebugLoc::getFromDILocation(IA));
+  else
     Scope = DbgScopeMap.lookup(DL.getScope(Ctx));
-
   return Scope;
 }

@@ -2601,56 +2583,61 @@ void DwarfDebug::emitDebugLoc() {
       MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol();
       Asm->EmitLabelDifference(end, begin, 2);
       Asm->OutStreamer.EmitLabel(begin);
-      if (Entry.isConstant()) {
+      if (Entry.isInt()) {
         DIBasicType BTy(DV.getType());
         if (BTy.Verify() &&
             (BTy.getEncoding() == dwarf::DW_ATE_signed
              || BTy.getEncoding() == dwarf::DW_ATE_signed_char)) {
           Asm->OutStreamer.AddComment("DW_OP_consts");
           Asm->EmitInt8(dwarf::DW_OP_consts);
-          Asm->EmitSLEB128(Entry.getConstant());
+          Asm->EmitSLEB128(Entry.getInt());
         } else {
           Asm->OutStreamer.AddComment("DW_OP_constu");
           Asm->EmitInt8(dwarf::DW_OP_constu);
-          Asm->EmitULEB128(Entry.getConstant());
+          Asm->EmitULEB128(Entry.getInt());
         }
-      } else if (DV.hasComplexAddress()) {
-        unsigned N = DV.getNumAddrElements();
-        unsigned i = 0;
-        if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
-          if (Entry.Loc.getOffset()) {
-            i = 2;
-            Asm->EmitDwarfRegOp(Entry.Loc);
-            Asm->OutStreamer.AddComment("DW_OP_deref");
-            Asm->EmitInt8(dwarf::DW_OP_deref);
-            Asm->OutStreamer.AddComment("DW_OP_plus_uconst");
-            Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
-            Asm->EmitSLEB128(DV.getAddrElement(1));
+      } else if (Entry.isLocation()) {
+        if (!DV.hasComplexAddress())
+          // Regular entry.
+          Asm->EmitDwarfRegOp(Entry.Loc);
+        else {
+          // Complex address entry.
+          unsigned N = DV.getNumAddrElements();
+          unsigned i = 0;
+          if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
+            if (Entry.Loc.getOffset()) {
+              i = 2;
+              Asm->EmitDwarfRegOp(Entry.Loc);
+              Asm->OutStreamer.AddComment("DW_OP_deref");
+              Asm->EmitInt8(dwarf::DW_OP_deref);
+              Asm->OutStreamer.AddComment("DW_OP_plus_uconst");
+              Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
+              Asm->EmitSLEB128(DV.getAddrElement(1));
             } else {
-            // If first address element is OpPlus then emit
-            // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
-            MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1));
-            Asm->EmitDwarfRegOp(Loc);
-            i = 2;
+              // If first address element is OpPlus then emit
+              // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
+              MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1));
+              Asm->EmitDwarfRegOp(Loc);
+              i = 2;
+            }
           } else {
+            Asm->EmitDwarfRegOp(Entry.Loc);
+          }
+
+          // Emit remaining complex address elements.
+          for (; i < N; ++i) {
+            uint64_t Element = DV.getAddrElement(i);
+            if (Element == DIBuilder::OpPlus) {
+              Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
+              Asm->EmitULEB128(DV.getAddrElement(++i));
+            } else if (Element == DIBuilder::OpDeref)
+              Asm->EmitInt8(dwarf::DW_OP_deref);
+            else llvm_unreachable("unknown Opcode found in complex address");
          }
-        } else {
-          Asm->EmitDwarfRegOp(Entry.Loc);
-        }
-
-        // Emit remaining complex address elements.
-        for (; i < N; ++i) {
-          uint64_t Element = DV.getAddrElement(i);
-          if (Element == DIBuilder::OpPlus) {
-            Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
-            Asm->EmitULEB128(DV.getAddrElement(++i));
-          } else if (Element == DIBuilder::OpDeref)
-            Asm->EmitInt8(dwarf::DW_OP_deref);
-          else llvm_unreachable("unknown Opcode found in complex address");
         }
-      } else {
-        // Regular entry.
-        Asm->EmitDwarfRegOp(Entry.Loc);
       }
+      // else ... ignore constant fp. There is no good way
+      // to represent them here in dwarf.
       Asm->OutStreamer.EmitLabel(end);
     }
   }
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index abda2e6..b245006 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -69,17 +69,35 @@ typedef struct DotDebugLocEntry {
   const MDNode *Variable;
   bool Merged;
   bool Constant;
-  int64_t iConstant;
+  enum EntryType {
+    E_Location,
+    E_Integer,
+    E_ConstantFP,
+    E_ConstantInt
+  };
+  enum EntryType EntryKind;
+
+  union {
+    int64_t Int;
+    const ConstantFP *CFP;
+    const ConstantInt *CIP;
+  } Constants;

   DotDebugLocEntry()
     : Begin(0), End(0), Variable(0), Merged(false),
-      Constant(false), iConstant(0) {}
+      Constant(false) { Constants.Int = 0;}
   DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L,
                    const MDNode *V)
     : Begin(B), End(E), Loc(L), Variable(V), Merged(false),
-      Constant(false), iConstant(0) {}
+      Constant(false) { Constants.Int = 0; EntryKind = E_Location; }
   DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i)
     : Begin(B), End(E), Variable(0), Merged(false),
-      Constant(true), iConstant(i) {}
+      Constant(true) { Constants.Int = i; EntryKind = E_Integer; }
+  DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr)
+    : Begin(B), End(E), Variable(0), Merged(false),
+      Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; }
+  DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantInt *IPtr)
+    : Begin(B), End(E), Variable(0), Merged(false),
+      Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; }

   /// Empty entries are also used as a trigger to emit temp label. Such
   /// labels are referenced and used to find debug_loc offset for a given DIE.
@@ -91,8 +109,13 @@ typedef struct DotDebugLocEntry {
     Next->Begin = Begin;
     Merged = true;
   }
-  bool isConstant() { return Constant; }
-  int64_t getConstant() { return iConstant; }
+  bool isLocation() const { return EntryKind == E_Location; }
+  bool isInt() const { return EntryKind == E_Integer; }
+  bool isConstantFP() const { return EntryKind == E_ConstantFP; }
+  bool isConstantInt() const { return EntryKind == E_ConstantInt; }
+  int64_t getInt() { return Constants.Int; }
+  const ConstantFP *getConstantFP() { return Constants.CFP; }
+  const ConstantInt *getConstantInt() { return Constants.CIP; }
 } DotDebugLocEntry;

 //===----------------------------------------------------------------------===//
@@ -178,12 +201,10 @@ class DwarfDebug {
   /// DbgScopeMap - Tracks the scopes in the current function. Owns the
   /// contained DbgScope*s.
-  ///
   DenseMap<const MDNode *, DbgScope *> DbgScopeMap;

-  /// ConcreteScopes - Tracks the concrete scopees in the current function.
-  /// These scopes are also included in DbgScopeMap.
-  DenseMap<const MDNode *, DbgScope *> ConcreteScopes;
+  /// InlinedDbgScopeMap - Tracks inlined function scopes in current function.
+  DenseMap<DebugLoc, DbgScope *> InlinedDbgScopeMap;

   /// AbstractScopes - Tracks the abstract scopes in a module. These scopes are
   /// not included in DbgScopeMap. AbstractScopes owns its DbgScope*s.
@@ -296,7 +317,7 @@ private:
   void assignAbbrevNumber(DIEAbbrev &Abbrev);

   /// getOrCreateDbgScope - Create DbgScope for the scope.
-  DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt);
+  DbgScope *getOrCreateDbgScope(DebugLoc DL);

   DbgScope *getOrCreateAbstractScope(const MDNode *N);
@@ -427,9 +448,8 @@ private:
   /// is found. Update FI to hold value of the index.
   bool findVariableFrameIndex(const DbgVariable *V, int *FI);

-  /// findDbgScope - Find DbgScope for the debug loc attached with an
-  /// instruction.
-  DbgScope *findDbgScope(const MachineInstr *MI);
+  /// findDbgScope - Find DbgScope for the debug loc.
+  DbgScope *findDbgScope(DebugLoc DL);

   /// identifyScopeMarkers() - Identify instructions that are marking
   /// beginning of or end of a scope.
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 4df7b46..99090a8 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -366,11 +366,31 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
   return TailLen;
 }

+void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB,
+                                   MachineBasicBlock *NewMBB) {
+  if (RS) {
+    RS->enterBasicBlock(CurMBB);
+    if (!CurMBB->empty())
+      RS->forward(prior(CurMBB->end()));
+    BitVector RegsLiveAtExit(TRI->getNumRegs());
+    RS->getRegsUsed(RegsLiveAtExit, false);
+    for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
+      if (RegsLiveAtExit[i])
+        NewMBB->addLiveIn(i);
+  }
+}
+
 /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
 /// after it, replacing it with an unconditional branch to NewDest.
 void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
                                            MachineBasicBlock *NewDest) {
+  MachineBasicBlock *CurMBB = OldInst->getParent();
+
   TII->ReplaceTailWithBranchTo(OldInst, NewDest);
+
+  // For targets that use the register scavenger, we must maintain LiveIns.
+  MaintainLiveIns(CurMBB, NewDest);
+
   ++NumTailMerge;
 }
@@ -399,16 +419,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
   NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());

   // For targets that use the register scavenger, we must maintain LiveIns.
-  if (RS) {
-    RS->enterBasicBlock(&CurMBB);
-    if (!CurMBB.empty())
-      RS->forward(prior(CurMBB.end()));
-    BitVector RegsLiveAtExit(TRI->getNumRegs());
-    RS->getRegsUsed(RegsLiveAtExit, false);
-    for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
-      if (RegsLiveAtExit[i])
-        NewMBB->addLiveIn(i);
-  }
+  MaintainLiveIns(&CurMBB, NewMBB);

   return NewMBB;
 }
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index 4ed42c0..df795df 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -95,6 +95,8 @@ namespace llvm {
     bool TailMergeBlocks(MachineFunction &MF);
     bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
                             MachineBasicBlock* PredBB);
+    void MaintainLiveIns(MachineBasicBlock *CurMBB,
+                         MachineBasicBlock *NewMBB);
     void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
                                  MachineBasicBlock *NewDest);
     MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 22c5465..03604b0 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -336,8 +336,7 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
         Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator.
         CallInst *NewSelector =
-          CallInst::Create(SelectorIntrinsic, Args.begin(), Args.end(),
-                           "eh.sel.catch.all", II);
+          CallInst::Create(SelectorIntrinsic, Args, "eh.sel.catch.all", II);

         NewSelector->setTailCall(II->isTailCall());
         NewSelector->setAttributes(II->getAttributes());
@@ -497,10 +496,8 @@ bool DwarfEHPrepare::LowerUnwindsAndResumes() {
   // Find the rewind function if we didn't already.
   if (!RewindFunction) {
     LLVMContext &Ctx = ResumeInsts[0]->getContext();
-    std::vector<const Type*>
-      Params(1, Type::getInt8PtrTy(Ctx));
     FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
-                                          Params, false);
+                                          Type::getInt8PtrTy(Ctx), false);
     const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
     RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy);
   }
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index c918bf6..6cb2277 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -173,10 +174,10 @@ namespace {
   private:
     bool ReverseBranchCondition(BBInfo &BBI);
     bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
-                     float Prediction, float Confidence) const;
+                     const BranchProbability &Prediction) const;
     bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
                        bool FalseBranch, unsigned &Dups,
-                       float Prediction, float Confidence) const;
+                       const BranchProbability &Prediction) const;
     bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
                       unsigned &Dups1, unsigned &Dups2) const;
     void ScanInstructions(BBInfo &BBI);
@@ -203,19 +204,19 @@ namespace {
     bool MeetIfcvtSizeLimit(MachineBasicBlock &BB,
                             unsigned Cycle, unsigned Extra,
-                            float Prediction, float Confidence) const {
+                            const BranchProbability &Prediction) const {
       return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra,
-                                                   Prediction, Confidence);
+                                                   Prediction);
     }
     bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
                             unsigned TCycle, unsigned TExtra,
                             MachineBasicBlock &FBB,
                             unsigned FCycle, unsigned FExtra,
-                            float Prediction, float Confidence) const {
+                            const BranchProbability &Prediction) const {
       return TCycle > 0 && FCycle > 0 &&
         TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
-                                 Prediction, Confidence);
+                                 Prediction);
     }

     // blockAlwaysFallThrough - Block ends without a terminator.
@@ -450,7 +451,7 @@ static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
 /// number of instructions that the ifcvt would need to duplicate if performed
 /// in Dups.
 bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
-                              float Prediction, float Confidence) const {
+                              const BranchProbability &Prediction) const {
   Dups = 0;
   if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
     return false;
@@ -461,7 +462,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
   if (TrueBBI.BB->pred_size() > 1) {
     if (TrueBBI.CannotBeCopied ||
         !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize,
-                                        Prediction, Confidence))
+                                        Prediction))
       return false;
     Dups = TrueBBI.NonPredSize;
  }
@@ -477,7 +478,7 @@
 /// if performed in 'Dups'.
 bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
                                 bool FalseBranch, unsigned &Dups,
-                                float Prediction, float Confidence) const {
+                                const BranchProbability &Prediction) const {
   Dups = 0;
   if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
     return false;
@@ -499,8 +500,7 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
         ++Size;
       }
     }
-    if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size,
-                                        Prediction, Confidence))
+    if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, Prediction))
       return false;
     Dups = Size;
   }
@@ -751,8 +751,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
   ScanInstructions(BBI);

-  // Unanalyzable or ends with fallthrough or unconditional branch.
-  if (!BBI.IsBrAnalyzable || BBI.BrCond.empty()) {
+  // Unanalyzable or ends with fallthrough or unconditional branch, or is not
+  // considered for ifcvt anymore.
+  if (!BBI.IsBrAnalyzable || BBI.BrCond.empty() || BBI.IsDone) {
     BBI.IsBeingAnalyzed = false;
     BBI.IsAnalyzed = true;
     return BBI;
   }
@@ -795,21 +796,20 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
   // - backedge -> 90% taken
   // - early exit -> 20% taken
   // - branch predictor confidence -> 90%
-  float Prediction = 0.5f;
-  float Confidence = 0.9f;
+  BranchProbability Prediction(5, 10);
   MachineLoop *Loop = MLI->getLoopFor(BB);
   if (Loop) {
     if (TrueBBI.BB == Loop->getHeader())
-      Prediction = 0.9f;
+      Prediction = BranchProbability(9, 10);
     else if (FalseBBI.BB == Loop->getHeader())
-      Prediction = 0.1f;
+      Prediction = BranchProbability(1, 10);

     MachineLoop *TrueLoop = MLI->getLoopFor(TrueBBI.BB);
     MachineLoop *FalseLoop = MLI->getLoopFor(FalseBBI.BB);
     if (!TrueLoop || TrueLoop->getParentLoop() == Loop)
-      Prediction = 0.2f;
+      Prediction = BranchProbability(2, 10);
     else if (!FalseLoop || FalseLoop->getParentLoop() == Loop)
-      Prediction = 0.8f;
+      Prediction = BranchProbability(8, 10);
   }

   if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
       MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
                                        TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
                          *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
                                         FalseBBI.ExtraCost),FalseBBI.ExtraCost2,
-                         Prediction, Confidence) &&
+                         Prediction) &&
       FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
       FeasibilityAnalysis(FalseBBI, RevCond)) {
     // Diamond:
@@ -833,9 +833,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
     Enqueued = true;
   }

-  if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction, Confidence) &&
+  if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) &&
       MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
-                         TrueBBI.ExtraCost2, Prediction, Confidence) &&
+                         TrueBBI.ExtraCost2, Prediction) &&
       FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
     // Triangle:
    //   EBB
@@ -848,17 +848,17 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
     Enqueued = true;
   }

-  if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction, Confidence) &&
+  if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction) &&
       MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
-                         TrueBBI.ExtraCost2, Prediction, Confidence) &&
+                         TrueBBI.ExtraCost2, Prediction) &&
       FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
     Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
     Enqueued = true;
   }

-  if (ValidSimple(TrueBBI, Dups, Prediction, Confidence) &&
+  if (ValidSimple(TrueBBI, Dups, Prediction) &&
      MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize +
                                        TrueBBI.ExtraCost,
-                         TrueBBI.ExtraCost2, Prediction, Confidence) &&
+                         TrueBBI.ExtraCost2, Prediction) &&
      FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
     // Simple (split, no rejoin):
     //   EBB
@@ -874,29 +874,29 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
   if (CanRevCond) {
     // Try the other path...
     if (ValidTriangle(FalseBBI, TrueBBI, false, Dups,
-                      1.0-Prediction, Confidence) &&
+                      Prediction.getCompl()) &&
         MeetIfcvtSizeLimit(*FalseBBI.BB,
                            FalseBBI.NonPredSize + FalseBBI.ExtraCost,
-                           FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
+                           FalseBBI.ExtraCost2, Prediction.getCompl()) &&
         FeasibilityAnalysis(FalseBBI, RevCond, true)) {
       Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
       Enqueued = true;
     }

     if (ValidTriangle(FalseBBI, TrueBBI, true, Dups,
-                      1.0-Prediction, Confidence) &&
+                      Prediction.getCompl()) &&
         MeetIfcvtSizeLimit(*FalseBBI.BB,
                            FalseBBI.NonPredSize + FalseBBI.ExtraCost,
-                           FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
+                           FalseBBI.ExtraCost2, Prediction.getCompl()) &&
         FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
       Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
       Enqueued = true;
     }

-    if (ValidSimple(FalseBBI, Dups, 1.0-Prediction, Confidence) &&
+    if (ValidSimple(FalseBBI, Dups, Prediction.getCompl()) &&
        MeetIfcvtSizeLimit(*FalseBBI.BB,
                            FalseBBI.NonPredSize + FalseBBI.ExtraCost,
-                           FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
+                           FalseBBI.ExtraCost2, Prediction.getCompl()) &&
         FeasibilityAnalysis(FalseBBI, RevCond)) {
       Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
       Enqueued = true;
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 0273891..5547f73 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -303,7 +303,8 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
   // Best spill candidate seen so far. This must dominate UseVNI.
   SibValueInfo SVI(UseReg, UseVNI);
   MachineBasicBlock *UseMBB = LIS.getMBBFromIndex(UseVNI->def);
-  unsigned SpillDepth = Loops.getLoopDepth(UseMBB);
+  MachineBasicBlock *SpillMBB = UseMBB;
+  unsigned SpillDepth = Loops.getLoopDepth(SpillMBB);
   bool SeenOrigPHI = false; // Original PHI met.

   do {
@@ -316,7 +317,30 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
     // Is this value a better spill candidate?
     if (!isRegToSpill(Reg)) {
       MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
-      if (MBB != UseMBB && MDT.dominates(MBB, UseMBB)) {
+      if (MBB == SpillMBB) {
+        // This is an alternative def earlier in the same MBB.
+        // Hoist the spill as far as possible in SpillMBB. This can ease
+        // register pressure:
+        //
+        //   x = def
+        //   y = use x
+        //   s = copy x
+        //
+        // Hoisting the spill of s to immediately after the def removes the
+        // interference between x and y:
+        //
+        //   x = def
+        //   spill x
+        //   y = use x<kill>
+        //
+        if (VNI->def < SVI.SpillVNI->def) {
+          DEBUG(dbgs() << "  hoist in BB#" << MBB->getNumber() << ": "
+                       << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def
+                       << '\n');
+          SVI.SpillReg = Reg;
+          SVI.SpillVNI = VNI;
+        }
+      } else if (MBB != UseMBB && MDT.dominates(MBB, UseMBB)) {
         // This is a valid spill location dominating UseVNI.
         // Prefer to spill at a smaller loop depth.
         unsigned Depth = Loops.getLoopDepth(MBB);
@@ -325,6 +349,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
                        << ':' << VNI->id << '@' << VNI->def << '\n');
           SVI.SpillReg = Reg;
           SVI.SpillVNI = VNI;
+          SpillMBB = MBB;
           SpillDepth = Depth;
         }
       }
@@ -425,6 +450,7 @@ void InlineSpiller::analyzeSiblingValues() {
       // Check possible sibling copies.
       if (VNI->isPHIDef() || VNI->getCopy()) {
         VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
+        assert(OrigVNI && "Def outside original live range");
         if (OrigVNI->def != VNI->def)
           DefMI = traceSiblingValue(Reg, VNI, OrigVNI);
       }
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
index b1014a9..a09bb39 100644
--- a/lib/CodeGen/InterferenceCache.cpp
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -14,6 +14,7 @@
 #define DEBUG_TYPE "regalloc"
 #include "InterferenceCache.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"

 using namespace llvm;
@@ -40,9 +41,18 @@ InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) {
     E = RoundRobin;
     if (++RoundRobin == CacheEntries)
       RoundRobin = 0;
-    Entries[E].reset(PhysReg, LIUArray, TRI, MF);
-    PhysRegEntries[PhysReg] = E;
-    return &Entries[E];
+    for (unsigned i = 0; i != CacheEntries; ++i) {
+      // Skip entries that are in use.
+      if (Entries[E].hasRefs()) {
+        if (++E == CacheEntries)
+          E = 0;
+        continue;
+      }
+      Entries[E].reset(PhysReg, LIUArray, TRI, MF);
+      PhysRegEntries[PhysReg] = E;
+      return &Entries[E];
+    }
+    llvm_unreachable("Ran out of interference cache entries.");
 }

 /// revalidate - LIU contents have changed, update tags.
@@ -59,6 +69,7 @@ void InterferenceCache::Entry::reset(unsigned physReg,
                                      LiveIntervalUnion *LIUArray,
                                      const TargetRegisterInfo *TRI,
                                      const MachineFunction *MF) {
+  assert(!hasRefs() && "Cannot reset cache entry with references");
   // LIU's changed, invalidate cache.
   ++Tag;
   PhysReg = physReg;
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 6c36fa4..7f0a27a 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -43,6 +43,9 @@ class InterferenceCache {
     /// change.
     unsigned Tag;

+    /// RefCount - The total number of Cursor instances referring to this Entry.
+    unsigned RefCount;
+
     /// MF - The current function.
     MachineFunction *MF;
@@ -68,9 +71,10 @@ class InterferenceCache {
     void update(unsigned MBBNum);

   public:
-    Entry() : PhysReg(0), Tag(0), Indexes(0) {}
+    Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0) {}

     void clear(MachineFunction *mf, SlotIndexes *indexes) {
+      assert(!hasRefs() && "Cannot clear cache entry with references");
       PhysReg = 0;
       MF = mf;
       Indexes = indexes;
@@ -78,6 +82,10 @@ class InterferenceCache {

     unsigned getPhysReg() const { return PhysReg; }

+    void addRef(int Delta) { RefCount += Delta; }
+
+    bool hasRefs() const { return RefCount > 0; }
+
     void revalidate();

     /// valid - Return true if this is a valid entry for physReg.
@@ -122,15 +130,48 @@ public:
   void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*,
             const TargetRegisterInfo *);

+  /// getMaxCursors - Return the maximum number of concurrent cursors that can
+  /// be supported.
+  unsigned getMaxCursors() const { return CacheEntries; }
+
   /// Cursor - The primary query interface for the block interference cache.
   class Cursor {
     Entry *CacheEntry;
     BlockInterference *Current;
+
+    void setEntry(Entry *E) {
+      Current = 0;
+      // Update reference counts. Nothing happens when RefCount reaches 0, so
+      // we don't have to check for E == CacheEntry etc.
+      if (CacheEntry)
+        CacheEntry->addRef(-1);
+      CacheEntry = E;
+      if (CacheEntry)
+        CacheEntry->addRef(+1);
+    }
+
   public:
-    /// Cursor - Create a cursor for the interference allocated to PhysReg and
-    /// all its aliases.
-    Cursor(InterferenceCache &Cache, unsigned PhysReg)
-      : CacheEntry(Cache.get(PhysReg)), Current(0) {}
+    /// Cursor - Create a dangling cursor.
+    Cursor() : CacheEntry(0), Current(0) {}
+    ~Cursor() { setEntry(0); }
+
+    Cursor(const Cursor &O) : CacheEntry(0), Current(0) {
+      setEntry(O.CacheEntry);
+    }
+
+    Cursor &operator=(const Cursor &O) {
+      setEntry(O.CacheEntry);
+      return *this;
+    }
+
+    /// setPhysReg - Point this cursor to PhysReg's interference.
+    void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) {
+      // Release reference before getting a new one. That guarantees we can
+      // actually have CacheEntries live cursors.
+      setEntry(0);
+      if (PhysReg)
+        setEntry(Cache.get(PhysReg));
+    }

     /// moveTo - Move cursor to basic block MBBNum.
     void moveToBlock(unsigned MBBNum) {
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 3861dda..611886f 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -29,7 +29,7 @@ static void EnsureFunctionExists(Module &M, const char *Name,
                                  ArgIt ArgBegin, ArgIt ArgEnd,
                                  const Type *RetTy) {
   // Insert a correctly-typed definition now.
-  std::vector<const Type *> ParamTys;
+  std::vector<Type *> ParamTys;
   for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
     ParamTys.push_back(I->getType());
   M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
@@ -69,7 +69,7 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
   // program already contains a function with this name.
   Module *M = CI->getParent()->getParent()->getParent();
   // Get or insert the definition now.
-  std::vector<const Type *> ParamTys;
+  std::vector<Type *> ParamTys;
   for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
     ParamTys.push_back((*I)->getType());
   Constant* FCache = M->getOrInsertFunction(NewFn,
@@ -77,7 +77,7 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,

   IRBuilder<> Builder(CI->getParent(), CI);
   SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
-  CallInst *NewCI = Builder.CreateCall(FCache, Args.begin(), Args.end());
+  CallInst *NewCI = Builder.CreateCall(FCache, Args);
   NewCI->setName(CI->getName());
   if (!CI->use_empty())
     CI->replaceAllUsesWith(NewCI);
@@ -353,6 +353,13 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
     report_fatal_error("Code generator does not support intrinsic function '"+
                       Callee->getName()+"'!");

+  case Intrinsic::expect: {
+    // Just replace __builtin_expect(exp, c) with EXP.
+    Value *V = CI->getArgOperand(0);
+    CI->replaceAllUsesWith(V);
+    break;
+  }
+
   // The setjmp/longjmp intrinsics should only exist in the code if it was
   // never optimized (ie, right out of the CFE), or if it has been hacked on
   // by the lowerinvoke pass. In both cases, the right thing to do is to
@@ -546,14 +553,13 @@ bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
       !CI->getType()->isIntegerTy())
     return false;

-  const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+  IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
   if (!Ty)
     return false;

   // Okay, we can do this xform, do so now.
-  const Type *Tys[] = { Ty };
   Module *M = CI->getParent()->getParent()->getParent();
-  Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
+  Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty);

   Value *Op = CI->getArgOperand(0);
   Op = CallInst::Create(Int, Op, CI->getName(), CI);
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index b98fbed..f985af8 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -24,10 +24,14 @@
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Target/TargetAsmInfo.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/Support/CommandLine.h"
@@ -98,10 +102,10 @@ static cl::opt<cl::boolOrDefault>
 EnableFastISelOption("fast-isel", cl::Hidden,
   cl::desc("Enable the \"fast\" instruction selector"));

-LLVMTargetMachine::LLVMTargetMachine(const Target &T,
-                                     const std::string &Triple)
-  : TargetMachine(T), TargetTriple(Triple) {
-  AsmInfo = T.createAsmInfo(TargetTriple);
+LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
+                                     StringRef CPU, StringRef FS)
+  : TargetMachine(T, Triple, CPU, FS) {
+  AsmInfo = T.createMCAsmInfo(Triple);
 }

 // Set the default code model for the JIT for a generic target.
@@ -136,14 +140,15 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
   default: return true;
   case CGFT_AssemblyFile: {
     MCInstPrinter *InstPrinter =
-      getTarget().createMCInstPrinter(*this, MAI.getAssemblerDialect(), MAI);
+      getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI);

     // Create a code emitter if asked to show the encoding.
     MCCodeEmitter *MCE = 0;
     TargetAsmBackend *TAB = 0;
     if (ShowMCEncoding) {
-      MCE = getTarget().createCodeEmitter(*this, *Context);
-      TAB = getTarget().createAsmBackend(TargetTriple);
+      const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+      MCE = getTarget().createCodeEmitter(*getInstrInfo(), STI, *Context);
+      TAB = getTarget().createAsmBackend(getTargetTriple());
     }

     MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
@@ -159,13 +164,15 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
   case CGFT_ObjectFile: {
     // Create the code emitter for the target if it exists.  If not, .o file
     // emission fails.
-    MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Context);
-    TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple);
+    const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+    MCCodeEmitter *MCE = getTarget().createCodeEmitter(*getInstrInfo(), STI,
+                                                       *Context);
+    TargetAsmBackend *TAB = getTarget().createAsmBackend(getTargetTriple());
     if (MCE == 0 || TAB == 0)
       return true;

-    AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Context,
-                                                       *TAB, Out, MCE,
+    AsmStreamer.reset(getTarget().createObjectStreamer(getTargetTriple(),
+                                                       *Context, *TAB, Out, MCE,
                                                        hasMCRelaxAll(),
                                                        hasMCNoExecStack()));
     AsmStreamer.get()->InitSections();
@@ -240,13 +247,14 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
   // Create the code emitter for the target if it exists.  If not, .o file
   // emission fails.
-  MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Ctx);
-  TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple);
+  const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+  MCCodeEmitter *MCE = getTarget().createCodeEmitter(*getInstrInfo(),STI, *Ctx);
+  TargetAsmBackend *TAB = getTarget().createAsmBackend(getTargetTriple());
   if (MCE == 0 || TAB == 0)
     return true;

   OwningPtr<MCStreamer> AsmStreamer;
-  AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Ctx,
+  AsmStreamer.reset(getTarget().createObjectStreamer(getTargetTriple(), *Ctx,
                                                      *TAB, Out, MCE,
                                                      hasMCRelaxAll(),
                                                      hasMCNoExecStack()));
@@ -303,10 +311,6 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
   if (!DisableVerify)
     PM.add(createVerifierPass());

-  // Simplify ObjC ARC code. This is done late because it makes re-optimization
-  // difficult.
-  PM.add(createObjCARCContractPass());
-
   // Run loop strength reduction before anything else.
   if (OptLevel != CodeGenOpt::None && !DisableLSR) {
     PM.add(createLoopStrengthReducePass(getTargetLowering()));
@@ -388,6 +392,12 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
   // Expand pseudo-instructions emitted by ISel.
   PM.add(createExpandISelPseudosPass());

+  // Pre-ra tail duplication.
+  if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) {
+    PM.add(createTailDuplicatePass(true));
+    printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
+  }
+
   // Optimize PHIs before DCE: removing dead PHI cycles may make more
   // instructions dead.
   if (OptLevel != CodeGenOpt::None)
@@ -416,12 +426,6 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
     printAndVerify(PM, "After codegen peephole optimization pass");
   }

-  // Pre-ra tail duplication.
-  if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) {
-    PM.add(createTailDuplicatePass(true));
-    printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
-  }
-
   // Run pre-ra passes.
   if (addPreRegAlloc(PM, OptLevel))
     printAndVerify(PM, "After PreRegAlloc passes");
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 292928f..5d38c83 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -123,7 +123,7 @@ public:
   /// getNext - Return the next UserValue in the equivalence class.
   UserValue *getNext() const { return next; }

-  /// match - Does this UserValue match the aprameters?
+  /// match - Does this UserValue match the parameters?
   bool match(const MDNode *Var, unsigned Offset) const {
     return Var == variable && Offset == offset;
   }
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index b67f966..70003e7 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -244,7 +244,7 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
 //
 // For comments on how to speed it up, see Query::findIntersection().
 unsigned LiveIntervalUnion::Query::
-collectInterferingVRegs(unsigned MaxInterferingRegs, float MaxWeight) {
+collectInterferingVRegs(unsigned MaxInterferingRegs) {
   InterferenceResult IR = firstInterference();
   LiveInterval::iterator VirtRegEnd = VirtReg->end();
   LiveInterval *RecentInterferingVReg = NULL;
@@ -287,10 +287,6 @@ collectInterferingVRegs(unsigned MaxInterferingRegs, float MaxWeight) {
       RecentInterferingVReg = IR.LiveUnionI.value();
       ++IR.LiveUnionI;

-      // Stop collecting when the max weight is exceeded.
-      if (RecentInterferingVReg->weight >= MaxWeight)
-        return InterferingVRegs.size();
-
       continue;
     }

     // VirtRegI may have advanced far beyond LiveUnionI,
diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h
index c83578e..5e78d5e 100644
--- a/lib/CodeGen/LiveIntervalUnion.h
+++ b/lib/CodeGen/LiveIntervalUnion.h
@@ -229,8 +229,7 @@ public:
     // Count the virtual registers in this union that interfere with this
     // query's live virtual register, up to maxInterferingRegs.
-    unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX,
-                                     float MaxWeight = HUGE_VALF);
+    unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX);

     // Was this virtual register visited during collectInterferingVRegs?
     bool isSeenInterference(LiveInterval *VReg) const;
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 052abad..b385fb3 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -298,10 +298,16 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
     if (NumComp <= 1)
       continue;
     ++NumFracRanges;
+    bool IsOriginal = VRM.getOriginal(LI->reg) == LI->reg;
     DEBUG(dbgs() << NumComp << " components: " << *LI << '\n');
     SmallVector<LiveInterval*, 8> Dups(1, LI);
     for (unsigned i = 1; i != NumComp; ++i) {
       Dups.push_back(&createFrom(LI->reg, LIS, VRM));
+      // If LI is an original interval that hasn't been split yet, make the new
+      // intervals their own originals instead of referring to LI. The original
+      // interval must contain all the split products, and LI doesn't.
+      if (IsOriginal)
+        VRM.setIsSplitFromReg(Dups.back()->reg, 0);
       if (delegate_)
         delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg);
     }
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 77828c9..143a29b 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -15,13 +15,16 @@
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/InlineAsm.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Metadata.h"
+#include "llvm/Module.h"
 #include "llvm/Type.h"
 #include "llvm/Value.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/MC/MCInstrDesc.h"
@@ -799,6 +802,11 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
         return false;
     }
   }
+  // If DebugLoc does not match then two dbg.values are not identical.
+  if (isDebugValue())
+    if (!getDebugLoc().isUnknown() && !Other->getDebugLoc().isUnknown()
+        && getDebugLoc() != Other->getDebugLoc())
+      return false;
   return true;
 }
@@ -1712,3 +1720,24 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
   }
   return Hash;
 }
+
+void MachineInstr::emitError(StringRef Msg) const {
+  // Find the source location cookie.
+  unsigned LocCookie = 0;
+  const MDNode *LocMD = 0;
+  for (unsigned i = getNumOperands(); i != 0; --i) {
+    if (getOperand(i-1).isMetadata() &&
+        (LocMD = getOperand(i-1).getMetadata()) &&
+        LocMD->getNumOperands() != 0) {
+      if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
+        LocCookie = CI->getZExtValue();
+        break;
+      }
+    }
+  }
+
+  if (const MachineBasicBlock *MBB = getParent())
+    if (const MachineFunction *MF = MBB->getParent())
+      return MF->getMMI().getModule()->getContext().emitError(LocCookie, Msg);
+  report_fatal_error(Msg);
+}
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index bcb38d7..5ea26ad 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -324,19 +324,21 @@ void RegAllocBase::allocatePhysRegs() {
     if (AvailablePhysReg == ~0u) {
       // selectOrSplit failed to find a register!
-      std::string msg;
-      raw_string_ostream Msg(msg);
-      Msg << "Ran out of registers during register allocation!"
-             "\nCannot allocate: " << *VirtReg;
+      const char *Msg = "ran out of registers during register allocation";
+      // Probably caused by an inline asm.
+      MachineInstr *MI;
       for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg);
-           MachineInstr *MI = I.skipInstruction();) {
-        if (!MI->isInlineAsm())
-          continue;
-        Msg << "\nPlease check your inline asm statement for "
-               "invalid constraints:\n";
-        MI->print(Msg, &VRM->getMachineFunction().getTarget());
-      }
-      report_fatal_error(Msg.str());
+           (MI = I.skipInstruction());)
+        if (MI->isInlineAsm())
+          break;
+      if (MI)
+        MI->emitError(Msg);
+      else
+        report_fatal_error(Msg);
+      // Keep going after reporting the error.
+      VRM->assignVirt2Phys(VirtReg->reg,
+                 RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front());
+      continue;
     }

     if (AvailablePhysReg)
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index ee23194..b36a445 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -530,16 +530,10 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
       return assignVirtToPhysReg(LRE, BestReg);
   }

-  // Nothing we can do.
-  std::string msg;
-  raw_string_ostream Msg(msg);
-  Msg << "Ran out of registers during register allocation!";
-  if (MI->isInlineAsm()) {
-    Msg << "\nPlease check your inline asm statement for "
-        << "invalid constraints:\n";
-    MI->print(Msg, TM);
-  }
-  report_fatal_error(Msg.str());
+  // Nothing we can do. Report an error and keep going with a bad allocation.
+  MI->emitError("ran out of registers during register allocation");
+  definePhysReg(MI, *AO.begin(), regFree);
+  assignVirtToPhysReg(LRE, *AO.begin());
 }

 /// defineVirtReg - Allocate a register for VirtReg and mark it as dirty.
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 4402694..2dde767 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -133,6 +133,20 @@ class RAGreedy : public MachineFunctionPass,
     }
   }

+  /// Cost of evicting interference.
+  struct EvictionCost {
+    unsigned BrokenHints; ///< Total number of broken hints.
+    float MaxWeight;      ///< Maximum spill weight evicted.
+
+    EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {}
+
+    bool operator<(const EvictionCost &O) const {
+      if (BrokenHints != O.BrokenHints)
+        return BrokenHints < O.BrokenHints;
+      return MaxWeight < O.MaxWeight;
+    }
+  };
+
   // splitting state.
   std::auto_ptr<SplitAnalysis> SA;
   std::auto_ptr<SplitEditor> SE;
@@ -146,11 +160,13 @@ class RAGreedy : public MachineFunctionPass,
   /// Global live range splitting candidate info.
   struct GlobalSplitCandidate {
     unsigned PhysReg;
+    InterferenceCache::Cursor Intf;
     BitVector LiveBundles;
     SmallVector<unsigned, 8> ActiveBlocks;

-    void reset(unsigned Reg) {
+    void reset(InterferenceCache &Cache, unsigned Reg) {
       PhysReg = Reg;
+      Intf.setPhysReg(Cache, Reg);
       LiveBundles.clear();
       ActiveBlocks.clear();
     }
@@ -192,13 +208,15 @@ private:
   float calcSpillCost();
   bool addSplitConstraints(InterferenceCache::Cursor, float&);
   void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
-  void growRegion(GlobalSplitCandidate &Cand, InterferenceCache::Cursor);
-  float calcGlobalSplitCost(GlobalSplitCandidate&, InterferenceCache::Cursor);
+  void growRegion(GlobalSplitCandidate &Cand);
+  float calcGlobalSplitCost(GlobalSplitCandidate&);
   void splitAroundRegion(LiveInterval&, GlobalSplitCandidate&,
                          SmallVectorImpl<LiveInterval*>&);
   void calcGapWeights(unsigned, SmallVectorImpl<float>&);
-  bool canEvict(LiveInterval &A, LiveInterval &B);
-  bool canEvictInterference(LiveInterval&, unsigned, float&);
+  bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
+  bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
+  void evictInterference(LiveInterval&, unsigned,
+                         SmallVectorImpl<LiveInterval*>&);

   unsigned tryAssign(LiveInterval&, AllocationOrder&,
                      SmallVectorImpl<LiveInterval*>&);
@@ -382,7 +400,21 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
   if (!PhysReg || Order.isHint(PhysReg))
     return PhysReg;

-  // PhysReg is available. Try to evict interference from a cheaper alternative.
+  // PhysReg is available, but there may be a better choice.
+
+  // If we missed a simple hint, try to cheaply evict interference from the
+  // preferred register.
+  if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
+    if (Order.isHint(Hint)) {
+      DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
+      EvictionCost MaxCost(1);
+      if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
+        evictInterference(VirtReg, Hint, NewVRegs);
+        return Hint;
+      }
+    }
+
+  // Try to evict interference from a cheaper alternative.
   unsigned Cost = TRI->getCostPerUse(PhysReg);

   // Most registers have 0 additional cost.
@@ -400,23 +432,42 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
 //                         Interference eviction
 //===----------------------------------------------------------------------===//

-/// canEvict - determine if A can evict the assigned live range B. The eviction
-/// policy defined by this function together with the allocation order defined
-/// by enqueue() decides which registers ultimately end up being split and
-/// spilled.
+/// shouldEvict - determine if A should evict the assigned live range B. The
+/// eviction policy defined by this function together with the allocation order
+/// defined by enqueue() decides which registers ultimately end up being split
+/// and spilled.
 ///
 /// Cascade numbers are used to prevent infinite loops if this function is a
 /// cyclic relation.
-bool RAGreedy::canEvict(LiveInterval &A, LiveInterval &B) {
+///
+/// @param A          The live range to be assigned.
+/// @param IsHint     True when A is about to be assigned to its preferred
+///                   register.
+/// @param B          The live range to be evicted.
+/// @param BreaksHint True when B is already assigned to its preferred register.
+bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
+                           LiveInterval &B, bool BreaksHint) {
+  bool CanSplit = getStage(B) <= RS_Second;
+
+  // Be fairly aggressive about following hints as long as the evictee can be
+  // split.
+  if (CanSplit && IsHint && !BreaksHint)
+    return true;
+
   return A.weight > B.weight;
 }
 
-/// canEvict - Return true if all interferences between VirtReg and PhysReg can
-/// be evicted.
-/// Return false if any interference is heavier than MaxWeight.
-/// On return, set MaxWeight to the maximal spill weight of an interference.
+/// canEvictInterference - Return true if all interferences between VirtReg and
+/// PhysReg can be evicted.
+///
+/// @param VirtReg Live range that is about to be assigned.
+/// @param PhysReg Desired register for assignment.
+/// @param IsHint  True when PhysReg is VirtReg's preferred register.
+/// @param MaxCost Only look for cheaper candidates and update with new cost
+///                when returning true.
+/// @returns True when interference can be evicted cheaper than MaxCost.
 bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
-                                    float &MaxWeight) {
+                                    bool IsHint, EvictionCost &MaxCost) {
   // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
   // involved in an eviction before. If a cascade number was assigned, deny
   // evicting anything with the same or a newer cascade number. This prevents
@@ -428,11 +479,11 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
   if (!Cascade)
     Cascade = NextCascade;
 
-  float Weight = 0;
+  EvictionCost Cost;
   for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
     LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
     // If there is 10 or more interferences, chances are one is heavier.
-    if (Q.collectInterferingVRegs(10, MaxWeight) >= 10)
+    if (Q.collectInterferingVRegs(10) >= 10)
       return false;
 
     // Check if any interfering live range is heavier than MaxWeight.
@@ -440,19 +491,69 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
       LiveInterval *Intf = Q.interferingVRegs()[i - 1];
       if (TargetRegisterInfo::isPhysicalRegister(Intf->reg))
         return false;
-      if (Cascade <= ExtraRegInfo[Intf->reg].Cascade)
+      // Never evict spill products. They cannot split or spill.
+      if (getStage(*Intf) == RS_Spill)
         return false;
-      if (Intf->weight >= MaxWeight)
+      // Once a live range becomes small enough, it is urgent that we find a
+      // register for it. This is indicated by an infinite spill weight. These
+      // urgent live ranges get to evict almost anything.
+      bool Urgent = !VirtReg.isSpillable() && Intf->isSpillable();
+      // Only evict older cascades or live ranges without a cascade.
+      unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
+      if (Cascade <= IntfCascade) {
+        if (!Urgent)
+          return false;
+        // We permit breaking cascades for urgent evictions. It should be the
+        // last resort, though, so make it really expensive.
+        Cost.BrokenHints += 10;
+      }
+      // Would this break a satisfied hint?
+      bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
+      // Update eviction cost.
+      Cost.BrokenHints += BreaksHint;
+      Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
+      // Abort if this would be too expensive.
+      if (!(Cost < MaxCost))
        return false;
-      if (!canEvict(VirtReg, *Intf))
+      // Finally, apply the eviction policy for non-urgent evictions.
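+      // shouldEvict allows this only if VirtReg outweighs Intf, or if we are
+      // chasing a hint and Intf can still be split without breaking a
+      // satisfied hint of its own.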
+      if (!Urgent && !shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
         return false;
-      Weight = std::max(Weight, Intf->weight);
     }
   }
-  MaxWeight = Weight;
+  MaxCost = Cost;
   return true;
 }
 
+/// evictInterference - Evict any interfering registers that prevent VirtReg
+/// from being assigned to PhysReg. This assumes that canEvictInterference
+/// returned true.
+void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
+                                 SmallVectorImpl<LiveInterval*> &NewVRegs) {
+  // Make sure that VirtReg has a cascade number, and assign that cascade
+  // number to every evicted register. These live ranges can then only be
+  // evicted by a newer cascade, preventing infinite loops.
+  unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+  if (!Cascade)
+    Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;
+
+  DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
+               << " interference: Cascade " << Cascade << '\n');
+  for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+    LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+    assert(Q.seenAllInterferences() && "Didn't check all interferences.");
+    for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
+      LiveInterval *Intf = Q.interferingVRegs()[i];
+      unassign(*Intf, VRM->getPhys(Intf->reg));
+      assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
+              VirtReg.isSpillable() < Intf->isSpillable()) &&
+             "Cannot decrease cascade number, illegal eviction");
+      ExtraRegInfo[Intf->reg].Cascade = Cascade;
+      ++NumEvicted;
+      NewVRegs.push_back(Intf);
+    }
+  }
+}
+
 /// tryEvict - Try to evict all interferences for a physreg.
 /// @param  VirtReg Currently unassigned virtual register.
 /// @param  Order   Physregs to try.
@@ -463,31 +564,37 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
                             unsigned CostPerUseLimit) {
   NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
 
-  // Keep track of the lightest single interference seen so far.
-  float BestWeight = HUGE_VALF;
+  // Keep track of the cheapest interference seen so far.
+  EvictionCost BestCost(~0u);
   unsigned BestPhys = 0;
 
+  // When we are just looking for a reduced cost per use, don't break any
+  // hints, and only evict smaller spill weights.
+  if (CostPerUseLimit < ~0u) {
+    BestCost.BrokenHints = 0;
+    BestCost.MaxWeight = VirtReg.weight;
+  }
+
   Order.rewind();
   while (unsigned PhysReg = Order.next()) {
     if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
       continue;
-    // The first use of a register in a function has cost 1.
-    if (CostPerUseLimit == 1 && !MRI->isPhysRegUsed(PhysReg))
-      continue;
-
-    float Weight = BestWeight;
-    if (!canEvictInterference(VirtReg, PhysReg, Weight))
-      continue;
-
-    // This is an eviction candidate.
-    DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " interference = "
-                 << Weight << '\n');
-    if (BestPhys && Weight >= BestWeight)
+    // The first use of a callee-saved register in a function has cost 1.
+    // Don't start using a CSR when the CostPerUseLimit is low.
+    if (CostPerUseLimit == 1)
+      if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
+        if (!MRI->isPhysRegUsed(CSR)) {
+          DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR "
+                       << PrintReg(CSR, TRI) << '\n');
+          continue;
+        }
+
+    if (!canEvictInterference(VirtReg, PhysReg, false, BestCost))
       continue;
 
     // Best so far.
     BestPhys = PhysReg;
-    BestWeight = Weight;
+
     // Stop if the hint can be used.
     if (Order.isHint(PhysReg))
       break;
@@ -496,29 +603,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
   if (!BestPhys)
     return 0;
 
-  // We will evict interference.
Make sure that VirtReg has a cascade number, - // and assign that cascade number to every evicted register. These live - // ranges than then only be evicted by a newer cascade, preventing infinite - // loops. - unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade; - if (!Cascade) - Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++; - - DEBUG(dbgs() << "evicting " << PrintReg(BestPhys, TRI) - << " interference: Cascade " << Cascade << '\n'); - for (const unsigned *AliasI = TRI->getOverlaps(BestPhys); *AliasI; ++AliasI) { - LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); - assert(Q.seenAllInterferences() && "Didn't check all interfererences."); - for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { - LiveInterval *Intf = Q.interferingVRegs()[i]; - unassign(*Intf, VRM->getPhys(Intf->reg)); - assert(ExtraRegInfo[Intf->reg].Cascade < Cascade && - "Cannot decrease cascade number, illegal eviction"); - ExtraRegInfo[Intf->reg].Cascade = Cascade; - ++NumEvicted; - NewVRegs.push_back(Intf); - } - } + evictInterference(VirtReg, BestPhys, NewVRegs); return BestPhys; } @@ -637,8 +722,7 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T)); } -void RAGreedy::growRegion(GlobalSplitCandidate &Cand, - InterferenceCache::Cursor Intf) { +void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { // Keep track of through blocks that have not been added to SpillPlacer. BitVector Todo = SA->getThroughBlocks(); SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks; @@ -649,8 +733,6 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand, for (;;) { ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive(); - if (NewBundles.empty()) - break; // Find new through blocks in the periphery of PrefRegBundles. for (int i = 0, e = NewBundles.size(); i != e; ++i) { unsigned Bundle = NewBundles[i]; @@ -670,12 +752,12 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand, } } // Any new blocks to add? - if (ActiveBlocks.size() > AddedTo) { - ArrayRef<unsigned> Add(&ActiveBlocks[AddedTo], - ActiveBlocks.size() - AddedTo); - addThroughConstraints(Intf, Add); - AddedTo = ActiveBlocks.size(); - } + if (ActiveBlocks.size() == AddedTo) + break; + addThroughConstraints(Cand.Intf, + ArrayRef<unsigned>(ActiveBlocks).slice(AddedTo)); + AddedTo = ActiveBlocks.size(); + // Perhaps iterating can enable more bundles? SpillPlacer->iterate(); } @@ -713,8 +795,7 @@ float RAGreedy::calcSpillCost() { /// pattern in LiveBundles. This cost should be added to the local cost of the /// interference pattern in SplitConstraints. /// -float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, - InterferenceCache::Cursor Intf) { +float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { float GlobalCost = 0; const BitVector &LiveBundles = Cand.LiveBundles; ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); @@ -741,8 +822,8 @@ float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, continue; if (RegIn && RegOut) { // We need double spill code if this block has interference. 
- Intf.moveToBlock(Number); - if (Intf.hasInterference()) + Cand.Intf.moveToBlock(Number); + if (Cand.Intf.hasInterference()) GlobalCost += 2*SpillPlacer->getBlockFrequency(Number); continue; } @@ -772,7 +853,7 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, dbgs() << ".\n"; }); - InterferenceCache::Cursor Intf(IntfCache, Cand.PhysReg); + InterferenceCache::Cursor &Intf = Cand.Intf; LiveRangeEdit LREdit(VirtReg, NewVRegs, this); SE->reset(LREdit); @@ -789,16 +870,6 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; // Create separate intervals for isolated blocks with multiple uses. - // - // |---o---o---| Enter and leave on the stack. - // ____-----____ Create local interval for uses. - // - // | o---o---| Defined in block, leave on stack. - // -----____ Create local interval for uses. - // - // |---o---x | Enter on stack, killed in block. - // ____----- Create local interval for uses. - // if (!RegIn && !RegOut) { DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n"); if (!BI.isOneInstr()) { @@ -808,303 +879,28 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, continue; } - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); Intf.moveToBlock(BI.MBB->getNumber()); - DEBUG(dbgs() << "EB#" << Bundles->getBundle(BI.MBB->getNumber(), 0) - << (RegIn ? " => " : " -- ") - << "BB#" << BI.MBB->getNumber() - << (RegOut ? " => " : " -- ") - << " EB#" << Bundles->getBundle(BI.MBB->getNumber(), 1) - << " [" << Start << ';' - << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop - << ") uses [" << BI.FirstUse << ';' << BI.LastUse - << ") intf [" << Intf.first() << ';' << Intf.last() << ')'); - - // The interference interval should either be invalid or overlap MBB. - assert((!Intf.hasInterference() || Intf.first() < Stop) - && "Bad interference"); - assert((!Intf.hasInterference() || Intf.last() > Start) - && "Bad interference"); - - // We are now ready to decide where to split in the current block. There - // are many variables guiding the decision: - // - // - RegIn / RegOut: The global splitting algorithm's decisions for our - // ingoing and outgoing bundles. - // - // - BI.BlockIn / BI.BlockOut: Is the live range live-in and/or live-out - // from this block. - // - // - Intf.hasInterference(): Is there interference in this block. - // - // - Intf.first() / Inft.last(): The range of interference. - // - // The live range should be split such that MainIntv is live-in when RegIn - // is set, and live-out when RegOut is set. MainIntv should never overlap - // the interference, and the stack interval should never have more than one - // use per block. - - // No splits can be inserted after LastSplitPoint, overlap instead. - SlotIndex LastSplitPoint = Stop; - if (BI.LiveOut) - LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber()); - - // At this point, we know that either RegIn or RegOut is set. We dealt with - // the all-stack case above. - - // Blocks without interference are relatively easy. - if (!Intf.hasInterference()) { - DEBUG(dbgs() << ", no interference.\n"); - SE->selectIntv(MainIntv); - // The easiest case has MainIntv live through. - // - // |---o---o---| Live-in, live-out. - // ============= Use MainIntv everywhere. - // - SlotIndex From = Start, To = Stop; - - // Block entry. Reload before the first use if MainIntv is not live-in. - // - // |---o-- Enter on stack. - // ____=== Reload before first use. - // - // | o-- Defined in block. 
- // === Use MainIntv from def. - // - if (!RegIn) - From = SE->enterIntvBefore(BI.FirstUse); - - // Block exit. Handle cases where MainIntv is not live-out. - if (!BI.LiveOut) - // - // --x | Killed in block. - // === Use MainIntv up to kill. - // - To = SE->leaveIntvAfter(BI.LastUse); - else if (!RegOut) { - // - // --o---| Live-out on stack. - // ===____ Use MainIntv up to last use, switch to stack. - // - // -----o| Live-out on stack, last use after last split point. - // ====== Extend MainIntv to last use, overlapping. - // \____ Copy to stack interval before last split point. - // - if (BI.LastUse < LastSplitPoint) - To = SE->leaveIntvAfter(BI.LastUse); - else { - // The last use is after the last split point, it is probably an - // indirect branch. - To = SE->leaveIntvBefore(LastSplitPoint); - // Run a double interval from the split to the last use. This makes - // it possible to spill the complement without affecting the indirect - // branch. - SE->overlapIntv(To, BI.LastUse); - } - } - - // Paint in MainIntv liveness for this block. - SE->useIntv(From, To); - continue; - } - - // We are now looking at a block with interference, and we know that either - // RegIn or RegOut is set. - assert(Intf.hasInterference() && (RegIn || RegOut) && "Bad invariant"); - - // If the live range is not live through the block, it is possible that the - // interference doesn't even overlap. Deal with those cases first. Since - // no copy instructions are required, we can tolerate interference starting - // or ending at the same instruction that kills or defines our live range. - - // Live-in, killed before interference. - // - // ~~~ Interference after kill. - // |---o---x | Killed in block. - // ========= Use MainIntv everywhere. - // - if (RegIn && !BI.LiveOut && BI.LastUse <= Intf.first()) { - DEBUG(dbgs() << ", live-in, killed before interference.\n"); - SE->selectIntv(MainIntv); - SlotIndex To = SE->leaveIntvAfter(BI.LastUse); - SE->useIntv(Start, To); - continue; - } - - // Live-out, defined after interference. - // - // ~~~ Interference before def. - // | o---o---| Defined in block. - // ========= Use MainIntv everywhere. - // - if (RegOut && !BI.LiveIn && BI.FirstUse >= Intf.last()) { - DEBUG(dbgs() << ", live-out, defined after interference.\n"); - SE->selectIntv(MainIntv); - SlotIndex From = SE->enterIntvBefore(BI.FirstUse); - SE->useIntv(From, Stop); - continue; - } - - // The interference is now known to overlap the live range, but it may - // still be easy to avoid if all the interference is on one side of the - // uses, and we enter or leave on the stack. - - // Live-out on stack, interference after last use. - // - // ~~~ Interference after last use. - // |---o---o---| Live-out on stack. - // =========____ Leave MainIntv after last use. - // - // ~ Interference after last use. - // |---o---o--o| Live-out on stack, late last use. - // =========____ Copy to stack after LSP, overlap MainIntv. 
- // - if (!RegOut && Intf.first() > BI.LastUse.getBoundaryIndex()) { - assert(RegIn && "Stack-in, stack-out should already be handled"); - if (BI.LastUse < LastSplitPoint) { - DEBUG(dbgs() << ", live-in, stack-out, interference after last use.\n"); - SE->selectIntv(MainIntv); - SlotIndex To = SE->leaveIntvAfter(BI.LastUse); - assert(To <= Intf.first() && "Expected to avoid interference"); - SE->useIntv(Start, To); - } else { - DEBUG(dbgs() << ", live-in, stack-out, avoid last split point\n"); - SE->selectIntv(MainIntv); - SlotIndex To = SE->leaveIntvBefore(LastSplitPoint); - assert(To <= Intf.first() && "Expected to avoid interference"); - SE->overlapIntv(To, BI.LastUse); - SE->useIntv(Start, To); - } - continue; - } - // Live-in on stack, interference before first use. - // - // ~~~ Interference before first use. - // |---o---o---| Live-in on stack. - // ____========= Enter MainIntv before first use. - // - if (!RegIn && Intf.last() < BI.FirstUse.getBaseIndex()) { - assert(RegOut && "Stack-in, stack-out should already be handled"); - DEBUG(dbgs() << ", stack-in, interference before first use.\n"); - SE->selectIntv(MainIntv); - SlotIndex From = SE->enterIntvBefore(BI.FirstUse); - assert(From >= Intf.last() && "Expected to avoid interference"); - SE->useIntv(From, Stop); - continue; - } - - // The interference is overlapping somewhere we wanted to use MainIntv. That - // means we need to create a local interval that can be allocated a - // different register. - DEBUG(dbgs() << ", creating local interval.\n"); - unsigned LocalIntv = SE->openIntv(); - - // We may be creating copies directly between MainIntv and LocalIntv, - // bypassing the stack interval. When we do that, we should never use the - // leaveIntv* methods as they define values in the stack interval. By - // starting from the end of the block and working our way backwards, we can - // get by with only enterIntv* methods. - // - // When selecting split points, we generally try to maximize the stack - // interval as long at it contains no uses, maximize the main interval as - // long as it doesn't overlap interference, and minimize the local interval - // that we don't know how to allocate yet. - - // Handle the block exit, set Pos to the first handled slot. - SlotIndex Pos = BI.LastUse; - if (RegOut) { - assert(Intf.last() < LastSplitPoint && "Cannot be live-out in register"); - // Create a snippet of MainIntv that is live-out. - // - // ~~~ Interference overlapping uses. - // --o---| Live-out in MainIntv. - // ----=== Switch from LocalIntv to MainIntv after interference. - // - SE->selectIntv(MainIntv); - Pos = SE->enterIntvAfter(Intf.last()); - assert(Pos >= Intf.last() && "Expected to avoid interference"); - SE->useIntv(Pos, Stop); - SE->selectIntv(LocalIntv); - } else if (BI.LiveOut) { - if (BI.LastUse < LastSplitPoint) { - // Live-out on the stack. - // - // ~~~ Interference overlapping uses. - // --o---| Live-out on stack. - // ---____ Switch from LocalIntv to stack after last use. - // - Pos = SE->leaveIntvAfter(BI.LastUse); - } else { - // Live-out on the stack, last use after last split point. - // - // ~~~ Interference overlapping uses. - // --o--o| Live-out on stack, late use. - // ------ Copy to stack before LSP, overlap LocalIntv. - // \__ - // - Pos = SE->leaveIntvBefore(LastSplitPoint); - // We need to overlap LocalIntv so it can reach LastUse. - SE->overlapIntv(Pos, BI.LastUse); - } - } - - // When not live-out, leave Pos at LastUse. We have handled everything from - // Pos to Stop. 
Find the starting point for LocalIntv. - assert(SE->currentIntv() == LocalIntv && "Expecting local interval"); - - if (RegIn) { - assert(Start < Intf.first() && "Cannot be live-in with interference"); - // Live-in in MainIntv, only use LocalIntv for interference. - // - // ~~~ Interference overlapping uses. - // |---o-- Live-in in MainIntv. - // ====--- Switch to LocalIntv before interference. - // - SlotIndex Switch = SE->enterIntvBefore(Intf.first()); - assert(Switch <= Intf.first() && "Expected to avoid interference"); - SE->useIntv(Switch, Pos); - SE->selectIntv(MainIntv); - SE->useIntv(Start, Switch); - } else { - // Live-in on stack, enter LocalIntv before first use. - // - // ~~~ Interference overlapping uses. - // |---o-- Live-in in MainIntv. - // ____--- Reload to LocalIntv before interference. - // - // Defined in block. - // - // ~~~ Interference overlapping uses. - // | o-- Defined in block. - // --- Begin LocalIntv at first use. - // - SlotIndex Switch = SE->enterIntvBefore(BI.FirstUse); - SE->useIntv(Switch, Pos); - } + if (RegIn && RegOut) + SE->splitLiveThroughBlock(BI.MBB->getNumber(), + MainIntv, Intf.first(), + MainIntv, Intf.last()); + else if (RegIn) + SE->splitRegInBlock(BI, MainIntv, Intf.first()); + else + SE->splitRegOutBlock(BI, MainIntv, Intf.last()); } // Handle live-through blocks. - SE->selectIntv(MainIntv); for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { unsigned Number = Cand.ActiveBlocks[i]; bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; - DEBUG(dbgs() << "Live through BB#" << Number << '\n'); - if (RegIn && RegOut) { - Intf.moveToBlock(Number); - if (!Intf.hasInterference()) { - SE->useIntv(Indexes->getMBBStartIdx(Number), - Indexes->getMBBEndIdx(Number)); - continue; - } - } - MachineBasicBlock *MBB = MF->getBlockNumbered(Number); - if (RegIn) - SE->leaveIntvAtTop(*MBB); - if (RegOut) - SE->enterIntvAtEnd(*MBB); + if (!RegIn && !RegOut) + continue; + Intf.moveToBlock(Number); + SE->splitLiveThroughBlock(Number, RegIn ? MainIntv : 0, Intf.first(), + RegOut ? MainIntv : 0, Intf.last()); } ++NumGlobalSplits; @@ -1161,17 +957,34 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n'); const unsigned NoCand = ~0u; unsigned BestCand = NoCand; + unsigned NumCands = 0; Order.rewind(); - for (unsigned Cand = 0; unsigned PhysReg = Order.next(); ++Cand) { - if (GlobalCand.size() <= Cand) - GlobalCand.resize(Cand+1); - GlobalCand[Cand].reset(PhysReg); + while (unsigned PhysReg = Order.next()) { + // Discard bad candidates before we run out of interference cache cursors. + // This will only affect register classes with a lot of registers (>32). 
+    if (NumCands == IntfCache.getMaxCursors()) {
+      unsigned WorstCount = ~0u;
+      unsigned Worst = 0;
+      for (unsigned i = 0; i != NumCands; ++i) {
+        if (i == BestCand)
+          continue;
+        unsigned Count = GlobalCand[i].LiveBundles.count();
+        if (Count < WorstCount)
+          Worst = i, WorstCount = Count;
+      }
+      --NumCands;
+      GlobalCand[Worst] = GlobalCand[NumCands];
+    }
+
+    if (GlobalCand.size() <= NumCands)
+      GlobalCand.resize(NumCands+1);
+    GlobalSplitCandidate &Cand = GlobalCand[NumCands];
+    Cand.reset(IntfCache, PhysReg);
 
-    SpillPlacer->prepare(GlobalCand[Cand].LiveBundles);
+    SpillPlacer->prepare(Cand.LiveBundles);
     float Cost;
-    InterferenceCache::Cursor Intf(IntfCache, PhysReg);
-    if (!addSplitConstraints(Intf, Cost)) {
+    if (!addSplitConstraints(Cand.Intf, Cost)) {
       DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
       continue;
     }
@@ -1186,28 +999,29 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
       });
       continue;
     }
-    growRegion(GlobalCand[Cand], Intf);
+    growRegion(Cand);
     SpillPlacer->finish();
 
     // No live bundles, defer to splitSingleBlocks().
-    if (!GlobalCand[Cand].LiveBundles.any()) {
+    if (!Cand.LiveBundles.any()) {
       DEBUG(dbgs() << " no bundles.\n");
       continue;
     }
 
-    Cost += calcGlobalSplitCost(GlobalCand[Cand], Intf);
+    Cost += calcGlobalSplitCost(Cand);
     DEBUG({
       dbgs() << ", total = " << Cost << " with bundles";
-      for (int i = GlobalCand[Cand].LiveBundles.find_first(); i>=0;
-           i = GlobalCand[Cand].LiveBundles.find_next(i))
+      for (int i = Cand.LiveBundles.find_first(); i>=0;
+           i = Cand.LiveBundles.find_next(i))
        dbgs() << " EB#" << i;
       dbgs() << ".\n";
     });
     if (Cost < BestCost) {
-      BestCand = Cand;
+      BestCand = NumCands;
       BestCost = Hysteresis * Cost; // Prevent rounding effects.
     }
+    ++NumCands;
   }
 
   if (BestCand == NoCand)
@@ -1553,7 +1367,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
 
   // If we couldn't allocate a register from spilling, there is probably some
   // invalid inline assembly. The base class will report it.
-  if (Stage >= RS_Spill)
+  if (Stage >= RS_Spill || !VirtReg.isSpillable())
     return ~0u;
 
   // Try splitting VirtReg or interferences.
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index d5025b9..b91f92c 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -1203,7 +1203,6 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li,
                                           VNInfo *VNI, LiveRange *LR,
                                           SmallVector<MachineInstr*, 8> &DupCopies) {
-  return false; // To see if this fixes the i386 dragonegg buildbot miscompile.
   // FIXME: This is very conservative. For example, we don't handle
   // physical registers.
 
@@ -1215,12 +1214,6 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li,
   unsigned Dst = MI->getOperand(0).getReg();
   unsigned Src = MI->getOperand(1).getReg();
 
-  // FIXME: If "B = X" kills X, we have to move the kill back to its
-  // previous use. For now we just avoid the optimization in that case.
-  LiveInterval &SrcInt = li.getInterval(Src);
-  if (SrcInt.killedAt(VNI->def))
-    return false;
-
   if (!TargetRegisterInfo::isVirtualRegister(Src) ||
       !TargetRegisterInfo::isVirtualRegister(Dst))
     return false;
@@ -1252,6 +1245,12 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li,
   if (Src != OtherSrc)
     return false;
 
+  // If the copies use two different value numbers of X, we cannot merge
+  // A and B.
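+  // Schematically:
+  //   X = ...        value number 1 of X
+  //   A = COPY X     copy of value 1
+  //   B = COPY X     copy of value 1 -> A and B can be merged
+  //   X = ...        value number 2 of X
+  //   B = COPY X     copy of value 2 -> cannot be merged with A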
+ LiveInterval &SrcInt = li.getInterval(Src); + if (SrcInt.getVNInfoAt(Other->def) != SrcInt.getVNInfoAt(VNI->def)) + return false; + DupCopies.push_back(MI); return true; @@ -1472,6 +1471,7 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { if (RHSValNoAssignments.empty()) RHSValNoAssignments.push_back(-1); + SmallVector<unsigned, 8> SourceRegisters; for (SmallVector<MachineInstr*, 8>::iterator I = DupCopies.begin(), E = DupCopies.end(); I != E; ++I) { MachineInstr *MI = *I; @@ -1485,11 +1485,19 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { // X = X // and mark the X as coalesced to keep the illusion. unsigned Src = MI->getOperand(1).getReg(); + SourceRegisters.push_back(Src); MI->getOperand(0).substVirtReg(Src, 0, *tri_); markAsJoined(MI); } + // If B = X was the last use of X in a liverange, we have to shrink it now + // that B = X is gone. + for (SmallVector<unsigned, 8>::iterator I = SourceRegisters.begin(), + E = SourceRegisters.end(); I != E; ++I) { + li_->shrinkToUses(&li_->getInterval(*I)); + } + // If we get here, we know that we can coalesce the live ranges. Ask the // intervals to coalesce themselves now. LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo, diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp index 6b7a8c6..f8b1bc7 100644 --- a/lib/CodeGen/ScheduleDAGEmit.cpp +++ b/lib/CodeGen/ScheduleDAGEmit.cpp @@ -45,6 +45,7 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, unsigned Reg = 0; for (SUnit::const_succ_iterator II = SU->Succs.begin(), EE = SU->Succs.end(); II != EE; ++II) { + if (II->isCtrl()) continue; // ignore chain preds if (II->getReg()) { Reg = II->getReg(); break; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 90e0cc7..4f0d2ca 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1001,7 +1001,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { dbgs() << "\nWith: "; RV.getNode()->dump(&DAG); dbgs() << '\n'); - + // Transfer debug value. DAG.TransferDbgValues(SDValue(N, 0), RV); WorkListRemover DeadNodes(*this); @@ -1566,6 +1566,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 
0 : + dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); EVT VT = N0.getValueType(); // fold vector ops @@ -1597,6 +1599,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // fold (A+B)-B -> A if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) return N0.getOperand(0); + // fold C2-(A+C1) -> (C2-C1)-A + if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { + SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT); + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC, + N1.getOperand(0)); + } // fold ((A+(B+or-C))-B) -> A+or-C if (N0.getOpcode() == ISD::ADD && (N0.getOperand(1).getOpcode() == ISD::SUB || @@ -2571,7 +2579,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, (!LookPassAnd0 || !LookPassAnd1) && !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16))) return SDValue(); - + SDValue Res = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, N00); if (OpSizeInBits > 16) Res = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Res, @@ -5929,12 +5937,17 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // Now check for #3 and #4. bool RealUse = false; + + // Caches for hasPredecessorHelper + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 16> Worklist; + for (SDNode::use_iterator I = Ptr.getNode()->use_begin(), E = Ptr.getNode()->use_end(); I != E; ++I) { SDNode *Use = *I; if (Use == N) continue; - if (Use->isPredecessorOf(N)) + if (N->hasPredecessorHelper(Use, Visited, Worklist)) return false; if (!((Use->getOpcode() == ISD::LOAD && diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index ea7fead..54a7d43 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -852,7 +852,7 @@ FastISel::SelectExtractValue(const User *U) { return false; // fast-isel can't handle aggregate constants at the moment // Get the actual result register, which is an offset from the base register. - unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->idx_begin(), EVI->idx_end()); + unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->getIndices()); SmallVector<EVT, 4> AggValueVTs; ComputeValueVTs(TLI, AggTy, AggValueVTs); diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 4b6d3ef..d06e2bd 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -58,17 +58,6 @@ class SelectionDAGLegalize { /// against each other, including inserted libcalls. SmallVector<SDValue, 8> LastCALLSEQ; - enum LegalizeAction { - Legal, // The target natively supports this operation. - Promote, // This operation should be executed in a larger type. - Expand // Try to expand this to other ops, otherwise use a libcall. - }; - - /// ValueTypeActions - This is a bitvector that contains two bits for each - /// value type, where the two bits correspond to the LegalizeAction enum. - /// This can be queried with "getTypeAction(VT)". - TargetLowering::ValueTypeActionImpl ValueTypeActions; - /// LegalizedNodes - For nodes that are of legal width, and that have more /// than one use, this map indicates what regularized operand to use. This /// allows us to avoid legalizing the same thing more than once. 
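An aside on the new visitSUB fold above, C2-(A+C1) -> (C2-C1)-A: it replaces a variable-plus-constant subtrahend with a constant that folds away at compile time, and the rewrite is plain integer algebra. A quick standalone check, with ordinary ints standing in for the ConstantSDNode operands (not part of the patch):

    #include <cassert>

    int main() {
      const int C1 = 3, C2 = 10;
      for (int A = -100; A <= 100; ++A)
        assert(C2 - (A + C1) == (C2 - C1) - A);
      return 0;
    }

The same identity holds in two's complement modular arithmetic, which is why the DAG form can use APInt subtraction without worrying about overflow.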
@@ -87,25 +76,11 @@ class SelectionDAGLegalize { public: explicit SelectionDAGLegalize(SelectionDAG &DAG); - /// getTypeAction - Return how we should legalize values of this type, either - /// it is already legal or we need to expand it into multiple registers of - /// smaller integer type, or we need to promote it to a larger type. - LegalizeAction getTypeAction(EVT VT) const { - return (LegalizeAction)TLI.getTypeAction(*DAG.getContext(), VT); - } - - /// isTypeLegal - Return true if this type is legal on this target. - /// - bool isTypeLegal(EVT VT) const { - return getTypeAction(VT) == Legal; - } - void LegalizeDAG(); private: - /// LegalizeOp - We know that the specified value has a legal type. - /// Recursively ensure that the operands have legal types, then return the - /// result. + /// LegalizeOp - Return a legal replacement for the given operation, with + /// all legal operands. SDValue LegalizeOp(SDValue O); SDValue OptimizeFloatStore(StoreSDNode *ST); @@ -220,10 +195,7 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) : TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), - DAG(dag), - ValueTypeActions(TLI.getValueTypeActions()) { - assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && - "Too many value types for ValueTypeActions to hold!"); + DAG(dag) { } void SelectionDAGLegalize::LegalizeDAG() { @@ -753,7 +725,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { DebugLoc dl = ST->getDebugLoc(); if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { if (CFP->getValueType(0) == MVT::f32 && - getTypeAction(MVT::i32) == Legal) { + TLI.isTypeLegal(MVT::i32)) { Tmp3 = DAG.getConstant(CFP->getValueAPF(). bitcastToAPInt().zextOrTrunc(32), MVT::i32); @@ -763,14 +735,14 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (CFP->getValueType(0) == MVT::f64) { // If this target supports 64-bit registers, do a single 64-bit store. - if (getTypeAction(MVT::i64) == Legal) { + if (TLI.isTypeLegal(MVT::i64)) { Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). zextOrTrunc(64), MVT::i64); return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); } - if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { + if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) { // Otherwise, if the target supports 32-bit registers, use 2 32-bit // stores. If the target supports neither 32- nor 64-bits, this // xform is certainly not worth it. @@ -794,10 +766,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { return SDValue(0, 0); } -/// LegalizeOp - We know that the specified value has a legal type, and -/// that its operands are legal. Now ensure that the operation itself -/// is legal, recursively ensuring that the operands' operations remain -/// legal. +/// LegalizeOp - Return a legal replacement for the given operation, with +/// all legal operands. SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. 
return Op; @@ -806,11 +776,14 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { DebugLoc dl = Node->getDebugLoc(); for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - assert(getTypeAction(Node->getValueType(i)) == Legal && + assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == + TargetLowering::TypeLegal && "Unexpected illegal type!"); for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) - assert((isTypeLegal(Node->getOperand(i).getValueType()) || + assert((TLI.getTypeAction(*DAG.getContext(), + Node->getOperand(i).getValueType()) == + TargetLowering::TypeLegal || Node->getOperand(i).getOpcode() == ISD::TargetConstant) && "Unexpected illegal type!"); @@ -1354,7 +1327,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } break; case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) { + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) { SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), @@ -1378,7 +1351,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // If this is a promoted vector load, and the vector element types are // legal, then scalarize it. if (ExtType == ISD::EXTLOAD && SrcVT.isVector() && - isTypeLegal(Node->getValueType(0).getScalarType())) { + TLI.isTypeLegal(Node->getValueType(0).getScalarType())) { SmallVector<SDValue, 8> LoadVals; SmallVector<SDValue, 8> LoadChains; unsigned NumElem = SrcVT.getVectorNumElements(); @@ -1633,15 +1606,15 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case TargetLowering::Custom: Result = TLI.LowerOperation(Result, DAG); break; - case Expand: + case TargetLowering::Expand: EVT WideScalarVT = Tmp3.getValueType().getScalarType(); EVT NarrowScalarVT = StVT.getScalarType(); // The Store type is illegal, must scalarize the vector store. SmallVector<SDValue, 8> Stores; - bool ScalarLegal = isTypeLegal(WideScalarVT); - if (!isTypeLegal(StVT) && StVT.isVector() && ScalarLegal) { + bool ScalarLegal = TLI.isTypeLegal(WideScalarVT); + if (!TLI.isTypeLegal(StVT) && StVT.isVector() && ScalarLegal) { unsigned NumElem = StVT.getVectorNumElements(); unsigned ScalarSize = StVT.getScalarType().getSizeInBits(); @@ -1677,7 +1650,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // The Store type is illegal, must scalarize the vector store. // However, the scalar type is illegal. Must bitcast the result // and store it in smaller parts. - if (!isTypeLegal(StVT) && StVT.isVector()) { + if (!TLI.isTypeLegal(StVT) && StVT.isVector()) { unsigned WideNumElem = StVT.getVectorNumElements(); unsigned Stride = NarrowScalarVT.getSizeInBits()/8; @@ -1717,7 +1690,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // TRUNCSTORE:i16 i32 -> STORE i16 - assert(isTypeLegal(StVT) && "Do not know how to expand this store!"); + assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!"); Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); @@ -1876,7 +1849,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { SDValue SignBit; EVT FloatVT = Tmp2.getValueType(); EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits()); - if (isTypeLegal(IVT)) { + if (TLI.isTypeLegal(IVT)) { // Convert to an integer with the same sign bit. 
SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2); } else { @@ -3198,7 +3171,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, EVT VT = Node->getValueType(0); EVT EltVT = VT.getVectorElementType(); - if (getTypeAction(EltVT) == Promote) + if (!TLI.isTypeLegal(EltVT)) EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); unsigned NumElems = VT.getVectorNumElements(); SmallVector<SDValue, 8> Ops; @@ -3351,6 +3324,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, RTLIB::REM_F80, RTLIB::REM_PPCF128)); break; + case ISD::FMA: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, + RTLIB::FMA_F80, RTLIB::FMA_PPCF128)); + break; case ISD::FP16_TO_FP32: Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 27a466b..e6835d8 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -74,6 +74,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break; case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break; case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break; + case ISD::FMA: R = SoftenFloatRes_FMA(N); break; case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; @@ -294,6 +295,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { NVT, &Op, 1, false, N->getDebugLoc()); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)), + GetSoftenedFloat(N->getOperand(2)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_PPCF128), + NVT, Ops, 3, false, N->getDebugLoc()); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), @@ -837,6 +851,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break; case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break; case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break; + case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break; case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break; case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break; case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break; @@ -989,6 +1004,19 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, GetPairElements(Call, Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_PPCF128), + N->getValueType(0), Ops, 3, false, + N->getDebugLoc()); + GetPairElements(Call, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
index 4597ec9..952797d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -378,6 +378,7 @@ private: SDValue SoftenFloatRes_FLOG(SDNode *N); SDValue SoftenFloatRes_FLOG2(SDNode *N); SDValue SoftenFloatRes_FLOG10(SDNode *N); + SDValue SoftenFloatRes_FMA(SDNode *N); SDValue SoftenFloatRes_FMUL(SDNode *N); SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); SDValue SoftenFloatRes_FNEG(SDNode *N); @@ -442,6 +443,7 @@ private: void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMA (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 5d0f923..ffff10c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -182,9 +182,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FFLOOR: + case ISD::SIGN_EXTEND_INREG: QueryType = Node->getValueType(0); break; - case ISD::SIGN_EXTEND_INREG: case ISD::FP_ROUND_INREG: QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT(); break; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 77ce54f..35ea0bb 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3326,13 +3326,13 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, const TargetLowering &TLI) { assert((SrcAlign == 0 || SrcAlign >= DstAlign) && "Expecting memcpy / memset source to meet alignment requirement!"); - // If 'SrcAlign' is zero, that means the memory operation does not need load - // the value, i.e. memset or memcpy from constant string. Otherwise, it's - // the inferred alignment of the source. 'DstAlign', on the other hand, is the - // specified alignment of the memory operation. If it is zero, that means - // it's possible to change the alignment of the destination. 'MemcpyStrSrc' - // indicates whether the memcpy source is constant so it does not need to be - // loaded. + // If 'SrcAlign' is zero, that means the memory operation does not need to + // load the value, i.e. memset or memcpy from constant string. Otherwise, + // it's the inferred alignment of the source. 'DstAlign', on the other hand, + // is the specified alignment of the memory operation. If it is zero, that + // means it's possible to change the alignment of the destination. + // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does + // not need to be loaded. 
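+  // For example: memset(p, 0, n) has SrcAlign == 0 and no source load;
+  // memcpy(p, "abc", n) has SrcAlign == 0 with MemcpyStrSrc true; and
+  // memcpy(p, q, n) has SrcAlign equal to the inferred alignment of q.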
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, NonScalarIntSafe, MemcpyStrSrc, DAG.getMachineFunction()); @@ -4037,6 +4037,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(VT); @@ -4142,6 +4144,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(Val.getValueType()); @@ -4165,6 +4169,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, MachineMemOperand *MMO) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); EVT VT = Val.getValueType(); SDVTList VTs = getVTList(MVT::Other); SDValue Undef = getUNDEF(Ptr.getValueType()); @@ -4191,6 +4197,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, EVT SVT,bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(SVT); @@ -4216,6 +4224,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, MachineMemOperand *MMO) { EVT VT = Val.getValueType(); + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); if (VT == SVT) return getStore(Chain, dl, Val, Ptr, MMO); @@ -5691,24 +5701,39 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, return false; } -/// isPredecessorOf - Return true if this node is a predecessor of N. This node -/// is either an operand of N or it can be reached by traversing up the operands. -/// NOTE: this is an expensive method. Use it carefully. -bool SDNode::isPredecessorOf(SDNode *N) const { - SmallPtrSet<SDNode *, 32> Visited; - SmallVector<SDNode *, 16> Worklist; - Worklist.push_back(N); +/// hasPredecessor - Return true if N is a predecessor of this node. +/// N is either an operand of this node, or can be reached by recursively +/// traversing up the operands. +/// NOTE: This is an expensive method. Use it carefully. +bool SDNode::hasPredecessor(const SDNode *N) const { + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 16> Worklist; + return hasPredecessorHelper(N, Visited, Worklist); +} - do { - N = Worklist.pop_back_val(); - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - SDNode *Op = N->getOperand(i).getNode(); - if (Op == this) - return true; +bool SDNode::hasPredecessorHelper(const SDNode *N, + SmallPtrSet<const SDNode *, 32> &Visited, + SmallVector<const SDNode *, 16> &Worklist) const { + if (Visited.empty()) { + Worklist.push_back(this); + } else { + // Take a look in the visited set. If we've already encountered this node + // we needn't search further. + if (Visited.count(N)) + return true; + } + + // Haven't visited N yet. Continue the search. 
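+  // Note that a query that returns true may leave nodes on the Worklist;
+  // a later query sharing the same Visited set and Worklist resumes the
+  // walk where the previous one stopped instead of rescanning from scratch.
+  // That is what makes the repeated queries in CombineToPreIndexedLoadStore
+  // cheap.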
+ while (!Worklist.empty()) { + const SDNode *M = Worklist.pop_back_val(); + for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { + SDNode *Op = M->getOperand(i).getNode(); if (Visited.insert(Op)) Worklist.push_back(Op); + if (Op == N) + return true; } - } while (!Worklist.empty()); + } return false; } @@ -5863,6 +5888,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FSUB: return "fsub"; case ISD::FMUL: return "fmul"; case ISD::FDIV: return "fdiv"; + case ISD::FMA: return "fma"; case ISD::FREM: return "frem"; case ISD::FCOPYSIGN: return "fcopysign"; case ISD::FGETSIGN: return "fgetsign"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ea59ca1..81b03ee 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1727,7 +1727,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, VT); SDValue Cmp; - if (CountPopulation_64(B.Mask) == 1) { + unsigned PopCount = CountPopulation_64(B.Mask); + if (PopCount == 1) { // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. Cmp = DAG.getSetCC(getCurDebugLoc(), @@ -1735,6 +1736,13 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, ShiftOp, DAG.getConstant(CountTrailingZeros_64(B.Mask), VT), ISD::SETEQ); + } else if (PopCount == BB.Range) { + // There is only one zero bit in the range, test for it directly. + Cmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(VT), + ShiftOp, + DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), + ISD::SETNE); } else { // Make desired shift SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT, @@ -2501,6 +2509,22 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { Op1.getValueType(), Op1, Op2)); } +void SelectionDAGBuilder::visitSDiv(const User &I) { + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + + // Turn exact SDivs into multiplications. + // FIXME: This should be in DAGCombiner, but it doesn't have access to the + // exact bit. 
+ if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() && + !isa<ConstantSDNode>(Op1) && + isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue()) + setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG)); + else + setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(), Op1.getValueType(), + Op1, Op2)); +} + void SelectionDAGBuilder::visitICmp(const User &I) { ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I)) @@ -2867,7 +2891,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { bool IntoUndef = isa<UndefValue>(Op0); bool FromUndef = isa<UndefValue>(Op1); - unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); SmallVector<EVT, 4> AggValueVTs; ComputeValueVTs(TLI, AggTy, AggValueVTs); @@ -2907,7 +2931,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { const Type *ValTy = I.getType(); bool OutOfUndef = isa<UndefValue>(Op0); - unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); SmallVector<EVT, 4> ValValueVTs; ComputeValueVTs(TLI, ValTy, ValValueVTs); @@ -4635,6 +4659,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::pow: visitPow(I); return 0; + case Intrinsic::fma: + setValue(&I, DAG.getNode(ISD::FMA, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)))); + return 0; case Intrinsic::convert_to_fp16: setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl, MVT::i16, getValue(I.getArgOperand(0)))); @@ -4771,6 +4802,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); return 0; + + case Intrinsic::expect: { + // Just replace __builtin_expect(exp, c) with EXP. + setValue(&I, getValue(I.getArgOperand(0))); + return 0; + } + case Intrinsic::trap: { StringRef TrapFuncName = getTrapFunctionName(); if (TrapFuncName.empty()) { @@ -5668,10 +5706,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { + std::pair<unsigned, const TargetRegisterClass*> MatchRC = + TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass*> InputRC = + TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || - (OpInfo.ConstraintVT.getSizeInBits() != - Input.ConstraintVT.getSizeInBits())) { + (MatchRC.second != InputRC.second)) { report_fatal_error("Unsupported asm: input constraint" " with a matching output constraint of" " incompatible type!"); @@ -5934,8 +5975,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { "Don't know how to handle indirect register inputs yet!"); // Copy the input into the appropriate registers. 
-    if (OpInfo.AssignedRegs.Regs.empty() ||
-        !OpInfo.AssignedRegs.areValueTypesLegal(TLI))
+    if (OpInfo.AssignedRegs.Regs.empty())
       report_fatal_error("Couldn't allocate input reg for constraint '" +
                          Twine(OpInfo.ConstraintCode) + "'!");
 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index a1ca891..a0884eb 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -467,7 +467,7 @@ private:
   void visitSRem(const User &I) { visitBinary(I, ISD::SREM); }
   void visitFRem(const User &I) { visitBinary(I, ISD::FREM); }
   void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); }
-  void visitSDiv(const User &I) { visitBinary(I, ISD::SDIV); }
+  void visitSDiv(const User &I);
   void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); }
   void visitAnd (const User &I) { visitBinary(I, ISD::AND); }
   void visitOr  (const User &I) { visitBinary(I, ISD::OR); }
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 758296e..2626ac3 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -139,6 +139,10 @@ static void InitLibcallNames(const char **Names) {
   Names[RTLIB::REM_F64] = "fmod";
   Names[RTLIB::REM_F80] = "fmodl";
   Names[RTLIB::REM_PPCF128] = "fmodl";
+  Names[RTLIB::FMA_F32] = "fmaf";
+  Names[RTLIB::FMA_F64] = "fma";
+  Names[RTLIB::FMA_F80] = "fmal";
+  Names[RTLIB::FMA_PPCF128] = "fmal";
   Names[RTLIB::POWI_F32] = "__powisf2";
   Names[RTLIB::POWI_F64] = "__powidf2";
   Names[RTLIB::POWI_F80] = "__powixf2";
@@ -2623,7 +2627,6 @@ PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
 TargetLowering::ConstraintType
 TargetLowering::getConstraintType(const std::string &Constraint) const {
-  // FIXME: lots more standard ones to handle.
   if (Constraint.size() == 1) {
     switch (Constraint[0]) {
     default: break;
@@ -2963,10 +2966,13 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
         AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
 
         if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+          std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+            getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT);
+          std::pair<unsigned, const TargetRegisterClass*> InputRC =
+            getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT);
          if ((OpInfo.ConstraintVT.isInteger() !=
                Input.ConstraintVT.isInteger()) ||
-              (OpInfo.ConstraintVT.getSizeInBits() !=
-               Input.ConstraintVT.getSizeInBits())) {
+              (MatchRC.second != InputRC.second)) {
            report_fatal_error("Unsupported asm: input constraint"
                                " with a matching output constraint of"
                                " incompatible type!");
@@ -3212,6 +3218,32 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
   return true;
 }
 
+/// BuildExactSDIV - Given an exact SDIV by a constant, create a multiplication
+/// with the multiplicative inverse of the constant.
+SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
+                                       SelectionDAG &DAG) const {
+  ConstantSDNode *C = cast<ConstantSDNode>(Op2);
+  APInt d = C->getAPIntValue();
+  assert(d != 0 && "Division by zero!");
+
+  // Shift the value upfront if it is even, so the LSB is one.
+  unsigned ShAmt = d.countTrailingZeros();
+  if (ShAmt) {
+    // TODO: For UDIV use SRL instead of SRA.
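+    // For example, an exact sdiv i32 %x, 6 reaches this point with
+    // ShAmt == 1; after the shift, d becomes 3, whose multiplicative
+    // inverse mod 2^32 is 0xAAAAAAAB (3 * 0xAAAAAAAB == 1 mod 2^32).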
+ SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType())); + Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt); + d = d.ashr(ShAmt); + } + + // Calculate the multiplicative inverse, using Newton's method. + APInt t, xn = d; + while ((t = d*xn) != 1) + xn *= APInt(d.getBitWidth(), 2) - t; + + Op2 = DAG.getConstant(xn, Op1.getValueType()); + return DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2); +} + /// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index ff58b22..5a253a4 100644 --- a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -45,7 +45,8 @@ namespace { /// StackEntryTy - Abstract type of a link in the shadow stack. /// - const StructType *StackEntryTy; + StructType *StackEntryTy; + StructType *FrameMapTy; /// Roots - GC roots in the current function. Each is a pair of the /// intrinsic call and its corresponding alloca. @@ -164,8 +165,7 @@ namespace { InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), NewBB, CleanupBB, - Args.begin(), Args.end(), - CI->getName(), CallBB); + Args, CI->getName(), CallBB); II->setCallingConv(CI->getCallingConv()); II->setAttributes(CI->getAttributes()); CI->replaceAllUsesWith(II); @@ -211,18 +211,14 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) { }; Constant *DescriptorElts[] = { - ConstantStruct::get(StructType::get(Int32Ty, Int32Ty, NULL), BaseElts), + ConstantStruct::get(FrameMapTy, BaseElts), ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata) }; - Constant *FrameMap = - ConstantStruct::get(StructType::get(DescriptorElts[0]->getType(), - DescriptorElts[1]->getType(), NULL), - DescriptorElts); - - std::string TypeName("gc_map."); - TypeName += utostr(NumMeta); - F.getParent()->addTypeName(TypeName, FrameMap->getType()); + Type *EltTys[] = { DescriptorElts[0]->getType(),DescriptorElts[1]->getType()}; + StructType *STy = StructType::createNamed("gc_map."+utostr(NumMeta), EltTys); + + Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts); // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems // that, short of multithreaded LLVM, it should be safe; all that is @@ -250,17 +246,12 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) { const Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) { // doInitialization creates the generic version of this type. - std::vector<const Type*> EltTys; + std::vector<Type*> EltTys; EltTys.push_back(StackEntryTy); for (size_t I = 0; I != Roots.size(); I++) EltTys.push_back(Roots[I].second->getAllocatedType()); - Type *Ty = StructType::get(F.getContext(), EltTys); - - std::string TypeName("gc_stackentry."); - TypeName += F.getName(); - F.getParent()->addTypeName(TypeName, Ty); - - return Ty; + + return StructType::createNamed("gc_stackentry."+F.getName().str(), EltTys); } /// doInitialization - If this module uses the GC intrinsics, find them now. If @@ -271,13 +262,12 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) { // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots. // void *Meta[]; // May be absent for roots without metadata. // }; - std::vector<const Type*> EltTys; + std::vector<Type*> EltTys; // 32 bits is ok up to a 32GB stack frame. :) EltTys.push_back(Type::getInt32Ty(M.getContext())); // Specifies length of variable length array. 
EltTys.push_back(Type::getInt32Ty(M.getContext())); - StructType *FrameMapTy = StructType::get(M.getContext(), EltTys); - M.addTypeName("gc_map", FrameMapTy); + FrameMapTy = StructType::createNamed("gc_map", EltTys); PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy); // struct StackEntry { @@ -285,18 +275,14 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) { // FrameMap *Map; // Pointer to constant FrameMap. // void *Roots[]; // Stack roots (in-place array, so we pretend). // }; - OpaqueType *RecursiveTy = OpaqueType::get(M.getContext()); - + + StackEntryTy = StructType::createNamed(M.getContext(), "gc_stackentry"); + EltTys.clear(); - EltTys.push_back(PointerType::getUnqual(RecursiveTy)); + EltTys.push_back(PointerType::getUnqual(StackEntryTy)); EltTys.push_back(FrameMapPtrTy); - PATypeHolder LinkTyH = StructType::get(M.getContext(), EltTys); - - RecursiveTy->refineAbstractTypeTo(LinkTyH.get()); - StackEntryTy = cast<StructType>(LinkTyH.get()); + StackEntryTy->setBody(EltTys); const PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy); - M.addTypeName("gc_stackentry", LinkTyH.get()); // FIXME: Is this safe from - // a FunctionPass? // Get the root chain if it already exists. Head = M.getGlobalVariable("llvm_gc_root_chain"); @@ -403,7 +389,7 @@ bool ShadowStackGC::performCustomLowering(Function &F) { Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry, 0,1,"gc_frame.map"); - AtEntry.CreateStore(FrameMap, EntryMapPtr); + AtEntry.CreateStore(FrameMap, EntryMapPtr); // After all the allocas... for (unsigned I = 0, E = Roots.size(); I != E; ++I) { diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index c2565af..65a33da 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -87,9 +87,8 @@ FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) { bool SjLjEHPass::doInitialization(Module &M) { // Build the function context structure. // builtin_setjmp uses a five word jbuf - const Type *VoidPtrTy = - Type::getInt8PtrTy(M.getContext()); - const Type *Int32Ty = Type::getInt32Ty(M.getContext()); + Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); + Type *Int32Ty = Type::getInt32Ty(M.getContext()); FunctionContextTy = StructType::get(VoidPtrTy, // __prev Int32Ty, // call_site diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index a0952a0..761cab7 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -1124,3 +1124,263 @@ void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) { } finish(); } + + +//===----------------------------------------------------------------------===// +// Global Live Range Splitting Support +//===----------------------------------------------------------------------===// + +// These methods support a method of global live range splitting that uses a +// global algorithm to decide intervals for CFG edges. They will insert split +// points and color intervals in basic blocks while avoiding interference. +// +// Note that splitSingleBlock is also useful for blocks where both CFG edges +// are on the stack. 
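+//
+// Purely as an illustration (editorial sketch; the real driver lives in the
+// register allocator, which is not part of this diff): a caller that has
+// already chosen interval indices for each block boundary would dispatch to
+// the three per-block primitives roughly like this:
+//
+//   if (IntvIn && IntvOut)
+//     SE.splitLiveThroughBlock(MBBNum, IntvIn, LeaveBefore,
+//                              IntvOut, EnterAfter);
+//   else if (IntvIn)
+//     SE.splitRegInBlock(BI, IntvIn, LeaveBefore);
+//   else if (IntvOut)
+//     SE.splitRegOutBlock(BI, IntvOut, EnterAfter);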
+ +void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, + unsigned IntvIn, SlotIndex LeaveBefore, + unsigned IntvOut, SlotIndex EnterAfter){ + SlotIndex Start, Stop; + tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum); + + DEBUG(dbgs() << "BB#" << MBBNum << " [" << Start << ';' << Stop + << ") intf " << LeaveBefore << '-' << EnterAfter + << ", live-through " << IntvIn << " -> " << IntvOut); + + assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks"); + + if (!IntvOut) { + DEBUG(dbgs() << ", spill on entry.\n"); + // + // <<<<<<<<< Possible LeaveBefore interference. + // |-----------| Live through. + // -____________ Spill on entry. + // + selectIntv(IntvIn); + MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum); + SlotIndex Idx = leaveIntvAtTop(*MBB); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); + (void)Idx; + return; + } + + if (!IntvIn) { + DEBUG(dbgs() << ", reload on exit.\n"); + // + // >>>>>>> Possible EnterAfter interference. + // |-----------| Live through. + // ___________-- Reload on exit. + // + selectIntv(IntvOut); + MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum); + SlotIndex Idx = enterIntvAtEnd(*MBB); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + (void)Idx; + return; + } + + if (IntvIn == IntvOut && !LeaveBefore && !EnterAfter) { + DEBUG(dbgs() << ", straight through.\n"); + // + // |-----------| Live through. + // ------------- Straight through, same intv, no interference. + // + selectIntv(IntvOut); + useIntv(Start, Stop); + return; + } + + // We cannot legally insert splits after LSP. + SlotIndex LSP = SA.getLastSplitPoint(MBBNum); + + if (IntvIn != IntvOut && (!LeaveBefore || !EnterAfter || + LeaveBefore.getBaseIndex() > EnterAfter.getBoundaryIndex())) { + DEBUG(dbgs() << ", switch avoiding interference.\n"); + // + // >>>> <<<< Non-overlapping EnterAfter/LeaveBefore interference. + // |-----------| Live through. + // ------======= Switch intervals between interference. + // + SlotIndex Cut = (LeaveBefore && LeaveBefore < LSP) ? LeaveBefore : LSP; + selectIntv(IntvOut); + SlotIndex Idx = enterIntvBefore(Cut); + useIntv(Idx, Stop); + selectIntv(IntvIn); + useIntv(Start, Idx); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + return; + } + + DEBUG(dbgs() << ", create local intv for interference.\n"); + // + // >>><><><><<<< Overlapping EnterAfter/LeaveBefore interference. + // |-----------| Live through. + // ==---------== Switch intervals before/after interference. + // + assert(LeaveBefore <= EnterAfter && "Missed case"); + + selectIntv(IntvOut); + SlotIndex Idx = enterIntvAfter(EnterAfter); + useIntv(Idx, Stop); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + + selectIntv(IntvIn); + Idx = leaveIntvBefore(LeaveBefore); + useIntv(Start, Idx); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); +} + + +void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI, + unsigned IntvIn, SlotIndex LeaveBefore) { + SlotIndex Start, Stop; + tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); + + DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop + << "), uses " << BI.FirstUse << '-' << BI.LastUse + << ", reg-in " << IntvIn << ", leave before " << LeaveBefore + << (BI.LiveOut ? 
", stack-out" : ", killed in block")); + + assert(IntvIn && "Must have register in"); + assert(BI.LiveIn && "Must be live-in"); + assert((!LeaveBefore || LeaveBefore > Start) && "Bad interference"); + + if (!BI.LiveOut && (!LeaveBefore || LeaveBefore >= BI.LastUse)) { + DEBUG(dbgs() << " before interference.\n"); + // + // <<< Interference after kill. + // |---o---x | Killed in block. + // ========= Use IntvIn everywhere. + // + selectIntv(IntvIn); + useIntv(Start, BI.LastUse); + return; + } + + SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber()); + + if (!LeaveBefore || LeaveBefore > BI.LastUse.getBoundaryIndex()) { + // + // <<< Possible interference after last use. + // |---o---o---| Live-out on stack. + // =========____ Leave IntvIn after last use. + // + // < Interference after last use. + // |---o---o--o| Live-out on stack, late last use. + // ============ Copy to stack after LSP, overlap IntvIn. + // \_____ Stack interval is live-out. + // + if (BI.LastUse < LSP) { + DEBUG(dbgs() << ", spill after last use before interference.\n"); + selectIntv(IntvIn); + SlotIndex Idx = leaveIntvAfter(BI.LastUse); + useIntv(Start, Idx); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); + } else { + DEBUG(dbgs() << ", spill before last split point.\n"); + selectIntv(IntvIn); + SlotIndex Idx = leaveIntvBefore(LSP); + overlapIntv(Idx, BI.LastUse); + useIntv(Start, Idx); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); + } + return; + } + + // The interference is overlapping somewhere we wanted to use IntvIn. That + // means we need to create a local interval that can be allocated a + // different register. + unsigned LocalIntv = openIntv(); + (void)LocalIntv; + DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n"); + + if (!BI.LiveOut || BI.LastUse < LSP) { + // + // <<<<<<< Interference overlapping uses. + // |---o---o---| Live-out on stack. + // =====----____ Leave IntvIn before interference, then spill. + // + SlotIndex To = leaveIntvAfter(BI.LastUse); + SlotIndex From = enterIntvBefore(LeaveBefore); + useIntv(From, To); + selectIntv(IntvIn); + useIntv(Start, From); + assert((!LeaveBefore || From <= LeaveBefore) && "Interference"); + return; + } + + // <<<<<<< Interference overlapping uses. + // |---o---o--o| Live-out on stack, late last use. + // =====------- Copy to stack before LSP, overlap LocalIntv. + // \_____ Stack interval is live-out. + // + SlotIndex To = leaveIntvBefore(LSP); + overlapIntv(To, BI.LastUse); + SlotIndex From = enterIntvBefore(std::min(To, LeaveBefore)); + useIntv(From, To); + selectIntv(IntvIn); + useIntv(Start, From); + assert((!LeaveBefore || From <= LeaveBefore) && "Interference"); +} + +void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, + unsigned IntvOut, SlotIndex EnterAfter) { + SlotIndex Start, Stop; + tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); + + DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop + << "), uses " << BI.FirstUse << '-' << BI.LastUse + << ", reg-out " << IntvOut << ", enter after " << EnterAfter + << (BI.LiveIn ? ", stack-in" : ", defined in block")); + + SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber()); + + assert(IntvOut && "Must have register out"); + assert(BI.LiveOut && "Must be live-out"); + assert((!EnterAfter || EnterAfter < LSP) && "Bad interference"); + + if (!BI.LiveIn && (!EnterAfter || EnterAfter <= BI.FirstUse)) { + DEBUG(dbgs() << " after interference.\n"); + // + // >>>> Interference before def. 
+ // | o---o---| Defined in block. + // ========= Use IntvOut everywhere. + // + selectIntv(IntvOut); + useIntv(BI.FirstUse, Stop); + return; + } + + if (!EnterAfter || EnterAfter < BI.FirstUse.getBaseIndex()) { + DEBUG(dbgs() << ", reload after interference.\n"); + // + // >>>> Interference before def. + // |---o---o---| Live-through, stack-in. + // ____========= Enter IntvOut before first use. + // + selectIntv(IntvOut); + SlotIndex Idx = enterIntvBefore(std::min(LSP, BI.FirstUse)); + useIntv(Idx, Stop); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + return; + } + + // The interference is overlapping somewhere we wanted to use IntvOut. That + // means we need to create a local interval that can be allocated a + // different register. + DEBUG(dbgs() << ", interference overlaps uses.\n"); + // + // >>>>>>> Interference overlapping uses. + // |---o---o---| Live-through, stack-in. + // ____---====== Create local interval for interference range. + // + selectIntv(IntvOut); + SlotIndex Idx = enterIntvAfter(EnterAfter); + useIntv(Idx, Stop); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + + openIntv(); + SlotIndex From = enterIntvBefore(std::min(Idx, BI.FirstUse)); + useIntv(From, Idx); +} diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index a9ccf40b..7948b72 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -426,6 +426,42 @@ public: /// splitSingleBlocks - Split CurLI into a separate live interval inside each /// basic block in Blocks. void splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks); + + /// splitLiveThroughBlock - Split CurLI in the given block such that it + /// enters the block in IntvIn and leaves it in IntvOut. There may be uses in + /// the block, but they will be ignored when placing split points. + /// + /// @param MBBNum Block number. + /// @param IntvIn Interval index entering the block. + /// @param LeaveBefore When set, leave IntvIn before this point. + /// @param IntvOut Interval index leaving the block. + /// @param EnterAfter When set, enter IntvOut after this point. + void splitLiveThroughBlock(unsigned MBBNum, + unsigned IntvIn, SlotIndex LeaveBefore, + unsigned IntvOut, SlotIndex EnterAfter); + + /// splitRegInBlock - Split CurLI in the given block such that it enters the + /// block in IntvIn and leaves it on the stack (or not at all). Split points + /// are placed in a way that avoids putting uses in the stack interval. This + /// may require creating a local interval when there is interference. + /// + /// @param BI Block descriptor. + /// @param IntvIn Interval index entering the block. Not 0. + /// @param LeaveBefore When set, leave IntvIn before this point. + void splitRegInBlock(const SplitAnalysis::BlockInfo &BI, + unsigned IntvIn, SlotIndex LeaveBefore); + + /// splitRegOutBlock - Split CurLI in the given block such that it enters the + /// block on the stack (or isn't live-in at all) and leaves it in IntvOut. + /// Split points are placed to avoid interference and such that the uses are + /// not in the stack interval. This may require creating a local interval + /// when there is interference. + /// + /// @param BI Block descriptor. + /// @param IntvOut Interval index leaving the block. + /// @param EnterAfter When set, enter IntvOut after this point. 
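+  ///
+  /// Illustrative call (editorial, with assumed values): for a block
+  /// described by BI that must be live-out in interval 1, with interference
+  /// ending at some SlotIndex Idx, a caller would use
+  /// splitRegOutBlock(BI, 1, Idx), passing a null SlotIndex as EnterAfter
+  /// when the block has no interference.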
+ void splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, + unsigned IntvOut, SlotIndex EnterAfter); }; } diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index f0a44ab..d3cbd15 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -186,7 +186,7 @@ bool StackProtector::InsertStackProtectors() { Value *Args[] = { LI, AI }; CallInst:: Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), - &Args[0], array_endof(Args), "", InsPt); + Args, "", InsPt); // Create the basic block to jump to when the guard check fails. FailBB = CreateFailBB(); diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 6fe4bd7..6b801cb 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -102,9 +102,15 @@ namespace { SmallVector<MachineBasicBlock*, 8> &TDBBs, const DenseSet<unsigned> &RegsUsedByPhi, SmallVector<MachineInstr*, 16> &Copies); - bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, + bool TailDuplicate(MachineBasicBlock *TailBB, + bool IsSimple, + MachineFunction &MF, SmallVector<MachineBasicBlock*, 8> &TDBBs, SmallVector<MachineInstr*, 16> &Copies); + bool TailDuplicateAndUpdate(MachineBasicBlock *MBB, + bool IsSimple, + MachineFunction &MF); + void RemoveDeadBlock(MachineBasicBlock *MBB); }; @@ -175,6 +181,109 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { } } +/// TailDuplicateAndUpdate - Tail duplicate the block and cleanup. +bool +TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB, + bool IsSimple, + MachineFunction &MF) { + // Save the successors list. + SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(), + MBB->succ_end()); + + SmallVector<MachineBasicBlock*, 8> TDBBs; + SmallVector<MachineInstr*, 16> Copies; + if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies)) + return false; + + ++NumTails; + + SmallVector<MachineInstr*, 8> NewPHIs; + MachineSSAUpdater SSAUpdate(MF, &NewPHIs); + + // TailBB's immediate successors are now successors of those predecessors + // which duplicated TailBB. Add the predecessors as sources to the PHI + // instructions. + bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken(); + if (PreRegAlloc) + UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs); + + // If it is dead, remove it. + if (isDead) { + NumInstrDups -= MBB->size(); + RemoveDeadBlock(MBB); + ++NumDeadBlocks; + } + + // Update SSA form. + if (!SSAUpdateVRs.empty()) { + for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { + unsigned VReg = SSAUpdateVRs[i]; + SSAUpdate.Initialize(VReg); + + // If the original definition is still around, add it as an available + // value. + MachineInstr *DefMI = MRI->getVRegDef(VReg); + MachineBasicBlock *DefBB = 0; + if (DefMI) { + DefBB = DefMI->getParent(); + SSAUpdate.AddAvailableValue(DefBB, VReg); + } + + // Add the new vregs as available values. + DenseMap<unsigned, AvailableValsTy>::iterator LI = + SSAUpdateVals.find(VReg); + for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { + MachineBasicBlock *SrcBB = LI->second[j].first; + unsigned SrcReg = LI->second[j].second; + SSAUpdate.AddAvailableValue(SrcBB, SrcReg); + } + + // Rewrite uses that are outside of the original def's block. + MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); + while (UI != MRI->use_end()) { + MachineOperand &UseMO = UI.getOperand(); + MachineInstr *UseMI = &*UI; + ++UI; + if (UseMI->isDebugValue()) { + // SSAUpdate can replace the use with an undef. 
That creates
+        // a debug instruction that is a kill.
+        // FIXME: Should it be SSAUpdate's job to delete debug instructions
+        // instead of replacing the use with undef?
+        UseMI->eraseFromParent();
+        continue;
+      }
+      if (UseMI->getParent() == DefBB && !UseMI->isPHI())
+        continue;
+      SSAUpdate.RewriteUse(UseMO);
+      }
+    }
+
+    SSAUpdateVRs.clear();
+    SSAUpdateVals.clear();
+  }
+
+  // Eliminate some of the copies inserted by tail duplication to maintain
+  // SSA form.
+  for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+    MachineInstr *Copy = Copies[i];
+    if (!Copy->isCopy())
+      continue;
+    unsigned Dst = Copy->getOperand(0).getReg();
+    unsigned Src = Copy->getOperand(1).getReg();
+    MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
+    if (++UI == MRI->use_end()) {
+      // Copy is the only use. Do trivial copy propagation here.
+      MRI->replaceRegWith(Dst, Src);
+      Copy->eraseFromParent();
+    }
+  }
+
+  if (NewPHIs.size())
+    NumAddedPHIs += NewPHIs.size();
+
+  return true;
+}
+
 /// TailDuplicateBlocks - Look for small blocks that are unconditionally
 /// branched to and do not fall through. Tail-duplicate their instructions
 /// into their predecessors to eliminate (dynamic) branches.
@@ -186,108 +295,22 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
     VerifyPHIs(MF, true);
   }
-  SmallVector<MachineInstr*, 8> NewPHIs;
-  MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
-
   for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
     MachineBasicBlock *MBB = I++;
     if (NumTails == TailDupLimit)
       break;
-    // Save the successors list.
-    SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
-                                                MBB->succ_end());
-
-    SmallVector<MachineBasicBlock*, 8> TDBBs;
-    SmallVector<MachineInstr*, 16> Copies;
-    if (TailDuplicate(MBB, MF, TDBBs, Copies)) {
-      ++NumTails;
-
-      // TailBB's immediate successors are now successors of those predecessors
-      // which duplicated TailBB. Add the predecessors as sources to the PHI
-      // instructions.
-      bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
-      if (PreRegAlloc)
-        UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
-
-      // If it is dead, remove it.
-      if (isDead) {
-        NumInstrDups -= MBB->size();
-        RemoveDeadBlock(MBB);
-        ++NumDeadBlocks;
-      }
-
-      // Update SSA form.
-      if (!SSAUpdateVRs.empty()) {
-        for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
-          unsigned VReg = SSAUpdateVRs[i];
-          SSAUpdate.Initialize(VReg);
-
-          // If the original definition is still around, add it as an available
-          // value.
-          MachineInstr *DefMI = MRI->getVRegDef(VReg);
-          MachineBasicBlock *DefBB = 0;
-          if (DefMI) {
-            DefBB = DefMI->getParent();
-            SSAUpdate.AddAvailableValue(DefBB, VReg);
-          }
-
-          // Add the new vregs as available values.
-          DenseMap<unsigned, AvailableValsTy>::iterator LI =
-            SSAUpdateVals.find(VReg);
-          for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
-            MachineBasicBlock *SrcBB = LI->second[j].first;
-            unsigned SrcReg = LI->second[j].second;
-            SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
-          }
-
-          // Rewrite uses that are outside of the original def's block.
-          MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
-          while (UI != MRI->use_end()) {
-            MachineOperand &UseMO = UI.getOperand();
-            MachineInstr *UseMI = &*UI;
-            ++UI;
-            if (UseMI->isDebugValue()) {
-              // SSAUpdate can replace the use with an undef. That creates
-              // a debug instruction that is a kill.
-              // FIXME: Should it SSAUpdate job to delete debug instructions
-              // instead of replacing the use with undef?
- UseMI->eraseFromParent(); - continue; - } - if (UseMI->getParent() == DefBB && !UseMI->isPHI()) - continue; - SSAUpdate.RewriteUse(UseMO); - } - } + bool IsSimple = isSimpleBB(MBB); - SSAUpdateVRs.clear(); - SSAUpdateVals.clear(); - } - - // Eliminate some of the copies inserted by tail duplication to maintain - // SSA form. - for (unsigned i = 0, e = Copies.size(); i != e; ++i) { - MachineInstr *Copy = Copies[i]; - if (!Copy->isCopy()) - continue; - unsigned Dst = Copy->getOperand(0).getReg(); - unsigned Src = Copy->getOperand(1).getReg(); - MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src); - if (++UI == MRI->use_end()) { - // Copy is the only use. Do trivial copy propagation here. - MRI->replaceRegWith(Dst, Src); - Copy->eraseFromParent(); - } - } + if (!shouldTailDuplicate(MF, IsSimple, *MBB)) + continue; - if (PreRegAlloc && TailDupVerify) - VerifyPHIs(MF, false); - MadeChange = true; - } + MadeChange |= TailDuplicateAndUpdate(MBB, IsSimple, MF); } - NumAddedPHIs += NewPHIs.size(); + + if (PreRegAlloc && TailDupVerify) + VerifyPHIs(MF, false); return MadeChange; } @@ -527,14 +550,12 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, // to allow undoing the effects of tail merging and other optimizations // that rearrange the predecessors of the indirect branch. - bool hasIndirectBR = false; - if (PreRegAlloc && !TailBB.empty()) { - const MCInstrDesc &MCID = TailBB.back().getDesc(); - if (MCID.isIndirectBranch()) { - MaxDuplicateCount = 20; - hasIndirectBR = true; - } - } + bool HasIndirectbr = false; + if (!TailBB.empty()) + HasIndirectbr = TailBB.back().getDesc().isIndirectBranch(); + + if (HasIndirectbr && PreRegAlloc) + MaxDuplicateCount = 20; // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. @@ -564,7 +585,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, return false; } - if (hasIndirectBR) + if (HasIndirectbr && PreRegAlloc) return true; if (IsSimple) @@ -706,14 +727,11 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, /// TailDuplicate - If it is profitable, duplicate TailBB's contents in each /// of its predecessors. 
 bool
-TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
+TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
+                                 bool IsSimple,
+                                 MachineFunction &MF,
                                  SmallVector<MachineBasicBlock*, 8> &TDBBs,
                                  SmallVector<MachineInstr*, 16> &Copies) {
-  bool IsSimple = isSimpleBB(TailBB);
-
-  if (!shouldTailDuplicate(MF, IsSimple, *TailBB))
-    return false;
-
   DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
   DenseSet<unsigned> UsedByPhi;
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index f2eb5cf..a3c5620 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -43,6 +43,19 @@ using namespace dwarf;
 // ELF
 //===----------------------------------------------------------------------===//
+TargetLoweringObjectFileELF::TargetLoweringObjectFileELF()
+  : TargetLoweringObjectFile(),
+    TLSDataSection(0),
+    TLSBSSSection(0),
+    DataRelSection(0),
+    DataRelLocalSection(0),
+    DataRelROSection(0),
+    DataRelROLocalSection(0),
+    MergeableConst4Section(0),
+    MergeableConst8Section(0),
+    MergeableConst16Section(0) {
+}
+
 void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
                                              const TargetMachine &TM) {
   TargetLoweringObjectFile::Initialize(Ctx, TM);
@@ -480,6 +493,27 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
 // MachO
 //===----------------------------------------------------------------------===//
+TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO()
+  : TargetLoweringObjectFile(),
+    TLSDataSection(0),
+    TLSBSSSection(0),
+    TLSTLVSection(0),
+    TLSThreadInitSection(0),
+    CStringSection(0),
+    UStringSection(0),
+    TextCoalSection(0),
+    ConstTextCoalSection(0),
+    ConstDataSection(0),
+    DataCoalSection(0),
+    DataCommonSection(0),
+    DataBSSSection(0),
+    FourByteConstantSection(0),
+    EightByteConstantSection(0),
+    SixteenByteConstantSection(0),
+    LazySymbolPointerSection(0),
+    NonLazySymbolPointerSection(0) {
+}
+
 void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
                                                const TargetMachine &TM) {
   IsFunctionEHFrameSymbolPrivate = false;
@@ -891,6 +925,13 @@ unsigned TargetLoweringObjectFileMachO::getTTypeEncoding() const {
 // COFF
 //===----------------------------------------------------------------------===//
+TargetLoweringObjectFileCOFF::TargetLoweringObjectFileCOFF()
+  : TargetLoweringObjectFile(),
+    DrectveSection(0),
+    PDataSection(0),
+    XDataSection(0) {
+}
+
 void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
                                               const TargetMachine &TM) {
   TargetLoweringObjectFile::Initialize(Ctx, TM);
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
index ba50f4e..03abff3 100644
--- a/lib/CodeGen/VirtRegMap.h
+++ b/lib/CodeGen/VirtRegMap.h
@@ -208,6 +208,11 @@ namespace llvm {
     /// @brief returns the register allocation preference.
     unsigned getRegAllocPref(unsigned virtReg);
+    /// @brief returns true if VirtReg is assigned to its preferred physreg.
+    bool hasPreferredPhys(unsigned VirtReg) {
+      return getPhys(VirtReg) == getRegAllocPref(VirtReg);
+    }
+
     /// @brief records virtReg is a split live interval from SReg.
     void setIsSplitFromReg(unsigned virtReg, unsigned SReg) {
       Virt2SplitMap[virtReg] = SReg;
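
The hasPreferredPhys() query added above simply compares the assigned physreg
against the register allocation preference. A sketch of one plausible use by
an allocator (hypothetical helper, not part of this diff):

  // Requeue VirtReg for another allocation round only when it missed its
  // preferred physical register (editorial sketch).
  static void requeueIfNotPreferred(VirtRegMap &VRM, unsigned VirtReg,
                                    SmallVectorImpl<unsigned> &RetryQueue) {
    if (!VRM.hasPreferredPhys(VirtReg))
      RetryQueue.push_back(VirtReg);
  }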