Diffstat (limited to 'lib/CodeGen')
50 files changed, 1455 insertions, 1074 deletions
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 14e14c3..00874d4 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -290,7 +290,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, } bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, - const TargetLowering &TLI) { + SDValue &Chain, const TargetLowering &TLI) { const Function *F = DAG.getMachineFunction().getFunction(); // Conservatively require the attributes of the call to match those of @@ -304,5 +304,5 @@ bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, return false; // Check if the only use is a function return node. - return TLI.isUsedByReturnOnly(Node); + return TLI.isUsedByReturnOnly(Node, Chain); } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index dd3fb3b..f6cde98 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1781,7 +1781,9 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, if (CFP->getType()->isFloatTy()) { if (AP.isVerbose()) { float Val = CFP->getValueAPF().convertToFloat(); - AP.OutStreamer.GetCommentOS() << "float " << Val << '\n'; + uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + AP.OutStreamer.GetCommentOS() << "float " << Val << '\n' + << " (" << format("0x%x", IntVal) << ")\n"; } uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace); @@ -1793,7 +1795,9 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, if (CFP->getType()->isDoubleTy()) { if (AP.isVerbose()) { double Val = CFP->getValueAPF().convertToDouble(); - AP.OutStreamer.GetCommentOS() << "double " << Val << '\n'; + uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + AP.OutStreamer.GetCommentOS() << "double " << Val << '\n' + << " (" << format("0x%lx", IntVal) << ")\n"; } uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 89e6cd1..e9e9335 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -329,7 +329,11 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; } - if (OpNo >= MI->getNumOperands()) { + // We may have a location metadata attached to the end of the + // instruction, and at no point should see metadata at any + // other point while processing. It's an error if so. 
+ if (OpNo >= MI->getNumOperands() || + MI->getOperand(OpNo).isMetadata()) { Error = true; } else { unsigned OpFlags = MI->getOperand(OpNo).getImm(); diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index 644eaad..454a923 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -11,14 +11,16 @@ // //===----------------------------------------------------------------------===// +#include "DwarfAccelTable.h" +#include "DwarfDebug.h" +#include "DIE.h" +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Debug.h" -#include "DwarfAccelTable.h" -#include "DwarfDebug.h" -#include "DIE.h" using namespace llvm; @@ -34,44 +36,28 @@ const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) { llvm_unreachable("invalid AtomType!"); } -// The general case would need to have a less hard coded size for the -// length of the HeaderData, however, if we're constructing based on a -// single Atom then we know it will always be: 4 + 4 + 2 + 2. -DwarfAccelTable::DwarfAccelTable(DwarfAccelTable::Atom atom) : - Header(12), - HeaderData(atom) { -} - // The length of the header data is always going to be 4 + 4 + 4*NumAtoms. -DwarfAccelTable::DwarfAccelTable(std::vector<DwarfAccelTable::Atom> &atomList) : +DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList) : Header(8 + (atomList.size() * 4)), - HeaderData(atomList) { -} + HeaderData(atomList), + Entries(Allocator) { } -DwarfAccelTable::~DwarfAccelTable() { - for (size_t i = 0, e = Data.size(); i < e; ++i) - delete Data[i]; - for (StringMap<DataArray>::iterator - EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) - for (DataArray::iterator DI = EI->second.begin(), - DE = EI->second.end(); DI != DE; ++DI) - delete (*DI); -} +DwarfAccelTable::~DwarfAccelTable() { } void DwarfAccelTable::AddName(StringRef Name, DIE* die, char Flags) { + assert(Data.empty() && "Already finalized!"); // If the string is in the list already then add this die to the list // otherwise add a new one. DataArray &DIEs = Entries[Name]; - DIEs.push_back(new HashDataContents(die, Flags)); + DIEs.push_back(new (Allocator) HashDataContents(die, Flags)); } void DwarfAccelTable::ComputeBucketCount(void) { // First get the number of unique hashes. - std::vector<uint32_t> uniques; - uniques.resize(Data.size()); + std::vector<uint32_t> uniques(Data.size()); for (size_t i = 0, e = Data.size(); i < e; ++i) uniques[i] = Data[i]->HashValue; - std::stable_sort(uniques.begin(), uniques.end()); + array_pod_sort(uniques.begin(), uniques.end()); std::vector<uint32_t>::iterator p = std::unique(uniques.begin(), uniques.end()); uint32_t num = std::distance(uniques.begin(), p); @@ -84,31 +70,23 @@ void DwarfAccelTable::ComputeBucketCount(void) { Header.hashes_count = num; } -namespace { - // DIESorter - comparison predicate that sorts DIEs by their offset. - struct DIESorter { - bool operator()(const struct DwarfAccelTable::HashDataContents *A, - const struct DwarfAccelTable::HashDataContents *B) const { - return A->Die->getOffset() < B->Die->getOffset(); - } - }; +// compareDIEs - comparison predicate that sorts DIEs by their offset. 
+static bool compareDIEs(const DwarfAccelTable::HashDataContents *A, + const DwarfAccelTable::HashDataContents *B) { + return A->Die->getOffset() < B->Die->getOffset(); } void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) { // Create the individual hash data outputs. for (StringMap<DataArray>::iterator EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { - struct HashData *Entry = new HashData((*EI).getKeyData()); // Unique the entries. - std::stable_sort(EI->second.begin(), EI->second.end(), DIESorter()); + std::stable_sort(EI->second.begin(), EI->second.end(), compareDIEs); EI->second.erase(std::unique(EI->second.begin(), EI->second.end()), EI->second.end()); - for (DataArray::const_iterator DI = EI->second.begin(), - DE = EI->second.end(); - DI != DE; ++DI) - Entry->addData((*DI)); + HashData *Entry = new (Allocator) HashData(EI->getKey(), EI->second); Data.push_back(Entry); } @@ -215,7 +193,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) { D->getStringPool()); Asm->OutStreamer.AddComment("Num DIEs"); Asm->EmitInt32((*HI)->Data.size()); - for (std::vector<struct HashDataContents*>::const_iterator + for (ArrayRef<HashDataContents*>::const_iterator DI = (*HI)->Data.begin(), DE = (*HI)->Data.end(); DI != DE; ++DI) { // Emit the DIE offset diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 2278d4c..963b8cd 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -15,6 +15,7 @@ #define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__ #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/DataTypes.h" @@ -164,22 +165,12 @@ public: private: struct TableHeaderData { - uint32_t die_offset_base; - std::vector<Atom> Atoms; + SmallVector<Atom, 1> Atoms; + + TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0) + : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) { } - TableHeaderData(std::vector<DwarfAccelTable::Atom> &AtomList, - uint32_t offset = 0) : - die_offset_base(offset) { - for (size_t i = 0, e = AtomList.size(); i != e; ++i) - Atoms.push_back(AtomList[i]); - } - - TableHeaderData(DwarfAccelTable::Atom Atom, uint32_t offset = 0) - : die_offset_base(offset) { - Atoms.push_back(Atom); - } - #ifndef NDEBUG void print (raw_ostream &O) { O << "die_offset_base: " << die_offset_base << "\n"; @@ -221,11 +212,11 @@ private: StringRef Str; uint32_t HashValue; MCSymbol *Sym; - std::vector<struct HashDataContents*> Data; // offsets - HashData(StringRef S) : Str(S) { + ArrayRef<HashDataContents*> Data; // offsets + HashData(StringRef S, ArrayRef<HashDataContents*> Data) + : Str(S), Data(Data) { HashValue = DwarfAccelTable::HashDJB(S); } - void addData(struct HashDataContents *Datum) { Data.push_back(Datum); } #ifndef NDEBUG void print(raw_ostream &O) { O << "Name: " << Str << "\n"; @@ -255,15 +246,18 @@ private: void EmitHashes(AsmPrinter *); void EmitOffsets(AsmPrinter *, MCSymbol *); void EmitData(AsmPrinter *, DwarfDebug *D); - + + // Allocator for HashData and HashDataContents. 
+ BumpPtrAllocator Allocator; + // Output Variables TableHeader Header; TableHeaderData HeaderData; std::vector<HashData*> Data; // String Data - typedef std::vector<struct HashDataContents*> DataArray; - typedef StringMap<DataArray> StringEntries; + typedef std::vector<HashDataContents*> DataArray; + typedef StringMap<DataArray, BumpPtrAllocator&> StringEntries; StringEntries Entries; // Buckets/Hashes/Offsets @@ -274,8 +268,7 @@ private: // Public Implementation public: - DwarfAccelTable(DwarfAccelTable::Atom); - DwarfAccelTable(std::vector<DwarfAccelTable::Atom> &); + DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>); ~DwarfAccelTable(); void AddName(StringRef, DIE*, char = 0); void FinalizeTable(AsmPrinter *, const char *); diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 3b383f6..cc5b642 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -188,6 +188,24 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { /// addSourceLine - Add location information to specified debug information /// entry. +void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { + // Verify type. + if (!Ty.Verify()) + return; + + unsigned Line = Ty.getLineNumber(); + if (Line == 0) + return; + DIFile File = Ty.getFile(); + unsigned FileID = DD->GetOrCreateSourceID(File.getFilename(), + File.getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) { // Verify namespace. if (!NS.Verify()) @@ -628,7 +646,8 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { } /// addType - Add a new type attribute to the specified entity. -void CompileUnit::addType(DIE *Entity, DIType Ty) { +void CompileUnit::addType(DIE *Entity, DIType Ty, + unsigned Attribute) { if (!Ty.Verify()) return; @@ -636,7 +655,7 @@ void CompileUnit::addType(DIE *Entity, DIType Ty) { DIEEntry *Entry = getDIEEntry(Ty); // If it exists then use the existing value. if (Entry) { - Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); + Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry); return; } @@ -646,7 +665,7 @@ void CompileUnit::addType(DIE *Entity, DIType Ty) { // Set up proxy. Entry = createDIEEntry(Buffer); insertDIEEntry(Ty, Entry); - Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); + Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry); // If this is a complete composite type then include it in the // list of global types. 
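
Stepping back to the DwarfAccelTable rework a few hunks above: it replaces per-entry new/delete with a BumpPtrAllocator. HashData and HashDataContents objects are placement-new'ed into the allocator, the StringMap shares it (StringMap<DataArray, BumpPtrAllocator&>), and everything is released in one shot when the table is destroyed, which is why the explicit cleanup loops in ~DwarfAccelTable() could be dropped. Below is a minimal sketch of that pattern, assuming only the real llvm::BumpPtrAllocator API; HashDataContentsSketch is a made-up stand-in for the actual struct:

    #include "llvm/Support/Allocator.h"

    // Made-up stand-in for DwarfAccelTable::HashDataContents.
    struct HashDataContentsSketch {
      void *Die;
      char Flags;
      HashDataContentsSketch(void *D, char F) : Die(D), Flags(F) {}
    };

    void allocateExample() {
      llvm::BumpPtrAllocator Allocator;
      // Placement new draws memory from the allocator; there is no matching
      // delete, so no per-object cleanup loop is needed.
      HashDataContentsSketch *HD = new (Allocator) HashDataContentsSketch(0, 0);
      (void)HD;
      // ~BumpPtrAllocator releases every allocation at once. Destructors are
      // NOT run, so this only suits trivially-destructible payloads, which
      // HashDataContents is.
    }

Because nothing is ever freed individually, the allocator-backed containers only grow until FinalizeTable runs; that is also why the new AddName asserts the table has not been finalized yet.
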
@@ -826,13 +845,20 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); addSourceLine(ElemDie, DV); - } else if (Element.isDerivedType()) - ElemDie = createMemberDIE(DIDerivedType(Element)); - else if (Element.isObjCProperty()) { + } else if (Element.isDerivedType()) { + DIDerivedType DDTy(Element); + if (DDTy.getTag() == dwarf::DW_TAG_friend) { + ElemDie = new DIE(dwarf::DW_TAG_friend); + addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend); + } else + ElemDie = createMemberDIE(DIDerivedType(Element)); + } else if (Element.isObjCProperty()) { DIObjCProperty Property(Element); ElemDie = new DIE(Property.getTag()); StringRef PropertyName = Property.getObjCPropertyName(); addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName); + addType(ElemDie, Property.getType()); + addSourceLine(ElemDie, Property); StringRef GetterName = Property.getObjCPropertyGetterName(); if (!GetterName.empty()) addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName); @@ -1006,9 +1032,10 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Add function template parameters. addTemplateParams(*SPDie, SP.getTemplateParams()); - // Unfortunately this code needs to stay here to work around - // a bug in older gdbs that requires the linkage name to resolve - // multiple template functions. + // Unfortunately this code needs to stay here instead of below the + // AT_specification code in order to work around a bug in older + // gdbs that requires the linkage name to resolve multiple template + // functions. StringRef LinkageName = SP.getLinkageName(); if (!LinkageName.empty()) addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 4e63c3f..45e407e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -213,6 +213,7 @@ public: void addSourceLine(DIE *Die, DISubprogram SP); void addSourceLine(DIE *Die, DIType Ty); void addSourceLine(DIE *Die, DINameSpace NS); + void addSourceLine(DIE *Die, DIObjCProperty Ty); /// addAddress - Add an address attribute to a die based on the location /// provided. @@ -260,8 +261,10 @@ public: /// addToContextOwner - Add Die into the list of its context owner's children. void addToContextOwner(DIE *Die, DIDescriptor Context); - /// addType - Add a new type attribute to the specified entity. - void addType(DIE *Entity, DIType Ty); + /// addType - Add a new type attribute to the specified entity. This takes + /// and attribute parameter because DW_AT_friend attributes are also + /// type references. + void addType(DIE *Entity, DIType Ty, unsigned Attribute = dwarf::DW_AT_type); /// getOrCreateNameSpace - Create a DIE for DINameSpace. 
DIE *getOrCreateNameSpace(DINameSpace NS); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 388cef4..cb78878 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -19,6 +19,7 @@ #include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Instructions.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/MC/MCAsmInfo.h" @@ -133,6 +134,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfStrSectionSym = TextSectionSym = 0; DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; + + // Turn on accelerator tables for Darwin. + if (Triple(M->getTargetTriple()).isOSDarwin()) + DwarfAccelTables = true; + { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); beginModule(M); @@ -438,7 +444,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, I->second.push_back(std::make_pair(StartLabel, ScopeDIE)); DILocation DL(Scope->getInlinedAt()); - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, + GetOrCreateSourceID(DL.getFilename(), DL.getDirectory())); TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); // Add name to the name table, we do this here because we're guaranteed @@ -554,9 +561,9 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, DIUnit.getLanguage()); NewCU->addString(Die, dwarf::DW_AT_name, FN); - // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This - // simplifies debug range entries. - NewCU->addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0); + // 2.17.1 requires that we use DW_AT_low_pc for a single entry point + // into an entity. + NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0); // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. if (Asm->MAI->doesDwarfRequireRelocationForSectionOffset()) @@ -1086,12 +1093,15 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { if (!MI->isDebugValue()) { DebugLoc DL = MI->getDebugLoc(); if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) { - unsigned Flags = DWARF2_FLAG_IS_STMT; + unsigned Flags = 0; PrevInstLoc = DL; if (DL == PrologEndLoc) { Flags |= DWARF2_FLAG_PROLOGUE_END; PrologEndLoc = DebugLoc(); } + if (PrologEndLoc.isUnknown()) + Flags |= DWARF2_FLAG_IS_STMT; + if (!DL.isUnknown()) { const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); @@ -1186,12 +1196,19 @@ static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) { } /// getFnDebugLoc - Walk up the scope chain of given debug loc and find -/// line number info for the function. +/// line number info for the function. static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) { const MDNode *Scope = getScopeNode(DL, Ctx); DISubprogram SP = getDISubprogram(Scope); - if (SP.Verify()) - return DebugLoc::get(SP.getLineNumber(), 0, SP); + if (SP.Verify()) { + // Check for number of operands since the compatibility is + // cheap here. 
+ if (SP->getNumOperands() > 19) + return DebugLoc::get(SP.getScopeLineNumber(), 0, SP); + else + return DebugLoc::get(SP.getLineNumber(), 0, SP); + } + return DebugLoc(); } @@ -1364,7 +1381,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { MF->getFunction()->getContext()); recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(), FnStartDL.getScope(MF->getFunction()->getContext()), - DWARF2_FLAG_IS_STMT); + 0); } } diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index f57f4a8..ef1d2ba 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -183,8 +184,14 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, TII = tii; TRI = tri; MMI = mmi; - - RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL; + RS = NULL; + + // Use a RegScavenger to help update liveness when required. + MachineRegisterInfo &MRI = MF.getRegInfo(); + if (MRI.tracksLiveness() && TRI->requiresRegisterScavenging(MF)) + RS = new RegScavenger(); + else + MRI.invalidateLiveness(); // Fix CFG. The later algorithms expect it to be right. bool MadeChange = false; diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index d596d8b..d5ea666 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -14,12 +14,12 @@ #define DEBUG_TYPE "regalloc" #include "Spiller.h" -#include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstrBundle.h" @@ -655,7 +655,7 @@ void InlineSpiller::analyzeSiblingValues() { if (OrigVNI->def != VNI->def) DefMI = traceSiblingValue(Reg, VNI, OrigVNI); } - if (DefMI && Edit->checkRematerializable(VNI, DefMI, TII, AA)) { + if (DefMI && Edit->checkRematerializable(VNI, DefMI, AA)) { DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def << " may remat from " << *DefMI); } @@ -856,7 +856,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, SibValueMap::const_iterator SibI = SibValues.find(ParentVNI); if (SibI != SibValues.end()) RM.OrigMI = SibI->second.DefMI; - if (!Edit->canRematerializeAt(RM, UseIdx, false, LIS)) { + if (!Edit->canRematerializeAt(RM, UseIdx, false)) { markValueUsed(&VirtReg, ParentVNI); DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI); return false; @@ -883,12 +883,12 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, } // Alocate a new register for the remat. - LiveInterval &NewLI = Edit->createFrom(Original, LIS, VRM); + LiveInterval &NewLI = Edit->createFrom(Original); NewLI.markNotSpillable(); // Finally we can rematerialize OrigMI before MI. SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM, - LIS, TII, TRI); + TRI); DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' << *LIS.getInstructionFromIndex(DefIdx)); @@ -913,7 +913,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, /// and trim the live ranges after. 
void InlineSpiller::reMaterializeAll() { // analyzeSiblingValues has already tested all relevant defining instructions. - if (!Edit->anyRematerializable(LIS, TII, AA)) + if (!Edit->anyRematerializable(AA)) return; UsedValues.clear(); @@ -954,7 +954,7 @@ void InlineSpiller::reMaterializeAll() { if (DeadDefs.empty()) return; DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n"); - Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII, RegsToSpill); + Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); // Get rid of deleted and empty intervals. for (unsigned i = RegsToSpill.size(); i != 0; --i) { @@ -966,7 +966,7 @@ void InlineSpiller::reMaterializeAll() { LiveInterval &LI = LIS.getInterval(Reg); if (!LI.empty()) continue; - Edit->eraseVirtReg(Reg, LIS); + Edit->eraseVirtReg(Reg); RegsToSpill.erase(RegsToSpill.begin() + (i - 1)); } DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n"); @@ -1181,7 +1181,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Allocate interval around instruction. // FIXME: Infer regclass from instruction alone. - LiveInterval &NewLI = Edit->createFrom(Reg, LIS, VRM); + LiveInterval &NewLI = Edit->createFrom(Reg); NewLI.markNotSpillable(); if (RI.Reads) @@ -1244,7 +1244,7 @@ void InlineSpiller::spillAll() { // Hoisted spills may cause dead code. if (!DeadDefs.empty()) { DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n"); - Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII, RegsToSpill); + Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); } // Finally delete the SnippetCopies. @@ -1260,7 +1260,7 @@ void InlineSpiller::spillAll() { // Delete all spilled registers. for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) - Edit->eraseVirtReg(RegsToSpill[i], LIS); + Edit->eraseVirtReg(RegsToSpill[i]); } void InlineSpiller::spill(LiveRangeEdit &edit) { @@ -1289,5 +1289,5 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { if (!RegsToSpill.empty()) spillAll(); - Edit->calculateRegClassAndHint(MF, LIS, Loops); + Edit->calculateRegClassAndHint(MF, Loops); } diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 97e6547..a1f479a 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -172,6 +172,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_AssemblyFile: { MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, + *getInstrInfo(), Context->getRegisterInfo(), STI); // Create a code emitter if asked to show the encoding. 
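
The LiveIntervalAnalysis.cpp hunk just below fixes a subtle std::for_each pitfall: the algorithm takes its functor by value and returns the copy it operated on, so any state the LIValidator accumulates is silently thrown away unless the caller assigns the return value back. A self-contained illustration of the bug and the fix, with CountNegatives as a made-up stand-in for the validator:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct CountNegatives {
      unsigned Count;
      CountNegatives() : Count(0) {}
      void operator()(int V) { if (V < 0) ++Count; }
    };

    int main() {
      std::vector<int> Vals;
      Vals.push_back(1);
      Vals.push_back(-2);
      Vals.push_back(-3);

      CountNegatives C;
      std::for_each(Vals.begin(), Vals.end(), C); // operates on a copy of C
      assert(C.Count == 0);                       // our C never saw anything

      C = std::for_each(Vals.begin(), Vals.end(), C); // keep the returned copy
      assert(C.Count == 2);
      return 0;
    }

That is exactly why the hunk rewrites std::for_each(Entering.begin(), ...) as validator = std::for_each(...): without the assignment, the subsequent validator.rangesOk() assertion was checking a functor that had never visited a single range.
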
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 3ade660..934cc12 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -1068,9 +1068,9 @@ public: #ifndef NDEBUG LIValidator validator; - std::for_each(Entering.begin(), Entering.end(), validator); - std::for_each(Internal.begin(), Internal.end(), validator); - std::for_each(Exiting.begin(), Exiting.end(), validator); + validator = std::for_each(Entering.begin(), Entering.end(), validator); + validator = std::for_each(Internal.begin(), Internal.end(), validator); + validator = std::for_each(Exiting.begin(), Exiting.end(), validator); assert(validator.rangesOk() && "moveAllOperandsFrom broke liveness."); #endif @@ -1115,9 +1115,9 @@ public: #ifndef NDEBUG LIValidator validator; - std::for_each(Entering.begin(), Entering.end(), validator); - std::for_each(Internal.begin(), Internal.end(), validator); - std::for_each(Exiting.begin(), Exiting.end(), validator); + validator = std::for_each(Entering.begin(), Entering.end(), validator); + validator = std::for_each(Internal.begin(), Internal.end(), validator); + validator = std::for_each(Exiting.begin(), Exiting.end(), validator); assert(validator.rangesOk() && "moveAllOperandsInto broke liveness."); #endif } diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index f9b93d5..695f536 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -12,12 +12,12 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" -#include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" @@ -31,13 +31,12 @@ STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE"); void LiveRangeEdit::Delegate::anchor() { } -LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg, - LiveIntervals &LIS, - VirtRegMap &VRM) { - MachineRegisterInfo &MRI = VRM.getRegInfo(); +LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg) { unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); - VRM.grow(); - VRM.setIsSplitFromReg(VReg, VRM.getOriginal(OldReg)); + if (VRM) { + VRM->grow(); + VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); + } LiveInterval &LI = LIS.getOrCreateInterval(VReg); newRegs_.push_back(&LI); return LI; @@ -45,37 +44,32 @@ LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg, bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, - const TargetInstrInfo &tii, AliasAnalysis *aa) { assert(DefMI && "Missing instruction"); scannedRemattable_ = true; - if (!tii.isTriviallyReMaterializable(DefMI, aa)) + if (!TII.isTriviallyReMaterializable(DefMI, aa)) return false; remattable_.insert(VNI); return true; } -void LiveRangeEdit::scanRemattable(LiveIntervals &lis, - const TargetInstrInfo &tii, - AliasAnalysis *aa) { +void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) { for (LiveInterval::vni_iterator I = parent_.vni_begin(), E = parent_.vni_end(); I != E; ++I) { VNInfo *VNI = *I; if (VNI->isUnused()) continue; - MachineInstr *DefMI = lis.getInstructionFromIndex(VNI->def); + MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def); if (!DefMI) continue; - checkRematerializable(VNI, 
DefMI, tii, aa); + checkRematerializable(VNI, DefMI, aa); } scannedRemattable_ = true; } -bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis, - const TargetInstrInfo &tii, - AliasAnalysis *aa) { +bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) { if (!scannedRemattable_) - scanRemattable(lis, tii, aa); + scanRemattable(aa); return !remattable_.empty(); } @@ -83,8 +77,7 @@ bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis, /// OrigIdx are also available with the same value at UseIdx. bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, - SlotIndex UseIdx, - LiveIntervals &lis) { + SlotIndex UseIdx) { OrigIdx = OrigIdx.getRegSlot(true); UseIdx = UseIdx.getRegSlot(true); for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { @@ -92,10 +85,10 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; // Reserved registers are OK. - if (MO.isUndef() || !lis.hasInterval(MO.getReg())) + if (MO.isUndef() || !LIS.hasInterval(MO.getReg())) continue; - LiveInterval &li = lis.getInterval(MO.getReg()); + LiveInterval &li = LIS.getInterval(MO.getReg()); const VNInfo *OVNI = li.getVNInfoAt(OrigIdx); if (!OVNI) continue; @@ -107,8 +100,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, bool LiveRangeEdit::canRematerializeAt(Remat &RM, SlotIndex UseIdx, - bool cheapAsAMove, - LiveIntervals &lis) { + bool cheapAsAMove) { assert(scannedRemattable_ && "Call anyRematerializable first"); // Use scanRemattable info. @@ -118,10 +110,10 @@ bool LiveRangeEdit::canRematerializeAt(Remat &RM, // No defining instruction provided. SlotIndex DefIdx; if (RM.OrigMI) - DefIdx = lis.getInstructionIndex(RM.OrigMI); + DefIdx = LIS.getInstructionIndex(RM.OrigMI); else { DefIdx = RM.ParentVNI->def; - RM.OrigMI = lis.getInstructionFromIndex(DefIdx); + RM.OrigMI = LIS.getInstructionFromIndex(DefIdx); assert(RM.OrigMI && "No defining instruction for remattable value"); } @@ -130,7 +122,7 @@ bool LiveRangeEdit::canRematerializeAt(Remat &RM, return false; // Verify that all used registers are available with the same values. - if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx, lis)) + if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx)) return false; return true; @@ -140,27 +132,22 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, const Remat &RM, - LiveIntervals &lis, - const TargetInstrInfo &tii, const TargetRegisterInfo &tri, bool Late) { assert(RM.OrigMI && "Invalid remat"); - tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); + TII.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); rematted_.insert(RM.ParentVNI); - return lis.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late) + return LIS.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late) .getRegSlot(); } -void LiveRangeEdit::eraseVirtReg(unsigned Reg, LiveIntervals &LIS) { +void LiveRangeEdit::eraseVirtReg(unsigned Reg) { if (delegate_ && delegate_->LRE_CanEraseVirtReg(Reg)) LIS.removeInterval(Reg); } bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, - SmallVectorImpl<MachineInstr*> &Dead, - MachineRegisterInfo &MRI, - LiveIntervals &LIS, - const TargetInstrInfo &TII) { + SmallVectorImpl<MachineInstr*> &Dead) { MachineInstr *DefMI = 0, *UseMI = 0; // Check that there is a single def and a single use. 
@@ -206,13 +193,10 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, } void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, - LiveIntervals &LIS, VirtRegMap &VRM, - const TargetInstrInfo &TII, ArrayRef<unsigned> RegsBeingSpilled) { SetVector<LiveInterval*, SmallVector<LiveInterval*, 8>, SmallPtrSet<LiveInterval*, 8> > ToShrink; - MachineRegisterInfo &MRI = VRM.getRegInfo(); for (;;) { // Erase all dead defs. @@ -263,7 +247,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, LI.removeValNo(VNI); if (LI.empty()) { ToShrink.remove(&LI); - eraseVirtReg(Reg, LIS); + eraseVirtReg(Reg); } } } @@ -282,7 +266,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, // Shrink just one live interval. Then delete new dead defs. LiveInterval *LI = ToShrink.back(); ToShrink.pop_back(); - if (foldAsLoad(LI, Dead, MRI, LIS, TII)) + if (foldAsLoad(LI, Dead)) continue; if (delegate_) delegate_->LRE_WillShrinkVirtReg(LI->reg); @@ -302,7 +286,6 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, } if (BeingSpilled) continue; - // LI may have been separated, create new intervals. LI->RenumberValues(LIS); @@ -311,16 +294,16 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, if (NumComp <= 1) continue; ++NumFracRanges; - bool IsOriginal = VRM.getOriginal(LI->reg) == LI->reg; + bool IsOriginal = VRM && VRM->getOriginal(LI->reg) == LI->reg; DEBUG(dbgs() << NumComp << " components: " << *LI << '\n'); SmallVector<LiveInterval*, 8> Dups(1, LI); for (unsigned i = 1; i != NumComp; ++i) { - Dups.push_back(&createFrom(LI->reg, LIS, VRM)); + Dups.push_back(&createFrom(LI->reg)); // If LI is an original interval that hasn't been split yet, make the new // intervals their own originals instead of referring to LI. The original // interval must contain all the split products, and LI doesn't. if (IsOriginal) - VRM.setIsSplitFromReg(Dups.back()->reg, 0); + VRM->setIsSplitFromReg(Dups.back()->reg, 0); if (delegate_) delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg); } @@ -329,10 +312,8 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, } void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, - LiveIntervals &LIS, const MachineLoopInfo &Loops) { VirtRegAuxInfo VRAI(MF, LIS, Loops); - MachineRegisterInfo &MRI = MF.getRegInfo(); for (iterator I = begin(), E = end(); I != E; ++I) { LiveInterval &LI = **I; if (MRI.recomputeRegClass(LI.reg, MF.getTarget())) diff --git a/lib/CodeGen/LiveRangeEdit.h b/lib/CodeGen/LiveRangeEdit.h deleted file mode 100644 index 1148025..0000000 --- a/lib/CodeGen/LiveRangeEdit.h +++ /dev/null @@ -1,201 +0,0 @@ -//===---- LiveRangeEdit.h - Basic tools for split and spill -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// The LiveRangeEdit class represents changes done to a virtual register when it -// is spilled or split. -// -// The parent register is never changed. Instead, a number of new virtual -// registers are created and added to the newRegs vector. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_LIVERANGEEDIT_H -#define LLVM_CODEGEN_LIVERANGEEDIT_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/CodeGen/LiveInterval.h" - -namespace llvm { - -class AliasAnalysis; -class LiveIntervals; -class MachineLoopInfo; -class MachineRegisterInfo; -class VirtRegMap; - -class LiveRangeEdit { -public: - /// Callback methods for LiveRangeEdit owners. - class Delegate { - virtual void anchor(); - public: - /// Called immediately before erasing a dead machine instruction. - virtual void LRE_WillEraseInstruction(MachineInstr *MI) {} - - /// Called when a virtual register is no longer used. Return false to defer - /// its deletion from LiveIntervals. - virtual bool LRE_CanEraseVirtReg(unsigned) { return true; } - - /// Called before shrinking the live range of a virtual register. - virtual void LRE_WillShrinkVirtReg(unsigned) {} - - /// Called after cloning a virtual register. - /// This is used for new registers representing connected components of Old. - virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {} - - virtual ~Delegate() {} - }; - -private: - LiveInterval &parent_; - SmallVectorImpl<LiveInterval*> &newRegs_; - Delegate *const delegate_; - - /// firstNew_ - Index of the first register added to newRegs_. - const unsigned firstNew_; - - /// scannedRemattable_ - true when remattable values have been identified. - bool scannedRemattable_; - - /// remattable_ - Values defined by remattable instructions as identified by - /// tii.isTriviallyReMaterializable(). - SmallPtrSet<const VNInfo*,4> remattable_; - - /// rematted_ - Values that were actually rematted, and so need to have their - /// live range trimmed or entirely removed. - SmallPtrSet<const VNInfo*,4> rematted_; - - /// scanRemattable - Identify the parent_ values that may rematerialize. - void scanRemattable(LiveIntervals &lis, - const TargetInstrInfo &tii, - AliasAnalysis *aa); - - /// allUsesAvailableAt - Return true if all registers used by OrigMI at - /// OrigIdx are also available with the same value at UseIdx. - bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, - SlotIndex UseIdx, LiveIntervals &lis); - - /// foldAsLoad - If LI has a single use and a single def that can be folded as - /// a load, eliminate the register by folding the def into the use. - bool foldAsLoad(LiveInterval *LI, SmallVectorImpl<MachineInstr*> &Dead, - MachineRegisterInfo&, LiveIntervals&, const TargetInstrInfo&); - -public: - /// Create a LiveRangeEdit for breaking down parent into smaller pieces. - /// @param parent The register being spilled or split. - /// @param newRegs List to receive any new registers created. This needn't be - /// empty initially, any existing registers are ignored. - LiveRangeEdit(LiveInterval &parent, - SmallVectorImpl<LiveInterval*> &newRegs, - Delegate *delegate = 0) - : parent_(parent), newRegs_(newRegs), - delegate_(delegate), - firstNew_(newRegs.size()), - scannedRemattable_(false) {} - - LiveInterval &getParent() const { return parent_; } - unsigned getReg() const { return parent_.reg; } - - /// Iterator for accessing the new registers added by this edit. 
- typedef SmallVectorImpl<LiveInterval*>::const_iterator iterator; - iterator begin() const { return newRegs_.begin()+firstNew_; } - iterator end() const { return newRegs_.end(); } - unsigned size() const { return newRegs_.size()-firstNew_; } - bool empty() const { return size() == 0; } - LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; } - - ArrayRef<LiveInterval*> regs() const { - return makeArrayRef(newRegs_).slice(firstNew_); - } - - /// createFrom - Create a new virtual register based on OldReg. - LiveInterval &createFrom(unsigned OldReg, LiveIntervals&, VirtRegMap&); - - /// create - Create a new register with the same class and original slot as - /// parent. - LiveInterval &create(LiveIntervals &LIS, VirtRegMap &VRM) { - return createFrom(getReg(), LIS, VRM); - } - - /// anyRematerializable - Return true if any parent values may be - /// rematerializable. - /// This function must be called before any rematerialization is attempted. - bool anyRematerializable(LiveIntervals&, const TargetInstrInfo&, - AliasAnalysis*); - - /// checkRematerializable - Manually add VNI to the list of rematerializable - /// values if DefMI may be rematerializable. - bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, - const TargetInstrInfo&, AliasAnalysis*); - - /// Remat - Information needed to rematerialize at a specific location. - struct Remat { - VNInfo *ParentVNI; // parent_'s value at the remat location. - MachineInstr *OrigMI; // Instruction defining ParentVNI. - explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(0) {} - }; - - /// canRematerializeAt - Determine if ParentVNI can be rematerialized at - /// UseIdx. It is assumed that parent_.getVNINfoAt(UseIdx) == ParentVNI. - /// When cheapAsAMove is set, only cheap remats are allowed. - bool canRematerializeAt(Remat &RM, - SlotIndex UseIdx, - bool cheapAsAMove, - LiveIntervals &lis); - - /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an - /// instruction into MBB before MI. The new instruction is mapped, but - /// liveness is not updated. - /// Return the SlotIndex of the new instruction. - SlotIndex rematerializeAt(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, - const Remat &RM, - LiveIntervals&, - const TargetInstrInfo&, - const TargetRegisterInfo&, - bool Late = false); - - /// markRematerialized - explicitly mark a value as rematerialized after doing - /// it manually. - void markRematerialized(const VNInfo *ParentVNI) { - rematted_.insert(ParentVNI); - } - - /// didRematerialize - Return true if ParentVNI was rematerialized anywhere. - bool didRematerialize(const VNInfo *ParentVNI) const { - return rematted_.count(ParentVNI); - } - - /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try - /// to erase it from LIS. - void eraseVirtReg(unsigned Reg, LiveIntervals &LIS); - - /// eliminateDeadDefs - Try to delete machine instructions that are now dead - /// (allDefsAreDead returns true). This may cause live intervals to be trimmed - /// and further dead efs to be eliminated. - /// RegsBeingSpilled lists registers currently being spilled by the register - /// allocator. These registers should not be split into new intervals - /// as currently those new intervals are not guaranteed to spill. 
- void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, - LiveIntervals&, VirtRegMap&, - const TargetInstrInfo&, - ArrayRef<unsigned> RegsBeingSpilled - = ArrayRef<unsigned>()); - - /// calculateRegClassAndHint - Recompute register class and hint for each new - /// register. - void calculateRegClassAndHint(MachineFunction&, LiveIntervals&, - const MachineLoopInfo&); -}; - -} - -#endif diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 48e1e4c..5a0d97d 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -14,7 +14,7 @@ // the instruction, but are never used after the instruction (i.e., they are // killed). // -// This class computes live variables using are sparse implementation based on +// This class computes live variables using a sparse implementation based on // the machine code SSA form. This class computes live variable information for // each virtual and _register allocatable_ physical register in a function. It // uses the dominance properties of SSA form to efficiently compute live diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index ca8a8e8..1abb8f2 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -321,8 +321,8 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { void MachineBasicBlock::removeLiveIn(unsigned Reg) { std::vector<unsigned>::iterator I = std::find(LiveIns.begin(), LiveIns.end(), Reg); - assert(I != LiveIns.end() && "Not a live in!"); - LiveIns.erase(I); + if (I != LiveIns.end()) + LiveIns.erase(I); } bool MachineBasicBlock::isLiveIn(unsigned Reg) const { @@ -392,22 +392,44 @@ void MachineBasicBlock::updateTerminator() { TII->InsertBranch(*this, TBB, 0, Cond, dl); } } else { + // Walk through the successors and find the successor which is not + // a landing pad and is not the conditional branch destination (in TBB) + // as the fallthrough successor. + MachineBasicBlock *FallthroughBB = 0; + for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) { + if ((*SI)->isLandingPad() || *SI == TBB) + continue; + assert(!FallthroughBB && "Found more than one fallthrough successor."); + FallthroughBB = *SI; + } + if (!FallthroughBB && canFallThrough()) { + // We fallthrough to the same basic block as the conditional jump + // targets. Remove the conditional jump, leaving unconditional + // fallthrough. + // FIXME: This does not seem like a reasonable pattern to support, but it + // has been seen in the wild coming out of degenerate ARM test cases. + TII->RemoveBranch(*this); + + // Finally update the unconditional successor to be reached via a branch + // if it would not be reached by fallthrough. + if (!isLayoutSuccessor(TBB)) + TII->InsertBranch(*this, TBB, 0, Cond, dl); + return; + } + // The block has a fallthrough conditional branch. - MachineBasicBlock *MBBA = *succ_begin(); - MachineBasicBlock *MBBB = *llvm::next(succ_begin()); - if (MBBA == TBB) std::swap(MBBB, MBBA); if (isLayoutSuccessor(TBB)) { if (TII->ReverseBranchCondition(Cond)) { // We can't reverse the condition, add an unconditional branch. 
Cond.clear(); - TII->InsertBranch(*this, MBBA, 0, Cond, dl); + TII->InsertBranch(*this, FallthroughBB, 0, Cond, dl); return; } TII->RemoveBranch(*this); - TII->InsertBranch(*this, MBBA, 0, Cond, dl); - } else if (!isLayoutSuccessor(MBBA)) { + TII->InsertBranch(*this, FallthroughBB, 0, Cond, dl); + } else if (!isLayoutSuccessor(FallthroughBB)) { TII->RemoveBranch(*this); - TII->InsertBranch(*this, TBB, MBBA, Cond, dl); + TII->InsertBranch(*this, TBB, FallthroughBB, Cond, dl); } } } diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 63892af..5ba6851 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -102,13 +102,13 @@ public: } /// \brief Iterator over blocks within the chain. - typedef SmallVectorImpl<MachineBasicBlock *>::const_iterator iterator; + typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator; /// \brief Beginning of blocks within the chain. - iterator begin() const { return Blocks.begin(); } + iterator begin() { return Blocks.begin(); } /// \brief End of blocks within the chain. - iterator end() const { return Blocks.end(); } + iterator end() { return Blocks.end(); } /// \brief Merge a block chain into this one. /// @@ -141,6 +141,14 @@ public: } } +#ifndef NDEBUG + /// \brief Dump the blocks in this chain. + void dump() LLVM_ATTRIBUTE_USED { + for (iterator I = begin(), E = end(); I != E; ++I) + (*I)->dump(); + } +#endif // NDEBUG + /// \brief Count of predecessors within the loop currently being processed. /// /// This count is updated at each loop we process to represent the number of @@ -203,12 +211,15 @@ class MachineBlockPlacement : public MachineFunctionPass { void buildChain(MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter = 0); - MachineBasicBlock *findBestLoopTop(MachineFunction &F, - MachineLoop &L, + MachineBasicBlock *findBestLoopTop(MachineLoop &L, const BlockFilterSet &LoopBlockSet); + MachineBasicBlock *findBestLoopExit(MachineFunction &F, + MachineLoop &L, + const BlockFilterSet &LoopBlockSet); void buildLoopChains(MachineFunction &F, MachineLoop &L); + void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB, + const BlockFilterSet &LoopBlockSet); void buildCFGChains(MachineFunction &F); - void AlignLoops(MachineFunction &F); public: static char ID; // Pass identification, replacement for typeid @@ -430,7 +441,6 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(), WBE = WorkList.end(); WBI != WBE; ++WBI) { - assert(!BlockFilter || BlockFilter->count(*WBI)); BlockChain &SuccChain = *BlockToChain[*WBI]; if (&SuccChain == &Chain) { DEBUG(dbgs() << " " << getBlockName(*WBI) @@ -533,16 +543,89 @@ void MachineBlockPlacement::buildChain( /// \brief Find the best loop top block for layout. /// +/// Look for a block which is strictly better than the loop header for laying +/// out at the top of the loop. This looks for one and only one pattern: +/// a latch block with no conditional exit. This block will cause a conditional +/// jump around it or will be the bottom of the loop if we lay it out in place, +/// but if it it doesn't end up at the bottom of the loop for any reason, +/// rotation alone won't fix it. Because such a block will always result in an +/// unconditional jump (for the backedge) rotating it in front of the loop +/// header is always profitable. 
+MachineBasicBlock * +MachineBlockPlacement::findBestLoopTop(MachineLoop &L, + const BlockFilterSet &LoopBlockSet) { + // Check that the header hasn't been fused with a preheader block due to + // crazy branches. If it has, we need to start with the header at the top to + // prevent pulling the preheader into the loop body. + BlockChain &HeaderChain = *BlockToChain[L.getHeader()]; + if (!LoopBlockSet.count(*HeaderChain.begin())) + return L.getHeader(); + + DEBUG(dbgs() << "Finding best loop top for: " + << getBlockName(L.getHeader()) << "\n"); + + BlockFrequency BestPredFreq; + MachineBasicBlock *BestPred = 0; + for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(), + PE = L.getHeader()->pred_end(); + PI != PE; ++PI) { + MachineBasicBlock *Pred = *PI; + if (!LoopBlockSet.count(Pred)) + continue; + DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", " + << Pred->succ_size() << " successors, " + << MBFI->getBlockFreq(Pred) << " freq\n"); + if (Pred->succ_size() > 1) + continue; + + BlockFrequency PredFreq = MBFI->getBlockFreq(Pred); + if (!BestPred || PredFreq > BestPredFreq || + (!(PredFreq < BestPredFreq) && + Pred->isLayoutSuccessor(L.getHeader()))) { + BestPred = Pred; + BestPredFreq = PredFreq; + } + } + + // If no direct predecessor is fine, just use the loop header. + if (!BestPred) + return L.getHeader(); + + // Walk backwards through any straight line of predecessors. + while (BestPred->pred_size() == 1 && + (*BestPred->pred_begin())->succ_size() == 1 && + *BestPred->pred_begin() != L.getHeader()) + BestPred = *BestPred->pred_begin(); + + DEBUG(dbgs() << " final top: " << getBlockName(BestPred) << "\n"); + return BestPred; +} + + +/// \brief Find the best loop exiting block for layout. +/// /// This routine implements the logic to analyze the loop looking for the best /// block to layout at the top of the loop. Typically this is done to maximize /// fallthrough opportunities. MachineBasicBlock * -MachineBlockPlacement::findBestLoopTop(MachineFunction &F, - MachineLoop &L, - const BlockFilterSet &LoopBlockSet) { +MachineBlockPlacement::findBestLoopExit(MachineFunction &F, + MachineLoop &L, + const BlockFilterSet &LoopBlockSet) { + // We don't want to layout the loop linearly in all cases. If the loop header + // is just a normal basic block in the loop, we want to look for what block + // within the loop is the best one to layout at the top. However, if the loop + // header has be pre-merged into a chain due to predecessors not having + // analyzable branches, *and* the predecessor it is merged with is *not* part + // of the loop, rotating the header into the middle of the loop will create + // a non-contiguous range of blocks which is Very Bad. So start with the + // header and only rotate if safe. + BlockChain &HeaderChain = *BlockToChain[L.getHeader()]; + if (!LoopBlockSet.count(*HeaderChain.begin())) + return 0; + BlockFrequency BestExitEdgeFreq; + unsigned BestExitLoopDepth = 0; MachineBasicBlock *ExitingBB = 0; - MachineBasicBlock *LoopingBB = 0; // If there are exits to outer loops, loop rotation can severely limit // fallthrough opportunites unless it selects such an exit. Keep a set of // blocks where rotating to exit with that block will reach an outer loop. @@ -565,15 +648,10 @@ MachineBlockPlacement::findBestLoopTop(MachineFunction &F, // successor isn't found. MachineBasicBlock *OldExitingBB = ExitingBB; BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq; - // We also compute and store the best looping successor for use in layout. 
- MachineBasicBlock *BestLoopSucc = 0; + bool HasLoopingSucc = false; // FIXME: Due to the performance of the probability and weight routines in - // the MBPI analysis, we use the internal weights. This is only valid - // because it is purely a ranking function, we don't care about anything - // but the relative values. - uint32_t BestLoopSuccWeight = 0; - // FIXME: We also manually compute the probabilities to avoid quadratic - // behavior. + // the MBPI analysis, we use the internal weights and manually compute the + // probabilities to avoid quadratic behavior. uint32_t WeightScale = 0; uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale); for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(), @@ -585,10 +663,8 @@ MachineBlockPlacement::findBestLoopTop(MachineFunction &F, continue; BlockChain &SuccChain = *BlockToChain[*SI]; // Don't split chains, either this chain or the successor's chain. - if (&Chain == &SuccChain || *SI != *SuccChain.begin()) { - DEBUG(dbgs() << " " << (LoopBlockSet.count(*SI) ? "looping: " - : "exiting: ") - << getBlockName(*I) << " -> " + if (&Chain == &SuccChain) { + DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " << getBlockName(*SI) << " (chain conflict)\n"); continue; } @@ -597,60 +673,103 @@ MachineBlockPlacement::findBestLoopTop(MachineFunction &F, if (LoopBlockSet.count(*SI)) { DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> " << getBlockName(*SI) << " (" << SuccWeight << ")\n"); - if (BestLoopSucc && BestLoopSuccWeight >= SuccWeight) - continue; - - BestLoopSucc = *SI; - BestLoopSuccWeight = SuccWeight; + HasLoopingSucc = true; continue; } + unsigned SuccLoopDepth = 0; + if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) { + SuccLoopDepth = ExitLoop->getLoopDepth(); + if (ExitLoop->contains(&L)) + BlocksExitingToOuterLoop.insert(*I); + } + BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb; DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " - << getBlockName(*SI) << " (" << ExitEdgeFreq << ")\n"); + << getBlockName(*SI) << " [L:" << SuccLoopDepth + << "] (" << ExitEdgeFreq << ")\n"); // Note that we slightly bias this toward an existing layout successor to // retain incoming order in the absence of better information. // FIXME: Should we bias this more strongly? It's pretty weak. - if (!ExitingBB || ExitEdgeFreq > BestExitEdgeFreq || + if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth || + ExitEdgeFreq > BestExitEdgeFreq || ((*I)->isLayoutSuccessor(*SI) && !(ExitEdgeFreq < BestExitEdgeFreq))) { BestExitEdgeFreq = ExitEdgeFreq; ExitingBB = *I; } - - if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) - if (ExitLoop->contains(&L)) - BlocksExitingToOuterLoop.insert(*I); } // Restore the old exiting state, no viable looping successor was found. - if (!BestLoopSucc) { + if (!HasLoopingSucc) { ExitingBB = OldExitingBB; BestExitEdgeFreq = OldBestExitEdgeFreq; continue; } - - // If this was best exiting block thus far, also record the looping block. - if (ExitingBB == *I) - LoopingBB = BestLoopSucc; } - // Without a candidate exitting block or with only a single block in the + // Without a candidate exiting block or with only a single block in the // loop, just use the loop header to layout the loop. if (!ExitingBB || L.getNumBlocks() == 1) - return L.getHeader(); + return 0; // Also, if we have exit blocks which lead to outer loops but didn't select // one of them as the exiting block we are rotating toward, disable loop // rotation altogether. 
if (!BlocksExitingToOuterLoop.empty() && !BlocksExitingToOuterLoop.count(ExitingBB)) - return L.getHeader(); + return 0; - assert(LoopingBB && "All successors of a loop block are exit blocks!"); DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n"); - DEBUG(dbgs() << " Best top block: " << getBlockName(LoopingBB) << "\n"); - return LoopingBB; + return ExitingBB; +} + +/// \brief Attempt to rotate an exiting block to the bottom of the loop. +/// +/// Once we have built a chain, try to rotate it to line up the hot exit block +/// with fallthrough out of the loop if doing so doesn't introduce unnecessary +/// branches. For example, if the loop has fallthrough into its header and out +/// of its bottom already, don't rotate it. +void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, + MachineBasicBlock *ExitingBB, + const BlockFilterSet &LoopBlockSet) { + if (!ExitingBB) + return; + + MachineBasicBlock *Top = *LoopChain.begin(); + bool ViableTopFallthrough = false; + for (MachineBasicBlock::pred_iterator PI = Top->pred_begin(), + PE = Top->pred_end(); + PI != PE; ++PI) { + BlockChain *PredChain = BlockToChain[*PI]; + if (!LoopBlockSet.count(*PI) && + (!PredChain || *PI == *llvm::prior(PredChain->end()))) { + ViableTopFallthrough = true; + break; + } + } + + // If the header has viable fallthrough, check whether the current loop + // bottom is a viable exiting block. If so, bail out as rotating will + // introduce an unnecessary branch. + if (ViableTopFallthrough) { + MachineBasicBlock *Bottom = *llvm::prior(LoopChain.end()); + for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(), + SE = Bottom->succ_end(); + SI != SE; ++SI) { + BlockChain *SuccChain = BlockToChain[*SI]; + if (!LoopBlockSet.count(*SI) && + (!SuccChain || *SI == *SuccChain->begin())) + return; + } + } + + BlockChain::iterator ExitIt = std::find(LoopChain.begin(), LoopChain.end(), + ExitingBB); + if (ExitIt == LoopChain.end()) + return; + + std::rotate(LoopChain.begin(), llvm::next(ExitIt), LoopChain.end()); } /// \brief Forms basic block chains from the natural loop structures. @@ -669,8 +788,20 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, SmallVector<MachineBasicBlock *, 16> BlockWorkList; BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end()); - MachineBasicBlock *LayoutTop = findBestLoopTop(F, L, LoopBlockSet); - BlockChain &LoopChain = *BlockToChain[LayoutTop]; + // First check to see if there is an obviously preferable top block for the + // loop. This will default to the header, but may end up as one of the + // predecessors to the header if there is one which will result in strictly + // fewer branches in the loop body. + MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet); + + // If we selected just the header for the loop top, look for a potentially + // profitable exit block in the event that rotating the loop can eliminate + // branches by placing an exit edge at the bottom. 
+ MachineBasicBlock *ExitingBB = 0; + if (LoopTop == L.getHeader()) + ExitingBB = findBestLoopExit(F, L, LoopBlockSet); + + BlockChain &LoopChain = *BlockToChain[LoopTop]; // FIXME: This is a really lame way of walking the chains in the loop: we // walk the blocks, and use a set to prevent visiting a particular chain @@ -702,7 +833,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, BlockWorkList.push_back(*Chain.begin()); } - buildChain(LayoutTop, LoopChain, BlockWorkList, &LoopBlockSet); + buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet); + rotateLoop(LoopChain, ExitingBB, LoopBlockSet); DEBUG({ // Crash at the end so we get all of the debugging output first. @@ -714,7 +846,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"; } for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end(); - BCI != BCE; ++BCI) + BCI != BCE; ++BCI) { + dbgs() << " ... " << getBlockName(*BCI) << "\n"; if (!LoopBlockSet.erase(*BCI)) { // We don't mark the loop as bad here because there are real situations // where this can occur. For example, with an unanalyzable fallthrough @@ -724,6 +857,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" << " Bad block: " << getBlockName(*BCI) << "\n"; } + } if (!LoopBlockSet.empty()) { BadLoop = true; @@ -863,28 +997,33 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond)) F.back().updateTerminator(); -} -/// \brief Recursive helper to align a loop and any nested loops. -static void AlignLoop(MachineFunction &F, MachineLoop *L, unsigned Align) { - // Recurse through nested loops. - for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) - AlignLoop(F, *I, Align); - - L->getTopBlock()->setAlignment(Align); -} - -/// \brief Align loop headers to target preferred alignments. -void MachineBlockPlacement::AlignLoops(MachineFunction &F) { + // Walk through the backedges of the function now that we have fully laid out + // the basic blocks and align the destination of each backedge. We don't rely + // on the loop info here so that we can align backedges in unnatural CFGs and + // backedges that were introduced purely because of the loop rotations done + // during this layout pass. + // FIXME: This isn't quite right, we shouldn't align backedges that result + // from blocks being sunken below the exit block for the function. if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) return; - unsigned Align = TLI->getPrefLoopAlignment(); if (!Align) return; // Don't care about loop alignment. - for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I) - AlignLoop(F, *I, Align); + SmallPtrSet<MachineBasicBlock *, 16> PreviousBlocks; + for (BlockChain::iterator BI = FunctionChain.begin(), + BE = FunctionChain.end(); + BI != BE; ++BI) { + PreviousBlocks.insert(*BI); + // Set alignment on the destination of all the back edges in the new + // ordering. 
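The replacement alignment scheme needs no MachineLoopInfo: walking blocks in final layout order while remembering everything already placed, any successor seen before is by definition the target of a backedge. A toy version of that invariant (integer indices stand in for basic blocks, and 16 is an arbitrary placeholder for the target's preferred loop alignment); the loop over FunctionChain just below is the real thing, using a SmallPtrSet.

    #include <cstdio>
    #include <set>
    #include <vector>

    struct Block {
      std::vector<int> Succs;
      unsigned Align;
    };

    int main() {
      // Final layout order 0..3 with a backedge 3 -> 1.
      std::vector<Block> Layout(4);
      Layout[0].Succs.push_back(1);
      Layout[1].Succs.push_back(2);
      Layout[2].Succs.push_back(3);
      Layout[3].Succs.push_back(1);  // jumps backward in layout order
      for (size_t i = 0; i != Layout.size(); ++i)
        Layout[i].Align = 0;

      std::set<int> Placed;
      for (int I = 0; I != (int)Layout.size(); ++I) {
        // Inserting I before scanning its successors also catches
        // single-block loops that branch to themselves.
        Placed.insert(I);
        for (size_t S = 0; S != Layout[I].Succs.size(); ++S) {
          int Succ = Layout[I].Succs[S];
          if (Placed.count(Succ))
            Layout[Succ].Align = 16;  // destination of a backedge
        }
      }
      std::printf("block 1 alignment: %u\n", Layout[1].Align);  // 16
    }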
+ for (MachineBasicBlock::succ_iterator SI = (*BI)->succ_begin(), + SE = (*BI)->succ_end(); + SI != SE; ++SI) + if (PreviousBlocks.count(*SI)) + (*SI)->setAlignment(Align); + } } bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { @@ -900,7 +1039,6 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { assert(BlockToChain.empty()); buildCFGChains(F); - AlignLoops(F); BlockToChain.clear(); ChainAllocator.DestroyAll(); diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index 9aa74f1..9730eaa 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -43,9 +43,12 @@ namespace { virtual bool runOnMachineFunction(MachineFunction &MF); private: + typedef SmallVector<unsigned, 4> DestList; + typedef DenseMap<unsigned, DestList> SourceMap; + void SourceNoLongerAvailable(unsigned Reg, - DenseMap<unsigned, unsigned> &SrcMap, - DenseMap<unsigned, MachineInstr*> &AvailCopyMap); + SourceMap &SrcMap, + DenseMap<unsigned, MachineInstr*> &AvailCopyMap); bool CopyPropagateBlock(MachineBasicBlock &MBB); }; } @@ -57,24 +60,32 @@ INITIALIZE_PASS(MachineCopyPropagation, "machine-cp", void MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg, - DenseMap<unsigned, unsigned> &SrcMap, + SourceMap &SrcMap, DenseMap<unsigned, MachineInstr*> &AvailCopyMap) { - DenseMap<unsigned, unsigned>::iterator SI = SrcMap.find(Reg); + SourceMap::iterator SI = SrcMap.find(Reg); if (SI != SrcMap.end()) { - unsigned MappedDef = SI->second; - // Source of copy is no longer available for propagation. - if (AvailCopyMap.erase(MappedDef)) { - for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR) - AvailCopyMap.erase(*SR); + const DestList& Defs = SI->second; + for (DestList::const_iterator I = Defs.begin(), E = Defs.end(); + I != E; ++I) { + unsigned MappedDef = *I; + // Source of copy is no longer available for propagation. 
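This is the point of turning SrcMap into a SourceMap of lists: several registers can hold copies of the same source at once, and clobbering that source has to invalidate all of them. A self-contained sketch with standard containers standing in for DenseMap and SmallVector (register numbers invented); the hunk continues below with this same per-def erase, extended to subregisters and aliases.

    #include <cstdio>
    #include <map>
    #include <set>
    #include <vector>

    int main() {
      // State after "r1 = COPY r0" and "r2 = COPY r0".
      std::map<unsigned, std::vector<unsigned> > SrcMap;  // Src -> Defs
      std::set<unsigned> AvailCopyMap;  // defs whose copy is still live
      SrcMap[0].push_back(1);
      SrcMap[0].push_back(2);
      AvailCopyMap.insert(1);
      AvailCopyMap.insert(2);

      // r0 is clobbered: every def copied from it becomes unavailable. A
      // single-valued Src -> Def map would only have invalidated one of them.
      const std::vector<unsigned> &Defs = SrcMap[0];
      for (size_t i = 0; i != Defs.size(); ++i)
        AvailCopyMap.erase(Defs[i]);

      std::printf("available copies left: %u\n", (unsigned)AvailCopyMap.size());
    }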
+ if (AvailCopyMap.erase(MappedDef)) { + for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR) + AvailCopyMap.erase(*SR); + } } } for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) { SI = SrcMap.find(*AS); if (SI != SrcMap.end()) { - unsigned MappedDef = SI->second; - if (AvailCopyMap.erase(MappedDef)) { - for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR) - AvailCopyMap.erase(*SR); + const DestList& Defs = SI->second; + for (DestList::const_iterator I = Defs.begin(), E = Defs.end(); + I != E; ++I) { + unsigned MappedDef = *I; + if (AvailCopyMap.erase(MappedDef)) { + for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR) + AvailCopyMap.erase(*SR); + } } } } @@ -125,10 +136,10 @@ static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src, } bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { - SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion - DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map - DenseMap<unsigned, MachineInstr*> CopyMap; // Def -> copies map - DenseMap<unsigned, unsigned> SrcMap; // Src -> Def map + SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion + DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map + DenseMap<unsigned, MachineInstr*> CopyMap; // Def -> copies map + SourceMap SrcMap; // Src -> Def map bool Changed = false; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) { @@ -213,7 +224,10 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // Remember source that's copied to Def. Once it's clobbered, then // it's no longer available for copy propagation. - SrcMap[Src] = Def; + if (std::find(SrcMap[Src].begin(), SrcMap[Src].end(), Def) == + SrcMap[Src].end()) { + SrcMap[Src].push_back(Def); + } continue; } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 8ab8b18..d8c2f6a 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -195,9 +195,10 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { MachineMemOperand * MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f, uint64_t s, unsigned base_alignment, - const MDNode *TBAAInfo) { + const MDNode *TBAAInfo, + const MDNode *Ranges) { return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment, - TBAAInfo); + TBAAInfo, Ranges); } MachineMemOperand * @@ -284,7 +285,13 @@ void MachineFunction::dump() const { } void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { - OS << "# Machine code for function " << Fn->getName() << ":\n"; + OS << "# Machine code for function " << Fn->getName() << ": "; + if (RegInfo) { + OS << (RegInfo->isSSA() ? 
"SSA" : "Post SSA"); + if (!RegInfo->tracksLiveness()) + OS << ", not tracking liveness"; + } + OS << '\n'; // Print Frame Information FrameInfo->print(*this, OS); diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 43af1ad..e553a04 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -381,10 +381,11 @@ MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) { MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f, uint64_t s, unsigned int a, - const MDNode *TBAAInfo) + const MDNode *TBAAInfo, + const MDNode *Ranges) : PtrInfo(ptrinfo), Size(s), Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)), - TBAAInfo(TBAAInfo) { + TBAAInfo(TBAAInfo), Ranges(Ranges) { assert((PtrInfo.V == 0 || isa<PointerType>(PtrInfo.V->getType())) && "invalid pointer value"); assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 428a9d9..8c562cc 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -80,6 +80,14 @@ namespace { MachineLoop *CurLoop; // The current loop we are working on. MachineBasicBlock *CurPreheader; // The preheader for CurLoop. + // Exit blocks for CurLoop. + SmallVector<MachineBasicBlock*, 8> ExitBlocks; + + bool isExitBlock(const MachineBasicBlock *MBB) const { + return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) != + ExitBlocks.end(); + } + // Track 'estimated' register pressure. SmallSet<unsigned, 32> RegSeen; SmallVector<unsigned, 8> RegPressure; @@ -182,9 +190,9 @@ namespace { /// bool IsLoopInvariantInst(MachineInstr &I); - /// HasAnyPHIUse - Return true if the specified register is used by any - /// phi node. - bool HasAnyPHIUse(unsigned Reg) const; + /// HasLoopPHIUse - Return true if the specified instruction is used by any + /// phi node in the current loop. + bool HasLoopPHIUse(const MachineInstr *MI) const; /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' /// and an use in the current loop, return true if the target considered @@ -197,7 +205,7 @@ namespace { /// CanCauseHighRegPressure - Visit BBs from header to current BB, /// check if hoisting an instruction of the given cost matrix can cause high /// register pressure. - bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost); + bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, bool Cheap); /// UpdateBackTraceRegPressure - Traverse the back trace from header to /// the current block and update their register pressures to reflect the @@ -348,6 +356,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { while (!Worklist.empty()) { CurLoop = Worklist.pop_back_val(); CurPreheader = 0; + ExitBlocks.clear(); // If this is done before regalloc, only visit outer-most preheader-sporting // loops. @@ -356,6 +365,8 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { continue; } + CurLoop->getExitBlocks(ExitBlocks); + if (!PreRegAlloc) HoistRegionPostRA(); else { @@ -478,6 +489,10 @@ void MachineLICM::ProcessMI(MachineInstr *MI, /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop /// invariants out to the preheader. void MachineLICM::HoistRegionPostRA() { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + return; + unsigned NumRegs = TRI->getNumRegs(); BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop. BitVector PhysRegClobbers(NumRegs); // Regs defined more than once. 
@@ -514,25 +529,46 @@ void MachineLICM::HoistRegionPostRA() {
     }
   }
 
+  // Gather the registers read / clobbered by the terminator.
+  BitVector TermRegs(NumRegs);
+  MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
+  if (TI != Preheader->end()) {
+    for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = TI->getOperand(i);
+      if (!MO.isReg())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (!Reg)
+        continue;
+      for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS)
+        TermRegs.set(*AS);
+    }
+  }
+
   // Now evaluate whether the potential candidates qualify.
   // 1. Check if the candidate defined register is defined by another
   //    instruction in the loop.
   // 2. If the candidate is a load from stack slot (always true for now),
   //    check if the slot is stored anywhere in the loop.
+  // 3. Make sure the candidate's def does not clobber any register read by
+  //    the terminator, and is not itself clobbered by the terminator.
   for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
     if (Candidates[i].FI != INT_MIN &&
         StoredFIs.count(Candidates[i].FI))
       continue;
 
-    if (!PhysRegClobbers.test(Candidates[i].Def)) {
+    unsigned Def = Candidates[i].Def;
+    if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
       bool Safe = true;
       MachineInstr *MI = Candidates[i].MI;
       for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
         const MachineOperand &MO = MI->getOperand(j);
         if (!MO.isReg() || MO.isDef() || !MO.getReg())
           continue;
-        if (PhysRegDefs.test(MO.getReg()) ||
-            PhysRegClobbers.test(MO.getReg())) {
+        unsigned Reg = MO.getReg();
+        if (PhysRegDefs.test(Reg) ||
+            PhysRegClobbers.test(Reg)) {
           // If it's using a non-loop-invariant register, then it's obviously
           // not safe to hoist.
           Safe = false;
@@ -571,7 +607,6 @@ void MachineLICM::AddToLiveIns(unsigned Reg) {
 /// dirty work.
 void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
   MachineBasicBlock *Preheader = getCurPreheader();
-  if (!Preheader) return;
 
   // Now move the instructions to the predecessor, inserting it before any
   // terminator instructions.
@@ -931,22 +966,40 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
 }
 
-/// HasAnyPHIUse - Return true if the specified register is used by any
-/// phi node.
-bool MachineLICM::HasAnyPHIUse(unsigned Reg) const {
-  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
-       UE = MRI->use_end(); UI != UE; ++UI) {
-    MachineInstr *UseMI = &*UI;
-    if (UseMI->isPHI())
-      return true;
-    // Look pass copies as well.
-    if (UseMI->isCopy()) {
-      unsigned Def = UseMI->getOperand(0).getReg();
-      if (TargetRegisterInfo::isVirtualRegister(Def) &&
-          HasAnyPHIUse(Def))
-        return true;
+/// HasLoopPHIUse - Return true if the specified instruction is used by a
+/// phi node and hoisting it could cause a copy to be inserted.
+bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
+  SmallVector<const MachineInstr*, 8> Work(1, MI);
+  do {
+    MI = Work.pop_back_val();
+    for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+      if (!MO->isReg() || !MO->isDef())
+        continue;
+      unsigned Reg = MO->getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(Reg))
+        continue;
+      for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+           UE = MRI->use_end(); UI != UE; ++UI) {
+        MachineInstr *UseMI = &*UI;
+        // A PHI may cause a copy to be inserted.
+        if (UseMI->isPHI()) {
+          // A PHI inside the loop causes a copy because the live range of Reg is
+          // extended across the PHI.
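Stepping back to rule 3 in HoistRegionPostRA above: post-RA hoisting inserts the candidate immediately in front of the preheader's terminator, so a candidate whose def is read (or clobbered) by that terminator would change the branch. A toy check with std::bitset standing in for BitVector and invented register numbers; the PHI-use walk interrupted here resumes below with the in-loop and exit-block cases.

    #include <bitset>
    #include <cstdio>
    #include <vector>

    int main() {
      std::bitset<32> TermRegs;  // regs read / clobbered by the terminator
      TermRegs.set(4);           // say, a conditional branch that tests r4

      // A candidate is rejected when its def would be visible to, or smashed
      // by, the terminator it is hoisted in front of.
      std::vector<unsigned> CandidateDefs;
      CandidateDefs.push_back(4);
      CandidateDefs.push_back(9);
      for (size_t i = 0; i != CandidateDefs.size(); ++i)
        std::printf("def r%u: %s\n", CandidateDefs[i],
                    TermRegs.test(CandidateDefs[i]) ? "rejected" : "kept");
    }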
+ if (CurLoop->contains(UseMI)) + return true; + // A PHI in an exit block can cause a copy to be inserted if the PHI + // has multiple predecessors in the loop with different values. + // For now, approximate by rejecting all exit blocks. + if (isExitBlock(UseMI->getParent())) + return true; + continue; + } + // Look past copies as well. + if (UseMI->isCopy() && CurLoop->contains(UseMI)) + Work.push_back(UseMI); + } } - } + } while (!Work.empty()); return false; } @@ -1014,7 +1067,8 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { /// CanCauseHighRegPressure - Visit BBs from header to current BB, check /// if hoisting an instruction of the given cost matrix can cause high /// register pressure. -bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost) { +bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, + bool CheapInstr) { for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); CI != CE; ++CI) { if (CI->second <= 0) @@ -1023,6 +1077,12 @@ bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost) { unsigned RCId = CI->first; unsigned Limit = RegLimit[RCId]; int Cost = CI->second; + + // Don't hoist cheap instructions if they would increase register pressure, + // even if we're under the limit. + if (CheapInstr) + return true; + for (unsigned i = BackTrace.size(); i != 0; --i) { SmallVector<unsigned, 8> &RP = BackTrace[i-1]; if (RP[RCId] + Cost >= Limit) @@ -1085,87 +1145,95 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { if (MI.isImplicitDef()) return true; - // If the instruction is cheap, only hoist if it is re-materilizable. LICM - // will increase register pressure. It's probably not worth it if the - // instruction is cheap. - // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting - // these tend to help performance in low register pressure situation. The - // trade off is it may cause spill in high pressure situation. It will end up - // adding a store in the loop preheader. But the reload is no more expensive. - // The side benefit is these loads are frequently CSE'ed. - if (IsCheapInstruction(MI)) { - if (!TII->isTriviallyReMaterializable(&MI, AA)) - return false; - } else { - // Estimate register pressure to determine whether to LICM the instruction. - // In low register pressure situation, we can be more aggressive about - // hoisting. Also, favors hoisting long latency instructions even in - // moderately high pressure situation. - // FIXME: If there are long latency loop-invariant instructions inside the - // loop at this point, why didn't the optimizer's LICM hoist them? - DenseMap<unsigned, int> Cost; - for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.isImplicit()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; + // Besides removing computation from the loop, hoisting an instruction has + // these effects: + // + // - The value defined by the instruction becomes live across the entire + // loop. This increases register pressure in the loop. + // + // - If the value is used by a PHI in the loop, a copy will be required for + // lowering the PHI after extending the live range. + // + // - When hoisting the last use of a value in the loop, that value no longer + // needs to be live in the loop. This lowers register pressure in the loop. 
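HasLoopPHIUse above replaces the old per-register recursion with an explicit worklist over the instruction's defs, looking through in-loop copies. A compact model on a hypothetical mini-IR (nothing here is LLVM API, and the exit-block approximation is left out); IsProfitableToHoist, continuing below, consults this predicate before anything else.

    #include <cstdio>
    #include <vector>

    struct Inst {
      bool IsPHI;
      bool IsCopy;
      bool InLoop;
      std::vector<Inst *> Users;
      Inst() : IsPHI(false), IsCopy(false), InLoop(true) {}
    };

    static bool hasLoopPHIUse(Inst *MI) {
      std::vector<Inst *> Work(1, MI);
      do {
        Inst *I = Work.back();
        Work.pop_back();
        for (size_t u = 0; u != I->Users.size(); ++u) {
          Inst *U = I->Users[u];
          if (U->IsPHI && U->InLoop)
            return true;        // PHI in the loop: hoisting creates a copy
          if (U->IsCopy && U->InLoop)
            Work.push_back(U);  // look through copies transitively
        }
      } while (!Work.empty());
      return false;
    }

    int main() {
      Inst Phi, Copy, Def;
      Phi.IsPHI = true;
      Copy.IsCopy = true;
      Copy.Users.push_back(&Phi);
      Def.Users.push_back(&Copy);   // def -> copy -> PHI
      std::printf("creates copy: %d\n", (int)hasLoopPHIUse(&Def));  // 1
    }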
+
+  bool CheapInstr = IsCheapInstruction(MI);
+  bool CreatesCopy = HasLoopPHIUse(&MI);
+
+  // Don't hoist a cheap instruction if it would create a copy in the loop.
+  if (CheapInstr && CreatesCopy) {
+    DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
+    return false;
+  }
 
-      unsigned RCId, RCCost;
-      getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost);
-      if (MO.isDef()) {
-        if (HasHighOperandLatency(MI, i, Reg)) {
-          ++NumHighLatency;
-          return true;
-        }
+  // Rematerializable instructions should always be hoisted since the register
+  // allocator can just pull them down again when needed.
+  if (TII->isTriviallyReMaterializable(&MI, AA))
+    return true;
 
-        DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
-        if (CI != Cost.end())
-          CI->second += RCCost;
-        else
-          Cost.insert(std::make_pair(RCId, RCCost));
-      } else if (isOperandKill(MO, MRI)) {
-        // Is a virtual register use is a kill, hoisting it out of the loop
-        // may actually reduce register pressure or be register pressure
-        // neutral.
-        DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
-        if (CI != Cost.end())
-          CI->second -= RCCost;
-        else
-          Cost.insert(std::make_pair(RCId, -RCCost));
+  // Estimate register pressure to determine whether to LICM the instruction.
+  // In low register pressure situation, we can be more aggressive about
+  // hoisting. Also, favors hoisting long latency instructions even in
+  // moderately high pressure situation.
+  // Cheap instructions will only be hoisted if they don't increase register
+  // pressure at all.
+  // FIXME: If there are long latency loop-invariant instructions inside the
+  // loop at this point, why didn't the optimizer's LICM hoist them?
+  DenseMap<unsigned, int> Cost;
+  for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || MO.isImplicit())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
+      continue;
+
+    unsigned RCId, RCCost;
+    getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost);
+    if (MO.isDef()) {
+      if (HasHighOperandLatency(MI, i, Reg)) {
+        DEBUG(dbgs() << "Hoist High Latency: " << MI);
+        ++NumHighLatency;
+        return true;
       }
+      Cost[RCId] += RCCost;
+    } else if (isOperandKill(MO, MRI)) {
+      // If a virtual register use is a kill, hoisting it out of the loop
+      // may actually reduce register pressure or be register pressure
+      // neutral.
+      Cost[RCId] -= RCCost;
     }
+  }
 
-    // Visit BBs from header to current BB, if hoisting this doesn't cause
-    // high register pressure, then it's safe to proceed.
-    if (!CanCauseHighRegPressure(Cost)) {
-      ++NumLowRP;
-      return true;
-    }
+  // Visit BBs from header to current BB; if hoisting this doesn't cause
+  // high register pressure, then it's safe to proceed.
+  if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
+    DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
+    ++NumLowRP;
+    return true;
+  }
 
-    // Do not "speculate" in high register pressure situation. If an
-    // instruction is not guaranteed to be executed in the loop, it's best to be
-    // conservative.
-    if (AvoidSpeculation &&
-        (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI)))
-      return false;
+  // Don't risk increasing register pressure if it would create copies.
+  if (CreatesCopy) {
+    DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
+    return false;
+  }
 
-    // High register pressure situation, only hoist if the instruction is going
-    // to be remat'ed.
- if (!TII->isTriviallyReMaterializable(&MI, AA) && - !MI.isInvariantLoad(AA)) - return false; + // Do not "speculate" in high register pressure situation. If an + // instruction is not guaranteed to be executed in the loop, it's best to be + // conservative. + if (AvoidSpeculation && + (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) { + DEBUG(dbgs() << "Won't speculate: " << MI); + return false; } - // If result(s) of this instruction is used by PHIs outside of the loop, then - // don't hoist it if the instruction because it will introduce an extra copy. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.isDef()) - continue; - if (HasAnyPHIUse(MO.getReg())) - return false; + // High register pressure situation, only hoist if the instruction is going + // to be remat'ed. + if (!TII->isTriviallyReMaterializable(&MI, AA) && + !MI.isInvariantLoad(AA)) { + DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); + return false; } return true; diff --git a/lib/CodeGen/MachinePassRegistry.cpp b/lib/CodeGen/MachinePassRegistry.cpp index 58e067b..cb204fd 100644 --- a/lib/CodeGen/MachinePassRegistry.cpp +++ b/lib/CodeGen/MachinePassRegistry.cpp @@ -18,6 +18,19 @@ using namespace llvm; void MachinePassRegistryListener::anchor() { } +/// setDefault - Set the default constructor by name. +void MachinePassRegistry::setDefault(StringRef Name) { + MachinePassCtor Ctor = 0; + for(MachinePassRegistryNode *R = getList(); R; R = R->getNext()) { + if (R->getName() == Name) { + Ctor = R->getCtor(); + break; + } + } + assert(Ctor && "Unregistered pass name"); + setDefault(Ctor); +} + /// Add - Adds a function pass to the registration list. /// void MachinePassRegistry::Add(MachinePassRegistryNode *Node) { diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index f140dec..7ea1517 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -18,7 +18,7 @@ using namespace llvm; MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) - : TRI(&TRI), IsSSA(true) { + : TRI(&TRI), IsSSA(true), TracksLiveness(true) { VRegInfo.reserve(256); RegAllocHints.reserve(256); UsedPhysRegs.resize(TRI.getNumRegs()); diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 364a244..1d3241b 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -227,6 +227,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { assert(RemainingCount == 0 && "Instruction count mismatch!"); Scheduler->finishBlock(); } + Scheduler->finalizeSchedule(); DEBUG(LIS->print(dbgs())); return true; } diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 830a876..74ba94d 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -202,6 +202,7 @@ namespace { void report(const char *msg, const MachineInstr *MI); void report(const char *msg, const MachineOperand *MO, unsigned MONum); + void checkLiveness(const MachineOperand *MO, unsigned MONum); void markReachable(const MachineBasicBlock *MBB); void calcRegsPassed(); void checkPHIOps(const MachineBasicBlock *MBB); @@ -608,7 +609,9 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { } // Ensure non-terminators don't follow terminators. - if (MI->isTerminator()) { + // Ignore predicated terminators formed by if conversion. 
+ // FIXME: If conversion shouldn't need to violate this rule. + if (MI->isTerminator() && !TII->isPredicated(MI)) { if (!FirstTerminator) FirstTerminator = MI; } else if (FirstTerminator) { @@ -656,112 +659,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { const unsigned Reg = MO->getReg(); if (!Reg) return; + if (MRI->tracksLiveness() && !MI->isDebugValue()) + checkLiveness(MO, MONum); - // Check Live Variables. - if (MI->isDebugValue()) { - // Liveness checks are not valid for debug values. - } else if (MO->isUse() && !MO->isUndef()) { - regsLiveInButUnused.erase(Reg); - - bool isKill = false; - unsigned defIdx; - if (MI->isRegTiedToDefOperand(MONum, &defIdx)) { - // A two-addr use counts as a kill if use and def are the same. - unsigned DefReg = MI->getOperand(defIdx).getReg(); - if (Reg == DefReg) - isKill = true; - else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - report("Two-address instruction operands must be identical", - MO, MONum); - } - } else - isKill = MO->isKill(); - - if (isKill) - addRegWithSubRegs(regsKilled, Reg); - - // Check that LiveVars knows this kill. - if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) && - MO->isKill()) { - LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); - if (std::find(VI.Kills.begin(), - VI.Kills.end(), MI) == VI.Kills.end()) - report("Kill missing from LiveVariables", MO, MONum); - } - - // Check LiveInts liveness and kill. - if (TargetRegisterInfo::isVirtualRegister(Reg) && - LiveInts && !LiveInts->isNotInMIMap(MI)) { - SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getRegSlot(true); - if (LiveInts->hasInterval(Reg)) { - const LiveInterval &LI = LiveInts->getInterval(Reg); - if (!LI.liveAt(UseIdx)) { - report("No live range at use", MO, MONum); - *OS << UseIdx << " is not live in " << LI << '\n'; - } - // Check for extra kill flags. - // Note that we allow missing kill flags for now. - if (MO->isKill() && !LI.killedAt(UseIdx.getRegSlot())) { - report("Live range continues after kill flag", MO, MONum); - *OS << "Live range: " << LI << '\n'; - } - } else { - report("Virtual register has no Live interval", MO, MONum); - } - } - - // Use of a dead register. - if (!regsLive.count(Reg)) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - // Reserved registers may be used even when 'dead'. - if (!isReserved(Reg)) - report("Using an undefined physical register", MO, MONum); - } else { - BBInfo &MInfo = MBBInfoMap[MI->getParent()]; - // We don't know which virtual registers are live in, so only complain - // if vreg was killed in this MBB. Otherwise keep track of vregs that - // must be live in. PHI instructions are handled separately. - if (MInfo.regsKilled.count(Reg)) - report("Using a killed virtual register", MO, MONum); - else if (!MI->isPHI()) - MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI)); - } - } - } else if (MO->isDef()) { - // Register defined. - // TODO: verify that earlyclobber ops are not used. - if (MO->isDead()) - addRegWithSubRegs(regsDead, Reg); - else - addRegWithSubRegs(regsDefined, Reg); - - // Verify SSA form. - if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) && - llvm::next(MRI->def_begin(Reg)) != MRI->def_end()) - report("Multiple virtual register defs in SSA form", MO, MONum); - - // Check LiveInts for a live range, but only for virtual registers. 
- if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && - !LiveInts->isNotInMIMap(MI)) { - SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getRegSlot(); - if (LiveInts->hasInterval(Reg)) { - const LiveInterval &LI = LiveInts->getInterval(Reg); - if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) { - assert(VNI && "NULL valno is not allowed"); - if (VNI->def != DefIdx && !MO->isEarlyClobber()) { - report("Inconsistent valno->def", MO, MONum); - *OS << "Valno " << VNI->id << " is not defined at " - << DefIdx << " in " << LI << '\n'; - } - } else { - report("No live range at def", MO, MONum); - *OS << DefIdx << " is not live in " << LI << '\n'; - } - } else { - report("Virtual register has no Live interval", MO, MONum); - } - } - } // Check register classes. if (MONum < MCID.getNumOperands() && !MO->isImplicit()) { @@ -853,6 +753,115 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } } +void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { + const MachineInstr *MI = MO->getParent(); + const unsigned Reg = MO->getReg(); + + // Both use and def operands can read a register. + if (MO->readsReg()) { + regsLiveInButUnused.erase(Reg); + + bool isKill = false; + unsigned defIdx; + if (MI->isRegTiedToDefOperand(MONum, &defIdx)) { + // A two-addr use counts as a kill if use and def are the same. + unsigned DefReg = MI->getOperand(defIdx).getReg(); + if (Reg == DefReg) + isKill = true; + else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + report("Two-address instruction operands must be identical", MO, MONum); + } + } else + isKill = MO->isKill(); + + if (isKill) + addRegWithSubRegs(regsKilled, Reg); + + // Check that LiveVars knows this kill. + if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) && + MO->isKill()) { + LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); + if (std::find(VI.Kills.begin(), VI.Kills.end(), MI) == VI.Kills.end()) + report("Kill missing from LiveVariables", MO, MONum); + } + + // Check LiveInts liveness and kill. + if (TargetRegisterInfo::isVirtualRegister(Reg) && + LiveInts && !LiveInts->isNotInMIMap(MI)) { + SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getRegSlot(true); + if (LiveInts->hasInterval(Reg)) { + const LiveInterval &LI = LiveInts->getInterval(Reg); + if (!LI.liveAt(UseIdx)) { + report("No live range at use", MO, MONum); + *OS << UseIdx << " is not live in " << LI << '\n'; + } + // Check for extra kill flags. + // Note that we allow missing kill flags for now. + if (MO->isKill() && !LI.killedAt(UseIdx.getRegSlot())) { + report("Live range continues after kill flag", MO, MONum); + *OS << "Live range: " << LI << '\n'; + } + } else { + report("Virtual register has no Live interval", MO, MONum); + } + } + + // Use of a dead register. + if (!regsLive.count(Reg)) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + // Reserved registers may be used even when 'dead'. + if (!isReserved(Reg)) + report("Using an undefined physical register", MO, MONum); + } else { + BBInfo &MInfo = MBBInfoMap[MI->getParent()]; + // We don't know which virtual registers are live in, so only complain + // if vreg was killed in this MBB. Otherwise keep track of vregs that + // must be live in. PHI instructions are handled separately. + if (MInfo.regsKilled.count(Reg)) + report("Using a killed virtual register", MO, MONum); + else if (!MI->isPHI()) + MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI)); + } + } + } + + if (MO->isDef()) { + // Register defined. 
+ // TODO: verify that earlyclobber ops are not used. + if (MO->isDead()) + addRegWithSubRegs(regsDead, Reg); + else + addRegWithSubRegs(regsDefined, Reg); + + // Verify SSA form. + if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) && + llvm::next(MRI->def_begin(Reg)) != MRI->def_end()) + report("Multiple virtual register defs in SSA form", MO, MONum); + + // Check LiveInts for a live range, but only for virtual registers. + if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && + !LiveInts->isNotInMIMap(MI)) { + SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getRegSlot(); + if (LiveInts->hasInterval(Reg)) { + const LiveInterval &LI = LiveInts->getInterval(Reg); + if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) { + assert(VNI && "NULL valno is not allowed"); + if (VNI->def != DefIdx && !MO->isEarlyClobber()) { + report("Inconsistent valno->def", MO, MONum); + *OS << "Valno " << VNI->id << " is not defined at " + << DefIdx << " in " << LI << '\n'; + } + } else { + report("No live range at def", MO, MONum); + *OS << DefIdx << " is not live in " << LI << '\n'; + } + } else { + report("Virtual register has no Live interval", MO, MONum); + } + } + } +} + void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) { BBInfo &MInfo = MBBInfoMap[MI->getParent()]; set_union(MInfo.regsKilled, regsKilled); diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 6246c21..13d1bbc 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -37,8 +37,9 @@ static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden, cl::desc("Disable tail duplication")); static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden, cl::desc("Disable pre-register allocation tail duplication")); -static cl::opt<bool> EnableBlockPlacement("enable-block-placement", - cl::Hidden, cl::desc("Enable probability-driven block placement")); +static cl::opt<bool> DisableBlockPlacement("disable-block-placement", + cl::Hidden, cl::desc("Disable the probability-driven block placement, and " + "re-enable the old code placement pass")); static cl::opt<bool> EnableBlockPlacementStats("enable-block-placement-stats", cl::Hidden, cl::desc("Collect probability-driven block placement stats")); static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden, @@ -272,11 +273,6 @@ AnalysisID TargetPassConfig::addPass(char &ID) { return FinalID; } -void TargetPassConfig::printNoVerify(const char *Banner) const { - if (TM->shouldPrintMachineCode()) - PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); -} - void TargetPassConfig::printAndVerify(const char *Banner) const { if (TM->shouldPrintMachineCode()) PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); @@ -394,16 +390,16 @@ void TargetPassConfig::addMachinePasses() { // Expand pseudo instructions before second scheduling pass. addPass(ExpandPostRAPseudosID); - printNoVerify("After ExpandPostRAPseudos"); + printAndVerify("After ExpandPostRAPseudos"); // Run pre-sched2 passes. if (addPreSched2()) - printNoVerify("After PreSched2 passes"); + printAndVerify("After PreSched2 passes"); // Second pass scheduler. 
  if (getOptLevel() != CodeGenOpt::None) {
    addPass(PostRASchedulerID);
-    printNoVerify("After PostRAScheduler");
+    printAndVerify("After PostRAScheduler");
  }
 
   // GC
@@ -416,7 +412,7 @@ void TargetPassConfig::addMachinePasses() {
   addBlockPlacement();
 
   if (addPreEmitPass())
-    printNoVerify("After PreEmit passes");
+    printAndVerify("After PreEmit passes");
 }
 
 /// Add passes that optimize machine instructions in SSA form.
@@ -601,24 +597,24 @@ void TargetPassConfig::addMachineLateOptimization() {
   // Branch folding must be run after regalloc and prolog/epilog insertion.
   if (addPass(BranchFolderPassID) != &NoPassID)
-    printNoVerify("After BranchFolding");
+    printAndVerify("After BranchFolding");
 
   // Tail duplication.
   if (addPass(TailDuplicateID) != &NoPassID)
-    printNoVerify("After TailDuplicate");
+    printAndVerify("After TailDuplicate");
 
   // Copy propagation.
   if (addPass(MachineCopyPropagationID) != &NoPassID)
-    printNoVerify("After copy propagation pass");
+    printAndVerify("After copy propagation pass");
 }
 
 /// Add standard basic block placement passes.
 void TargetPassConfig::addBlockPlacement() {
   AnalysisID ID = &NoPassID;
-  if (EnableBlockPlacement) {
-    // MachineBlockPlacement is an experimental pass which is disabled by
-    // default currently. Eventually it should subsume CodePlacementOpt, so
-    // when enabled, the other is disabled.
+  if (!DisableBlockPlacement) {
+    // MachineBlockPlacement is a new pass which subsumes the functionality of
+    // CodePlacementOpt. The old code placement pass can be restored by
+    // disabling block placement, but eventually it will be removed.
     ID = addPass(MachineBlockPlacementID);
   } else {
     ID = addPass(CodePlacementOptID);
@@ -628,6 +624,6 @@ void TargetPassConfig::addBlockPlacement() {
   if (EnableBlockPlacementStats)
     addPass(MachineBlockPlacementStatsID);
 
-  printNoVerify("After machine block placement.");
+  printAndVerify("After machine block placement.");
 }
 }
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index 85119c9..b00eceb 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -14,11 +14,11 @@
 
 #define DEBUG_TYPE "regalloc"
 #include "RegAllocBase.h"
-#include "LiveRangeEdit.h"
 #include "Spiller.h"
 #include "VirtRegMap.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Target/TargetMachine.h"
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index f39a21c..77ee314 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -15,7 +15,6 @@
 #define DEBUG_TYPE "regalloc"
 #include "RegAllocBase.h"
 #include "LiveDebugVariables.h"
-#include "LiveRangeEdit.h"
 #include "RenderMachineFunction.h"
 #include "Spiller.h"
 #include "VirtRegMap.h"
@@ -24,6 +23,7 @@
 #include "llvm/PassAnalysisSupport.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -187,7 +187,7 @@ void RABasic::spillReg(LiveInterval& VirtReg, unsigned PhysReg,
     unassign(SpilledVReg, PhysReg);
 
     // Spill the extracted interval.
- LiveRangeEdit LRE(SpilledVReg, SplitVRegs); + LiveRangeEdit LRE(SpilledVReg, SplitVRegs, *MF, *LIS, VRM); spiller().spill(LRE); } // After extracting segments, the query's results are invalid. But keep the @@ -287,7 +287,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, DEBUG(dbgs() << "spilling: " << VirtReg << '\n'); if (!VirtReg.isSpillable()) return ~0u; - LiveRangeEdit LRE(VirtReg, SplitVRegs); + LiveRangeEdit LRE(VirtReg, SplitVRegs, *MF, *LIS, VRM); spiller().spill(LRE); // The live virtual register requesting allocation was spilled, so tell diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index feec3d4..3f2a617 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -16,7 +16,6 @@ #include "AllocationOrder.h" #include "InterferenceCache.h" #include "LiveDebugVariables.h" -#include "LiveRangeEdit.h" #include "RegAllocBase.h" #include "Spiller.h" #include "SpillPlacement.h" @@ -29,6 +28,7 @@ #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/EdgeBundles.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -428,13 +428,13 @@ void RAGreedy::enqueue(LiveInterval *LI) { Prio |= (1u << 30); } - Queue.push(std::make_pair(Prio, Reg)); + Queue.push(std::make_pair(Prio, ~Reg)); } LiveInterval *RAGreedy::dequeue() { if (Queue.empty()) return 0; - LiveInterval *LI = &LIS->getInterval(Queue.top().second); + LiveInterval *LI = &LIS->getInterval(~Queue.top().second); Queue.pop(); return LI; } @@ -1183,7 +1183,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, return 0; // Prepare split editor. - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit, SplitSpillMode); // Assign all edge bundles to the preferred candidate, or NoCand. @@ -1231,7 +1231,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); unsigned Reg = VirtReg.reg; bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit, SplitSpillMode); ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { @@ -1512,7 +1512,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, << '-' << Uses[BestAfter] << ", " << BestDiff << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit); SE->openIntv(); @@ -1644,7 +1644,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // Finally spill VirtReg itself. 
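One subtle change earlier in this file's hunks: enqueue now pushes ~Reg and dequeue flips it back. std::priority_queue of pairs is a max-heap that breaks priority ties on the second member, so storing the complement makes equal-priority registers pop in ascending vreg order instead of descending. A minimal demonstration (the priority value 100 is arbitrary, and reading this reversal as the intent of the change is my inference; the diff itself does not say). The spill path this hunk belongs to continues below.

    #include <cstdio>
    #include <queue>
    #include <utility>

    int main() {
      std::priority_queue<std::pair<unsigned, unsigned> > Q;
      const unsigned Regs[] = {7, 3, 5};
      for (int i = 0; i != 3; ++i)
        Q.push(std::make_pair(100u, ~Regs[i]));  // same priority for all

      // Pops vregs in ascending order: 3, 5, 7.
      while (!Q.empty()) {
        std::printf("vreg %u\n", ~Q.top().second);
        Q.pop();
      }
    }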
NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); - LiveRangeEdit LRE(VirtReg, NewVRegs, this); + LiveRangeEdit LRE(VirtReg, NewVRegs, *MF, *LIS, VRM, this); spiller().spill(LRE); setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 9fcf886..a284614 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -31,14 +31,15 @@ #define DEBUG_TYPE "regalloc" -#include "LiveRangeEdit.h" #include "RenderMachineFunction.h" #include "Spiller.h" #include "VirtRegMap.h" #include "RegisterCoalescer.h" +#include "llvm/Module.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/RegAllocPBQP.h" #include "llvm/CodeGen/MachineDominators.h" @@ -56,6 +57,7 @@ #include <limits> #include <memory> #include <set> +#include <sstream> #include <vector> using namespace llvm; @@ -69,6 +71,13 @@ pbqpCoalescing("pbqp-coalescing", cl::desc("Attempt coalescing during PBQP register allocation."), cl::init(false), cl::Hidden); +#ifndef NDEBUG +static cl::opt<bool> +pbqpDumpGraphs("pbqp-dump-graphs", + cl::desc("Dump graphs for each function/round in the compilation unit."), + cl::init(false), cl::Hidden); +#endif + namespace { /// @@ -187,7 +196,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, const RegSet &vregs) { typedef std::vector<const LiveInterval*> LIVector; - + ArrayRef<SlotIndex> regMaskSlots = lis->getRegMaskSlots(); MachineRegisterInfo *mri = &mf->getRegInfo(); const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); @@ -224,7 +233,9 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, } } - // Remove any physical registers which overlap. + RegSet overlappingPRegs; + + // Record physical registers whose ranges overlap. for (RegSet::const_iterator pregItr = pregs.begin(), pregEnd = pregs.end(); pregItr != pregEnd; ++pregItr) { @@ -235,9 +246,41 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, continue; } - if (!vregLI->overlaps(*pregLI)) { - continue; + if (vregLI->overlaps(*pregLI)) + overlappingPRegs.insert(preg); + } + + // Record any overlaps with regmask operands. + BitVector regMaskOverlaps(tri->getNumRegs()); + for (ArrayRef<SlotIndex>::iterator rmItr = regMaskSlots.begin(), + rmEnd = regMaskSlots.end(); + rmItr != rmEnd; ++rmItr) { + SlotIndex rmIdx = *rmItr; + if (vregLI->liveAt(rmIdx)) { + MachineInstr *rmMI = lis->getInstructionFromIndex(rmIdx); + const uint32_t* regMask = 0; + for (MachineInstr::mop_iterator mopItr = rmMI->operands_begin(), + mopEnd = rmMI->operands_end(); + mopItr != mopEnd; ++mopItr) { + if (mopItr->isRegMask()) { + regMask = mopItr->getRegMask(); + break; + } + } + assert(regMask != 0 && "Couldn't find register mask."); + regMaskOverlaps.setBitsNotInMask(regMask); } + } + + for (unsigned preg = 0; preg < tri->getNumRegs(); ++preg) { + if (regMaskOverlaps.test(preg)) + overlappingPRegs.insert(preg); + } + + for (RegSet::const_iterator pregItr = overlappingPRegs.begin(), + pregEnd = overlappingPRegs.end(); + pregItr != pregEnd; ++pregItr) { + unsigned preg = *pregItr; // Remove the register from the allowed set. 
VRAllowed::iterator eraseItr = @@ -507,7 +550,7 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, } else if (problem.isSpillOption(vreg, alloc)) { vregsToAlloc.erase(vreg); SmallVector<LiveInterval*, 8> newSpills; - LiveRangeEdit LRE(lis->getInterval(vreg), newSpills); + LiveRangeEdit LRE(lis->getInterval(vreg), newSpills, *mf, *lis, vrm); spiller->spill(LRE); DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: " @@ -633,6 +676,12 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { // Find the vreg intervals in need of allocation. findVRegIntervalsToAlloc(); + const Function* func = mf->getFunction(); + std::string fqn = + func->getParent()->getModuleIdentifier() + "." + + func->getName().str(); + (void)fqn; + // If there are non-empty intervals allocate them using pbqp. if (!vregsToAlloc.empty()) { @@ -644,6 +693,20 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { std::auto_ptr<PBQPRAProblem> problem = builder->build(mf, lis, loopInfo, vregsToAlloc); + +#ifndef NDEBUG + if (pbqpDumpGraphs) { + std::ostringstream rs; + rs << round; + std::string graphFileName(fqn + "." + rs.str() + ".pbqpgraph"); + std::string tmp; + raw_fd_ostream os(graphFileName.c_str(), tmp); + DEBUG(dbgs() << "Dumping graph for round " << round << " to \"" + << graphFileName << "\"\n"); + problem->getGraph().dump(os); + } +#endif + PBQP::Solution solution = PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve( problem->getGraph()); diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 2818f49..03bd82e 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -83,6 +83,11 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) && "Target changed?"); + // It is not possible to use the register scavenger after late optimization + // passes that don't preserve accurate liveness information. + assert(MRI->tracksLiveness() && + "Cannot use register scavenger with inaccurate liveness"); + // Self-initialize. if (!MBB) { NumPhysRegs = TRI->getNumRegs(); diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 6be1ab7..d46eb89 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -39,8 +39,8 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, LiveIntervals *lis) : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis), - IsPostRA(IsPostRAFlag), UnitLatencies(false), LoopRegs(MLI, MDT), - FirstDbgValue(0) { + IsPostRA(IsPostRAFlag), UnitLatencies(false), CanHandleTerminators(false), + LoopRegs(MLI, MDT), FirstDbgValue(0) { assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals"); DbgValues.clear(); assert(!(IsPostRA && MRI.getNumVirtRegs()) && @@ -554,7 +554,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { continue; } - assert(!MI->isTerminator() && !MI->isLabel() && + assert((!MI->isTerminator() || CanHandleTerminators) && !MI->isLabel() && "Cannot schedule terminators or labels!"); SUnit *SU = MISUnitMap[MI]; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7c4db97..0914c66 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1080,6 +1080,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // If the root changed (e.g. it was a dead load, update the root). 
   DAG.setRoot(Dummy.getValue());
+  DAG.RemoveDeadNodes();
 }
 
 SDValue DAGCombiner::visit(SDNode *N) {
@@ -1452,16 +1453,14 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
   if (VT.isInteger() && !VT.isVector()) {
     APInt LHSZero, LHSOne;
     APInt RHSZero, RHSOne;
-    APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
-    DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+    DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
 
     if (LHSZero.getBoolValue()) {
-      DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+      DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
 
       // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
       // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
-      if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
-          (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+      if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
         return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1);
     }
   }
@@ -1547,16 +1546,14 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
   // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
   APInt LHSZero, LHSOne;
   APInt RHSZero, RHSOne;
-  APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
-  DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+  DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
 
   if (LHSZero.getBoolValue()) {
-    DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+    DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
 
     // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
     // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
-    if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
-        (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+    if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
       return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
                        DAG.getNode(ISD::CARRY_FALSE,
                                    N->getDebugLoc(), MVT::Glue));
@@ -2336,6 +2333,67 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
                      ORNode, N0.getOperand(1));
   }
 
+  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
+  // Only perform this optimization after type legalization and before
+  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
+  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
+  // we don't want to undo this promotion.
+  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
+  // on scalars.
+  if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
+      && Level == AfterLegalizeVectorOps) {
+    SDValue In0 = N0.getOperand(0);
+    SDValue In1 = N1.getOperand(0);
+    EVT In0Ty = In0.getValueType();
+    EVT In1Ty = In1.getValueType();
+    // If both incoming values are integers and the original types are the same.
+    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
+      SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1);
+      SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op);
+      AddToWorkList(Op.getNode());
+      return BC;
+    }
+  }
+
+  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
+  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
+  // If both shuffles use the same mask, and both shuffle within a single
+  // vector, then it is worthwhile to move the swizzle after the operation.
+  // The type-legalizer generates this pattern when loading illegal
+  // vector types from memory. In many cases this allows additional shuffle
+  // optimizations.
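That a bitwise op commutes with a common swizzle is easy to check concretely. A scalar model over 4-lane arrays (plain ints standing in for vector elements; the mask is arbitrary): shuffling both inputs with the same mask and then XORing gives the same lanes as XORing first and shuffling once. The guarded implementation of this rewrite follows below.

    #include <array>
    #include <cstdio>

    typedef std::array<int, 4> V4;

    static V4 shuffle(const V4 &A, const V4 &Mask) {
      V4 R = {};
      for (int i = 0; i != 4; ++i)
        R[i] = A[Mask[i]];
      return R;
    }

    int main() {
      const V4 A = {1, 2, 3, 4};
      const V4 B = {10, 20, 30, 40};
      const V4 M = {3, 1, 2, 0};  // the common swizzle mask

      V4 LHS = {}, X = {};
      const V4 SA = shuffle(A, M), SB = shuffle(B, M);
      for (int i = 0; i != 4; ++i) {
        LHS[i] = SA[i] ^ SB[i];   // op(shuffle(A,M), shuffle(B,M))
        X[i] = A[i] ^ B[i];
      }
      const V4 RHS = shuffle(X, M);  // shuffle(op(A,B), M)

      std::printf("equal: %d\n", (int)(LHS == RHS));  // 1
    }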
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + N0.getOperand(1).getOpcode() == ISD::UNDEF && + N1.getOperand(1).getOpcode() == ISD::UNDEF) { + ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); + ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); + + assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() && + "Inputs to shuffles are not the same type"); + + unsigned NumElts = VT.getVectorNumElements(); + + // Check that both shuffles use the same mask. The masks are known to be of + // the same length because the result vector type is the same. + bool SameMask = true; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx0 = SVN0->getMaskElt(i); + int Idx1 = SVN1->getMaskElt(i); + if (Idx0 != Idx1) { + SameMask = false; + break; + } + } + + if (SameMask) { + SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, + N0.getOperand(0), N1.getOperand(0)); + AddToWorkList(Op.getNode()); + return DAG.getVectorShuffle(VT, N->getDebugLoc(), Op, + DAG.getUNDEF(VT), &SVN0->getMask()[0]); + } + } + return SDValue(); } @@ -3773,8 +3831,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && N0.getOpcode() == ISD::CTLZ && N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); - DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne); // If any of the input bits are KnownOne, then the input couldn't be all // zeros, thus the result of the srl will always be zero. @@ -3782,7 +3839,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // If all of the bits input the to ctlz node are known to be zero, then // the result of the ctlz is "32" and the result of the shift is one. - APInt UnknownBits = ~KnownZero & Mask; + APInt UnknownBits = ~KnownZero; if (UnknownBits == 0) return DAG.getConstant(1, VT); // Otherwise, check to see if there is exactly one bit input to the ctlz. @@ -4298,12 +4355,17 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // Only do this before legalize for now. if (VT.isVector() && !LegalOperations) { EVT N0VT = N0.getOperand(0).getValueType(); - // We know that the # elements of the results is the same as the - // # elements of the compare (and the # elements of the compare result - // for that matter). Check to see that they are the same size. If so, - // we know that the element size of the sext'd result matches the - // element size of the compare operands. - if (VT.getSizeInBits() == N0VT.getSizeInBits()) + // On some architectures (such as SSE/NEON/etc) the SETCC result type is + // of the same size as the compared operands. Only optimize sext(setcc()) + // if this is the case. + EVT SVT = TLI.getSetCCResultType(N0VT); + + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. 
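The size restriction matters because on SSE/NEON-style targets a vector setcc already yields lanes of all ones or all zeros at the compared element width. One lane modeled in scalar code (illustrative only; the EVT and SETCC machinery are simplified away): when the destination element has that same width, sign extension is the identity and the setcc node can be reused directly, which is exactly what the check below tests.

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t A = 5, B = 7;
      // One lane of a vector setcc: all ones when true, all zeros when false.
      int32_t Lane = -(int32_t)(A < B);  // 0xffffffff here

      int32_t SameWidth = Lane;          // sext i32 -> i32: nothing to do
      int64_t Widened = (int64_t)Lane;   // sext i32 -> i64: real extension

      std::printf("%08x %08x %016llx\n", (uint32_t)Lane, (uint32_t)SameWidth,
                  (unsigned long long)Widened);
    }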
+ if (VT.getSizeInBits() == SVT.getSizeInBits()) return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); @@ -4317,11 +4379,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT MatchingVectorType = EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); - SDValue VsetCC = - DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + + if (SVT == MatchingVectorType) { + SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, + N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + } } } @@ -4352,6 +4416,44 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { return SDValue(); } +// isTruncateOf - If N is a truncate of some other value, return true, record +// the value being truncated in Op and which of Op's bits are zero in KnownZero. +// This function computes KnownZero to avoid a duplicated call to +// ComputeMaskedBits in the caller. +static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, + APInt &KnownZero) { + APInt KnownOne; + if (N->getOpcode() == ISD::TRUNCATE) { + Op = N->getOperand(0); + DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + return true; + } + + if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 || + cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE) + return false; + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + assert(Op0.getValueType() == Op1.getValueType()); + + ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0); + ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1); + if (COp0 && COp0->isNullValue()) + Op = Op1; + else if (COp1 && COp1->isNullValue()) + Op = Op0; + else + return false; + + DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + + if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) + return false; + + return true; +} + SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -4369,16 +4471,17 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // (zext (truncate x)) -> (truncate x) // This is valid when the truncated bits of x are already zero. // FIXME: We should extend this to work for vectors too. - if (N0.getOpcode() == ISD::TRUNCATE && !VT.isVector()) { - SDValue Op = N0.getOperand(0); - APInt TruncatedBits - = APInt::getBitsSet(Op.getValueSizeInBits(), - N0.getValueSizeInBits(), - std::min(Op.getValueSizeInBits(), - VT.getSizeInBits())); - APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(Op, TruncatedBits, KnownZero, KnownOne); - if (TruncatedBits == KnownZero) { + SDValue Op; + APInt KnownZero; + if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) { + APInt TruncatedBits = + (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ? 
+ APInt(Op.getValueSizeInBits(), 0) : + APInt::getBitsSet(Op.getValueSizeInBits(), + N0.getValueSizeInBits(), + std::min(Op.getValueSizeInBits(), + VT.getSizeInBits())); + if (TruncatedBits == (KnownZero & TruncatedBits)) { if (VT.bitsGT(Op.getValueType())) return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op); if (VT.bitsLT(Op.getValueType())) @@ -5280,7 +5383,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) // This often reduces constant pool loads. - if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) && + if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) || + (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) && N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) { SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT, N0.getOperand(0)); @@ -5667,6 +5771,24 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (N0CFP && N1CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1); + // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. + if (N1CFP && VT != MVT::ppcf128 && DAG.getTarget().Options.UnsafeFPMath) { + // Compute the reciprocal 1.0 / c2. + APFloat N1APF = N1CFP->getValueAPF(); + APFloat Recip(N1APF.getSemantics(), 1); // 1.0 + APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); + // Only do the transform if the reciprocal is a legal fp immediate that + // isn't too nasty (eg NaN, denormal, ...). + if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty + (!LegalOperations || + // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM + // backend)... we should handle this gracefully after Legalize. + // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || + TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || + TLI.isFPImmLegal(Recip, VT))) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, + DAG.getConstantFP(Recip, VT)); + } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, @@ -5931,7 +6053,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading // constant pool values. - if (N0.getOpcode() == ISD::BITCAST && + if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST && !VT.isVector() && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger()) { @@ -5967,7 +6089,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading // constant pool values. 
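The fdiv-to-fmul rewrite in visitFDIV above is only bit-exact when the reciprocal is exactly representable (the divisor is a power of two); for other constants the product can differ from the true quotient in the last ulp, which is why the fold is gated on UnsafeFPMath and on the reciprocal being a non-nasty, legal FP immediate. A plain-C++ sketch of the trade-off (values illustrative, not the combiner itself):

#include <cstdio>

int main() {
  double x = 0.3;
  // Power-of-two divisor: the reciprocal 0.125 is exact, so rewriting
  // x / 8.0 as x * 0.125 changes nothing.
  printf("%d\n", x / 8.0 == x * (1.0 / 8.0));        // prints 1
  // Non-power-of-two divisor: 1.0 / 3.0 is rounded, so the rewritten
  // product may differ from the quotient in the last ulp.
  printf("%.17g\n%.17g\n", x / 3.0, x * (1.0 / 3.0));
  return 0;
}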
- if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && + if (!TLI.isFAbsFree(VT) && + N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { SDValue Int = N0.getOperand(0); @@ -7628,8 +7751,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - assert(N0.getValueType().getVectorNumElements() == NumElts && - "Vector shuffle must be normalized in DAG"); + assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG"); // Canonicalize shuffle undef, undef -> undef if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) @@ -7654,12 +7776,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SmallVector<int, 8> NewMask; for (unsigned i = 0; i != NumElts; ++i) { int Idx = SVN->getMaskElt(i); - if (Idx < 0) - NewMask.push_back(Idx); - else if (Idx < (int)NumElts) - NewMask.push_back(Idx + NumElts); - else - NewMask.push_back(Idx - NumElts); + if (Idx >= 0) { + if (Idx < (int)NumElts) + Idx += NumElts; + else + Idx -= NumElts; + } + NewMask.push_back(Idx); } return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT), &NewMask[0]); @@ -7721,6 +7844,40 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return N0; } } + + // If this shuffle node is simply a swizzle of another shuffle node, + // and it reverses the swizzle of the previous shuffle then we can + // optimize shuffle(shuffle(x, undef), undef) -> x. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + N1.getOpcode() == ISD::UNDEF) { + + ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); + + // Shuffle nodes can only reverse shuffles with a single non-undef value. + if (N0.getOperand(1).getOpcode() != ISD::UNDEF) + return SDValue(); + + // The incoming shuffle must be of the same type as the result of the + // current shuffle. + assert(OtherSV->getOperand(0).getValueType() == VT && + "Shuffle types don't match"); + + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + assert(Idx < (int)NumElts && "Index references undef operand"); + // Next, this index comes from the first value, which is the incoming + // shuffle. Adopt the incoming index. + if (Idx >= 0) + Idx = OtherSV->getMaskElt(Idx); + + // The combined shuffle must map each index to itself. 
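The swizzle-reversal combine above composes the two shuffle masks and bails unless the composition is the identity. A small model of that composition check (plain C++, std::vector standing in for the mask arrays):

#include <cassert>
#include <vector>

int main() {
  // M2 composed with M1 must map every lane to itself for
  // shuffle(shuffle(x, undef, M1), undef, M2) to fold to x.
  std::vector<int> M1 = {2, 3, 0, 1};   // rotate a 4-lane vector by two
  std::vector<int> M2 = {2, 3, 0, 1};   // rotating by two again undoes it
  for (unsigned i = 0; i != M1.size(); ++i) {
    int Idx = M2[i];
    if (Idx >= 0)
      Idx = M1[Idx];                    // adopt the incoming shuffle's index
    assert(Idx == (int)i && "combined shuffle is not the identity");
  }
  return 0;
}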
+ if (Idx >= 0 && (unsigned)Idx != i) + return SDValue(); + } + + return OtherSV->getOperand(0); + } + return SDValue(); } @@ -7796,7 +7953,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { SDValue Elt = RHS.getOperand(i); if (!isa<ConstantSDNode>(Elt)) return SDValue(); - else if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) + + if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) Indices.push_back(i); else if (cast<ConstantSDNode>(Elt)->isNullValue()) Indices.push_back(NumElts); @@ -7991,8 +8149,8 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, if ((LLD->hasAnyUseOfValue(1) && (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) || - (LLD->hasAnyUseOfValue(1) && - (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS)))) + (RLD->hasAnyUseOfValue(1) && + (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS)))) return false; Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 9f4a44a..0c1ac69 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -395,6 +395,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { ISDOpcode = ISD::SRA; } + // Transform "urem x, pow2" -> "and x, pow2-1". + if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) && + isPowerOf2_64(Imm)) { + --Imm; + ISDOpcode = ISD::AND; + } + unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, Op0IsKill, Imm, VT.getSimpleVT()); if (ResultReg == 0) return false; @@ -592,7 +599,18 @@ bool FastISel::SelectCall(const User *I) { if (!Reg) Reg = lookUpRegForValue(Address); - if (!Reg && isa<Instruction>(Address) && + // If we have a VLA that has a "use" in a metadata node that's then used + // here but it has no other uses, then we have a problem. E.g., + // + // int foo (const int *x) { + // char a[*x]; + // return 0; + // } + // + // If we assign 'a' a vreg and fast isel later on has to use the selection + // DAG isel, it will want to copy the value to the vreg. However, there are + // no uses, which goes counter to what selection DAG isel expects. + if (!Reg && !Address->use_empty() && isa<Instruction>(Address) && (!isa<AllocaInst>(Address) || !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address)))) Reg = FuncInfo.InitializeRegForValue(Address); @@ -803,8 +821,11 @@ FastISel::SelectInstruction(const Instruction *I) { /// the CFG. void FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { - if (FuncInfo.MBB->isLayoutSuccessor(MSucc)) { - // The unconditional fall-through case, which needs no instructions. + + if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { + // If this is the only instruction in the block, emit the branch anyway + // for more accurate line information; otherwise this is the unconditional + // fall-through case, which needs no instructions. } else { // The unconditional branch case.
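The FastISel hunk above strength-reduces an unsigned remainder by a power of two into a bitwise AND; the underlying identity, x % 2^k == x & (2^k - 1) for unsigned x, is what lets it simply decrement the immediate and swap the opcode. A quick exhaustive spot-check in plain C++:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Pow2 = 8;              // isPowerOf2_64(8) holds
  for (uint64_t x = 0; x < 4096; ++x)
    assert(x % Pow2 == (x & (Pow2 - 1)));
  return 0;
}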
TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL, diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 1b84b13..a96a997 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -718,10 +718,15 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: - case ISD::VAARG: case ISD::STACKSAVE: Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); break; + case ISD::VAARG: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getValueType(0)); + if (Action != TargetLowering::Promote) + Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); + break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::EXTRACT_VECTOR_ELT: @@ -1762,11 +1767,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // and leave the Hi part unset. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - // The input chain to this libcall is the entry node of the function. - // Legalizing the call will automatically add the previous call to the - // dependence. - SDValue InChain = DAG.getEntryNode(); - TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { @@ -1782,9 +1782,19 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + // By default, the input chain to this libcall is the entry node of the + // function. If the libcall is going to be emitted as a tail call then + // TLI.isUsedByReturnOnly will change it to the right chain if the return + // node which is being folded has a non-entry input chain. + SDValue InChain = DAG.getEntryNode(); + // isTailCall may be true since the callee does not reference caller stack // frame. Check if it's in the right position. - bool isTailCall = isInTailCallPosition(DAG, Node, TLI); + SDValue TCChain = InChain; + bool isTailCall = isInTailCallPosition(DAG, Node, TCChain, TLI); + if (isTailCall) + InChain = TCChain; + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), isTailCall, @@ -1820,7 +1830,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, TLI.getLibcallCallingConv(LC), false, + false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); @@ -3528,6 +3538,33 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Node->getOpcode() == ISD::SINT_TO_FP, dl); Results.push_back(Tmp1); break; + case ISD::VAARG: { + SDValue Chain = Node->getOperand(0); // Get the chain. + SDValue Ptr = Node->getOperand(1); // Get the pointer. 
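The VAARG promotion case that begins above reads the argument at the wider legal type and then truncates (or bitcasts, for vectors) back to the original type. C's own default argument promotions have the same shape; this is a rough analogy rather than the DAG code itself (sum16 is a made-up helper):

#include <cstdarg>
#include <cstdio>

// Small integer varargs travel at int width and are narrowed on read,
// mirroring the getVAArg(NVT, ...) + TRUNCATE pair in the promote path.
static short sum16(int n, ...) {
  va_list ap;
  va_start(ap, n);
  int acc = 0;
  for (int i = 0; i < n; ++i)
    acc += va_arg(ap, int);             // read at the promoted width
  va_end(ap);
  return (short)acc;                    // truncate back to the narrow type
}

int main() {
  printf("%d\n", sum16(3, 1, 2, 3));    // prints 6
  return 0;
}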
+ + unsigned TruncOp; + if (OVT.isVector()) { + TruncOp = ISD::BITCAST; + } else { + assert(OVT.isInteger() + && "VAARG promotion is supported only for vectors or integer types"); + TruncOp = ISD::TRUNCATE; + } + + // Perform the larger operation, then convert back + Tmp1 = DAG.getVAArg(NVT, dl, Chain, Ptr, Node->getOperand(2), + Node->getConstantOperandVal(3)); + Chain = Tmp1.getValue(1); + + Tmp2 = DAG.getNode(TruncOp, dl, OVT, Tmp1); + + // Modified the chain result - switch anything that used the old chain to + // use the new one. + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + ReplacedNode(Node); + break; + } case ISD::AND: case ISD::OR: case ISD::XOR: { @@ -3601,6 +3638,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { break; } case ISD::FDIV: + case ISD::FREM: case ISD::FPOW: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 41506d1..95ddb1e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1362,7 +1362,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(N->getOperand(1), HighBitMask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(N->getOperand(1), KnownZero, KnownOne); // If we don't know anything about the high bits, exit. if (((KnownZero|KnownOne) & HighBitMask) == 0) diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 69c2100..e866445 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -521,6 +521,7 @@ private: SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N); + SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); SDValue ScalarizeVecRes_SETCC(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 3ae8345..9fe4480 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -417,7 +417,8 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask); - return DAG.getNode(ISD::OR, DL, VT, Op1, Op2); + SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2); + return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); } SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index a8aee12..5f23f01 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -58,6 +58,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; + case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break; case 
ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; @@ -226,6 +227,37 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { return InOp; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { + SDValue Cond = GetScalarizedVector(N->getOperand(0)); + SDValue LHS = GetScalarizedVector(N->getOperand(1)); + TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false); + TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true); + if (ScalarBool != VecBool) { + EVT CondVT = Cond.getValueType(); + switch (ScalarBool) { + case TargetLowering::UndefinedBooleanContent: + break; + case TargetLowering::ZeroOrOneBooleanContent: + assert(VecBool == TargetLowering::UndefinedBooleanContent || + VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent); + // Vector read from all ones, scalar expects a single 1 so mask. + Cond = DAG.getNode(ISD::AND, N->getDebugLoc(), CondVT, + Cond, DAG.getConstant(1, CondVT)); + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + assert(VecBool == TargetLowering::UndefinedBooleanContent || + VecBool == TargetLowering::ZeroOrOneBooleanContent); + // Vector reads from a one, scalar from all ones so sign extend. + Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), CondVT, + Cond, DAG.getValueType(MVT::i1)); + break; + } + } + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), Cond, LHS, + GetScalarizedVector(N->getOperand(2))); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(1)); return DAG.getNode(ISD::SELECT, N->getDebugLoc(), diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index f44adfc..2cb5d37 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1587,6 +1587,7 @@ protected: std::vector<SUnit*> Queue; unsigned CurQueueId; bool TracksRegPressure; + bool SrcOrder; // SUnits - The SUnits for the current graph. std::vector<SUnit> *SUnits; @@ -1612,11 +1613,12 @@ public: RegReductionPQBase(MachineFunction &mf, bool hasReadyFilter, bool tracksrp, + bool srcorder, const TargetInstrInfo *tii, const TargetRegisterInfo *tri, const TargetLowering *tli) : SchedulingPriorityQueue(hasReadyFilter), - CurQueueId(0), TracksRegPressure(tracksrp), + CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { if (TracksRegPressure) { unsigned NumRC = TRI->getNumRegClasses(); @@ -1731,10 +1733,12 @@ class RegReductionPriorityQueue : public RegReductionPQBase { public: RegReductionPriorityQueue(MachineFunction &mf, bool tracksrp, + bool srcorder, const TargetInstrInfo *tii, const TargetRegisterInfo *tri, const TargetLowering *tli) - : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli), + : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder, + tii, tri, tli), Picker(this) {} bool isBottomUp() const { return SF::IsBottomUp; } @@ -2625,7 +2629,7 @@ void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { if (!Disable2AddrHack) AddPseudoTwoAddrDeps(); // Reroute edges to nodes with multiple uses. - if (!TracksRegPressure) + if (!TracksRegPressure && !SrcOrder) PrescheduleNodesWithMultipleUses(); // Calculate node priorities. 
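ScalarizeVecRes_VSELECT above reconciles the two boolean encodings a target may use: 0/1 for scalar booleans, 0/-1 (all ones) for vector lanes. The two fix-ups are a mask by 1 in one direction and a sign extension from bit 0 in the other; a plain-integer sketch of both:

#include <cassert>
#include <cstdint>

int main() {
  // Vector-style true (all ones) flowing into a scalar select: AND with 1.
  int32_t VecTrue = -1;
  assert((VecTrue & 1) == 1);

  // Scalar-style true (a single 1) flowing where all-ones is expected:
  // sign-extend from bit 0, as the SIGN_EXTEND_INREG from i1 does above.
  int32_t ScalarTrue = 1;
  uint32_t Bit0 = (uint32_t)ScalarTrue << 31;
  int32_t Widened = (int32_t)Bit0 >> 31;   // assumes two's complement
  assert(Widened == -1);
  return 0;
}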
CalculateSethiUllmanNumbers(); @@ -2948,7 +2952,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, const TargetRegisterInfo *TRI = TM.getRegisterInfo(); BURegReductionPriorityQueue *PQ = - new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); + new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, 0); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; @@ -2962,7 +2966,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, const TargetRegisterInfo *TRI = TM.getRegisterInfo(); SrcRegReductionPriorityQueue *PQ = - new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); + new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, 0); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; @@ -2977,7 +2981,7 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, const TargetLowering *TLI = &IS->getTargetLowering(); HybridBURRPriorityQueue *PQ = - new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); + new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); PQ->setScheduleDAG(SD); @@ -2993,7 +2997,7 @@ llvm::createILPListDAGScheduler(SelectionDAGISel *IS, const TargetLowering *TLI = &IS->getTargetLowering(); ILPBURRPriorityQueue *PQ = - new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); + new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index e3a7305..92671d1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -62,6 +62,7 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { static const fltSemantics *EVTToAPFloatSemantics(EVT VT) { switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unknown FP format"); + case MVT::f16: return &APFloat::IEEEhalf; case MVT::f32: return &APFloat::IEEEsingle; case MVT::f64: return &APFloat::IEEEdouble; case MVT::f80: return &APFloat::x87DoubleExtended; @@ -1042,7 +1043,7 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { return getConstantFP(APFloat((float)Val), VT, isTarget); else if (EltVT==MVT::f64) return getConstantFP(APFloat(Val), VT, isTarget); - else if (EltVT==MVT::f80 || EltVT==MVT::f128) { + else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::f16) { bool ignored; APFloat apf = APFloat(Val); apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, @@ -1627,7 +1628,7 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth) const { APInt KnownZero, KnownOne; - ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); return (KnownZero & Mask) == Mask; } @@ -1636,15 +1637,12 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, /// known to be either zero or one and return them in the KnownZero/KnownOne /// bitsets. This code only analyzes bits in Mask, in order to short-circuit /// processing. 
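The ComputeMaskedBits rewrite that follows drops the demanded-bits Mask parameter and always tracks full-width KnownZero/KnownOne pairs; the per-opcode combining rules are unchanged. A toy mask-free model of the AND and OR rules (uint64_t standing in for APInt, Known a made-up struct):

#include <cassert>
#include <cstdint>

struct Known { uint64_t Zero, One; };

// AND: a bit is known zero if either input's is; known one only if both are.
static Known knownAnd(Known A, Known B) {
  return {A.Zero | B.Zero, A.One & B.One};
}

// OR: the dual of AND.
static Known knownOr(Known A, Known B) {
  return {A.Zero & B.Zero, A.One | B.One};
}

int main() {
  Known A = {0xFFFFFFFF00000000ull, 0x1ull};  // high half zero, bit 0 one
  Known B = {0x0ull, 0x3ull};                 // bits 0-1 one
  Known R = knownAnd(A, B);
  assert((R.Zero & R.One) == 0 && "Bits known to be one AND zero?");
  assert(R.One == 0x1ull && (R.Zero >> 32) == 0xFFFFFFFFull);
  assert(knownOr(A, B).One == 0x3ull);
  return 0;
}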
-void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, - APInt &KnownZero, APInt &KnownOne, - unsigned Depth) const { - unsigned BitWidth = Mask.getBitWidth(); - assert(BitWidth == Op.getValueType().getScalarType().getSizeInBits() && - "Mask size mismatches value type size!"); +void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, + APInt &KnownOne, unsigned Depth) const { + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. - if (Depth == 6 || Mask == 0) + if (Depth == 6) return; // Limit search depth. APInt KnownZero2, KnownOne2; @@ -1652,14 +1650,13 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, switch (Op.getOpcode()) { case ISD::Constant: // We know all of the bits for a constant! - KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & Mask; - KnownZero = ~KnownOne & Mask; + KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue(); + KnownZero = ~KnownOne; return; case ISD::AND: // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownZero, - KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1669,9 +1666,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, KnownZero |= KnownZero2; return; case ISD::OR: - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownOne, - KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1681,8 +1677,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, KnownOne |= KnownOne2; return; case ISD::XOR: { - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1694,9 +1690,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::MUL: { - APInt Mask2 = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1715,33 +1710,29 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, LeadZ = std::min(LeadZ, BitWidth); KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | APInt::getHighBitsSet(BitWidth, LeadZ); - 
KnownZero &= Mask; return; } case ISD::UDIV: { // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. - APInt AllOnes = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(Op.getOperand(0), - AllOnes, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); unsigned LeadZ = KnownZero2.countLeadingOnes(); KnownOne2.clearAllBits(); KnownZero2.clearAllBits(); - ComputeMaskedBits(Op.getOperand(1), - AllOnes, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); - KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ); return; } case ISD::SELECT: - ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1750,8 +1741,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, KnownZero &= KnownZero2; return; case ISD::SELECT_CC: - ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1783,8 +1774,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (ShAmt >= BitWidth) return; - ComputeMaskedBits(Op.getOperand(0), Mask.lshr(ShAmt), - KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero <<= ShAmt; KnownOne <<= ShAmt; @@ -1801,13 +1791,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (ShAmt >= BitWidth) return; - ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt), - KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.lshr(ShAmt); KnownOne = KnownOne.lshr(ShAmt); - APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask; + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); KnownZero |= HighBits; // High bits known zero. } return; @@ -1819,15 +1808,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (ShAmt >= BitWidth) return; - APInt InDemandedMask = (Mask << ShAmt); // If any of the demanded bits are produced by the sign extension, we also // demand the input sign bit. 
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask; - if (HighBits.getBoolValue()) - InDemandedMask |= APInt::getSignBit(BitWidth); + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); - ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, - Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.lshr(ShAmt); KnownOne = KnownOne.lshr(ShAmt); @@ -1849,10 +1834,10 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // Sign extension. Compute the demanded bits in the result that are not // present in the input. - APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits) & Mask; + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits); APInt InSignBit = APInt::getSignBit(EBits); - APInt InputDemandedBits = Mask & APInt::getLowBitsSet(BitWidth, EBits); + APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits); // If the sign extended bits are demanded, we know that the sign // bit is demanded. @@ -1860,8 +1845,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (NewBits.getBoolValue()) InputDemandedBits |= InSignBit; - ComputeMaskedBits(Op.getOperand(0), InputDemandedBits, - KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownOne &= InputDemandedBits; + KnownZero &= InputDemandedBits; assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the @@ -1889,22 +1875,23 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Op); if (ISD::isZEXTLoad(Op.getNode())) { - LoadSDNode *LD = cast<LoadSDNode>(Op); EVT VT = LD->getMemoryVT(); unsigned MemBits = VT.getScalarType().getSizeInBits(); - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask; + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); + } else if (const MDNode *Ranges = LD->getRanges()) { + computeMaskedBitsLoad(*Ranges, KnownZero); } return; } case ISD::ZERO_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; - APInt InMask = Mask.trunc(InBits); + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); KnownZero |= NewBits; @@ -1914,17 +1901,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); APInt InSignBit = APInt::getSignBit(InBits); - APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; - APInt InMask = Mask.trunc(InBits); - - // If any of the sign extended bits are demanded, we know that the sign - // bit is demanded. Temporarily set this bit in the mask for our callee. 
- if (NewBits.getBoolValue()) - InMask |= InSignBit; + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); // Note if the sign bit is known to be zero or one. bool SignBitKnownZero = KnownZero.isNegative(); @@ -1932,13 +1913,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, assert(!(SignBitKnownZero && SignBitKnownOne) && "Sign bit can't be known to be both zero and one!"); - // If the sign bit wasn't actually demanded by our caller, we don't - // want it set in the KnownZero and KnownOne result values. Reset the - // mask and reapply it to the result values. - InMask = Mask.trunc(InBits); - KnownZero &= InMask; - KnownOne &= InMask; - KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); @@ -1952,10 +1926,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::ANY_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InMask = Mask.trunc(InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); return; @@ -1963,10 +1936,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::TRUNCATE: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InMask = Mask.zext(InBits); KnownZero = KnownZero.zext(InBits); KnownOne = KnownOne.zext(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.trunc(BitWidth); KnownOne = KnownOne.trunc(BitWidth); @@ -1975,9 +1947,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::AssertZext: { EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); - ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, - KnownOne, Depth+1); - KnownZero |= (~InMask) & Mask; + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownZero |= (~InMask); return; } case ISD::FGETSIGN: @@ -1994,8 +1965,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2, - Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is @@ -2003,7 +1973,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if ((KnownZero2 & MaskV) == MaskV) { unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros(); // Top bits known zero. 
- KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2); } } } @@ -2014,13 +1984,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // Output known-0 bits are known if clear or set in both the low clear bits // common to both LHS & RHS. For example, 8+(X<<3) is known to have the // low 3 bits clear. - APInt Mask2 = APInt::getLowBitsSet(BitWidth, - BitWidth - Mask.countLeadingZeros()); - ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); unsigned KnownZeroOut = KnownZero2.countTrailingOnes(); - ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); KnownZeroOut = std::min(KnownZeroOut, KnownZero2.countTrailingOnes()); @@ -2044,7 +2012,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (RA.isPowerOf2()) { APInt LowBits = RA - 1; APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); - ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1); // The low bits of the first operand are unchanged by the srem. KnownZero = KnownZero2 & LowBits; @@ -2059,10 +2027,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // the upper bits are all one. if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) KnownOne |= ~LowBits; - - KnownZero &= Mask; - KnownOne &= Mask; - assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); } } @@ -2072,9 +2036,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, const APInt &RA = Rem->getAPIntValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - APInt Mask2 = LowBits & Mask; - KnownZero |= ~LowBits & Mask; - ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,Depth+1); + KnownZero |= ~LowBits; + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne,Depth+1); assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); break; } @@ -2082,16 +2045,13 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. - APInt AllOnes = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne, - Depth+1); - ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2, - Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); KnownOne.clearAllBits(); - KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders); return; } case ISD::FrameIndex: @@ -2111,8 +2071,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: // Allow the target to implement this method for its nodes. 
- TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this, - Depth); + TLI.computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); return; } } @@ -2236,12 +2195,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1))) if (CRHS->isAllOnesValue()) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VTBits); - ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(VTBits, 1)) == Mask) + if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) return VTBits; // If we are subtracting one from a positive number, there is no carry @@ -2262,11 +2220,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) if (CLHS->isNullValue()) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VTBits); - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(VTBits, 1)) == Mask) + if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) return VTBits; // If the input is known to be positive (the sign bit is known clear), @@ -2315,9 +2272,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ // Finally, if we can prove that the top bits of the result are 0's or 1's, // use this information. APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VTBits); - ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); + APInt Mask; if (KnownZero.isNegative()) { // sign bit is 0 Mask = KnownZero; } else if (KnownOne.isNegative()) { // sign bit is 1; @@ -2471,7 +2428,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, case ISD::FABS: V.clearSign(); return getConstantFP(V, VT); - case ISD::FP_ROUND: case ISD::FP_EXTEND: { bool ignored; // This can return overflow, underflow, or inexact; we don't care. @@ -3037,6 +2993,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, default: break; } } + + if (Opcode == ISD::FP_ROUND) { + APFloat V = N1CFP->getValueAPF(); // make copy + bool ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)V.convert(*EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &ignored); + return getConstantFP(V, VT); + } } // Canonicalize an UNDEF to the RHS, even over a constant. 
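The new FP_ROUND case in getNode above folds a constant double down to float at compile time, rounding to nearest with ties to even. The effect matches the language-level narrowing conversion; a quick illustration in plain C++:

#include <cstdio>

int main() {
  const double D = 0.1;                 // not exactly representable
  const float  F = (float)D;            // the FP_ROUND, default rounding
  printf("double: %.17g\n", D);         // 0.10000000000000001
  printf("float : %.17g\n", (double)F); // 0.10000000149011612
  return 0;
}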
@@ -4170,7 +4136,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, SDValue Ptr, SDValue Offset, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, bool isInvariant, - unsigned Alignment, const MDNode *TBAAInfo) { + unsigned Alignment, const MDNode *TBAAInfo, + const MDNode *Ranges) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 @@ -4192,7 +4159,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, - TBAAInfo); + TBAAInfo, Ranges); return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); } @@ -4248,11 +4215,12 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, - const MDNode *TBAAInfo) { + const MDNode *TBAAInfo, + const MDNode *Ranges) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, - PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment, - TBAAInfo); + PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment, + TBAAInfo, Ranges); } SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, @@ -6036,10 +6004,9 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { int64_t GVOffset = 0; if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { unsigned PtrWidth = TLI.getPointerTy().getSizeInBits(); - APInt AllOnes = APInt::getAllOnesValue(PtrWidth); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); - llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), AllOnes, - KnownZero, KnownOne, TLI.getTargetData()); + llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, + TLI.getTargetData()); unsigned AlignBits = KnownZero.countTrailingOnes(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; if (Align) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 2ac9655..94cb958 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2804,11 +2804,11 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { } // Utility for visitShuffleVector - Return true if every element in Mask, -// begining // from position Pos and ending in Pos+Size, falls within the +// beginning from position Pos and ending in Pos+Size, falls within the // specified sequential range [L, L+Size) or is undef.
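isSequentialInRange, defined just below, scans Mask[Pos, Pos+Size) for a run counting up from Low, with negative (undef) entries acting as wildcards; it is the test used to spot shuffles that are really a concat or extract of subvectors. A self-contained copy of the logic with a couple of sanity checks (std::vector in place of SmallVectorImpl):

#include <cassert>
#include <vector>

static bool isSequentialInRange(const std::vector<int> &Mask,
                                unsigned Pos, unsigned Size, int Low) {
  for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
    if (Mask[i] >= 0 && Mask[i] != Low)  // undef (< 0) matches anything
      return false;
  return true;
}

int main() {
  std::vector<int> Mask = {4, 5, -1, 7};      // undef in lane 2 is fine
  assert(isSequentialInRange(Mask, 0, 4, 4));
  assert(!isSequentialInRange(Mask, 0, 4, 0));
  assert(isSequentialInRange(Mask, 1, 2, 5)); // {5, -1} counting from 5
  return 0;
}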
static bool isSequentialInRange(const SmallVectorImpl<int> &Mask, - int Pos, int Size, int Low) { - for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low) + unsigned Pos, unsigned Size, int Low) { + for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low) if (Mask[i] >= 0 && Mask[i] != Low) return false; return true; @@ -2878,10 +2878,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SmallVector<int, 8> MappedOps; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; - if (Idx < (int)SrcNumElts) - MappedOps.push_back(Idx); - else - MappedOps.push_back(Idx + MaskNumElts - SrcNumElts); + if (Idx >= (int)SrcNumElts) + Idx -= SrcNumElts - MaskNumElts; + MappedOps.push_back(Idx); } setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, @@ -2893,13 +2892,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // Analyze the access pattern of the vector to see if we can extract // two subvectors and do the shuffle. The analysis is done by calculating // the range of elements the mask access on both vectors. - int MinRange[2] = { static_cast<int>(SrcNumElts+1), - static_cast<int>(SrcNumElts+1)}; + int MinRange[2] = { static_cast<int>(SrcNumElts), + static_cast<int>(SrcNumElts)}; int MaxRange[2] = {-1, -1}; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; - int Input = 0; + unsigned Input = 0; if (Idx < 0) continue; @@ -2915,35 +2914,31 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // Check if the access is smaller than the vector size and can we find // a reasonable extract index. - int RangeUse[2] = { 2, 2 }; // 0 = Unused, 1 = Extract, 2 = Can not - // Extract. + int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not + // Extract. int StartIdx[2]; // StartIdx to extract from - for (int Input=0; Input < 2; ++Input) { - if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) { + for (unsigned Input = 0; Input < 2; ++Input) { + if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) { RangeUse[Input] = 0; // Unused StartIdx[Input] = 0; - } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) { - // Fits within range but we should see if we can find a good - // start index that is a multiple of the mask length. - if (MaxRange[Input] < (int)MaskNumElts) { - RangeUse[Input] = 1; // Extract from beginning of the vector - StartIdx[Input] = 0; - } else { - StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; - if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && - StartIdx[Input] + MaskNumElts <= SrcNumElts) - RangeUse[Input] = 1; // Extract from a multiple of the mask length. - } + continue; } + + // Find a good start index that is a multiple of the mask length. Then + // see if the rest of the elements are in range. + StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; + if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && + StartIdx[Input] + MaskNumElts <= SrcNumElts) + RangeUse[Input] = 1; // Extract from a multiple of the mask length. } if (RangeUse[0] == 0 && RangeUse[1] == 0) { setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. return; } - else if (RangeUse[0] < 2 && RangeUse[1] < 2) { + if (RangeUse[0] >= 0 && RangeUse[1] >= 0) { // Extract appropriate subvector and generate a vector shuffle - for (int Input=0; Input < 2; ++Input) { + for (unsigned Input = 0; Input < 2; ++Input) { SDValue &Src = Input == 0 ? 
Src1 : Src2; if (RangeUse[Input] == 0) Src = DAG.getUNDEF(VT); @@ -2956,12 +2951,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SmallVector<int, 8> MappedOps; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; - if (Idx < 0) - MappedOps.push_back(Idx); - else if (Idx < (int)SrcNumElts) - MappedOps.push_back(Idx - StartIdx[0]); - else - MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts); + if (Idx >= 0) { + if (Idx < (int)SrcNumElts) + Idx -= StartIdx[0]; + else + Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; + } + MappedOps.push_back(Idx); } setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, @@ -2977,22 +2973,20 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { EVT PtrVT = TLI.getPointerTy(); SmallVector<SDValue,8> Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { - if (Mask[i] < 0) { - Ops.push_back(DAG.getUNDEF(EltVT)); - } else { - int Idx = Mask[i]; - SDValue Res; + int Idx = Mask[i]; + SDValue Res; - if (Idx < (int)SrcNumElts) - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - EltVT, Src1, DAG.getConstant(Idx, PtrVT)); - else - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - EltVT, Src2, - DAG.getConstant(Idx - SrcNumElts, PtrVT)); + if (Idx < 0) { + Res = DAG.getUNDEF(EltVT); + } else { + SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; + if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; - Ops.push_back(Res); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), + EltVT, Src, DAG.getConstant(Idx, PtrVT)); } + + Ops.push_back(Res); } setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), @@ -3215,6 +3209,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { bool isInvariant = I.getMetadata("invariant.load") != 0; unsigned Alignment = I.getAlignment(); const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; @@ -3262,7 +3257,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, A, MachinePointerInfo(SV, Offsets[i]), isVolatile, - isNonTemporal, isInvariant, Alignment, TBAAInfo); + isNonTemporal, isInvariant, Alignment, TBAAInfo, + Ranges); Values[i] = L; Chains[ChainI] = L.getValue(1); @@ -3586,6 +3582,12 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } setValue(&I, Result); + } else { + // Assign order to result here. If the intrinsic does not produce a result, + // it won't be mapped to a SDNode and visit() will not assign it an order + // number. + ++SDNodeOrder; + AssignOrderingToNode(Result.getNode()); } } @@ -3627,17 +3629,6 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) { return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32); } -// implVisitAluOverflow - Lower arithmetic overflow instrinsics. -const char * -SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) { - SDValue Op1 = getValue(I.getArgOperand(0)); - SDValue Op2 = getValue(I.getArgOperand(1)); - - SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); - setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); - return 0; -} - /// visitExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. 
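The BUILD_VECTOR fallback above lowers a shuffle one lane at a time: mask entries below SrcNumElts extract from Src1, the rest from Src2 after rebasing, and negative entries become undef. The same selection logic on ordinary arrays (shuffle here is a local model, not a DAG helper):

#include <cassert>
#include <vector>

static std::vector<int> shuffle(const std::vector<int> &Src1,
                                const std::vector<int> &Src2,
                                const std::vector<int> &Mask) {
  const int SrcNumElts = (int)Src1.size();
  std::vector<int> Out;
  for (int Idx : Mask) {
    if (Idx < 0) { Out.push_back(0); continue; }   // undef lane, any value
    const std::vector<int> &Src = Idx < SrcNumElts ? Src1 : Src2;
    if (Idx >= SrcNumElts) Idx -= SrcNumElts;      // rebase into Src2
    Out.push_back(Src[Idx]);
  }
  return Out;
}

int main() {
  std::vector<int> A = {10, 11, 12, 13}, B = {20, 21, 22, 23};
  std::vector<int> R = shuffle(A, B, {0, 5, 2, 7});
  assert((R == std::vector<int>{10, 21, 12, 23}));
  return 0;
}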
void @@ -4397,9 +4388,8 @@ static unsigned getTruncatedArgReg(const SDValue &N) { const SDValue &CFR = Ext.getOperand(0); if (CFR.getOpcode() == ISD::CopyFromReg) return cast<RegisterSDNode>(CFR.getOperand(1))->getReg(); - else - if (CFR.getOpcode() == ISD::TRUNCATE) - return getTruncatedArgReg(CFR); + if (CFR.getOpcode() == ISD::TRUNCATE) + return getTruncatedArgReg(CFR); } return 0; } @@ -4428,7 +4418,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, // Some arguments' frame index is recorded during argument lowering. Offset = FuncInfo.getArgumentFrameIndex(Arg); if (Offset) - Reg = TRI->getFrameRegister(MF); + Reg = TRI->getFrameRegister(MF); if (!Reg && N.getNode()) { if (N.getOpcode() == ISD::CopyFromReg) @@ -4690,8 +4680,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { V = BCI->getOperand(0); const AllocaInst *AI = dyn_cast<AllocaInst>(V); // Don't handle byval struct arguments or VLAs, for example. - if (!AI) + if (!AI) { + DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); + DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); return 0; + } DenseMap<const AllocaInst*, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI == FuncInfo.StaticAllocaMap.end()) @@ -4837,7 +4830,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::x86_avx_vinsertf128_pd_256: case Intrinsic::x86_avx_vinsertf128_ps_256: - case Intrinsic::x86_avx_vinsertf128_si_256: { + case Intrinsic::x86_avx_vinsertf128_si_256: + case Intrinsic::x86_avx2_vinserti128: { DebugLoc dl = getCurDebugLoc(); EVT DestVT = TLI.getValueType(I.getType()); EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); @@ -4861,6 +4855,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::convertuu: { ISD::CvtCode Code = ISD::CVT_INVALID; switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. case Intrinsic::convertff: Code = ISD::CVT_FF; break; case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; case Intrinsic::convertfui: Code = ISD::CVT_FU; break; @@ -5093,18 +5088,28 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; } case Intrinsic::uadd_with_overflow: - return implVisitAluOverflow(I, ISD::UADDO); case Intrinsic::sadd_with_overflow: - return implVisitAluOverflow(I, ISD::SADDO); case Intrinsic::usub_with_overflow: - return implVisitAluOverflow(I, ISD::USUBO); case Intrinsic::ssub_with_overflow: - return implVisitAluOverflow(I, ISD::SSUBO); case Intrinsic::umul_with_overflow: - return implVisitAluOverflow(I, ISD::UMULO); - case Intrinsic::smul_with_overflow: - return implVisitAluOverflow(I, ISD::SMULO); + case Intrinsic::smul_with_overflow: { + ISD::NodeType Op; + switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
+ case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break; + case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break; + case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break; + case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break; + case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break; + case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break; + } + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); + setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); + return 0; + } case Intrinsic::prefetch: { SDValue Ops[5]; unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 8cf88e1..8393b41 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -556,8 +556,6 @@ private: void visitUserOp2(const Instruction &I) { llvm_unreachable("UserOp2 should not exist at instruction selection time!"); } - - const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op); void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 8aabc02..605509b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -508,7 +508,6 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { Worklist.push_back(CurDAG->getRoot().getNode()); - APInt Mask; APInt KnownZero; APInt KnownOne; @@ -539,8 +538,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { continue; unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); - Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits()); - CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne); + CurDAG->ComputeMaskedBits(Src, KnownZero, KnownOne); FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne); } while (!Worklist.empty()); } @@ -1444,7 +1442,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, APInt NeededMask = DesiredMask & ~ActualMask; APInt KnownZero, KnownOne; - CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne); + CurDAG->ComputeMaskedBits(LHS, KnownZero, KnownOne); // If all the missing bits in the or are already known to be set, match! if ((NeededMask & KnownOne) == NeededMask) diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 792de75..e341e15 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -39,28 +39,6 @@ static cl::opt<bool> AllowPromoteIntElem("promote-elements", cl::Hidden, cl::init(true), cl::desc("Allow promotion of integer vector element types")); -namespace llvm { -TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) { - bool isLocal = GV->hasLocalLinkage(); - bool isDeclaration = GV->isDeclaration(); - // FIXME: what should we do for protected and internal visibility? - // For variables, is internal different from hidden? - bool isHidden = GV->hasHiddenVisibility(); - - if (reloc == Reloc::PIC_) { - if (isLocal || isHidden) - return TLSModel::LocalDynamic; - else - return TLSModel::GeneralDynamic; - } else { - if (!isDeclaration || isHidden) - return TLSModel::LocalExec; - else - return TLSModel::InitialExec; - } -} -} - /// InitLibcallNames - Set default libcall names. 
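The consolidated switch above maps each *.with.overflow intrinsic to its ISD node (UADDO, SADDO, and so on), all of which produce the wrapped result plus an i1 overflow flag. A portable model of the unsigned-add flavor (uaddo32 is a made-up name):

#include <cassert>
#include <cstdint>

// llvm.uadd.with.overflow.i32: the result wraps mod 2^32; the flag is the
// carry-out, i.e. whether the true sum did not fit.
static bool uaddo32(uint32_t a, uint32_t b, uint32_t &res) {
  res = a + b;
  return res < a;
}

int main() {
  uint32_t r;
  assert(!uaddo32(1, 2, r) && r == 3);
  assert(uaddo32(0xFFFFFFFFu, 2, r) && r == 1);
  return 0;
}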
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 792de75..e341e15 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -39,28 +39,6 @@ static cl::opt<bool> AllowPromoteIntElem("promote-elements", cl::Hidden,
   cl::init(true),
   cl::desc("Allow promotion of integer vector element types"));
 
-namespace llvm {
-TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) {
-  bool isLocal = GV->hasLocalLinkage();
-  bool isDeclaration = GV->isDeclaration();
-  // FIXME: what should we do for protected and internal visibility?
-  // For variables, is internal different from hidden?
-  bool isHidden = GV->hasHiddenVisibility();
-
-  if (reloc == Reloc::PIC_) {
-    if (isLocal || isHidden)
-      return TLSModel::LocalDynamic;
-    else
-      return TLSModel::GeneralDynamic;
-  } else {
-    if (!isDeclaration || isHidden)
-      return TLSModel::LocalExec;
-    else
-      return TLSModel::InitialExec;
-  }
-}
-}
-
 /// InitLibcallNames - Set default libcall names.
 ///
 static void InitLibcallNames(const char **Names) {
@@ -1101,8 +1079,12 @@ unsigned TargetLowering::getJumpTableEncoding() const {
 SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                  SelectionDAG &DAG) const {
   // If our PIC model is GP relative, use the global offset table as the base.
-  if (getJumpTableEncoding() == MachineJumpTableInfo::EK_GPRel32BlockAddress)
+  unsigned JTEncoding = getJumpTableEncoding();
+
+  if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
+      (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
     return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
+
   return Table;
 }
@@ -1244,7 +1226,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
   if (Depth != 0) {
     // If not at the root, just compute the KnownZero/KnownOne bits to
     // simplify things downstream.
-    TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
+    TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
     return false;
   }
   // If this is the root being simplified, allow it to have multiple uses,
@@ -1263,8 +1245,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
   switch (Op.getOpcode()) {
   case ISD::Constant:
     // We know all of the bits for a constant!
-    KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask;
-    KnownZero = ~KnownOne & NewMask;
+    KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
+    KnownZero = ~KnownOne;
     return false;   // Don't fall through, will infinitely loop.
   case ISD::AND:
     // If the RHS is a constant, check to see if the LHS would be zero without
@@ -1274,8 +1256,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
       APInt LHSZero, LHSOne;
       // Do not increment Depth here; that can cause an infinite loop.
-      TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask,
-                                LHSZero, LHSOne, Depth);
+      TLO.DAG.ComputeMaskedBits(Op.getOperand(0), LHSZero, LHSOne, Depth);
       // If the LHS already has zeros where RHSC does, this and is dead.
       if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
         return TLO.CombineTo(Op, Op.getOperand(0));
@@ -1386,8 +1367,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     // bits on that side are also known to be set on the other side, turn this
     // into an AND, as we know the bits will be cleared.
     //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
-    if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known
-      if ((KnownOne & KnownOne2) == KnownOne) {
+    // NB: it is okay if more bits are known than are requested
+    if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side
+      if (KnownOne == KnownOne2) { // set bits are the same on both sides
         EVT VT = Op.getValueType();
         SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT);
         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
@@ -1725,11 +1707,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
 
     // If the sign bit is known one, the top bits match.
     if (KnownOne.intersects(InSignBit)) {
-      KnownOne  |= NewBits;
-      KnownZero &= ~NewBits;
+      KnownOne |= NewBits;
+      assert((KnownZero & NewBits) == 0);
     } else {   // Otherwise, top bits aren't known.
-      KnownOne  &= ~NewBits;
-      KnownZero &= ~NewBits;
+      assert((KnownOne & NewBits) == 0);
+      assert((KnownZero & NewBits) == 0);
    }
     break;
   }
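The (X | C1) ^ C2 --> (X | C1) & ~C2 fold annotated above holds whenever (C1 & C2) == C2, i.e. every bit that C2 flips is already forced to one by the OR. A brute-force check over 8-bit values, purely illustrative:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t C1 = 0x0f, C2 = 0x03;   // (C1 & C2) == C2 holds
      for (unsigned x = 0; x < 256; ++x) {
        uint8_t lhs = uint8_t((x | C1) ^ C2);
        uint8_t rhs = uint8_t((x | C1) & uint8_t(~C2));
        assert(lhs == rhs);  // XOR of known-one bits is just clearing them
      }
      return 0;
    }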
@@ -1863,7 +1845,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     // FALL THROUGH
   default:
     // Just use ComputeMaskedBits to compute output bits.
-    TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
+    TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
     break;
   }
 
@@ -1879,7 +1861,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
 /// in Mask are known to be either zero or one and return them in the
 /// KnownZero/KnownOne bitsets.
 void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
-                                                    const APInt &Mask,
                                                     APInt &KnownZero,
                                                     APInt &KnownOne,
                                                     const SelectionDAG &DAG,
@@ -1890,7 +1871,7 @@ void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
          "Should use MaskedValueIsZero if you don't know whether Op"
          " is a target node!");
-  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
 }
 
 /// ComputeNumSignBitsForTargetNode - This method can be implemented by
@@ -1934,9 +1915,8 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
   // Fall back to ComputeMaskedBits to catch other known cases.
   EVT OpVT = Val.getValueType();
   unsigned BitWidth = OpVT.getScalarType().getSizeInBits();
-  APInt Mask = APInt::getAllOnesValue(BitWidth);
   APInt KnownZero, KnownOne;
-  DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne);
+  DAG.ComputeMaskedBits(Val, KnownZero, KnownOne);
   return (KnownZero.countPopulation() == BitWidth - 1) &&
          (KnownOne.countPopulation() == 1);
 }
@@ -2432,8 +2412,15 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
 
   if (N0 == N1) {
     // We can always fold X == X for integer setcc's.
-    if (N0.getValueType().isInteger())
-      return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+    if (N0.getValueType().isInteger()) {
+      switch (getBooleanContents(N0.getValueType().isVector())) {
+      case UndefinedBooleanContent:
+      case ZeroOrOneBooleanContent:
+        return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+      case ZeroOrNegativeOneBooleanContent:
+        return DAG.getConstant(ISD::isTrueWhenEqual(Cond) ? -1 : 0, VT);
+      }
+    }
     unsigned UOF = ISD::getUnorderedFlavor(Cond);
     if (UOF == 2)   // FP operators that are undefined on NaNs.
       return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
@@ -2467,6 +2454,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       }
     }
 
+    // If RHS is a legal immediate value for a compare instruction, we need
+    // to be careful about increasing register pressure needlessly.
+    bool LegalRHSImm = false;
+
     if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
       if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
         // Turn (X+C1) == C2 --> X == C2-C1
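The X == X fold above now has to ask what the target means by "true": targets whose compares (typically vector compares) produce all-ones need a -1 constant rather than 1. A minimal sketch of the distinction, with a stand-in enum rather than LLVM's own:

    #include <cstdint>

    enum class BoolContent { Undefined, ZeroOrOne, ZeroOrNegativeOne };

    // Materialize "true" for a setcc result under the given convention.
    static int64_t trueConstant(BoolContent BC) {
      return BC == BoolContent::ZeroOrNegativeOne ? -1 : 1;
    }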
@@ -2501,25 +2492,33 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                                Cond);
         }
       }
+
+      // Could RHSC fold directly into a compare?
+      if (RHSC->getValueType(0).getSizeInBits() <= 64)
+        LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
     }
 
     // Simplify (X+Z) == X --> Z == 0
-    if (N0.getOperand(0) == N1)
-      return DAG.getSetCC(dl, VT, N0.getOperand(1),
-                          DAG.getConstant(0, N0.getValueType()), Cond);
-    if (N0.getOperand(1) == N1) {
-      if (DAG.isCommutativeBinOp(N0.getOpcode()))
-        return DAG.getSetCC(dl, VT, N0.getOperand(0),
-                            DAG.getConstant(0, N0.getValueType()), Cond);
-      else if (N0.getNode()->hasOneUse()) {
-        assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
-        // (Z-X) == X --> Z == X<<1
-        SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
-                                 N1,
+    // Don't do this if X is an immediate that can fold into a cmp
+    // instruction and X+Z has other uses. It could be an induction variable
+    // chain, and the transform would increase register pressure.
+    if (!LegalRHSImm || N0.getNode()->hasOneUse()) {
+      if (N0.getOperand(0) == N1)
+        return DAG.getSetCC(dl, VT, N0.getOperand(1),
+                            DAG.getConstant(0, N0.getValueType()), Cond);
+      if (N0.getOperand(1) == N1) {
+        if (DAG.isCommutativeBinOp(N0.getOpcode()))
+          return DAG.getSetCC(dl, VT, N0.getOperand(0),
+                              DAG.getConstant(0, N0.getValueType()), Cond);
+        else if (N0.getNode()->hasOneUse()) {
+          assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
+          // (Z-X) == X --> Z == X<<1
+          SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1,
                      DAG.getConstant(1, getShiftAmountTy(N1.getValueType())));
-        if (!DCI.isCalledByLegalizer())
-          DCI.AddToWorklist(SH.getNode());
-        return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+          if (!DCI.isCalledByLegalizer())
+            DCI.AddToWorklist(SH.getNode());
+          return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+        }
       }
     }
   }
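A source-level illustration of the case the new LegalRHSImm guard is worried about: below, t = i + 42 has a second use, and on most targets 42 folds directly into the compare, so rewriting t == 42 into i == 0 would keep both i and t live across the store. Hypothetical example, not code from the patch:

    int f(int i, int *out) {
      int t = i + 42;   // t has another use besides the compare
      *out = t;
      return t == 42;   // same as i == 0, but folding extends i's live range
    }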
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index c5bd3a3..26cf259 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -34,7 +34,8 @@ void SlotIndexes::releaseMemory() {
   mi2iMap.clear();
   MBBRanges.clear();
   idx2MBBMap.clear();
-  clearList();
+  indexList.clear();
+  ileAllocator.Reset();
 }
 
 bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
@@ -45,17 +46,15 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
   // iterator in lock-step (though skipping it over indexes which have
   // null pointers in the instruction field).
   // At each iteration assert that the instruction pointed to in the index
-  // is the same one pointed to by the MI iterator. This
+  // is the same one pointed to by the MI iterator.
 
   // FIXME: This can be simplified. The mi2iMap_, Idx2MBBMap, etc. should
   // only need to be set up once after the first numbering is computed.
 
   mf = &fn;
-  initList();
 
   // Check that the list contains only the sentinel.
-  assert(indexListHead->getNext() == 0 &&
-         "Index list non-empty at initial numbering?");
+  assert(indexList.empty() && "Index list non-empty at initial numbering?");
   assert(idx2MBBMap.empty() &&
          "Index -> MBB mapping non-empty at initial numbering?");
   assert(MBBRanges.empty() &&
@@ -68,7 +67,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
   MBBRanges.resize(mf->getNumBlockIDs());
   idx2MBBMap.reserve(mf->size());
 
-  push_back(createEntry(0, index));
+  indexList.push_back(createEntry(0, index));
 
   // Iterate over the function.
   for (MachineFunction::iterator mbbItr = mf->begin(), mbbEnd = mf->end();
@@ -76,7 +75,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
     MachineBasicBlock *mbb = &*mbbItr;
 
     // Insert an index for the MBB start.
-    SlotIndex blockStartIndex(back(), SlotIndex::Slot_Block);
+    SlotIndex blockStartIndex(&indexList.back(), SlotIndex::Slot_Block);
 
     for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end();
          miItr != miEnd; ++miItr) {
@@ -85,20 +84,20 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
         continue;
 
       // Insert a store index for the instr.
-      push_back(createEntry(mi, index += SlotIndex::InstrDist));
+      indexList.push_back(createEntry(mi, index += SlotIndex::InstrDist));
 
       // Save this base index in the maps.
-      mi2iMap.insert(std::make_pair(mi, SlotIndex(back(),
+      mi2iMap.insert(std::make_pair(mi, SlotIndex(&indexList.back(),
                                                   SlotIndex::Slot_Block)));
-      
+
       ++functionSize;
     }
 
     // We insert one blank instruction between basic blocks.
-    push_back(createEntry(0, index += SlotIndex::InstrDist));
+    indexList.push_back(createEntry(0, index += SlotIndex::InstrDist));
 
     MBBRanges[mbb->getNumber()].first = blockStartIndex;
-    MBBRanges[mbb->getNumber()].second = SlotIndex(back(),
+    MBBRanges[mbb->getNumber()].second = SlotIndex(&indexList.back(),
                                                    SlotIndex::Slot_Block);
     idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb));
   }
@@ -119,38 +118,37 @@
   unsigned index = 0;
 
-  for (IndexListEntry *curEntry = front(); curEntry != getTail();
-       curEntry = curEntry->getNext()) {
-    curEntry->setIndex(index);
+  for (IndexList::iterator I = indexList.begin(), E = indexList.end();
+       I != E; ++I) {
+    I->setIndex(index);
     index += SlotIndex::InstrDist;
   }
 }
 
-// Renumber indexes locally after curEntry was inserted, but failed to get a new
+// Renumber indexes locally after curItr was inserted, but failed to get a new
 // index.
-void SlotIndexes::renumberIndexes(IndexListEntry *curEntry) {
+void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {
   // Number indexes with half the default spacing so we can catch up quickly.
   const unsigned Space = SlotIndex::InstrDist/2;
   assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM");
 
-  IndexListEntry *start = curEntry->getPrev();
-  unsigned index = start->getIndex();
-  IndexListEntry *tail = getTail();
+  IndexList::iterator startItr = prior(curItr);
+  unsigned index = startItr->getIndex();
   do {
-    curEntry->setIndex(index += Space);
-    curEntry = curEntry->getNext();
+    curItr->setIndex(index += Space);
+    ++curItr;
     // If the next index is bigger, we have caught up.
-  } while (curEntry != tail && curEntry->getIndex() <= index);
+  } while (curItr != indexList.end() && curItr->getIndex() <= index);
 
-  DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << start->getIndex() << '-'
+  DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << startItr->getIndex() << '-'
                << index << " ***\n");
   ++NumLocalRenum;
 }
 
 void SlotIndexes::dump() const {
-  for (const IndexListEntry *itr = front(); itr != getTail();
-       itr = itr->getNext()) {
+  for (IndexList::const_iterator itr = indexList.begin();
+       itr != indexList.end(); ++itr) {
     dbgs() << itr->getIndex() << " ";
 
     if (itr->getInstr() != 0) {
@@ -168,7 +166,7 @@ void SlotIndexes::dump() const {
 // Print a SlotIndex to a raw_ostream.
 void SlotIndex::print(raw_ostream &os) const {
   if (isValid())
-    os << entry().getIndex() << "Berd"[getSlot()];
+    os << listEntry()->getIndex() << "Berd"[getSlot()];
   else
     os << "invalid";
 }
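The local renumbering above walks forward from the insertion point handing out indexes at half the normal spacing until it runs back into numbers that are already large enough. The same catch-up loop over a plain vector, as a self-contained toy:

    #include <vector>

    // Renumber entries starting at Pos with the given spacing until the
    // existing numbering catches up (mirrors the do/while in renumberIndexes).
    static void renumberLocal(std::vector<unsigned> &Idx, size_t Pos,
                              unsigned Space) {
      if (Pos == 0 || Pos >= Idx.size())
        return;                      // need a predecessor index to start from
      unsigned index = Idx[Pos - 1];
      size_t i = Pos;
      do {
        Idx[i] = index += Space;
        ++i;
      } while (i < Idx.size() && Idx[i] <= index);
    }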
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index b72dea7..4cd22eb 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -11,8 +11,8 @@
 
 #include "Spiller.h"
 #include "VirtRegMap.h"
-#include "LiveRangeEdit.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -116,7 +116,7 @@ protected:
     }
 
     // Create a new vreg & interval for this instr.
-    LiveInterval *newLI = &LRE.create(*lis, *vrm);
+    LiveInterval *newLI = &LRE.create();
     newLI->weight = HUGE_VALF;
 
     // Update the reg operands & kill flags.
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index ab9b524..9959f74 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -14,10 +14,10 @@
 #define DEBUG_TYPE "regalloc"
 #include "SplitKit.h"
-#include "LiveRangeEdit.h"
 #include "VirtRegMap.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
@@ -351,7 +351,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
 
   // We don't need an AliasAnalysis since we will only be performing
   // cheap-as-a-copy remats anyway.
-  Edit->anyRematerializable(LIS, TII, 0);
+  Edit->anyRematerializable(0);
 }
 
 void SplitEditor::dump() const {
@@ -436,8 +436,8 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
 
   // Attempt cheap-as-a-copy rematerialization.
   LiveRangeEdit::Remat RM(ParentVNI);
-  if (Edit->canRematerializeAt(RM, UseIdx, true, LIS)) {
-    Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI, Late);
+  if (Edit->canRematerializeAt(RM, UseIdx, true)) {
+    Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
     ++NumRemats;
   } else {
     // Can't remat, just insert a copy from parent.
@@ -456,11 +456,11 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
 unsigned SplitEditor::openIntv() {
   // Create the complement as index 0.
   if (Edit->empty())
-    Edit->create(LIS, VRM);
+    Edit->create();
 
   // Create the open interval.
   OpenIdx = Edit->size();
-  Edit->create(LIS, VRM);
+  Edit->create();
   return OpenIdx;
 }
 
@@ -1033,7 +1033,7 @@ void SplitEditor::deleteRematVictims() {
   if (Dead.empty())
     return;
 
-  Edit->eliminateDeadDefs(Dead, LIS, VRM, TII);
+  Edit->eliminateDeadDefs(Dead);
 }
 
 void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
@@ -1108,7 +1108,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
     SmallVector<LiveInterval*, 8> dups;
     dups.push_back(li);
     for (unsigned j = 1; j != NumComp; ++j)
-      dups.push_back(&Edit->create(LIS, VRM));
+      dups.push_back(&Edit->create());
     ConEQ.Distribute(&dups[0], MRI);
     // The new intervals all map back to i.
     if (LRMap)
@@ -1116,7 +1116,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
   }
 
   // Calculate spill weight and allocation hints for new intervals.
-  Edit->calculateRegClassAndHint(VRM.getMachineFunction(), LIS, SA.Loops);
+  Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops);
 
   assert(!LRMap || LRMap->size() == Edit->size());
 }
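The Spiller and SplitKit hunks above all shed their LIS/VRM/TII arguments because LiveRangeEdit now captures its collaborators once at construction. The shape of that refactor, with stand-in types rather than the real classes:

    #include <vector>

    struct LiveIntervalsStub {};  // stand-ins for the analyses the real
    struct VirtRegMapStub {};     // LiveRangeEdit is constructed with

    class LiveRangeEditSketch {
      LiveIntervalsStub &LIS;     // captured once here...
      VirtRegMapStub &VRM;
      std::vector<int> NewIntervals;
    public:
      LiveRangeEditSketch(LiveIntervalsStub &lis, VirtRegMapStub &vrm)
          : LIS(lis), VRM(vrm) {}
      int &create() {             // ...so create() needs no arguments
        NewIntervals.push_back(0);
        return NewIntervals.back();
      }
    };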
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index be25855..2beb928 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -78,6 +78,9 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
   unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0;
   unsigned Reg1 = MI->getOperand(Idx1).getReg();
   unsigned Reg2 = MI->getOperand(Idx2).getReg();
+  unsigned SubReg0 = HasDef ? MI->getOperand(0).getSubReg() : 0;
+  unsigned SubReg1 = MI->getOperand(Idx1).getSubReg();
+  unsigned SubReg2 = MI->getOperand(Idx2).getSubReg();
   bool Reg1IsKill = MI->getOperand(Idx1).isKill();
   bool Reg2IsKill = MI->getOperand(Idx2).isKill();
   // If destination is tied to either of the commuted source register, then
@@ -86,10 +89,12 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
       MI->getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) {
     Reg2IsKill = false;
     Reg0 = Reg2;
+    SubReg0 = SubReg2;
   } else if (HasDef && Reg0 == Reg2 &&
              MI->getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) {
     Reg1IsKill = false;
     Reg0 = Reg1;
+    SubReg0 = SubReg1;
   }
 
   if (NewMI) {
@@ -98,19 +103,23 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
     MachineFunction &MF = *MI->getParent()->getParent();
     if (HasDef)
       return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
-        .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
-        .addReg(Reg2, getKillRegState(Reg2IsKill))
-        .addReg(Reg1, getKillRegState(Reg2IsKill));
+        .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead), SubReg0)
+        .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2)
+        .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1);
     else
       return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
-        .addReg(Reg2, getKillRegState(Reg2IsKill))
-        .addReg(Reg1, getKillRegState(Reg2IsKill));
+        .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2)
+        .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1);
   }
 
-  if (HasDef)
+  if (HasDef) {
     MI->getOperand(0).setReg(Reg0);
+    MI->getOperand(0).setSubReg(SubReg0);
+  }
   MI->getOperand(Idx2).setReg(Reg1);
   MI->getOperand(Idx1).setReg(Reg2);
+  MI->getOperand(Idx2).setSubReg(SubReg1);
+  MI->getOperand(Idx1).setSubReg(SubReg2);
   MI->getOperand(Idx2).setIsKill(Reg1IsKill);
   MI->getOperand(Idx1).setIsKill(Reg2IsKill);
   return MI;
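The commuteInstruction fix above threads sub-register indices through every path that already swapped registers and kill flags; forgetting any one per-operand field silently corrupts the operand. The invariant in miniature, with toy types rather than MachineOperand:

    #include <utility>

    struct OperandSketch {    // stand-in for one machine operand
      unsigned Reg;
      unsigned SubReg;
      bool IsKill;
    };

    static void commute(OperandSketch &A, OperandSketch &B) {
      std::swap(A.Reg, B.Reg);
      std::swap(A.SubReg, B.SubReg);  // the field the patch starts swapping
      std::swap(A.IsKill, B.IsKill);
    }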
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 24b8bc2..c30b133 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1183,8 +1183,9 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
 /// TryInstructionTransform - For the case where an instruction has a single
 /// pair of tied register operands, attempt some transformations that may
 /// either eliminate the tied operands or improve the opportunities for
-/// coalescing away the register copy. Returns true if the tied operands
-/// are eliminated altogether.
+/// coalescing away the register copy. Returns true if no copy needs to be
+/// inserted to untie mi's operands (either because they were untied, or
+/// because mi was rescheduled, and will be visited again later).
 bool TwoAddressInstructionPass::
 TryInstructionTransform(MachineBasicBlock::iterator &mi,
                         MachineBasicBlock::iterator &nmi,
@@ -1380,7 +1381,6 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
 /// runOnMachineFunction - Reduce two-address instructions to two operands.
 ///
 bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
-  DEBUG(dbgs() << "Machine Function\n");
   const TargetMachine &TM = MF.getTarget();
   MRI = &MF.getRegInfo();
   TII = TM.getInstrInfo();
@@ -1595,19 +1595,19 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
           MadeChange = true;
 
           DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
-        }
 
-        // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form.
-        if (mi->isInsertSubreg()) {
-          // From %reg = INSERT_SUBREG %reg, %subreg, subidx
-          // To   %reg:subidx = COPY %subreg
-          unsigned SubIdx = mi->getOperand(3).getImm();
-          mi->RemoveOperand(3);
-          assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
-          mi->getOperand(0).setSubReg(SubIdx);
-          mi->RemoveOperand(1);
-          mi->setDesc(TII->get(TargetOpcode::COPY));
-          DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
+          // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form.
+          if (mi->isInsertSubreg()) {
+            // From %reg = INSERT_SUBREG %reg, %subreg, subidx
+            // To   %reg:subidx = COPY %subreg
+            unsigned SubIdx = mi->getOperand(3).getImm();
+            mi->RemoveOperand(3);
+            assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
+            mi->getOperand(0).setSubReg(SubIdx);
+            mi->RemoveOperand(1);
+            mi->setDesc(TII->get(TargetOpcode::COPY));
+            DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
+          }
         }
 
         // Clear TiedOperands here instead of at the top of the loop
@@ -1833,6 +1833,7 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
     SmallSet<unsigned, 4> Seen;
     for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
       unsigned SrcReg = MI->getOperand(i).getReg();
+      unsigned SrcSubIdx = MI->getOperand(i).getSubReg();
      unsigned SubIdx = MI->getOperand(i+1).getImm();
       // DefMI of NULL means the value does not have a vreg in this block
       // i.e., it's a physical register or a subreg.
@@ -1888,7 +1889,7 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
       MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc,
                                      MI->getDebugLoc(), TII->get(TargetOpcode::COPY))
          .addReg(DstReg, RegState::Define, SubIdx)
-          .addReg(SrcReg, getKillRegState(isKill));
+          .addReg(SrcReg, getKillRegState(isKill), SrcSubIdx);
       MI->getOperand(i).setReg(0);
       if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg))
         LV->replaceKillInstruction(SrcReg, MI, CopyMI);
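For the EliminateRegSequences change just above: each REG_SEQUENCE element becomes a copy into one sub-register of the destination, and the fix also carries the source operand's own sub-register index onto that copy. A toy printer for the resulting operand shape (illustrative only, not the compiler's MI printer):

    #include <cstdio>

    static void printSeqCopy(unsigned DstReg, unsigned DstSubIdx,
                             unsigned SrcReg, unsigned SrcSubIdx) {
      // %DstReg:DstSubIdx = COPY %SrcReg[:SrcSubIdx]
      std::printf("%%%u:%u = COPY %%%u", DstReg, DstSubIdx, SrcReg);
      if (SrcSubIdx)
        std::printf(":%u", SrcSubIdx);
      std::printf("\n");
    }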