author     Stephen Hines <srhines@google.com>   2013-03-05 23:27:24 -0800
committer  Stephen Hines <srhines@google.com>   2013-03-05 23:27:24 -0800
commit     5adb136be579e8fff3734461580cb34d1d2983b8 (patch)
tree       bff1a422e9c9789df563aaf9a7e91e63e8ec0384 /lib/CodeGen
parent     227a4a4ade38716ba9eb3205f48b52910f3b955e (diff)
parent     b3201c5cf1e183d840f7c99ff779d57f1549d8e5 (diff)
Merge commit 'b3201c5cf1e183d840f7c99ff779d57f1549d8e5' into merge_20130226
Conflicts:
include/llvm/Support/ELF.h
lib/Support/DeltaAlgorithm.cpp
Change-Id: I24a4fbce62eb39d924efee3c687b55e1e17b30cd
Diffstat (limited to 'lib/CodeGen')
83 files changed, 5048 insertions, 3817 deletions
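
Note: most of the mechanical churn in the hunks below comes from MCStreamer's value-emission interface dropping its trailing address-space argument, so calls like EmitIntValue(Val, Size, 0) become EmitIntValue(Val, Size). A minimal self-contained sketch of that call-site pattern follows; the Streamer struct here is a stand-in for illustration only, not LLVM's real MCStreamer API:

#include <cstdint>
#include <iostream>

// Stand-in for MCStreamer, modeling only the signature change in this patch.
struct Streamer {
  // Old interface: every emission call carried an address-space argument,
  // which in practice was always 0 at the call sites touched below.
  void EmitIntValueOld(uint64_t Value, unsigned Size, unsigned AddrSpace) {
    std::cout << "emit " << Value << " (" << Size << " bytes, AS "
              << AddrSpace << ")\n";
  }
  // New interface: the dead parameter is gone, so callers drop the trailing 0.
  void EmitIntValue(uint64_t Value, unsigned Size) {
    std::cout << "emit " << Value << " (" << Size << " bytes)\n";
  }
};

int main() {
  Streamer S;
  S.EmitIntValueOld(42, 4, 0); // pre-change call site
  S.EmitIntValue(42, 4);       // post-change call site
}
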
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 152d9fa..c50f8b5 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -151,23 +151,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
   std::vector<unsigned> &KillIndices = State->GetKillIndices();
   std::vector<unsigned> &DefIndices = State->GetDefIndices();
 
-  // Determine the live-out physregs for this block.
-  if (IsReturnBlock) {
-    // In a return block, examine the function live-out regs.
-    for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
-         E = MRI.liveout_end(); I != E; ++I) {
-      for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
-        unsigned Reg = *AI;
-        State->UnionGroups(Reg, 0);
-        KillIndices[Reg] = BB->size();
-        DefIndices[Reg] = ~0u;
-      }
-    }
-  }
-
-  // In a non-return block, examine the live-in regs of all successors.
-  // Note a return block can have successors if the return instruction is
-  // predicated.
+  // Examine the live-in regs of all successors.
   for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
        SE = BB->succ_end(); SI != SE; ++SI)
     for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
index 94754a0..3fa1f8f 100644
--- a/lib/CodeGen/AllocationOrder.cpp
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -22,7 +22,6 @@
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
 
@@ -45,4 +44,9 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
       dbgs() << '\n';
     }
   });
+#ifndef NDEBUG
+  for (unsigned I = 0, E = Hints.size(); I != E; ++I)
+    assert(std::find(Order.begin(), Order.end(), Hints[I]) != Order.end() &&
+           "Target hint is outside allocation order.");
+#endif
 }
diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h
index a5293f6..aed461a 100644
--- a/lib/CodeGen/AllocationOrder.h
+++ b/lib/CodeGen/AllocationOrder.h
@@ -39,6 +39,9 @@ public:
                   const VirtRegMap &VRM,
                   const RegisterClassInfo &RegClassInfo);
 
+  /// Get the allocation order without reordered hints.
+  ArrayRef<MCPhysReg> getOrder() const { return Order; }
+
   /// Return the next physical register in the allocation order, or 0.
   /// It is safe to call next() again after it returned 0, it will keep
   /// returning 0 until rewind() is called.
@@ -53,6 +56,18 @@ public:
     return 0;
   }
 
+  /// As next(), but allow duplicates to be returned, and stop before the
+  /// Limit'th register in the RegisterClassInfo allocation order.
+  ///
+  /// This can produce more than Limit registers if there are hints.
+  unsigned nextWithDups(unsigned Limit) {
+    if (Pos < 0)
+      return Hints.end()[Pos++];
+    if (Pos < int(Limit))
+      return Order[Pos++];
+    return 0;
+  }
+
   /// Start over from the beginning.
   void rewind() { Pos = -int(Hints.size()); }
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index aaba144..c7abf7a 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -265,8 +265,7 @@ static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) {
 /// between it and the return.
 ///
 /// This function only tests target-independent requirements.
-bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attribute CalleeRetAttr,
-                                const TargetLowering &TLI) {
+bool llvm::isInTailCallPosition(ImmutableCallSite CS,const TargetLowering &TLI){
   const Instruction *I = CS.getInstruction();
   const BasicBlock *ExitBB = I->getParent();
   const TerminatorInst *Term = ExitBB->getTerminator();
@@ -312,14 +311,16 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attribute CalleeRetAttr,
   // Conservatively require the attributes of the call to match those of
   // the return. Ignore noalias because it doesn't affect the call sequence.
   const Function *F = ExitBB->getParent();
-  Attribute CallerRetAttr = F->getAttributes().getRetAttributes();
-  if (AttrBuilder(CalleeRetAttr).removeAttribute(Attribute::NoAlias) !=
-      AttrBuilder(CallerRetAttr).removeAttribute(Attribute::NoAlias))
+  AttributeSet CallerAttrs = F->getAttributes();
+  if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex).
+        removeAttribute(Attribute::NoAlias) !=
+      AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex).
+        removeAttribute(Attribute::NoAlias))
     return false;
 
   // It's not safe to eliminate the sign / zero extension of the return value.
-  if (CallerRetAttr.hasAttribute(Attribute::ZExt) ||
-      CallerRetAttr.hasAttribute(Attribute::SExt))
+  if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
+      CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
     return false;
 
   // Otherwise, make sure the unmodified return value of I is the return value.
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 1728331..188047d 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -32,12 +32,11 @@
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
-cl::opt<bool>
+static cl::opt<bool>
 EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden,
   cl::desc("Generate ARM EHABI tables with unwinding descriptors"),
   cl::init(false));
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index cbcc290..aa9d43b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -391,9 +391,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
   // - pointer to mangled symbol above with initializer
   unsigned PtrSize = TD->getPointerSizeInBits()/8;
   OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
-                              PtrSize, 0);
-  OutStreamer.EmitIntValue(0, PtrSize, 0);
-  OutStreamer.EmitSymbolValue(MangSym, PtrSize, 0);
+                              PtrSize);
+  OutStreamer.EmitIntValue(0, PtrSize);
+  OutStreamer.EmitSymbolValue(MangSym, PtrSize);
 
   OutStreamer.AddBlankLine();
   return;
@@ -1040,7 +1040,7 @@ void AsmPrinter::EmitConstantPool() {
 
     // Emit inter-object padding for alignment.
     unsigned AlignMask = CPE.getAlignment() - 1;
     unsigned NewOffset = (Offset + AlignMask) & ~AlignMask;
-    OutStreamer.EmitFill(NewOffset - Offset, 0/*fillval*/, 0/*addrspace*/);
+    OutStreamer.EmitZeros(NewOffset - Offset);
 
     Type *Ty = CPE.getType();
     Offset = NewOffset + TM.getDataLayout()->getTypeAllocSize(Ty);
@@ -1203,7 +1203,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
   assert(Value && "Unknown entry kind!");
 
   unsigned EntrySize = MJTI->getEntrySize(*TM.getDataLayout());
-  OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0);
+  OutStreamer.EmitValue(Value, EntrySize);
 }
 
@@ -1326,19 +1326,19 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
 /// EmitInt8 - Emit a byte directive and value.
 ///
 void AsmPrinter::EmitInt8(int Value) const {
-  OutStreamer.EmitIntValue(Value, 1, 0/*addrspace*/);
+  OutStreamer.EmitIntValue(Value, 1);
 }
 
 /// EmitInt16 - Emit a short directive and value.
 ///
 void AsmPrinter::EmitInt16(int Value) const {
-  OutStreamer.EmitIntValue(Value, 2, 0/*addrspace*/);
+  OutStreamer.EmitIntValue(Value, 2);
 }
 
 /// EmitInt32 - Emit a long directive and value.
 ///
 void AsmPrinter::EmitInt32(int Value) const {
-  OutStreamer.EmitIntValue(Value, 4, 0/*addrspace*/);
+  OutStreamer.EmitIntValue(Value, 4);
 }
 
 /// EmitLabelDifference - Emit something like ".long Hi-Lo" where the size
@@ -1353,14 +1353,14 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
                                      OutContext);
 
   if (!MAI->hasSetDirective()) {
-    OutStreamer.EmitValue(Diff, Size, 0/*AddrSpace*/);
+    OutStreamer.EmitValue(Diff, Size);
     return;
   }
 
   // Otherwise, emit with .set (aka assignment).
   MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
   OutStreamer.EmitAssignment(SetLabel, Diff);
-  OutStreamer.EmitSymbolValue(SetLabel, Size, 0/*AddrSpace*/);
+  OutStreamer.EmitSymbolValue(SetLabel, Size);
 }
 
 /// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo"
@@ -1384,12 +1384,12 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
                                    OutContext);
 
   if (!MAI->hasSetDirective())
-    OutStreamer.EmitValue(Diff, 4, 0/*AddrSpace*/);
+    OutStreamer.EmitValue(Diff, 4);
   else {
     // Otherwise, emit with .set (aka assignment).
     MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
     OutStreamer.EmitAssignment(SetLabel, Diff);
-    OutStreamer.EmitSymbolValue(SetLabel, 4, 0/*AddrSpace*/);
+    OutStreamer.EmitSymbolValue(SetLabel, 4);
   }
 }
 
@@ -1407,7 +1407,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
                                      MCConstantExpr::Create(Offset, OutContext),
                                      OutContext);
 
-  OutStreamer.EmitValue(Expr, Size, 0/*AddrSpace*/);
+  OutStreamer.EmitValue(Expr, Size);
 }
 
@@ -1746,90 +1746,48 @@ static void emitGlobalConstantStruct(const ConstantStruct *CS,
 
 static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
                                  AsmPrinter &AP) {
-  if (CFP->getType()->isHalfTy()) {
-    if (AP.isVerbose()) {
-      SmallString<10> Str;
-      CFP->getValueAPF().toString(Str);
-      AP.OutStreamer.GetCommentOS() << "half " << Str << '\n';
-    }
-    uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
-    AP.OutStreamer.EmitIntValue(Val, 2, AddrSpace);
-    return;
-  }
-
-  if (CFP->getType()->isFloatTy()) {
-    if (AP.isVerbose()) {
-      float Val = CFP->getValueAPF().convertToFloat();
-      uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
-      AP.OutStreamer.GetCommentOS() << "float " << Val << '\n'
-                                    << " (" << format("0x%x", IntVal) << ")\n";
-    }
-    uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
-    AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace);
-    return;
-  }
+  APInt API = CFP->getValueAPF().bitcastToAPInt();
 
-  // FP Constants are printed as integer constants to avoid losing
-  // precision.
-  if (CFP->getType()->isDoubleTy()) {
-    if (AP.isVerbose()) {
-      double Val = CFP->getValueAPF().convertToDouble();
-      uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
-      AP.OutStreamer.GetCommentOS() << "double " << Val << '\n'
-                                    << " (" << format("0x%lx", IntVal) << ")\n";
-    }
+  // First print a comment with what we think the original floating-point value
+  // should have been.
+  if (AP.isVerbose()) {
+    SmallString<8> StrVal;
+    CFP->getValueAPF().toString(StrVal);
 
-    uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
-    AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
-    return;
+    CFP->getType()->print(AP.OutStreamer.GetCommentOS());
+    AP.OutStreamer.GetCommentOS() << ' ' << StrVal << '\n';
   }
 
-  if (CFP->getType()->isX86_FP80Ty() || CFP->getType()->isFP128Ty()) {
-    // all long double variants are printed as hex
-    // API needed to prevent premature destruction
-    APInt API = CFP->getValueAPF().bitcastToAPInt();
-    const uint64_t *p = API.getRawData();
-    if (AP.isVerbose()) {
-      // Convert to double so we can print the approximate val as a comment.
-      SmallString<8> StrVal;
-      CFP->getValueAPF().toString(StrVal);
+  // Now iterate through the APInt chunks, emitting them in endian-correct
+  // order, possibly with a smaller chunk at beginning/end (e.g. for x87 80-bit
+  // floats).
+  unsigned NumBytes = API.getBitWidth() / 8;
+  unsigned TrailingBytes = NumBytes % sizeof(uint64_t);
+  const uint64_t *p = API.getRawData();
 
-      const char *TyNote = CFP->getType()->isFP128Ty() ? "fp128 " : "x86_fp80 ";
-      AP.OutStreamer.GetCommentOS() << TyNote << StrVal << '\n';
-    }
+  // PPC's long double has odd notions of endianness compared to how LLVM
+  // handles it: p[0] goes first for *big* endian on PPC.
+  if (AP.TM.getDataLayout()->isBigEndian() != CFP->getType()->isPPC_FP128Ty()) {
+    int Chunk = API.getNumWords() - 1;
 
-    // The 80-bit type is made of a 64-bit and 16-bit value, the 128-bit has 2
-    // 64-bit words.
-    uint32_t TrailingSize = CFP->getType()->isFP128Ty() ? 8 : 2;
+    if (TrailingBytes)
+      AP.OutStreamer.EmitIntValue(p[Chunk--], TrailingBytes, AddrSpace);
 
-    if (AP.TM.getDataLayout()->isBigEndian()) {
-      AP.OutStreamer.EmitIntValue(p[1], TrailingSize, AddrSpace);
-      AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
-    } else {
-      AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
-      AP.OutStreamer.EmitIntValue(p[1], TrailingSize, AddrSpace);
-    }
+    for (; Chunk >= 0; --Chunk)
+      AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace);
+  } else {
+    unsigned Chunk;
+    for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk)
+      AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace);
 
-    // Emit the tail padding for the long double.
-    const DataLayout &TD = *AP.TM.getDataLayout();
-    AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) -
-                             TD.getTypeStoreSize(CFP->getType()), AddrSpace);
-    return;
+    if (TrailingBytes)
+      AP.OutStreamer.EmitIntValue(p[Chunk], TrailingBytes, AddrSpace);
   }
 
-  assert(CFP->getType()->isPPC_FP128Ty() &&
-         "Floating point constant type not handled");
-  // All long double variants are printed as hex
-  // API needed to prevent premature destruction.
-  APInt API = CFP->getValueAPF().bitcastToAPInt();
-  const uint64_t *p = API.getRawData();
-  if (AP.TM.getDataLayout()->isBigEndian()) {
-    AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
-    AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace);
-  } else {
-    AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace);
-    AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
-  }
+  // Emit the tail padding for the long double.
+  const DataLayout &TD = *AP.TM.getDataLayout();
+  AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) -
+                           TD.getTypeStoreSize(CFP->getType()), AddrSpace);
 }
 
 static void emitGlobalConstantLargeInt(const ConstantInt *CI,
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index ece92d8..156acac 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -46,7 +46,7 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
   if (isVerbose() && Desc)
     OutStreamer.AddComment(Desc);
 
-  OutStreamer.EmitULEB128IntValue(Value, 0/*addrspace*/, PadTo);
+  OutStreamer.EmitULEB128IntValue(Value, PadTo);
 }
 
 /// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
@@ -58,7 +58,7 @@ void AsmPrinter::EmitCFAByte(unsigned Val) const {
     else
       OutStreamer.AddComment(dwarf::CallFrameString(Val));
   }
-  OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
+  OutStreamer.EmitIntValue(Val, 1);
 }
 
 static const char *DecodeDWARFEncoding(unsigned Encoding) {
@@ -102,7 +102,7 @@ void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const {
                             DecodeDWARFEncoding(Val));
   }
 
-  OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
+  OutStreamer.EmitIntValue(Val, 1);
 }
 
 /// GetSizeOfEncodedValue - Return the size of the encoding in bytes.
@@ -126,9 +126,9 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV,
     const MCExpr *Exp =
       TLOF.getTTypeGlobalReference(GV, Mang, MMI, Encoding, OutStreamer);
-    OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding), /*addrspace*/0);
+    OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding));
   } else
-    OutStreamer.EmitIntValue(0, GetSizeOfEncodedValue(Encoding), 0);
+    OutStreamer.EmitIntValue(0, GetSizeOfEncodedValue(Encoding));
 }
 
 /// EmitSectionOffset - Emit the 4-byte offset of Label from the start of its
@@ -157,7 +157,7 @@ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
   // If the section in question will end up with an address of 0 anyway, we can
   // just emit an absolute reference to save a relocation.
   if (Section.isBaseAddressKnownZero()) {
-    OutStreamer.EmitSymbolValue(Label, 4, 0/*AddrSpace*/);
+    OutStreamer.EmitSymbolValue(Label, 4);
     return;
   }
 
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 4890627..8b9a884 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -38,7 +38,7 @@ using namespace llvm;
 namespace {
   struct SrcMgrDiagInfo {
     const MDNode *LocInfo;
-    LLVMContext::DiagHandlerTy DiagHandler;
+    LLVMContext::InlineAsmDiagHandlerTy DiagHandler;
     void *DiagContext;
   };
 }
@@ -89,15 +89,15 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
   SourceMgr SrcMgr;
   SrcMgrDiagInfo DiagInfo;
 
-  // If the current LLVMContext has a diagnostic handler, set it in SourceMgr.
+  // If the current LLVMContext has an inline asm handler, set it in SourceMgr.
   LLVMContext &LLVMCtx = MMI->getModule()->getContext();
   bool HasDiagHandler = false;
-  if (LLVMCtx.getDiagnosticHandler() != 0) {
+  if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) {
     // If the source manager has an issue, we arrange for srcMgrDiagHandler
     // to be invoked, getting DiagInfo passed into it.
     DiagInfo.LocInfo = LocMDNode;
-    DiagInfo.DiagHandler = LLVMCtx.getDiagnosticHandler();
-    DiagInfo.DiagContext = LLVMCtx.getDiagnosticContext();
+    DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
+    DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
     SrcMgr.setDiagHandler(srcMgrDiagHandler, &DiagInfo);
     HasDiagHandler = true;
   }
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index a913ca4..4ded281 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -193,18 +193,20 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
   case dwarf::DW_FORM_data1: Size = 1; break;
   case dwarf::DW_FORM_ref2:  // Fall thru
   case dwarf::DW_FORM_data2: Size = 2; break;
+  case dwarf::DW_FORM_sec_offset: // Fall thru
   case dwarf::DW_FORM_ref4:  // Fall thru
   case dwarf::DW_FORM_data4: Size = 4; break;
   case dwarf::DW_FORM_ref8:  // Fall thru
   case dwarf::DW_FORM_data8: Size = 8; break;
   case dwarf::DW_FORM_GNU_str_index: Asm->EmitULEB128(Integer); return;
+  case dwarf::DW_FORM_GNU_addr_index: Asm->EmitULEB128(Integer); return;
   case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
   case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
   case dwarf::DW_FORM_addr:
     Size = Asm->getDataLayout().getPointerSize(); break;
   default: llvm_unreachable("DIE Value form not supported yet");
   }
-  Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/);
+  Asm->OutStreamer.EmitIntValue(Integer, Size);
 }
 
 /// SizeOf - Determine size of integer value in bytes.
@@ -217,11 +219,13 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
   case dwarf::DW_FORM_data1: return sizeof(int8_t);
   case dwarf::DW_FORM_ref2:  // Fall thru
   case dwarf::DW_FORM_data2: return sizeof(int16_t);
+  case dwarf::DW_FORM_sec_offset: // Fall thru
   case dwarf::DW_FORM_ref4:  // Fall thru
   case dwarf::DW_FORM_data4: return sizeof(int32_t);
   case dwarf::DW_FORM_ref8:  // Fall thru
   case dwarf::DW_FORM_data8: return sizeof(int64_t);
   case dwarf::DW_FORM_GNU_str_index: return MCAsmInfo::getULEB128Size(Integer);
+  case dwarf::DW_FORM_GNU_addr_index: return MCAsmInfo::getULEB128Size(Integer);
   case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer);
   case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer);
   case dwarf::DW_FORM_addr:  return AP->getDataLayout().getPointerSize();
@@ -243,13 +247,14 @@ void DIEInteger::print(raw_ostream &O) {
 /// EmitValue - Emit label value.
 ///
 void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const {
-  AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form), 0/*AddrSpace*/);
+  AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form));
 }
 
 /// SizeOf - Determine size of label value in bytes.
 ///
 unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const {
   if (Form == dwarf::DW_FORM_data4) return 4;
+  if (Form == dwarf::DW_FORM_sec_offset) return 4;
   if (Form == dwarf::DW_FORM_strp) return 4;
   return AP->getDataLayout().getPointerSize();
 }
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index c193999..f58ec9b 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -173,7 +173,7 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
         MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context),
                                 MCSymbolRefExpr::Create(SecBegin, Context),
                                 Context);
-      Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t), 0);
+      Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t));
     }
   }
 }
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 21cceaf..93b00fb 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -170,6 +170,42 @@ void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form,
   Die->addValue(Attribute, Form, Value);
 }
 
+/// addLabelAddress - Add a dwarf label attribute data and value using
+/// DW_FORM_addr or DW_FORM_GNU_addr_index.
+///
+void CompileUnit::addLabelAddress(DIE *Die, unsigned Attribute,
+                                  MCSymbol *Label) {
+  if (!DD->useSplitDwarf()) {
+    if (Label != NULL) {
+      DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
+      Die->addValue(Attribute, dwarf::DW_FORM_addr, Value);
+    } else {
+      DIEValue *Value = new (DIEValueAllocator) DIEInteger(0);
+      Die->addValue(Attribute, dwarf::DW_FORM_addr, Value);
+    }
+  } else {
+    unsigned idx = DU->getAddrPoolIndex(Label);
+    DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
+    Die->addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value);
+  }
+}
+
+/// addOpAddress - Add a dwarf op address data and value using the
+/// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
+///
+void CompileUnit::addOpAddress(DIE *Die, MCSymbol *Sym) {
+
+  if (!DD->useSplitDwarf()) {
+    addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+    addLabel(Die, 0, dwarf::DW_FORM_udata, Sym);
+  } else {
+    unsigned idx = DU->getAddrPoolIndex(Sym);
+    DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
+    addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index);
+    Die->addValue(0, dwarf::DW_FORM_GNU_addr_index, Value);
+  }
+}
+
 /// addDelta - Add a label delta attribute data and value.
 ///
 void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form,
@@ -592,10 +628,21 @@ bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
   return true;
 }
 
+/// addConstantFPValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) {
+  return addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), false);
+}
+
 /// addConstantValue - Add constant value entry in variable DIE.
 bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
                                    bool Unsigned) {
-  unsigned CIBitWidth = CI->getBitWidth();
+  return addConstantValue(Die, CI->getValue(), Unsigned);
+}
+
+// addConstantValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val,
+                                   bool Unsigned) {
+  unsigned CIBitWidth = Val.getBitWidth();
   if (CIBitWidth <= 64) {
     unsigned form = 0;
     switch (CIBitWidth) {
@@ -607,16 +654,15 @@ bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
       form = Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata;
     }
     if (Unsigned)
-      addUInt(Die, dwarf::DW_AT_const_value, form, CI->getZExtValue());
+      addUInt(Die, dwarf::DW_AT_const_value, form, Val.getZExtValue());
     else
-      addSInt(Die, dwarf::DW_AT_const_value, form, CI->getSExtValue());
+      addSInt(Die, dwarf::DW_AT_const_value, form, Val.getSExtValue());
     return true;
   }
 
   DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
 
   // Get the raw data form of the large APInt.
-  const APInt Val = CI->getValue();
   const uint64_t *Ptr64 = Val.getRawData();
 
   int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
@@ -650,18 +696,21 @@ void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
   }
 }
 
+/// getOrCreateContextDIE - Get context owner's DIE.
+DIE *CompileUnit::getOrCreateContextDIE(DIDescriptor Context) {
+  if (Context.isType())
+    return getOrCreateTypeDIE(DIType(Context));
+  else if (Context.isNameSpace())
+    return getOrCreateNameSpace(DINameSpace(Context));
+  else if (Context.isSubprogram())
+    return getOrCreateSubprogramDIE(DISubprogram(Context));
+  else
+    return getDIE(Context);
+}
+
 /// addToContextOwner - Add Die into the list of its context owner's children.
 void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) {
-  if (Context.isType()) {
-    DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context));
-    ContextDIE->addChild(Die);
-  } else if (Context.isNameSpace()) {
-    DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context));
-    ContextDIE->addChild(Die);
-  } else if (Context.isSubprogram()) {
-    DIE *ContextDIE = getOrCreateSubprogramDIE(DISubprogram(Context));
-    ContextDIE->addChild(Die);
-  } else if (DIE *ContextDIE = getDIE(Context))
+  if (DIE *ContextDIE = getOrCreateContextDIE(Context))
     ContextDIE->addChild(Die);
   else
     addDie(Die);
@@ -864,6 +913,8 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
       } else {
         DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
         addType(Arg, DIType(Ty));
+        if (DIType(Ty).isArtificial())
+          addFlag(Arg, dwarf::DW_AT_artificial);
         Buffer.addChild(Arg);
       }
     }
@@ -905,22 +956,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
                   dwarf::DW_ACCESS_public);
         if (SP.isExplicit())
           addFlag(ElemDie, dwarf::DW_AT_explicit);
-      }
-      else if (Element.isVariable()) {
-        DIVariable DV(Element);
-        ElemDie = new DIE(dwarf::DW_TAG_variable);
-        addString(ElemDie, dwarf::DW_AT_name, DV.getName());
-        addType(ElemDie, DV.getType());
-        addFlag(ElemDie, dwarf::DW_AT_declaration);
-        addFlag(ElemDie, dwarf::DW_AT_external);
-        addSourceLine(ElemDie, DV);
       } else if (Element.isDerivedType()) {
         DIDerivedType DDTy(Element);
         if (DDTy.getTag() == dwarf::DW_TAG_friend) {
           ElemDie = new DIE(dwarf::DW_TAG_friend);
           addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend);
-        } else
-          ElemDie = createMemberDIE(DIDerivedType(Element));
+        } else if (DDTy.isStaticMember())
+          ElemDie = createStaticMemberDIE(DDTy);
+        else
+          ElemDie = createMemberDIE(DDTy);
       } else if (Element.isObjCProperty()) {
         DIObjCProperty Property(Element);
         ElemDie = new DIE(Property.getTag());
@@ -1236,39 +1280,56 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
   if (!GV.Verify())
     return;
 
-  DIE *VariableDIE = new DIE(GV.getTag());
-  // Add to map.
-  insertDIE(N, VariableDIE);
-
-  // Add name.
-  addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
-  StringRef LinkageName = GV.getLinkageName();
-  bool isGlobalVariable = GV.getGlobal() != NULL;
-  if (!LinkageName.empty() && isGlobalVariable)
-    addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
-              getRealLinkageName(LinkageName));
-  // Add type.
+  DIDescriptor GVContext = GV.getContext();
   DIType GTy = GV.getType();
-  addType(VariableDIE, GTy);
 
-  // Add scoping info.
-  if (!GV.isLocalToUnit())
-    addFlag(VariableDIE, dwarf::DW_AT_external);
+  // If this is a static data member definition, some attributes belong
+  // to the declaration DIE.
+  DIE *VariableDIE = NULL;
+  bool IsStaticMember = false;
+  DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration();
+  if (SDMDecl.Verify()) {
+    assert(SDMDecl.isStaticMember() && "Expected static member decl");
+    // We need the declaration DIE that is in the static member's class.
+    // But that class might not exist in the DWARF yet.
+    // Creating the class will create the static member decl DIE.
+    getOrCreateContextDIE(SDMDecl.getContext());
+    VariableDIE = getDIE(SDMDecl);
+    assert(VariableDIE && "Static member decl has no context?");
+    IsStaticMember = true;
+  }
+
+  // If this is not a static data member definition, create the variable
+  // DIE and add the initial set of attributes to it.
+  if (!VariableDIE) {
+    VariableDIE = new DIE(GV.getTag());
+    // Add to map.
+    insertDIE(N, VariableDIE);
+
+    // Add name and type.
+    addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
+    addType(VariableDIE, GTy);
+
+    // Add scoping info.
+    if (!GV.isLocalToUnit()) {
+      addFlag(VariableDIE, dwarf::DW_AT_external);
+      addGlobalName(GV.getName(), VariableDIE);
+    }
+
+    // Add line number info.
+    addSourceLine(VariableDIE, GV);
+    // Add to context owner.
+    addToContextOwner(VariableDIE, GVContext);
+  }
 
-  // Add line number info.
-  addSourceLine(VariableDIE, GV);
-  // Add to context owner.
-  DIDescriptor GVContext = GV.getContext();
-  addToContextOwner(VariableDIE, GVContext);
   // Add location.
   bool addToAccelTable = false;
   DIE *VariableSpecDIE = NULL;
+  bool isGlobalVariable = GV.getGlobal() != NULL;
   if (isGlobalVariable) {
     addToAccelTable = true;
     DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
-    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
-    addLabel(Block, 0, dwarf::DW_FORM_udata,
-             Asm->Mang->getSymbol(GV.getGlobal()));
+    addOpAddress(Block, Asm->Mang->getSymbol(GV.getGlobal()));
     // Do not create specification DIE if context is either compile unit
     // or a subprogram.
     if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() &&
@@ -1278,22 +1339,31 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
       addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
                   dwarf::DW_FORM_ref4, VariableDIE);
       addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
-      addFlag(VariableDIE, dwarf::DW_AT_declaration);
+      // A static member's declaration is already flagged as such.
+      if (!SDMDecl.Verify())
+        addFlag(VariableDIE, dwarf::DW_AT_declaration);
       addDie(VariableSpecDIE);
     } else {
       addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
     }
+    // Add linkage name.
+    StringRef LinkageName = GV.getLinkageName();
+    if (!LinkageName.empty() && isGlobalVariable)
+      addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
+                getRealLinkageName(LinkageName));
   } else if (const ConstantInt *CI =
-             dyn_cast_or_null<ConstantInt>(GV.getConstant()))
-    addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType());
-  else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
+             dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
+    // AT_const_value was added when the static memeber was created. To avoid
+    // emitting AT_const_value multiple times, we only add AT_const_value when
+    // it is not a static member.
+    if (!IsStaticMember)
+      addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType());
+  } else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
     addToAccelTable = true;
     // GV is a merged global.
     DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
     Value *Ptr = CE->getOperand(0);
-    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
-    addLabel(Block, 0, dwarf::DW_FORM_udata,
-             Asm->Mang->getSymbol(cast<GlobalValue>(Ptr)));
+    addOpAddress(Block, Asm->Mang->getSymbol(cast<GlobalValue>(Ptr)));
     addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
     SmallVector<Value*, 3> Idx(CE->op_begin()+1, CE->op_end());
     addUInt(Block, 0, dwarf::DW_FORM_udata,
@@ -1618,3 +1688,38 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
   }
   return MemberDie;
 }
+
+/// createStaticMemberDIE - Create new DIE for C++ static member.
+DIE *CompileUnit::createStaticMemberDIE(const DIDerivedType DT) {
+  if (!DT.Verify())
+    return NULL;
+
+  DIE *StaticMemberDIE = new DIE(DT.getTag());
+  DIType Ty = DT.getTypeDerivedFrom();
+
+  addString(StaticMemberDIE, dwarf::DW_AT_name, DT.getName());
+  addType(StaticMemberDIE, Ty);
+  addSourceLine(StaticMemberDIE, DT);
+  addFlag(StaticMemberDIE, dwarf::DW_AT_external);
+  addFlag(StaticMemberDIE, dwarf::DW_AT_declaration);
+
+  // FIXME: We could omit private if the parent is a class_type, and
+  // public if the parent is something else.
+  if (DT.isProtected())
+    addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+            dwarf::DW_ACCESS_protected);
+  else if (DT.isPrivate())
+    addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+            dwarf::DW_ACCESS_private);
+  else
+    addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+            dwarf::DW_ACCESS_public);
+
+  if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT.getConstant()))
+    addConstantValue(StaticMemberDIE, CI, Ty.isUnsignedDIType());
+  if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT.getConstant()))
+    addConstantFPValue(StaticMemberDIE, CFP);
+
+  insertDIE(DT, StaticMemberDIE);
+  return StaticMemberDIE;
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index f210dcc..77bf6a9 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -27,6 +27,7 @@ class DwarfUnits;
 class MachineLocation;
 class MachineOperand;
 class ConstantInt;
+class ConstantFP;
 class DbgVariable;
 
 //===----------------------------------------------------------------------===//
@@ -63,6 +64,10 @@ class CompileUnit {
   /// descriptors to debug information entries using a DIEEntry proxy.
   DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
 
+  /// GlobalNames - A map of globally visible named entities for this unit.
+  ///
+  StringMap<DIE*> GlobalNames;
+
   /// GlobalTypes - A map of globally visible types for this unit.
   ///
   StringMap<DIE*> GlobalTypes;
@@ -86,6 +91,9 @@ class CompileUnit {
   /// DWARF version doesn't handle the language, return -1.
   int64_t getDefaultLowerBound() const;
 
+  /// getOrCreateContextDIE - Get context owner's DIE.
+  DIE *getOrCreateContextDIE(DIDescriptor Context);
+
 public:
   CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW,
               DwarfUnits *);
@@ -95,6 +103,7 @@ public:
   unsigned getUniqueID() const { return UniqueID; }
   unsigned getLanguage() const { return Language; }
   DIE* getCUDie() const { return CUDie.get(); }
+  const StringMap<DIE*> &getGlobalNames() const { return GlobalNames; }
   const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
 
   const StringMap<std::vector<DIE*> > &getAccelNames() const {
@@ -115,6 +124,10 @@ public:
   ///
   bool hasContent() const { return !CUDie->getChildren().empty(); }
 
+  /// addGlobalName - Add a new global entity to the compile unit.
+  ///
+  void addGlobalName(StringRef Name, DIE *Die) { GlobalNames[Name] = Die; }
+
   /// addGlobalType - Add a new global type to the compile unit.
   ///
   void addGlobalType(DIType Ty);
@@ -207,6 +220,16 @@ public:
   void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
                 const MCSymbol *Label);
 
+  /// addLabelAddress - Add a dwarf label attribute data and value using
+  /// either DW_FORM_addr or DW_FORM_GNU_addr_index.
+  ///
+  void addLabelAddress(DIE *Die, unsigned Attribute, MCSymbol *Label);
+
+  /// addOpAddress - Add a dwarf op address data and value using the
+  /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
+  ///
+  void addOpAddress(DIE *Die, MCSymbol *Label);
+
   /// addDelta - Add a label delta attribute data and value.
   ///
   void addDelta(DIE *Die, unsigned Attribute, unsigned Form,
@@ -237,9 +260,11 @@ public:
   /// addConstantValue - Add constant value entry in variable DIE.
   bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty);
   bool addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned);
+  bool addConstantValue(DIE *Die, const APInt &Val, bool Unsigned);
 
   /// addConstantFPValue - Add constant value entry in variable DIE.
   bool addConstantFPValue(DIE *Die, const MachineOperand &MO);
+  bool addConstantFPValue(DIE *Die, const ConstantFP *CFP);
 
   /// addTemplateParams - Add template parameters in buffer.
   void addTemplateParams(DIE &Buffer, DIArray TParams);
@@ -339,6 +364,9 @@ public:
   /// createMemberDIE - Create new member DIE.
   DIE *createMemberDIE(DIDerivedType DT);
 
+  /// createStaticMemberDIE - Create new static data member DIE.
+  DIE *createStaticMemberDIE(DIDerivedType DT);
+
 private:
 
   // DIEValueAllocator - All DIEValues are allocated through this allocator.
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 66a5a6d..87659ef 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -54,6 +54,10 @@ static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
      cl::desc("Make an absence of debug location information explicit."),
      cl::init(false));
 
+static cl::opt<bool> GenerateDwarfPubNamesSection("generate-dwarf-pubnames",
+     cl::Hidden, cl::ZeroOrMore, cl::init(false),
+     cl::desc("Generate DWARF pubnames section"));
+
 namespace {
   enum DefaultOnOff {
     Default, Enable, Disable
@@ -159,14 +163,13 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
     PrevLabel(NULL), GlobalCUIndexCount(0),
     InfoHolder(A, &AbbreviationsSet, &Abbreviations, "info_string",
                DIEValueAllocator),
-    SkeletonCU(0),
     SkeletonAbbrevSet(InitAbbreviationsSetSize),
     SkeletonHolder(A, &SkeletonAbbrevSet, &SkeletonAbbrevs, "skel_string",
                    DIEValueAllocator) {
 
   DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
   DwarfStrSectionSym = TextSectionSym = 0;
-  DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
+  DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = DwarfLineSectionSym = 0;
   DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0;
   FunctionBeginSym = FunctionEndSym = 0;
@@ -237,6 +240,15 @@ unsigned DwarfUnits::getStringPoolIndex(StringRef Str) {
   return Entry.second;
 }
 
+unsigned DwarfUnits::getAddrPoolIndex(MCSymbol *Sym) {
+  std::pair<MCSymbol*, unsigned> &Entry = AddressPool[Sym];
+  if (Entry.first) return Entry.second;
+
+  Entry.second = NextAddrPoolNumber++;
+  Entry.first = Sym;
+  return Entry.second;
+}
+
 // Define a unique number for the abbreviation.
 //
 void DwarfUnits::assignAbbrevNumber(DIEAbbrev &Abbrev) {
@@ -384,10 +396,12 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
     }
   }
 
-  SPCU->addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
-                 Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()));
-  SPCU->addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
-                 Asm->GetTempSymbol("func_end", Asm->getFunctionNumber()));
+  SPCU->addLabelAddress(SPDie, dwarf::DW_AT_low_pc,
+                        Asm->GetTempSymbol("func_begin",
+                                           Asm->getFunctionNumber()));
+  SPCU->addLabelAddress(SPDie, dwarf::DW_AT_high_pc,
+                        Asm->GetTempSymbol("func_end",
+                                           Asm->getFunctionNumber()));
   const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
   MachineLocation Location(RI->getFrameRegister(*Asm->MF));
   SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
@@ -429,16 +443,16 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
     return ScopeDIE;
   }
 
-  const MCSymbol *Start = getLabelBeforeInsn(RI->first);
-  const MCSymbol *End = getLabelAfterInsn(RI->second);
+  MCSymbol *Start = getLabelBeforeInsn(RI->first);
+  MCSymbol *End = getLabelAfterInsn(RI->second);
 
   if (End == 0) return 0;
 
   assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
   assert(End->isDefined() && "Invalid end label for an inlined scope!");
 
-  TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start);
-  TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End);
+  TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, Start);
+  TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, End);
 
   return ScopeDIE;
 }
@@ -462,8 +476,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
   }
 
   SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin();
-  const MCSymbol *StartLabel = getLabelBeforeInsn(RI->first);
-  const MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
+  MCSymbol *StartLabel = getLabelBeforeInsn(RI->first);
+  MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
 
   if (StartLabel == 0 || EndLabel == 0) {
     llvm_unreachable("Unexpected Start and End labels for an inlined scope!");
@@ -492,10 +506,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
       DebugRangeSymbols.push_back(NULL);
       DebugRangeSymbols.push_back(NULL);
   } else {
-    TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
-                    StartLabel);
-    TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
-                    EndLabel);
+    TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, StartLabel);
+    TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, EndLabel);
   }
 
   InlinedSubprogramDIEs.insert(OriginDIE);
@@ -531,6 +543,12 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
   if (!Scope || !Scope->getScopeNode())
     return NULL;
 
+  DIScope DS(Scope->getScopeNode());
+  // Early return to avoid creating dangling variable|scope DIEs.
+  if (!Scope->getInlinedAt() && DS.isSubprogram() && Scope->isAbstractScope() &&
+      !TheCU->getDIE(DS))
+    return NULL;
+
   SmallVector<DIE *, 8> Children;
   DIE *ObjectPointer = NULL;
@@ -556,7 +574,6 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
     for (unsigned j = 0, M = Scopes.size(); j < M; ++j)
       if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j]))
         Children.push_back(Nested);
-  DIScope DS(Scope->getScopeNode());
   DIE *ScopeDIE = NULL;
   if (Scope->getInlinedAt())
     ScopeDIE = constructInlinedScopeDIE(TheCU, Scope);
@@ -646,15 +663,28 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
                                        DIUnit.getLanguage());
   NewCU->addString(Die, dwarf::DW_AT_name, FN);
   // 2.17.1 requires that we use DW_AT_low_pc for a single entry point
-  // into an entity.
-  NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
+  // into an entity. We're using 0 (or a NULL label) for this.
+  NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL);
+
+  // Define start line table label for each Compile Unit.
+  MCSymbol *LineTableStartSym = Asm->GetTempSymbol("line_table_start",
+                                                   NewCU->getUniqueID());
+  Asm->OutStreamer.getContext().setMCLineTableSymbol(LineTableStartSym,
+                                                     NewCU->getUniqueID());
+
   // DW_AT_stmt_list is a offset of line number information for this
   // compile unit in debug_line section.
+  // The line table entries are not always emitted in assembly, so it
+  // is not okay to use line_table_start here.
   if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
     NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
-                    Asm->GetTempSymbol("section_line"));
-  else
+                    NewCU->getUniqueID() == 0 ?
+                    Asm->GetTempSymbol("section_line") : LineTableStartSym);
+  else if (NewCU->getUniqueID() == 0)
     NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+  else
+    NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+                    LineTableStartSym, DwarfLineSectionSym);
 
   if (!CompilationDir.empty())
     NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
@@ -671,8 +701,13 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
 
   if (!FirstCU)
     FirstCU = NewCU;
-  if (useSplitDwarf() && !SkeletonCU)
-    SkeletonCU = constructSkeletonCU(N);
+
+  if (useSplitDwarf()) {
+    // This should be a unique identifier when we want to build .dwp files.
+    NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
+    // Now construct the skeleton CU associated.
+    constructSkeletonCU(N);
+  }
 
   InfoHolder.addUnit(NewCU);
 
@@ -702,7 +737,9 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
 
   // Add to context owner.
   TheCU->addToContextOwner(SubprogramDie, SP.getContext());
 
-  return;
+  // Expose as global, if requested.
+  if (GenerateDwarfPubNamesSection)
+    TheCU->addGlobalName(SP.getName(), SubprogramDie);
 }
 
 // Collect debug info from named mdnodes such as llvm.dbg.enum and llvm.dbg.ty.
@@ -754,6 +791,9 @@ bool DwarfDebug::collectLegacyDebugInfo(const Module *M) {
   }
   if (!HasDebugInfo) return false;
 
+  // Emit initial sections so we can refer to them later.
+  emitSectionLabels();
+
   // Create all the compile unit DIEs.
   for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
          E = DbgFinder.compile_unit_end(); I != E; ++I)
@@ -791,6 +831,9 @@ void DwarfDebug::beginModule() {
   // module using debug info finder to collect debug info.
   NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
   if (CU_Nodes) {
+    // Emit initial sections so we can reference labels later.
+    emitSectionLabels();
+
     for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
       DICompileUnit CUNode(CU_Nodes->getOperand(i));
       CompileUnit *CU = constructCompileUnit(CUNode);
@@ -923,9 +966,6 @@ void DwarfDebug::endModule() {
   // Finalize the debug info for the module.
   finalizeModuleInfo();
 
-  // Emit initial sections.
-  emitSectionLabels();
-
   if (!useSplitDwarf()) {
     // Emit all the DIEs into a debug info section.
     emitDebugInfo();
@@ -975,6 +1015,9 @@ void DwarfDebug::endModule() {
     // Emit info into a debug macinfo section.
     emitDebugMacInfo();
 
+    // Emit DWO addresses.
+    InfoHolder.emitAddresses(Asm->getObjFileLowering().getDwarfAddrSection());
+
     // Emit inline info.
     // TODO: When we don't need the option anymore we
     // can remove all of the code that this section
@@ -991,6 +1034,10 @@ void DwarfDebug::endModule() {
     emitAccelTypes();
   }
 
+  // Emit info into a debug pubnames section, if requested.
+  if (GenerateDwarfPubNamesSection)
+    emitDebugPubnames();
+
   // Emit info into a debug pubtypes section.
   // TODO: When we don't need the option anymore we can
   // remove all of the code that adds to the table.
@@ -1008,11 +1055,12 @@ void DwarfDebug::endModule() {
          E = CUMap.end(); I != E; ++I)
     delete I->second;
 
-  delete SkeletonCU;
+  for (SmallVector<CompileUnit *, 1>::iterator I = SkeletonCUs.begin(),
+         E = SkeletonCUs.end(); I != E; ++I)
+    delete *I;
 
   // Reset these for the next Module if we have one.
   FirstCU = NULL;
-  SkeletonCU = NULL;
 }
 
 // Find abstract variable, if any, associated with Var.
@@ -1179,7 +1227,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
       continue;
     }
 
-    // handle multiple DBG_VALUE instructions describing one variable.
+    // Handle multiple DBG_VALUE instructions describing one variable.
     RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
 
     for (SmallVectorImpl<const MachineInstr*>::const_iterator
@@ -1234,14 +1282,14 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
 }
 
 // Return Label preceding the instruction.
-const MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) {
+MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) {
   MCSymbol *Label = LabelsBeforeInsn.lookup(MI);
   assert(Label && "Didn't insert label before instruction");
   return Label;
 }
 
 // Return Label immediately following the instruction.
-const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
+MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
   return LabelsAfterInsn.lookup(MI);
 }
 
@@ -1377,6 +1425,13 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
   if (LScopes.empty()) return;
   identifyScopeMarkers();
 
+  // Set DwarfCompileUnitID in MCContext to the Compile Unit this function
+  // belongs to.
+  LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+  CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
+  assert(TheCU && "Unable to find compile unit!");
+  Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
+
   FunctionBeginSym = Asm->GetTempSymbol("func_begin",
                                         Asm->getFunctionNumber());
   // Assumes in correct section after the entry point.
@@ -1561,6 +1616,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
                                       Asm->getFunctionNumber());
   // Assumes in correct section after the entry point.
   Asm->OutStreamer.EmitLabel(FunctionEndSym);
+  // Set DwarfCompileUnitID in MCContext to default value.
+  Asm->OutStreamer.getContext().setDwarfCompileUnitID(0);
 
   SmallPtrSet<const MDNode *, 16> ProcessedVars;
   collectVariableInfo(MF, ProcessedVars);
@@ -1734,8 +1791,11 @@ void DwarfDebug::emitSectionLabels() {
   if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection())
     emitSectionSym(Asm, MacroInfo);
 
-  emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
+  DwarfLineSectionSym =
+    emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
   emitSectionSym(Asm, TLOF.getDwarfLocSection());
+  if (GenerateDwarfPubNamesSection)
+    emitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
   emitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
   DwarfStrSectionSym =
     emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string");
@@ -1942,8 +2002,7 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
   Asm->OutStreamer.AddComment("Section end label");
 
   Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd),
-                                   Asm->getDataLayout().getPointerSize(),
-                                   0/*AddrSpace*/);
+                                   Asm->getDataLayout().getPointerSize());
 
   // Mark end of matrix.
   Asm->OutStreamer.AddComment("DW_LNE_end_sequence");
@@ -2072,6 +2131,61 @@ void DwarfDebug::emitAccelTypes() {
   AT.Emit(Asm, SectionBegin, &InfoHolder);
 }
 
+/// emitDebugPubnames - Emit visible names into a debug pubnames section.
+///
+void DwarfDebug::emitDebugPubnames() {
+  const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection();
+
+  typedef DenseMap<const MDNode*, CompileUnit*> CUMapType;
+  for (CUMapType::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) {
+    CompileUnit *TheCU = I->second;
+    unsigned ID = TheCU->getUniqueID();
+
+    if (TheCU->getGlobalNames().empty())
+      continue;
+
+    // Start the dwarf pubnames section.
+    Asm->OutStreamer.SwitchSection(
+      Asm->getObjFileLowering().getDwarfPubNamesSection());
+
+    Asm->OutStreamer.AddComment("Length of Public Names Info");
+    Asm->EmitLabelDifference(Asm->GetTempSymbol("pubnames_end", ID),
+                             Asm->GetTempSymbol("pubnames_begin", ID), 4);
+
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", ID));
+
+    Asm->OutStreamer.AddComment("DWARF Version");
+    Asm->EmitInt16(dwarf::DWARF_VERSION);
+
+    Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+    Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), ID),
+                           DwarfInfoSectionSym);
+
+    Asm->OutStreamer.AddComment("Compilation Unit Length");
+    Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(), ID),
+                             Asm->GetTempSymbol(ISec->getLabelBeginName(), ID),
+                             4);
+
+    const StringMap<DIE*> &Globals = TheCU->getGlobalNames();
+    for (StringMap<DIE*>::const_iterator
+           GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+      const char *Name = GI->getKeyData();
+      const DIE *Entity = GI->second;
+
+      Asm->OutStreamer.AddComment("DIE offset");
+      Asm->EmitInt32(Entity->getOffset());
+
+      if (Asm->isVerbose())
+        Asm->OutStreamer.AddComment("External Name");
+      Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0);
+    }
+
+    Asm->OutStreamer.AddComment("End Mark");
+    Asm->EmitInt32(0);
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end", ID));
+  }
+}
+
 void DwarfDebug::emitDebugPubTypes() {
   for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
          E = CUMap.end(); I != E; ++I) {
@@ -2114,7 +2228,7 @@ void DwarfDebug::emitDebugPubTypes() {
       if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
 
       // Emit the name with a terminating null byte.
-      Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
+      Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1));
     }
 
     Asm->OutStreamer.AddComment("End Mark");
@@ -2152,22 +2266,53 @@ void DwarfUnits::emitStrings(const MCSection *StrSection,
 
     // Emit the string itself with a terminating null byte.
     Asm->OutStreamer.EmitBytes(StringRef(Entries[i].second->getKeyData(),
-                                         Entries[i].second->getKeyLength()+1),
-                               0/*addrspace*/);
+                                         Entries[i].second->getKeyLength()+1));
   }
 
   // If we've got an offset section go ahead and emit that now as well.
   if (OffsetSection) {
     Asm->OutStreamer.SwitchSection(OffsetSection);
     unsigned offset = 0;
-    unsigned size = 4;
+    unsigned size = 4; // FIXME: DWARF64 is 8.
     for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
-      Asm->OutStreamer.EmitIntValue(offset, size, 0);
+      Asm->OutStreamer.EmitIntValue(offset, size);
       offset += Entries[i].second->getKeyLength() + 1;
     }
   }
 }
 
+// Emit strings into a string section.
+void DwarfUnits::emitAddresses(const MCSection *AddrSection) {
+
+  if (AddressPool.empty()) return;
+
+  // Start the dwarf addr section.
+  Asm->OutStreamer.SwitchSection(AddrSection);
+
+  // Get all of the string pool entries and put them in an array by their ID so
+  // we can sort them.
+  SmallVector<std::pair<unsigned,
+                        std::pair<MCSymbol*, unsigned>* >, 64> Entries;
+
+  for (DenseMap<MCSymbol*, std::pair<MCSymbol*, unsigned> >::iterator
+         I = AddressPool.begin(), E = AddressPool.end();
+       I != E; ++I)
+    Entries.push_back(std::make_pair(I->second.second, &(I->second)));
+
+  array_pod_sort(Entries.begin(), Entries.end());
+
+  for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
+    // Emit a label for reference from debug information entries.
+    MCSymbol *Sym = Entries[i].second->first;
+    if (Sym)
+      Asm->EmitLabelReference(Entries[i].second->first,
+                              Asm->getDataLayout().getPointerSize());
+    else
+      Asm->OutStreamer.EmitIntValue(0, Asm->getDataLayout().getPointerSize());
+  }
+
+}
+
 // Emit visible names into a debug str section.
 void DwarfDebug::emitDebugStr() {
   DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
@@ -2199,12 +2344,12 @@ void DwarfDebug::emitDebugLoc() {
     DotDebugLocEntry &Entry = *I;
     if (Entry.isMerged()) continue;
     if (Entry.isEmpty()) {
-      Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
-      Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+      Asm->OutStreamer.EmitIntValue(0, Size);
+      Asm->OutStreamer.EmitIntValue(0, Size);
       Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index));
     } else {
-      Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size, 0);
-      Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0);
+      Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size);
+      Asm->OutStreamer.EmitSymbolValue(Entry.End, Size);
       DIVariable DV(Entry.Variable);
       Asm->OutStreamer.AddComment("Loc expr size");
       MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol();
@@ -2290,9 +2435,9 @@ void DwarfDebug::emitDebugRanges() {
          I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
        I != E; ++I) {
     if (*I)
-      Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size, 0);
+      Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size);
    else
-      Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+      Asm->OutStreamer.EmitIntValue(0, Size);
   }
 }
 
@@ -2376,7 +2521,7 @@ void DwarfDebug::emitDebugInlineInfo() {
 
       if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc");
       Asm->OutStreamer.EmitSymbolValue(LI->first,
-                                       Asm->getDataLayout().getPointerSize(),0);
+                                       Asm->getDataLayout().getPointerSize());
     }
   }
 
@@ -2391,68 +2536,44 @@ void DwarfDebug::emitDebugInlineInfo() {
 // DW_AT_low_pc and DW_AT_high_pc are not used, and vice versa.
 CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) {
   DICompileUnit DIUnit(N);
-  StringRef FN = DIUnit.getFilename();
   CompilationDir = DIUnit.getDirectory();
 
   DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
   CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++,
                                        DIUnit.getLanguage(), Die, Asm,
                                        this, &SkeletonHolder);
-  // FIXME: This should be the .dwo file.
-  NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name, FN);
 
-  // FIXME: We also need DW_AT_addr_base and DW_AT_dwo_id.
+  NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name,
+                        DIUnit.getSplitDebugFilename());
+
+  // This should be a unique identifier when we want to build .dwp files.
+  NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
+
+  // FIXME: The addr base should be relative for each compile unit, however,
+  // this one is going to be 0 anyhow.
+  NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset, 0);
 
   // 2.17.1 requires that we use DW_AT_low_pc for a single entry point
-  // into an entity.
+  // into an entity. We're using 0, or a NULL label for this.
   NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
+
   // DW_AT_stmt_list is a offset of line number information for this
   // compile unit in debug_line section.
   if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-    NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
-                    Asm->GetTempSymbol("section_line"));
+    NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset,
+                    DwarfLineSectionSym);
   else
-    NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+    NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, 0);
 
   if (!CompilationDir.empty())
     NewCU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
 
   SkeletonHolder.addUnit(NewCU);
+  SkeletonCUs.push_back(NewCU);
 
   return NewCU;
 }
 
-void DwarfDebug::emitSkeletonCU(const MCSection *Section) {
-  Asm->OutStreamer.SwitchSection(Section);
-  DIE *Die = SkeletonCU->getCUDie();
-
-  // Emit the compile units header.
-  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(Section->getLabelBeginName(),
-                                                SkeletonCU->getUniqueID()));
-
-  // Emit size of content not including length itself
-  unsigned ContentSize = Die->getSize() +
-    sizeof(int16_t) + // DWARF version number
-    sizeof(int32_t) + // Offset Into Abbrev. Section
-    sizeof(int8_t);   // Pointer Size (in bytes)
-
-  Asm->OutStreamer.AddComment("Length of Compilation Unit Info");
-  Asm->EmitInt32(ContentSize);
-  Asm->OutStreamer.AddComment("DWARF version number");
-  Asm->EmitInt16(dwarf::DWARF_VERSION);
-  Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
-
-  const MCSection *ASec = Asm->getObjFileLowering().getDwarfAbbrevSection();
-  Asm->EmitSectionOffset(Asm->GetTempSymbol(ASec->getLabelBeginName()),
-                         DwarfAbbrevSectionSym);
-  Asm->OutStreamer.AddComment("Address Size (in bytes)");
-  Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
-
-  emitDIE(Die, &SkeletonAbbrevs);
-  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(Section->getLabelEndName(),
-                                                SkeletonCU->getUniqueID()));
-}
-
 void DwarfDebug::emitSkeletonAbbrevs(const MCSection *Section) {
   assert(useSplitDwarf() && "No split dwarf debug info?");
   emitAbbrevs(Section, &SkeletonAbbrevs);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 1e471f7..7b56815 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -195,6 +195,10 @@ public:
 typedef StringMap<std::pair<MCSymbol*, unsigned>, BumpPtrAllocator&>
   StrPool;
 
+// A Symbol->pair<Symbol, unsigned> mapping of addresses used by indirect
+// references.
+typedef DenseMap<MCSymbol *, std::pair<MCSymbol *, unsigned> > AddrPool;
+
 /// \brief Collects and handles information specific to a particular
 /// collection of units.
 class DwarfUnits {
@@ -215,12 +219,17 @@ class DwarfUnits {
   unsigned NextStringPoolNumber;
   std::string StringPref;
 
+  // Collection of addresses for this unit and assorted labels.
+  AddrPool AddressPool;
+  unsigned NextAddrPoolNumber;
+
 public:
   DwarfUnits(AsmPrinter *AP, FoldingSet<DIEAbbrev> *AS,
              std::vector<DIEAbbrev *> *A, const char *Pref,
             BumpPtrAllocator &DA) :
    Asm(AP), AbbreviationsSet(AS), Abbreviations(A),
-    StringPool(DA), NextStringPoolNumber(0), StringPref(Pref) {}
+    StringPool(DA), NextStringPoolNumber(0), StringPref(Pref),
+    AddressPool(), NextAddrPoolNumber(0) {}
 
   /// \brief Compute the size and offset of a DIE given an incoming Offset.
   unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
@@ -242,6 +251,9 @@ public:
   /// \brief Emit all of the strings to the section given.
   void emitStrings(const MCSection *, const MCSection *, const MCSymbol *);
 
+  /// \brief Emit all of the addresses to the section given.
+ void emitAddresses(const MCSection *); + /// \brief Returns the entry into the start of the pool. MCSymbol *getStringPoolSym(); @@ -255,6 +267,13 @@ public: /// \brief Returns the string pool. StrPool *getStringPool() { return &StringPool; } + + /// \brief Returns the index into the address pool with the given + /// label/symbol. + unsigned getAddrPoolIndex(MCSymbol *); + + /// \brief Returns the address pool. + AddrPool *getAddrPool() { return &AddressPool; } }; /// \brief Collects and handles dwarf debug information. @@ -367,7 +386,7 @@ class DwarfDebug { // section offsets and are created by EmitSectionLabels. MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym; MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym; - MCSymbol *DwarfDebugLocSectionSym; + MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym; @@ -396,8 +415,8 @@ class DwarfDebug { // original object file, rather than things that are meant // to be in the .dwo sections. - // The CU left in the original object file for separated debug info. - CompileUnit *SkeletonCU; + // The CUs left in the original object file for separated debug info. + SmallVector<CompileUnit *, 1> SkeletonCUs; // Used to uniquely define abbreviations for the skeleton emission. FoldingSet<DIEAbbrev> SkeletonAbbrevSet; @@ -481,6 +500,9 @@ private: /// \brief Emit type dies into a hashed accelerator table. void emitAccelTypes(); + /// \brief Emit visible names into a debug pubnames section. + void emitDebugPubnames(); + /// \brief Emit visible types into a debug pubtypes section. void emitDebugPubTypes(); @@ -508,9 +530,6 @@ private: /// section. CompileUnit *constructSkeletonCU(const MDNode *); - /// \brief Emit the local split debug info section. - void emitSkeletonCU(const MCSection *); - /// \brief Emit the local split abbreviations. void emitSkeletonAbbrevs(const MCSection *); @@ -560,7 +579,7 @@ private: } /// \brief Return Label preceding the instruction. - const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); + MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); /// \brief Ensure that a label will be emitted after MI. void requestLabelAfterInsn(const MachineInstr *MI) { @@ -568,7 +587,7 @@ private: } /// \brief Return Label immediately following the instruction. 
- const MCSymbol *getLabelAfterInsn(const MachineInstr *MI); + MCSymbol *getLabelAfterInsn(const MachineInstr *MI); public: //===--------------------------------------------------------------------===// diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 975bb94..7133458 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -33,7 +33,6 @@ #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; @@ -608,7 +607,7 @@ void DwarfException::EmitExceptionTable() { if (!S.PadLabel) { if (VerboseAsm) Asm->OutStreamer.AddComment(" has no landing pad"); - Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); + Asm->OutStreamer.EmitIntValue(0, 4/*size*/); } else { if (VerboseAsm) Asm->OutStreamer.AddComment(Twine(" jumps to ") + diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index b802853..98177c0 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -100,7 +100,7 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { EmitCamlGlobal(getModule(), AP, "data_end"); // FIXME: Why does ocaml emit this?? - AP.OutStreamer.EmitIntValue(0, IntPtrSize, 0); + AP.OutStreamer.EmitIntValue(0, IntPtrSize); AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); EmitCamlGlobal(getModule(), AP, "frametable"); @@ -145,7 +145,7 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { "Live root count "+Twine(LiveCount)+" >= 65536."); } - AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize, 0); + AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize); AP.EmitInt16(FrameSize); AP.EmitInt16(LiveCount); diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp index cb25674..1561012 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -33,7 +33,6 @@ #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index c27e081..e8b5b4f 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -26,7 +26,7 @@ using namespace llvm; namespace { class BasicTTI : public ImmutablePass, public TargetTransformInfo { - const TargetLowering *TLI; + const TargetLoweringBase *TLI; /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the result needs to be inserted and/or extracted from vectors. 
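The hunks below switch BasicTTI from TargetLowering to TargetLoweringBase and add conservative defaults for three new cost-model hooks. As a rough illustration of how a client pass might consult them, here is a minimal sketch; the pickUnrollFactor helper is not part of this patch, and a TargetTransformInfo reference from the pass manager is assumed:

    // Illustrative client of the hooks added below. BasicTTI's defaults are
    // deliberately conservative unless a target overrides them: unroll
    // factor 1, 32-bit registers, and free address computations.
    static unsigned pickUnrollFactor(const llvm::TargetTransformInfo &TTI,
                                     llvm::Type *EltTy) {
      unsigned UF = TTI.getMaximumUnrollFactor();                  // default: 1
      unsigned VecBits = TTI.getRegisterBitWidth(/*Vector=*/true); // default: 32
      unsigned AddrCost = TTI.getAddressComputationCost(EltTy);    // default: 0
      // A real client would fold these into its cost model; here we simply
      // return the target's preferred unroll factor.
      (void)VecBits; (void)AddrCost;
      return UF;
    }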
@@ -37,7 +37,7 @@ public:
     llvm_unreachable("This pass cannot be directly constructed");
   }
 
-  BasicTTI(const TargetLowering *TLI) : ImmutablePass(ID), TLI(TLI) {
+  BasicTTI(const TargetLoweringBase *TLI) : ImmutablePass(ID), TLI(TLI) {
     initializeBasicTTIPass(*PassRegistry::getPassRegistry());
   }
 
@@ -83,6 +83,8 @@ public:
   /// @{
   virtual unsigned getNumberOfRegisters(bool Vector) const;
+  virtual unsigned getMaximumUnrollFactor() const;
+  virtual unsigned getRegisterBitWidth(bool Vector) const;
   virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
   virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
                                   int Index, Type *SubTp) const;
@@ -99,6 +101,7 @@ public:
   virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
                                          ArrayRef<Type*> Tys) const;
   virtual unsigned getNumberOfParts(Type *Tp) const;
+  virtual unsigned getAddressComputationCost(Type *Ty) const;
 
   /// @}
 };
@@ -110,7 +113,7 @@ INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti",
 char BasicTTI::ID = 0;
 
 ImmutablePass *
-llvm::createBasicTargetTransformInfoPass(const TargetLowering *TLI) {
+llvm::createBasicTargetTransformInfoPass(const TargetLoweringBase *TLI) {
   return new BasicTTI(TLI);
 }
 
@@ -126,7 +129,7 @@ bool BasicTTI::isLegalICmpImmediate(int64_t imm) const {
 bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                      int64_t BaseOffset, bool HasBaseReg,
                                      int64_t Scale) const {
-  TargetLowering::AddrMode AM;
+  TargetLoweringBase::AddrMode AM;
   AM.BaseGV = BaseGV;
   AM.BaseOffs = BaseOffset;
   AM.HasBaseReg = HasBaseReg;
@@ -182,6 +185,14 @@ unsigned BasicTTI::getNumberOfRegisters(bool Vector) const {
   return 1;
 }
 
+unsigned BasicTTI::getRegisterBitWidth(bool Vector) const {
+  return 32;
+}
+
+unsigned BasicTTI::getMaximumUnrollFactor() const {
+  return 1;
+}
+
 unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
   // Check if any of the operands are vector operands.
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -231,6 +242,27 @@ unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
   std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(Src);
   std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(Dst);
 
+  // Check for NOOP conversions.
+  if (SrcLT.first == DstLT.first &&
+      SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
+
+    // Bitcasts between types that are legalized to the same type are free.
+    if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
+      return 0;
+  }
+
+  if (Opcode == Instruction::Trunc &&
+      TLI->isTruncateFree(SrcLT.second, DstLT.second))
+    return 0;
+
+  if (Opcode == Instruction::ZExt &&
+      TLI->isZExtFree(SrcLT.second, DstLT.second))
+    return 0;
+
+  // If the cast is marked as legal (or promote) then assume low cost.
+  if (TLI->isOperationLegalOrPromote(ISD, DstLT.second))
+    return 1;
+
   // Handle scalar conversions.
   if (!Src->isVectorTy() && !Dst->isVectorTy()) {
 
@@ -238,14 +270,6 @@ unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
     if (Opcode == Instruction::BitCast)
       return 0;
 
-    if (Opcode == Instruction::Trunc &&
-        TLI->isTruncateFree(SrcLT.second, DstLT.second))
-      return 0;
-
-    if (Opcode == Instruction::ZExt &&
-        TLI->isZExtFree(SrcLT.second, DstLT.second))
-      return 0;
-
     // Just check the op cost. If the operation is legal then assume it costs 1.
if (!TLI->isOperationExpand(ISD, DstLT.second)) return 1; @@ -261,10 +285,6 @@ unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst, if (SrcLT.first == DstLT.first && SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { - // Bitcast between types that are legalized to the same type are free. - if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) - return 0; - // Assume that Zext is done using AND. if (Opcode == Instruction::ZExt) return 1; @@ -381,3 +401,7 @@ unsigned BasicTTI::getNumberOfParts(Type *Tp) const { std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); return LT.first; } + +unsigned BasicTTI::getAddressComputationCost(Type *Ty) const { + return 0; +} diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index d5f3932..ddc7ada 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -9,8 +9,8 @@ add_llvm_library(LLVMCodeGen CodeGen.cpp CodePlacementOpt.cpp CriticalAntiDepBreaker.cpp - DeadMachineInstructionElim.cpp DFAPacketizer.cpp + DeadMachineInstructionElim.cpp DwarfEHPrepare.cpp EarlyIfConversion.cpp EdgeBundles.cpp @@ -32,21 +32,20 @@ add_llvm_library(LLVMCodeGen LiveInterval.cpp LiveIntervalAnalysis.cpp LiveIntervalUnion.cpp + LiveRangeCalc.cpp + LiveRangeEdit.cpp LiveRegMatrix.cpp LiveStackAnalysis.cpp LiveVariables.cpp - LiveRangeCalc.cpp - LiveRangeEdit.cpp LocalStackSlotAllocation.cpp MachineBasicBlock.cpp MachineBlockFrequencyInfo.cpp MachineBlockPlacement.cpp MachineBranchProbabilityInfo.cpp + MachineCSE.cpp MachineCodeEmitter.cpp MachineCopyPropagation.cpp - MachineCSE.cpp MachineDominators.cpp - MachinePostDominators.cpp MachineFunction.cpp MachineFunctionAnalysis.cpp MachineFunctionPass.cpp @@ -58,6 +57,7 @@ add_llvm_library(LLVMCodeGen MachineModuleInfo.cpp MachineModuleInfoImpls.cpp MachinePassRegistry.cpp + MachinePostDominators.cpp MachineRegisterInfo.cpp MachineSSAUpdater.cpp MachineScheduler.cpp @@ -91,16 +91,17 @@ add_llvm_library(LLVMCodeGen ShrinkWrapping.cpp SjLjEHPrepare.cpp SlotIndexes.cpp - Spiller.cpp SpillPlacement.cpp + Spiller.cpp SplitKit.cpp + StackColoring.cpp StackProtector.cpp StackSlotColoring.cpp - StackColoring.cpp StrongPHIElimination.cpp TailDuplication.cpp TargetFrameLoweringImpl.cpp TargetInstrInfo.cpp + TargetLoweringBase.cpp TargetLoweringObjectFileImpl.cpp TargetOptionsImpl.cpp TargetRegisterInfo.cpp diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 48105d9..0eb74a4 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -57,23 +57,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn()); - // Determine the live-out physregs for this block. - if (IsReturnBlock) { - // In a return block, examine the function live-out regs. - for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), - E = MRI.liveout_end(); I != E; ++I) { - for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { - unsigned Reg = *AI; - Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[Reg] = BBSize; - DefIndices[Reg] = ~0u; - } - } - } - - // In a non-return block, examine the live-in regs of all successors. - // Note a return block can have successors if the return instruction is - // predicated. + // Examine the live-in regs of all successors. 
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), @@ -371,12 +355,13 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, return false; } -unsigned -CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin, - RegRefIter RegRefEnd, - unsigned AntiDepReg, - unsigned LastNewReg, - const TargetRegisterClass *RC) +unsigned CriticalAntiDepBreaker:: +findSuitableFreeRegister(RegRefIter RegRefBegin, + RegRefIter RegRefEnd, + unsigned AntiDepReg, + unsigned LastNewReg, + const TargetRegisterClass *RC, + SmallVector<unsigned, 2> &Forbid) { ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC); for (unsigned i = 0; i != Order.size(); ++i) { @@ -401,6 +386,15 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin, Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) || KillIndices[AntiDepReg] > DefIndices[NewReg]) continue; + // If NewReg overlaps any of the forbidden registers, we can't use it. + bool Forbidden = false; + for (SmallVector<unsigned, 2>::iterator it = Forbid.begin(), + ite = Forbid.end(); it != ite; ++it) + if (TRI->regsOverlap(NewReg, *it)) { + Forbidden = true; + break; + } + if (Forbidden) continue; return NewReg; } @@ -564,6 +558,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, PrescanInstruction(MI); + SmallVector<unsigned, 2> ForbidRegs; + // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers // defined in a call must not be changed (ABI). @@ -574,7 +570,9 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, AntiDepReg = 0; else if (AntiDepReg) { // If this instruction has a use of AntiDepReg, breaking it - // is invalid. + // is invalid. If the instruction defines other registers, + // save a list of them so that we don't pick a new register + // that overlaps any of them. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; @@ -584,6 +582,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, AntiDepReg = 0; break; } + if (MO.isDef() && Reg != AntiDepReg) + ForbidRegs.push_back(Reg); } } @@ -606,7 +606,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second, AntiDepReg, LastNewReg[AntiDepReg], - RC)) { + RC, ForbidRegs)) { DEBUG(dbgs() << "Breaking anti-dependence edge on " << TRI->getName(AntiDepReg) << " with " << RegRefs.count(AntiDepReg) << " references" diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 8fb2b0e..df13dd3 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -102,7 +102,8 @@ class TargetRegisterInfo; RegRefIter RegRefEnd, unsigned AntiDepReg, unsigned LastNewReg, - const TargetRegisterClass *RC); + const TargetRegisterClass *RC, + SmallVector<unsigned, 2> &Forbid); }; } diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index a526d24..a54217f 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -99,15 +99,6 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { // Start out assuming that reserved registers are live out of this block. 
   LivePhysRegs = MRI->getReservedRegs();
 
-  // Also add any explicit live-out physregs for this block.
-  if (!MBB->empty() && MBB->back().isReturn())
-    for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(),
-         LOE = MRI->liveout_end(); LOI != LOE; ++LOI) {
-      unsigned Reg = *LOI;
-      if (TargetRegisterInfo::isPhysicalRegister(Reg))
-        LivePhysRegs.set(Reg);
-    }
-
   // Add live-ins from successors to LivePhysRegs. Normally, physregs are not
   // live across blocks, but some targets (x86) can have flags live out of a
   // block.
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 4cafa96..f27ec77 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -33,7 +33,7 @@ STATISTIC(NumResumesLowered, "Number of resume calls lowered");
 namespace {
   class DwarfEHPrepare : public FunctionPass {
     const TargetMachine *TM;
-    const TargetLowering *TLI;
+    const TargetLoweringBase *TLI;
 
     // RewindFunction - _Unwind_Resume or the target equivalent.
     Constant *RewindFunction;
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index f332925..fac207e 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -17,8 +17,6 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "early-ifcvt"
-#include "llvm/CodeGen/Passes.h"
-#include "MachineTraceMetrics.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SetVector.h"
@@ -31,6 +29,8 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index 0b9e83d..1611db8 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -49,8 +49,6 @@ private:
   bool LowerSubregToReg(MachineInstr *MI);
   bool LowerCopy(MachineInstr *MI);
 
-  void TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
-                        const TargetRegisterInfo *TRI);
   void TransferImplicitDefs(MachineInstr *MI);
 };
 } // end anonymous namespace
@@ -61,21 +59,6 @@ char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID;
 INITIALIZE_PASS(ExpandPostRA, "postrapseudos",
                 "Post-RA pseudo instruction expansion pass", false, false)
 
-/// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead,
-/// and the lowered replacement instructions immediately precede it.
-/// Mark the replacement instructions with the dead flag.
-void
-ExpandPostRA::TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
-                               const TargetRegisterInfo *TRI) {
-  for (MachineBasicBlock::iterator MII =
-        prior(MachineBasicBlock::iterator(MI)); ; --MII) {
-    if (MII->addRegisterDead(DstReg, TRI))
-      break;
-    assert(MII != MI->getParent()->begin() &&
-           "copyPhysReg output doesn't reference destination register!");
-  }
-}
-
 /// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered
 /// replacement instructions immediately precede it. Copy any implicit-def
 /// operands from MI to the replacement instruction.
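The next hunks replace the TransferDeadFlag machinery removed above: a pseudo whose defs are all dead is now rewritten in place as a KILL, rather than expanded into a real copy whose dead flags must then be transferred back. A condensed sketch of that strategy, with MI and TII assumed to be the usual pass members (not a drop-in function from this patch):

    // If every def is already dead there is no value to produce, so morph
    // the COPY/SUBREG_TO_REG pseudo into a KILL: the operands and their
    // liveness flags survive, but no real copy is emitted.
    if (MI->allDefsAreDead()) {
      MI->setDesc(TII->get(llvm::TargetOpcode::KILL));
      return true; // nothing further to expand
    }
    // ...otherwise fall through to TII->copyPhysReg(...) as before.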
@@ -114,6 +97,12 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
 
   DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
 
+  if (MI->allDefsAreDead()) {
+    MI->setDesc(TII->get(TargetOpcode::KILL));
+    DEBUG(dbgs() << "subreg: replaced by: " << *MI);
+    return true;
+  }
+
   if (DstSubReg == InsReg) {
     // No need to insert an identity copy instruction.
     // Watch out for case like this:
@@ -135,10 +124,6 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
     MachineBasicBlock::iterator CopyMI = MI;
     --CopyMI;
     CopyMI->addRegisterDefined(DstReg);
-
-    // Transfer the kill/dead flags, if needed.
-    if (MI->getOperand(0).isDead())
-      TransferDeadFlag(MI, DstSubReg, TRI);
     DEBUG(dbgs() << "subreg: " << *CopyMI);
   }
 
@@ -148,6 +133,14 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
 }
 
 bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
+
+  if (MI->allDefsAreDead()) {
+    DEBUG(dbgs() << "dead copy: " << *MI);
+    MI->setDesc(TII->get(TargetOpcode::KILL));
+    DEBUG(dbgs() << "replaced by: " << *MI);
+    return true;
+  }
+
   MachineOperand &DstMO = MI->getOperand(0);
   MachineOperand &SrcMO = MI->getOperand(1);
 
@@ -155,7 +148,7 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
     DEBUG(dbgs() << "identity copy: " << *MI);
     // No need to insert an identity copy instruction, but replace with a KILL
     // if liveness is changed.
-    if (DstMO.isDead() || SrcMO.isUndef() || MI->getNumOperands() > 2) {
+    if (SrcMO.isUndef() || MI->getNumOperands() > 2) {
       // We must make sure the super-register gets killed. Replace the
      // instruction with KILL.
       MI->setDesc(TII->get(TargetOpcode::KILL));
@@ -171,8 +164,6 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
   TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(),
                    DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill());
 
-  if (DstMO.isDead())
-    TransferDeadFlag(MI, DstMO.getReg(), TRI);
   if (MI->getNumOperands() > 2)
     TransferImplicitDefs(MI);
   DEBUG({
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index a6a06e4..ef5247c 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -37,21 +37,9 @@ namespace {
 
     void getAnalysisUsage(AnalysisUsage &AU) const;
 
     bool runOnFunction(Function &F);
-  };
-
-  class Deleter : public FunctionPass {
-    static char ID;
-
-  public:
-    Deleter();
-
-    const char *getPassName() const;
-    void getAnalysisUsage(AnalysisUsage &AU) const;
-
-    bool runOnFunction(Function &F);
     bool doFinalization(Module &M);
   };
-
+
 }
 
 INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
@@ -182,32 +170,9 @@ bool Printer::runOnFunction(Function &F) {
   return false;
 }
 
-// -----------------------------------------------------------------------------
-
-char Deleter::ID = 0;
-
-FunctionPass *llvm::createGCInfoDeleter() {
-  return new Deleter();
-}
-
-Deleter::Deleter() : FunctionPass(ID) {}
-
-const char *Deleter::getPassName() const {
-  return "Delete Garbage Collector Information";
-}
-
-void Deleter::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesAll();
-  AU.addRequired<GCModuleInfo>();
-}
-
-bool Deleter::runOnFunction(Function &MF) {
-  return false;
-}
-
-bool Deleter::doFinalization(Module &M) {
+bool Printer::doFinalization(Module &M) {
   GCModuleInfo *GMI = getAnalysisIfAvailable<GCModuleInfo>();
-  assert(GMI && "Deleter didn't require GCModuleInfo?!");
+  assert(GMI && "Printer didn't require GCModuleInfo?!");
   GMI->clear();
   return false;
 }
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 8906991..9958d7d 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -151,7 +151,7 @@
namespace { /// basic block number. std::vector<BBInfo> BBAnalysis; - const TargetLowering *TLI; + const TargetLoweringBase *TLI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const InstrItineraryData *InstrItins; @@ -1557,7 +1557,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { if (Succ == FallThrough) continue; FromBBI.BB->removeSuccessor(Succ); - if (AddEdges) + if (AddEdges && !ToBBI.BB->isSuccessor(Succ)) ToBBI.BB->addSuccessor(Succ); } diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 16e7968..07f0ccf 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -413,30 +413,22 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { } case Intrinsic::stacksave: + case Intrinsic::stackrestore: { if (!Warned) - Context.emitWarning("this target does not support the " - "llvm.stacksave intrinsic"); - Warned = true; - CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); - break; - - case Intrinsic::stackrestore: - if (!Warned) - Context.emitWarning("this target does not support the " - "llvm.stackrestore intrinsic"); + errs() << "WARNING: this target does not support the llvm.stack" + << (Callee->getIntrinsicID() == Intrinsic::stacksave ? + "save" : "restore") << " intrinsic.\n"; Warned = true; + if (Callee->getIntrinsicID() == Intrinsic::stacksave) + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); break; + } case Intrinsic::returnaddress: - Context.emitWarning("this target does not support the " - "llvm.returnaddress intrinsic"); - CI->replaceAllUsesWith(ConstantPointerNull::get( - cast<PointerType>(CI->getType()))); - break; - case Intrinsic::frameaddress: - Context.emitWarning("this target does not support the " - "llvm.frameaddress intrinsic"); + errs() << "WARNING: this target does not support the llvm." + << (Callee->getIntrinsicID() == Intrinsic::returnaddress ? + "return" : "frame") << "address intrinsic.\n"; CI->replaceAllUsesWith(ConstantPointerNull::get( cast<PointerType>(CI->getType()))); break; @@ -446,12 +438,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { case Intrinsic::pcmarker: break; // Simply strip out pcmarker on unsupported architectures - case Intrinsic::readcyclecounter: - Context.emitWarning("this target does not support the " - "llvm.readcyclecounter intrinsic; " - "it is being lowered to a constant 0"); + case Intrinsic::readcyclecounter: { + errs() << "WARNING: this target does not support the llvm.readcyclecoun" + << "ter intrinsic. 
It is being lowered to a constant 0\n"; CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0)); break; + } case Intrinsic::dbg_declare: break; // Simply strip out debugging intrinsics diff --git a/lib/CodeGen/LLVMBuild.txt b/lib/CodeGen/LLVMBuild.txt index fee0347..81ef1aa 100644 --- a/lib/CodeGen/LLVMBuild.txt +++ b/lib/CodeGen/LLVMBuild.txt @@ -22,4 +22,4 @@ subdirectories = AsmPrinter SelectionDAG type = Library name = CodeGen parent = Libraries -required_libraries = Analysis Core MC Scalar Support Target TransformUtils +required_libraries = Analysis Core MC Scalar Support Target TransformUtils ObjCARC diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 12cd2d1..1a09837 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -226,7 +226,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, PM.add(Printer); - PM.add(createGCInfoDeleter()); return false; } @@ -245,7 +244,6 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, return true; addCodeEmitter(PM, JCE); - PM.add(createGCInfoDeleter()); return false; // success! } diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index 3c01d91..8172154 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -314,24 +314,22 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) { void LexicalScope::anchor() { } /// dump - Print data structures. -void LexicalScope::dump() const { +void LexicalScope::dump(unsigned Indent) const { #ifndef NDEBUG raw_ostream &err = dbgs(); - err.indent(IndentLevel); + err.indent(Indent); err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n"; const MDNode *N = Desc; + err.indent(Indent); N->dump(); if (AbstractScope) - err << "Abstract Scope\n"; + err << std::string(Indent, ' ') << "Abstract Scope\n"; - IndentLevel += 2; if (!Children.empty()) - err << "Children ...\n"; + err << std::string(Indent + 2, ' ') << "Children ...\n"; for (unsigned i = 0, e = Children.size(); i != e; ++i) if (Children[i] != this) - Children[i]->dump(); - - IndentLevel -= 2; + Children[i]->dump(Indent + 2); #endif } diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 786f353..0b117ac 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -247,10 +247,6 @@ public: LiveIntervals &LIS, MachineDominatorTree &MDT, UserValueScopes &UVS); - /// renameRegister - Update locations to rewrite OldReg as NewReg:SubIdx. - void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx, - const TargetRegisterInfo *TRI); - /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is /// live. Returns true if any changes were made. bool splitRegister(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs); @@ -259,7 +255,7 @@ public: /// provided virtual register map. void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI); - /// emitDebugVariables - Recreate DBG_VALUE instruction from data structures. + /// emitDebugValues - Recreate DBG_VALUE instruction from data structures. void emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, const TargetInstrInfo &TRI); @@ -286,6 +282,11 @@ class LDVImpl { MachineDominatorTree *MDT; const TargetRegisterInfo *TRI; + /// Whether emitDebugValues is called. + bool EmitDone; + /// Whether the machine function is modified during the pass. + bool ModifiedMF; + /// userValues - All allocated UserValue instances. 
SmallVector<UserValue*, 8> userValues; @@ -320,27 +321,30 @@ class LDVImpl { void computeIntervals(); public: - LDVImpl(LiveDebugVariables *ps) : pass(*ps) {} + LDVImpl(LiveDebugVariables *ps) : pass(*ps), EmitDone(false), + ModifiedMF(false) {} bool runOnMachineFunction(MachineFunction &mf); - /// clear - Relase all memory. + /// clear - Release all memory. void clear() { DeleteContainerPointers(userValues); userValues.clear(); virtRegToEqClass.clear(); userVarMap.clear(); + // Make sure we call emitDebugValues if the machine function was modified. + assert((!ModifiedMF || EmitDone) && + "Dbg values are not emitted in LDV"); + EmitDone = false; + ModifiedMF = false; } /// mapVirtReg - Map virtual register to an equivalence class. void mapVirtReg(unsigned VirtReg, UserValue *EC); - /// renameRegister - Replace all references to OldReg with NewReg:SubIdx. - void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx); - /// splitRegister - Replace all references to OldReg with NewRegs. void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs); - /// emitDebugVariables - Recreate DBG_VALUE instruction from data structures. + /// emitDebugValues - Recreate DBG_VALUE instruction from data structures. void emitDebugValues(VirtRegMap *VRM); void print(raw_ostream&); @@ -693,6 +697,7 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { computeIntervals(); DEBUG(print(dbgs())); LS.releaseMemory(); + ModifiedMF = Changed; return Changed; } @@ -714,45 +719,6 @@ LiveDebugVariables::~LiveDebugVariables() { delete static_cast<LDVImpl*>(pImpl); } -void UserValue:: -renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx, - const TargetRegisterInfo *TRI) { - for (unsigned i = locations.size(); i; --i) { - unsigned LocNo = i - 1; - MachineOperand &Loc = locations[LocNo]; - if (!Loc.isReg() || Loc.getReg() != OldReg) - continue; - if (TargetRegisterInfo::isPhysicalRegister(NewReg)) - Loc.substPhysReg(NewReg, *TRI); - else - Loc.substVirtReg(NewReg, SubIdx, *TRI); - coalesceLocation(LocNo); - } -} - -void LDVImpl:: -renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) { - UserValue *UV = lookupVirtReg(OldReg); - if (!UV) - return; - - if (TargetRegisterInfo::isVirtualRegister(NewReg)) - mapVirtReg(NewReg, UV); - if (OldReg != NewReg) - virtRegToEqClass.erase(OldReg); - - do { - UV->renameRegister(OldReg, NewReg, SubIdx, TRI); - UV = UV->getNext(); - } while (UV); -} - -void LiveDebugVariables:: -renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) { - if (pImpl) - static_cast<LDVImpl*>(pImpl)->renameRegister(OldReg, NewReg, SubIdx); -} - //===----------------------------------------------------------------------===// // Live Range Splitting //===----------------------------------------------------------------------===// @@ -1011,6 +977,7 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { userValues[i]->rewriteLocations(*VRM, *TRI); userValues[i]->emitDebugValues(VRM, *LIS, *TII); } + EmitDone = true; } void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) { diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 68f4b16..dccd847 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -440,7 +440,7 @@ void LiveInterval::join(LiveInterval &Other, iterator OutIt = begin(); OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]]; - for (iterator I = next(OutIt), E = end(); I != E; ++I) { + for (iterator I = llvm::next(OutIt), E = end(); I != E; ++I) { VNInfo* nextValNo = 
NewVNInfo[LHSValNoAssignments[I->valno->id]]; assert(nextValNo != 0 && "Huh?"); @@ -464,10 +464,12 @@ void LiveInterval::join(LiveInterval &Other, ranges.erase(OutIt, end()); } - // Remember assignements because val# ids are changing. - SmallVector<unsigned, 16> OtherAssignments; + // Rewrite Other values before changing the VNInfo ids. + // This can leave Other in an invalid state because we're not coalescing + // touching segments that now have identical values. That's OK since Other is + // not supposed to be valid after calling join(); for (iterator I = Other.begin(), E = Other.end(); I != E; ++I) - OtherAssignments.push_back(RHSValNoAssignments[I->valno->id]); + I->valno = NewVNInfo[RHSValNoAssignments[I->valno->id]]; // Update val# info. Renumber them and make sure they all belong to this // LiveInterval now. Also remove dead val#'s. @@ -486,148 +488,9 @@ void LiveInterval::join(LiveInterval &Other, valnos.resize(NumNewVals); // shrinkify // Okay, now insert the RHS live ranges into the LHS. - unsigned RangeNo = 0; - for (iterator I = Other.begin(), E = Other.end(); I != E; ++I, ++RangeNo) { - // Map the valno in the other live range to the current live range. - I->valno = NewVNInfo[OtherAssignments[RangeNo]]; - assert(I->valno && "Adding a dead range?"); - } - mergeIntervalRanges(Other); - - verify(); -} - -/// \brief Helper function for merging in another LiveInterval's ranges. -/// -/// This is a helper routine implementing an efficient merge of another -/// LiveIntervals ranges into the current interval. -/// -/// \param LHSValNo If non-NULL, set as the new value number for every range -/// from RHS which is merged into the LHS. -/// \param RHSValNo If non-NULL, then only ranges in RHS whose original value -/// number maches this value number will be merged into LHS. -void LiveInterval::mergeIntervalRanges(const LiveInterval &RHS, - VNInfo *LHSValNo, - const VNInfo *RHSValNo) { - if (RHS.empty()) - return; - - // Ensure we're starting with a valid range. Note that we don't verify RHS - // because it may have had its value numbers adjusted in preparation for - // merging. - verify(); - - // The strategy for merging these efficiently is as follows: - // - // 1) Find the beginning of the impacted ranges in the LHS. - // 2) Create a new, merged sub-squence of ranges merging from the position in - // #1 until either LHS or RHS is exhausted. Any part of LHS between RHS - // entries being merged will be copied into this new range. - // 3) Replace the relevant section in LHS with these newly merged ranges. - // 4) Append any remaning ranges from RHS if LHS is exhausted in #2. - // - // We don't follow the typical in-place merge strategy for sorted ranges of - // appending the new ranges to the back and then using std::inplace_merge - // because one step of the merge can both mutate the original elements and - // remove elements from the original. Essentially, because the merge includes - // collapsing overlapping ranges, a more complex approach is required. - - // We do an initial binary search to optimize for a common pattern: a large - // LHS, and a very small RHS. - const_iterator RI = RHS.begin(), RE = RHS.end(); - iterator LE = end(), LI = std::upper_bound(begin(), LE, *RI); - - // Merge into NewRanges until one of the ranges is exhausted. - SmallVector<LiveRange, 4> NewRanges; - - // Keep track of where to begin the replacement. - iterator ReplaceI = LI; - - // If there are preceding ranges in the LHS, put the last one into NewRanges - // so we can optionally extend it. 
Adjust the replacement point accordingly. - if (LI != begin()) { - ReplaceI = llvm::prior(LI); - NewRanges.push_back(*ReplaceI); - } - - // Now loop over the mergable portions of both LHS and RHS, merging into - // NewRanges. - while (LI != LE && RI != RE) { - // Skip incoming ranges with the wrong value. - if (RHSValNo && RI->valno != RHSValNo) { - ++RI; - continue; - } - - // Select the first range. We pick the earliest start point, and then the - // largest range. - LiveRange R = *LI; - if (*RI < R) { - R = *RI; - ++RI; - if (LHSValNo) - R.valno = LHSValNo; - } else { - ++LI; - } - - if (NewRanges.empty()) { - NewRanges.push_back(R); - continue; - } - - LiveRange &LastR = NewRanges.back(); - if (R.valno == LastR.valno) { - // Try to merge this range into the last one. - if (R.start <= LastR.end) { - LastR.end = std::max(LastR.end, R.end); - continue; - } - } else { - // We can't merge ranges across a value number. - assert(R.start >= LastR.end && - "Cannot overlap two LiveRanges with differing ValID's"); - } - - // If all else fails, just append the range. - NewRanges.push_back(R); - } - assert(RI == RE || LI == LE); - - // Check for being able to merge into the trailing sequence of ranges on the LHS. - if (!NewRanges.empty()) - for (; LI != LE && (LI->valno == NewRanges.back().valno && - LI->start <= NewRanges.back().end); - ++LI) - NewRanges.back().end = std::max(NewRanges.back().end, LI->end); - - // Replace the ranges in the LHS with the newly merged ones. It would be - // really nice if there were a move-supporting 'replace' directly in - // SmallVector, but as there is not, we pay the price of copies to avoid - // wasted memory allocations. - SmallVectorImpl<LiveRange>::iterator NRI = NewRanges.begin(), - NRE = NewRanges.end(); - for (; ReplaceI != LI && NRI != NRE; ++ReplaceI, ++NRI) - *ReplaceI = *NRI; - if (NRI == NRE) - ranges.erase(ReplaceI, LI); - else - ranges.insert(LI, NRI, NRE); - - // And finally insert any trailing end of RHS (if we have one). - for (; RI != RE; ++RI) { - LiveRange R = *RI; - if (LHSValNo) - R.valno = LHSValNo; - if (!ranges.empty() && - ranges.back().valno == R.valno && R.start <= ranges.back().end) - ranges.back().end = std::max(ranges.back().end, R.end); - else - ranges.push_back(R); - } - - // Ensure we finished with a valid new sequence of ranges. - verify(); + LiveRangeUpdater Updater(this); + for (iterator I = Other.begin(), E = Other.end(); I != E; ++I) + Updater.add(*I); } /// MergeRangesInAsValue - Merge all of the intervals in RHS into this live @@ -636,7 +499,9 @@ void LiveInterval::mergeIntervalRanges(const LiveInterval &RHS, /// the overlapping LiveRanges have the specified value number. 
 void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
                                         VNInfo *LHSValNo) {
-  mergeIntervalRanges(RHS, LHSValNo);
+  LiveRangeUpdater Updater(this);
+  for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I)
+    Updater.add(I->start, I->end, LHSValNo);
 }
 
 /// MergeValueInAsValue - Merge all of the live ranges of a specific val#
@@ -647,7 +512,10 @@ void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
 void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS,
                                        const VNInfo *RHSValNo,
                                        VNInfo *LHSValNo) {
-  mergeIntervalRanges(RHS, LHSValNo, RHSValNo);
+  LiveRangeUpdater Updater(this);
+  for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I)
+    if (I->valno == RHSValNo)
+      Updater.add(I->start, I->end, LHSValNo);
 }
 
 /// MergeValueNumberInto - This method is called when two value numbers
@@ -785,6 +653,206 @@ void LiveRange::print(raw_ostream &os) const {
   os << *this;
 }
 
+//===----------------------------------------------------------------------===//
+// LiveRangeUpdater class
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeUpdater class always maintains these invariants:
+//
+// - When LastStart is invalid, Spills is empty and the iterators are invalid.
+//   This is the initial state, and the state created by flush().
+//   In this state, isDirty() returns false.
+//
+// Otherwise, segments are kept in three separate areas:
+//
+// 1. [begin; WriteI) at the front of LI.
+// 2. [ReadI; end) at the back of LI.
+// 3. Spills.
+//
+// - LI.begin() <= WriteI <= ReadI <= LI.end().
+// - Segments in all three areas are fully ordered and coalesced.
+// - Segments in area 1 precede and can't coalesce with segments in area 2.
+// - Segments in Spills precede and can't coalesce with segments in area 2.
+// - No coalescing is possible between segments in Spills and segments in area
+//   1, and there are no overlapping segments.
+//
+// The segments in Spills are not ordered with respect to the segments in area
+// 1. They need to be merged.
+//
+// When they exist, Spills.back().start <= LastStart,
+// and WriteI[-1].start <= LastStart.
+
+void LiveRangeUpdater::print(raw_ostream &OS) const {
+  if (!isDirty()) {
+    if (LI)
+      OS << "Clean " << PrintReg(LI->reg) << " updater: " << *LI << '\n';
+    else
+      OS << "Null updater.\n";
+    return;
+  }
+  assert(LI && "Can't have null LI in dirty updater.");
+  OS << PrintReg(LI->reg) << " updater with gap = " << (ReadI - WriteI)
+     << ", last start = " << LastStart
+     << ":\n  Area 1:";
+  for (LiveInterval::const_iterator I = LI->begin(); I != WriteI; ++I)
+    OS << ' ' << *I;
+  OS << "\n  Spills:";
+  for (unsigned I = 0, E = Spills.size(); I != E; ++I)
+    OS << ' ' << Spills[I];
+  OS << "\n  Area 2:";
+  for (LiveInterval::const_iterator I = ReadI, E = LI->end(); I != E; ++I)
+    OS << ' ' << *I;
+  OS << '\n';
+}
+
+void LiveRangeUpdater::dump() const {
+  print(errs());
+}
+
+// Determine if A and B should be coalesced.
+static inline bool coalescable(const LiveRange &A, const LiveRange &B) {
+  assert(A.start <= B.start && "Unordered live ranges.");
+  if (A.end == B.start)
+    return A.valno == B.valno;
+  if (A.end < B.start)
+    return false;
+  assert(A.valno == B.valno && "Cannot overlap different values");
+  return true;
+}
+
+void LiveRangeUpdater::add(LiveRange Seg) {
+  assert(LI && "Cannot add to a null destination");
+
+  // Flush the state if Start moves backwards.
+ if (!LastStart.isValid() || LastStart > Seg.start) { + if (isDirty()) + flush(); + // This brings us to an uninitialized state. Reinitialize. + assert(Spills.empty() && "Leftover spilled segments"); + WriteI = ReadI = LI->begin(); + } + + // Remember start for next time. + LastStart = Seg.start; + + // Advance ReadI until it ends after Seg.start. + LiveInterval::iterator E = LI->end(); + if (ReadI != E && ReadI->end <= Seg.start) { + // First try to close the gap between WriteI and ReadI with spills. + if (ReadI != WriteI) + mergeSpills(); + // Then advance ReadI. + if (ReadI == WriteI) + ReadI = WriteI = LI->find(Seg.start); + else + while (ReadI != E && ReadI->end <= Seg.start) + *WriteI++ = *ReadI++; + } + + assert(ReadI == E || ReadI->end > Seg.start); + + // Check if the ReadI segment begins early. + if (ReadI != E && ReadI->start <= Seg.start) { + assert(ReadI->valno == Seg.valno && "Cannot overlap different values"); + // Bail if Seg is completely contained in ReadI. + if (ReadI->end >= Seg.end) + return; + // Coalesce into Seg. + Seg.start = ReadI->start; + ++ReadI; + } + + // Coalesce as much as possible from ReadI into Seg. + while (ReadI != E && coalescable(Seg, *ReadI)) { + Seg.end = std::max(Seg.end, ReadI->end); + ++ReadI; + } + + // Try coalescing Spills.back() into Seg. + if (!Spills.empty() && coalescable(Spills.back(), Seg)) { + Seg.start = Spills.back().start; + Seg.end = std::max(Spills.back().end, Seg.end); + Spills.pop_back(); + } + + // Try coalescing Seg into WriteI[-1]. + if (WriteI != LI->begin() && coalescable(WriteI[-1], Seg)) { + WriteI[-1].end = std::max(WriteI[-1].end, Seg.end); + return; + } + + // Seg doesn't coalesce with anything, and needs to be inserted somewhere. + if (WriteI != ReadI) { + *WriteI++ = Seg; + return; + } + + // Finally, append to LI or Spills. + if (WriteI == E) { + LI->ranges.push_back(Seg); + WriteI = ReadI = LI->ranges.end(); + } else + Spills.push_back(Seg); +} + +// Merge as many spilled segments as possible into the gap between WriteI +// and ReadI. Advance WriteI to reflect the inserted instructions. +void LiveRangeUpdater::mergeSpills() { + // Perform a backwards merge of Spills and [SpillI;WriteI). + size_t GapSize = ReadI - WriteI; + size_t NumMoved = std::min(Spills.size(), GapSize); + LiveInterval::iterator Src = WriteI; + LiveInterval::iterator Dst = Src + NumMoved; + LiveInterval::iterator SpillSrc = Spills.end(); + LiveInterval::iterator B = LI->begin(); + + // This is the new WriteI position after merging spills. + WriteI = Dst; + + // Now merge Src and Spills backwards. + while (Src != Dst) { + if (Src != B && Src[-1].start > SpillSrc[-1].start) + *--Dst = *--Src; + else + *--Dst = *--SpillSrc; + } + assert(NumMoved == size_t(Spills.end() - SpillSrc)); + Spills.erase(SpillSrc, Spills.end()); +} + +void LiveRangeUpdater::flush() { + if (!isDirty()) + return; + // Clear the dirty state. + LastStart = SlotIndex(); + + assert(LI && "Cannot add to a null destination"); + + // Nothing to merge? + if (Spills.empty()) { + LI->ranges.erase(WriteI, ReadI); + LI->verify(); + return; + } + + // Resize the WriteI - ReadI gap to match Spills. + size_t GapSize = ReadI - WriteI; + if (GapSize < Spills.size()) { + // The gap is too small. Make some room. + size_t WritePos = WriteI - LI->begin(); + LI->ranges.insert(ReadI, Spills.size() - GapSize, LiveRange()); + // This also invalidated ReadI, but it is recomputed below. + WriteI = LI->ranges.begin() + WritePos; + } else { + // Shrink the gap if necessary. 
+ LI->ranges.erase(WriteI + Spills.size(), ReadI); + } + ReadI = WriteI + Spills.size(); + mergeSpills(); + LI->verify(); +} + unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { // Create initial equivalence classes. EqClass.clear(); diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 4198457..22b35d5 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -40,11 +40,6 @@ #include <limits> using namespace llvm; -// Switch to the new experimental algorithm for computing live intervals. -static cl::opt<bool> -NewLiveIntervals("new-live-intervals", cl::Hidden, - cl::desc("Use new algorithm forcomputing live intervals")); - char LiveIntervals::ID = 0; char &llvm::LiveIntervalsID = LiveIntervals::ID; INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", @@ -60,6 +55,9 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); AU.addPreserved<AliasAnalysis>(); + // LiveVariables isn't really required by this analysis, it is only required + // here to make sure it is live during TwoAddressInstructionPass and + // PHIElimination. This is temporary. AU.addRequired<LiveVariables>(); AU.addPreserved<LiveVariables>(); AU.addPreservedID(MachineLoopInfoID); @@ -105,7 +103,6 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { TRI = TM->getRegisterInfo(); TII = TM->getInstrInfo(); AA = &getAnalysis<AliasAnalysis>(); - LV = &getAnalysis<LiveVariables>(); Indexes = &getAnalysis<SlotIndexes>(); DomTree = &getAnalysis<MachineDominatorTree>(); if (!LRCalc) @@ -114,16 +111,8 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { // Allocate space for all virtual registers. VirtRegIntervals.resize(MRI->getNumVirtRegs()); - if (NewLiveIntervals) { - // This is the new way of computing live intervals. - // It is independent of LiveVariables, and it can run at any time. - computeVirtRegs(); - computeRegMasks(); - } else { - // This is the old way of computing live intervals. - // It depends on LiveVariables. - computeIntervals(); - } + computeVirtRegs(); + computeRegMasks(); computeLiveInRegUnits(); DEBUG(dump()); @@ -165,298 +154,6 @@ void LiveIntervals::dumpInstrs() const { } #endif -static -bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) { - unsigned Reg = MI.getOperand(MOIdx).getReg(); - for (unsigned i = MOIdx+1, e = MI.getNumOperands(); i < e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg()) - continue; - if (MO.getReg() == Reg && MO.isDef()) { - assert(MI.getOperand(MOIdx).getSubReg() != MO.getSubReg() && - MI.getOperand(MOIdx).getSubReg() && - (MO.getSubReg() || MO.isImplicit())); - return true; - } - } - return false; -} - -/// isPartialRedef - Return true if the specified def at the specific index is -/// partially re-defining the specified live interval. A common case of this is -/// a definition of the sub-register. 
-bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO, - LiveInterval &interval) { - if (!MO.getSubReg() || MO.isEarlyClobber()) - return false; - - SlotIndex RedefIndex = MIIdx.getRegSlot(); - const LiveRange *OldLR = - interval.getLiveRangeContaining(RedefIndex.getRegSlot(true)); - MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def); - if (DefMI != 0) { - return DefMI->findRegisterDefOperandIdx(interval.reg) != -1; - } - return false; -} - -void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, - MachineBasicBlock::iterator mi, - SlotIndex MIIdx, - MachineOperand& MO, - unsigned MOIdx, - LiveInterval &interval) { - DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, TRI)); - - // Virtual registers may be defined multiple times (due to phi - // elimination and 2-addr elimination). Much of what we do only has to be - // done once for the vreg. We use an empty interval to detect the first - // time we see a vreg. - LiveVariables::VarInfo& vi = LV->getVarInfo(interval.reg); - if (interval.empty()) { - // Get the Idx of the defining instructions. - SlotIndex defIndex = MIIdx.getRegSlot(MO.isEarlyClobber()); - - // Make sure the first definition is not a partial redefinition. - assert(!MO.readsReg() && "First def cannot also read virtual register " - "missing <undef> flag?"); - - VNInfo *ValNo = interval.getNextValue(defIndex, VNInfoAllocator); - assert(ValNo->id == 0 && "First value in interval is not 0?"); - - // Loop over all of the blocks that the vreg is defined in. There are - // two cases we have to handle here. The most common case is a vreg - // whose lifetime is contained within a basic block. In this case there - // will be a single kill, in MBB, which comes after the definition. - if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) { - // FIXME: what about dead vars? - SlotIndex killIdx; - if (vi.Kills[0] != mi) - killIdx = getInstructionIndex(vi.Kills[0]).getRegSlot(); - else - killIdx = defIndex.getDeadSlot(); - - // If the kill happens after the definition, we have an intra-block - // live range. - if (killIdx > defIndex) { - assert(vi.AliveBlocks.empty() && - "Shouldn't be alive across any blocks!"); - LiveRange LR(defIndex, killIdx, ValNo); - interval.addRange(LR); - DEBUG(dbgs() << " +" << LR << "\n"); - return; - } - } - - // The other case we handle is when a virtual register lives to the end - // of the defining block, potentially live across some blocks, then is - // live into some number of blocks, but gets killed. Start by adding a - // range that goes from this definition to the end of the defining block. - LiveRange NewLR(defIndex, getMBBEndIdx(mbb), ValNo); - DEBUG(dbgs() << " +" << NewLR); - interval.addRange(NewLR); - - bool PHIJoin = LV->isPHIJoin(interval.reg); - - if (PHIJoin) { - // A phi join register is killed at the end of the MBB and revived as a - // new valno in the killing blocks. - assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks"); - DEBUG(dbgs() << " phi-join"); - } else { - // Iterate over all of the blocks that the variable is completely - // live in, adding [insrtIndex(begin), instrIndex(end)+4) to the - // live interval. 
- for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(), - E = vi.AliveBlocks.end(); I != E; ++I) { - MachineBasicBlock *aliveBlock = MF->getBlockNumbered(*I); - LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), - ValNo); - interval.addRange(LR); - DEBUG(dbgs() << " +" << LR); - } - } - - // Finally, this virtual register is live from the start of any killing - // block to the 'use' slot of the killing instruction. - for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) { - MachineInstr *Kill = vi.Kills[i]; - SlotIndex Start = getMBBStartIdx(Kill->getParent()); - SlotIndex killIdx = getInstructionIndex(Kill).getRegSlot(); - - // Create interval with one of a NEW value number. Note that this value - // number isn't actually defined by an instruction, weird huh? :) - if (PHIJoin) { - assert(getInstructionFromIndex(Start) == 0 && - "PHI def index points at actual instruction."); - ValNo = interval.getNextValue(Start, VNInfoAllocator); - } - LiveRange LR(Start, killIdx, ValNo); - interval.addRange(LR); - DEBUG(dbgs() << " +" << LR); - } - - } else { - if (MultipleDefsBySameMI(*mi, MOIdx)) - // Multiple defs of the same virtual register by the same instruction. - // e.g. %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ... - // This is likely due to elimination of REG_SEQUENCE instructions. Return - // here since there is nothing to do. - return; - - // If this is the second time we see a virtual register definition, it - // must be due to phi elimination or two addr elimination. If this is - // the result of two address elimination, then the vreg is one of the - // def-and-use register operand. - - // It may also be partial redef like this: - // 80 %reg1041:6<def> = VSHRNv4i16 %reg1034<kill>, 12, pred:14, pred:%reg0 - // 120 %reg1041:5<def> = VSHRNv4i16 %reg1039<kill>, 12, pred:14, pred:%reg0 - bool PartReDef = isPartialRedef(MIIdx, MO, interval); - if (PartReDef || mi->isRegTiedToUseOperand(MOIdx)) { - // If this is a two-address definition, then we have already processed - // the live range. The only problem is that we didn't realize there - // are actually two values in the live interval. Because of this we - // need to take the LiveRegion that defines this register and split it - // into two values. - SlotIndex RedefIndex = MIIdx.getRegSlot(MO.isEarlyClobber()); - - const LiveRange *OldLR = - interval.getLiveRangeContaining(RedefIndex.getRegSlot(true)); - VNInfo *OldValNo = OldLR->valno; - SlotIndex DefIndex = OldValNo->def.getRegSlot(); - - // Delete the previous value, which should be short and continuous, - // because the 2-addr copy must be in the same MBB as the redef. - interval.removeRange(DefIndex, RedefIndex); - - // The new value number (#1) is defined by the instruction we claimed - // defined value #0. - VNInfo *ValNo = interval.createValueCopy(OldValNo, VNInfoAllocator); - - // Value#0 is now defined by the 2-addr instruction. - OldValNo->def = RedefIndex; - - // Add the new live interval which replaces the range for the input copy. - LiveRange LR(DefIndex, RedefIndex, ValNo); - DEBUG(dbgs() << " replace range with " << LR); - interval.addRange(LR); - - // If this redefinition is dead, we need to add a dummy unit live - // range covering the def slot. 
- if (MO.isDead()) - interval.addRange(LiveRange(RedefIndex, RedefIndex.getDeadSlot(), - OldValNo)); - - DEBUG(dbgs() << " RESULT: " << interval); - } else if (LV->isPHIJoin(interval.reg)) { - // In the case of PHI elimination, each variable definition is only - // live until the end of the block. We've already taken care of the - // rest of the live range. - - SlotIndex defIndex = MIIdx.getRegSlot(); - if (MO.isEarlyClobber()) - defIndex = MIIdx.getRegSlot(true); - - VNInfo *ValNo = interval.getNextValue(defIndex, VNInfoAllocator); - - SlotIndex killIndex = getMBBEndIdx(mbb); - LiveRange LR(defIndex, killIndex, ValNo); - interval.addRange(LR); - DEBUG(dbgs() << " phi-join +" << LR); - } else { - llvm_unreachable("Multiply defined register"); - } - } - - DEBUG(dbgs() << '\n'); -} - -void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, - MachineBasicBlock::iterator MI, - SlotIndex MIIdx, - MachineOperand& MO, - unsigned MOIdx) { - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) - handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx, - getOrCreateInterval(MO.getReg())); -} - -/// computeIntervals - computes the live intervals for virtual -/// registers. for some ordering of the machine instructions [1,N] a -/// live interval is an interval [i, j) where 1 <= i <= j < N for -/// which a variable is live -void LiveIntervals::computeIntervals() { - DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n" - << "********** Function: " << MF->getName() << '\n'); - - RegMaskBlocks.resize(MF->getNumBlockIDs()); - - SmallVector<unsigned, 8> UndefUses; - for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); - MBBI != E; ++MBBI) { - MachineBasicBlock *MBB = MBBI; - RegMaskBlocks[MBB->getNumber()].first = RegMaskSlots.size(); - - if (MBB->empty()) - continue; - - // Track the index of the current machine instr. - SlotIndex MIIndex = getMBBStartIdx(MBB); - DEBUG(dbgs() << "BB#" << MBB->getNumber() - << ":\t\t# derived from " << MBB->getName() << "\n"); - - // Skip over empty initial indices. - if (getInstructionFromIndex(MIIndex) == 0) - MIIndex = Indexes->getNextNonNullIndex(MIIndex); - - for (MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end(); - MI != miEnd; ++MI) { - DEBUG(dbgs() << MIIndex << "\t" << *MI); - if (MI->isDebugValue()) - continue; - assert(Indexes->getInstructionFromIndex(MIIndex) == MI && - "Lost SlotIndex synchronization"); - - // Handle defs. - for (int i = MI->getNumOperands() - 1; i >= 0; --i) { - MachineOperand &MO = MI->getOperand(i); - - // Collect register masks. - if (MO.isRegMask()) { - RegMaskSlots.push_back(MIIndex.getRegSlot()); - RegMaskBits.push_back(MO.getRegMask()); - continue; - } - - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; - - // handle register defs - build intervals - if (MO.isDef()) - handleRegisterDef(MBB, MI, MIIndex, MO, i); - else if (MO.isUndef()) - UndefUses.push_back(MO.getReg()); - } - - // Move to the next instr slot. - MIIndex = Indexes->getNextNonNullIndex(MIIndex); - } - - // Compute the number of register mask instructions in this block. - std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()]; - RMB.second = RegMaskSlots.size() - RMB.first; - } - - // Create empty intervals for registers defined by implicit_def's (except - // for those implicit_def that define values which are liveout of their - // blocks. 
- for (unsigned i = 0, e = UndefUses.size(); i != e; ++i) { - unsigned UndefReg = UndefUses[i]; - (void)getOrCreateInterval(UndefReg); - } -} - LiveInterval* LiveIntervals::createInterval(unsigned reg) { float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? HUGE_VALF : 0.0F; return new LiveInterval(reg, Weight); @@ -1335,3 +1032,129 @@ void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags); HME.updateAllRanges(MI); } + +void +LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + ArrayRef<unsigned> OrigRegs) { + // Find anchor points, which are at the beginning/end of blocks or at + // instructions that already have indexes. + while (Begin != MBB->begin() && !Indexes->hasIndex(Begin)) + --Begin; + while (End != MBB->end() && !Indexes->hasIndex(End)) + ++End; + + SlotIndex endIdx; + if (End == MBB->end()) + endIdx = getMBBEndIdx(MBB).getPrevSlot(); + else + endIdx = getInstructionIndex(End); + + Indexes->repairIndexesInRange(MBB, Begin, End); + + for (MachineBasicBlock::iterator I = End; I != Begin;) { + --I; + MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; + for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + if (MOI->isReg() && + TargetRegisterInfo::isVirtualRegister(MOI->getReg()) && + !hasInterval(MOI->getReg())) { + LiveInterval &LI = getOrCreateInterval(MOI->getReg()); + computeVirtRegInterval(&LI); + } + } + } + + for (unsigned i = 0, e = OrigRegs.size(); i != e; ++i) { + unsigned Reg = OrigRegs[i]; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + LiveInterval &LI = getInterval(Reg); + // FIXME: Should we support undefs that gain defs? + if (!LI.hasAtLeastOneValue()) + continue; + + LiveInterval::iterator LII = LI.find(endIdx); + SlotIndex lastUseIdx; + if (LII != LI.end() && LII->start < endIdx) + lastUseIdx = LII->end; + else + --LII; + + for (MachineBasicBlock::iterator I = End; I != Begin;) { + --I; + MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; + + SlotIndex instrIdx = getInstructionIndex(MI); + bool isStartValid = getInstructionFromIndex(LII->start); + bool isEndValid = getInstructionFromIndex(LII->end); + + // FIXME: This doesn't currently handle early-clobber or multiple removed + // defs inside of the region to repair. + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + const MachineOperand &MO = *OI; + if (!MO.isReg() || MO.getReg() != Reg) + continue; + + if (MO.isDef()) { + if (!isStartValid) { + if (LII->end.isDead()) { + SlotIndex prevStart; + if (LII != LI.begin()) + prevStart = llvm::prior(LII)->start; + + // FIXME: This could be more efficient if there was a removeRange + // method that returned an iterator. 
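repairIntervalsInRange(), added above, begins by widening [Begin, End) outward until both endpoints sit on instructions that still have slot indexes, or hit the block boundaries, so the repair runs between known-good anchor points. A rough standalone sketch of that widening step, with an array standing in for the block and a predicate standing in for SlotIndexes::hasIndex (names are illustrative):

    #include <cstddef>
    #include <cstdio>
    #include <functional>

    // Widen [Begin, End) until each side is anchored at an indexed position
    // or at a block boundary, mirroring the loops at the top of
    // repairIntervalsInRange().
    static void widenToAnchors(std::size_t &Begin, std::size_t &End,
                               std::size_t BlockSize,
                               const std::function<bool(std::size_t)> &HasIndex) {
      while (Begin != 0 && !HasIndex(Begin))
        --Begin;                 // move Begin back to an indexed instruction
      while (End != BlockSize && !HasIndex(End))
        ++End;                   // move End forward to an indexed instruction
    }

    int main() {
      const bool Indexed[8] = {true, false, true, false, false, true, false, true};
      std::size_t Begin = 3, End = 4;
      widenToAnchors(Begin, End, 8,
                     [&](std::size_t I) { return Indexed[I]; });
      std::printf("anchored range [%zu, %zu)\n", Begin, End); // prints [2, 5)
      return 0;
    }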
+ LI.removeRange(*LII, true); + if (prevStart.isValid()) + LII = LI.find(prevStart); + else + LII = LI.begin(); + } else { + LII->start = instrIdx.getRegSlot(); + LII->valno->def = instrIdx.getRegSlot(); + if (MO.getSubReg() && !MO.isUndef()) + lastUseIdx = instrIdx.getRegSlot(); + else + lastUseIdx = SlotIndex(); + continue; + } + } + + if (!lastUseIdx.isValid()) { + VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(), + VNInfoAllocator); + LiveRange LR(instrIdx.getRegSlot(), instrIdx.getDeadSlot(), VNI); + LII = LI.addRange(LR); + } else if (LII->start != instrIdx.getRegSlot()) { + VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(), + VNInfoAllocator); + LiveRange LR(instrIdx.getRegSlot(), lastUseIdx, VNI); + LII = LI.addRange(LR); + } + + if (MO.getSubReg() && !MO.isUndef()) + lastUseIdx = instrIdx.getRegSlot(); + else + lastUseIdx = SlotIndex(); + } else if (MO.isUse()) { + // FIXME: This should probably be handled outside of this branch, + // either as part of the def case (for defs inside of the region) or + // after the loop over the region. + if (!isEndValid && !LII->end.isBlock()) + LII->end = instrIdx.getRegSlot(); + if (!lastUseIdx.isValid()) + lastUseIdx = instrIdx.getRegSlot(); + } + } + } + } +} diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index c3ff4f1..dede490 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -18,10 +18,11 @@ using namespace llvm; -void LiveRangeCalc::reset(const MachineFunction *MF, +void LiveRangeCalc::reset(const MachineFunction *mf, SlotIndexes *SI, MachineDominatorTree *MDT, VNInfo::Allocator *VNIA) { + MF = mf; MRI = &MF->getRegInfo(); Indexes = SI; DomTree = MDT; @@ -104,28 +105,28 @@ void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) { // Transfer information from the LiveIn vector to the live ranges. -void LiveRangeCalc::updateLiveIns(VNInfo *OverrideVNI) { +void LiveRangeCalc::updateLiveIns() { + LiveRangeUpdater Updater; for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(), E = LiveIn.end(); I != E; ++I) { if (!I->DomNode) continue; MachineBasicBlock *MBB = I->DomNode->getBlock(); - - VNInfo *VNI = OverrideVNI ? OverrideVNI : I->Value; - assert(VNI && "No live-in value found"); - + assert(I->Value && "No live-in value found"); SlotIndex Start, End; tie(Start, End) = Indexes->getMBBRange(MBB); if (I->Kill.isValid()) - I->LI->addRange(LiveRange(Start, I->Kill, VNI)); + // Value is killed inside this block. + End = I->Kill; else { - I->LI->addRange(LiveRange(Start, End, VNI)); - // The value is live-through, update LiveOut as well. Defer the Domtree - // lookup until it is needed. + // The value is live-through, update LiveOut as well. + // Defer the Domtree lookup until it is needed. assert(Seen.test(MBB->getNumber())); - LiveOut[MBB] = LiveOutPair(VNI, (MachineDomTreeNode *)0); + LiveOut[MBB] = LiveOutPair(I->Value, (MachineDomTreeNode *)0); } + Updater.setDest(I->LI); + Updater.add(Start, End, I->Value); } LiveIn.clear(); } @@ -150,13 +151,11 @@ void LiveRangeCalc::extend(LiveInterval *LI, // multiple values, and we may need to create even more phi-defs to preserve // VNInfo SSA form. Perform a search for all predecessor blocks where we // know the dominating VNInfo. - VNInfo *VNI = findReachingDefs(LI, KillMBB, Kill, PhysReg); + if (findReachingDefs(LI, KillMBB, Kill, PhysReg)) + return; // When there were multiple different values, we may need new PHIs. 
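The search mentioned here, findReachingDefs(), is essentially a backwards BFS from the kill block over CFG predecessors: it stops at any block that already carries a live-out value and remembers whether every path produced the same value. A condensed standalone model of that search, using an adjacency list of predecessors and an array of known live-out values (a sketch of the idea, not the LiveRangeCalc interface):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Returns true if exactly one defined value reaches KillBB; false if the
    // search saw several distinct values (the caller would then create PHIs).
    // LiveOutVal[b] is the value live out of block b, or -1 if unknown.
    static bool findReachingDefs(const std::vector<std::vector<int> > &Preds,
                                 const std::vector<int> &LiveOutVal,
                                 int KillBB, int &TheVal) {
      std::vector<char> Seen(Preds.size(), 0);
      std::vector<int> WorkList(1, KillBB);
      Seen[KillBB] = 1;
      TheVal = -1;
      bool Unique = true;
      for (std::size_t i = 0; i != WorkList.size(); ++i) {
        const std::vector<int> &P = Preds[WorkList[i]];
        for (std::size_t j = 0; j != P.size(); ++j) {
          int Pred = P[j];
          if (LiveOutVal[Pred] != -1) {   // a def reaches us from Pred
            if (TheVal == -1)
              TheVal = LiveOutVal[Pred];
            else if (TheVal != LiveOutVal[Pred])
              Unique = false;             // conflicting values join here
            continue;                     // no need to search above Pred
          }
          if (!Seen[Pred]) {              // value must be live-in to Pred too
            Seen[Pred] = 1;
            WorkList.push_back(Pred);
          }
        }
      }
      return Unique && TheVal != -1;
    }

    int main() {
      // Diamond CFG: block 3's predecessors are 1 and 2; both see value 7
      // flowing out of block 0.
      std::vector<std::vector<int> > Preds(4);
      Preds[1].push_back(0); Preds[2].push_back(0);
      Preds[3].push_back(1); Preds[3].push_back(2);
      std::vector<int> LiveOut(4, -1);
      LiveOut[0] = 7;
      int V;
      std::printf("unique=%d value=%d\n",
                  (int)findReachingDefs(Preds, LiveOut, 3, V), V);
      return 0;
    }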
- if (!VNI) - updateSSA(); - - updateLiveIns(VNI); + calculateValues(); } @@ -167,16 +166,18 @@ void LiveRangeCalc::calculateValues() { assert(Indexes && "Missing SlotIndexes"); assert(DomTree && "Missing dominator tree"); updateSSA(); - updateLiveIns(0); + updateLiveIns(); } -VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI, - MachineBasicBlock *KillMBB, - SlotIndex Kill, - unsigned PhysReg) { - // Blocks where LI should be live-in. - SmallVector<MachineBasicBlock*, 16> WorkList(1, KillMBB); +bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, + MachineBasicBlock *KillMBB, + SlotIndex Kill, + unsigned PhysReg) { + unsigned KillMBBNum = KillMBB->getNumber(); + + // Block numbers where LI should be live-in. + SmallVector<unsigned, 16> WorkList(1, KillMBBNum); // Remember if we have seen more than one value. bool UniqueVNI = true; @@ -184,7 +185,7 @@ VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI, // Using Seen as a visited set, perform a BFS for all reaching defs. for (unsigned i = 0; i != WorkList.size(); ++i) { - MachineBasicBlock *MBB = WorkList[i]; + MachineBasicBlock *MBB = MF->getBlockNumbered(WorkList[i]); #ifndef NDEBUG if (MBB->pred_empty()) { @@ -231,25 +232,50 @@ VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI, // No, we need a live-in value for Pred as well if (Pred != KillMBB) - WorkList.push_back(Pred); + WorkList.push_back(Pred->getNumber()); else // Loopback to KillMBB, so value is really live through. Kill = SlotIndex(); } } - // Transfer WorkList to LiveInBlocks in reverse order. - // This ordering works best with updateSSA(). LiveIn.clear(); - LiveIn.reserve(WorkList.size()); - while(!WorkList.empty()) - addLiveInBlock(LI, DomTree->getNode(WorkList.pop_back_val())); - // The kill block may not be live-through. - assert(LiveIn.back().DomNode->getBlock() == KillMBB); - LiveIn.back().Kill = Kill; + // Both updateSSA() and LiveRangeUpdater benefit from ordered blocks, but + // neither require it. Skip the sorting overhead for small updates. + if (WorkList.size() > 4) + array_pod_sort(WorkList.begin(), WorkList.end()); + + // If a unique reaching def was found, blit in the live ranges immediately. + if (UniqueVNI) { + LiveRangeUpdater Updater(LI); + for (SmallVectorImpl<unsigned>::const_iterator + I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { + SlotIndex Start, End; + tie(Start, End) = Indexes->getMBBRange(*I); + // Trim the live range in KillMBB. + if (*I == KillMBBNum && Kill.isValid()) + End = Kill; + else + LiveOut[MF->getBlockNumbered(*I)] = + LiveOutPair(TheVNI, (MachineDomTreeNode *)0); + Updater.add(Start, End, TheVNI); + } + return true; + } + + // Multiple values were found, so transfer the work list to the LiveIn array + // where UpdateSSA will use it as a work list. + LiveIn.reserve(WorkList.size()); + for (SmallVectorImpl<unsigned>::const_iterator + I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { + MachineBasicBlock *MBB = MF->getBlockNumbered(*I); + addLiveInBlock(LI, DomTree->getNode(MBB)); + if (MBB == KillMBB) + LiveIn.back().Kill = Kill; + } - return UniqueVNI ? 
TheVNI : 0; + return false; } diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h index 909829b..57cab7b 100644 --- a/lib/CodeGen/LiveRangeCalc.h +++ b/lib/CodeGen/LiveRangeCalc.h @@ -34,6 +34,7 @@ template <class NodeT> class DomTreeNodeBase; typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode; class LiveRangeCalc { + const MachineFunction *MF; const MachineRegisterInfo *MRI; SlotIndexes *Indexes; MachineDominatorTree *DomTree; @@ -100,17 +101,20 @@ class LiveRangeCalc { /// used to add entries directly. SmallVector<LiveInBlock, 16> LiveIn; - /// findReachingDefs - Assuming that LI is live-in to KillMBB and killed at - /// Kill, search for values that can reach KillMBB. All blocks that need LI - /// to be live-in are added to LiveIn. If a unique reaching def is found, - /// its value is returned, if Kill is jointly dominated by multiple values, - /// NULL is returned. + /// Assuming that LI is live-in to KillMBB and killed at Kill, find the set + /// of defs that can reach it. + /// + /// If only one def can reach Kill, all paths from the def to kill are added + /// to LI, and the function returns true. + /// + /// If multiple values can reach Kill, the blocks that need LI to be live in + /// are added to the LiveIn array, and the function returns false. /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. - VNInfo *findReachingDefs(LiveInterval *LI, - MachineBasicBlock *KillMBB, - SlotIndex Kill, - unsigned PhysReg); + bool findReachingDefs(LiveInterval *LI, + MachineBasicBlock *KillMBB, + SlotIndex Kill, + unsigned PhysReg); /// updateSSA - Compute the values that will be live in to all requested /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form. @@ -119,12 +123,11 @@ class LiveRangeCalc { /// blocks. No values are read from the live ranges. void updateSSA(); - /// updateLiveIns - Add liveness as specified in the LiveIn vector, using VNI - /// as a wildcard value for LiveIn entries without a value. - void updateLiveIns(VNInfo *VNI); + /// Add liveness as specified in the LiveIn vector. + void updateLiveIns(); public: - LiveRangeCalc() : MRI(0), Indexes(0), DomTree(0), Alloc(0) {} + LiveRangeCalc() : MF(0), MRI(0), Indexes(0), DomTree(0), Alloc(0) {} //===--------------------------------------------------------------------===// // High-level interface. diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index f81ad1c..789eddc 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -619,29 +619,6 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { MBB); } - // Finally, if the last instruction in the block is a return, make sure to - // mark it as using all of the live-out values in the function. - // Things marked both call and return are tail calls; do not do this for - // them. The tail callee need not take the same registers as input - // that it produces as output, and there are dependencies for its input - // registers elsewhere. - if (!MBB->empty() && MBB->back().isReturn() - && !MBB->back().isCall()) { - MachineInstr *Ret = &MBB->back(); - - for (MachineRegisterInfo::liveout_iterator - I = MF->getRegInfo().liveout_begin(), - E = MF->getRegInfo().liveout_end(); I != E; ++I) { - assert(TargetRegisterInfo::isPhysicalRegister(*I) && - "Cannot have a live-out virtual register!"); - HandlePhysRegUse(*I, Ret); - - // Add live-out registers as implicit uses. 
- if (!Ret->readsRegister(*I)) - Ret->addOperand(MachineOperand::CreateReg(*I, false, true)); - } - } - // MachineCSE may CSE instructions which write to non-allocatable physical // registers across MBBs. Remember if any reserved register is liveout. SmallSet<unsigned, 4> LiveOuts; diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 7e9fb20..898e165 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -15,10 +15,12 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" @@ -663,6 +665,13 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { << " -- BB#" << NMBB->getNumber() << " -- BB#" << Succ->getNumber() << '\n'); + LiveIntervals *LIS = P->getAnalysisIfAvailable<LiveIntervals>(); + SlotIndexes *Indexes = P->getAnalysisIfAvailable<SlotIndexes>(); + if (LIS) + LIS->insertMBBInMaps(NMBB); + else if (Indexes) + Indexes->insertMBBInMaps(NMBB); + // On some targets like Mips, branches may kill virtual registers. Make sure // that LiveVariables is properly updated after updateTerminator replaces the // terminators. @@ -689,14 +698,67 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { } } + SmallVector<unsigned, 4> UsedRegs; + if (LIS) { + for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); + I != E; ++I) { + MachineInstr *MI = I; + + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + if (!OI->isReg() || OI->getReg() == 0) + continue; + + unsigned Reg = OI->getReg(); + if (std::find(UsedRegs.begin(), UsedRegs.end(), Reg) == UsedRegs.end()) + UsedRegs.push_back(Reg); + } + } + } + ReplaceUsesOfBlockWith(Succ, NMBB); + + // If updateTerminator() removes instructions, we need to remove them from + // SlotIndexes. + SmallVector<MachineInstr*, 4> Terminators; + if (Indexes) { + for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); + I != E; ++I) + Terminators.push_back(I); + } + updateTerminator(); + if (Indexes) { + SmallVector<MachineInstr*, 4> NewTerminators; + for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); + I != E; ++I) + NewTerminators.push_back(I); + + for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(), + E = Terminators.end(); I != E; ++I) { + if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) == + NewTerminators.end()) + Indexes->removeMachineInstrFromMaps(*I); + } + } + // Insert unconditional "jump Succ" instruction in NMBB if necessary. NMBB->addSuccessor(Succ); if (!NMBB->isLayoutSuccessor(Succ)) { Cond.clear(); MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl); + + if (Indexes) { + for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end(); + I != E; ++I) { + // Some instructions may have been moved to NMBB by updateTerminator(), + // so we first remove any instruction that already has an index. 
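The SplitCriticalEdge() hunks above keep SlotIndexes consistent across updateTerminator(): the terminators are snapshotted before the call, anything that disappeared is removed from the index maps, and instructions that migrated into the new block are re-indexed. A small standalone sketch of that snapshot-and-diff pattern (the index map type is a stand-in, not LLVM's SlotIndexes):

    #include <algorithm>
    #include <cstddef>
    #include <set>
    #include <vector>

    struct Instr;                              // opaque instruction handle
    typedef std::set<const Instr *> IndexMap;  // stand-in for SlotIndexes maps

    // Remove from the index map every terminator that updateTerminator()
    // deleted, by diffing the before/after snapshots.
    static void pruneStaleIndexes(const std::vector<Instr *> &Before,
                                  const std::vector<Instr *> &After,
                                  IndexMap &Indexes) {
      for (std::size_t i = 0; i != Before.size(); ++i)
        if (std::find(After.begin(), After.end(), Before[i]) == After.end())
          Indexes.erase(Before[i]);  // instruction was removed; drop its index
    }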
+ if (Indexes->hasIndex(I)) + Indexes->removeMachineInstrFromMaps(I); + Indexes->insertMachineInstrInMaps(I); + } + } } // Fix PHI nodes in Succ so they refer to NMBB instead of this @@ -731,6 +793,67 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { LV->addNewBlock(NMBB, this, Succ); } + if (LIS) { + // After splitting the edge and updating SlotIndexes, live intervals may be + // in one of two situations, depending on whether this block was the last in + // the function. If the original block was the last in the function, all live + // intervals will end prior to the beginning of the new split block. If the + // original block was not at the end of the function, all live intervals will + // extend to the end of the new split block. + + bool isLastMBB = + llvm::next(MachineFunction::iterator(NMBB)) == getParent()->end(); + + SlotIndex StartIndex = Indexes->getMBBEndIdx(this); + SlotIndex PrevIndex = StartIndex.getPrevSlot(); + SlotIndex EndIndex = Indexes->getMBBEndIdx(NMBB); + + // Find the registers used from NMBB in PHIs in Succ. + SmallSet<unsigned, 8> PHISrcRegs; + for (MachineBasicBlock::instr_iterator + I = Succ->instr_begin(), E = Succ->instr_end(); + I != E && I->isPHI(); ++I) { + for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) { + if (I->getOperand(ni+1).getMBB() == NMBB) { + MachineOperand &MO = I->getOperand(ni); + unsigned Reg = MO.getReg(); + PHISrcRegs.insert(Reg); + if (MO.isUndef()) + continue; + + LiveInterval &LI = LIS->getInterval(Reg); + VNInfo *VNI = LI.getVNInfoAt(PrevIndex); + assert(VNI && "PHI sources should be live out of their predecessors."); + LI.addRange(LiveRange(StartIndex, EndIndex, VNI)); + } + } + } + + MachineRegisterInfo *MRI = &getParent()->getRegInfo(); + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg)) + continue; + + LiveInterval &LI = LIS->getInterval(Reg); + if (!LI.liveAt(PrevIndex)) + continue; + + bool isLiveOut = LI.liveAt(LIS->getMBBStartIdx(Succ)); + if (isLiveOut && isLastMBB) { + VNInfo *VNI = LI.getVNInfoAt(PrevIndex); + assert(VNI && "LiveInterval should have VNInfo where it is live."); + LI.addRange(LiveRange(StartIndex, EndIndex, VNI)); + } else if (!isLiveOut && !isLastMBB) { + LI.removeRange(StartIndex, EndIndex); + } + } + + // Update all intervals for registers whose uses may have been modified by + // updateTerminator(). + LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs); + } + if (MachineDominatorTree *MDT = P->getAnalysisIfAvailable<MachineDominatorTree>()) { // Update dominator information. diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 07a3e03..3b09c6b 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -171,7 +171,7 @@ class MachineBlockPlacement : public MachineFunctionPass { const TargetInstrInfo *TII; /// \brief A handle to the target's lowering info. - const TargetLowering *TLI; + const TargetLoweringBase *TLI; /// \brief Allocator and owner of BlockChain structures. 
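The live-interval update above distinguishes two cases after the new block NMBB is spliced onto the edge: when the original block was the last in the function, ranges stop short and must be extended over NMBB; otherwise they now spuriously cover NMBB and must be trimmed when the value is not live into the successor. A compact decision-table sketch of that logic (illustrative, not the LiveIntervals API):

    // Mirrors the extend-or-trim choice added to SplitCriticalEdge(): extend
    // a virtual register's range over the new block only when the value is
    // still needed below it.
    enum Action { Extend, Trim, Keep };

    static Action classifyRange(bool LiveAtSplit, bool LiveIntoSucc,
                                bool IsLastMBB) {
      if (!LiveAtSplit)
        return Keep;     // not live across the split point at all
      if (LiveIntoSucc && IsLastMBB)
        return Extend;   // range ended early; stretch it over NMBB
      if (!LiveIntoSucc && !IsLastMBB)
        return Trim;     // range covers NMBB but the value is dead there
      return Keep;       // already correct
    }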
/// diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 9647e83..5e04f2d 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -346,13 +346,6 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { } OS << '\n'; } - if (RegInfo && !RegInfo->liveout_empty()) { - OS << "Function Live Outs:"; - for (MachineRegisterInfo::liveout_iterator - I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I) - OS << ' ' << PrintReg(*I, TRI); - OS << '\n'; - } for (const_iterator BB = begin(), E = end(); BB != E; ++BB) { OS << '\n'; diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 29d8866..32d0668 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -752,20 +752,19 @@ void MachineInstr::addMemOperand(MachineFunction &MF, } bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const { - const MachineBasicBlock *MBB = getParent(); - MachineBasicBlock::const_instr_iterator MII = *this; ++MII; - while (MII != MBB->end() && MII->isInsideBundle()) { + assert(!isBundledWithPred() && "Must be called on bundle header"); + for (MachineBasicBlock::const_instr_iterator MII = this;; ++MII) { if (MII->getDesc().getFlags() & Mask) { if (Type == AnyInBundle) return true; } else { - if (Type == AllInBundle) + if (Type == AllInBundle && !MII->isBundle()) return false; } - ++MII; + // This was the last instruction in the bundle. + if (!MII->isBundledWithSucc()) + return Type == AllInBundle; } - - return Type == AllInBundle; } bool MachineInstr::isIdenticalTo(const MachineInstr *Other, @@ -897,7 +896,7 @@ void MachineInstr::unbundleFromSucc() { assert(isBundledWithSucc() && "MI isn't bundled with its successor"); clearFlag(BundledSucc); MachineBasicBlock::instr_iterator Succ = this; - --Succ; + ++Succ; assert(Succ->isBundledWithPred() && "Inconsistent bundle flags"); Succ->clearFlag(BundledPred); } @@ -982,18 +981,13 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, return NULL; } -/// getBundleSize - Return the number of instructions inside the MI bundle. +/// Return the number of instructions inside the MI bundle, not counting the +/// header instruction. unsigned MachineInstr::getBundleSize() const { - assert(isBundle() && "Expecting a bundle"); - - const MachineBasicBlock *MBB = getParent(); - MachineBasicBlock::const_instr_iterator I = *this, E = MBB->instr_end(); + MachineBasicBlock::const_instr_iterator I = this; unsigned Size = 0; - while ((++I != E) && I->isInsideBundle()) { - ++Size; - } - assert(Size > 1 && "Malformed bundle"); - + while (I->isBundledWithSucc()) + ++Size, ++I; return Size; } @@ -1434,7 +1428,8 @@ static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, } } -void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { +void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, + bool SkipOpers) const { // We can be a bit tidier if we know the TargetMachine and/or MachineFunction. const MachineFunction *MF = 0; const MachineRegisterInfo *MRI = 0; @@ -1471,6 +1466,9 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { else OS << "UNKNOWN"; + if (SkipOpers) + return; + // Print the rest of the operands. 
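The bundle accessors above were rewritten to walk a bundle through its glue flags (isBundledWithSucc/isBundledWithPred) instead of scanning the parent block. A toy model of counting a bundle's size, excluding the header, the way the new getBundleSize() does:

    #include <cstdio>
    #include <vector>

    // Each instruction knows whether it is glued to its successor; bundle
    // size (excluding the header) equals the number of successor links.
    struct Inst { bool BundledWithSucc; };

    static unsigned bundleSize(const std::vector<Inst> &Block, unsigned Header) {
      unsigned Size = 0, I = Header;
      while (Block[I].BundledWithSucc) {
        ++Size;
        ++I;
      }
      return Size;
    }

    int main() {
      // A header glued to two more instructions, then an unbundled one.
      std::vector<Inst> B(4);
      B[0].BundledWithSucc = true;
      B[1].BundledWithSucc = true;
      B[2].BundledWithSucc = false;
      B[3].BundledWithSucc = false;
      std::printf("bundle size = %u\n", bundleSize(B, 0)); // prints 2
      return 0;
    }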
bool OmittedAnyCallClobbers = false; bool FirstOp = true; @@ -1482,10 +1480,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { OS << " "; getOperand(InlineAsm::MIOp_AsmString).print(OS, TM); - // Print HasSideEffects, IsAlignStack + // Print HasSideEffects, MayLoad, MayStore, IsAlignStack unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); if (ExtraInfo & InlineAsm::Extra_HasSideEffects) OS << " [sideeffect]"; + if (ExtraInfo & InlineAsm::Extra_MayLoad) + OS << " [mayload]"; + if (ExtraInfo & InlineAsm::Extra_MayStore) + OS << " [maystore]"; if (ExtraInfo & InlineAsm::Extra_IsAlignStack) OS << " [alignstack]"; if (getInlineAsmDialect() == InlineAsm::AD_ATT) @@ -1513,12 +1515,12 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { const MachineRegisterInfo &MRI = MF->getRegInfo(); - if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) { + if (MRI.use_empty(Reg)) { bool HasAliasLive = false; for (MCRegAliasIterator AI(Reg, TM->getRegisterInfo(), true); AI.isValid(); ++AI) { unsigned AliasReg = *AI; - if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) { + if (!MRI.use_empty(AliasReg)) { HasAliasLive = true; break; } @@ -1590,7 +1592,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { } bool HaveSemi = false; - if (Flags) { + const unsigned PrintableFlags = FrameSetup; + if (Flags & PrintableFlags) { if (!HaveSemi) OS << ";"; HaveSemi = true; OS << " flags: "; diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 760cf8a..ed3ed4d 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -62,7 +62,7 @@ namespace { class MachineLICM : public MachineFunctionPass { const TargetMachine *TM; const TargetInstrInfo *TII; - const TargetLowering *TLI; + const TargetLoweringBase *TLI; const TargetRegisterInfo *TRI; const MachineFrameInfo *MFI; MachineRegisterInfo *MRI; diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 21877e5..a777f52 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -283,13 +283,6 @@ bool MachineRegisterInfo::isLiveIn(unsigned Reg) const { return false; } -bool MachineRegisterInfo::isLiveOut(unsigned Reg) const { - for (liveout_iterator I = liveout_begin(), E = liveout_end(); I != E; ++I) - if (*I == Reg) - return true; - return false; -} - /// getLiveInPhysReg - If VReg is a live-in virtual register, return the /// corresponding live-in physical register. unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const { diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 54a7851..a93d070 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" #include <queue> @@ -48,15 +49,6 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, static bool ViewMISchedDAGs = false; #endif // NDEBUG -// Threshold to very roughly model an out-of-order processor's instruction -// buffers. If the actual value of this threshold matters much in practice, then -// it can be specified by the machine model. For now, it's an experimental -// tuning knob to determine when and if it matters. 
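The deleted ILPWindow option above encoded a simple cutoff: only start favoring latency reduction once expected latency exceeds the critical path by more than N cycles. The threshold now comes from the machine model instead (see the getILPWindow() use in the setLatencyPolicy hunk further down). A one-function sketch of the test, with illustrative parameter names:

    // True when remaining latency has grown far enough past the critical
    // path that the scheduler should switch to reducing latency.
    static bool shouldReduceLatency(unsigned RemLatency,
                                    unsigned ExpectedLatency,
                                    unsigned CriticalPath,
                                    unsigned ILPWindow) {
      return RemLatency + ExpectedLatency >= CriticalPath + ILPWindow;
    }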
-static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden, - cl::desc("Allow expected latency to exceed the critical path by N cycles " - "before attempting to balance ILP"), - cl::init(10U)); - // Experimental heuristics static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden, cl::desc("Enable load clustering."), cl::init(true)); @@ -65,6 +57,9 @@ static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden, static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden, cl::desc("Enable scheduling for macro fusion."), cl::init(true)); +// DAG subtrees must have at least this many nodes. +static const unsigned MinSubtreeSize = 8; + //===----------------------------------------------------------------------===// // Machine Instruction Scheduling Pass and Registry //===----------------------------------------------------------------------===// @@ -268,7 +263,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { } DEBUG(dbgs() << "********** MI Scheduling **********\n"); DEBUG(dbgs() << MF->getName() - << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: "; + << ":BB#" << MBB->getNumber() << " " << MBB->getName() + << "\n From: " << *I << " To: "; if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; else dbgs() << "End"; dbgs() << " Remaining: " << RemainingInstrs << "\n"); @@ -310,6 +306,12 @@ void ReadyQueue::dump() { // preservation. //===----------------------------------------------------------------------===// +ScheduleDAGMI::~ScheduleDAGMI() { + delete DFSResult; + DeleteContainerPointers(Mutations); + delete SchedImpl; +} + bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) { if (SuccSU != &ExitSU) { // Do not use WillCreateCycle, it assumes SD scheduling. @@ -465,7 +467,8 @@ void ScheduleDAGMI::initRegPressure() { // Cache the list of excess pressure sets in this region. This will also track // the max pressure in the scheduled code for these sets. RegionCriticalPSets.clear(); - std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure; + const std::vector<unsigned> &RegionPressure = + RPTracker.getPressure().MaxSetPressure; for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) { unsigned Limit = TRI->getRegPressureSetLimit(i); DEBUG(dbgs() << TRI->getRegPressureSetName(i) @@ -484,7 +487,7 @@ void ScheduleDAGMI::initRegPressure() { // FIXME: When the pressure tracker deals in pressure differences then we won't // iterate over all RegionCriticalPSets[i]. void ScheduleDAGMI:: -updateScheduledPressure(std::vector<unsigned> NewMaxPressure) { +updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) { for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) { unsigned ID = RegionCriticalPSets[i].PSetID; int &MaxUnits = RegionCriticalPSets[i].UnitIncrease; @@ -510,12 +513,19 @@ void ScheduleDAGMI::schedule() { postprocessDAG(); + SmallVector<SUnit*, 8> TopRoots, BotRoots; + findRootsAndBiasEdges(TopRoots, BotRoots); + + // Initialize the strategy before modifying the DAG. + // This may initialize a DFSResult to be used for queue priority. + SchedImpl->initialize(this); + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); - if (ViewMISchedDAGs) viewGraph(); - initQueues(); + // Initialize ready queues now that the DAG and priority data are finalized. 
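After this reorganization, schedule() runs in a fixed order: build the DAG with pressure tracking, apply DAG mutations, find roots and bias edges, initialize the strategy (which may compute DFS data), and only then release the roots into the ready queues. A compressed pseudo-flow of that ordering, with a minimal stand-in for the MachineSchedStrategy interface:

    struct SUnitT;                     // stand-in for llvm::SUnit
    struct StrategyT {                 // stand-in for MachineSchedStrategy
      virtual void initialize() = 0;   // may compute DFS priority data
      virtual SUnitT *pickNode(bool &IsTopNode) = 0;
      virtual ~StrategyT() {}
    };

    // Mirrors the reordered ScheduleDAGMI::schedule(): the strategy is
    // initialized after roots are found but before the queues are filled, so
    // priority data already exists when the first node is released.
    static void scheduleSketch(StrategyT &S) {
      // buildDAGWithRegPressure(); postprocessDAG();
      // findRootsAndBiasEdges(TopRoots, BotRoots);
      S.initialize();                  // strategy sees the finalized DAG
      // initQueues(TopRoots, BotRoots);
      bool IsTop = false;
      while (SUnitT *SU = S.pickNode(IsTop))
        (void)SU;                      // scheduleMI(SU, IsTop); updateQueues()
    }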
+ initQueues(TopRoots, BotRoots); bool IsTopNode = false; while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { @@ -550,7 +560,6 @@ void ScheduleDAGMI::buildDAGWithRegPressure() { // Build the DAG, and compute current register pressure. buildSchedGraph(AA, &RPTracker); - if (ViewMISchedDAGs) viewGraph(); // Initialize top/bottom trackers after computing region pressure. initRegPressure(); @@ -563,39 +572,56 @@ void ScheduleDAGMI::postprocessDAG() { } } -// Release all DAG roots for scheduling. -// -// Nodes with unreleased weak edges can still be roots. -void ScheduleDAGMI::releaseRoots() { - SmallVector<SUnit*, 16> BotRoots; +void ScheduleDAGMI::computeDFSResult() { + if (!DFSResult) + DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize); + DFSResult->clear(); + ScheduledTrees.clear(); + DFSResult->resize(SUnits.size()); + DFSResult->compute(SUnits); + ScheduledTrees.resize(DFSResult->getNumSubtrees()); +} +void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, + SmallVectorImpl<SUnit*> &BotRoots) { for (std::vector<SUnit>::iterator I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { SUnit *SU = &(*I); + assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits"); + + // Order predecessors so DFSResult follows the critical path. + SU->biasCriticalPath(); + // A SUnit is ready to top schedule if it has no predecessors. - if (!I->NumPredsLeft && SU != &EntrySU) - SchedImpl->releaseTopNode(SU); + if (!I->NumPredsLeft) + TopRoots.push_back(SU); // A SUnit is ready to bottom schedule if it has no successors. - if (!I->NumSuccsLeft && SU != &ExitSU) + if (!I->NumSuccsLeft) BotRoots.push_back(SU); } - // Release bottom roots in reverse order so the higher priority nodes appear - // first. This is more natural and slightly more efficient. - for (SmallVectorImpl<SUnit*>::const_reverse_iterator - I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) - SchedImpl->releaseBottomNode(*I); + ExitSU.biasCriticalPath(); } /// Identify DAG roots and setup scheduler queues. -void ScheduleDAGMI::initQueues() { +void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots, + ArrayRef<SUnit*> BotRoots) { NextClusterSucc = NULL; NextClusterPred = NULL; - // Initialize the strategy before modifying the DAG. - SchedImpl->initialize(this); - // Release all DAG roots for scheduling, not including EntrySU/ExitSU. - releaseRoots(); + // + // Nodes with unreleased weak edges can still be roots. + // Release top roots in forward order. + for (SmallVectorImpl<SUnit*>::const_iterator + I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) { + SchedImpl->releaseTopNode(*I); + } + // Release bottom roots in reverse order so the higher priority nodes appear + // first. This is more natural and slightly more efficient. + for (SmallVectorImpl<SUnit*>::const_reverse_iterator + I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) { + SchedImpl->releaseBottomNode(*I); + } releaseSuccessors(&EntrySU); releasePredecessors(&ExitSU); @@ -660,6 +686,15 @@ void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) { SU->isScheduled = true; + if (DFSResult) { + unsigned SubtreeID = DFSResult->getSubtreeID(SU); + if (!ScheduledTrees.test(SubtreeID)) { + ScheduledTrees.set(SubtreeID); + DFSResult->scheduleTree(SubtreeID); + SchedImpl->scheduleTree(SubtreeID); + } + } + // Notify the scheduling strategy after updating the DAG. SchedImpl->schedNode(SU, IsTopNode); } @@ -1019,6 +1054,9 @@ public: #endif void reset() { + // A new HazardRec is created for each DAG and owned by SchedBoundary. 
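updateQueues() above now fires a one-time notification per DFS subtree: the first time any node of a subtree is scheduled, the tree is marked in the ScheduledTrees bitmap and both the DFS result and the strategy get a scheduleTree() callback. A standalone sketch of that once-per-tree pattern (illustrative names):

    #include <cstdio>
    #include <vector>

    // The first scheduled node of a subtree flips its bit and triggers the
    // callback; later nodes of the same tree are silent.
    static void noteScheduled(unsigned SubtreeID,
                              std::vector<bool> &ScheduledTrees,
                              void (*OnTreeScheduled)(unsigned)) {
      if (!ScheduledTrees[SubtreeID]) {
        ScheduledTrees[SubtreeID] = true;
        OnTreeScheduled(SubtreeID);  // DFSResult + strategy callbacks in LLVM
      }
    }

    static void report(unsigned ID) { std::printf("tree %u scheduled\n", ID); }

    int main() {
      std::vector<bool> Trees(4, false);
      noteScheduled(2, Trees, report); // fires
      noteScheduled(2, Trees, report); // silent the second time
      return 0;
    }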
+ delete HazardRec; + Available.clear(); Pending.clear(); CheckPending = false; @@ -1044,7 +1082,8 @@ public: /// PendingFlag set. SchedBoundary(unsigned ID, const Twine &Name): DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"), - Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") { + Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"), + HazardRec(0) { reset(); } @@ -1188,10 +1227,13 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { DAG = dag; SchedModel = DAG->getSchedModel(); TRI = DAG->TRI; + Rem.init(DAG, SchedModel); Top.init(DAG, SchedModel, &Rem); Bot.init(DAG, SchedModel, &Rem); + DAG->computeDFSResult(); + // Initialize resource counts. // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or @@ -1297,7 +1339,8 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) { if (L > RemLatency) RemLatency = L; } - if (RemLatency + ExpectedLatency >= Rem->CriticalPath + ILPWindow + unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow(); + if (RemLatency + ExpectedLatency >= CriticalPathLimit && RemLatency > Rem->getMaxRemainingCount(SchedModel)) { Policy.ReduceLatency = true; DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n'); @@ -1541,8 +1584,8 @@ void ConvergingScheduler::checkResourceLimits( ConvergingScheduler::SchedCandidate &BotCand) { // Set ReduceLatency to true if needed. - Bot.setLatencyPolicy(TopCand.Policy); - Top.setLatencyPolicy(BotCand.Policy); + Bot.setLatencyPolicy(BotCand.Policy); + Top.setLatencyPolicy(TopCand.Policy); // Handle resource-limited regions. if (Top.IsResourceLimited && Bot.IsResourceLimited @@ -2060,12 +2103,11 @@ ConvergingSchedRegistry("converge", "Standard converging scheduler.", namespace { /// \brief Order nodes by the ILP metric. struct ILPOrder { - SchedDFSResult *DFSResult; - BitVector *ScheduledTrees; + const SchedDFSResult *DFSResult; + const BitVector *ScheduledTrees; bool MaximizeILP; - ILPOrder(SchedDFSResult *dfs, BitVector *schedtrees, bool MaxILP) - : DFSResult(dfs), ScheduledTrees(schedtrees), MaximizeILP(MaxILP) {} + ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {} /// \brief Apply a less-than relation on node priority. /// @@ -2103,26 +2145,22 @@ class ILPScheduler : public MachineSchedStrategy { /// (a motivating test case must be found). static const unsigned SubtreeLimit = 16; - SchedDFSResult DFSResult; - BitVector ScheduledTrees; + ScheduleDAGMI *DAG; ILPOrder Cmp; std::vector<SUnit*> ReadyQ; public: - ILPScheduler(bool MaximizeILP) - : DFSResult(/*BottomUp=*/true, SubtreeLimit), - Cmp(&DFSResult, &ScheduledTrees, MaximizeILP) {} + ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {} - virtual void initialize(ScheduleDAGMI *DAG) { + virtual void initialize(ScheduleDAGMI *dag) { + DAG = dag; + DAG->computeDFSResult(); + Cmp.DFSResult = DAG->getDFSResult(); + Cmp.ScheduledTrees = &DAG->getScheduledTrees(); ReadyQ.clear(); - DFSResult.clear(); - DFSResult.resize(DAG->SUnits.size()); - ScheduledTrees.clear(); } virtual void registerRoots() { - DFSResult.compute(ReadyQ); - ScheduledTrees.resize(DFSResult.getNumSubtrees()); // Restore the heap in ReadyQ with the updated DFS results. 
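The ILPScheduler in this hunk keeps its ready queue as a std::make_heap/std::pop_heap max-heap ordered by the DFS ILP metric, and simply rebuilds the heap whenever a finished subtree shifts priorities. A small standalone version of that priority pattern (the ILP numbers here are invented):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct Node { unsigned Id; unsigned ILP; };

    struct ILPLess {   // heap comparator: the highest ILP ends up on top
      bool operator()(const Node &A, const Node &B) const {
        return A.ILP < B.ILP;
      }
    };

    int main() {
      std::vector<Node> ReadyQ;
      Node N0 = {0, 3}, N1 = {1, 9}, N2 = {2, 5};
      ReadyQ.push_back(N0); ReadyQ.push_back(N1); ReadyQ.push_back(N2);
      std::make_heap(ReadyQ.begin(), ReadyQ.end(), ILPLess());

      // Pick the node with maximal ILP, as ILPScheduler::pickNode() does.
      std::pop_heap(ReadyQ.begin(), ReadyQ.end(), ILPLess());
      Node SU = ReadyQ.back(); ReadyQ.pop_back();
      std::printf("picked SU(%u) ILP=%u\n", SU.Id, SU.ILP); // SU(1) ILP=9

      // When priorities change (a subtree finished), restore the heap:
      std::make_heap(ReadyQ.begin(), ReadyQ.end(), ILPLess());
      return 0;
    }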
std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); } @@ -2139,21 +2177,22 @@ public: IsTopNode = false; DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): " << *SU->getInstr() - << " ILP: " << DFSResult.getILP(SU) - << " Tree: " << DFSResult.getSubtreeID(SU) << " @" - << DFSResult.getSubtreeLevel(DFSResult.getSubtreeID(SU))<< '\n'); + << " ILP: " << DAG->getDFSResult()->getILP(SU) + << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @" + << DAG->getDFSResult()->getSubtreeLevel( + DAG->getDFSResult()->getSubtreeID(SU)) << '\n'); return SU; } + /// \brief Scheduler callback to notify that a new subtree is scheduled. + virtual void scheduleTree(unsigned SubtreeID) { + std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); + } + /// Callback after a node is scheduled. Mark a newly scheduled tree, notify /// DFSResults, and resort the priority Q. virtual void schedNode(SUnit *SU, bool IsTopNode) { assert(!IsTopNode && "SchedDFSResult needs bottom-up"); - if (!ScheduledTrees.test(DFSResult.getSubtreeID(SU))) { - ScheduledTrees.set(DFSResult.getSubtreeID(SU)); - DFSResult.scheduleTree(DFSResult.getSubtreeID(SU)); - std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); - } } virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ } @@ -2264,3 +2303,90 @@ static MachineSchedRegistry ShufflerRegistry( "shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler); #endif // !NDEBUG + +//===----------------------------------------------------------------------===// +// GraphWriter support for ScheduleDAGMI. +//===----------------------------------------------------------------------===// + +#ifndef NDEBUG +namespace llvm { + +template<> struct GraphTraits< + ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {}; + +template<> +struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(const ScheduleDAG *G) { + return G->MF.getName(); + } + + static bool renderGraphFromBottomUp() { + return true; + } + + static bool isNodeHidden(const SUnit *Node) { + return (Node->NumPreds > 10 || Node->NumSuccs > 10); + } + + static bool hasNodeAddressLabel(const SUnit *Node, + const ScheduleDAG *Graph) { + return false; + } + + /// If you want to override the dot attributes printed for a particular + /// edge, override this method. + static std::string getEdgeAttributes(const SUnit *Node, + SUnitIterator EI, + const ScheduleDAG *Graph) { + if (EI.isArtificialDep()) + return "color=cyan,style=dashed"; + if (EI.isCtrlDep()) + return "color=blue,style=dashed"; + return ""; + } + + static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) { + std::string Str; + raw_string_ostream SS(Str); + SS << "SU(" << SU->NodeNum << ')'; + return SS.str(); + } + static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) { + return G->getGraphNodeLabel(SU); + } + + static std::string getNodeAttributes(const SUnit *N, + const ScheduleDAG *Graph) { + std::string Str("shape=Mrecord"); + const SchedDFSResult *DFS = + static_cast<const ScheduleDAGMI*>(Graph)->getDFSResult(); + if (DFS) { + Str += ",style=filled,fillcolor=\"#"; + Str += DOT::getColorString(DFS->getSubtreeID(N)); + Str += '"'; + } + return Str; + } +}; +} // namespace llvm +#endif // NDEBUG + +/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG +/// rendered using 'dot'. 
+/// +void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) { +#ifndef NDEBUG + ViewGraph(this, Name, false, Title); +#else + errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +/// Out-of-line implementation with no arguments is handy for gdb. +void ScheduleDAGMI::viewGraph() { + viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName()); +} diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index 685ccab..f77a7b1 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "machine-trace-metrics" -#include "MachineTraceMetrics.h" +#include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SparseSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/lib/CodeGen/MachineTraceMetrics.h b/lib/CodeGen/MachineTraceMetrics.h deleted file mode 100644 index 460730b..0000000 --- a/lib/CodeGen/MachineTraceMetrics.h +++ /dev/null @@ -1,350 +0,0 @@ -//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interface for the MachineTraceMetrics analysis pass -// that estimates CPU resource usage and critical data dependency paths through -// preferred traces. This is useful for super-scalar CPUs where execution speed -// can be limited both by data dependencies and by limited execution resources. -// -// Out-of-order CPUs will often be executing instructions from multiple basic -// blocks at the same time. This makes it difficult to estimate the resource -// usage accurately in a single basic block. Resources can be estimated better -// by looking at a trace through the current basic block. -// -// For every block, the MachineTraceMetrics pass will pick a preferred trace -// that passes through the block. The trace is chosen based on loop structure, -// branch probabilities, and resource usage. The intention is to pick likely -// traces that would be the most affected by code transformations. -// -// It is expensive to compute a full arbitrary trace for every block, so to -// save some computations, traces are chosen to be convergent. This means that -// if the traces through basic blocks A and B ever cross when moving away from -// A and B, they never diverge again. This applies in both directions - If the -// traces meet above A and B, they won't diverge when going further back. -// -// Traces tend to align with loops. The trace through a block in an inner loop -// will begin at the loop entry block and end at a back edge. If there are -// nested loops, the trace may begin and end at those instead. -// -// For each trace, we compute the critical path length, which is the number of -// cycles required to execute the trace when execution is limited by data -// dependencies only. We also compute the resource height, which is the number -// of cycles required to execute all instructions in the trace when ignoring -// data dependencies. 
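The deleted header text above defines the two per-trace numbers MachineTraceMetrics computes: the critical path (cycles when only data dependencies limit execution) and the resource height (cycles when only execution resources do). A toy critical-path computation over a dependency DAG in topological order, with invented latencies:

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Longest-latency chain through a DAG given in topological order:
    // Depth[i] = max over predecessors p of (Depth[p] + Latency[p]).
    int main() {
      // Invented diamond: 0 -> 1 -> 3 and 0 -> 2 -> 3.
      std::vector<std::vector<int> > Preds(4);
      Preds[1].push_back(0); Preds[2].push_back(0);
      Preds[3].push_back(1); Preds[3].push_back(2);
      const unsigned Latency[4] = {2, 4, 1, 3};
      std::vector<unsigned> Depth(4, 0);
      unsigned CriticalPath = 0;
      for (std::size_t I = 0; I != Preds.size(); ++I) {
        for (std::size_t J = 0; J != Preds[I].size(); ++J) {
          int P = Preds[I][J];
          Depth[I] = std::max(Depth[I], Depth[P] + Latency[P]);
        }
        CriticalPath = std::max(CriticalPath, Depth[I] + Latency[I]);
      }
      std::printf("critical path = %u cycles\n", CriticalPath); // 2+4+3 = 9
      return 0;
    }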
-// -// Every instruction in the current block has a slack - the number of cycles -// execution of the instruction can be delayed without extending the critical -// path. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_MACHINE_TRACE_METRICS_H -#define LLVM_CODEGEN_MACHINE_TRACE_METRICS_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/TargetSchedule.h" - -namespace llvm { - -class InstrItineraryData; -class MachineBasicBlock; -class MachineInstr; -class MachineLoop; -class MachineLoopInfo; -class MachineRegisterInfo; -class TargetInstrInfo; -class TargetRegisterInfo; -class raw_ostream; - -class MachineTraceMetrics : public MachineFunctionPass { - const MachineFunction *MF; - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - const MachineRegisterInfo *MRI; - const MachineLoopInfo *Loops; - TargetSchedModel SchedModel; - -public: - class Ensemble; - class Trace; - static char ID; - MachineTraceMetrics(); - void getAnalysisUsage(AnalysisUsage&) const; - bool runOnMachineFunction(MachineFunction&); - void releaseMemory(); - void verifyAnalysis() const; - - friend class Ensemble; - friend class Trace; - - /// Per-basic block information that doesn't depend on the trace through the - /// block. - struct FixedBlockInfo { - /// The number of non-trivial instructions in the block. - /// Doesn't count PHI and COPY instructions that are likely to be removed. - unsigned InstrCount; - - /// True when the block contains calls. - bool HasCalls; - - FixedBlockInfo() : InstrCount(~0u), HasCalls(false) {} - - /// Returns true when resource information for this block has been computed. - bool hasResources() const { return InstrCount != ~0u; } - - /// Invalidate resource information. - void invalidate() { InstrCount = ~0u; } - }; - - /// Get the fixed resource information about MBB. Compute it on demand. - const FixedBlockInfo *getResources(const MachineBasicBlock*); - - /// A virtual register or regunit required by a basic block or its trace - /// successors. - struct LiveInReg { - /// The virtual register required, or a register unit. - unsigned Reg; - - /// For virtual registers: Minimum height of the defining instruction. - /// For regunits: Height of the highest user in the trace. - unsigned Height; - - LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {} - }; - - /// Per-basic block information that relates to a specific trace through the - /// block. Convergent traces means that only one of these is required per - /// block in a trace ensemble. - struct TraceBlockInfo { - /// Trace predecessor, or NULL for the first block in the trace. - /// Valid when hasValidDepth(). - const MachineBasicBlock *Pred; - - /// Trace successor, or NULL for the last block in the trace. - /// Valid when hasValidHeight(). - const MachineBasicBlock *Succ; - - /// The block number of the head of the trace. (When hasValidDepth()). - unsigned Head; - - /// The block number of the tail of the trace. (When hasValidHeight()). - unsigned Tail; - - /// Accumulated number of instructions in the trace above this block. - /// Does not include instructions in this block. - unsigned InstrDepth; - - /// Accumulated number of instructions in the trace below this block. - /// Includes instructions in this block. 
- unsigned InstrHeight; - - TraceBlockInfo() : - Pred(0), Succ(0), - InstrDepth(~0u), InstrHeight(~0u), - HasValidInstrDepths(false), HasValidInstrHeights(false) {} - - /// Returns true if the depth resources have been computed from the trace - /// above this block. - bool hasValidDepth() const { return InstrDepth != ~0u; } - - /// Returns true if the height resources have been computed from the trace - /// below this block. - bool hasValidHeight() const { return InstrHeight != ~0u; } - - /// Invalidate depth resources when some block above this one has changed. - void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; } - - /// Invalidate height resources when a block below this one has changed. - void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; } - - /// Determine if this block belongs to the same trace as TBI and comes - /// before it in the trace. - /// Also returns true when TBI == this. - bool isEarlierInSameTrace(const TraceBlockInfo &TBI) const { - return hasValidDepth() && TBI.hasValidDepth() && - Head == TBI.Head && InstrDepth <= TBI.InstrDepth; - } - - // Data-dependency-related information. Per-instruction depth and height - // are computed from data dependencies in the current trace, using - // itinerary data. - - /// Instruction depths have been computed. This implies hasValidDepth(). - bool HasValidInstrDepths; - - /// Instruction heights have been computed. This implies hasValidHeight(). - bool HasValidInstrHeights; - - /// Critical path length. This is the number of cycles in the longest data - /// dependency chain through the trace. This is only valid when both - /// HasValidInstrDepths and HasValidInstrHeights are set. - unsigned CriticalPath; - - /// Live-in registers. These registers are defined above the current block - /// and used by this block or a block below it. - /// This does not include PHI uses in the current block, but it does - /// include PHI uses in deeper blocks. - SmallVector<LiveInReg, 4> LiveIns; - - void print(raw_ostream&) const; - }; - - /// InstrCycles represents the cycle height and depth of an instruction in a - /// trace. - struct InstrCycles { - /// Earliest issue cycle as determined by data dependencies and instruction - /// latencies from the beginning of the trace. Data dependencies from - /// before the trace are not included. - unsigned Depth; - - /// Minimum number of cycles from this instruction is issued to the of the - /// trace, as determined by data dependencies and instruction latencies. - unsigned Height; - }; - - /// A trace represents a plausible sequence of executed basic blocks that - /// passes through the current basic block one. The Trace class serves as a - /// handle to internal cached data structures. - class Trace { - Ensemble &TE; - TraceBlockInfo &TBI; - - unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; } - - public: - explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {} - void print(raw_ostream&) const; - - /// Compute the total number of instructions in the trace. - unsigned getInstrCount() const { - return TBI.InstrDepth + TBI.InstrHeight; - } - - /// Return the resource depth of the top/bottom of the trace center block. - /// This is the number of cycles required to execute all instructions from - /// the trace head to the trace center block. The resource depth only - /// considers execution resources, it ignores data dependencies. - /// When Bottom is set, instructions in the trace center block are included. 
- unsigned getResourceDepth(bool Bottom) const; - - /// Return the resource length of the trace. This is the number of cycles - /// required to execute the instructions in the trace if they were all - /// independent, exposing the maximum instruction-level parallelism. - /// - /// Any blocks in Extrablocks are included as if they were part of the - /// trace. - unsigned getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks = - ArrayRef<const MachineBasicBlock*>()) const; - - /// Return the length of the (data dependency) critical path through the - /// trace. - unsigned getCriticalPath() const { return TBI.CriticalPath; } - - /// Return the depth and height of MI. The depth is only valid for - /// instructions in or above the trace center block. The height is only - /// valid for instructions in or below the trace center block. - InstrCycles getInstrCycles(const MachineInstr *MI) const { - return TE.Cycles.lookup(MI); - } - - /// Return the slack of MI. This is the number of cycles MI can be delayed - /// before the critical path becomes longer. - /// MI must be an instruction in the trace center block. - unsigned getInstrSlack(const MachineInstr *MI) const; - - /// Return the Depth of a PHI instruction in a trace center block successor. - /// The PHI does not have to be part of the trace. - unsigned getPHIDepth(const MachineInstr *PHI) const; - }; - - /// A trace ensemble is a collection of traces selected using the same - /// strategy, for example 'minimum resource height'. There is one trace for - /// every block in the function. - class Ensemble { - SmallVector<TraceBlockInfo, 4> BlockInfo; - DenseMap<const MachineInstr*, InstrCycles> Cycles; - friend class Trace; - - void computeTrace(const MachineBasicBlock*); - void computeDepthResources(const MachineBasicBlock*); - void computeHeightResources(const MachineBasicBlock*); - unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&); - void computeInstrDepths(const MachineBasicBlock*); - void computeInstrHeights(const MachineBasicBlock*); - void addLiveIns(const MachineInstr *DefMI, unsigned DefOp, - ArrayRef<const MachineBasicBlock*> Trace); - - protected: - MachineTraceMetrics &MTM; - virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0; - virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0; - explicit Ensemble(MachineTraceMetrics*); - const MachineLoop *getLoopFor(const MachineBasicBlock*) const; - const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const; - const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const; - - public: - virtual ~Ensemble(); - virtual const char *getName() const =0; - void print(raw_ostream&) const; - void invalidate(const MachineBasicBlock *MBB); - void verify() const; - - /// Get the trace that passes through MBB. - /// The trace is computed on demand. - Trace getTrace(const MachineBasicBlock *MBB); - }; - - /// Strategies for selecting traces. - enum Strategy { - /// Select the trace through a block that has the fewest instructions. - TS_MinInstrCount, - - TS_NumStrategies - }; - - /// Get the trace ensemble representing the given trace selection strategy. - /// The returned Ensemble object is owned by the MachineTraceMetrics analysis, - /// and valid for the lifetime of the analysis pass. - Ensemble *getEnsemble(Strategy); - - /// Invalidate cached information about MBB. This must be called *before* MBB - /// is erased, or the CFG is otherwise changed. 
- /// - /// This invalidates per-block information about resource usage for MBB only, - /// and it invalidates per-trace information for any trace that passes - /// through MBB. - /// - /// Call Ensemble::getTrace() again to update any trace handles. - void invalidate(const MachineBasicBlock *MBB); - -private: - // One entry per basic block, indexed by block number. - SmallVector<FixedBlockInfo, 4> BlockInfo; - - // One ensemble per strategy. - Ensemble* Ensembles[TS_NumStrategies]; -}; - -inline raw_ostream &operator<<(raw_ostream &OS, - const MachineTraceMetrics::Trace &Tr) { - Tr.print(OS); - return OS; -} - -inline raw_ostream &operator<<(raw_ostream &OS, - const MachineTraceMetrics::Ensemble &En) { - En.print(OS); - return OS; -} -} // end namespace llvm - -#endif diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 4daa211..5584708 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" @@ -39,9 +40,16 @@ DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false), cl::Hidden, cl::desc("Disable critical edge splitting " "during PHI elimination")); +static cl::opt<bool> +SplitAllCriticalEdges("phi-elim-split-all-critical-edges", cl::init(false), + cl::Hidden, cl::desc("Split all critical edges during " + "PHI elimination")); + namespace { class PHIElimination : public MachineFunctionPass { MachineRegisterInfo *MRI; // Machine register information + LiveVariables *LV; + LiveIntervals *LIS; public: static char ID; // Pass identification, replacement for typeid @@ -57,8 +65,8 @@ namespace { /// in predecessor basic blocks. /// bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB); - void LowerAtomicPHINode(MachineBasicBlock &MBB, - MachineBasicBlock::iterator AfterPHIsIt); + void LowerPHINode(MachineBasicBlock &MBB, + MachineBasicBlock::iterator AfterPHIsIt); /// analyzePHINodes - Gather information about the PHI nodes in /// here. In particular, we want to map the number of uses of a virtual @@ -70,7 +78,12 @@ namespace { /// Split critical edges where necessary for good coalescer performance. bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, - LiveVariables &LV, MachineLoopInfo *MLI); + MachineLoopInfo *MLI); + + // These functions are temporary abstractions around LiveVariables and + // LiveIntervals, so they can go away when LiveVariables does. 
+ bool isLiveIn(unsigned Reg, MachineBasicBlock *MBB); + bool isLiveOutPastPHIs(unsigned Reg, MachineBasicBlock *MBB); typedef std::pair<unsigned, unsigned> BBVRegPair; typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse; @@ -87,7 +100,7 @@ namespace { }; } -STATISTIC(NumAtomic, "Number of atomic phis lowered"); +STATISTIC(NumLowered, "Number of phis lowered"); STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split"); STATISTIC(NumReused, "Number of reused lowered phis"); @@ -103,6 +116,8 @@ INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination", void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<LiveVariables>(); + AU.addPreserved<SlotIndexes>(); + AU.addPreserved<LiveIntervals>(); AU.addPreserved<MachineDominatorTree>(); AU.addPreserved<MachineLoopInfo>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -110,19 +125,20 @@ void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); + LV = getAnalysisIfAvailable<LiveVariables>(); + LIS = getAnalysisIfAvailable<LiveIntervals>(); bool Changed = false; // This pass takes the function out of SSA form. MRI->leaveSSA(); - // Split critical edges to help the coalescer - if (!DisableEdgeSplitting) { - if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) { - MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - Changed |= SplitPHIEdges(MF, *I, *LV, MLI); - } + // Split critical edges to help the coalescer. This does not yet support + // updating LiveIntervals, so we disable it. + if (!DisableEdgeSplitting && (LV || LIS)) { + MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + Changed |= SplitPHIEdges(MF, *I, MLI); } // Populate VRegPHIUseCount @@ -137,14 +153,20 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { E = ImpDefs.end(); I != E; ++I) { MachineInstr *DefMI = *I; unsigned DefReg = DefMI->getOperand(0).getReg(); - if (MRI->use_nodbg_empty(DefReg)) + if (MRI->use_nodbg_empty(DefReg)) { + if (LIS) + LIS->RemoveMachineInstrFromMaps(DefMI); DefMI->eraseFromParent(); + } } // Clean up the lowered PHI instructions. for (LoweredPHIMap::iterator I = LoweredPHIs.begin(), E = LoweredPHIs.end(); - I != E; ++I) + I != E; ++I) { + if (LIS) + LIS->RemoveMachineInstrFromMaps(I->first); MF.DeleteMachineInstr(I->first); + } LoweredPHIs.clear(); ImpDefs.clear(); @@ -166,7 +188,7 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF, MachineBasicBlock::iterator AfterPHIsIt = MBB.SkipPHIsAndLabels(MBB.begin()); while (MBB.front().isPHI()) - LowerAtomicPHINode(MBB, AfterPHIsIt); + LowerPHINode(MBB, AfterPHIsIt); return true; } @@ -193,15 +215,11 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, } -/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block, -/// under the assumption that it needs to be lowered in a way that supports -/// atomic execution of PHIs. This lowering method is always correct all of the -/// time. 
+/// LowerPHINode - Lower the PHI node at the top of the specified block, /// -void PHIElimination::LowerAtomicPHINode( - MachineBasicBlock &MBB, - MachineBasicBlock::iterator AfterPHIsIt) { - ++NumAtomic; +void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, + MachineBasicBlock::iterator AfterPHIsIt) { + ++NumLowered; // Unlink the PHI node from the basic block, but don't delete the PHI yet. MachineInstr *MPhi = MBB.remove(MBB.begin()); @@ -244,7 +262,6 @@ void PHIElimination::LowerAtomicPHINode( } // Update live variable information if there is any. - LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>(); if (LV) { MachineInstr *PHICopy = prior(AfterPHIsIt); @@ -283,6 +300,48 @@ void PHIElimination::LowerAtomicPHINode( } } + // Update LiveIntervals for the new copy or implicit def. + if (LIS) { + MachineInstr *NewInstr = prior(AfterPHIsIt); + SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(NewInstr); + + SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB); + if (IncomingReg) { + // Add the region from the beginning of MBB to the copy instruction to + // IncomingReg's live interval. + LiveInterval &IncomingLI = LIS->getOrCreateInterval(IncomingReg); + VNInfo *IncomingVNI = IncomingLI.getVNInfoAt(MBBStartIndex); + if (!IncomingVNI) + IncomingVNI = IncomingLI.getNextValue(MBBStartIndex, + LIS->getVNInfoAllocator()); + IncomingLI.addRange(LiveRange(MBBStartIndex, + DestCopyIndex.getRegSlot(), + IncomingVNI)); + } + + LiveInterval &DestLI = LIS->getInterval(DestReg); + assert(DestLI.begin() != DestLI.end() && + "PHIs should have nonempty LiveIntervals."); + if (DestLI.endIndex().isDead()) { + // A dead PHI's live range begins and ends at the start of the MBB, but + // the lowered copy, which will still be dead, needs to begin and end at + // the copy instruction. + VNInfo *OrigDestVNI = DestLI.getVNInfoAt(MBBStartIndex); + assert(OrigDestVNI && "PHI destination should be live at block entry."); + DestLI.removeRange(MBBStartIndex, MBBStartIndex.getDeadSlot()); + DestLI.createDeadDef(DestCopyIndex.getRegSlot(), + LIS->getVNInfoAllocator()); + DestLI.removeValNo(OrigDestVNI); + } else { + // Otherwise, remove the region from the beginning of MBB to the copy + // instruction from DestReg's live interval. + DestLI.removeRange(MBBStartIndex, DestCopyIndex.getRegSlot()); + VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot()); + assert(DestVNI && "PHI destination should be live at its definition."); + DestVNI->def = DestCopyIndex.getRegSlot(); + } + } + // Adjust the VRegPHIUseCount map to account for the removal of this PHI node. for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(), @@ -315,45 +374,44 @@ void PHIElimination::LowerAtomicPHINode( findPHICopyInsertPoint(&opBlock, &MBB, SrcReg); // Insert the copy. + MachineInstr *NewSrcInstr = 0; if (!reusedIncoming && IncomingReg) { if (SrcUndef) { // The source register is undefined, so there is no need for a real // COPY, but we still need to ensure joint dominance by defs. // Insert an IMPLICIT_DEF instruction. - BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), - TII->get(TargetOpcode::IMPLICIT_DEF), IncomingReg); + NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), + IncomingReg); // Clean up the old implicit-def, if there even was one. 
if (MachineInstr *DefMI = MRI->getVRegDef(SrcReg)) if (DefMI->isImplicitDef()) ImpDefs.insert(DefMI); } else { - BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), - TII->get(TargetOpcode::COPY), IncomingReg) - .addReg(SrcReg, 0, SrcSubReg); + NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), + TII->get(TargetOpcode::COPY), IncomingReg) + .addReg(SrcReg, 0, SrcSubReg); } } - // Now update live variable information if we have it. Otherwise we're done - if (SrcUndef || !LV) continue; + // We only need to update the LiveVariables kill of SrcReg if this was the + // last PHI use of SrcReg to be lowered on this CFG edge and it is not live + // out of the predecessor. We can also ignore undef sources. + if (LV && !SrcUndef && + !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)] && + !LV->isLiveOut(SrcReg, opBlock)) { + // We want to be able to insert a kill of the register if this PHI (aka, + // the copy we just inserted) is the last use of the source value. Live + // variable analysis conservatively handles this by saying that the value + // is live until the end of the block the PHI entry lives in. If the value + // really is dead at the PHI copy, there will be no successor blocks which + // have the value live-in. + + // Okay, if we now know that the value is not live out of the block, we + // can add a kill marker in this block saying that it kills the incoming + // value! - // We want to be able to insert a kill of the register if this PHI (aka, the - // copy we just inserted) is the last use of the source value. Live - // variable analysis conservatively handles this by saying that the value is - // live until the end of the block the PHI entry lives in. If the value - // really is dead at the PHI copy, there will be no successor blocks which - // have the value live-in. - - // Also check to see if this register is in use by another PHI node which - // has not yet been eliminated. If so, it will be killed at an appropriate - // point later. - - // Is it used by any PHI instructions in this block? - bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)]; - - // Okay, if we now know that the value is not live out of the block, we can - // add a kill marker in this block saying that it kills the incoming value! - if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) { // In our final twist, we have to decide which instruction kills the // register. In most cases this is the copy, however, terminator // instructions at the end of the block may also use the value. In this @@ -394,11 +452,74 @@ void PHIElimination::LowerAtomicPHINode( unsigned opBlockNum = opBlock.getNumber(); LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum); } + + if (LIS) { + if (NewSrcInstr) { + LIS->InsertMachineInstrInMaps(NewSrcInstr); + LIS->addLiveRangeToEndOfBlock(IncomingReg, NewSrcInstr); + } + + if (!SrcUndef && + !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)]) { + LiveInterval &SrcLI = LIS->getInterval(SrcReg); + + bool isLiveOut = false; + for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(), + SE = opBlock.succ_end(); SI != SE; ++SI) { + SlotIndex startIdx = LIS->getMBBStartIdx(*SI); + VNInfo *VNI = SrcLI.getVNInfoAt(startIdx); + + // Definitions by other PHIs are not truly live-in for our purposes. 
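The successor scan in this hunk encodes a subtle rule: a value that appears live at a successor's entry only because a PHI defines it there (VNI->def == startIdx) does not make SrcReg live out of this block. A self-contained sketch of that rule with toy flags standing in for the LiveIntervals queries:

#include <cassert>
#include <vector>

struct SuccEntry {
  bool HasValueAtEntry; // some value of the register is live at block start
  bool DefIsAtEntry;    // that value is defined at the entry slot (a PHI def)
};

// Live out "past PHIs": some successor must see a value that was not
// created by a PHI at its own entry.
bool isLiveOutPastPHIs(const std::vector<SuccEntry> &Succs) {
  for (size_t i = 0; i != Succs.size(); ++i)
    if (Succs[i].HasValueAtEntry && !Succs[i].DefIsAtEntry)
      return true;
  return false;
}

int main() {
  // One successor whose only "live" value is a PHI def at its own entry:
  std::vector<SuccEntry> Succs(1);
  Succs[0].HasValueAtEntry = true;
  Succs[0].DefIsAtEntry = true;
  assert(!isLiveOutPastPHIs(Succs)); // not truly live out
  return 0;
}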
+ if (VNI && VNI->def != startIdx) { + isLiveOut = true; + break; + } + } + + if (!isLiveOut) { + MachineBasicBlock::iterator KillInst = opBlock.end(); + MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator(); + for (MachineBasicBlock::iterator Term = FirstTerm; + Term != opBlock.end(); ++Term) { + if (Term->readsRegister(SrcReg)) + KillInst = Term; + } + + if (KillInst == opBlock.end()) { + // No terminator uses the register. + + if (reusedIncoming || !IncomingReg) { + // We may have to rewind a bit if we didn't just insert a copy. + KillInst = FirstTerm; + while (KillInst != opBlock.begin()) { + --KillInst; + if (KillInst->isDebugValue()) + continue; + if (KillInst->readsRegister(SrcReg)) + break; + } + } else { + // We just inserted this copy. + KillInst = prior(InsertPos); + } + } + assert(KillInst->readsRegister(SrcReg) && + "Cannot find kill instruction"); + + SlotIndex LastUseIndex = LIS->getInstructionIndex(KillInst); + SrcLI.removeRange(LastUseIndex.getRegSlot(), + LIS->getMBBEndIdx(&opBlock)); + } + } + } } // Really delete the PHI instruction now, if it is not in the LoweredPHIs map. - if (reusedIncoming || !IncomingReg) + if (reusedIncoming || !IncomingReg) { + if (LIS) + LIS->RemoveMachineInstrFromMaps(MPhi); MF.DeleteMachineInstr(MPhi); + } } /// analyzePHINodes - Gather information about the PHI nodes in here. In @@ -418,7 +539,6 @@ void PHIElimination::analyzePHINodes(const MachineFunction& MF) { bool PHIElimination::SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, - LiveVariables &LV, MachineLoopInfo *MLI) { if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad()) return false; // Quick exit for basic blocks without PHIs. @@ -438,10 +558,10 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // Avoid splitting backedges of loops. It would introduce small // out-of-line blocks into the loop which is very bad for code placement. - if (PreMBB == &MBB) + if (PreMBB == &MBB && !SplitAllCriticalEdges) continue; const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : 0; - if (IsLoopHeader && PreLoop == CurLoop) + if (IsLoopHeader && PreLoop == CurLoop && !SplitAllCriticalEdges) continue; // LV doesn't consider a phi use live-out, so isLiveOut only returns true @@ -450,7 +570,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // there is a risk it may not be coalesced away. // // If the copy would be a kill, there is no need to split the edge. - if (!LV.isLiveOut(Reg, *PreMBB)) + if (!isLiveOutPastPHIs(Reg, PreMBB) && !SplitAllCriticalEdges) continue; DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#" @@ -465,7 +585,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // is likely to be left after coalescing. If we are looking at a loop // exiting edge, split it so we won't insert code in the loop, otherwise // don't bother. - bool ShouldSplit = !LV.isLiveIn(Reg, MBB); + bool ShouldSplit = !isLiveIn(Reg, &MBB) || SplitAllCriticalEdges; // Check for a loop exiting edge. 
if (!ShouldSplit && CurLoop != PreLoop) { @@ -492,3 +612,33 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, } return Changed; } + +bool PHIElimination::isLiveIn(unsigned Reg, MachineBasicBlock *MBB) { + assert((LV || LIS) && + "isLiveIn() requires either LiveVariables or LiveIntervals"); + if (LIS) + return LIS->isLiveInToMBB(LIS->getInterval(Reg), MBB); + else + return LV->isLiveIn(Reg, *MBB); +} + +bool PHIElimination::isLiveOutPastPHIs(unsigned Reg, MachineBasicBlock *MBB) { + assert((LV || LIS) && + "isLiveOutPastPHIs() requires either LiveVariables or LiveIntervals"); + // LiveVariables considers uses in PHIs to be in the predecessor basic block, + // so that a register used only in a PHI is not live out of the block. In + // contrast, LiveIntervals considers uses in PHIs to be on the edge rather than + // in the predecessor basic block, so that a register used only in a PHI is live + // out of the block. + if (LIS) { + const LiveInterval &LI = LIS->getInterval(Reg); + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + if (LI.liveAt(LIS->getMBBStartIdx(*SI))) + return true; + } + return false; + } else { + return LV->isLiveOut(Reg, *MBB); + } +} diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 2a135bc..b79f9f9 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -89,7 +89,7 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"), cl::value_desc("pass-name"), cl::init("option-unspecified")); -// Experimental option to run live inteerval analysis early. +// Experimental option to run live interval analysis early. static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden, cl::desc("Run live interval analysis earlier in the pipeline")); @@ -238,9 +238,6 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) substitutePass(&EarlyTailDuplicateID, &TailDuplicateID); substitutePass(&PostRAMachineLICMID, &MachineLICMID); - // Disable early if-conversion. Targets that are ready can enable it. - disablePass(&EarlyIfConverterID); - // Temporarily disable experimental passes. const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>(); if (!ST.enableMachineScheduler()) @@ -551,7 +548,12 @@ void TargetPassConfig::addMachineSSAOptimization() { addPass(&DeadMachineInstructionElimID); printAndVerify("After codegen DCE pass"); - addPass(&EarlyIfConverterID); + // Allow targets to insert passes that improve instruction level parallelism, + // like if-conversion. Such passes will typically need dominator trees and + // loop info, just like LICM and CSE below. + if (addILPOpts()) + printAndVerify("After ILP optimizations"); + addPass(&MachineLICMID); addPass(&MachineCSEID); addPass(&MachineSinkingID); diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 488f1d4..53fe273 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -418,11 +418,11 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { // Start with no live registers. LiveRegs.reset(); - // Determine the live-out physregs for this block. - if (!BB->empty() && BB->back().isReturn()) { - // In a return block, examine the function live-out regs. - for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), - E = MRI.liveout_end(); I != E; ++I) { + // Examine the live-in regs of all successors. 
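A note on the Passes.cpp hunk above: instead of adding EarlyIfConverter unconditionally and then disabling it for most targets, targets now opt in through the new addILPOpts() hook. A sketch of how a target's pass config might do that; this fragment only compiles inside LLVM, the class name is hypothetical, while TargetPassConfig, addPass, and EarlyIfConverterID are real:

class MyTargetPassConfig : public TargetPassConfig {
public:
  // Called from addMachineSSAOptimization(); returning true makes the
  // generic code print/verify after the ILP passes have run.
  virtual bool addILPOpts() {
    addPass(&EarlyIfConverterID); // the pass that used to be added for everyone
    return true;
  }
};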
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) { + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; LiveRegs.set(Reg); // Repeat, for all subregs. @@ -430,20 +430,6 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { LiveRegs.set(*SubRegs); } } - else { - // In a non-return block, examine the live-in regs of all successors. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) { - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), - E = (*SI)->livein_end(); I != E; ++I) { - unsigned Reg = *I; - LiveRegs.set(Reg); - // Repeat, for all subregs. - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - LiveRegs.set(*SubRegs); - } - } - } } bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 1d0e71e..b18d52d 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -139,7 +139,6 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { /// variables for the function's frame information and eliminate call frame /// pseudo instructions. void PEI::calculateCallsInformation(MachineFunction &Fn) { - const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); MachineFrameInfo *MFI = Fn.getFrameInfo(); @@ -186,7 +185,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { // here. The sub/add sp instruction pairs are still inserted, but we don't // need to track the SP adjustment for frame index elimination. if (TFI->canSimplifyCallFramePseudos(Fn)) - RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); + TFI->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); } } @@ -693,6 +692,14 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { // space in small chunks instead of one large contiguous block. if (Fn.getTarget().Options.EnableSegmentedStacks) TFI.adjustForSegmentedStacks(Fn); + + // Emit additional code that is required to explicitly handle the stack in + // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The + // approach is rather similar to that of Segmented Stacks, but it uses a + // different conditional check and another BIF for allocating more stack + // space. + if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE) + TFI.adjustForHiPEPrologue(Fn); } /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical @@ -739,7 +746,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { MachineBasicBlock::iterator PrevI = BB->end(); if (I != BB->begin()) PrevI = prior(I); - TRI.eliminateCallFramePseudoInstr(Fn, *BB, I); + TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); // Visit the instructions created by eliminateCallFramePseudoInstr(). if (PrevI == BB->end()) @@ -751,34 +758,36 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { MachineInstr *MI = I; bool DoIncr = true; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) - if (MI->getOperand(i).isFI()) { - // Some instructions (e.g. inline asm instructions) can have - // multiple frame indices and/or cause eliminateFrameIndex - // to insert more than one instruction. 
We need the register - // scavenger to go through all of these instructions so that - // it can update its register information. We keep the - // iterator at the point before insertion so that we can - // revisit them in full. - bool AtBeginning = (I == BB->begin()); - if (!AtBeginning) --I; - - // If this instruction has a FrameIndex operand, we need to - // use that target machine register info object to eliminate - // it. - TRI.eliminateFrameIndex(MI, SPAdj, - FrameIndexVirtualScavenging ? NULL : RS); - - // Reset the iterator if we were at the beginning of the BB. - if (AtBeginning) { - I = BB->begin(); - DoIncr = false; - } + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (!MI->getOperand(i).isFI()) + continue; - MI = 0; - break; + // Some instructions (e.g. inline asm instructions) can have + // multiple frame indices and/or cause eliminateFrameIndex + // to insert more than one instruction. We need the register + // scavenger to go through all of these instructions so that + // it can update its register information. We keep the + // iterator at the point before insertion so that we can + // revisit them in full. + bool AtBeginning = (I == BB->begin()); + if (!AtBeginning) --I; + + // If this instruction has a FrameIndex operand, we need to + // use that target machine register info object to eliminate + // it. + TRI.eliminateFrameIndex(MI, SPAdj, i, + FrameIndexVirtualScavenging ? NULL : RS); + + // Reset the iterator if we were at the beginning of the BB. + if (AtBeginning) { + I = BB->begin(); + DoIncr = false; } + MI = 0; + break; + } + if (DoIncr && I != BB->end()) ++I; // Update register states. diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 8d849dc..bb9c05c 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -113,12 +113,27 @@ namespace { // PhysRegState - One of the RegState enums, or a virtreg. std::vector<unsigned> PhysRegState; + // Set of register units. typedef SparseSet<unsigned> UsedInInstrSet; - // UsedInInstr - Set of physregs that are used in the current instruction, - // and so cannot be allocated. + // Set of register units that are used in the current instruction, and so + // cannot be allocated. UsedInInstrSet UsedInInstr; + // Mark a physreg as used in this instruction. + void markRegUsedInInstr(unsigned PhysReg) { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + UsedInInstr.insert(*Units); + } + + // Check if a physreg or any of its aliases are used in this instruction. + bool isRegUsedInInstr(unsigned PhysReg) const { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + if (UsedInInstr.count(*Units)) + return true; + return false; + } + // SkippedInstrs - Descriptors of instructions whose clobber list was // ignored because all registers were spilled. It is still necessary to // mark all the clobbered registers as used by the function. 
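The RegAllocFast hunks here switch UsedInInstr from whole physregs to register units, which turns alias checking into a plain membership test: two registers overlap exactly when their unit sets intersect (on X86, for example, AL and AX share a unit). A toy, self-contained model of that idea, with hand-picked unit numbers standing in for MCRegUnitIterator:

#include <set>
#include <vector>

typedef std::vector<unsigned> UnitList;

// markRegUsedInInstr: insert every unit the register covers.
void markUsed(std::set<unsigned> &UsedUnits, const UnitList &Units) {
  UsedUnits.insert(Units.begin(), Units.end());
}

// isRegUsedInInstr: any shared unit means the registers alias.
bool isUsed(const std::set<unsigned> &UsedUnits, const UnitList &Units) {
  for (size_t i = 0; i != Units.size(); ++i)
    if (UsedUnits.count(Units[i]))
      return true;
  return false;
}

int main() {
  std::set<unsigned> UsedInInstr;
  UnitList AL(1, 0), AH(1, 1);                   // AL covers unit 0, AH unit 1
  UnitList AX; AX.push_back(0); AX.push_back(1); // AX covers both units
  markUsed(UsedInInstr, AX);
  return (isUsed(UsedInInstr, AL)   // aliases AX via unit 0: reported used
          && !UsedInInstr.count(2)) // unrelated unit stays free
             ? 0 : 1;
}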
@@ -177,7 +192,6 @@ namespace { unsigned VirtReg, unsigned Hint); void spillAll(MachineBasicBlock::iterator MI); bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg); - void addRetOperands(MachineBasicBlock *MBB); }; char RAFast::ID = 0; } @@ -334,7 +348,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { unsigned PhysReg = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Bad usePhysReg operand"); - + markRegUsedInInstr(PhysReg); switch (PhysRegState[PhysReg]) { case regDisabled: break; @@ -342,7 +356,6 @@ void RAFast::usePhysReg(MachineOperand &MO) { PhysRegState[PhysReg] = regFree; // Fall through case regFree: - UsedInInstr.insert(PhysReg); MO.setIsKill(); return; default: @@ -362,13 +375,11 @@ void RAFast::usePhysReg(MachineOperand &MO) { "Instruction is not using a subregister of a reserved register"); // Leave the superregister in the working set. PhysRegState[Alias] = regFree; - UsedInInstr.insert(Alias); MO.getParent()->addRegisterKilled(Alias, TRI, true); return; case regFree: if (TRI->isSuperRegister(PhysReg, Alias)) { // Leave the superregister in the working set. - UsedInInstr.insert(Alias); MO.getParent()->addRegisterKilled(Alias, TRI, true); return; } @@ -382,7 +393,6 @@ void RAFast::usePhysReg(MachineOperand &MO) { // All aliases are disabled, bring register into working set. PhysRegState[PhysReg] = regFree; - UsedInInstr.insert(PhysReg); MO.setIsKill(); } @@ -391,7 +401,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { /// reserved instead of allocated. void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState) { - UsedInInstr.insert(PhysReg); + markRegUsedInInstr(PhysReg); switch (unsigned VirtReg = PhysRegState[PhysReg]) { case regDisabled: break; @@ -431,7 +441,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, // can be allocated directly. // Returns spillImpossible when PhysReg or an alias can't be spilled. unsigned RAFast::calcSpillCost(unsigned PhysReg) const { - if (UsedInInstr.count(PhysReg)) { + if (isRegUsedInInstr(PhysReg)) { DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n"); return spillImpossible; } @@ -456,8 +466,6 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { unsigned Cost = 0; for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { unsigned Alias = *AI; - if (UsedInInstr.count(Alias)) - return spillImpossible; switch (unsigned VirtReg = PhysRegState[Alias]) { case regDisabled: break; @@ -532,7 +540,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, // First try to find a completely free register. 
for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){ unsigned PhysReg = *I; - if (PhysRegState[PhysReg] == regFree && !UsedInInstr.count(PhysReg)) { + if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) { assignVirtToPhysReg(*LRI, PhysReg); return LRI; } @@ -598,7 +606,7 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, LRI->LastUse = MI; LRI->LastOpNum = OpNum; LRI->Dirty = true; - UsedInInstr.insert(LRI->PhysReg); + markRegUsedInInstr(LRI->PhysReg); return LRI; } @@ -648,7 +656,7 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, assert(LRI->PhysReg && "Register not assigned"); LRI->LastUse = MI; LRI->LastOpNum = OpNum; - UsedInInstr.insert(LRI->PhysReg); + markRegUsedInInstr(LRI->PhysReg); return LRI; } @@ -709,8 +717,8 @@ void RAFast::handleThroughOperands(MachineInstr *MI, if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + markRegUsedInInstr(Reg); for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - UsedInInstr.insert(*AI); if (ThroughRegs.count(PhysRegState[*AI])) definePhysReg(MI, *AI, regFree); } @@ -766,65 +774,12 @@ void RAFast::handleThroughOperands(MachineInstr *MI, if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI) << " as used in instr\n"); - UsedInInstr.insert(Reg); + markRegUsedInInstr(Reg); } // Also mark PartialDefs as used to avoid reallocation. for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i) - UsedInInstr.insert(PartialDefs[i]); -} - -/// addRetOperand - ensure that a return instruction has an operand for each -/// value live out of the function. -/// -/// Things marked both call and return are tail calls; do not do this for them. -/// The tail callee need not take the same registers as input that it produces -/// as output, and there are dependencies for its input registers elsewhere. -/// -/// FIXME: This should be done as part of instruction selection, and this helper -/// should be deleted. Until then, we use custom logic here to create the proper -/// operand under all circumstances. We can't use addRegisterKilled because that -/// doesn't make sense for undefined values. We can't simply avoid calling it -/// for undefined values, because we must ensure that the operand always exists. -void RAFast::addRetOperands(MachineBasicBlock *MBB) { - if (MBB->empty() || !MBB->back().isReturn() || MBB->back().isCall()) - return; - - MachineInstr *MI = &MBB->back(); - - for (MachineRegisterInfo::liveout_iterator - I = MBB->getParent()->getRegInfo().liveout_begin(), - E = MBB->getParent()->getRegInfo().liveout_end(); I != E; ++I) { - unsigned Reg = *I; - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && - "Cannot have a live-out virtual register."); - - bool hasDef = PhysRegState[Reg] == regReserved; - - // Check if this register already has an operand. - bool Found = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) - continue; - - unsigned OperReg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(OperReg)) - continue; - - if (OperReg == Reg || TRI->isSuperRegister(OperReg, Reg)) { - // If the ret already has an operand for this physreg or a superset, - // don't duplicate it. Set the kill flag if the value is defined. 
- if (hasDef && !MO.isKill()) - MO.setIsKill(); - Found = true; - break; - } - } - if (!Found) - MachineInstrBuilder(*MF, MI) - .addReg(Reg, llvm::RegState::Implicit | getKillRegState(hasDef)); - } + markRegUsedInInstr(PartialDefs[i]); } void RAFast::AllocateBasicBlock() { @@ -1023,7 +978,7 @@ void RAFast::AllocateBasicBlock() { for (UsedInInstrSet::iterator I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I) - MRI->setPhysRegUsed(*I); + MRI->setRegUnitUsed(*I); // Track registers defined by instruction - early clobbers and tied uses at // this point. @@ -1036,8 +991,7 @@ void RAFast::AllocateBasicBlock() { if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; // Look for physreg defs and tied uses. if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - UsedInInstr.insert(*AI); + markRegUsedInInstr(Reg); } } @@ -1089,7 +1043,7 @@ void RAFast::AllocateBasicBlock() { for (UsedInInstrSet::iterator I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I) - MRI->setPhysRegUsed(*I); + MRI->setRegUnitUsed(*I); if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) { DEBUG(dbgs() << "-- coalescing: " << *MI); @@ -1109,9 +1063,6 @@ void RAFast::AllocateBasicBlock() { MBB->erase(Coalesced[i]); NumCopies += Coalesced.size(); - // addRetOperands must run after we've seen all defs in this block. - addRetOperands(MBB); - DEBUG(MBB->dump()); } @@ -1128,7 +1079,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { MRI->freezeReservedRegs(Fn); RegClassInfo.runOnMachineFunction(Fn); UsedInInstr.clear(); - UsedInInstr.setUniverse(TRI->getNumRegs()); + UsedInInstr.setUniverse(TRI->getNumRegUnits()); assert(!MRI->isSSA() && "regalloc requires leaving SSA"); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 1884452..6344a73 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -632,16 +632,33 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, // Keep track of the cheapest interference seen so far. EvictionCost BestCost(~0u); unsigned BestPhys = 0; + unsigned OrderLimit = Order.getOrder().size(); // When we are just looking for a reduced cost per use, don't break any // hints, and only evict smaller spill weights. if (CostPerUseLimit < ~0u) { BestCost.BrokenHints = 0; BestCost.MaxWeight = VirtReg.weight; + + // Check of any registers in RC are below CostPerUseLimit. + const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg); + unsigned MinCost = RegClassInfo.getMinCost(RC); + if (MinCost >= CostPerUseLimit) { + DEBUG(dbgs() << RC->getName() << " minimum cost = " << MinCost + << ", no cheaper registers to be found.\n"); + return 0; + } + + // It is normal for register classes to have a long tail of registers with + // the same cost. We don't need to look at them if they're too expensive. + if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) { + OrderLimit = RegClassInfo.getLastCostChange(RC); + DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n"); + } } Order.rewind(); - while (unsigned PhysReg = Order.next()) { + while (unsigned PhysReg = Order.nextWithDups(OrderLimit)) { if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) continue; // The first use of a callee-saved register in a function has cost 1. 
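The tryEvict changes above prune the scan in two ways: give up immediately when even the cheapest register in the class costs at least CostPerUseLimit, and otherwise stop at the last cost change when the constant-cost tail of the allocation order is already too expensive. A sketch of that limit computation over a plain cost array (getMinCost/getLastCostChange are modeled here, not called):

#include <vector>

// Costs holds cost-per-use in allocation order; equal-cost registers
// cluster at the tail, and LastCostChange is the index where the final
// run of identical costs begins.
unsigned evictionScanLimit(const std::vector<unsigned> &Costs,
                           unsigned CostPerUseLimit, unsigned MinCost,
                           unsigned LastCostChange) {
  if (MinCost >= CostPerUseLimit)
    return 0;                    // no register can be cheap enough: don't scan
  if (!Costs.empty() && Costs.back() >= CostPerUseLimit)
    return LastCostChange;       // skip the uniformly-expensive tail
  return (unsigned)Costs.size(); // otherwise scan the full order
}

This is why RegisterClassInfo, in the next hunk, starts tracking MinCost and LastCostChange while it builds each allocation order.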
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index 078a0df..87382d8 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -83,6 +83,9 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { unsigned N = 0; SmallVector<MCPhysReg, 16> CSRAlias; + unsigned MinCost = 0xff; + unsigned LastCost = ~0u; + unsigned LastCostChange = 0; // FIXME: Once targets reserve registers instead of removing them from the // allocation order, we can simply use begin/end here. @@ -92,17 +95,31 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { // Remove reserved registers from the allocation order. if (Reserved.test(PhysReg)) continue; + unsigned Cost = TRI->getCostPerUse(PhysReg); + MinCost = std::min(MinCost, Cost); + if (CSRNum[PhysReg]) // PhysReg aliases a CSR, save it for later. CSRAlias.push_back(PhysReg); - else + else { + if (Cost != LastCost) + LastCostChange = N; RCI.Order[N++] = PhysReg; + LastCost = Cost; + } } RCI.NumRegs = N + CSRAlias.size(); assert (RCI.NumRegs <= NumRegs && "Allocation order larger than regclass"); // CSR aliases go after the volatile registers, preserve the target's order. - std::copy(CSRAlias.begin(), CSRAlias.end(), &RCI.Order[N]); + for (unsigned i = 0, e = CSRAlias.size(); i != e; ++i) { + unsigned PhysReg = CSRAlias[i]; + unsigned Cost = TRI->getCostPerUse(PhysReg); + if (Cost != LastCost) + LastCostChange = N; + RCI.Order[N++] = PhysReg; + LastCost = Cost; + } // Register allocator stress test. Clip register class to N registers. if (StressRA && RCI.NumRegs > StressRA) @@ -113,6 +130,9 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs) RCI.ProperSubClass = true; + RCI.MinCost = uint8_t(MinCost); + RCI.LastCostChange = LastCostChange; + DEBUG({ dbgs() << "AllocationOrder(" << RC->getName() << ") = ["; for (unsigned I = 0; I != RCI.NumRegs; ++I) diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 36d8101..e2488ad 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -15,7 +15,6 @@ #define DEBUG_TYPE "regalloc" #include "RegisterCoalescer.h" -#include "LiveDebugVariables.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" @@ -84,7 +83,6 @@ namespace { const TargetRegisterInfo* TRI; const TargetInstrInfo* TII; LiveIntervals *LIS; - LiveDebugVariables *LDV; const MachineLoopInfo* Loops; AliasAnalysis *AA; RegisterClassInfo RegClassInfo; @@ -169,8 +167,7 @@ namespace { /// reMaterializeTrivialDef - If the source of a copy is defined by a /// trivial computation, replace the copy by rematerialize the definition. - bool reMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg, - MachineInstr *CopyMI); + bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI); /// canJoinPhys - Return true if a physreg copy should be joined. 
bool canJoinPhys(const CoalescerPair &CP); @@ -208,7 +205,6 @@ char &llvm::RegisterCoalescerID = RegisterCoalescer::ID; INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing", "Simple Register Coalescing", false, false) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) @@ -394,8 +390,6 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AliasAnalysis>(); AU.addRequired<LiveIntervals>(); AU.addPreserved<LiveIntervals>(); - AU.addRequired<LiveDebugVariables>(); - AU.addPreserved<LiveDebugVariables>(); AU.addPreserved<SlotIndexes>(); AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); @@ -737,9 +731,14 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, /// reMaterializeTrivialDef - If the source of a copy is defined by a trivial /// computation, replace the copy by rematerialize the definition. -bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt, - unsigned DstReg, +bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI) { + unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg(); + unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg(); + if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) + return false; + + LiveInterval &SrcInt = LIS->getInterval(SrcReg); SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true); LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); assert(SrcLR != SrcInt.end() && "Live range not found!"); @@ -760,13 +759,17 @@ bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt, const MCInstrDesc &MCID = DefMI->getDesc(); if (MCID.getNumDefs() != 1) return false; + // Only support subregister destinations when the def is read-undef. + MachineOperand &DstOperand = CopyMI->getOperand(0); + if (DstOperand.getSubReg() && !DstOperand.isUndef()) + return false; if (!DefMI->isImplicitDef()) { // Make sure the copy destination register class fits the instruction // definition register class. The mismatch can happen as a result of earlier // extract_subreg, insert_subreg, subreg_to_reg coalescing. const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI, *MF); if (TargetRegisterInfo::isVirtualRegister(DstReg)) { - if (MRI->getRegClass(DstReg) != RC) + if (!MRI->constrainRegClass(DstReg, RC)) return false; } else if (!RC->contains(DstReg)) return false; @@ -778,6 +781,12 @@ bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt, TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI); MachineInstr *NewMI = prior(MII); + // The original DefMI may have been a subregister def, but the full register + // class of its destination matches the destination of CopyMI, and CopyMI is + // either a full register def or is read-undef. Therefore we can clear the + // subregister index on the rematerialized instruction. + NewMI->getOperand(0).setSubReg(0); + // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86). // We need to remember these so we can add intervals once we insert // NewMI into SlotIndexes. @@ -883,9 +892,6 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); LiveInterval *DstInt = DstIsPhys ? 0 : &LIS->getInterval(DstReg); - // Update LiveDebugVariables. 
- LDV->renameRegister(SrcReg, DstReg, SubIdx); - SmallPtrSet<MachineInstr*, 8> Visited; for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg); MachineInstr *UseMI = I.skipInstruction();) { @@ -1010,9 +1016,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { if (!canJoinPhys(CP)) { // Before giving up coalescing, if definition of source is defined by // trivial computation, try rematerializing it. - if (!CP.isFlipped() && - reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), - CP.getDstReg(), CopyMI)) + if (reMaterializeTrivialDef(CP, CopyMI)) return true; return false; } @@ -1045,9 +1049,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { // If definition of source is defined by trivial computation, try // rematerializing it. - if (!CP.isFlipped() && - reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), - CP.getDstReg(), CopyMI)) + if (reMaterializeTrivialDef(CP, CopyMI)) return true; // If we can eliminate the copy without merging the live ranges, do so now. @@ -2136,7 +2138,6 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { TRI = TM->getRegisterInfo(); TII = TM->getInstrInfo(); LIS = &getAnalysis<LiveIntervals>(); - LDV = &getAnalysis<LiveDebugVariables>(); AA = &getAnalysis<AliasAnalysis>(); Loops = &getAnalysis<MachineLoopInfo>(); @@ -2182,7 +2183,6 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { } DEBUG(dump()); - DEBUG(LDV->dump()); if (VerifyCoalescing) MF->verify(this, "After register coalescing"); return true; diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 62e95aa..97f22e1 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -357,15 +357,14 @@ protected: /// Collect physical and virtual register operands. static void collectOperands(const MachineInstr *MI, RegisterOperands &RegOpers) { - for(ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) + for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) RegOpers.collect(*OperI); // Remove redundant physreg dead defs. - for (unsigned i = RegOpers.DeadDefs.size(); i > 0; --i) { - unsigned Reg = RegOpers.DeadDefs[i-1]; - if (containsReg(RegOpers.Defs, Reg)) - RegOpers.DeadDefs.erase(&RegOpers.DeadDefs[i-1]); - } + SmallVectorImpl<unsigned>::iterator I = + std::remove_if(RegOpers.DeadDefs.begin(), RegOpers.DeadDefs.end(), + std::bind1st(std::ptr_fun(containsReg), RegOpers.Defs)); + RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end()); } /// Force liveness of registers. 
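The collectOperands rewrite above is the standard erase-remove idiom; the C++03 binders (bind1st/ptr_fun) obscure it, so here is the same dead-def filtering as a standalone snippet (modern lambda syntax for readability; the behavior matches the diff, assuming containsReg is a linear membership test):

#include <algorithm>
#include <vector>

// Remove every dead def that also appears among the real defs, keeping
// the relative order of the survivors.
void pruneRedundantDeadDefs(std::vector<unsigned> &DeadDefs,
                            const std::vector<unsigned> &Defs) {
  DeadDefs.erase(
      std::remove_if(DeadDefs.begin(), DeadDefs.end(),
                     [&Defs](unsigned Reg) {
                       return std::find(Defs.begin(), Defs.end(), Reg) !=
                              Defs.end();
                     }),
      DeadDefs.end());
}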
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 88f67da..6da901f 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -316,6 +316,16 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, return Survivor; } +static unsigned getFrameIndexOperandNum(MachineInstr *MI) { + unsigned i = 0; + while (!MI->getOperand(i).isFI()) { + ++i; + assert(i < MI->getNumOperands() && + "Instr doesn't have FrameIndex operand!"); + } + return i; +} + unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj) { @@ -364,12 +374,16 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, "Cannot scavenge register without an emergency spill slot!"); TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,TRI); MachineBasicBlock::iterator II = prior(I); - TRI->eliminateFrameIndex(II, SPAdj, this); + + unsigned FIOperandNum = getFrameIndexOperandNum(II); + TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); // Restore the scavenged register before its use (or first terminator). TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI); II = prior(UseMI); - TRI->eliminateFrameIndex(II, SPAdj, this); + + FIOperandNum = getFrameIndexOperandNum(II); + TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); } ScavengeRestore = prior(UseMI); diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index e639c55..45b4f68 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -135,20 +135,14 @@ void SUnit::removePred(const SDep &D) { for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) if (*I == D) { - bool FoundSucc = false; // Find the corresponding successor in N. SDep P = D; P.setSUnit(this); SUnit *N = D.getSUnit(); - for (SmallVector<SDep, 4>::iterator II = N->Succs.begin(), - EE = N->Succs.end(); II != EE; ++II) - if (*II == P) { - FoundSucc = true; - N->Succs.erase(II); - break; - } - assert(FoundSucc && "Mismatching preds / succs lists!"); - (void)FoundSucc; + SmallVectorImpl<SDep>::iterator Succ = std::find(N->Succs.begin(), + N->Succs.end(), P); + assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!"); + N->Succs.erase(Succ); Preds.erase(I); // Update the bookkeeping. if (P.getKind() == SDep::Data) { @@ -301,6 +295,21 @@ void SUnit::ComputeHeight() { } while (!WorkList.empty()); } +void SUnit::biasCriticalPath() { + if (NumPreds < 2) + return; + + SUnit::pred_iterator BestI = Preds.begin(); + unsigned MaxDepth = BestI->getSUnit()->getDepth(); + for (SUnit::pred_iterator + I = llvm::next(BestI), E = Preds.end(); I != E; ++I) { + if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth) + BestI = I; + } + if (BestI != Preds.begin()) + std::swap(*Preds.begin(), *BestI); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or /// a group of nodes flagged together. diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 662fc0e..71e7a21 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -168,20 +168,6 @@ void ScheduleDAGInstrs::finishBlock() { BB = 0; } -/// Initialize the map with the number of registers. -void Reg2SUnitsMap::setRegLimit(unsigned Limit) { - PhysRegSet.setUniverse(Limit); - SUnits.resize(Limit); -} - -/// Clear the map without deallocating storage. 
-void Reg2SUnitsMap::clear() { - for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) { - SUnits[*I].clear(); - } - PhysRegSet.clear(); -} - /// Initialize the DAG and common scheduler state for the current scheduling /// region. This does not actually create the DAG, only clears it. The /// scheduling driver may call BuildSchedGraph multiple times per scheduling @@ -228,7 +214,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { if (Reg == 0) continue; if (TRI->isPhysicalRegister(Reg)) - Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1)); + Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg)); else { assert(!IsPostRA && "Virtual register encountered after regalloc."); if (MO.readsReg()) // ignore undef operands @@ -245,7 +231,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; if (!Uses.contains(Reg)) - Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1)); + Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg)); } } } @@ -263,15 +249,14 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { Alias.isValid(); ++Alias) { if (!Uses.contains(*Alias)) continue; - std::vector<PhysRegSUOper> &UseList = Uses[*Alias]; - for (unsigned i = 0, e = UseList.size(); i != e; ++i) { - SUnit *UseSU = UseList[i].SU; + for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) { + SUnit *UseSU = I->SU; if (UseSU == SU) continue; // Adjust the dependence latency using operand def/use information, // then allow the target to perform its own adjustments. - int UseOp = UseList[i].OpIdx; + int UseOp = I->OpIdx; MachineInstr *RegUse = 0; SDep Dep; if (UseOp < 0) @@ -311,9 +296,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { Alias.isValid(); ++Alias) { if (!Defs.contains(*Alias)) continue; - std::vector<PhysRegSUOper> &DefList = Defs[*Alias]; - for (unsigned i = 0, e = DefList.size(); i != e; ++i) { - SUnit *DefSU = DefList[i].SU; + for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) { + SUnit *DefSU = I->SU; if (DefSU == &ExitSU) continue; if (DefSU != SU && @@ -337,33 +321,37 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // Either insert a new Reg2SUnits entry with an empty SUnits list, or // retrieve the existing SUnits list for this register's uses. // Push this SUnit on the use list. - Uses[MO.getReg()].push_back(PhysRegSUOper(SU, OperIdx)); + Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg())); } else { addPhysRegDataDeps(SU, OperIdx); - - // Either insert a new Reg2SUnits entry with an empty SUnits list, or - // retrieve the existing SUnits list for this register's defs. - std::vector<PhysRegSUOper> &DefList = Defs[MO.getReg()]; + unsigned Reg = MO.getReg(); // clear this register's use list - if (Uses.contains(MO.getReg())) - Uses[MO.getReg()].clear(); - - if (!MO.isDead()) - DefList.clear(); - - // Calls will not be reordered because of chain dependencies (see - // below). Since call operands are dead, calls may continue to be added - // to the DefList making dependence checking quadratic in the size of - // the block. Instead, we leave only one call at the back of the - // DefList. - if (SU->isCall) { - while (!DefList.empty() && DefList.back().SU->isCall) - DefList.pop_back(); + if (Uses.contains(Reg)) + Uses.eraseAll(Reg); + + if (!MO.isDead()) { + Defs.eraseAll(Reg); + } else if (SU->isCall) { + // Calls will not be reordered because of chain dependencies (see + // below). 
Since call operands are dead, calls may continue to be added + // to the DefList making dependence checking quadratic in the size of + // the block. Instead, we leave only one call at the back of the + // DefList. + Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg); + Reg2SUnitsMap::iterator B = P.first; + Reg2SUnitsMap::iterator I = P.second; + for (bool isBegin = I == B; !isBegin; /* empty */) { + isBegin = (--I) == B; + if (!I->SU->isCall) + break; + I = Defs.erase(I); + } } + // Defs are pushed in the order they are visited and never reordered. - DefList.push_back(PhysRegSUOper(SU, OperIdx)); + Defs.insert(PhysRegSUOper(SU, OperIdx, Reg)); } } @@ -726,8 +714,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, assert(Defs.empty() && Uses.empty() && "Only BuildGraph should update Defs/Uses"); - Defs.setRegLimit(TRI->getNumRegs()); - Uses.setRegLimit(TRI->getNumRegs()); + Defs.setUniverse(TRI->getNumRegs()); + Uses.setUniverse(TRI->getNumRegs()); assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs"); // FIXME: Allow SparseSet to reserve space for the creation of virtual @@ -758,7 +746,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI"); } - assert((!MI->isTerminator() || CanHandleTerminators) && !MI->isLabel() && + assert((CanHandleTerminators || (!MI->isTerminator() && !MI->isLabel())) && "Cannot schedule terminators or labels!"); SUnit *SU = MISUnitMap[MI]; @@ -1006,7 +994,7 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { else if (SU == &ExitSU) oss << "<exit>"; else - SU->getInstr()->print(oss); + SU->getInstr()->print(oss, &TM, /*SkipOpers=*/true); return oss.str(); } @@ -1030,58 +1018,95 @@ class SchedDFSImpl { /// List PredSU, SuccSU pairs that represent data edges between subtrees. std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs; + struct RootData { + unsigned NodeID; + unsigned ParentNodeID; // Parent node (member of the parent subtree). + unsigned SubInstrCount; // Instr count in this tree only, not children. + + RootData(unsigned id): NodeID(id), + ParentNodeID(SchedDFSResult::InvalidSubtreeID), + SubInstrCount(0) {} + + unsigned getSparseSetIndex() const { return NodeID; } + }; + + SparseSet<RootData> RootSet; + public: - SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSData.size()) {} + SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) { + RootSet.setUniverse(R.DFSNodeData.size()); + } - /// SubtreID is initialized to zero, set to itself to flag the root of a - /// subtree, set to the parent to indicate an interior node, - /// then set to a representative subtree ID during finalization. + /// Return true if this node been visited by the DFS traversal. + /// + /// During visitPostorderNode the Node's SubtreeID is assigned to the Node + /// ID. Later, SubtreeID is updated but remains valid. bool isVisited(const SUnit *SU) const { - return R.DFSData[SU->NodeNum].SubtreeID; + return R.DFSNodeData[SU->NodeNum].SubtreeID + != SchedDFSResult::InvalidSubtreeID; } /// Initialize this node's instruction count. We don't need to flag the node /// visited until visitPostorder because the DAG cannot have cycles. void visitPreorder(const SUnit *SU) { - R.DFSData[SU->NodeNum].InstrCount = SU->getInstr()->isTransient() ? 0 : 1; + R.DFSNodeData[SU->NodeNum].InstrCount = + SU->getInstr()->isTransient() ? 0 : 1; } - /// Mark this node as either the root of a subtree or an interior - /// node. 
Increment the parent node's instruction count. - void visitPostorder(const SUnit *SU, const SDep *PredDep, const SUnit *Parent) { - R.DFSData[SU->NodeNum].SubtreeID = SU->NodeNum; - - // Join the child to its parent if they are connected via data dependence - // and do not exceed the limit. - if (!Parent || PredDep->getKind() != SDep::Data) - return; - - unsigned PredCnt = R.DFSData[SU->NodeNum].InstrCount; - if (PredCnt > R.SubtreeLimit) - return; - - R.DFSData[SU->NodeNum].SubtreeID = Parent->NodeNum; + /// Called once for each node after all predecessors are visited. Revisit this + /// node's predecessors and potentially join them now that we know the ILP of + /// the other predecessors. + void visitPostorderNode(const SUnit *SU) { + // Mark this node as the root of a subtree. It may be joined with its + // successors later. + R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum; + RootData RData(SU->NodeNum); + RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1; + + // If any predecessors are still in their own subtree, they either cannot be + // joined or are large enough to remain separate. If this parent node's + // total instruction count is not greater than a child subtree by at least + // the subtree limit, then try to join it now since splitting subtrees is + // only useful if multiple high-pressure paths are possible. + unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount; + for (SUnit::const_pred_iterator + PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { + if (PI->getKind() != SDep::Data) + continue; + unsigned PredNum = PI->getSUnit()->NodeNum; + if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit) + joinPredSubtree(*PI, SU, /*CheckLimit=*/false); + + // Either link or merge the TreeData entry from the child to the parent. + if (R.DFSNodeData[PredNum].SubtreeID == PredNum) { + // If the predecessor's parent is invalid, this is a tree edge and the + // current node is the parent. + if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID) + RootSet[PredNum].ParentNodeID = SU->NodeNum; + } + else if (RootSet.count(PredNum)) { + // The predecessor is not a root, but is still in the root set. This + // must be the new parent that it was just joined to. Note that + // RootSet[PredNum].ParentNodeID may either be invalid or may still be + // set to the original parent. + RData.SubInstrCount += RootSet[PredNum].SubInstrCount; + RootSet.erase(PredNum); + } + } + RootSet[SU->NodeNum] = RData; + } - // Add the recently finished predecessor's bottom-up descendent count. - R.DFSData[Parent->NodeNum].InstrCount += PredCnt; - SubtreeClasses.join(Parent->NodeNum, SU->NodeNum); + /// Called once for each tree edge after calling visitPostOrderNode on the + /// predecessor. Increment the parent node's instruction count and + /// preemptively join this subtree to its parent's if it is small enough. + void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) { + R.DFSNodeData[Succ->NodeNum].InstrCount + += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount; + joinPredSubtree(PredDep, Succ); } - /// Determine whether the DFS cross edge should be considered a subtree edge - /// or a connection between subtrees. - void visitCross(const SDep &PredDep, const SUnit *Succ) { - if (PredDep.getKind() == SDep::Data) { - // If this is a cross edge to a root, join the subtrees. This happens when - // the root was first reached by a non-data dependence. 
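The subtree bookkeeping being reworked in this file rests on a disjoint-set union over node IDs (SubtreeClasses.join): a predecessor's subtree is merged into its DFS parent only while the predecessor still roots its own subtree and stays under the instruction-count limit, plus the pinch-point check added below. A compact standalone sketch of that merge, with a minimal union-find standing in for IntEqClasses:

#include <numeric>
#include <vector>

// Minimal union-find over node IDs, standing in for IntEqClasses.
struct EqClasses {
  std::vector<unsigned> Parent;
  explicit EqClasses(unsigned N) : Parent(N) {
    std::iota(Parent.begin(), Parent.end(), 0u);
  }
  unsigned find(unsigned X) {
    while (Parent[X] != X) X = Parent[X] = Parent[Parent[X]]; // path halving
    return X;
  }
  void join(unsigned A, unsigned B) { Parent[find(A)] = find(B); }
};

// Join Pred's subtree into Succ's if Pred still roots its own subtree and
// is small enough -- the shape of joinPredSubtree in the diff.
bool joinSubtree(std::vector<unsigned> &SubtreeID,
                 const std::vector<unsigned> &InstrCount, EqClasses &Classes,
                 unsigned Pred, unsigned Succ, unsigned Limit) {
  if (SubtreeID[Pred] != Pred || InstrCount[Pred] > Limit)
    return false;
  SubtreeID[Pred] = Succ;
  Classes.join(Succ, Pred);
  return true;
}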
- unsigned NodeNum = PredDep.getSUnit()->NodeNum; - unsigned PredCnt = R.DFSData[NodeNum].InstrCount; - if (R.DFSData[NodeNum].SubtreeID == NodeNum && PredCnt < R.SubtreeLimit) { - R.DFSData[NodeNum].SubtreeID = Succ->NodeNum; - R.DFSData[Succ->NodeNum].InstrCount += PredCnt; - SubtreeClasses.join(Succ->NodeNum, NodeNum); - return; - } - } + /// Add a connection for cross edges. + void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) { ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ)); } @@ -1089,13 +1114,27 @@ public: /// between trees. void finalize() { SubtreeClasses.compress(); + R.DFSTreeData.resize(SubtreeClasses.getNumClasses()); + assert(SubtreeClasses.getNumClasses() == RootSet.size() + && "number of roots should match trees"); + for (SparseSet<RootData>::const_iterator + RI = RootSet.begin(), RE = RootSet.end(); RI != RE; ++RI) { + unsigned TreeID = SubtreeClasses[RI->NodeID]; + if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID) + R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID]; + R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount; + // Note that SubInstrCount may be greater than InstrCount if we joined + // subtrees across a cross edge. InstrCount will be attributed to the + // original parent, while SubInstrCount will be attributed to the joined + // parent. + } R.SubtreeConnections.resize(SubtreeClasses.getNumClasses()); R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses()); DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n"); - for (unsigned Idx = 0, End = R.DFSData.size(); Idx != End; ++Idx) { - R.DFSData[Idx].SubtreeID = SubtreeClasses[Idx]; + for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) { + R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx]; DEBUG(dbgs() << " SU(" << Idx << ") in tree " - << R.DFSData[Idx].SubtreeID << '\n'); + << R.DFSNodeData[Idx].SubtreeID << '\n'); } for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator I = ConnectionPairs.begin(), E = ConnectionPairs.end(); @@ -1111,21 +1150,53 @@ public: } protected: + /// Join the predecessor subtree with the successor that is its DFS + /// parent. Apply some heuristics before joining. + bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ, + bool CheckLimit = true) { + assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges"); + + // Check if the predecessor is already joined. + const SUnit *PredSU = PredDep.getSUnit(); + unsigned PredNum = PredSU->NodeNum; + if (R.DFSNodeData[PredNum].SubtreeID != PredNum) + return false; + + // Four is the magic number of successors before a node is considered a + // pinch point. + unsigned NumDataSucs = 0; + for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(), + SE = PredSU->Succs.end(); SI != SE; ++SI) { + if (SI->getKind() == SDep::Data) { + if (++NumDataSucs >= 4) + return false; + } + } + if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit) + return false; + R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum; + SubtreeClasses.join(Succ->NodeNum, PredNum); + return true; + } + /// Called by finalize() to record a connection between trees. 
void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) { if (!Depth) return; - SmallVectorImpl<SchedDFSResult::Connection> &Connections = - R.SubtreeConnections[FromTree]; - for (SmallVectorImpl<SchedDFSResult::Connection>::iterator - I = Connections.begin(), E = Connections.end(); I != E; ++I) { - if (I->TreeID == ToTree) { - I->Level = std::max(I->Level, Depth); - return; + do { + SmallVectorImpl<SchedDFSResult::Connection> &Connections = + R.SubtreeConnections[FromTree]; + for (SmallVectorImpl<SchedDFSResult::Connection>::iterator + I = Connections.begin(), E = Connections.end(); I != E; ++I) { + if (I->TreeID == ToTree) { + I->Level = std::max(I->Level, Depth); + return; + } } - } - Connections.push_back(SchedDFSResult::Connection(ToTree, Depth)); + Connections.push_back(SchedDFSResult::Connection(ToTree, Depth)); + FromTree = R.DFSTreeData[FromTree].ParentTreeID; + } while (FromTree != SchedDFSResult::InvalidSubtreeID); } }; } // namespace llvm @@ -1157,28 +1228,44 @@ public: }; } // anonymous +static bool hasDataSucc(const SUnit *SU) { + for (SUnit::const_succ_iterator + SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) { + if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode()) + return true; + } + return false; +} + /// Compute an ILP metric for all nodes in the subDAG reachable via depth-first /// search from this root. -void SchedDFSResult::compute(ArrayRef<SUnit *> Roots) { +void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) { if (!IsBottomUp) llvm_unreachable("Top-down ILP metric is unimplemnted"); SchedDFSImpl Impl(*this); - for (ArrayRef<const SUnit*>::const_iterator - RootI = Roots.begin(), RootE = Roots.end(); RootI != RootE; ++RootI) { + for (ArrayRef<SUnit>::const_iterator + SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) { + const SUnit *SU = &*SI; + if (Impl.isVisited(SU) || hasDataSucc(SU)) + continue; + SchedDAGReverseDFS DFS; - Impl.visitPreorder(*RootI); - DFS.follow(*RootI); + Impl.visitPreorder(SU); + DFS.follow(SU); for (;;) { // Traverse the leftmost path as far as possible. while (DFS.getPred() != DFS.getPredEnd()) { const SDep &PredDep = *DFS.getPred(); DFS.advance(); - // If the pred is already valid, skip it. We may preorder visit a node - // with InstrCount==0 more than once, but it won't affect heuristics - // because we don't care about cross edges to leaf copies. + // Ignore non-data edges. + if (PredDep.getKind() != SDep::Data + || PredDep.getSUnit()->isBoundaryNode()) { + continue; + } + // An already visited edge is a cross edge, assuming an acyclic DAG. if (Impl.isVisited(PredDep.getSUnit())) { - Impl.visitCross(PredDep, DFS.getCurr()); + Impl.visitCrossEdge(PredDep, DFS.getCurr()); continue; } Impl.visitPreorder(PredDep.getSUnit()); @@ -1187,7 +1274,9 @@ void SchedDFSResult::compute(ArrayRef<SUnit *> Roots) { // Visit the top of the stack in postorder and backtrack. const SUnit *Child = DFS.getCurr(); const SDep *PredDep = DFS.backtrack(); - Impl.visitPostorder(Child, PredDep, PredDep ? 
DFS.getCurr() : 0); + Impl.visitPostorderNode(Child); + if (PredDep) + Impl.visitPostorderEdge(*PredDep, DFS.getCurr()); if (DFS.isComplete()) break; } diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index 6c50913..8ddb3e8 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -41,6 +41,10 @@ namespace llvm { return true; } + static bool isNodeHidden(const SUnit *Node) { + return (Node->NumPreds > 10 || Node->NumSuccs > 10); + } + static bool hasNodeAddressLabel(const SUnit *Node, const ScheduleDAG *Graph) { return true; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index ff00d0d..ec52d7e 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2634,7 +2634,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); if (Result != ISD::SETCC_INVALID && (!LegalOperations || - TLI.isCondCodeLegal(Result, LL.getSimpleValueType()))) + (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, + TLI.getSetCCResultType(N0.getSimpleValueType()))))) return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), LL, LR, Result); } @@ -3144,7 +3146,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); if (Result != ISD::SETCC_INVALID && (!LegalOperations || - TLI.isCondCodeLegal(Result, LL.getSimpleValueType()))) + (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, + TLI.getSetCCResultType(N0.getValueType()))))) return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), LL, LR, Result); } @@ -5100,16 +5104,26 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // If we haven't found a load, we can't narrow it. Don't transform one with // multiple uses, this would require adding a new load. - if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() || - // Don't change the width of a volatile load. - cast<LoadSDNode>(N0)->isVolatile()) + if (!isa<LoadSDNode>(N0) || !N0.hasOneUse()) + return SDValue(); + + // Don't change the width of a volatile load. + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + if (LN0->isVolatile()) return SDValue(); // Verify that we are actually reducing a load width here. - if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits) + if (LN0->getMemoryVT().getSizeInBits() < EVTBits) + return SDValue(); + + // For the transform to be legal, the load must produce only two values + // (the value loaded and the chain). Don't transform a pre-increment + // load, for example, which produces an extra value. Otherwise the + // transformation is not equivalent, and the downstream logic to replace + // uses gets things wrong. 
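The extra guard added to visitAND/visitOR above matters because getSetCCAndOperation/getSetCCOrOperation can synthesize a condition code that neither input used; the fold now also requires SETCC itself to be legal on the result type. Conceptually the combination is set algebra over the possible orderings of the two operands. A small model of the integer case, with names of my own choosing rather than the ISD enumerators:

    #include <cassert>

    // Each condition code is the set of orderings for which it is true.
    enum : unsigned { LT = 1, EQ = 2, GT = 4 };
    constexpr unsigned SETLT = LT, SETLE = LT | EQ, SETEQ = EQ,
                       SETNE = LT | GT, SETGE = EQ | GT, SETGT = GT;

    // (setcc a,b,cc1) & (setcc a,b,cc2) -> setcc a,b,(cc1 & cc2)
    constexpr unsigned getSetCCAnd(unsigned A, unsigned B) { return A & B; }
    // (setcc a,b,cc1) | (setcc a,b,cc2) -> setcc a,b,(cc1 | cc2)
    constexpr unsigned getSetCCOr(unsigned A, unsigned B) { return A | B; }

    bool holds(unsigned CC, int X, int Y) {
      unsigned Ord = X < Y ? LT : (X == Y ? EQ : GT);
      return (CC & Ord) != 0;
    }

    int main() {
      assert(getSetCCAnd(SETLE, SETGE) == SETEQ); // (a<=b) & (a>=b) -> a==b
      assert(getSetCCOr(SETLT, SETEQ) == SETLE);  // (a<b) | (a==b) -> a<=b
      for (int A = -1; A <= 1; ++A)
        assert(holds(getSetCCAnd(SETLE, SETGE), A, 0) == (A == 0));
      return 0;
    }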
+ if (LN0->getNumValues() > 2) return SDValue(); - LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT PtrType = N0.getOperand(1).getValueType(); if (PtrType == MVT::Untyped || PtrType.isExtended()) @@ -5153,8 +5167,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { EVT ShImmTy = getShiftAmountTy(Result.getValueType()); if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt)) ShImmTy = VT; - Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, - Result, DAG.getConstant(ShLeftAmt, ShImmTy)); + // If the shift amount is as large as the result size (but, presumably, + // no larger than the source) then the useful bits of the result are + // zero; we can't simply return the shortened shift, because the result + // of that operation is undefined. + if (ShLeftAmt >= VT.getSizeInBits()) + Result = DAG.getConstant(0, VT); + else + Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, + Result, DAG.getConstant(ShLeftAmt, ShImmTy)); } // Return the new loaded value. @@ -5340,6 +5361,38 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } + // Fold a series of buildvector, bitcast, and truncate if possible. + // For example fold + // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to + // (2xi32 (buildvector x, y)). + if (Level == AfterLegalizeVectorOps && VT.isVector() && + N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && + N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR && + N0.getOperand(0).hasOneUse()) { + + SDValue BuildVect = N0.getOperand(0); + EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType(); + EVT TruncVecEltTy = VT.getVectorElementType(); + + // Check that the element types match. + if (BuildVectEltTy == TruncVecEltTy) { + // Now we only need to compute the offset of the truncated elements. + unsigned BuildVecNumElts = BuildVect.getNumOperands(); + unsigned TruncVecNumElts = VT.getVectorNumElements(); + unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts; + + assert((BuildVecNumElts % TruncVecNumElts) == 0 && + "Invalid number of elements"); + + SmallVector<SDValue, 8> Opnds; + for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) + Opnds.push_back(BuildVect.getOperand(i)); + + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &Opnds[0], + Opnds.size()); + } + } + // See if we can simplify the input to this truncate through knowledge that // only the low bits are being used. 
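The buildvector/bitcast/truncate fold above can be checked with plain memory: viewing four i32 lanes as two i64 lanes and then truncating keeps the low half of each i64, which picks every (BuildVecNumElts / TruncVecNumElts)-th original lane. A standalone sketch of that arithmetic, assuming a little-endian host so the memcpy models the bitcast:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      // (2xi32 trunc (bitcast (4xi32 buildvector x, x, y, y) to 2xi64))
      uint32_t BuildVec[4] = {0x11111111u, 0x11111111u,
                              0x22222222u, 0x22222222u};

      uint64_t Wide[2];
      std::memcpy(Wide, BuildVec, sizeof(Wide));          // the bitcast
      uint32_t Trunc[2] = {static_cast<uint32_t>(Wide[0]), // truncate keeps
                           static_cast<uint32_t>(Wide[1])}; // each low half

      // The fold instead picks every Stride'th buildvector operand.
      const unsigned Stride = 4 / 2; // BuildVecNumElts / TruncVecNumElts
      for (unsigned I = 0; I != 2; ++I)
        assert(Trunc[I] == BuildVec[I * Stride]); // (2xi32 buildvector x, y)
      return 0;
    }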
// For example "trunc (or (shl x, 8), y)" // -> trunc y @@ -5822,13 +5875,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N1, NewCFP); } - // (fadd (fadd x, x), x) -> (fmul 3.0, x) - if (!CFP00 && !CFP01 && N0.getOperand(0) == N0.getOperand(1) && - N0.getOperand(0) == N1) { - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, - N1, DAG.getConstantFP(3.0, VT)); - } - // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x) if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && @@ -5874,12 +5920,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N0, NewCFP); } - // (fadd x, (fadd x, x)) -> (fmul 3.0, x) - if (!CFP10 && !CFP11 && N1.getOperand(0) == N1.getOperand(1) && - N1.getOperand(0) == N0) { - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, - N0, DAG.getConstantFP(3.0, VT)); - } // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x) if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD && @@ -5904,6 +5944,26 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } } + if (N0.getOpcode() == ISD::FADD) { + ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); + // (fadd (fadd x, x), x) -> (fmul 3.0, x) + if (!CFP && N0.getOperand(0) == N0.getOperand(1) && + (N0.getOperand(0) == N1)) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1, DAG.getConstantFP(3.0, VT)); + } + } + + if (N1.getOpcode() == ISD::FADD) { + ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); + // (fadd x, (fadd x, x)) -> (fmul 3.0, x) + if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && + N1.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0, DAG.getConstantFP(3.0, VT)); + } + } + // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x) if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && @@ -6735,18 +6795,24 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (Op0.getOpcode() == Op1.getOpcode()) { // Avoid missing important xor optimizations. SDValue Tmp = visitXOR(TheXor); - if (Tmp.getNode() && Tmp.getNode() != TheXor) { - DEBUG(dbgs() << "\nReplacing.8 "; - TheXor->dump(&DAG); - dbgs() << "\nWith: "; - Tmp.getNode()->dump(&DAG); - dbgs() << '\n'); - WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N1, Tmp); - removeFromWorkList(TheXor); - DAG.DeleteNode(TheXor); - return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), - MVT::Other, Chain, Tmp, N2); + if (Tmp.getNode()) { + if (Tmp.getNode() != TheXor) { + DEBUG(dbgs() << "\nReplacing.8 "; + TheXor->dump(&DAG); + dbgs() << "\nWith: "; + Tmp.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1, Tmp); + removeFromWorkList(TheXor); + DAG.DeleteNode(TheXor); + return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + MVT::Other, Chain, Tmp, N2); + } + + // visitXOR has changed XOR's operands. + Op0 = TheXor->getOperand(0); + Op1 = TheXor->getOperand(1); } } @@ -6894,6 +6960,16 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { ISD::MemIndexedMode AM = ISD::UNINDEXED; if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG)) return false; + + // Backends without true r+i pre-indexed forms may need to pass a + // constant base with a variable offset so that constant coercion + // will work with the patterns in canonical form. + bool Swapped = false; + if (isa<ConstantSDNode>(BasePtr)) { + std::swap(BasePtr, Offset); + Swapped = true; + } + // Don't create a indexed load / store with zero offset. 
if (isa<ConstantSDNode>(Offset) && cast<ConstantSDNode>(Offset)->isNullValue()) @@ -6919,6 +6995,48 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { return false; } + // If the offset is a constant, there may be other adds of constants that + // can be folded with this one. We should do this to avoid having to keep + // a copy of the original base pointer. + SmallVector<SDNode *, 16> OtherUses; + if (isa<ConstantSDNode>(Offset)) + for (SDNode::use_iterator I = BasePtr.getNode()->use_begin(), + E = BasePtr.getNode()->use_end(); I != E; ++I) { + SDNode *Use = *I; + if (Use == Ptr.getNode()) + continue; + + if (Use->isPredecessorOf(N)) + continue; + + if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) { + OtherUses.clear(); + break; + } + + SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1); + if (Op1.getNode() == BasePtr.getNode()) + std::swap(Op0, Op1); + assert(Op0.getNode() == BasePtr.getNode() && + "Use of ADD/SUB but not an operand"); + + if (!isa<ConstantSDNode>(Op1)) { + OtherUses.clear(); + break; + } + + // FIXME: In some cases, we can be smarter about this. + if (Op1.getValueType() != Offset.getValueType()) { + OtherUses.clear(); + break; + } + + OtherUses.push_back(Use); + } + + if (Swapped) + std::swap(BasePtr, Offset); + // Now check for #3 and #4. bool RealUse = false; @@ -6968,6 +7086,43 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // Finally, since the node is now dead, remove it from the graph. DAG.DeleteNode(N); + if (Swapped) + std::swap(BasePtr, Offset); + + // Replace other uses of BasePtr that can be updated to use Ptr + for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) { + unsigned OffsetIdx = 1; + if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode()) + OffsetIdx = 0; + assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() == + BasePtr.getNode() && "Expected BasePtr operand"); + + APInt OV = + cast<ConstantSDNode>(Offset)->getAPIntValue(); + if (AM == ISD::PRE_DEC) + OV = -OV; + + ConstantSDNode *CN = + cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx)); + APInt CNV = CN->getAPIntValue(); + if (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) + CNV += OV; + else + CNV -= OV; + + SDValue NewOp1 = Result.getValue(isLoad ? 1 : 0); + SDValue NewOp2 = DAG.getConstant(CNV, CN->getValueType(0)); + if (OffsetIdx == 0) + std::swap(NewOp1, NewOp2); + + SDValue NewUse = DAG.getNode(OtherUses[i]->getOpcode(), + OtherUses[i]->getDebugLoc(), + OtherUses[i]->getValueType(0), NewOp1, NewOp2); + DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); + removeFromWorkList(OtherUses[i]); + DAG.DeleteNode(OtherUses[i]); + } + // Replace the uses of Ptr with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0)); removeFromWorkList(Ptr.getNode()); @@ -7176,12 +7331,15 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Try to infer better alignment information than the load already has. 
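The bookkeeping above rewrites sibling address computations so the original base pointer can die: once the pre-indexed load produces BasePtr+Offset as its address result, another use BasePtr±C can be re-expressed off the new pointer by adjusting its constant. A sanity check of the ADD and SUB rewrites with plain integers (the PRE_DEC case first negates the offset, and the swapped-operand cases follow the same pattern):

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t Base = 1000;       // original base pointer value
      int64_t Off  = 4;          // offset folded into the indexed load
      int64_t Ptr  = Base + Off; // address result of the indexed load

      // Other use ADD(Base, 12) becomes ADD(Ptr, 12 - Off).
      assert(Base + 12 == Ptr + (12 - Off));
      // Other use SUB(Base, 12) becomes SUB(Ptr, 12 + Off).
      assert(Base - 12 == Ptr - (12 + Off));
      return 0;
    }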
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > LD->getAlignment()) - return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), + if (Align > LD->getMemOperand()->getBaseAlignment()) { + SDValue NewLoad = + DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), LD->getValueType(0), Chain, Ptr, LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), Align); + return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); + } } } @@ -7576,6 +7734,8 @@ struct ConsecutiveMemoryChainSorter { bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { EVT MemVT = St->getMemoryVT(); int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; + bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat); // Don't merge vectors into wider inputs. if (MemVT.isVector() || !MemVT.isSimple()) @@ -7751,16 +7911,14 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // We only use vectors if the constant is known to be zero and the // function is not marked with the noimplicitfloat attribute. - if (NonZero || (DAG.getMachineFunction().getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, - Attribute::NoImplicitFloat))) + if (NonZero || NoVectors) LastLegalVectorType = 0; // Check if we found a legal integer type to store. if (LastLegalType == 0 && LastLegalVectorType == 0) return false; - bool UseVector = LastLegalVectorType > LastLegalType; + bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType; // Make sure we have something to merge. @@ -7913,7 +8071,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // All loads much share the same chain. if (LoadNodes[i].MemNode->getChain() != FirstChain) break; - + int64_t CurrAddress = LoadNodes[i].OffsetFromBase; if (CurrAddress - StartAddress != (ElementSizeBytes * i)) break; @@ -7933,7 +8091,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Only use vector types if the vector type is larger than the integer type. // If they are the same, use integers. - bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType; + bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors; unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType); // We add +1 here because the LastXXX variables refer to location while @@ -9104,11 +9262,6 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { /// SimplifyVBinOp - Visit a binary vector operation, like ADD. SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { - // After legalize, the target may be depending on adds and other - // binary ops to provide legal ways to construct constants or other - // things. Simplifying them may result in a loss of legality. - if (LegalOperations) return SDValue(); - assert(N->getValueType(0).isVector() && "SimplifyVBinOp only works on vectors!"); @@ -9178,11 +9331,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { /// SimplifyVUnaryOp - Visit a binary vector operation, like FABS/FNEG. SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { - // After legalize, the target may be depending on adds and other - // binary ops to provide legal ways to construct constants or other - // things. Simplifying them may result in a loss of legality. 
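For the store-merging changes above, the underlying idea is that a run of adjacent narrow constant stores can become a single wide store of a packed constant; the hoisted NoVectors flag simply forbids the vector-typed variant when the function is marked noimplicitfloat. A scalar illustration of the packing, assuming little-endian byte order:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      // Four consecutive one-byte stores of constants...
      uint8_t Mem[4];
      const uint8_t C[4] = {0x10, 0x20, 0x30, 0x40};
      for (unsigned I = 0; I != 4; ++I) Mem[I] = C[I];

      // ...merged into one wider store of a packed constant.
      uint32_t Packed = 0;
      for (unsigned I = 0; I != 4; ++I)
        Packed |= static_cast<uint32_t>(C[I]) << (8 * I);
      uint8_t Mem2[4];
      std::memcpy(Mem2, &Packed, 4); // the single merged i32 store

      assert(std::memcmp(Mem, Mem2, 4) == 0);
      return 0;
    }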
- if (LegalOperations) return SDValue(); - assert(N->getValueType(0).isVector() && "SimplifyVUnaryOp only works on vectors!"); @@ -9285,7 +9433,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // src value info, don't do the transformation if the memory // locations are not in the default address space. LLD->getPointerInfo().getAddrSpace() != 0 || - RLD->getPointerInfo().getAddrSpace() != 0) + RLD->getPointerInfo().getAddrSpace() != 0 || + !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(), + LLD->getBasePtr().getValueType())) return false; // Check that the select condition doesn't reach either load. If so, diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 0d90a07..ff9b2ba 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -87,6 +87,27 @@ void FastISel::startNewBlock() { LastLocalValue = EmitStartPt; } +bool FastISel::LowerArguments() { + if (!FuncInfo.CanLowerReturn) + // Fallback to SDISel argument lowering code to deal with sret pointer + // parameter. + return false; + + if (!FastLowerArguments()) + return false; + + // Enter non-dead arguments into ValueMap for uses in non-entry BBs. + for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(), + E = FuncInfo.Fn->arg_end(); I != E; ++I) { + if (!I->use_empty()) { + DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I); + assert(VI != LocalValueMap.end() && "Missed an argument?"); + FuncInfo.ValueMap[I] = VI->second; + } + } + return true; +} + void FastISel::flushLocalValueMap() { LocalValueMap.clear(); LastLocalValue = EmitStartPt; @@ -684,7 +705,7 @@ bool FastISel::SelectCall(const User *I) { // all the values which have already been materialized, // appear after the call. It also makes sense to skip intrinsics // since they tend to be inlined. - if (!isa<IntrinsicInst>(F)) + if (!isa<IntrinsicInst>(Call)) flushLocalValueMap(); // An arbitrary call. Bail. @@ -801,7 +822,7 @@ FastISel::SelectInstruction(const Instruction *I) { } // First, try doing target-independent selection. - if (SelectOperator(I, I->getOpcode())) { + if (!SkipTargetIndependentFastISel() && SelectOperator(I, I->getOpcode())) { ++NumFastIselSuccessIndependent; DL = DebugLoc(); return true; @@ -836,7 +857,8 @@ FastISel::SelectInstruction(const Instruction *I) { void FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { - if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { + if (FuncInfo.MBB->getBasicBlock()->size() > 1 && + FuncInfo.MBB->isLayoutSuccessor(MSucc)) { // For more accurate line information if this is the only instruction // in the block then emit it, otherwise we have the unconditional // fall-through case, which needs no instructions. 
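The SimplifySelectOps change above guards a transform that turns a select of two loads into one load of a selected address, so the select over the pointer type itself must be something the target can do. In scalar C++ the rewrite looks like this; the function names are my own:

    #include <cassert>

    int pick_before(bool Cond, const int *A, const int *B) {
      return Cond ? *A : *B;   // select of two loads
    }
    int pick_after(bool Cond, const int *A, const int *B) {
      return *(Cond ? A : B);  // one load of a selected pointer
    }

    int main() {
      int X = 1, Y = 2;
      assert(pick_before(true, &X, &Y) == pick_after(true, &X, &Y));
      assert(pick_before(false, &X, &Y) == pick_after(false, &X, &Y));
      return 0;
    }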
@@ -1067,6 +1089,10 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo, FastISel::~FastISel() {} +bool FastISel::FastLowerArguments() { + return false; +} + unsigned FastISel::FastEmit_(MVT, MVT, unsigned) { return 0; diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 5eaf67e..f085e44 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -12,9 +12,9 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -102,7 +102,8 @@ private: SDNode *Node, bool isSigned); SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, - RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128); + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128); SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8, RTLIB::Libcall Call_I16, @@ -110,6 +111,7 @@ private: RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128); void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); + void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl); SDValue ExpandBUILD_VECTOR(SDNode *Node); @@ -1841,26 +1843,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { return ExpandVectorBuildThroughStack(Node); } -static bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, - SDValue &Chain, const TargetLowering &TLI) { - const Function *F = DAG.getMachineFunction().getFunction(); - - // Conservatively require the attributes of the call to match those of - // the return. Ignore noalias because it doesn't affect the call sequence. - Attribute CallerRetAttr = F->getAttributes().getRetAttributes(); - if (AttrBuilder(CallerRetAttr) - .removeAttribute(Attribute::NoAlias).hasAttributes()) - return false; - - // It's not safe to eliminate the sign / zero extension of the return value. - if (CallerRetAttr.hasAttribute(Attribute::ZExt) || - CallerRetAttr.hasAttribute(Attribute::SExt)) - return false; - - // Check if the only use is a function return node. - return TLI.isUsedByReturnOnly(Node, Chain); -} - // ExpandLibCall - Expand a node into a call to a libcall. If the result value // does not fit into a register, return the lo part and set the hi part to the // by-reg argument. If it does fit into a single register, return the result @@ -1891,7 +1873,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, // isTailCall may be true since the callee does not reference caller stack // frame. Check if it's in the right position. SDValue TCChain = InChain; - bool isTailCall = isInTailCallPosition(DAG, Node, TCChain, TLI); + bool isTailCall = TLI.isInTailCallPosition(DAG, Node, TCChain); if (isTailCall) InChain = TCChain; @@ -2115,6 +2097,120 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, Results.push_back(Rem); } +/// isSinCosLibcallAvailable - Return true if sincos libcall is available. 
+static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::f32: LC = RTLIB::SINCOS_F32; break; + case MVT::f64: LC = RTLIB::SINCOS_F64; break; + case MVT::f80: LC = RTLIB::SINCOS_F80; break; + case MVT::f128: LC = RTLIB::SINCOS_F128; break; + case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break; + } + return TLI.getLibcallName(LC) != 0; +} + +/// canCombineSinCosLibcall - Return true if sincos libcall is available and +/// can be used to combine sin and cos. +static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, + const TargetMachine &TM) { + if (!isSinCosLibcallAvailable(Node, TLI)) + return false; + // GNU sin/cos functions set errno while sincos does not. Therefore + // combining sin and cos is only safe if unsafe-fpmath is enabled. + bool isGNU = Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU; + if (isGNU && !TM.Options.UnsafeFPMath) + return false; + return true; +} + +/// useSinCos - Only issue sincos libcall if both sin and cos are +/// needed. +static bool useSinCos(SDNode *Node) { + unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN + ? ISD::FCOS : ISD::FSIN; + + SDValue Op0 = Node->getOperand(0); + for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), + UE = Op0.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (User == Node) + continue; + // The other user might have been turned into sincos already. + if (User->getOpcode() == OtherOpcode || User->getOpcode() == ISD::FSINCOS) + return true; + } + return false; +} + +/// ExpandSinCosLibCall - Issue libcalls to sincos to compute sin / cos +/// pairs. +void +SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::f32: LC = RTLIB::SINCOS_F32; break; + case MVT::f64: LC = RTLIB::SINCOS_F64; break; + case MVT::f80: LC = RTLIB::SINCOS_F80; break; + case MVT::f128: LC = RTLIB::SINCOS_F128; break; + case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break; + } + + // The input chain to this libcall is the entry node of the function. + // Legalizing the call will automatically add the previous call to the + // dependence. + SDValue InChain = DAG.getEntryNode(); + + EVT RetVT = Node->getValueType(0); + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + + // Pass the argument. + Entry.Node = Node->getOperand(0); + Entry.Ty = RetTy; + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + + // Pass the return address of sin. + SDValue SinPtr = DAG.CreateStackTemporary(RetVT); + Entry.Node = SinPtr; + Entry.Ty = RetTy->getPointerTo(); + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + + // Also pass the return address of the cos. 
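useSinCos only fires when the same operand feeds both an fsin and an fcos that are actually used, and the GNU case additionally requires unsafe-fp-math because glibc's sin/cos may set errno while sincos does not. The real lowering calls the C library's sincos, a GNU extension; a portable stand-in with the same shape (one argument, two results through pointers) is:

    #include <cassert>
    #include <cmath>

    // Portable stand-in for the sincos libcall.
    void sincos_portable(double X, double *Sin, double *Cos) {
      *Sin = std::sin(X);
      *Cos = std::cos(X);
    }

    int main() {
      double S, C;
      sincos_portable(0.5, &S, &C);
      assert(S == std::sin(0.5) && C == std::cos(0.5));
      return 0;
    }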
+ SDValue CosPtr = DAG.CreateStackTemporary(RetVT); + Entry.Node = CosPtr; + Entry.Ty = RetTy->getPointerTo(); + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + DebugLoc dl = Node->getDebugLoc(); + TargetLowering:: + CallLoweringInfo CLI(InChain, Type::getVoidTy(*DAG.getContext()), + false, false, false, false, + 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); + + Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr, + MachinePointerInfo(), false, false, false, 0)); + Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr, + MachinePointerInfo(), false, false, false, 0)); +} + /// ExpandLegalINT_TO_FP - This function is responsible for legalizing a /// INT_TO_FP operation of the specified operand when the target requests that /// we expand it. At this point, we know that the result and operand types are @@ -2443,18 +2539,6 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { } } -/// SplatByte - Distribute ByteVal over NumBits bits. -// FIXME: Move this helper to a common place. -static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) { - APInt Val = APInt(NumBits, ByteVal); - unsigned Shift = 8; - for (unsigned i = NumBits; i > 8; i >>= 1) { - Val = (Val << Shift) | Val; - Shift <<= 1; - } - return Val; -} - /// ExpandBitCount - Expand the specified bitcount instruction into operations. /// SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, @@ -2472,10 +2556,10 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // This is the "best" algorithm from // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel - SDValue Mask55 = DAG.getConstant(SplatByte(Len, 0x55), VT); - SDValue Mask33 = DAG.getConstant(SplatByte(Len, 0x33), VT); - SDValue Mask0F = DAG.getConstant(SplatByte(Len, 0x0F), VT); - SDValue Mask01 = DAG.getConstant(SplatByte(Len, 0x01), VT); + SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), VT); + SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), VT); + SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), VT); + SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), VT); // v = v - ((v >> 1) & 0x55555555...) Op = DAG.getNode(ISD::SUB, dl, VT, Op, @@ -2825,7 +2909,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue True, False; EVT VT = Node->getOperand(0).getValueType(); EVT NVT = Node->getValueType(0); - APFloat apf(APInt::getNullValue(VT.getSizeInBits())); + APFloat apf(DAG.EVTToAPFloatSemantics(VT), + APInt::getNullValue(VT.getSizeInBits())); APInt x = APInt::getSignBit(NVT.getSizeInBits()); (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); Tmp1 = DAG.getConstantFP(apf, VT); @@ -3060,14 +3145,33 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { RTLIB::SQRT_PPCF128)); break; case ISD::FSIN: - Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, - RTLIB::SIN_F80, RTLIB::SIN_F128, - RTLIB::SIN_PPCF128)); + case ISD::FCOS: { + EVT VT = Node->getValueType(0); + bool isSIN = Node->getOpcode() == ISD::FSIN; + // Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin / + // fcos which share the same operand and both are used. 
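The APInt::getSplat calls above generalize the deleted SplatByte helper: each mask is one byte pattern replicated across the type's width, feeding the standard parallel popcount the legalizer expands CTPOP into. A 64-bit rendering of the same expansion:

    #include <cassert>
    #include <cstdint>

    // Replicate an 8-bit pattern across 64 bits, as APInt::getSplat
    // does for an arbitrary width.
    constexpr uint64_t splat8(uint8_t Byte) {
      return 0x0101010101010101ULL * Byte;
    }

    // The "best" parallel bit-count from the bithacks page cited above.
    unsigned popcount64(uint64_t V) {
      const uint64_t M55 = splat8(0x55), M33 = splat8(0x33),
                     M0F = splat8(0x0F), M01 = splat8(0x01);
      V = V - ((V >> 1) & M55);          // 2-bit sums
      V = (V & M33) + ((V >> 2) & M33);  // 4-bit sums
      V = (V + (V >> 4)) & M0F;          // byte sums
      return static_cast<unsigned>((V * M01) >> 56); // horizontal add
    }

    int main() {
      assert(popcount64(0) == 0);
      assert(popcount64(0xFFFFFFFFFFFFFFFFULL) == 64);
      assert(popcount64(0x8000000000000001ULL) == 2);
      return 0;
    }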
+ if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) || + canCombineSinCosLibcall(Node, TLI, TM)) + && useSinCos(Node)) { + SDVTList VTs = DAG.getVTList(VT, VT); + Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0)); + if (!isSIN) + Tmp1 = Tmp1.getValue(1); + Results.push_back(Tmp1); + } else if (isSIN) { + Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128)); + } else { + Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128)); + } break; - case ISD::FCOS: - Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, - RTLIB::COS_F80, RTLIB::COS_F128, - RTLIB::COS_PPCF128)); + } + case ISD::FSINCOS: + // Expand into sincos libcall. + ExpandSinCosLibCall(Node, Results); break; case ISD::FLOG: Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, @@ -3200,7 +3304,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::UREM: case ISD::SREM: { EVT VT = Node->getValueType(0); - SDVTList VTs = DAG.getVTList(VT, VT); bool isSigned = Node->getOpcode() == ISD::SREM; unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV; unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; @@ -3211,6 +3314,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // If div is legal, it's better to do the normal expansion !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) && useDivRem(Node, isSigned, false))) { + SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { // X % Y -> X-X/Y*Y diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 92dc5a9..1ee2192 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -152,23 +152,23 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::ADD_F32, - RTLIB::ADD_F64, - RTLIB::ADD_F80, - RTLIB::ADD_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, + RTLIB::ADD_F64, + RTLIB::ADD_F80, + RTLIB::ADD_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::CEIL_F32, - RTLIB::CEIL_F64, - RTLIB::CEIL_F80, - RTLIB::CEIL_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, + RTLIB::CEIL_F64, + RTLIB::CEIL_F80, + RTLIB::CEIL_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { @@ -216,90 +216,90 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::COS_F32, - RTLIB::COS_F64, - RTLIB::COS_F80, - 
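The UREM/SREM path above falls back to X % Y == X - (X/Y)*Y when remainder and divrem are unavailable but division is legal. The identity holds for C++'s truncating integer division across sign combinations:

    #include <cassert>

    int srem_via_sdiv(int X, int Y) {
      return X - (X / Y) * Y; // X % Y when only division is legal
    }

    int main() {
      assert(srem_via_sdiv(7, 3)  ==  7 % 3);
      assert(srem_via_sdiv(-7, 3) == -7 % 3);
      assert(srem_via_sdiv(7, -3) ==  7 % -3);
      return 0;
    }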
RTLIB::COS_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, + RTLIB::COS_F64, + RTLIB::COS_F80, + RTLIB::COS_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::DIV_F32, - RTLIB::DIV_F64, - RTLIB::DIV_F80, - RTLIB::DIV_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::EXP_F32, - RTLIB::EXP_F64, - RTLIB::EXP_F80, - RTLIB::EXP_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, + RTLIB::EXP_F64, + RTLIB::EXP_F80, + RTLIB::EXP_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::EXP2_F32, - RTLIB::EXP2_F64, - RTLIB::EXP2_F80, - RTLIB::EXP2_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, + RTLIB::EXP2_F64, + RTLIB::EXP2_F80, + RTLIB::EXP2_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::FLOOR_F32, - RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80, - RTLIB::FLOOR_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32, + RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, + RTLIB::FLOOR_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::LOG_F32, - RTLIB::LOG_F64, - RTLIB::LOG_F80, - RTLIB::LOG_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, + RTLIB::LOG_F64, + RTLIB::LOG_F80, + RTLIB::LOG_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::LOG2_F32, - RTLIB::LOG2_F64, - RTLIB::LOG2_F80, - RTLIB::LOG2_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, + RTLIB::LOG2_F64, + RTLIB::LOG2_F80, + RTLIB::LOG2_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } 
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::LOG10_F32, - RTLIB::LOG10_F64, - RTLIB::LOG10_F80, - RTLIB::LOG10_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32, + RTLIB::LOG10_F64, + RTLIB::LOG10_F80, + RTLIB::LOG10_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { @@ -307,35 +307,35 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)), GetSoftenedFloat(N->getOperand(2)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::FMA_F32, - RTLIB::FMA_F64, - RTLIB::FMA_F80, - RTLIB::FMA_PPCF128), - NVT, Ops, 3, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_PPCF128), + NVT, Ops, 3, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::MUL_F32, - RTLIB::MUL_F64, - RTLIB::MUL_F80, - RTLIB::MUL_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::NEARBYINT_F32, - RTLIB::NEARBYINT_F64, - RTLIB::NEARBYINT_F80, - RTLIB::NEARBYINT_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { @@ -343,12 +343,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { // Expand Y = FNEG(X) -> Y = SUB -0.0, X SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)), GetSoftenedFloat(N->getOperand(0)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { @@ -356,7 +356,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, N->getDebugLoc()); } // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special @@ -364,8 +364,8 @@ 
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); - return MakeLibCall(RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, - N->getDebugLoc()); + return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, + N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { @@ -373,19 +373,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); - return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::POW_F32, - RTLIB::POW_F64, - RTLIB::POW_F80, - RTLIB::POW_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, + RTLIB::POW_F64, + RTLIB::POW_F80, + RTLIB::POW_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { @@ -393,80 +393,80 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { "Unsupported power type!"); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::POWI_F32, - RTLIB::POWI_F64, - RTLIB::POWI_F80, - RTLIB::POWI_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, + RTLIB::POWI_F64, + RTLIB::POWI_F80, + RTLIB::POWI_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::REM_F32, - RTLIB::REM_F64, - RTLIB::REM_F80, - RTLIB::REM_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, + RTLIB::REM_F64, + RTLIB::REM_F80, + RTLIB::REM_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::RINT_F32, - RTLIB::RINT_F64, - RTLIB::RINT_F80, - RTLIB::RINT_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, + RTLIB::RINT_F64, + RTLIB::RINT_F80, + RTLIB::RINT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::SIN_F32, - RTLIB::SIN_F64, - 
RTLIB::SIN_F80, - RTLIB::SIN_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, + RTLIB::SIN_F64, + RTLIB::SIN_F80, + RTLIB::SIN_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::SQRT_F32, - RTLIB::SQRT_F64, - RTLIB::SQRT_F80, - RTLIB::SQRT_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, + RTLIB::SQRT_F64, + RTLIB::SQRT_F80, + RTLIB::SQRT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::TRUNC_F32, - RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, - RTLIB::TRUNC_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, + RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, + RTLIB::TRUNC_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { @@ -559,8 +559,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { // Sign/zero extend the argument if the libcall takes a larger type. SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); - return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - &Op, 1, false, dl); + return TLI.makeLibCall(DAG, LC, + TLI.getTypeToTransformTo(*DAG.getContext(), RVT), + &Op, 1, false, dl); } @@ -607,92 +608,6 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { return false; } -/// SoftenSetCCOperands - Soften the operands of a comparison. This code is -/// shared among BR_CC, SELECT_CC, and SETCC handlers. -void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, - ISD::CondCode &CCCode, DebugLoc dl) { - SDValue LHSInt = GetSoftenedFloat(NewLHS); - SDValue RHSInt = GetSoftenedFloat(NewRHS); - EVT VT = NewLHS.getValueType(); - - assert((VT == MVT::f32 || VT == MVT::f64) && "Unsupported setcc type!"); - - // Expand into one or more soft-fp libcall(s). - RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL; - switch (CCCode) { - case ISD::SETEQ: - case ISD::SETOEQ: - LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64; - break; - case ISD::SETNE: - case ISD::SETUNE: - LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : RTLIB::UNE_F64; - break; - case ISD::SETGE: - case ISD::SETOGE: - LC1 = (VT == MVT::f32) ? 
RTLIB::OGE_F32 : RTLIB::OGE_F64; - break; - case ISD::SETLT: - case ISD::SETOLT: - LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; - break; - case ISD::SETLE: - case ISD::SETOLE: - LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64; - break; - case ISD::SETGT: - case ISD::SETOGT: - LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64; - break; - case ISD::SETUO: - LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64; - break; - case ISD::SETO: - LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : RTLIB::O_F64; - break; - default: - LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64; - switch (CCCode) { - case ISD::SETONE: - // SETONE = SETOLT | SETOGT - LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; - // Fallthrough - case ISD::SETUGT: - LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64; - break; - case ISD::SETUGE: - LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64; - break; - case ISD::SETULT: - LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; - break; - case ISD::SETULE: - LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64; - break; - case ISD::SETUEQ: - LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64; - break; - default: llvm_unreachable("Do not know how to soften this setcc!"); - } - } - - // Use the target specific return value for comparions lib calls. - EVT RetVT = TLI.getCmpLibcallReturnType(); - SDValue Ops[2] = { LHSInt, RHSInt }; - NewLHS = MakeLibCall(LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl); - NewRHS = DAG.getConstant(0, RetVT); - CCCode = TLI.getCmpLibcallCC(LC1); - if (LC2 != RTLIB::UNKNOWN_LIBCALL) { - SDValue Tmp = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), - NewLHS, NewRHS, DAG.getCondCode(CCCode)); - NewLHS = MakeLibCall(LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl); - NewLHS = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), NewLHS, - NewRHS, DAG.getCondCode(TLI.getCmpLibcallCC(LC2))); - NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS); - NewRHS = SDValue(); - } -} - SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0), GetSoftenedFloat(N->getOperand(0))); @@ -706,15 +621,19 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get(); - SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); - // If SoftenSetCCOperands returned a scalar, we need to compare the result + EVT VT = NewLHS.getValueType(); + NewLHS = GetSoftenedFloat(NewLHS); + NewRHS = GetSoftenedFloat(NewRHS); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. 
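The table above (now in TLI.softenSetCCOperands) maps each FP condition code onto one ordered-comparison libcall, or two for the unordered codes, whose setcc results are OR'ed together. The SETUEQ row, for instance, is "unordered or equal"; with the host FP environment standing in for the RTLIB calls:

    #include <cassert>
    #include <cmath>
    #include <limits>

    // SETUEQ lowers to two "libcalls" -- an unordered test (UO) and an
    // ordered equality (OEQ) -- combined with OR, as in the table above.
    bool setueq(float A, float B) {
      bool Unordered = std::isunordered(A, B); // RTLIB::UO_F32
      bool OrderedEq = (A == B);               // RTLIB::OEQ_F32
      return Unordered || OrderedEq;
    }

    int main() {
      float NaN = std::numeric_limits<float>::quiet_NaN();
      assert(setueq(1.0f, 1.0f));  // equal
      assert(setueq(NaN, 1.0f));   // unordered
      assert(!setueq(1.0f, 2.0f)); // ordered, not equal
      return 0;
    }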
if (NewRHS.getNode() == 0) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); @@ -733,7 +652,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { @@ -741,22 +660,26 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) { EVT RVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16; SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); - SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); - // If SoftenSetCCOperands returned a scalar, we need to compare the result + EVT VT = NewLHS.getValueType(); + NewLHS = GetSoftenedFloat(NewLHS); + NewRHS = GetSoftenedFloat(NewRHS); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. if (NewRHS.getNode() == 0) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); @@ -773,9 +696,13 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); - SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); - // If SoftenSetCCOperands returned a scalar, use it. + EVT VT = NewLHS.getValueType(); + NewLHS = GetSoftenedFloat(NewLHS); + NewRHS = GetSoftenedFloat(NewRHS); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If softenSetCCOperands returned a scalar, use it. 
if (NewRHS.getNode() == 0) { assert(NewLHS.getValueType() == N->getValueType(0) && "Unexpected setcc expansion!"); @@ -886,9 +813,11 @@ void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, assert(NVT.getSizeInBits() == integerPartWidth && "Do not know how to expand this float constant!"); APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt(); - Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, C.getRawData()[1])), + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(integerPartWidth, C.getRawData()[1])), NVT); - Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, C.getRawData()[0])), + Hi = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(integerPartWidth, C.getRawData()[0])), NVT); } @@ -947,13 +876,13 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::DIV_F32, - RTLIB::DIV_F64, - RTLIB::DIV_F80, - RTLIB::DIV_PPCF128), - N->getValueType(0), Ops, 2, false, - N->getDebugLoc()); + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); GetPairElements(Call, Lo, Hi); } @@ -1014,26 +943,26 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; - SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::FMA_F32, - RTLIB::FMA_F64, - RTLIB::FMA_F80, - RTLIB::FMA_PPCF128), - N->getValueType(0), Ops, 3, false, - N->getDebugLoc()); + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_PPCF128), + N->getValueType(0), Ops, 3, false, + N->getDebugLoc()); GetPairElements(Call, Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::MUL_F32, - RTLIB::MUL_F64, - RTLIB::MUL_F80, - RTLIB::MUL_PPCF128), - N->getValueType(0), Ops, 2, false, - N->getDebugLoc()); + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); GetPairElements(Call, Lo, Hi); } @@ -1060,7 +989,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0)); - Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(NVT.getSizeInBits(), 0)), NVT); } void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N, @@ -1111,13 +1041,13 @@ void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - RTLIB::SUB_PPCF128), - N->getValueType(0), Ops, 2, false, 
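ExpandFloatRes_ConstantFP above splits a ppcf128 constant into two f64 halves taken straight from the 128-bit payload; the represented value is the exact sum of the pair. A bit-level sketch of that round trip (the particular hi/lo values are illustrative, and which raw word is which is an ABI detail):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      // A ppcf128 value is a pair of f64s whose exact sum is the number;
      // the constant's 128 bits are the two doubles back to back.
      double Parts[2] = {1.0, 1.0e-20}; // hi and lo halves

      uint64_t Raw[2];
      std::memcpy(Raw, Parts, sizeof(Raw)); // bitcastToAPInt()

      double Hi, Lo; // the two f64 constants the expansion emits
      std::memcpy(&Hi, &Raw[0], 8);
      std::memcpy(&Lo, &Raw[1], 8);
      assert(Hi == Parts[0] && Lo == Parts[1]);
      return 0;
    }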
- N->getDebugLoc()); + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); GetPairElements(Call, Lo, Hi); } @@ -1155,7 +1085,8 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, Chain = Hi.getValue(1); // The low part is zero. - Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(NVT.getSizeInBits(), 0)), NVT); // Modified the chain - switch anything that used the old chain to use the // new one. @@ -1179,7 +1110,8 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, // The integer can be represented exactly in an f64. Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, MVT::i32, Src); - Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(NVT.getSizeInBits(), 0)), NVT); Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src); } else { RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -1193,7 +1125,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); - Hi = MakeLibCall(LC, VT, &Src, 1, true, dl); + Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl); GetPairElements(Hi, Lo, Hi); } @@ -1225,7 +1157,8 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, } Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, - DAG.getConstantFP(APFloat(APInt(128, Parts)), + DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble, + APInt(128, Parts)), MVT::ppcf128)); Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT), Lo, Hi, DAG.getCondCode(ISD::SETLT)); @@ -1364,7 +1297,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - return MakeLibCall(LC, RVT, &N->getOperand(0), 1, false, dl); + return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl); } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { @@ -1377,7 +1310,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { assert(N->getOperand(0).getValueType() == MVT::ppcf128 && "Logic only correct for ppcf128!"); const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; - APFloat APF = APFloat(APInt(128, TwoE31)); + APFloat APF = APFloat(APFloat::PPCDoubleDouble, APInt(128, TwoE31)); SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128); // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. 
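The TwoE31 constant above implements the classic unsigned-from-float expansion for a target with only signed conversions: values below 2^31 convert directly, larger ones have 2^31 subtracted before the signed convert and added back as 0x80000000, exactly the SELECT_CC the comment describes. In plain C++ for the 32-bit case:

    #include <cassert>
    #include <cstdint>

    uint32_t fptoui_via_fptosi(double X) {
      const double TwoE31 = 2147483648.0; // 2^31, the 0x41e0... constant
      if (X < TwoE31)                     // the SELECT_CC on X >= 2^31
        return static_cast<uint32_t>(static_cast<int32_t>(X));
      return static_cast<uint32_t>(static_cast<int32_t>(X - TwoE31))
             + 0x80000000u;
    }

    int main() {
      assert(fptoui_via_fptosi(0.0) == 0u);
      assert(fptoui_via_fptosi(123.0) == 123u);
      assert(fptoui_via_fptosi(3000000000.0) == 3000000000u);
      assert(fptoui_via_fptosi(4294967295.0) == 4294967295u);
      return 0;
    }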
@@ -1396,7 +1329,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); - return MakeLibCall(LC, N->getValueType(0), &N->getOperand(0), 1, false, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), &N->getOperand(0), 1, + false, dl); } SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 5e33ef1..182b7f3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1767,7 +1767,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); - SplitInteger(MakeLibCall(LC, VT, &Op, 1, true/*irrelevant*/, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, dl), + Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, @@ -1777,7 +1778,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); - SplitInteger(MakeLibCall(LC, VT, &Op, 1, false/*irrelevant*/, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, dl), + Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, @@ -1992,7 +1994,8 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, dl), + Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, @@ -2054,7 +2057,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, @@ -2092,9 +2095,19 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, // Expand the subcomponents. SDValue LHSL, LHSH; GetExpandedInteger(N->getOperand(0), LHSL, LHSH); - - SDValue Ops[] = { LHSL, LHSH, N->getOperand(1) }; EVT VT = LHSL.getValueType(); + + // If the shift amount operand is coming from a vector legalization it may + // have an illegal type. Fix that first by casting the operand, otherwise + // the new SHL_PARTS operation would need further legalization. 
+ SDValue ShiftOp = N->getOperand(1); + MVT ShiftTy = TLI.getShiftAmountTy(VT); + assert(ShiftTy.getSizeInBits() >= Log2_32_Ceil(VT.getSizeInBits()) && + "ShiftAmountTy is too small to cover the range of this type!"); + if (ShiftOp.getValueType() != ShiftTy) + ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy); + + SDValue Ops[] = { LHSL, LHSH, ShiftOp }; Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3); Hi = Lo.getValue(1); return; @@ -2138,7 +2151,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(MakeLibCall(LC, VT, Ops, 2, isSigned, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl), Lo, Hi); return; } @@ -2221,7 +2234,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, @@ -2361,7 +2374,7 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, @@ -2381,7 +2394,7 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, @@ -2668,7 +2681,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); - return MakeLibCall(LC, DstVT, &Op, 1, true, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, N->getDebugLoc()); } SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -2764,17 +2777,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL); } -static const fltSemantics *EVTToAPFloatSemantics(EVT VT) { - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unknown FP format"); - case MVT::f32: return &APFloat::IEEEsingle; - case MVT::f64: return &APFloat::IEEEdouble; - case MVT::f80: return &APFloat::x87DoubleExtended; - case MVT::f128: return &APFloat::IEEEquad; - case MVT::ppcf128: return &APFloat::PPCDoubleDouble; - } -} - SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Op = N->getOperand(0); EVT SrcVT = Op.getValueType(); @@ -2784,8 +2786,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { // The following optimization is valid only if every value in SrcVT (when // treated as signed) is representable in DstVT. Check that the mantissa // size of DstVT is >= than the number of bits in SrcVT -1. 
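// A concrete instance of the guard above (a sketch, not from the patch):
// expanding SrcVT == i64 into DstVT == f128 has
// APFloat::semanticsPrecision(IEEEquad) == 113 >= 64 - 1, so every i64 bit
// pattern, read as *signed*, converts to f128 exactly; the unsigned value
// is then recovered by adding 2^64 whenever the original sign bit was set.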
- const fltSemantics *sem = EVTToAPFloatSemantics(DstVT); - if (APFloat::semanticsPrecision(*sem) >= SrcVT.getSizeInBits()-1 && + const fltSemantics &sem = DAG.EVTToAPFloatSemantics(DstVT); + if (APFloat::semanticsPrecision(sem) >= SrcVT.getSizeInBits()-1 && TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){ // Do a signed conversion then adjust the result. SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op); @@ -2846,7 +2848,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this UINT_TO_FP!"); - return MakeLibCall(LC, DstVT, &Op, 1, true, dl); + return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl); } SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 6aea2d8..e26d165 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -1020,50 +1020,20 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, unsigned NumOps = N->getNumOperands(); DebugLoc dl = N->getDebugLoc(); if (NumOps == 0) { - return MakeLibCall(LC, N->getValueType(0), 0, 0, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, dl); } else if (NumOps == 1) { SDValue Op = N->getOperand(0); - return MakeLibCall(LC, N->getValueType(0), &Op, 1, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, dl); } else if (NumOps == 2) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - return MakeLibCall(LC, N->getValueType(0), Ops, 2, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, dl); } SmallVector<SDValue, 8> Ops(NumOps); for (unsigned i = 0; i < NumOps; ++i) Ops[i] = N->getOperand(i); - return MakeLibCall(LC, N->getValueType(0), &Ops[0], NumOps, isSigned, dl); -} - -/// MakeLibCall - Generate a libcall taking the given operands as arguments and -/// returning a result of type RetVT. -SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT, - const SDValue *Ops, unsigned NumOps, - bool isSigned, DebugLoc dl) { - TargetLowering::ArgListTy Args; - Args.reserve(NumOps); - - TargetLowering::ArgListEntry Entry; - for (unsigned i = 0; i != NumOps; ++i) { - Entry.Node = Ops[i]; - Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.isSExt = isSigned; - Entry.isZExt = !isSigned; - Args.push_back(Entry); - } - SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy()); - - Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); - TargetLowering:: - CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, TLI.getLibcallCallingConv(LC), - /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl); - std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI); - - return CallInfo.first; + return TLI.makeLibCall(DAG, LC, N->getValueType(0), + &Ops[0], NumOps, isSigned, dl); } // ExpandChainLibCall - Expand a node into a call to a libcall. 
Similar to diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 8c53ba3..7de42ea 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -80,35 +80,35 @@ private: /// PromotedIntegers - For integer nodes that are below legal width, this map /// indicates what promoted value to use. - DenseMap<SDValue, SDValue> PromotedIntegers; + SmallDenseMap<SDValue, SDValue, 8> PromotedIntegers; /// ExpandedIntegers - For integer nodes that need to be expanded this map /// indicates which operands are the expanded version of the input. - DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedIntegers; + SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedIntegers; /// SoftenedFloats - For floating point nodes converted to integers of /// the same size, this map indicates the converted value to use. - DenseMap<SDValue, SDValue> SoftenedFloats; + SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats; /// ExpandedFloats - For float nodes that need to be expanded this map /// indicates which operands are the expanded version of the input. - DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedFloats; + SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats; /// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the /// scalar value of type 'ty' to use. - DenseMap<SDValue, SDValue> ScalarizedVectors; + SmallDenseMap<SDValue, SDValue, 8> ScalarizedVectors; /// SplitVectors - For nodes that need to be split this map indicates /// which operands are the expanded version of the input. - DenseMap<SDValue, std::pair<SDValue, SDValue> > SplitVectors; + SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> SplitVectors; /// WidenedVectors - For vector nodes that need to be widened, indicates /// the widened value to use. - DenseMap<SDValue, SDValue> WidenedVectors; + SmallDenseMap<SDValue, SDValue, 8> WidenedVectors; /// ReplacedValues - For values that have been replaced with another, /// indicates the replacement value to use. - DenseMap<SDValue, SDValue> ReplacedValues; + SmallDenseMap<SDValue, SDValue, 8> ReplacedValues; /// Worklist - This defines a worklist of nodes to process. 
In order to be /// pushed onto this worklist, all operands of a node must have already been @@ -159,9 +159,6 @@ private: SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index); SDValue JoinIntegers(SDValue Lo, SDValue Hi); SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); - SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT, - const SDValue *Ops, unsigned NumOps, bool isSigned, - DebugLoc dl); std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); @@ -433,9 +430,6 @@ private: SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); - void SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, - ISD::CondCode &CCCode, DebugLoc dl); - //===--------------------------------------------------------------------===// // Float Expansion Support: LegalizeFloatTypes.cpp //===--------------------------------------------------------------------===// diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index de6bbe3..c6e066e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -40,7 +40,7 @@ class VectorLegalizer { /// LegalizedNodes - For nodes that are of legal width, and that have more /// than one use, this map indicates what regularized operand to use. This /// allows us to avoid legalizing the same thing more than once. - DenseMap<SDValue, SDValue> LegalizedNodes; + SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes; // Adds a node to the translation cache void AddLegalizedOperand(SDValue From, SDValue To) { @@ -61,6 +61,8 @@ class VectorLegalizer { // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if // SINT_TO_FLOAT and SHR on vectors isn't legal. SDValue ExpandUINT_TO_FLOAT(SDValue Op); + // Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. + SDValue ExpandSEXTINREG(SDValue Op); // Implement vselect in terms of XOR, AND, OR when blend is not supported // by the target. SDValue ExpandVSELECT(SDValue Op); @@ -83,6 +85,25 @@ class VectorLegalizer { }; bool VectorLegalizer::Run() { + // Before we start legalizing vector nodes, check if there are any vectors. + bool HasVectors = false; + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) { + // Check if the values of the nodes contain vectors. We don't need to check + // the operands because we are going to check their values at some point. + for (SDNode::value_iterator J = I->value_begin(), E = I->value_end(); + J != E; ++J) + HasVectors |= J->isVector(); + + // If we found a vector node we can start the legalization. + if (HasVectors) + break; + } + + // If this basic block has no vectors then no need to legalize vectors. + if (!HasVectors) + return false; + // The legalize process is inherently a bottom-up recursive process (users // legalize their uses before themselves). Given infinite stack space, we // could just start legalizing on the root and traverse the whole graph. 
In @@ -262,7 +283,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { // FALL THROUGH } case TargetLowering::Expand: - if (Node->getOpcode() == ISD::VSELECT) + if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG) + Result = ExpandSEXTINREG(Op); + else if (Node->getOpcode() == ISD::VSELECT) Result = ExpandVSELECT(Op); else if (Node->getOpcode() == ISD::SELECT) Result = ExpandSELECT(Op); @@ -359,30 +382,135 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { EVT SrcVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = LD->getExtensionType(); - SmallVector<SDValue, 8> LoadVals; + SmallVector<SDValue, 8> Vals; SmallVector<SDValue, 8> LoadChains; unsigned NumElem = SrcVT.getVectorNumElements(); - unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; - for (unsigned Idx=0; Idx<NumElem; Idx++) { - SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl, - Op.getNode()->getValueType(0).getScalarType(), - Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), - SrcVT.getScalarType(), - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + EVT SrcEltVT = SrcVT.getScalarType(); + EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType(); + + if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { + // When the elements in a vector are not byte-addressable, we cannot + // directly load each element by advancing a pointer, which can only + // address bytes. Instead, we load all significant words, mask bits off, + // and concatenate them to form each element. Finally, they are extended + // to the destination scalar type to build the destination vector. + EVT WideVT = TLI.getPointerTy(); + + assert(WideVT.isRound() && + "Cannot handle the case when the widest integer is" + " not a power of 2."); + assert(WideVT.bitsGE(SrcEltVT) && + "Type is not legalized?"); + + unsigned WideBytes = WideVT.getStoreSize(); + unsigned Offset = 0; + unsigned RemainingBytes = SrcVT.getStoreSize(); + SmallVector<SDValue, 8> LoadVals; + + while (RemainingBytes > 0) { + SDValue ScalarLoad; + unsigned LoadBytes = WideBytes; + + if (RemainingBytes >= LoadBytes) { + ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR, + LD->getPointerInfo().getWithOffset(Offset), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), LD->getAlignment()); + } else { + EVT LoadVT = WideVT; + while (RemainingBytes < LoadBytes) { + LoadBytes >>= 1; // Reduce the load size by half. + LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3); + } + ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, + LD->getPointerInfo().getWithOffset(Offset), + LoadVT, LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + } - BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getIntPtrConstant(Stride)); + RemainingBytes -= LoadBytes; + Offset += LoadBytes; + BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, + DAG.getIntPtrConstant(LoadBytes)); + + LoadVals.push_back(ScalarLoad.getValue(0)); + LoadChains.push_back(ScalarLoad.getValue(1)); + } + + // Extract bits, pack and extend/trunc them into destination type.
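// Worked example of the extraction below (a sketch, not from the patch):
// loading <4 x i3> gives SrcVT.getStoreSize() == 2, so on a target with a
// 32-bit pointer type the 12 payload bits arrive through one 16-bit
// extending load into a WideVT == i32 value; SrcEltBits == 3,
// SrcEltBitMask == 0x7, and element I sits at BitOffset == 3*I, e.g.
// element 2 is (LoadVals[0] >> 6) & 0x7 before being extended to DstEltVT.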
+ unsigned SrcEltBits = SrcEltVT.getSizeInBits(); + SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, WideVT); + + unsigned BitOffset = 0; + unsigned WideIdx = 0; + unsigned WideBits = WideVT.getSizeInBits(); + + for (unsigned Idx = 0; Idx != NumElem; ++Idx) { + SDValue Lo, Hi, ShAmt; + + if (BitOffset < WideBits) { + ShAmt = DAG.getConstant(BitOffset, TLI.getShiftAmountTy(WideVT)); + Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt); + Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask); + } - LoadVals.push_back(ScalarLoad.getValue(0)); - LoadChains.push_back(ScalarLoad.getValue(1)); + BitOffset += SrcEltBits; + if (BitOffset >= WideBits) { + WideIdx++; + Offset -= WideBits; + if (Offset > 0) { + ShAmt = DAG.getConstant(SrcEltBits - Offset, + TLI.getShiftAmountTy(WideVT)); + Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt); + Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask); + } + } + + if (Hi.getNode()) + Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi); + + switch (ExtType) { + default: llvm_unreachable("Unknown extended-load op!"); + case ISD::EXTLOAD: + Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT); + break; + case ISD::ZEXTLOAD: + Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT); + break; + case ISD::SEXTLOAD: + ShAmt = DAG.getConstant(WideBits - SrcEltBits, + TLI.getShiftAmountTy(WideVT)); + Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt); + Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt); + Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT); + break; + } + Vals.push_back(Lo); + } + } else { + unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; + + for (unsigned Idx=0; Idx<NumElem; Idx++) { + SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl, + Op.getNode()->getValueType(0).getScalarType(), + Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), + SrcVT.getScalarType(), + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + + BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, + DAG.getIntPtrConstant(Stride)); + + Vals.push_back(ScalarLoad.getValue(0)); + LoadChains.push_back(ScalarLoad.getValue(1)); + } } SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LoadChains[0], LoadChains.size()); SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, - Op.getNode()->getValueType(0), &LoadVals[0], LoadVals.size()); + Op.getNode()->getValueType(0), &Vals[0], Vals.size()); AddLegalizedOperand(Op.getValue(0), Value); AddLegalizedOperand(Op.getValue(1), NewChain); @@ -501,6 +629,26 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); } +SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { + EVT VT = Op.getValueType(); + + // Make sure that the SRA and SHL instructions are available. + if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Op.getNode()); + + DebugLoc DL = Op.getDebugLoc(); + EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT(); + + unsigned BW = VT.getScalarType().getSizeInBits(); + unsigned OrigBW = OrigTy.getScalarType().getSizeInBits(); + SDValue ShiftSz = DAG.getConstant(BW - OrigBW, VT); + + Op = Op.getOperand(0); + Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz); + return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); +} + SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. 
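
The new ExpandSEXTINREG above is the classic shift-pair expansion. A scalar sketch of the transform in plain C++ (illustrative only; the DAG version applies the two shifts per vector lane, and this assumes >> on a signed int is arithmetic, as it is on mainstream compilers):

#include <cstdint>

// Sign-extend the low OrigBW bits of X: shift the payload to the top with
// SHL, then arithmetic-shift it back down with SRA so the payload's sign
// bit fills the upper BW - OrigBW bits.
int32_t SignExtendInReg(int32_t X, unsigned OrigBW) {
  unsigned Sh = 32 - OrigBW;                 // BW - OrigBW in the DAG code
  return (int32_t)((uint32_t)X << Sh) >> Sh;
}

// e.g. SignExtendInReg(0b101, 3) == -3
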
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 31b9bf3..addfccb 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/DataLayout.h" @@ -142,6 +143,12 @@ private: std::vector<SUnit*> LiveRegDefs; std::vector<SUnit*> LiveRegGens; + // Collect interferences between physical register use/defs. + // Each interference is an SUnit and set of physical registers. + SmallVector<SUnit*, 4> Interferences; + typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT; + LRegsMapT LRegsMap; + /// Topo - A topological ordering for SUnits which permits fast IsReachable /// and similar queries. ScheduleDAGTopologicalSort Topo; @@ -225,6 +232,8 @@ private: SmallVector<SUnit*, 2>&); bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); + void releaseInterferences(unsigned Reg = 0); + SUnit *PickNodeToScheduleBottomUp(); void ListScheduleBottomUp(); @@ -274,8 +283,17 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, // the expansion of custom DAG-to-DAG patterns. if (VT == MVT::Untyped) { const SDNode *Node = RegDefPos.GetNode(); - unsigned Opcode = Node->getMachineOpcode(); + // Special handling for CopyFromReg of untyped values. + if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg); + RegClass = RC->getID(); + Cost = 1; + return; + } + + unsigned Opcode = Node->getMachineOpcode(); if (Opcode == TargetOpcode::REG_SEQUENCE) { unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); @@ -312,6 +330,7 @@ void ScheduleDAGRRList::Schedule() { LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL); LiveRegGens.resize(TRI->getNumRegs() + 1, NULL); CallSeqEndForStart.clear(); + assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences"); // Build the scheduling graph. BuildSchedGraph(NULL); @@ -725,6 +744,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { --NumLiveRegs; LiveRegDefs[I->getReg()] = NULL; LiveRegGens[I->getReg()] = NULL; + releaseInterferences(I->getReg()); } } // Release the special call resource dependence, if this is the beginning @@ -739,6 +759,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { --NumLiveRegs; LiveRegDefs[CallResource] = NULL; LiveRegGens[CallResource] = NULL; + releaseInterferences(CallResource); } } @@ -794,6 +815,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { --NumLiveRegs; LiveRegDefs[I->getReg()] = NULL; LiveRegGens[I->getReg()] = NULL; + releaseInterferences(I->getReg()); } } @@ -821,6 +843,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { --NumLiveRegs; LiveRegDefs[CallResource] = NULL; LiveRegGens[CallResource] = NULL; + releaseInterferences(CallResource); } } @@ -1305,34 +1328,58 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { return !LRegs.empty(); } +void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { + // Add the nodes that aren't ready back onto the available list. 
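// (Contract inferred from the loop below, not stated in the patch: Reg == 0
// releases every pending interference, while a nonzero Reg releases only
// the SUnits whose recorded live-register set contains Reg. The index runs
// backwards because erasure swaps the back element into the vacated slot,
// and a descending walk has already visited it.)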
+ for (unsigned i = Interferences.size(); i > 0; --i) { + SUnit *SU = Interferences[i-1]; + LRegsMapT::iterator LRegsPos = LRegsMap.find(SU); + if (Reg) { + SmallVector<unsigned, 4> &LRegs = LRegsPos->second; + if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end()) + continue; + } + SU->isPending = false; + // The interfering node may no longer be available due to backtracking. + // Furthermore, it may have been made available again, in which case it is + // now already in the AvailableQueue. + if (SU->isAvailable && !SU->NodeQueueId) { + DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n'); + AvailableQueue->push(SU); + } + if (i < Interferences.size()) + Interferences[i-1] = Interferences.back(); + Interferences.pop_back(); + LRegsMap.erase(LRegsPos); + } +} + /// Return a node that can be scheduled in this cycle. Requirements: /// (1) Ready: latency has been satisfied /// (2) No Hazards: resources are available /// (3) No Interferences: may unschedule to break register interferences. SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { - SmallVector<SUnit*, 4> Interferences; - DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; - - SUnit *CurSU = AvailableQueue->pop(); + SUnit *CurSU = AvailableQueue->empty() ? 0 : AvailableQueue->pop(); while (CurSU) { SmallVector<unsigned, 4> LRegs; if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) break; - LRegsMap.insert(std::make_pair(CurSU, LRegs)); - - CurSU->isPending = true; // This SU is not in AvailableQueue right now. - Interferences.push_back(CurSU); + DEBUG(dbgs() << " Interfering reg " << TRI->getName(LRegs[0]) + << " SU #" << CurSU->NodeNum << '\n'); + std::pair<LRegsMapT::iterator, bool> LRegsPair = + LRegsMap.insert(std::make_pair(CurSU, LRegs)); + if (LRegsPair.second) { + CurSU->isPending = true; // This SU is not in AvailableQueue right now. + Interferences.push_back(CurSU); + } + else { + assert(CurSU->isPending && "Interferences are pending"); + // Update the interference with current live regs. + LRegsPair.first->second = LRegs; + } CurSU = AvailableQueue->pop(); } - if (CurSU) { - // Add the nodes that aren't ready back onto the available list. - for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { - Interferences[i]->isPending = false; - assert(Interferences[i]->isAvailable && "must still be available"); - AvailableQueue->push(Interferences[i]); - } + if (CurSU) return CurSU; - } // All candidates are delayed due to live physical reg dependencies. // Try backtracking, code duplication, or inserting cross class copies @@ -1353,6 +1400,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { } } if (!WillCreateCycle(TrySU, BtSU)) { + // BacktrackBottomUp mutates Interferences! BacktrackBottomUp(TrySU, BtSU); // Force the current node to be scheduled before the node that @@ -1362,19 +1410,19 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { if (!BtSU->isPending) AvailableQueue->remove(BtSU); } + DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU(" + << TrySU->NodeNum << ")\n"); AddPred(TrySU, SDep(BtSU, SDep::Artificial)); // If one or more successors has been unscheduled, then the current - // node is no longer avaialable. Schedule a successor that's now - // available instead. - if (!TrySU->isAvailable) { + // node is no longer available. + if (!TrySU->isAvailable) CurSU = AvailableQueue->pop(); - } else { + AvailableQueue->remove(TrySU); CurSU = TrySU; - TrySU->isPending = false; - Interferences.erase(Interferences.begin()+i); } + // Interferences has been mutated.
We must break. break; } } @@ -1425,17 +1473,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { TrySU->isAvailable = false; CurSU = NewDef; } - assert(CurSU && "Unable to resolve live physical register dependencies!"); - - // Add the nodes that aren't ready back onto the available list. - for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { - Interferences[i]->isPending = false; - // May no longer be available due to backtracking. - if (Interferences[i]->isAvailable) { - AvailableQueue->push(Interferences[i]); - } - } return CurSU; } @@ -1456,7 +1494,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // While Available queue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. Sequence.reserve(SUnits.size()); - while (!AvailableQueue->empty()) { + while (!AvailableQueue->empty() || !Interferences.empty()) { DEBUG(dbgs() << "\nExamining Available:\n"; AvailableQueue->dump(this)); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 76067a1..2ff37e0 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -15,8 +15,8 @@ #ifndef SCHEDULEDAGSDNODES_H #define SCHEDULEDAGSDNODES_H +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/CodeGen/SelectionDAG.h" namespace llvm { /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 6c29c67..db8ae6e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -60,18 +60,6 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { return Res; } -static const fltSemantics *EVTToAPFloatSemantics(EVT VT) { - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unknown FP format"); - case MVT::f16: return &APFloat::IEEEhalf; - case MVT::f32: return &APFloat::IEEEsingle; - case MVT::f64: return &APFloat::IEEEdouble; - case MVT::f80: return &APFloat::x87DoubleExtended; - case MVT::f128: return &APFloat::IEEEquad; - case MVT::ppcf128: return &APFloat::PPCDoubleDouble; - } -} - // Default null implementations of the callbacks. void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {} void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} @@ -95,7 +83,8 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT, // convert modifies in place, so make a copy. 
APFloat Val2 = APFloat(Val); bool losesInfo; - (void) Val2.convert(*EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, + (void) Val2.convert(SelectionDAG::EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &losesInfo); return !losesInfo; } @@ -1081,7 +1070,7 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { EltVT==MVT::f16) { bool ignored; APFloat apf = APFloat(Val); - apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, + apf.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, &ignored); return getConstantFP(apf, VT, isTarget); } else @@ -2442,7 +2431,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { - APFloat apf(APInt::getNullValue(VT.getSizeInBits())); + APFloat apf(EVTToAPFloatSemantics(VT), + APInt::getNullValue(VT.getSizeInBits())); (void)apf.convertFromAPInt(Val, Opcode==ISD::SINT_TO_FP, APFloat::rmNearestTiesToEven); @@ -2450,9 +2440,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, } case ISD::BITCAST: if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) - return getConstantFP(APFloat(Val), VT); + return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT); else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) - return getConstantFP(APFloat(Val), VT); + return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT); break; case ISD::BSWAP: return getConstant(Val.byteSwap(), VT); @@ -2499,7 +2489,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, bool ignored; // This can return overflow, underflow, or inexact; we don't care. // FIXME need to be more flexible about rounding mode. - (void)V.convert(*EVTToAPFloatSemantics(VT), + (void)V.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, &ignored); return getConstantFP(V, VT); } @@ -2690,44 +2680,117 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, return SDValue(N, 0); } -SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, - EVT VT, - ConstantSDNode *Cst1, - ConstantSDNode *Cst2) { - const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue(); +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, + SDNode *Cst1, SDNode *Cst2) { + SmallVector<std::pair<ConstantSDNode *, ConstantSDNode *>, 4> Inputs; + SmallVector<SDValue, 4> Outputs; + EVT SVT = VT.getScalarType(); - switch (Opcode) { - case ISD::ADD: return getConstant(C1 + C2, VT); - case ISD::SUB: return getConstant(C1 - C2, VT); - case ISD::MUL: return getConstant(C1 * C2, VT); - case ISD::UDIV: - if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT); - break; - case ISD::UREM: - if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT); - break; - case ISD::SDIV: - if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT); - break; - case ISD::SREM: - if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT); - break; - case ISD::AND: return getConstant(C1 & C2, VT); - case ISD::OR: return getConstant(C1 | C2, VT); - case ISD::XOR: return getConstant(C1 ^ C2, VT); - case ISD::SHL: return getConstant(C1 << C2, VT); - case ISD::SRL: return getConstant(C1.lshr(C2), VT); - case ISD::SRA: return getConstant(C1.ashr(C2), VT); - case ISD::ROTL: return getConstant(C1.rotl(C2), VT); - case ISD::ROTR: return getConstant(C1.rotr(C2), VT); - default: break; + ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1); + ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2); + if (Scalar1 && 
Scalar2) { + // Scalar instruction. + Inputs.push_back(std::make_pair(Scalar1, Scalar2)); + } else { + // For vectors extract each constant element into Inputs so we can constant + // fold them individually. + BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1); + BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2); + if (!BV1 || !BV2) + return SDValue(); + + assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!"); + + for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) { + ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I)); + ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I)); + if (!V1 || !V2) // Not a constant, bail. + return SDValue(); + + // Avoid BUILD_VECTOR nodes that perform implicit truncation. + // FIXME: This is valid and could be handled by truncating the APInts. + if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) + return SDValue(); + + Inputs.push_back(std::make_pair(V1, V2)); + } } - return SDValue(); + // We have a number of constant values, constant fold them element by element. + for (unsigned I = 0, E = Inputs.size(); I != E; ++I) { + const APInt &C1 = Inputs[I].first->getAPIntValue(); + const APInt &C2 = Inputs[I].second->getAPIntValue(); + + switch (Opcode) { + case ISD::ADD: + Outputs.push_back(getConstant(C1 + C2, SVT)); + break; + case ISD::SUB: + Outputs.push_back(getConstant(C1 - C2, SVT)); + break; + case ISD::MUL: + Outputs.push_back(getConstant(C1 * C2, SVT)); + break; + case ISD::UDIV: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.udiv(C2), SVT)); + break; + case ISD::UREM: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.urem(C2), SVT)); + break; + case ISD::SDIV: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.sdiv(C2), SVT)); + break; + case ISD::SREM: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.srem(C2), SVT)); + break; + case ISD::AND: + Outputs.push_back(getConstant(C1 & C2, SVT)); + break; + case ISD::OR: + Outputs.push_back(getConstant(C1 | C2, SVT)); + break; + case ISD::XOR: + Outputs.push_back(getConstant(C1 ^ C2, SVT)); + break; + case ISD::SHL: + Outputs.push_back(getConstant(C1 << C2, SVT)); + break; + case ISD::SRL: + Outputs.push_back(getConstant(C1.lshr(C2), SVT)); + break; + case ISD::SRA: + Outputs.push_back(getConstant(C1.ashr(C2), SVT)); + break; + case ISD::ROTL: + Outputs.push_back(getConstant(C1.rotl(C2), SVT)); + break; + case ISD::ROTR: + Outputs.push_back(getConstant(C1.rotr(C2), SVT)); + break; + default: + return SDValue(); + } + } + + // Handle the scalar case first. + if (Outputs.size() == 1) + return Outputs.back(); + + // Otherwise build a big vector out of the scalar elements we generated. 
+ return getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, Outputs.data(), + Outputs.size()); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, - SDValue N1, SDValue N2) { +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, + SDValue N2) { ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); switch (Opcode) { @@ -3023,16 +3086,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, } } - if (N1C) { - if (N2C) { - SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C); - if (SV.getNode()) return SV; - } else { // Cannonicalize constant to RHS if commutative - if (isCommutativeBinOp(Opcode)) { - std::swap(N1C, N2C); - std::swap(N1, N2); - } - } + // Perform trivial constant folding. + SDValue SV = FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode()); + if (SV.getNode()) return SV; + + // Canonicalize constant to RHS if commutative. + if (N1C && !N2C && isCommutativeBinOp(Opcode)) { + std::swap(N1C, N2C); + std::swap(N1, N2); } // Constant fold FP operations. @@ -3040,7 +3101,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode()); if (N1CFP) { if (!N2CFP && isCommutativeBinOp(Opcode)) { - // Cannonicalize constant to RHS if commutative + // Canonicalize constant to RHS if commutative. std::swap(N1CFP, N2CFP); std::swap(N1, N2); } else if (N2CFP) { @@ -3084,7 +3145,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, bool ignored; // This can return overflow, underflow, or inexact; we don't care. // FIXME need to be more flexible about rounding mode. - (void)V.convert(*EVTToAPFloatSemantics(VT), + (void)V.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, &ignored); return getConstantFP(V, VT); } @@ -3316,17 +3377,6 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { &ArgChains[0], ArgChains.size()); } -/// SplatByte - Distribute ByteVal over NumBits bits. -static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) { - APInt Val = APInt(NumBits, ByteVal); - unsigned Shift = 8; - for (unsigned i = NumBits; i > 8; i >>= 1) { - Val = (Val << Shift) | Val; - Shift <<= 1; - } - return Val; -} - /// getMemsetValue - Vectorized representation of the memset value /// operand. static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, @@ -3335,17 +3385,18 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, unsigned NumBits = VT.getScalarType().getSizeInBits(); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) { - APInt Val = SplatByte(NumBits, C->getZExtValue() & 255); + assert(C->getAPIntValue().getBitWidth() == 8); + APInt Val = APInt::getSplat(NumBits, C->getAPIntValue()); if (VT.isInteger()) return DAG.getConstant(Val, VT); - return DAG.getConstantFP(APFloat(Val), VT); + return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), VT); } Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value); if (NumBits > 8) { // Use a multiplication with 0x010101... to extend the input to the // required length. 
- APInt Magic = SplatByte(NumBits, 0x01); + APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT)); } @@ -3374,10 +3425,11 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, } assert(!VT.isVector() && "Can't handle vector type here!"); - unsigned NumVTBytes = VT.getSizeInBits() / 8; + unsigned NumVTBits = VT.getSizeInBits(); + unsigned NumVTBytes = NumVTBits / 8; unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size())); - APInt Val(NumBytes*8, 0); + APInt Val(NumVTBits, 0); if (TLI.isLittleEndian()) { for (unsigned i = 0; i != NumBytes; ++i) Val |= (uint64_t)(unsigned char)Str[i] << i*8; @@ -3570,6 +3622,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); + + // Don't promote to an alignment that would require dynamic stack + // realignment. + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + if (!TRI->needsStackRealignment(MF)) + while (NewAlign > Align && + TLI.getDataLayout()->exceedsNaturalStackAlignment(NewAlign)) + NewAlign /= 2; + if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) @@ -3806,6 +3867,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, unsigned Align, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { + assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); // Check to see if we should lower the memcpy to loads and stores first. // For cases within the target-specified limits, this is the best choice. @@ -3873,6 +3935,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, unsigned Align, bool isVol, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { + assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); // Check to see if we should lower the memmove to loads and stores first. // For cases within the target-specified limits, this is the best choice. @@ -3927,6 +3990,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, MachinePointerInfo DstPtrInfo) { + assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); // Check to see if we should lower the memset to stores first. // For cases within the target-specified limits, this is the best choice. 
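
getMemsetValue above now splats the constant byte with APInt::getSplat and widens the variable case with the 0x0101... multiplication. A quick scalar check of that trick in plain C++ (names are illustrative):

#include <cassert>
#include <cstdint>

// Multiplying a zero-extended byte by 0x01010101 copies it into every byte
// lane, which is exactly what the ISD::MUL against the Magic splat does.
uint32_t SplatByte32(uint8_t B) {
  return 0x01010101u * B;
}

int main() {
  assert(SplatByte32(0xAB) == 0xABABABABu);
  return 0;
}
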
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 8c22db3..b8ab2a9 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -15,9 +15,9 @@ #include "SelectionDAGBuilder.h" #include "SDNodeDbgValue.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Analysis.h" @@ -3519,7 +3519,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { EVT VT = TLI.getValueType(I.getType()); - if (I.getAlignment() * 8 < VT.getSizeInBits()) + if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); SDValue L = @@ -3549,7 +3549,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { EVT VT = TLI.getValueType(I.getValueOperand()->getType()); - if (I.getAlignment() * 8 < VT.getSizeInBits()) + if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); if (TLI.getInsertFencesForAtomic()) @@ -3663,7 +3663,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, /// /// Op = (Op & 0x007fffff) | 0x3f800000; /// -/// where Op is the hexidecimal representation of floating point value. +/// where Op is the hexadecimal representation of floating point value. static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) { SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, @@ -3677,7 +3677,7 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) { /// /// (float)(int)(((Op & 0x7f800000) >> 23) - 127); /// -/// where Op is the hexidecimal representation of floating point value. +/// where Op is the hexadecimal representation of floating point value. static SDValue GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, DebugLoc dl) { @@ -3693,7 +3693,8 @@ GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, /// getF32Constant - Get 32-bit floating point constant. static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt) { - return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32); + return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), + MVT::f32); } /// expandExp - Lower an exp intrinsic. Handles the special sequences for @@ -4466,6 +4467,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + if (!Align) + Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false, MachinePointerInfo(I.getArgOperand(0)), @@ -4482,6 +4485,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + if (!Align) + Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. 
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol, MachinePointerInfo(I.getArgOperand(0)))); @@ -4499,6 +4504,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + if (!Align) + Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol, MachinePointerInfo(I.getArgOperand(0)), @@ -5168,6 +5175,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2); DAG.setRoot(Res); } + return 0; } case Intrinsic::invariant_start: // Discard region information. @@ -5276,8 +5284,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Check if target-independent constraints permit a tail call here. // Target-dependent constraints are checked within TLI.LowerCallTo. - if (isTailCall && - !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI)) + if (isTailCall && !isInTailCallPosition(CS, TLI)) isTailCall = false; TargetLowering:: @@ -5947,6 +5954,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Compute the constraint code and ConstraintType to use. TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); + if (OpInfo.ConstraintType == TargetLowering::C_Memory && + OpInfo.Type == InlineAsm::isClobber) + continue; + // If this is a memory input, and if the operand is not indirect, do what we // need to to provide an address for the memory input. if (OpInfo.ConstraintType == TargetLowering::C_Memory && @@ -6050,6 +6061,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { ExtraInfo |= InlineAsm::Extra_MayLoad; else if (OpInfo.Type == InlineAsm::isOutput) ExtraInfo |= InlineAsm::Extra_MayStore; + else if (OpInfo.Type == InlineAsm::isClobber) + ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); } } @@ -6584,10 +6597,6 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { const DataLayout *TD = TLI.getDataLayout(); SmallVector<ISD::InputArg, 16> Ins; - // Check whether the function can return without sret-demotion. - SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); - if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. 
SmallVector<EVT, 1> ValueVTs; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 5701b13..3b5823b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -140,6 +140,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FSQRT: return "fsqrt"; case ISD::FSIN: return "fsin"; case ISD::FCOS: return "fcos"; + case ISD::FSINCOS: return "fsincos"; case ISD::FTRUNC: return "ftrunc"; case ISD::FFLOOR: return "ffloor"; case ISD::FCEIL: return "fceil"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index d4e9a50..acae58c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -144,7 +144,12 @@ EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, "instruction selector")); static cl::opt<bool> EnableFastISelAbort("fast-isel-abort", cl::Hidden, - cl::desc("Enable abort calls when \"fast\" instruction fails")); + cl::desc("Enable abort calls when \"fast\" instruction selection " + "fails to lower an instruction")); +static cl::opt<bool> +EnableFastISelAbortArgs("fast-isel-abort-args", cl::Hidden, + cl::desc("Enable abort calls when \"fast\" instruction selection " + "fails to lower a formal argument")); static cl::opt<bool> UseMBPI("use-mbpi", @@ -354,6 +359,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TTI = getAnalysisIfAvailable<TargetTransformInfo>(); GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0; + TargetSubtargetInfo &ST = + const_cast<TargetSubtargetInfo&>(TM.getSubtarget<TargetSubtargetInfo>()); + ST.resetSubtargetFeatures(MF); + DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this); @@ -368,6 +377,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { SDB->init(GFI, *AA, LibInfo); + MF->setHasMSInlineAsm(false); SelectAllBasicBlocks(Fn); // If the first basic block in the function has live ins that need to be @@ -438,24 +448,26 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Determine if there are any calls in this machine function. MachineFrameInfo *MFI = MF->getFrameInfo(); - if (!MFI->hasCalls()) { - for (MachineFunction::const_iterator - I = MF->begin(), E = MF->end(); I != E; ++I) { - const MachineBasicBlock *MBB = I; - for (MachineBasicBlock::const_iterator - II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { - const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode()); - - if ((MCID.isCall() && !MCID.isReturn()) || - II->isStackAligningInlineAsm()) { - MFI->setHasCalls(true); - goto done; - } + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; + ++I) { + + if (MFI->hasCalls() && MF->hasMSInlineAsm()) + break; + + const MachineBasicBlock *MBB = I; + for (MachineBasicBlock::const_iterator II = MBB->begin(), IE = MBB->end(); + II != IE; ++II) { + const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode()); + if ((MCID.isCall() && !MCID.isReturn()) || + II->isStackAligningInlineAsm()) { + MFI->setHasCalls(true); + } + if (II->isMSInlineAsm()) { + MF->setHasMSInlineAsm(true); } } } - done: // Determine if there is a call to setjmp in the machine function. 
MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice()); @@ -1032,10 +1044,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (FuncInfo->MBB->isLandingPad()) PrepareEHLandingPad(); - // Lower any arguments needed in this block if this is the entry block. - if (LLVMBB == &Fn.getEntryBlock()) - LowerArguments(LLVMBB); - // Before doing SelectionDAG ISel, see if FastISel has been requested. if (FastIS) { FastIS->startNewBlock(); @@ -1043,9 +1051,21 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Emit code for any incoming arguments. This must happen before // beginning FastISel on the entry block. if (LLVMBB == &Fn.getEntryBlock()) { - CurDAG->setRoot(SDB->getControlRoot()); - SDB->clear(); - CodeGenAndEmitDAG(); + // Lower any arguments needed in this block if this is the entry block. + if (!FastIS->LowerArguments()) { + + if (EnableFastISelAbortArgs) + // The "fast" selector couldn't lower these arguments. For the + // purpose of debugging, just abort. + llvm_unreachable("FastISel didn't lower all arguments"); + + // Call target independent SDISel argument lowering code if the target + // specific routine is not successful. + LowerArguments(LLVMBB); + CurDAG->setRoot(SDB->getControlRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + } // If we inserted any instructions at the beginning, make a note of // where they are, so we can be sure to emit subsequent instructions @@ -1156,6 +1176,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } FastIS->recomputeInsertPt(); + } else { + // Lower any arguments needed in this block if this is the entry block. + if (LLVMBB == &Fn.getEntryBlock()) + LowerArguments(LLVMBB); } if (Begin != BI) diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 34f3bc9..f5fc66c 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -33,1114 +33,168 @@ #include <cctype> using namespace llvm; -/// InitLibcallNames - Set default libcall names.
-/// -static void InitLibcallNames(const char **Names) { - Names[RTLIB::SHL_I16] = "__ashlhi3"; - Names[RTLIB::SHL_I32] = "__ashlsi3"; - Names[RTLIB::SHL_I64] = "__ashldi3"; - Names[RTLIB::SHL_I128] = "__ashlti3"; - Names[RTLIB::SRL_I16] = "__lshrhi3"; - Names[RTLIB::SRL_I32] = "__lshrsi3"; - Names[RTLIB::SRL_I64] = "__lshrdi3"; - Names[RTLIB::SRL_I128] = "__lshrti3"; - Names[RTLIB::SRA_I16] = "__ashrhi3"; - Names[RTLIB::SRA_I32] = "__ashrsi3"; - Names[RTLIB::SRA_I64] = "__ashrdi3"; - Names[RTLIB::SRA_I128] = "__ashrti3"; - Names[RTLIB::MUL_I8] = "__mulqi3"; - Names[RTLIB::MUL_I16] = "__mulhi3"; - Names[RTLIB::MUL_I32] = "__mulsi3"; - Names[RTLIB::MUL_I64] = "__muldi3"; - Names[RTLIB::MUL_I128] = "__multi3"; - Names[RTLIB::MULO_I32] = "__mulosi4"; - Names[RTLIB::MULO_I64] = "__mulodi4"; - Names[RTLIB::MULO_I128] = "__muloti4"; - Names[RTLIB::SDIV_I8] = "__divqi3"; - Names[RTLIB::SDIV_I16] = "__divhi3"; - Names[RTLIB::SDIV_I32] = "__divsi3"; - Names[RTLIB::SDIV_I64] = "__divdi3"; - Names[RTLIB::SDIV_I128] = "__divti3"; - Names[RTLIB::UDIV_I8] = "__udivqi3"; - Names[RTLIB::UDIV_I16] = "__udivhi3"; - Names[RTLIB::UDIV_I32] = "__udivsi3"; - Names[RTLIB::UDIV_I64] = "__udivdi3"; - Names[RTLIB::UDIV_I128] = "__udivti3"; - Names[RTLIB::SREM_I8] = "__modqi3"; - Names[RTLIB::SREM_I16] = "__modhi3"; - Names[RTLIB::SREM_I32] = "__modsi3"; - Names[RTLIB::SREM_I64] = "__moddi3"; - Names[RTLIB::SREM_I128] = "__modti3"; - Names[RTLIB::UREM_I8] = "__umodqi3"; - Names[RTLIB::UREM_I16] = "__umodhi3"; - Names[RTLIB::UREM_I32] = "__umodsi3"; - Names[RTLIB::UREM_I64] = "__umoddi3"; - Names[RTLIB::UREM_I128] = "__umodti3"; - - // These are generally not available. - Names[RTLIB::SDIVREM_I8] = 0; - Names[RTLIB::SDIVREM_I16] = 0; - Names[RTLIB::SDIVREM_I32] = 0; - Names[RTLIB::SDIVREM_I64] = 0; - Names[RTLIB::SDIVREM_I128] = 0; - Names[RTLIB::UDIVREM_I8] = 0; - Names[RTLIB::UDIVREM_I16] = 0; - Names[RTLIB::UDIVREM_I32] = 0; - Names[RTLIB::UDIVREM_I64] = 0; - Names[RTLIB::UDIVREM_I128] = 0; - - Names[RTLIB::NEG_I32] = "__negsi2"; - Names[RTLIB::NEG_I64] = "__negdi2"; - Names[RTLIB::ADD_F32] = "__addsf3"; - Names[RTLIB::ADD_F64] = "__adddf3"; - Names[RTLIB::ADD_F80] = "__addxf3"; - Names[RTLIB::ADD_F128] = "__addtf3"; - Names[RTLIB::ADD_PPCF128] = "__gcc_qadd"; - Names[RTLIB::SUB_F32] = "__subsf3"; - Names[RTLIB::SUB_F64] = "__subdf3"; - Names[RTLIB::SUB_F80] = "__subxf3"; - Names[RTLIB::SUB_F128] = "__subtf3"; - Names[RTLIB::SUB_PPCF128] = "__gcc_qsub"; - Names[RTLIB::MUL_F32] = "__mulsf3"; - Names[RTLIB::MUL_F64] = "__muldf3"; - Names[RTLIB::MUL_F80] = "__mulxf3"; - Names[RTLIB::MUL_F128] = "__multf3"; - Names[RTLIB::MUL_PPCF128] = "__gcc_qmul"; - Names[RTLIB::DIV_F32] = "__divsf3"; - Names[RTLIB::DIV_F64] = "__divdf3"; - Names[RTLIB::DIV_F80] = "__divxf3"; - Names[RTLIB::DIV_F128] = "__divtf3"; - Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv"; - Names[RTLIB::REM_F32] = "fmodf"; - Names[RTLIB::REM_F64] = "fmod"; - Names[RTLIB::REM_F80] = "fmodl"; - Names[RTLIB::REM_F128] = "fmodl"; - Names[RTLIB::REM_PPCF128] = "fmodl"; - Names[RTLIB::FMA_F32] = "fmaf"; - Names[RTLIB::FMA_F64] = "fma"; - Names[RTLIB::FMA_F80] = "fmal"; - Names[RTLIB::FMA_F128] = "fmal"; - Names[RTLIB::FMA_PPCF128] = "fmal"; - Names[RTLIB::POWI_F32] = "__powisf2"; - Names[RTLIB::POWI_F64] = "__powidf2"; - Names[RTLIB::POWI_F80] = "__powixf2"; - Names[RTLIB::POWI_F128] = "__powitf2"; - Names[RTLIB::POWI_PPCF128] = "__powitf2"; - Names[RTLIB::SQRT_F32] = "sqrtf"; - Names[RTLIB::SQRT_F64] = "sqrt"; - Names[RTLIB::SQRT_F80] = "sqrtl"; - 
Names[RTLIB::SQRT_F128] = "sqrtl"; - Names[RTLIB::SQRT_PPCF128] = "sqrtl"; - Names[RTLIB::LOG_F32] = "logf"; - Names[RTLIB::LOG_F64] = "log"; - Names[RTLIB::LOG_F80] = "logl"; - Names[RTLIB::LOG_F128] = "logl"; - Names[RTLIB::LOG_PPCF128] = "logl"; - Names[RTLIB::LOG2_F32] = "log2f"; - Names[RTLIB::LOG2_F64] = "log2"; - Names[RTLIB::LOG2_F80] = "log2l"; - Names[RTLIB::LOG2_F128] = "log2l"; - Names[RTLIB::LOG2_PPCF128] = "log2l"; - Names[RTLIB::LOG10_F32] = "log10f"; - Names[RTLIB::LOG10_F64] = "log10"; - Names[RTLIB::LOG10_F80] = "log10l"; - Names[RTLIB::LOG10_F128] = "log10l"; - Names[RTLIB::LOG10_PPCF128] = "log10l"; - Names[RTLIB::EXP_F32] = "expf"; - Names[RTLIB::EXP_F64] = "exp"; - Names[RTLIB::EXP_F80] = "expl"; - Names[RTLIB::EXP_F128] = "expl"; - Names[RTLIB::EXP_PPCF128] = "expl"; - Names[RTLIB::EXP2_F32] = "exp2f"; - Names[RTLIB::EXP2_F64] = "exp2"; - Names[RTLIB::EXP2_F80] = "exp2l"; - Names[RTLIB::EXP2_F128] = "exp2l"; - Names[RTLIB::EXP2_PPCF128] = "exp2l"; - Names[RTLIB::SIN_F32] = "sinf"; - Names[RTLIB::SIN_F64] = "sin"; - Names[RTLIB::SIN_F80] = "sinl"; - Names[RTLIB::SIN_F128] = "sinl"; - Names[RTLIB::SIN_PPCF128] = "sinl"; - Names[RTLIB::COS_F32] = "cosf"; - Names[RTLIB::COS_F64] = "cos"; - Names[RTLIB::COS_F80] = "cosl"; - Names[RTLIB::COS_F128] = "cosl"; - Names[RTLIB::COS_PPCF128] = "cosl"; - Names[RTLIB::POW_F32] = "powf"; - Names[RTLIB::POW_F64] = "pow"; - Names[RTLIB::POW_F80] = "powl"; - Names[RTLIB::POW_F128] = "powl"; - Names[RTLIB::POW_PPCF128] = "powl"; - Names[RTLIB::CEIL_F32] = "ceilf"; - Names[RTLIB::CEIL_F64] = "ceil"; - Names[RTLIB::CEIL_F80] = "ceill"; - Names[RTLIB::CEIL_F128] = "ceill"; - Names[RTLIB::CEIL_PPCF128] = "ceill"; - Names[RTLIB::TRUNC_F32] = "truncf"; - Names[RTLIB::TRUNC_F64] = "trunc"; - Names[RTLIB::TRUNC_F80] = "truncl"; - Names[RTLIB::TRUNC_F128] = "truncl"; - Names[RTLIB::TRUNC_PPCF128] = "truncl"; - Names[RTLIB::RINT_F32] = "rintf"; - Names[RTLIB::RINT_F64] = "rint"; - Names[RTLIB::RINT_F80] = "rintl"; - Names[RTLIB::RINT_F128] = "rintl"; - Names[RTLIB::RINT_PPCF128] = "rintl"; - Names[RTLIB::NEARBYINT_F32] = "nearbyintf"; - Names[RTLIB::NEARBYINT_F64] = "nearbyint"; - Names[RTLIB::NEARBYINT_F80] = "nearbyintl"; - Names[RTLIB::NEARBYINT_F128] = "nearbyintl"; - Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl"; - Names[RTLIB::FLOOR_F32] = "floorf"; - Names[RTLIB::FLOOR_F64] = "floor"; - Names[RTLIB::FLOOR_F80] = "floorl"; - Names[RTLIB::FLOOR_F128] = "floorl"; - Names[RTLIB::FLOOR_PPCF128] = "floorl"; - Names[RTLIB::COPYSIGN_F32] = "copysignf"; - Names[RTLIB::COPYSIGN_F64] = "copysign"; - Names[RTLIB::COPYSIGN_F80] = "copysignl"; - Names[RTLIB::COPYSIGN_F128] = "copysignl"; - Names[RTLIB::COPYSIGN_PPCF128] = "copysignl"; - Names[RTLIB::FPEXT_F64_F128] = "__extenddftf2"; - Names[RTLIB::FPEXT_F32_F128] = "__extendsftf2"; - Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2"; - Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee"; - Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee"; - Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2"; - Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2"; - Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2"; - Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2"; - Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; - Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2"; - Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2"; - Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi"; - Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi"; - Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; - Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; - 
Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; - Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi"; - Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi"; - Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi"; - Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi"; - Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti"; - Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi"; - Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi"; - Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti"; - Names[RTLIB::FPTOSINT_F128_I32] = "__fixtfsi"; - Names[RTLIB::FPTOSINT_F128_I64] = "__fixtfdi"; - Names[RTLIB::FPTOSINT_F128_I128] = "__fixtfti"; - Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; - Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; - Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; - Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi"; - Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi"; - Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; - Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; - Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; - Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi"; - Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi"; - Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; - Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; - Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti"; - Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi"; - Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi"; - Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti"; - Names[RTLIB::FPTOUINT_F128_I32] = "__fixunstfsi"; - Names[RTLIB::FPTOUINT_F128_I64] = "__fixunstfdi"; - Names[RTLIB::FPTOUINT_F128_I128] = "__fixunstfti"; - Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi"; - Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi"; - Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti"; - Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf"; - Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf"; - Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf"; - Names[RTLIB::SINTTOFP_I32_F128] = "__floatsitf"; - Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf"; - Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf"; - Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf"; - Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf"; - Names[RTLIB::SINTTOFP_I64_F128] = "__floatditf"; - Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf"; - Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf"; - Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf"; - Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf"; - Names[RTLIB::SINTTOFP_I128_F128] = "__floattitf"; - Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf"; - Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf"; - Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf"; - Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf"; - Names[RTLIB::UINTTOFP_I32_F128] = "__floatunsitf"; - Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf"; - Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf"; - Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf"; - Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf"; - Names[RTLIB::UINTTOFP_I64_F128] = "__floatunditf"; - Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf"; - Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf"; - Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf"; - Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf"; - Names[RTLIB::UINTTOFP_I128_F128] = "__floatuntitf"; - Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf"; - Names[RTLIB::OEQ_F32] = "__eqsf2"; - Names[RTLIB::OEQ_F64] = "__eqdf2"; - Names[RTLIB::OEQ_F128] = "__eqtf2"; - Names[RTLIB::UNE_F32] = "__nesf2"; - Names[RTLIB::UNE_F64] = "__nedf2"; - Names[RTLIB::UNE_F128] = "__netf2"; - Names[RTLIB::OGE_F32] = "__gesf2"; 
- Names[RTLIB::OGE_F64] = "__gedf2"; - Names[RTLIB::OGE_F128] = "__getf2"; - Names[RTLIB::OLT_F32] = "__ltsf2"; - Names[RTLIB::OLT_F64] = "__ltdf2"; - Names[RTLIB::OLT_F128] = "__lttf2"; - Names[RTLIB::OLE_F32] = "__lesf2"; - Names[RTLIB::OLE_F64] = "__ledf2"; - Names[RTLIB::OLE_F128] = "__letf2"; - Names[RTLIB::OGT_F32] = "__gtsf2"; - Names[RTLIB::OGT_F64] = "__gtdf2"; - Names[RTLIB::OGT_F128] = "__gttf2"; - Names[RTLIB::UO_F32] = "__unordsf2"; - Names[RTLIB::UO_F64] = "__unorddf2"; - Names[RTLIB::UO_F128] = "__unordtf2"; - Names[RTLIB::O_F32] = "__unordsf2"; - Names[RTLIB::O_F64] = "__unorddf2"; - Names[RTLIB::O_F128] = "__unordtf2"; - Names[RTLIB::MEMCPY] = "memcpy"; - Names[RTLIB::MEMMOVE] = "memmove"; - Names[RTLIB::MEMSET] = "memset"; - Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; - Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; - Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; - Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4"; - Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8"; - Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1"; - Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2"; - Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4"; - Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8"; - Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1"; - Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2"; - Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4"; - Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8"; - Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1"; - Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2"; - Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4"; - Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8"; - Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1"; - Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2"; - Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4"; - Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8"; - Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1"; - Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2"; - Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4"; - Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8"; - Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1"; - Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2"; - Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4"; - Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8"; - Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1"; - Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2"; - Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4"; - Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8"; -} - -/// InitLibcallCallingConvs - Set default libcall CallingConvs. -/// -static void InitLibcallCallingConvs(CallingConv::ID *CCs) { - for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) { - CCs[i] = CallingConv::C; - } -} - -/// getFPEXT - Return the FPEXT_*_* value for the given types, or -/// UNKNOWN_LIBCALL if there is none. 
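Before the type-dispatch helpers below, one remark on the name table removed above (it moves into TargetLoweringBase in this commit): the names follow the long-standing GCC runtime convention, where a mode suffix encodes the operand width (qi=i8, hi=i16, si=i32, di=i64, ti=i128 for integers; sf=f32, df=f64, xf=f80, tf=f128 for floats) and the trailing digit is, conventionally, the operand count of the matching RTL pattern. A toy sketch of the convention (illustrative only, not LLVM code):

#include <cassert>
#include <string>

// Rebuilds a shift-left libcall name from the integer width, following the
// naming scheme used by the table above: "__ashl" + mode suffix + "3".
static std::string shlLibcallName(unsigned Bits) {
  const char *Mode = Bits == 16 ? "hi"
                   : Bits == 32 ? "si"
                   : Bits == 64 ? "di" : "ti";
  return std::string("__ashl") + Mode + "3";
}

int main() {
  assert(shlLibcallName(32) == "__ashlsi3"); // RTLIB::SHL_I32 above
  assert(shlLibcallName(64) == "__ashldi3"); // RTLIB::SHL_I64 above
}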
-RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { - if (OpVT == MVT::f32) { - if (RetVT == MVT::f64) - return FPEXT_F32_F64; - if (RetVT == MVT::f128) - return FPEXT_F32_F128; - } else if (OpVT == MVT::f64) { - if (RetVT == MVT::f128) - return FPEXT_F64_F128; - } - - return UNKNOWN_LIBCALL; -} - -/// getFPROUND - Return the FPROUND_*_* value for the given types, or -/// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { - if (RetVT == MVT::f32) { - if (OpVT == MVT::f64) - return FPROUND_F64_F32; - if (OpVT == MVT::f80) - return FPROUND_F80_F32; - if (OpVT == MVT::f128) - return FPROUND_F128_F32; - if (OpVT == MVT::ppcf128) - return FPROUND_PPCF128_F32; - } else if (RetVT == MVT::f64) { - if (OpVT == MVT::f80) - return FPROUND_F80_F64; - if (OpVT == MVT::f128) - return FPROUND_F128_F64; - if (OpVT == MVT::ppcf128) - return FPROUND_PPCF128_F64; - } - - return UNKNOWN_LIBCALL; -} - -/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or -/// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { - if (OpVT == MVT::f32) { - if (RetVT == MVT::i8) - return FPTOSINT_F32_I8; - if (RetVT == MVT::i16) - return FPTOSINT_F32_I16; - if (RetVT == MVT::i32) - return FPTOSINT_F32_I32; - if (RetVT == MVT::i64) - return FPTOSINT_F32_I64; - if (RetVT == MVT::i128) - return FPTOSINT_F32_I128; - } else if (OpVT == MVT::f64) { - if (RetVT == MVT::i8) - return FPTOSINT_F64_I8; - if (RetVT == MVT::i16) - return FPTOSINT_F64_I16; - if (RetVT == MVT::i32) - return FPTOSINT_F64_I32; - if (RetVT == MVT::i64) - return FPTOSINT_F64_I64; - if (RetVT == MVT::i128) - return FPTOSINT_F64_I128; - } else if (OpVT == MVT::f80) { - if (RetVT == MVT::i32) - return FPTOSINT_F80_I32; - if (RetVT == MVT::i64) - return FPTOSINT_F80_I64; - if (RetVT == MVT::i128) - return FPTOSINT_F80_I128; - } else if (OpVT == MVT::f128) { - if (RetVT == MVT::i32) - return FPTOSINT_F128_I32; - if (RetVT == MVT::i64) - return FPTOSINT_F128_I64; - if (RetVT == MVT::i128) - return FPTOSINT_F128_I128; - } else if (OpVT == MVT::ppcf128) { - if (RetVT == MVT::i32) - return FPTOSINT_PPCF128_I32; - if (RetVT == MVT::i64) - return FPTOSINT_PPCF128_I64; - if (RetVT == MVT::i128) - return FPTOSINT_PPCF128_I128; - } - return UNKNOWN_LIBCALL; -} - -/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or -/// UNKNOWN_LIBCALL if there is none. 
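The dispatch helpers in this region pick a libcall enum from an (operand type, result type) pair; a target then feeds the enum to the makeLibCall helper introduced later in this diff. A hedged fragment, not standalone, assuming it runs inside a hypothetical TargetLowering subclass where Op, DAG, and dl are in scope:

// Lowering FP_TO_SINT f64 -> i32 via a libcall:
RTLIB::Libcall LC = RTLIB::getFPTOSINT(MVT::f64, MVT::i32);
assert(LC == RTLIB::FPTOSINT_F64_I32); // the name table maps it to "__fixdfsi"
SDValue Src = Op.getOperand(0);
SDValue Res = makeLibCall(DAG, LC, MVT::i32, &Src, 1, /*isSigned=*/true, dl);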
-RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { - if (OpVT == MVT::f32) { - if (RetVT == MVT::i8) - return FPTOUINT_F32_I8; - if (RetVT == MVT::i16) - return FPTOUINT_F32_I16; - if (RetVT == MVT::i32) - return FPTOUINT_F32_I32; - if (RetVT == MVT::i64) - return FPTOUINT_F32_I64; - if (RetVT == MVT::i128) - return FPTOUINT_F32_I128; - } else if (OpVT == MVT::f64) { - if (RetVT == MVT::i8) - return FPTOUINT_F64_I8; - if (RetVT == MVT::i16) - return FPTOUINT_F64_I16; - if (RetVT == MVT::i32) - return FPTOUINT_F64_I32; - if (RetVT == MVT::i64) - return FPTOUINT_F64_I64; - if (RetVT == MVT::i128) - return FPTOUINT_F64_I128; - } else if (OpVT == MVT::f80) { - if (RetVT == MVT::i32) - return FPTOUINT_F80_I32; - if (RetVT == MVT::i64) - return FPTOUINT_F80_I64; - if (RetVT == MVT::i128) - return FPTOUINT_F80_I128; - } else if (OpVT == MVT::f128) { - if (RetVT == MVT::i32) - return FPTOUINT_F128_I32; - if (RetVT == MVT::i64) - return FPTOUINT_F128_I64; - if (RetVT == MVT::i128) - return FPTOUINT_F128_I128; - } else if (OpVT == MVT::ppcf128) { - if (RetVT == MVT::i32) - return FPTOUINT_PPCF128_I32; - if (RetVT == MVT::i64) - return FPTOUINT_PPCF128_I64; - if (RetVT == MVT::i128) - return FPTOUINT_PPCF128_I128; - } - return UNKNOWN_LIBCALL; -} - -/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or -/// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) { - if (OpVT == MVT::i32) { - if (RetVT == MVT::f32) - return SINTTOFP_I32_F32; - if (RetVT == MVT::f64) - return SINTTOFP_I32_F64; - if (RetVT == MVT::f80) - return SINTTOFP_I32_F80; - if (RetVT == MVT::f128) - return SINTTOFP_I32_F128; - if (RetVT == MVT::ppcf128) - return SINTTOFP_I32_PPCF128; - } else if (OpVT == MVT::i64) { - if (RetVT == MVT::f32) - return SINTTOFP_I64_F32; - if (RetVT == MVT::f64) - return SINTTOFP_I64_F64; - if (RetVT == MVT::f80) - return SINTTOFP_I64_F80; - if (RetVT == MVT::f128) - return SINTTOFP_I64_F128; - if (RetVT == MVT::ppcf128) - return SINTTOFP_I64_PPCF128; - } else if (OpVT == MVT::i128) { - if (RetVT == MVT::f32) - return SINTTOFP_I128_F32; - if (RetVT == MVT::f64) - return SINTTOFP_I128_F64; - if (RetVT == MVT::f80) - return SINTTOFP_I128_F80; - if (RetVT == MVT::f128) - return SINTTOFP_I128_F128; - if (RetVT == MVT::ppcf128) - return SINTTOFP_I128_PPCF128; - } - return UNKNOWN_LIBCALL; -} - -/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or -/// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { - if (OpVT == MVT::i32) { - if (RetVT == MVT::f32) - return UINTTOFP_I32_F32; - if (RetVT == MVT::f64) - return UINTTOFP_I32_F64; - if (RetVT == MVT::f80) - return UINTTOFP_I32_F80; - if (RetVT == MVT::f128) - return UINTTOFP_I32_F128; - if (RetVT == MVT::ppcf128) - return UINTTOFP_I32_PPCF128; - } else if (OpVT == MVT::i64) { - if (RetVT == MVT::f32) - return UINTTOFP_I64_F32; - if (RetVT == MVT::f64) - return UINTTOFP_I64_F64; - if (RetVT == MVT::f80) - return UINTTOFP_I64_F80; - if (RetVT == MVT::f128) - return UINTTOFP_I64_F128; - if (RetVT == MVT::ppcf128) - return UINTTOFP_I64_PPCF128; - } else if (OpVT == MVT::i128) { - if (RetVT == MVT::f32) - return UINTTOFP_I128_F32; - if (RetVT == MVT::f64) - return UINTTOFP_I128_F64; - if (RetVT == MVT::f80) - return UINTTOFP_I128_F80; - if (RetVT == MVT::f128) - return UINTTOFP_I128_F128; - if (RetVT == MVT::ppcf128) - return UINTTOFP_I128_PPCF128; - } - return UNKNOWN_LIBCALL; -} - -/// InitCmpLibcallCCs - Set default comparison libcall CC. 
-/// -static void InitCmpLibcallCCs(ISD::CondCode *CCs) { - memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL); - CCs[RTLIB::OEQ_F32] = ISD::SETEQ; - CCs[RTLIB::OEQ_F64] = ISD::SETEQ; - CCs[RTLIB::OEQ_F128] = ISD::SETEQ; - CCs[RTLIB::UNE_F32] = ISD::SETNE; - CCs[RTLIB::UNE_F64] = ISD::SETNE; - CCs[RTLIB::UNE_F128] = ISD::SETNE; - CCs[RTLIB::OGE_F32] = ISD::SETGE; - CCs[RTLIB::OGE_F64] = ISD::SETGE; - CCs[RTLIB::OGE_F128] = ISD::SETGE; - CCs[RTLIB::OLT_F32] = ISD::SETLT; - CCs[RTLIB::OLT_F64] = ISD::SETLT; - CCs[RTLIB::OLT_F128] = ISD::SETLT; - CCs[RTLIB::OLE_F32] = ISD::SETLE; - CCs[RTLIB::OLE_F64] = ISD::SETLE; - CCs[RTLIB::OLE_F128] = ISD::SETLE; - CCs[RTLIB::OGT_F32] = ISD::SETGT; - CCs[RTLIB::OGT_F64] = ISD::SETGT; - CCs[RTLIB::OGT_F128] = ISD::SETGT; - CCs[RTLIB::UO_F32] = ISD::SETNE; - CCs[RTLIB::UO_F64] = ISD::SETNE; - CCs[RTLIB::UO_F128] = ISD::SETNE; - CCs[RTLIB::O_F32] = ISD::SETEQ; - CCs[RTLIB::O_F64] = ISD::SETEQ; - CCs[RTLIB::O_F128] = ISD::SETEQ; -} - /// NOTE: The constructor takes ownership of TLOF. TargetLowering::TargetLowering(const TargetMachine &tm, const TargetLoweringObjectFile *tlof) - : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) { - // All operations default to being supported. - memset(OpActions, 0, sizeof(OpActions)); - memset(LoadExtActions, 0, sizeof(LoadExtActions)); - memset(TruncStoreActions, 0, sizeof(TruncStoreActions)); - memset(IndexedModeActions, 0, sizeof(IndexedModeActions)); - memset(CondCodeActions, 0, sizeof(CondCodeActions)); - - // Set default actions for various operations. - for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { - // Default all indexed load / store to expand. - for (unsigned IM = (unsigned)ISD::PRE_INC; - IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { - setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand); - setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand); - } - - // These operations default to expand. - setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); - setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); - } + : TargetLoweringBase(tm, tlof) {} - // Most targets ignore the @llvm.prefetch intrinsic. - setOperationAction(ISD::PREFETCH, MVT::Other, Expand); - - // ConstantFP nodes default to expand. Targets can either change this to - // Legal, in which case all fp constants are legal, or use isFPImmLegal() - // to optimize expansions for certain constants. - setOperationAction(ISD::ConstantFP, MVT::f16, Expand); - setOperationAction(ISD::ConstantFP, MVT::f32, Expand); - setOperationAction(ISD::ConstantFP, MVT::f64, Expand); - setOperationAction(ISD::ConstantFP, MVT::f80, Expand); - setOperationAction(ISD::ConstantFP, MVT::f128, Expand); - - // These library functions default to expand. 
- setOperationAction(ISD::FLOG , MVT::f16, Expand); - setOperationAction(ISD::FLOG2, MVT::f16, Expand); - setOperationAction(ISD::FLOG10, MVT::f16, Expand); - setOperationAction(ISD::FEXP , MVT::f16, Expand); - setOperationAction(ISD::FEXP2, MVT::f16, Expand); - setOperationAction(ISD::FFLOOR, MVT::f16, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand); - setOperationAction(ISD::FCEIL, MVT::f16, Expand); - setOperationAction(ISD::FRINT, MVT::f16, Expand); - setOperationAction(ISD::FTRUNC, MVT::f16, Expand); - setOperationAction(ISD::FLOG , MVT::f32, Expand); - setOperationAction(ISD::FLOG2, MVT::f32, Expand); - setOperationAction(ISD::FLOG10, MVT::f32, Expand); - setOperationAction(ISD::FEXP , MVT::f32, Expand); - setOperationAction(ISD::FEXP2, MVT::f32, Expand); - setOperationAction(ISD::FFLOOR, MVT::f32, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand); - setOperationAction(ISD::FCEIL, MVT::f32, Expand); - setOperationAction(ISD::FRINT, MVT::f32, Expand); - setOperationAction(ISD::FTRUNC, MVT::f32, Expand); - setOperationAction(ISD::FLOG , MVT::f64, Expand); - setOperationAction(ISD::FLOG2, MVT::f64, Expand); - setOperationAction(ISD::FLOG10, MVT::f64, Expand); - setOperationAction(ISD::FEXP , MVT::f64, Expand); - setOperationAction(ISD::FEXP2, MVT::f64, Expand); - setOperationAction(ISD::FFLOOR, MVT::f64, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); - setOperationAction(ISD::FCEIL, MVT::f64, Expand); - setOperationAction(ISD::FRINT, MVT::f64, Expand); - setOperationAction(ISD::FTRUNC, MVT::f64, Expand); - setOperationAction(ISD::FLOG , MVT::f128, Expand); - setOperationAction(ISD::FLOG2, MVT::f128, Expand); - setOperationAction(ISD::FLOG10, MVT::f128, Expand); - setOperationAction(ISD::FEXP , MVT::f128, Expand); - setOperationAction(ISD::FEXP2, MVT::f128, Expand); - setOperationAction(ISD::FFLOOR, MVT::f128, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand); - setOperationAction(ISD::FCEIL, MVT::f128, Expand); - setOperationAction(ISD::FRINT, MVT::f128, Expand); - setOperationAction(ISD::FTRUNC, MVT::f128, Expand); - - // Default ISD::TRAP to expand (which turns it into abort). - setOperationAction(ISD::TRAP, MVT::Other, Expand); - - // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand" - // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP. 
- // - setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); - - IsLittleEndian = TD->isLittleEndian(); - PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0)); - memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); - memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); - maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; - maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize - = maxStoresPerMemmoveOptSize = 4; - benefitFromCodePlacementOpt = false; - UseUnderscoreSetJmp = false; - UseUnderscoreLongJmp = false; - SelectIsExpensive = false; - IntDivIsCheap = false; - Pow2DivIsCheap = false; - JumpIsExpensive = false; - predictableSelectIsExpensive = false; - StackPointerRegisterToSaveRestore = 0; - ExceptionPointerRegister = 0; - ExceptionSelectorRegister = 0; - BooleanContents = UndefinedBooleanContent; - BooleanVectorContents = UndefinedBooleanContent; - SchedPreferenceInfo = Sched::ILP; - JumpBufSize = 0; - JumpBufAlignment = 0; - MinFunctionAlignment = 0; - PrefFunctionAlignment = 0; - PrefLoopAlignment = 0; - MinStackArgumentAlignment = 1; - ShouldFoldAtomicFences = false; - InsertFencesForAtomic = false; - SupportJumpTables = true; - MinimumJumpTableEntries = 4; - - InitLibcallNames(LibcallRoutineNames); - InitCmpLibcallCCs(CmpLibcallCCs); - InitLibcallCallingConvs(LibcallCallingConvs); -} - -TargetLowering::~TargetLowering() { - delete &TLOF; -} - -MVT TargetLowering::getShiftAmountTy(EVT LHSTy) const { - return MVT::getIntegerVT(8*TD->getPointerSize(0)); +const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { + return NULL; } -/// canOpTrap - Returns true if the operation can trap for the value type. -/// VT must be a legal type. -bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const { - assert(isTypeLegal(VT)); - switch (Op) { - default: +/// Check whether a given call node is in tail position within its function. If +/// so, it sets Chain to the input chain of the tail call. +bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, + SDValue &Chain) const { + const Function *F = DAG.getMachineFunction().getFunction(); + + // Conservatively require the attributes of the call to match those of + // the return. Ignore noalias because it doesn't affect the call sequence. + AttributeSet CallerAttrs = F->getAttributes(); + if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex) + .removeAttribute(Attribute::NoAlias).hasAttributes()) return false; - case ISD::FDIV: - case ISD::FREM: - case ISD::SDIV: - case ISD::UDIV: - case ISD::SREM: - case ISD::UREM: - return true; - } -} + // It's not safe to eliminate the sign / zero extension of the return value. + if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || + CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) + return false; -static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, - unsigned &NumIntermediates, - MVT &RegisterVT, - TargetLowering *TLI) { - // Figure out the right, legal destination reg to copy into. - unsigned NumElts = VT.getVectorNumElements(); - MVT EltTy = VT.getVectorElementType(); - - unsigned NumVectorRegs = 1; - - // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we - // could break down into LHS/RHS like LegalizeDAG does. - if (!isPowerOf2_32(NumElts)) { - NumVectorRegs = NumElts; - NumElts = 1; - } - - // Divide the input until we get to a supported size. This will always - // end with a scalar if the target doesn't support vectors. 
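A standalone model of the halving loop that follows, assuming (as with SSE1) that four-element vectors are the only legal vector type, so v8f32 breaks into two v4f32 pieces:

#include <cstdio>

// Stand-in for TLI->isTypeLegal on a vector type: only v4xxx is legal here.
static bool isLegalVector(unsigned NumElts) { return NumElts == 4; }

int main() {
  unsigned NumElts = 8, NumVectorRegs = 1;       // start from v8f32
  while (NumElts > 1 && !isLegalVector(NumElts)) {
    NumElts >>= 1;        // halve the piece size...
    NumVectorRegs <<= 1;  // ...and double the number of pieces
  }
  std::printf("%u x v%uf32\n", NumVectorRegs, NumElts); // prints "2 x v4f32"
}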
- while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { - NumElts >>= 1; - NumVectorRegs <<= 1; - } - - NumIntermediates = NumVectorRegs; - - MVT NewVT = MVT::getVectorVT(EltTy, NumElts); - if (!TLI->isTypeLegal(NewVT)) - NewVT = EltTy; - IntermediateVT = NewVT; - - unsigned NewVTSize = NewVT.getSizeInBits(); - - // Convert sizes such as i33 to i64. - if (!isPowerOf2_32(NewVTSize)) - NewVTSize = NextPowerOf2(NewVTSize); - - MVT DestVT = TLI->getRegisterType(NewVT); - RegisterVT = DestVT; - if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. - return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); - - // Otherwise, promotion or legal types use the same number of registers as - // the vector decimated to the appropriate level. - return NumVectorRegs; -} - -/// isLegalRC - Return true if the value types that can be represented by the -/// specified register class are all legal. -bool TargetLowering::isLegalRC(const TargetRegisterClass *RC) const { - for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); - I != E; ++I) { - if (isTypeLegal(*I)) - return true; - } - return false; -} - -/// findRepresentativeClass - Return the largest legal super-reg register class -/// of the register class for the specified type and its associated "cost". -std::pair<const TargetRegisterClass*, uint8_t> -TargetLowering::findRepresentativeClass(MVT VT) const { - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy]; - if (!RC) - return std::make_pair(RC, 0); - - // Compute the set of all super-register classes. - BitVector SuperRegRC(TRI->getNumRegClasses()); - for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) - SuperRegRC.setBitsInMask(RCI.getMask()); - - // Find the first legal register class with the largest spill size. - const TargetRegisterClass *BestRC = RC; - for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) { - const TargetRegisterClass *SuperRC = TRI->getRegClass(i); - // We want the largest possible spill size. - if (SuperRC->getSize() <= BestRC->getSize()) - continue; - if (!isLegalRC(SuperRC)) - continue; - BestRC = SuperRC; - } - return std::make_pair(BestRC, 1); -} - -/// computeRegisterProperties - Once all of the register classes are added, -/// this allows us to compute derived properties we expose. -void TargetLowering::computeRegisterProperties() { - assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && - "Too many value types for ValueTypeActions to hold!"); - - // Everything defaults to needing one register. - for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { - NumRegistersForVT[i] = 1; - RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i; - } - // ...except isVoid, which doesn't need any registers. - NumRegistersForVT[MVT::isVoid] = 0; - - // Find the largest integer register class. - unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE; - for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg) - assert(LargestIntReg != MVT::i1 && "No integer registers defined!"); - - // Every integer value type larger than this largest register takes twice as - // many registers to represent as the previous ValueType. 
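The doubling rule stated in the comment above, as a tiny standalone model (assuming, for illustration, a target whose widest integer register is 32 bits):

#include <cassert>

// Registers needed for a power-of-two integer width when the largest native
// integer register is 32 bits: each doubling of the width doubles the count.
static unsigned regsForInt(unsigned Bits) {
  return Bits <= 32 ? 1 : 2 * regsForInt(Bits / 2);
}

int main() {
  assert(regsForInt(32) == 1);  // i32 -> one register
  assert(regsForInt(64) == 2);  // i64 -> a register pair
  assert(regsForInt(128) == 4); // i128 -> four registers
}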
- for (unsigned ExpandedReg = LargestIntReg + 1; - ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) { - NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; - RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg; - TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1); - ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg, - TypeExpandInteger); - } - - // Inspect all of the ValueType's smaller than the largest integer - // register to see which ones need promotion. - unsigned LegalIntReg = LargestIntReg; - for (unsigned IntReg = LargestIntReg - 1; - IntReg >= (unsigned)MVT::i1; --IntReg) { - MVT IVT = (MVT::SimpleValueType)IntReg; - if (isTypeLegal(IVT)) { - LegalIntReg = IntReg; - } else { - RegisterTypeForVT[IntReg] = TransformToType[IntReg] = - (const MVT::SimpleValueType)LegalIntReg; - ValueTypeActions.setTypeAction(IVT, TypePromoteInteger); - } - } - - // ppcf128 type is really two f64's. - if (!isTypeLegal(MVT::ppcf128)) { - NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64]; - RegisterTypeForVT[MVT::ppcf128] = MVT::f64; - TransformToType[MVT::ppcf128] = MVT::f64; - ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat); - } - - // Decide how to handle f64. If the target does not have native f64 support, - // expand it to i64 and we will be generating soft float library calls. - if (!isTypeLegal(MVT::f64)) { - NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64]; - RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64]; - TransformToType[MVT::f64] = MVT::i64; - ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat); - } - - // Decide how to handle f32. If the target does not have native support for - // f32, promote it to f64 if it is legal. Otherwise, expand it to i32. - if (!isTypeLegal(MVT::f32)) { - if (isTypeLegal(MVT::f64)) { - NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64]; - RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64]; - TransformToType[MVT::f32] = MVT::f64; - ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger); - } else { - NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; - RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; - TransformToType[MVT::f32] = MVT::i32; - ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); - } - } - - // Loop over all of the vector value types to see which need transformations. - for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; - i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { - MVT VT = (MVT::SimpleValueType)i; - if (isTypeLegal(VT)) continue; - - // Determine if there is a legal wider type. If so, we should promote to - // that wider vector type. - MVT EltVT = VT.getVectorElementType(); - unsigned NElts = VT.getVectorNumElements(); - if (NElts != 1 && !shouldSplitVectorElementType(EltVT)) { - bool IsLegalWiderType = false; - // First try to promote the elements of integer vectors. If no legal - // promotion was found, fallback to the widen-vector method. - for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - MVT SVT = (MVT::SimpleValueType)nVT; - // Promote vectors of integers to vectors with the same number - // of elements, with a wider element type. 
- if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits() - && SVT.getVectorNumElements() == NElts && - isTypeLegal(SVT) && SVT.getScalarType().isInteger()) { - TransformToType[i] = SVT; - RegisterTypeForVT[i] = SVT; - NumRegistersForVT[i] = 1; - ValueTypeActions.setTypeAction(VT, TypePromoteInteger); - IsLegalWiderType = true; - break; - } - } - - if (IsLegalWiderType) continue; - - // Try to widen the vector. - for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - MVT SVT = (MVT::SimpleValueType)nVT; - if (SVT.getVectorElementType() == EltVT && - SVT.getVectorNumElements() > NElts && - isTypeLegal(SVT)) { - TransformToType[i] = SVT; - RegisterTypeForVT[i] = SVT; - NumRegistersForVT[i] = 1; - ValueTypeActions.setTypeAction(VT, TypeWidenVector); - IsLegalWiderType = true; - break; - } - } - if (IsLegalWiderType) continue; - } - - MVT IntermediateVT; - MVT RegisterVT; - unsigned NumIntermediates; - NumRegistersForVT[i] = - getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, - RegisterVT, this); - RegisterTypeForVT[i] = RegisterVT; - - MVT NVT = VT.getPow2VectorType(); - if (NVT == VT) { - // Type is already a power of 2. The default action is to split. - TransformToType[i] = MVT::Other; - unsigned NumElts = VT.getVectorNumElements(); - ValueTypeActions.setTypeAction(VT, - NumElts > 1 ? TypeSplitVector : TypeScalarizeVector); - } else { - TransformToType[i] = NVT; - ValueTypeActions.setTypeAction(VT, TypeWidenVector); - } - } - - // Determine the 'representative' register class for each value type. - // An representative register class is the largest (meaning one which is - // not a sub-register class / subreg register class) legal register class for - // a group of value types. For example, on i386, i8, i16, and i32 - // representative would be GR32; while on x86_64 it's GR64. - for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { - const TargetRegisterClass* RRC; - uint8_t Cost; - tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i); - RepRegClassForVT[i] = RRC; - RepRegClassCostForVT[i] = Cost; - } + // Check if the only use is a function return node. + return isUsedByReturnOnly(Node, Chain); } -const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { - return NULL; -} -EVT TargetLowering::getSetCCResultType(EVT VT) const { - assert(!VT.isVector() && "No default SetCC type for vectors!"); - return getPointerTy(0).SimpleTy; +/// Generate a libcall taking the given operands as arguments and returning a +/// result of type RetVT. 
+SDValue TargetLowering::makeLibCall(SelectionDAG &DAG, + RTLIB::Libcall LC, EVT RetVT, + const SDValue *Ops, unsigned NumOps, + bool isSigned, DebugLoc dl) const { + TargetLowering::ArgListTy Args; + Args.reserve(NumOps); + + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0; i != NumOps; ++i) { + Entry.Node = Ops[i]; + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy()); + + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + TargetLowering:: + CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + false, 0, getLibcallCallingConv(LC), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); + std::pair<SDValue,SDValue> CallInfo = LowerCallTo(CLI); + + return CallInfo.first; } -MVT::SimpleValueType TargetLowering::getCmpLibcallReturnType() const { - return MVT::i32; // return the default value -} -/// getVectorTypeBreakdown - Vector types are broken down into some number of -/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 -/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. -/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86. -/// -/// This method returns the number of registers needed, and the VT for each -/// register. It also returns the VT and quantity of the intermediate values -/// before they are promoted/expanded. -/// -unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, - EVT &IntermediateVT, - unsigned &NumIntermediates, - MVT &RegisterVT) const { - unsigned NumElts = VT.getVectorNumElements(); - - // If there is a wider vector type with the same element type as this one, - // or a promoted vector type that has the same number of elements which - // are wider, then we should convert to that legal vector type. - // This handles things like <2 x float> -> <4 x float> and - // <4 x i1> -> <4 x i32>. - LegalizeTypeAction TA = getTypeAction(Context, VT); - if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) { - EVT RegisterEVT = getTypeToTransformTo(Context, VT); - if (isTypeLegal(RegisterEVT)) { - IntermediateVT = RegisterEVT; - RegisterVT = RegisterEVT.getSimpleVT(); - NumIntermediates = 1; - return 1; +/// SoftenSetCCOperands - Soften the operands of a comparison. This code is +/// shared among BR_CC, SELECT_CC, and SETCC handlers. +void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, + SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, + DebugLoc dl) const { + assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) + && "Unsupported setcc type!"); + + // Expand into one or more soft-fp libcall(s). + RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL; + switch (CCCode) { + case ISD::SETEQ: + case ISD::SETOEQ: + LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : + (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128; + break; + case ISD::SETNE: + case ISD::SETUNE: + LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : + (VT == MVT::f64) ? RTLIB::UNE_F64 : RTLIB::UNE_F128; + break; + case ISD::SETGE: + case ISD::SETOGE: + LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : + (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128; + break; + case ISD::SETLT: + case ISD::SETOLT: + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : + (VT == MVT::f64) ? 
RTLIB::OLT_F64 : RTLIB::OLT_F128;
+    break;
+  case ISD::SETLE:
+  case ISD::SETOLE:
+    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
+          (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128;
+    break;
+  case ISD::SETGT:
+  case ISD::SETOGT:
+    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+          (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
+    break;
+  case ISD::SETUO:
+    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
+          (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128;
+    break;
+  case ISD::SETO:
+    LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
+          (VT == MVT::f64) ? RTLIB::O_F64 : RTLIB::O_F128;
+    break;
+  default:
+    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
+          (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128;
+    switch (CCCode) {
+    case ISD::SETONE:
+      // SETONE = SETOLT | SETOGT
+      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+            (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+      // Fallthrough
+    case ISD::SETUGT:
+      LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+            (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
+      break;
+    case ISD::SETUGE:
+      LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
+            (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128;
+      break;
+    case ISD::SETULT:
+      LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+            (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+      break;
+    case ISD::SETULE:
+      LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
+            (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128;
+      break;
+    case ISD::SETUEQ:
+      LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
+            (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128;
+      break;
+    default: llvm_unreachable("Do not know how to soften this setcc!");
+    }
+  }

-  // Figure out the right, legal destination reg to copy into.
-  EVT EltTy = VT.getVectorElementType();
-
-  unsigned NumVectorRegs = 1;
-
-  // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
-  // could break down into LHS/RHS like LegalizeDAG does.
-  if (!isPowerOf2_32(NumElts)) {
-    NumVectorRegs = NumElts;
-    NumElts = 1;
+  // Use the target specific return value for comparisons lib calls.
+  EVT RetVT = getCmpLibcallReturnType();
+  SDValue Ops[2] = { NewLHS, NewRHS };
+  NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+  NewRHS = DAG.getConstant(0, RetVT);
+  CCCode = getCmpLibcallCC(LC1);
+  if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
+    SDValue Tmp = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(RetVT),
+                              NewLHS, NewRHS, DAG.getCondCode(CCCode));
+    NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+    NewLHS = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(RetVT), NewLHS,
+                         NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
+    NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
+    NewRHS = SDValue();
   }
-
-  // Divide the input until we get to a supported size. This will always
-  // end with a scalar if the target doesn't support vectors.
-  while (NumElts > 1 && !isTypeLegal(
-                                   EVT::getVectorVT(Context, EltTy, NumElts))) {
-    NumElts >>= 1;
-    NumVectorRegs <<= 1;
-  }
-
-  NumIntermediates = NumVectorRegs;
-
-  EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
-  if (!isTypeLegal(NewVT))
-    NewVT = EltTy;
-  IntermediateVT = NewVT;
-
-  MVT DestVT = getRegisterType(Context, NewVT);
-  RegisterVT = DestVT;
-  unsigned NewVTSize = NewVT.getSizeInBits();
-
-  // Convert sizes such as i33 to i64.
-  if (!isPowerOf2_32(NewVTSize))
-    NewVTSize = NextPowerOf2(NewVTSize);
-
-  if (EVT(DestVT).bitsLT(NewVT))   // Value is expanded, e.g. i64 -> i16.
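To make the two-libcall path of softenSetCCOperands concrete: for SETUEQ ("unordered or equal") on f32 it emits UO_F32 and OEQ_F32, tests each result against zero with the condition from getCmpLibcallCC (SETNE and SETEQ respectively), and ORs the two tests. The same computation in plain C++, calling the libgcc/compiler-rt soft-fp helpers directly (runnable when linked against the usual compiler runtime):

#include <cassert>
#include <cmath>

extern "C" int __unordsf2(float, float); // soft-fp comparison helpers from
extern "C" int __eqsf2(float, float);    // the compiler runtime

// ueq(a, b) == (a unordered b) || (a == b): the OR of setccs built above.
static bool ueq(float a, float b) {
  return __unordsf2(a, b) != 0 || __eqsf2(a, b) == 0;
}

int main() {
  assert(ueq(1.0f, 1.0f));          // equal
  assert(!ueq(1.0f, 2.0f));         // ordered and unequal
  assert(ueq(std::nanf(""), 1.0f)); // NaN operand -> unordered
}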
- return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); - - // Otherwise, promotion or legal types use the same number of registers as - // the vector decimated to the appropriate level. - return NumVectorRegs; -} - -/// Get the EVTs and ArgFlags collections that represent the legalized return -/// type of the given function. This does not require a DAG or a return value, -/// and is suitable for use before any DAGs for the function are constructed. -/// TODO: Move this out of TargetLowering.cpp. -void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr, - SmallVectorImpl<ISD::OutputArg> &Outs, - const TargetLowering &TLI) { - SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, ReturnType, ValueVTs); - unsigned NumValues = ValueVTs.size(); - if (NumValues == 0) return; - - for (unsigned j = 0, f = NumValues; j != f; ++j) { - EVT VT = ValueVTs[j]; - ISD::NodeType ExtendKind = ISD::ANY_EXTEND; - - if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) - ExtendKind = ISD::SIGN_EXTEND; - else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt)) - ExtendKind = ISD::ZERO_EXTEND; - - // FIXME: C calling convention requires the return type to be promoted to - // at least 32-bit. But this is not necessary for non-C calling - // conventions. The frontend should mark functions whose return values - // require promoting with signext or zeroext attributes. - if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { - MVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); - if (VT.bitsLT(MinVT)) - VT = MinVT; - } - - unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); - MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); - - // 'inreg' on function refers to return value - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::InReg)) - Flags.setInReg(); - - // Propagate extension type if any - if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) - Flags.setSExt(); - else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt)) - Flags.setZExt(); - - for (unsigned i = 0; i < NumParts; ++i) - Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0)); - } -} - -/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate -/// function arguments in the caller parameter area. This is the actual -/// alignment, not its logarithm. 
-unsigned TargetLowering::getByValTypeAlignment(Type *Ty) const { - return TD->getCallFrameTypeAlignment(Ty); } /// getJumpTableEncoding - Return the entry encoding for a jump table in the @@ -1199,103 +253,6 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { } //===----------------------------------------------------------------------===// -// TargetTransformInfo Helpers -//===----------------------------------------------------------------------===// - -int TargetLowering::InstructionOpcodeToISD(unsigned Opcode) const { - enum InstructionOpcodes { -#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM, -#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM -#include "llvm/IR/Instruction.def" - }; - switch (static_cast<InstructionOpcodes>(Opcode)) { - case Ret: return 0; - case Br: return 0; - case Switch: return 0; - case IndirectBr: return 0; - case Invoke: return 0; - case Resume: return 0; - case Unreachable: return 0; - case Add: return ISD::ADD; - case FAdd: return ISD::FADD; - case Sub: return ISD::SUB; - case FSub: return ISD::FSUB; - case Mul: return ISD::MUL; - case FMul: return ISD::FMUL; - case UDiv: return ISD::UDIV; - case SDiv: return ISD::UDIV; - case FDiv: return ISD::FDIV; - case URem: return ISD::UREM; - case SRem: return ISD::SREM; - case FRem: return ISD::FREM; - case Shl: return ISD::SHL; - case LShr: return ISD::SRL; - case AShr: return ISD::SRA; - case And: return ISD::AND; - case Or: return ISD::OR; - case Xor: return ISD::XOR; - case Alloca: return 0; - case Load: return ISD::LOAD; - case Store: return ISD::STORE; - case GetElementPtr: return 0; - case Fence: return 0; - case AtomicCmpXchg: return 0; - case AtomicRMW: return 0; - case Trunc: return ISD::TRUNCATE; - case ZExt: return ISD::ZERO_EXTEND; - case SExt: return ISD::SIGN_EXTEND; - case FPToUI: return ISD::FP_TO_UINT; - case FPToSI: return ISD::FP_TO_SINT; - case UIToFP: return ISD::UINT_TO_FP; - case SIToFP: return ISD::SINT_TO_FP; - case FPTrunc: return ISD::FP_ROUND; - case FPExt: return ISD::FP_EXTEND; - case PtrToInt: return ISD::BITCAST; - case IntToPtr: return ISD::BITCAST; - case BitCast: return ISD::BITCAST; - case ICmp: return ISD::SETCC; - case FCmp: return ISD::SETCC; - case PHI: return 0; - case Call: return 0; - case Select: return ISD::SELECT; - case UserOp1: return 0; - case UserOp2: return 0; - case VAArg: return 0; - case ExtractElement: return ISD::EXTRACT_VECTOR_ELT; - case InsertElement: return ISD::INSERT_VECTOR_ELT; - case ShuffleVector: return ISD::VECTOR_SHUFFLE; - case ExtractValue: return ISD::MERGE_VALUES; - case InsertValue: return ISD::MERGE_VALUES; - case LandingPad: return 0; - } - - llvm_unreachable("Unknown instruction type encountered!"); -} - -std::pair<unsigned, MVT> -TargetLowering::getTypeLegalizationCost(Type *Ty) const { - LLVMContext &C = Ty->getContext(); - EVT MTy = getValueType(Ty); - - unsigned Cost = 1; - // We keep legalizing the type until we find a legal kind. We assume that - // the only operation that costs anything is the split. After splitting - // we need to handle two types. - while (true) { - LegalizeKind LK = getTypeConversion(C, MTy); - - if (LK.first == TypeLegal) - return std::make_pair(Cost, MTy.getSimpleVT()); - - if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger) - Cost *= 2; - - // Keep legalizing the type. 
- MTy = LK.second; - } -} - -//===----------------------------------------------------------------------===// // Optimization Methods //===----------------------------------------------------------------------===// @@ -2239,7 +1196,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, APInt newMask = APInt::getLowBitsSet(maskWidth, width); for (unsigned offset=0; offset<origWidth/width; offset++) { if ((newMask & Mask) == Mask) { - if (!TD->isLittleEndian()) + if (!getDataLayout()->isLittleEndian()) bestOffset = (origWidth/width - offset - 1) * (width/8); else bestOffset = (uint64_t)offset * (width/8); @@ -2913,7 +1870,9 @@ PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { TargetLowering::ConstraintType TargetLowering::getConstraintType(const std::string &Constraint) const { - if (Constraint.size() == 1) { + unsigned S = Constraint.size(); + + if (S == 1) { switch (Constraint[0]) { default: break; case 'r': return C_RegisterClass; @@ -2942,9 +1901,11 @@ TargetLowering::getConstraintType(const std::string &Constraint) const { } } - if (Constraint.size() > 1 && Constraint[0] == '{' && - Constraint[Constraint.size()-1] == '}') + if (S > 1 && Constraint[0] == '{' && Constraint[S-1] == '}') { + if (S == 8 && !Constraint.compare(1, 6, "memory", 6)) // "{memory}" + return C_Memory; return C_Register; + } return C_Unknown; } @@ -3040,7 +2001,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, std::make_pair(0u, static_cast<const TargetRegisterClass*>(0)); // Figure out which register class contains this reg. - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetRegisterInfo *RI = getTargetMachine().getRegisterInfo(); for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(), E = RI->regclass_end(); RCI != E; ++RCI) { const TargetRegisterClass *RC = *RCI; @@ -3077,7 +2038,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, /// a matching constraint like "4". bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const { assert(!ConstraintCode.empty() && "No known constraint!"); - return isdigit(ConstraintCode[0]); + return isdigit(static_cast<unsigned char>(ConstraintCode[0])); } /// getMatchedOperand - If this is an input matching constraint, this method @@ -3164,7 +2125,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( // If OpTy is not a single value, it may be a struct/union that we // can tile with integers. if (!OpTy->isSingleValueType() && OpTy->isSized()) { - unsigned BitSize = TD->getTypeSizeInBits(OpTy); + unsigned BitSize = getDataLayout()->getTypeSizeInBits(OpTy); switch (BitSize) { default: break; case 1: @@ -3179,7 +2140,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( } } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) { OpInfo.ConstraintVT = MVT::getIntegerVT( - 8*TD->getPointerSize(PT->getAddressSpace())); + 8*getDataLayout()->getPointerSize(PT->getAddressSpace())); } else { OpInfo.ConstraintVT = MVT::getVT(OpTy, true); } @@ -3474,44 +2435,6 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, } } -//===----------------------------------------------------------------------===// -// Loop Strength Reduction hooks -//===----------------------------------------------------------------------===// - -/// isLegalAddressingMode - Return true if the addressing mode represented -/// by AM is legal for this target, for a load/store of the specified type. 
-bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
-                                           Type *Ty) const {
-  // The default implementation of this implements a conservative RISCy, r+r and
-  // r+i addr mode.
-
-  // Allows a sign-extended 16-bit immediate field.
-  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
-    return false;
-
-  // No global is ever allowed as a base.
-  if (AM.BaseGV)
-    return false;
-
-  // Only support r+r,
-  switch (AM.Scale) {
-  case 0:  // "r+i" or just "i", depending on HasBaseReg.
-    break;
-  case 1:
-    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
-      return false;
-    // Otherwise we have r+r or r+i.
-    break;
-  case 2:
-    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r or 2*r+i is not allowed.
-      return false;
-    // Allow 2*r as r+r.
-    break;
-  }
-
-  return true;
-}
-
 /// BuildExactDiv - Given an exact SDIV by a constant, create a multiplication
 /// with the multiplicative inverse of the constant.
 SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 09e923c..b58bb85 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -43,7 +43,7 @@ STATISTIC(NumSpilled, "Number of registers live across unwind edges");
 namespace {
   class SjLjEHPrepare : public FunctionPass {
-    const TargetLowering *TLI;
+    const TargetLoweringBase *TLI;
     Type *FunctionContextTy;
     Constant *RegisterFn;
     Constant *UnregisterFn;
@@ -58,7 +58,7 @@ namespace {
     AllocaInst *FuncCtx;
   public:
     static char ID; // Pass identification, replacement for typeid
-    explicit SjLjEHPrepare(const TargetLowering *tli = NULL)
+    explicit SjLjEHPrepare(const TargetLoweringBase *tli = NULL)
       : FunctionPass(ID), TLI(tli) { }
     bool doInitialization(Module &M);
     bool runOnFunction(Function &F);
@@ -82,7 +82,7 @@ namespace {
 char SjLjEHPrepare::ID = 0;

 // Public Interface To the SjLjEHPrepare pass.
-FunctionPass *llvm::createSjLjEHPreparePass(const TargetLowering *TLI) {
+FunctionPass *llvm::createSjLjEHPreparePass(const TargetLoweringBase *TLI) {
   return new SjLjEHPrepare(TLI);
 }
 // doInitialization - Set up declarations and types needed to process
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index 95faafa..20049a8 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -142,6 +142,76 @@ void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {
   ++NumLocalRenum;
 }

+// Repair indexes after adding and removing instructions.
+void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
+                                       MachineBasicBlock::iterator Begin,
+                                       MachineBasicBlock::iterator End) {
+  // FIXME: Is this really necessary? The only caller repairIntervalsForRange()
+  // does the same thing.
+  // Find anchor points, which are at the beginning/end of blocks or at
+  // instructions that already have indexes.
+  while (Begin != MBB->begin() && !hasIndex(Begin))
+    --Begin;
+  while (End != MBB->end() && !hasIndex(End))
+    ++End;
+
+  bool includeStart = (Begin == MBB->begin());
+  SlotIndex startIdx;
+  if (includeStart)
+    startIdx = getMBBStartIdx(MBB);
+  else
+    startIdx = getInstructionIndex(Begin);
+
+  SlotIndex endIdx;
+  if (End == MBB->end())
+    endIdx = getMBBEndIdx(MBB);
+  else
+    endIdx = getInstructionIndex(End);
+
+  // FIXME: Conceptually, this code is implementing an iterator on MBB that
+  // optionally includes an additional position prior to MBB->begin(), indicated
+  // by the includeStart flag.
This is done so that we can iterate MIs in a MBB + // in parallel with SlotIndexes, but there should be a better way to do this. + IndexList::iterator ListB = startIdx.listEntry(); + IndexList::iterator ListI = endIdx.listEntry(); + MachineBasicBlock::iterator MBBI = End; + bool pastStart = false; + while (ListI != ListB || MBBI != Begin || (includeStart && !pastStart)) { + assert(ListI->getIndex() >= startIdx.getIndex() && + (includeStart || !pastStart) && + "Decremented past the beginning of region to repair."); + + MachineInstr *SlotMI = ListI->getInstr(); + MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? MBBI : 0; + bool MBBIAtBegin = MBBI == Begin && (!includeStart || pastStart); + + if (SlotMI == MI && !MBBIAtBegin) { + --ListI; + if (MBBI != Begin) + --MBBI; + else + pastStart = true; + } else if (MI && mi2iMap.find(MI) == mi2iMap.end()) { + if (MBBI != Begin) + --MBBI; + else + pastStart = true; + } else { + --ListI; + if (SlotMI) + removeMachineInstrFromMaps(SlotMI); + } + } + + // In theory this could be combined with the previous loop, but it is tricky + // to update the IndexList while we are iterating it. + for (MachineBasicBlock::iterator I = End; I != Begin;) { + --I; + MachineInstr *MI = I; + if (!MI->isDebugValue() && mi2iMap.find(MI) == mi2iMap.end()) + insertMachineInstrInMaps(MI); + } +} #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void SlotIndexes::dump() const { diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index 42502eb..ec44b8c 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -23,7 +23,6 @@ #define DEBUG_TYPE "stackcoloring" #include "llvm/CodeGen/Passes.h" -#include "MachineTraceMetrics.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" @@ -103,12 +102,13 @@ class StackColoring : public MachineFunctionPass { }; /// Maps active slots (per bit) for each basic block. - DenseMap<MachineBasicBlock*, BlockLifetimeInfo> BlockLiveness; + typedef DenseMap<const MachineBasicBlock*, BlockLifetimeInfo> LivenessMap; + LivenessMap BlockLiveness; /// Maps serial numbers to basic blocks. - DenseMap<MachineBasicBlock*, int> BasicBlocks; + DenseMap<const MachineBasicBlock*, int> BasicBlocks; /// Maps basic blocks to a serial number. - SmallVector<MachineBasicBlock*, 8> BasicBlockNumbering; + SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering; /// Maps liveness intervals for each slot. SmallVector<LiveInterval*, 16> Intervals; @@ -145,7 +145,7 @@ public: private: /// Debug. - void dump(); + void dump() const; /// Removes all of the lifetime marker instructions from the function. /// \returns true if any markers were removed. 
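The StackColoring hunks that follow repeatedly replace BlockLiveness[*FI] subscripts with a single lookup whose result is held in a reference ("Keep a reference to avoid repeated lookups"). The same pattern with a standard container, for illustration:

#include <unordered_map>
#include <vector>

struct BlockInfoT { std::vector<bool> Begin, End; };

// One hash lookup up front instead of one per field access.
static void markStart(std::unordered_map<int, BlockInfoT> &Liveness,
                      int BlockID, unsigned Slot, unsigned NumSlots) {
  BlockInfoT &Info = Liveness[BlockID]; // single lookup (default-constructs)
  if (Info.Begin.size() < NumSlots)
    Info.Begin.resize(NumSlots);
  Info.Begin[Slot] = true;
}

int main() {
  std::unordered_map<int, BlockInfoT> Liveness;
  markStart(Liveness, /*BlockID=*/0, /*Slot=*/3, /*NumSlots=*/8);
}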
@@ -200,31 +200,35 @@ void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -void StackColoring::dump() { +void StackColoring::dump() const { for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF); FI != FE; ++FI) { - unsigned Num = BasicBlocks[*FI]; - DEBUG(dbgs()<<"Inspecting block #"<<Num<<" ["<<FI->getName()<<"]\n"); - Num = 0; + DEBUG(dbgs()<<"Inspecting block #"<<BasicBlocks.lookup(*FI)<< + " ["<<FI->getName()<<"]\n"); + + LivenessMap::const_iterator BI = BlockLiveness.find(*FI); + assert(BI != BlockLiveness.end() && "Block not found"); + const BlockLifetimeInfo &BlockInfo = BI->second; + DEBUG(dbgs()<<"BEGIN : {"); - for (unsigned i=0; i < BlockLiveness[*FI].Begin.size(); ++i) - DEBUG(dbgs()<<BlockLiveness[*FI].Begin.test(i)<<" "); + for (unsigned i=0; i < BlockInfo.Begin.size(); ++i) + DEBUG(dbgs()<<BlockInfo.Begin.test(i)<<" "); DEBUG(dbgs()<<"}\n"); DEBUG(dbgs()<<"END : {"); - for (unsigned i=0; i < BlockLiveness[*FI].End.size(); ++i) - DEBUG(dbgs()<<BlockLiveness[*FI].End.test(i)<<" "); + for (unsigned i=0; i < BlockInfo.End.size(); ++i) + DEBUG(dbgs()<<BlockInfo.End.test(i)<<" "); DEBUG(dbgs()<<"}\n"); DEBUG(dbgs()<<"LIVE_IN: {"); - for (unsigned i=0; i < BlockLiveness[*FI].LiveIn.size(); ++i) - DEBUG(dbgs()<<BlockLiveness[*FI].LiveIn.test(i)<<" "); + for (unsigned i=0; i < BlockInfo.LiveIn.size(); ++i) + DEBUG(dbgs()<<BlockInfo.LiveIn.test(i)<<" "); DEBUG(dbgs()<<"}\n"); DEBUG(dbgs()<<"LIVEOUT: {"); - for (unsigned i=0; i < BlockLiveness[*FI].LiveOut.size(); ++i) - DEBUG(dbgs()<<BlockLiveness[*FI].LiveOut.test(i)<<" "); + for (unsigned i=0; i < BlockInfo.LiveOut.size(); ++i) + DEBUG(dbgs()<<BlockInfo.LiveOut.test(i)<<" "); DEBUG(dbgs()<<"}\n"); } } @@ -242,8 +246,11 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { BasicBlocks[*FI] = BasicBlockNumbering.size(); BasicBlockNumbering.push_back(*FI); - BlockLiveness[*FI].Begin.resize(NumSlot); - BlockLiveness[*FI].End.resize(NumSlot); + // Keep a reference to avoid repeated lookups. + BlockLifetimeInfo &BlockInfo = BlockLiveness[*FI]; + + BlockInfo.Begin.resize(NumSlot); + BlockInfo.End.resize(NumSlot); for (MachineBasicBlock::iterator BI = (*FI)->begin(), BE = (*FI)->end(); BI != BE; ++BI) { @@ -255,7 +262,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { Markers.push_back(BI); bool IsStart = BI->getOpcode() == TargetOpcode::LIFETIME_START; - MachineOperand &MI = BI->getOperand(0); + const MachineOperand &MI = BI->getOperand(0); unsigned Slot = MI.getIndex(); MarkersFound++; @@ -267,15 +274,15 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { } if (IsStart) { - BlockLiveness[*FI].Begin.set(Slot); + BlockInfo.Begin.set(Slot); } else { - if (BlockLiveness[*FI].Begin.test(Slot)) { + if (BlockInfo.Begin.test(Slot)) { // Allocas that start and end within a single block are handled // specially when computing the LiveIntervals to avoid pessimizing // the liveness propagation. - BlockLiveness[*FI].Begin.reset(Slot); + BlockInfo.Begin.reset(Slot); } else { - BlockLiveness[*FI].End.set(Slot); + BlockInfo.End.set(Slot); } } } @@ -292,47 +299,58 @@ void StackColoring::calculateLocalLiveness() { // formulation, and END is equivalent to GEN. The result of this computation // is a map from blocks to bitvectors where the bitvectors represent which // allocas are live in/out of that block. 
- SmallPtrSet<MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(), - BasicBlockNumbering.end()); + SmallPtrSet<const MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(), + BasicBlockNumbering.end()); unsigned NumSSMIters = 0; bool changed = true; while (changed) { changed = false; ++NumSSMIters; - SmallPtrSet<MachineBasicBlock*, 8> NextBBSet; + SmallPtrSet<const MachineBasicBlock*, 8> NextBBSet; - for (SmallVector<MachineBasicBlock*, 8>::iterator + for (SmallVector<const MachineBasicBlock*, 8>::iterator PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end(); PI != PE; ++PI) { - MachineBasicBlock *BB = *PI; + const MachineBasicBlock *BB = *PI; if (!BBSet.count(BB)) continue; + // Use an iterator to avoid repeated lookups. + LivenessMap::iterator BI = BlockLiveness.find(BB); + assert(BI != BlockLiveness.end() && "Block not found"); + BlockLifetimeInfo &BlockInfo = BI->second; + BitVector LocalLiveIn; BitVector LocalLiveOut; // Forward propagation from begins to ends. - for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), - PE = BB->pred_end(); PI != PE; ++PI) - LocalLiveIn |= BlockLiveness[*PI].LiveOut; - LocalLiveIn |= BlockLiveness[BB].End; - LocalLiveIn.reset(BlockLiveness[BB].Begin); + for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(), + PE = BB->pred_end(); PI != PE; ++PI) { + LivenessMap::const_iterator I = BlockLiveness.find(*PI); + assert(I != BlockLiveness.end() && "Predecessor not found"); + LocalLiveIn |= I->second.LiveOut; + } + LocalLiveIn |= BlockInfo.End; + LocalLiveIn.reset(BlockInfo.Begin); // Reverse propagation from ends to begins. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) - LocalLiveOut |= BlockLiveness[*SI].LiveIn; - LocalLiveOut |= BlockLiveness[BB].Begin; - LocalLiveOut.reset(BlockLiveness[BB].End); + for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) { + LivenessMap::const_iterator I = BlockLiveness.find(*SI); + assert(I != BlockLiveness.end() && "Successor not found"); + LocalLiveOut |= I->second.LiveIn; + } + LocalLiveOut |= BlockInfo.Begin; + LocalLiveOut.reset(BlockInfo.End); LocalLiveIn |= LocalLiveOut; LocalLiveOut |= LocalLiveIn; // After adopting the live bits, we need to turn-off the bits which // are de-activated in this block. - LocalLiveOut.reset(BlockLiveness[BB].End); - LocalLiveIn.reset(BlockLiveness[BB].Begin); + LocalLiveOut.reset(BlockInfo.End); + LocalLiveIn.reset(BlockInfo.Begin); // If we have both BEGIN and END markers in the same basic block then // we know that the BEGIN marker comes after the END, because we already @@ -341,25 +359,25 @@ void StackColoring::calculateLocalLiveness() { // Want to enable the LIVE_IN and LIVE_OUT of slots that have both // BEGIN and END because it means that the value lives before and after // this basic block. 
- BitVector LocalEndBegin = BlockLiveness[BB].End; - LocalEndBegin &= BlockLiveness[BB].Begin; + BitVector LocalEndBegin = BlockInfo.End; + LocalEndBegin &= BlockInfo.Begin; LocalLiveIn |= LocalEndBegin; LocalLiveOut |= LocalEndBegin; - if (LocalLiveIn.test(BlockLiveness[BB].LiveIn)) { + if (LocalLiveIn.test(BlockInfo.LiveIn)) { changed = true; - BlockLiveness[BB].LiveIn |= LocalLiveIn; + BlockInfo.LiveIn |= LocalLiveIn; - for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(), PE = BB->pred_end(); PI != PE; ++PI) NextBBSet.insert(*PI); } - if (LocalLiveOut.test(BlockLiveness[BB].LiveOut)) { + if (LocalLiveOut.test(BlockInfo.LiveOut)) { changed = true; - BlockLiveness[BB].LiveOut |= LocalLiveOut; + BlockInfo.LiveOut |= LocalLiveOut; - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) NextBBSet.insert(*SI); } @@ -383,9 +401,9 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { Finishes.resize(NumSlots); // Create the interval for the basic blocks with lifetime markers in them. - for (SmallVector<MachineInstr*, 8>::iterator it = Markers.begin(), + for (SmallVectorImpl<MachineInstr*>::const_iterator it = Markers.begin(), e = Markers.end(); it != e; ++it) { - MachineInstr *MI = *it; + const MachineInstr *MI = *it; if (MI->getParent() != MBB) continue; @@ -394,7 +412,7 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { "Invalid Lifetime marker"); bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START; - MachineOperand &Mo = MI->getOperand(0); + const MachineOperand &Mo = MI->getOperand(0); int Slot = Mo.getIndex(); assert(Slot >= 0 && "Invalid slot"); @@ -481,7 +499,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { // Keep a list of *allocas* which need to be remapped. DenseMap<const AllocaInst*, const AllocaInst*> Allocas; - for (DenseMap<int, int>::iterator it = SlotRemap.begin(), + for (DenseMap<int, int>::const_iterator it = SlotRemap.begin(), e = SlotRemap.end(); it != e; ++it) { const AllocaInst *From = MFI->getObjectAllocation(it->first); const AllocaInst *To = MFI->getObjectAllocation(it->second); @@ -576,8 +594,8 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { } void StackColoring::removeInvalidSlotRanges() { - MachineFunction::iterator BB, BBE; - MachineBasicBlock::iterator I, IE; + MachineFunction::const_iterator BB, BBE; + MachineBasicBlock::const_iterator I, IE; for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB) for (I = BB->begin(), IE = BB->end(); I != IE; ++I) { @@ -596,7 +614,7 @@ void StackColoring::removeInvalidSlotRanges() { // Check all of the machine operands. 
for (unsigned i = 0 ; i < I->getNumOperands(); ++i) { - MachineOperand &MO = I->getOperand(i); + const MachineOperand &MO = I->getOperand(i); if (!MO.isFI()) continue; diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 665388b..f3be37c 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -16,6 +16,8 @@ #define DEBUG_TYPE "stack-protector" #include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/Dominators.h" #include "llvm/IR/Attributes.h" @@ -32,17 +34,27 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; +STATISTIC(NumFunProtected, "Number of functions protected"); +STATISTIC(NumAddrTaken, "Number of local variables that have their address" + " taken."); + namespace { class StackProtector : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining /// target type sizes. - const TargetLowering *TLI; + const TargetLoweringBase *TLI; Function *F; Module *M; DominatorTree *DT; + /// VisitedPHIs - The set of PHI nodes visited when determining + /// if a variable's reference has been taken. This set + /// is maintained to ensure we don't visit the same PHI node multiple + /// times. + SmallPtrSet<const PHINode*, 16> VisitedPHIs; + /// InsertStackProtectors - Insert code into the prologue and epilogue of /// the function. /// @@ -58,17 +70,21 @@ namespace { /// ContainsProtectableArray - Check whether the type either is an array or /// contains an array of sufficient size so that we need stack protectors /// for it. - bool ContainsProtectableArray(Type *Ty, bool InStruct = false) const; + bool ContainsProtectableArray(Type *Ty, bool Strong = false, + bool InStruct = false) const; + + /// \brief Check whether a stack allocation has its address taken. + bool HasAddressTaken(const Instruction *AI); /// RequiresStackProtector - Check whether or not this function needs a /// stack protector based upon the stack protector level. - bool RequiresStackProtector() const; + bool RequiresStackProtector(); public: static char ID; // Pass identification, replacement for typeid. StackProtector() : FunctionPass(ID), TLI(0) { initializeStackProtectorPass(*PassRegistry::getPassRegistry()); } - StackProtector(const TargetLowering *tli) + StackProtector(const TargetLoweringBase *tli) : FunctionPass(ID), TLI(tli) { initializeStackProtectorPass(*PassRegistry::getPassRegistry()); } @@ -85,7 +101,7 @@ char StackProtector::ID = 0; INITIALIZE_PASS(StackProtector, "stack-protector", "Insert stack protectors", false, false) -FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) { +FunctionPass *llvm::createStackProtectorPass(const TargetLoweringBase *tli) { return new StackProtector(tli); } @@ -96,15 +112,21 @@ bool StackProtector::runOnFunction(Function &Fn) { if (!RequiresStackProtector()) return false; + ++NumFunProtected; return InsertStackProtectors(); } /// ContainsProtectableArray - Check whether the type either is an array or /// contains a char array of sufficient size so that we need stack protectors /// for it. 
-bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const {
+bool StackProtector::ContainsProtectableArray(Type *Ty, bool Strong,
+                                              bool InStruct) const {
   if (!Ty) return false;
   if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+    // In strong mode any array, regardless of type and size, triggers a
+    // protector.
+    if (Strong)
+      return true;
     const TargetMachine &TM = TLI->getTargetMachine();
     if (!AT->getElementType()->isIntegerTy(8)) {
       Triple Trip(TM.getTargetTriple());
@@ -126,39 +148,103 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const {
   for (StructType::element_iterator I = ST->element_begin(),
          E = ST->element_end(); I != E; ++I)
-    if (ContainsProtectableArray(*I, true))
+    if (ContainsProtectableArray(*I, Strong, true))
       return true;
   return false;
 }
 
-/// RequiresStackProtector - Check whether or not this function needs a stack
-/// protector based upon the stack protector level. The heuristic we use is to
-/// add a guard variable to functions that call alloca, and functions with
-/// buffers larger than SSPBufferSize bytes.
-bool StackProtector::RequiresStackProtector() const {
+bool StackProtector::HasAddressTaken(const Instruction *AI) {
+  for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
+       UI != UE; ++UI) {
+    const User *U = *UI;
+    if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+      if (AI == SI->getValueOperand())
+        return true;
+    } else if (const PtrToIntInst *SI = dyn_cast<PtrToIntInst>(U)) {
+      if (AI == SI->getOperand(0))
+        return true;
+    } else if (isa<CallInst>(U)) {
+      return true;
+    } else if (isa<InvokeInst>(U)) {
+      return true;
+    } else if (const SelectInst *SI = dyn_cast<SelectInst>(U)) {
+      if (HasAddressTaken(SI))
+        return true;
+    } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
+      // Keep track of what PHI nodes we have already visited to ensure
+      // they are only visited once.
+      if (VisitedPHIs.insert(PN))
+        if (HasAddressTaken(PN))
+          return true;
+    } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+      if (HasAddressTaken(GEP))
+        return true;
+    } else if (const BitCastInst *BI = dyn_cast<BitCastInst>(U)) {
+      if (HasAddressTaken(BI))
+        return true;
+    }
+  }
+  return false;
+}
+
+/// \brief Check whether or not this function needs a stack protector based
+/// upon the stack protector level.
+///
+/// We use two heuristics: a standard (ssp) and strong (sspstrong).
+/// The standard heuristic will add a guard variable to functions that call
+/// alloca with either a variable size or a size >= SSPBufferSize, functions
+/// with character buffers larger than SSPBufferSize, and functions with
+/// aggregates containing character buffers larger than SSPBufferSize. The
+/// strong heuristic will add a guard variable to functions that call alloca
+/// regardless of size, functions with any buffer regardless of type and size,
+/// functions with aggregates that contain any buffer regardless of type and
+/// size, and functions that contain stack-based variables that have had their
+/// address taken.
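Before the implementation, a concrete illustration of the two levels just described. Assuming the default SSPBufferSize of 8, the first function below is protected by plain ssp because of its character buffer, while the second is protected only under sspstrong, which also fires on address-taken locals (both functions are invented examples, written as C-style C++):

#include <cstring>

static void takes_ptr(int *p) { *p = 1; }

// Standard (ssp): a character buffer of size >= SSPBufferSize gets a guard.
void protected_under_ssp(const char *s) {
  char buf[16];                          // 16 >= 8, so a guard is inserted
  std::strncpy(buf, s, sizeof(buf) - 1);
  buf[sizeof(buf) - 1] = '\0';
}

// Strong (sspstrong): taking the address of any local is enough, even for
// a scalar that the standard heuristic would ignore.
void protected_only_under_sspstrong() {
  int x = 0;
  takes_ptr(&x);                         // address taken: sspstrong guards
}

int main() {
  protected_under_ssp("example");
  protected_only_under_sspstrong();
  return 0;
}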
+bool StackProtector::RequiresStackProtector() { + bool Strong = false; if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::StackProtectReq)) return true; - - if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtect)) + else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackProtectStrong)) + Strong = true; + else if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackProtect)) return false; for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { BasicBlock *BB = I; for (BasicBlock::iterator - II = BB->begin(), IE = BB->end(); II != IE; ++II) + II = BB->begin(), IE = BB->end(); II != IE; ++II) { if (AllocaInst *AI = dyn_cast<AllocaInst>(II)) { - if (AI->isArrayAllocation()) - // This is a call to alloca with a variable size. Emit stack - // protectors. + if (AI->isArrayAllocation()) { + // SSP-Strong: Enable protectors for any call to alloca, regardless + // of size. + if (Strong) + return true; + + if (const ConstantInt *CI = + dyn_cast<ConstantInt>(AI->getArraySize())) { + unsigned BufferSize = TLI->getTargetMachine().Options.SSPBufferSize; + if (CI->getLimitedValue(BufferSize) >= BufferSize) + // A call to alloca with size >= SSPBufferSize requires + // stack protectors. + return true; + } else // A call to alloca with a variable size requires protectors. + return true; + } + + if (ContainsProtectableArray(AI->getAllocatedType(), Strong)) return true; - if (ContainsProtectableArray(AI->getAllocatedType())) + if (Strong && HasAddressTaken(AI)) { + ++NumAddrTaken; return true; + } } + } } return false; diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index d5fbf14..20eb918 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -80,7 +80,7 @@ unsigned TargetInstrInfo::getInlineAsmLength(const char *Str, if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(), strlen(MAI.getSeparatorString())) == 0) atInsnStart = true; - if (atInsnStart && !std::isspace(*Str)) { + if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) { Length += MAI.getMaxInstLength(); atInsnStart = false; } diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp new file mode 100644 index 0000000..2a02f6a --- /dev/null +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -0,0 +1,1290 @@ +//===-- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the TargetLoweringBase class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetLowering.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <cctype> +using namespace llvm; + +/// InitLibcallNames - Set default libcall names. +/// +static void InitLibcallNames(const char **Names, const TargetMachine &TM) { + Names[RTLIB::SHL_I16] = "__ashlhi3"; + Names[RTLIB::SHL_I32] = "__ashlsi3"; + Names[RTLIB::SHL_I64] = "__ashldi3"; + Names[RTLIB::SHL_I128] = "__ashlti3"; + Names[RTLIB::SRL_I16] = "__lshrhi3"; + Names[RTLIB::SRL_I32] = "__lshrsi3"; + Names[RTLIB::SRL_I64] = "__lshrdi3"; + Names[RTLIB::SRL_I128] = "__lshrti3"; + Names[RTLIB::SRA_I16] = "__ashrhi3"; + Names[RTLIB::SRA_I32] = "__ashrsi3"; + Names[RTLIB::SRA_I64] = "__ashrdi3"; + Names[RTLIB::SRA_I128] = "__ashrti3"; + Names[RTLIB::MUL_I8] = "__mulqi3"; + Names[RTLIB::MUL_I16] = "__mulhi3"; + Names[RTLIB::MUL_I32] = "__mulsi3"; + Names[RTLIB::MUL_I64] = "__muldi3"; + Names[RTLIB::MUL_I128] = "__multi3"; + Names[RTLIB::MULO_I32] = "__mulosi4"; + Names[RTLIB::MULO_I64] = "__mulodi4"; + Names[RTLIB::MULO_I128] = "__muloti4"; + Names[RTLIB::SDIV_I8] = "__divqi3"; + Names[RTLIB::SDIV_I16] = "__divhi3"; + Names[RTLIB::SDIV_I32] = "__divsi3"; + Names[RTLIB::SDIV_I64] = "__divdi3"; + Names[RTLIB::SDIV_I128] = "__divti3"; + Names[RTLIB::UDIV_I8] = "__udivqi3"; + Names[RTLIB::UDIV_I16] = "__udivhi3"; + Names[RTLIB::UDIV_I32] = "__udivsi3"; + Names[RTLIB::UDIV_I64] = "__udivdi3"; + Names[RTLIB::UDIV_I128] = "__udivti3"; + Names[RTLIB::SREM_I8] = "__modqi3"; + Names[RTLIB::SREM_I16] = "__modhi3"; + Names[RTLIB::SREM_I32] = "__modsi3"; + Names[RTLIB::SREM_I64] = "__moddi3"; + Names[RTLIB::SREM_I128] = "__modti3"; + Names[RTLIB::UREM_I8] = "__umodqi3"; + Names[RTLIB::UREM_I16] = "__umodhi3"; + Names[RTLIB::UREM_I32] = "__umodsi3"; + Names[RTLIB::UREM_I64] = "__umoddi3"; + Names[RTLIB::UREM_I128] = "__umodti3"; + + // These are generally not available. 
+ Names[RTLIB::SDIVREM_I8] = 0; + Names[RTLIB::SDIVREM_I16] = 0; + Names[RTLIB::SDIVREM_I32] = 0; + Names[RTLIB::SDIVREM_I64] = 0; + Names[RTLIB::SDIVREM_I128] = 0; + Names[RTLIB::UDIVREM_I8] = 0; + Names[RTLIB::UDIVREM_I16] = 0; + Names[RTLIB::UDIVREM_I32] = 0; + Names[RTLIB::UDIVREM_I64] = 0; + Names[RTLIB::UDIVREM_I128] = 0; + + Names[RTLIB::NEG_I32] = "__negsi2"; + Names[RTLIB::NEG_I64] = "__negdi2"; + Names[RTLIB::ADD_F32] = "__addsf3"; + Names[RTLIB::ADD_F64] = "__adddf3"; + Names[RTLIB::ADD_F80] = "__addxf3"; + Names[RTLIB::ADD_F128] = "__addtf3"; + Names[RTLIB::ADD_PPCF128] = "__gcc_qadd"; + Names[RTLIB::SUB_F32] = "__subsf3"; + Names[RTLIB::SUB_F64] = "__subdf3"; + Names[RTLIB::SUB_F80] = "__subxf3"; + Names[RTLIB::SUB_F128] = "__subtf3"; + Names[RTLIB::SUB_PPCF128] = "__gcc_qsub"; + Names[RTLIB::MUL_F32] = "__mulsf3"; + Names[RTLIB::MUL_F64] = "__muldf3"; + Names[RTLIB::MUL_F80] = "__mulxf3"; + Names[RTLIB::MUL_F128] = "__multf3"; + Names[RTLIB::MUL_PPCF128] = "__gcc_qmul"; + Names[RTLIB::DIV_F32] = "__divsf3"; + Names[RTLIB::DIV_F64] = "__divdf3"; + Names[RTLIB::DIV_F80] = "__divxf3"; + Names[RTLIB::DIV_F128] = "__divtf3"; + Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv"; + Names[RTLIB::REM_F32] = "fmodf"; + Names[RTLIB::REM_F64] = "fmod"; + Names[RTLIB::REM_F80] = "fmodl"; + Names[RTLIB::REM_F128] = "fmodl"; + Names[RTLIB::REM_PPCF128] = "fmodl"; + Names[RTLIB::FMA_F32] = "fmaf"; + Names[RTLIB::FMA_F64] = "fma"; + Names[RTLIB::FMA_F80] = "fmal"; + Names[RTLIB::FMA_F128] = "fmal"; + Names[RTLIB::FMA_PPCF128] = "fmal"; + Names[RTLIB::POWI_F32] = "__powisf2"; + Names[RTLIB::POWI_F64] = "__powidf2"; + Names[RTLIB::POWI_F80] = "__powixf2"; + Names[RTLIB::POWI_F128] = "__powitf2"; + Names[RTLIB::POWI_PPCF128] = "__powitf2"; + Names[RTLIB::SQRT_F32] = "sqrtf"; + Names[RTLIB::SQRT_F64] = "sqrt"; + Names[RTLIB::SQRT_F80] = "sqrtl"; + Names[RTLIB::SQRT_F128] = "sqrtl"; + Names[RTLIB::SQRT_PPCF128] = "sqrtl"; + Names[RTLIB::LOG_F32] = "logf"; + Names[RTLIB::LOG_F64] = "log"; + Names[RTLIB::LOG_F80] = "logl"; + Names[RTLIB::LOG_F128] = "logl"; + Names[RTLIB::LOG_PPCF128] = "logl"; + Names[RTLIB::LOG2_F32] = "log2f"; + Names[RTLIB::LOG2_F64] = "log2"; + Names[RTLIB::LOG2_F80] = "log2l"; + Names[RTLIB::LOG2_F128] = "log2l"; + Names[RTLIB::LOG2_PPCF128] = "log2l"; + Names[RTLIB::LOG10_F32] = "log10f"; + Names[RTLIB::LOG10_F64] = "log10"; + Names[RTLIB::LOG10_F80] = "log10l"; + Names[RTLIB::LOG10_F128] = "log10l"; + Names[RTLIB::LOG10_PPCF128] = "log10l"; + Names[RTLIB::EXP_F32] = "expf"; + Names[RTLIB::EXP_F64] = "exp"; + Names[RTLIB::EXP_F80] = "expl"; + Names[RTLIB::EXP_F128] = "expl"; + Names[RTLIB::EXP_PPCF128] = "expl"; + Names[RTLIB::EXP2_F32] = "exp2f"; + Names[RTLIB::EXP2_F64] = "exp2"; + Names[RTLIB::EXP2_F80] = "exp2l"; + Names[RTLIB::EXP2_F128] = "exp2l"; + Names[RTLIB::EXP2_PPCF128] = "exp2l"; + Names[RTLIB::SIN_F32] = "sinf"; + Names[RTLIB::SIN_F64] = "sin"; + Names[RTLIB::SIN_F80] = "sinl"; + Names[RTLIB::SIN_F128] = "sinl"; + Names[RTLIB::SIN_PPCF128] = "sinl"; + Names[RTLIB::COS_F32] = "cosf"; + Names[RTLIB::COS_F64] = "cos"; + Names[RTLIB::COS_F80] = "cosl"; + Names[RTLIB::COS_F128] = "cosl"; + Names[RTLIB::COS_PPCF128] = "cosl"; + Names[RTLIB::POW_F32] = "powf"; + Names[RTLIB::POW_F64] = "pow"; + Names[RTLIB::POW_F80] = "powl"; + Names[RTLIB::POW_F128] = "powl"; + Names[RTLIB::POW_PPCF128] = "powl"; + Names[RTLIB::CEIL_F32] = "ceilf"; + Names[RTLIB::CEIL_F64] = "ceil"; + Names[RTLIB::CEIL_F80] = "ceill"; + Names[RTLIB::CEIL_F128] = "ceill"; + Names[RTLIB::CEIL_PPCF128] = "ceill"; 
+ Names[RTLIB::TRUNC_F32] = "truncf"; + Names[RTLIB::TRUNC_F64] = "trunc"; + Names[RTLIB::TRUNC_F80] = "truncl"; + Names[RTLIB::TRUNC_F128] = "truncl"; + Names[RTLIB::TRUNC_PPCF128] = "truncl"; + Names[RTLIB::RINT_F32] = "rintf"; + Names[RTLIB::RINT_F64] = "rint"; + Names[RTLIB::RINT_F80] = "rintl"; + Names[RTLIB::RINT_F128] = "rintl"; + Names[RTLIB::RINT_PPCF128] = "rintl"; + Names[RTLIB::NEARBYINT_F32] = "nearbyintf"; + Names[RTLIB::NEARBYINT_F64] = "nearbyint"; + Names[RTLIB::NEARBYINT_F80] = "nearbyintl"; + Names[RTLIB::NEARBYINT_F128] = "nearbyintl"; + Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl"; + Names[RTLIB::FLOOR_F32] = "floorf"; + Names[RTLIB::FLOOR_F64] = "floor"; + Names[RTLIB::FLOOR_F80] = "floorl"; + Names[RTLIB::FLOOR_F128] = "floorl"; + Names[RTLIB::FLOOR_PPCF128] = "floorl"; + Names[RTLIB::COPYSIGN_F32] = "copysignf"; + Names[RTLIB::COPYSIGN_F64] = "copysign"; + Names[RTLIB::COPYSIGN_F80] = "copysignl"; + Names[RTLIB::COPYSIGN_F128] = "copysignl"; + Names[RTLIB::COPYSIGN_PPCF128] = "copysignl"; + Names[RTLIB::FPEXT_F64_F128] = "__extenddftf2"; + Names[RTLIB::FPEXT_F32_F128] = "__extendsftf2"; + Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2"; + Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee"; + Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee"; + Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2"; + Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2"; + Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2"; + Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2"; + Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; + Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2"; + Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2"; + Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi"; + Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi"; + Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; + Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; + Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; + Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi"; + Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi"; + Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi"; + Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi"; + Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti"; + Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi"; + Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi"; + Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti"; + Names[RTLIB::FPTOSINT_F128_I32] = "__fixtfsi"; + Names[RTLIB::FPTOSINT_F128_I64] = "__fixtfdi"; + Names[RTLIB::FPTOSINT_F128_I128] = "__fixtfti"; + Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; + Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; + Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; + Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi"; + Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi"; + Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; + Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; + Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; + Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi"; + Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi"; + Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; + Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; + Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti"; + Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi"; + Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi"; + Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti"; + Names[RTLIB::FPTOUINT_F128_I32] = "__fixunstfsi"; + Names[RTLIB::FPTOUINT_F128_I64] = "__fixunstfdi"; + Names[RTLIB::FPTOUINT_F128_I128] = "__fixunstfti"; + Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi"; + Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi"; + Names[RTLIB::FPTOUINT_PPCF128_I128] = 
"__fixunstfti"; + Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf"; + Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf"; + Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf"; + Names[RTLIB::SINTTOFP_I32_F128] = "__floatsitf"; + Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf"; + Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf"; + Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf"; + Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf"; + Names[RTLIB::SINTTOFP_I64_F128] = "__floatditf"; + Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf"; + Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf"; + Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf"; + Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf"; + Names[RTLIB::SINTTOFP_I128_F128] = "__floattitf"; + Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf"; + Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf"; + Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf"; + Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf"; + Names[RTLIB::UINTTOFP_I32_F128] = "__floatunsitf"; + Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf"; + Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf"; + Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf"; + Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf"; + Names[RTLIB::UINTTOFP_I64_F128] = "__floatunditf"; + Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf"; + Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf"; + Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf"; + Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf"; + Names[RTLIB::UINTTOFP_I128_F128] = "__floatuntitf"; + Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf"; + Names[RTLIB::OEQ_F32] = "__eqsf2"; + Names[RTLIB::OEQ_F64] = "__eqdf2"; + Names[RTLIB::OEQ_F128] = "__eqtf2"; + Names[RTLIB::UNE_F32] = "__nesf2"; + Names[RTLIB::UNE_F64] = "__nedf2"; + Names[RTLIB::UNE_F128] = "__netf2"; + Names[RTLIB::OGE_F32] = "__gesf2"; + Names[RTLIB::OGE_F64] = "__gedf2"; + Names[RTLIB::OGE_F128] = "__getf2"; + Names[RTLIB::OLT_F32] = "__ltsf2"; + Names[RTLIB::OLT_F64] = "__ltdf2"; + Names[RTLIB::OLT_F128] = "__lttf2"; + Names[RTLIB::OLE_F32] = "__lesf2"; + Names[RTLIB::OLE_F64] = "__ledf2"; + Names[RTLIB::OLE_F128] = "__letf2"; + Names[RTLIB::OGT_F32] = "__gtsf2"; + Names[RTLIB::OGT_F64] = "__gtdf2"; + Names[RTLIB::OGT_F128] = "__gttf2"; + Names[RTLIB::UO_F32] = "__unordsf2"; + Names[RTLIB::UO_F64] = "__unorddf2"; + Names[RTLIB::UO_F128] = "__unordtf2"; + Names[RTLIB::O_F32] = "__unordsf2"; + Names[RTLIB::O_F64] = "__unorddf2"; + Names[RTLIB::O_F128] = "__unordtf2"; + Names[RTLIB::MEMCPY] = "memcpy"; + Names[RTLIB::MEMMOVE] = "memmove"; + Names[RTLIB::MEMSET] = "memset"; + Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8"; + Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1"; + Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2"; + Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4"; + Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8"; + Names[RTLIB::SYNC_FETCH_AND_SUB_1] = 
"__sync_fetch_and_sub_1"; + Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2"; + Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4"; + Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8"; + Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1"; + Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2"; + Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4"; + Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8"; + Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1"; + Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2"; + Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4"; + Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8"; + Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1"; + Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2"; + Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4"; + Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8"; + Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1"; + Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2"; + Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4"; + Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8"; + + if (Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU) { + Names[RTLIB::SINCOS_F32] = "sincosf"; + Names[RTLIB::SINCOS_F64] = "sincos"; + Names[RTLIB::SINCOS_F80] = "sincosl"; + Names[RTLIB::SINCOS_F128] = "sincosl"; + Names[RTLIB::SINCOS_PPCF128] = "sincosl"; + } else { + // These are generally not available. + Names[RTLIB::SINCOS_F32] = 0; + Names[RTLIB::SINCOS_F64] = 0; + Names[RTLIB::SINCOS_F80] = 0; + Names[RTLIB::SINCOS_F128] = 0; + Names[RTLIB::SINCOS_PPCF128] = 0; + } +} + +/// InitLibcallCallingConvs - Set default libcall CallingConvs. +/// +static void InitLibcallCallingConvs(CallingConv::ID *CCs) { + for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) { + CCs[i] = CallingConv::C; + } +} + +/// getFPEXT - Return the FPEXT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::f64) + return FPEXT_F32_F64; + if (RetVT == MVT::f128) + return FPEXT_F32_F128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::f128) + return FPEXT_F64_F128; + } + + return UNKNOWN_LIBCALL; +} + +/// getFPROUND - Return the FPROUND_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { + if (RetVT == MVT::f32) { + if (OpVT == MVT::f64) + return FPROUND_F64_F32; + if (OpVT == MVT::f80) + return FPROUND_F80_F32; + if (OpVT == MVT::f128) + return FPROUND_F128_F32; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F32; + } else if (RetVT == MVT::f64) { + if (OpVT == MVT::f80) + return FPROUND_F80_F64; + if (OpVT == MVT::f128) + return FPROUND_F128_F64; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F64; + } + + return UNKNOWN_LIBCALL; +} + +/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. 
+RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::i8) + return FPTOSINT_F32_I8; + if (RetVT == MVT::i16) + return FPTOSINT_F32_I16; + if (RetVT == MVT::i32) + return FPTOSINT_F32_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F32_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F32_I128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i8) + return FPTOSINT_F64_I8; + if (RetVT == MVT::i16) + return FPTOSINT_F64_I16; + if (RetVT == MVT::i32) + return FPTOSINT_F64_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F64_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F64_I128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::i32) + return FPTOSINT_F80_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F80_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F80_I128; + } else if (OpVT == MVT::f128) { + if (RetVT == MVT::i32) + return FPTOSINT_F128_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F128_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F128_I128; + } else if (OpVT == MVT::ppcf128) { + if (RetVT == MVT::i32) + return FPTOSINT_PPCF128_I32; + if (RetVT == MVT::i64) + return FPTOSINT_PPCF128_I64; + if (RetVT == MVT::i128) + return FPTOSINT_PPCF128_I128; + } + return UNKNOWN_LIBCALL; +} + +/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::i8) + return FPTOUINT_F32_I8; + if (RetVT == MVT::i16) + return FPTOUINT_F32_I16; + if (RetVT == MVT::i32) + return FPTOUINT_F32_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F32_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F32_I128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i8) + return FPTOUINT_F64_I8; + if (RetVT == MVT::i16) + return FPTOUINT_F64_I16; + if (RetVT == MVT::i32) + return FPTOUINT_F64_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F64_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F64_I128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::i32) + return FPTOUINT_F80_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F80_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F80_I128; + } else if (OpVT == MVT::f128) { + if (RetVT == MVT::i32) + return FPTOUINT_F128_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F128_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F128_I128; + } else if (OpVT == MVT::ppcf128) { + if (RetVT == MVT::i32) + return FPTOUINT_PPCF128_I32; + if (RetVT == MVT::i64) + return FPTOUINT_PPCF128_I64; + if (RetVT == MVT::i128) + return FPTOUINT_PPCF128_I128; + } + return UNKNOWN_LIBCALL; +} + +/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. 
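Each of these getters is a pure dispatch from an (operand type, result type) pair to a libcall enum, which then indexes the name and calling-convention tables filled in by InitLibcallNames and InitLibcallCallingConvs above. A toy reduction of the scheme to two conversions (the enum tags and libcall names mirror entries from the tables above; everything else is simplified and not the LLVM API):

#include <cassert>
#include <cstring>

// One enum value per (operation, type) pair, a name table indexed by it,
// and a getter that dispatches on the types.
enum Libcall { FPTOSINT_F32_I32, FPTOSINT_F64_I32, UNKNOWN_LIBCALL };
enum SimpleVT { f32, f64, i32 };

static const char *Names[UNKNOWN_LIBCALL + 1];

static void initNames() {
  Names[FPTOSINT_F32_I32] = "__fixsfsi";
  Names[FPTOSINT_F64_I32] = "__fixdfsi";
  Names[UNKNOWN_LIBCALL] = 0;
}

static Libcall getFPTOSINT(SimpleVT OpVT, SimpleVT RetVT) {
  if (OpVT == f32 && RetVT == i32) return FPTOSINT_F32_I32;
  if (OpVT == f64 && RetVT == i32) return FPTOSINT_F64_I32;
  return UNKNOWN_LIBCALL;
}

int main() {
  initNames();
  Libcall LC = getFPTOSINT(f64, i32);
  assert(LC != UNKNOWN_LIBCALL && std::strcmp(Names[LC], "__fixdfsi") == 0);
  return 0;
}

Keeping the enum, rather than the string, as the unit of dispatch is what lets targets override individual entries without touching the lookup logic.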
+RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::i32) { + if (RetVT == MVT::f32) + return SINTTOFP_I32_F32; + if (RetVT == MVT::f64) + return SINTTOFP_I32_F64; + if (RetVT == MVT::f80) + return SINTTOFP_I32_F80; + if (RetVT == MVT::f128) + return SINTTOFP_I32_F128; + if (RetVT == MVT::ppcf128) + return SINTTOFP_I32_PPCF128; + } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f32) + return SINTTOFP_I64_F32; + if (RetVT == MVT::f64) + return SINTTOFP_I64_F64; + if (RetVT == MVT::f80) + return SINTTOFP_I64_F80; + if (RetVT == MVT::f128) + return SINTTOFP_I64_F128; + if (RetVT == MVT::ppcf128) + return SINTTOFP_I64_PPCF128; + } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f32) + return SINTTOFP_I128_F32; + if (RetVT == MVT::f64) + return SINTTOFP_I128_F64; + if (RetVT == MVT::f80) + return SINTTOFP_I128_F80; + if (RetVT == MVT::f128) + return SINTTOFP_I128_F128; + if (RetVT == MVT::ppcf128) + return SINTTOFP_I128_PPCF128; + } + return UNKNOWN_LIBCALL; +} + +/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::i32) { + if (RetVT == MVT::f32) + return UINTTOFP_I32_F32; + if (RetVT == MVT::f64) + return UINTTOFP_I32_F64; + if (RetVT == MVT::f80) + return UINTTOFP_I32_F80; + if (RetVT == MVT::f128) + return UINTTOFP_I32_F128; + if (RetVT == MVT::ppcf128) + return UINTTOFP_I32_PPCF128; + } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f32) + return UINTTOFP_I64_F32; + if (RetVT == MVT::f64) + return UINTTOFP_I64_F64; + if (RetVT == MVT::f80) + return UINTTOFP_I64_F80; + if (RetVT == MVT::f128) + return UINTTOFP_I64_F128; + if (RetVT == MVT::ppcf128) + return UINTTOFP_I64_PPCF128; + } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f32) + return UINTTOFP_I128_F32; + if (RetVT == MVT::f64) + return UINTTOFP_I128_F64; + if (RetVT == MVT::f80) + return UINTTOFP_I128_F80; + if (RetVT == MVT::f128) + return UINTTOFP_I128_F128; + if (RetVT == MVT::ppcf128) + return UINTTOFP_I128_PPCF128; + } + return UNKNOWN_LIBCALL; +} + +/// InitCmpLibcallCCs - Set default comparison libcall CC. +/// +static void InitCmpLibcallCCs(ISD::CondCode *CCs) { + memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL); + CCs[RTLIB::OEQ_F32] = ISD::SETEQ; + CCs[RTLIB::OEQ_F64] = ISD::SETEQ; + CCs[RTLIB::OEQ_F128] = ISD::SETEQ; + CCs[RTLIB::UNE_F32] = ISD::SETNE; + CCs[RTLIB::UNE_F64] = ISD::SETNE; + CCs[RTLIB::UNE_F128] = ISD::SETNE; + CCs[RTLIB::OGE_F32] = ISD::SETGE; + CCs[RTLIB::OGE_F64] = ISD::SETGE; + CCs[RTLIB::OGE_F128] = ISD::SETGE; + CCs[RTLIB::OLT_F32] = ISD::SETLT; + CCs[RTLIB::OLT_F64] = ISD::SETLT; + CCs[RTLIB::OLT_F128] = ISD::SETLT; + CCs[RTLIB::OLE_F32] = ISD::SETLE; + CCs[RTLIB::OLE_F64] = ISD::SETLE; + CCs[RTLIB::OLE_F128] = ISD::SETLE; + CCs[RTLIB::OGT_F32] = ISD::SETGT; + CCs[RTLIB::OGT_F64] = ISD::SETGT; + CCs[RTLIB::OGT_F128] = ISD::SETGT; + CCs[RTLIB::UO_F32] = ISD::SETNE; + CCs[RTLIB::UO_F64] = ISD::SETNE; + CCs[RTLIB::UO_F128] = ISD::SETNE; + CCs[RTLIB::O_F32] = ISD::SETEQ; + CCs[RTLIB::O_F64] = ISD::SETEQ; + CCs[RTLIB::O_F128] = ISD::SETEQ; +} + +/// NOTE: The constructor takes ownership of TLOF. +TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, + const TargetLoweringObjectFile *tlof) + : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) { + // All operations default to being supported. 
+ memset(OpActions, 0, sizeof(OpActions)); + memset(LoadExtActions, 0, sizeof(LoadExtActions)); + memset(TruncStoreActions, 0, sizeof(TruncStoreActions)); + memset(IndexedModeActions, 0, sizeof(IndexedModeActions)); + memset(CondCodeActions, 0, sizeof(CondCodeActions)); + + // Set default actions for various operations. + for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { + // Default all indexed load / store to expand. + for (unsigned IM = (unsigned)ISD::PRE_INC; + IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { + setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand); + setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand); + } + + // These operations default to expand. + setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); + } + + // Most targets ignore the @llvm.prefetch intrinsic. + setOperationAction(ISD::PREFETCH, MVT::Other, Expand); + + // ConstantFP nodes default to expand. Targets can either change this to + // Legal, in which case all fp constants are legal, or use isFPImmLegal() + // to optimize expansions for certain constants. + setOperationAction(ISD::ConstantFP, MVT::f16, Expand); + setOperationAction(ISD::ConstantFP, MVT::f32, Expand); + setOperationAction(ISD::ConstantFP, MVT::f64, Expand); + setOperationAction(ISD::ConstantFP, MVT::f80, Expand); + setOperationAction(ISD::ConstantFP, MVT::f128, Expand); + + // These library functions default to expand. + setOperationAction(ISD::FLOG , MVT::f16, Expand); + setOperationAction(ISD::FLOG2, MVT::f16, Expand); + setOperationAction(ISD::FLOG10, MVT::f16, Expand); + setOperationAction(ISD::FEXP , MVT::f16, Expand); + setOperationAction(ISD::FEXP2, MVT::f16, Expand); + setOperationAction(ISD::FFLOOR, MVT::f16, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand); + setOperationAction(ISD::FCEIL, MVT::f16, Expand); + setOperationAction(ISD::FRINT, MVT::f16, Expand); + setOperationAction(ISD::FTRUNC, MVT::f16, Expand); + setOperationAction(ISD::FLOG , MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG10, MVT::f32, Expand); + setOperationAction(ISD::FEXP , MVT::f32, Expand); + setOperationAction(ISD::FEXP2, MVT::f32, Expand); + setOperationAction(ISD::FFLOOR, MVT::f32, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand); + setOperationAction(ISD::FCEIL, MVT::f32, Expand); + setOperationAction(ISD::FRINT, MVT::f32, Expand); + setOperationAction(ISD::FTRUNC, MVT::f32, Expand); + setOperationAction(ISD::FLOG , MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10, MVT::f64, Expand); + setOperationAction(ISD::FEXP , MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); + setOperationAction(ISD::FCEIL, MVT::f64, Expand); + setOperationAction(ISD::FRINT, MVT::f64, Expand); + setOperationAction(ISD::FTRUNC, MVT::f64, Expand); + setOperationAction(ISD::FLOG , MVT::f128, Expand); + setOperationAction(ISD::FLOG2, MVT::f128, Expand); + setOperationAction(ISD::FLOG10, MVT::f128, Expand); + setOperationAction(ISD::FEXP , MVT::f128, Expand); + setOperationAction(ISD::FEXP2, MVT::f128, Expand); + setOperationAction(ISD::FFLOOR, MVT::f128, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand); + setOperationAction(ISD::FCEIL, MVT::f128, Expand); + 
setOperationAction(ISD::FRINT, MVT::f128, Expand); + setOperationAction(ISD::FTRUNC, MVT::f128, Expand); + + // Default ISD::TRAP to expand (which turns it into abort). + setOperationAction(ISD::TRAP, MVT::Other, Expand); + + // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand" + // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP. + // + setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); + + IsLittleEndian = TD->isLittleEndian(); + PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0)); + memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); + memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); + MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; + MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize + = MaxStoresPerMemmoveOptSize = 4; + BenefitFromCodePlacementOpt = false; + UseUnderscoreSetJmp = false; + UseUnderscoreLongJmp = false; + SelectIsExpensive = false; + IntDivIsCheap = false; + Pow2DivIsCheap = false; + JumpIsExpensive = false; + PredictableSelectIsExpensive = false; + StackPointerRegisterToSaveRestore = 0; + ExceptionPointerRegister = 0; + ExceptionSelectorRegister = 0; + BooleanContents = UndefinedBooleanContent; + BooleanVectorContents = UndefinedBooleanContent; + SchedPreferenceInfo = Sched::ILP; + JumpBufSize = 0; + JumpBufAlignment = 0; + MinFunctionAlignment = 0; + PrefFunctionAlignment = 0; + PrefLoopAlignment = 0; + MinStackArgumentAlignment = 1; + ShouldFoldAtomicFences = false; + InsertFencesForAtomic = false; + SupportJumpTables = true; + MinimumJumpTableEntries = 4; + + InitLibcallNames(LibcallRoutineNames, TM); + InitCmpLibcallCCs(CmpLibcallCCs); + InitLibcallCallingConvs(LibcallCallingConvs); +} + +TargetLoweringBase::~TargetLoweringBase() { + delete &TLOF; +} + +MVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const { + return MVT::getIntegerVT(8*TD->getPointerSize(0)); +} + +/// canOpTrap - Returns true if the operation can trap for the value type. +/// VT must be a legal type. +bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const { + assert(isTypeLegal(VT)); + switch (Op) { + default: + return false; + case ISD::FDIV: + case ISD::FREM: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + return true; + } +} + + +static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, + unsigned &NumIntermediates, + MVT &RegisterVT, + TargetLoweringBase *TLI) { + // Figure out the right, legal destination reg to copy into. + unsigned NumElts = VT.getVectorNumElements(); + MVT EltTy = VT.getVectorElementType(); + + unsigned NumVectorRegs = 1; + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + // could break down into LHS/RHS like LegalizeDAG does. + if (!isPowerOf2_32(NumElts)) { + NumVectorRegs = NumElts; + NumElts = 1; + } + + // Divide the input until we get to a supported size. This will always + // end with a scalar if the target doesn't support vectors. + while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { + NumElts >>= 1; + NumVectorRegs <<= 1; + } + + NumIntermediates = NumVectorRegs; + + MVT NewVT = MVT::getVectorVT(EltTy, NumElts); + if (!TLI->isTypeLegal(NewVT)) + NewVT = EltTy; + IntermediateVT = NewVT; + + unsigned NewVTSize = NewVT.getSizeInBits(); + + // Convert sizes such as i33 to i64. 
+ if (!isPowerOf2_32(NewVTSize)) + NewVTSize = NextPowerOf2(NewVTSize); + + MVT DestVT = TLI->getRegisterType(NewVT); + RegisterVT = DestVT; + if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. + return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); + + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; +} + +/// isLegalRC - Return true if the value types that can be represented by the +/// specified register class are all legal. +bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const { + for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); + I != E; ++I) { + if (isTypeLegal(*I)) + return true; + } + return false; +} + +/// findRepresentativeClass - Return the largest legal super-reg register class +/// of the register class for the specified type and its associated "cost". +std::pair<const TargetRegisterClass*, uint8_t> +TargetLoweringBase::findRepresentativeClass(MVT VT) const { + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy]; + if (!RC) + return std::make_pair(RC, 0); + + // Compute the set of all super-register classes. + BitVector SuperRegRC(TRI->getNumRegClasses()); + for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) + SuperRegRC.setBitsInMask(RCI.getMask()); + + // Find the first legal register class with the largest spill size. + const TargetRegisterClass *BestRC = RC; + for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) { + const TargetRegisterClass *SuperRC = TRI->getRegClass(i); + // We want the largest possible spill size. + if (SuperRC->getSize() <= BestRC->getSize()) + continue; + if (!isLegalRC(SuperRC)) + continue; + BestRC = SuperRC; + } + return std::make_pair(BestRC, 1); +} + +/// computeRegisterProperties - Once all of the register classes are added, +/// this allows us to compute derived properties we expose. +void TargetLoweringBase::computeRegisterProperties() { + assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && + "Too many value types for ValueTypeActions to hold!"); + + // Everything defaults to needing one register. + for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { + NumRegistersForVT[i] = 1; + RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i; + } + // ...except isVoid, which doesn't need any registers. + NumRegistersForVT[MVT::isVoid] = 0; + + // Find the largest integer register class. + unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE; + for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg) + assert(LargestIntReg != MVT::i1 && "No integer registers defined!"); + + // Every integer value type larger than this largest register takes twice as + // many registers to represent as the previous ValueType. + for (unsigned ExpandedReg = LargestIntReg + 1; + ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) { + NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; + RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg; + TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1); + ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg, + TypeExpandInteger); + } + + // Inspect all of the ValueType's smaller than the largest integer + // register to see which ones need promotion. 
+ unsigned LegalIntReg = LargestIntReg; + for (unsigned IntReg = LargestIntReg - 1; + IntReg >= (unsigned)MVT::i1; --IntReg) { + MVT IVT = (MVT::SimpleValueType)IntReg; + if (isTypeLegal(IVT)) { + LegalIntReg = IntReg; + } else { + RegisterTypeForVT[IntReg] = TransformToType[IntReg] = + (const MVT::SimpleValueType)LegalIntReg; + ValueTypeActions.setTypeAction(IVT, TypePromoteInteger); + } + } + + // ppcf128 type is really two f64's. + if (!isTypeLegal(MVT::ppcf128)) { + NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64]; + RegisterTypeForVT[MVT::ppcf128] = MVT::f64; + TransformToType[MVT::ppcf128] = MVT::f64; + ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat); + } + + // Decide how to handle f64. If the target does not have native f64 support, + // expand it to i64 and we will be generating soft float library calls. + if (!isTypeLegal(MVT::f64)) { + NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64]; + RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64]; + TransformToType[MVT::f64] = MVT::i64; + ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat); + } + + // Decide how to handle f32. If the target does not have native support for + // f32, promote it to f64 if it is legal. Otherwise, expand it to i32. + if (!isTypeLegal(MVT::f32)) { + if (isTypeLegal(MVT::f64)) { + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64]; + TransformToType[MVT::f32] = MVT::f64; + ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger); + } else { + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; + TransformToType[MVT::f32] = MVT::i32; + ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); + } + } + + // Loop over all of the vector value types to see which need transformations. + for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; + i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { + MVT VT = (MVT::SimpleValueType)i; + if (isTypeLegal(VT)) continue; + + // Determine if there is a legal wider type. If so, we should promote to + // that wider vector type. + MVT EltVT = VT.getVectorElementType(); + unsigned NElts = VT.getVectorNumElements(); + if (NElts != 1 && !shouldSplitVectorElementType(EltVT)) { + bool IsLegalWiderType = false; + // First try to promote the elements of integer vectors. If no legal + // promotion was found, fallback to the widen-vector method. + for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType)nVT; + // Promote vectors of integers to vectors with the same number + // of elements, with a wider element type. + if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits() + && SVT.getVectorNumElements() == NElts && + isTypeLegal(SVT) && SVT.getScalarType().isInteger()) { + TransformToType[i] = SVT; + RegisterTypeForVT[i] = SVT; + NumRegistersForVT[i] = 1; + ValueTypeActions.setTypeAction(VT, TypePromoteInteger); + IsLegalWiderType = true; + break; + } + } + + if (IsLegalWiderType) continue; + + // Try to widen the vector. 
+      for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+        MVT SVT = (MVT::SimpleValueType)nVT;
+        if (SVT.getVectorElementType() == EltVT &&
+            SVT.getVectorNumElements() > NElts &&
+            isTypeLegal(SVT)) {
+          TransformToType[i] = SVT;
+          RegisterTypeForVT[i] = SVT;
+          NumRegistersForVT[i] = 1;
+          ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+          IsLegalWiderType = true;
+          break;
+        }
+      }
+      if (IsLegalWiderType) continue;
+    }
+
+    MVT IntermediateVT;
+    MVT RegisterVT;
+    unsigned NumIntermediates;
+    NumRegistersForVT[i] =
+      getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
+                                RegisterVT, this);
+    RegisterTypeForVT[i] = RegisterVT;
+
+    MVT NVT = VT.getPow2VectorType();
+    if (NVT == VT) {
+      // Type is already a power of 2. The default action is to split.
+      TransformToType[i] = MVT::Other;
+      unsigned NumElts = VT.getVectorNumElements();
+      ValueTypeActions.setTypeAction(VT,
+        NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
+    } else {
+      TransformToType[i] = NVT;
+      ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+    }
+  }
+
+  // Determine the 'representative' register class for each value type.
+  // A representative register class is the largest (meaning one which is
+  // not a sub-register class / subreg register class) legal register class for
+  // a group of value types. For example, on i386, for i8, i16, and i32 the
+  // representative would be GR32; while on x86_64 it's GR64.
+  for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+    const TargetRegisterClass* RRC;
+    uint8_t Cost;
+    tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i);
+    RepRegClassForVT[i] = RRC;
+    RepRegClassCostForVT[i] = Cost;
+  }
+}
+
+EVT TargetLoweringBase::getSetCCResultType(EVT VT) const {
+  assert(!VT.isVector() && "No default SetCC type for vectors!");
+  return getPointerTy(0).SimpleTy;
+}
+
+MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const {
+  return MVT::i32; // return the default value
+}
+
+/// getVectorTypeBreakdown - Vector types are broken down into some number of
+/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
+/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
+/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
+///
+/// This method returns the number of registers needed, and the VT for each
+/// register. It also returns the VT and quantity of the intermediate values
+/// before they are promoted/expanded.
+///
+unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
+                                                    EVT &IntermediateVT,
+                                                    unsigned &NumIntermediates,
+                                                    MVT &RegisterVT) const {
+  unsigned NumElts = VT.getVectorNumElements();
+
+  // If there is a wider vector type with the same element type as this one,
+  // or a promoted vector type that has the same number of elements which
+  // are wider, then we should convert to that legal vector type.
+  // This handles things like <2 x float> -> <4 x float> and
+  // <4 x i1> -> <4 x i32>.
+  LegalizeTypeAction TA = getTypeAction(Context, VT);
+  if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
+    EVT RegisterEVT = getTypeToTransformTo(Context, VT);
+    if (isTypeLegal(RegisterEVT)) {
+      IntermediateVT = RegisterEVT;
+      RegisterVT = RegisterEVT.getSimpleVT();
+      NumIntermediates = 1;
+      return 1;
+    }
+  }
+
+  // Figure out the right, legal destination reg to copy into.
+  EVT EltTy = VT.getVectorElementType();
+
+  unsigned NumVectorRegs = 1;
+
+  // FIXME: We don't support non-power-of-2-sized vectors for now.
Ideally we + // could break down into LHS/RHS like LegalizeDAG does. + if (!isPowerOf2_32(NumElts)) { + NumVectorRegs = NumElts; + NumElts = 1; + } + + // Divide the input until we get to a supported size. This will always + // end with a scalar if the target doesn't support vectors. + while (NumElts > 1 && !isTypeLegal( + EVT::getVectorVT(Context, EltTy, NumElts))) { + NumElts >>= 1; + NumVectorRegs <<= 1; + } + + NumIntermediates = NumVectorRegs; + + EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts); + if (!isTypeLegal(NewVT)) + NewVT = EltTy; + IntermediateVT = NewVT; + + MVT DestVT = getRegisterType(Context, NewVT); + RegisterVT = DestVT; + unsigned NewVTSize = NewVT.getSizeInBits(); + + // Convert sizes such as i33 to i64. + if (!isPowerOf2_32(NewVTSize)) + NewVTSize = NextPowerOf2(NewVTSize); + + if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. + return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); + + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; +} + +/// Get the EVTs and ArgFlags collections that represent the legalized return +/// type of the given function. This does not require a DAG or a return value, +/// and is suitable for use before any DAGs for the function are constructed. +/// TODO: Move this out of TargetLowering.cpp. +void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr, + SmallVectorImpl<ISD::OutputArg> &Outs, + const TargetLowering &TLI) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, ReturnType, ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) return; + + for (unsigned j = 0, f = NumValues; j != f; ++j) { + EVT VT = ValueVTs[j]; + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) + ExtendKind = ISD::SIGN_EXTEND; + else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt)) + ExtendKind = ISD::ZERO_EXTEND; + + // FIXME: C calling convention requires the return type to be promoted to + // at least 32-bit. But this is not necessary for non-C calling + // conventions. The frontend should mark functions whose return values + // require promoting with signext or zeroext attributes. + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { + MVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); + if (VT.bitsLT(MinVT)) + VT = MinVT; + } + + unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); + MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); + + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::InReg)) + Flags.setInReg(); + + // Propagate extension type if any + if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) + Flags.setSExt(); + else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt)) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) + Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0)); + } +} + +/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate +/// function arguments in the caller parameter area. This is the actual +/// alignment, not its logarithm. 
+unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty) const {
+ return TD->getCallFrameTypeAlignment(Ty);
+}
+
+//===----------------------------------------------------------------------===//
+// TargetTransformInfo Helpers
+//===----------------------------------------------------------------------===//
+
+int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
+ enum InstructionOpcodes {
+#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
+#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
+#include "llvm/IR/Instruction.def"
+ };
+ switch (static_cast<InstructionOpcodes>(Opcode)) {
+ case Ret: return 0;
+ case Br: return 0;
+ case Switch: return 0;
+ case IndirectBr: return 0;
+ case Invoke: return 0;
+ case Resume: return 0;
+ case Unreachable: return 0;
+ case Add: return ISD::ADD;
+ case FAdd: return ISD::FADD;
+ case Sub: return ISD::SUB;
+ case FSub: return ISD::FSUB;
+ case Mul: return ISD::MUL;
+ case FMul: return ISD::FMUL;
+ case UDiv: return ISD::UDIV;
+ case SDiv: return ISD::SDIV;
+ case FDiv: return ISD::FDIV;
+ case URem: return ISD::UREM;
+ case SRem: return ISD::SREM;
+ case FRem: return ISD::FREM;
+ case Shl: return ISD::SHL;
+ case LShr: return ISD::SRL;
+ case AShr: return ISD::SRA;
+ case And: return ISD::AND;
+ case Or: return ISD::OR;
+ case Xor: return ISD::XOR;
+ case Alloca: return 0;
+ case Load: return ISD::LOAD;
+ case Store: return ISD::STORE;
+ case GetElementPtr: return 0;
+ case Fence: return 0;
+ case AtomicCmpXchg: return 0;
+ case AtomicRMW: return 0;
+ case Trunc: return ISD::TRUNCATE;
+ case ZExt: return ISD::ZERO_EXTEND;
+ case SExt: return ISD::SIGN_EXTEND;
+ case FPToUI: return ISD::FP_TO_UINT;
+ case FPToSI: return ISD::FP_TO_SINT;
+ case UIToFP: return ISD::UINT_TO_FP;
+ case SIToFP: return ISD::SINT_TO_FP;
+ case FPTrunc: return ISD::FP_ROUND;
+ case FPExt: return ISD::FP_EXTEND;
+ case PtrToInt: return ISD::BITCAST;
+ case IntToPtr: return ISD::BITCAST;
+ case BitCast: return ISD::BITCAST;
+ case ICmp: return ISD::SETCC;
+ case FCmp: return ISD::SETCC;
+ case PHI: return 0;
+ case Call: return 0;
+ case Select: return ISD::SELECT;
+ case UserOp1: return 0;
+ case UserOp2: return 0;
+ case VAArg: return 0;
+ case ExtractElement: return ISD::EXTRACT_VECTOR_ELT;
+ case InsertElement: return ISD::INSERT_VECTOR_ELT;
+ case ShuffleVector: return ISD::VECTOR_SHUFFLE;
+ case ExtractValue: return ISD::MERGE_VALUES;
+ case InsertValue: return ISD::MERGE_VALUES;
+ case LandingPad: return 0;
+ }
+
+ llvm_unreachable("Unknown instruction type encountered!");
+}
+
+std::pair<unsigned, MVT>
+TargetLoweringBase::getTypeLegalizationCost(Type *Ty) const {
+ LLVMContext &C = Ty->getContext();
+ EVT MTy = getValueType(Ty);
+
+ unsigned Cost = 1;
+ // We keep legalizing the type until we find a legal kind. We assume that
+ // the only operation that costs anything is the split. After splitting
+ // we need to handle two types.
+ while (true) {
+ LegalizeKind LK = getTypeConversion(C, MTy);
+
+ if (LK.first == TypeLegal)
+ return std::make_pair(Cost, MTy.getSimpleVT());
+
+ if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger)
+ Cost *= 2;
+
+ // Keep legalizing the type.
+ MTy = LK.second;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool TargetLoweringBase::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+ // The default implementation supports only a conservative RISC-style r+r
+ // and r+i addressing mode.
+
+ // Allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // Only a few forms of scaled addressing are supported.
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ }
+
+ return true;
+}
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 76c2546..1170bf2 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -406,14 +406,14 @@ TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) {
// MachO
//===----------------------------------------------------------------------===//
-/// emitModuleFlags - Emit the module flags that specify the garbage collection
-/// information.
+/// emitModuleFlags - Perform code emission for module flags.
void TargetLoweringObjectFileMachO::
emitModuleFlags(MCStreamer &Streamer,
ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
Mangler *Mang, const TargetMachine &TM) const {
unsigned VersionVal = 0;
unsigned ImageInfoFlags = 0;
+ MDNode *LinkerOptions = 0;
StringRef SectionVal;
for (ArrayRef<Module::ModuleFlagEntry>::iterator
@@ -427,14 +427,33 @@ emitModuleFlags(MCStreamer &Streamer,
StringRef Key = MFE.Key->getString();
Value *Val = MFE.Val;
- if (Key == "Objective-C Image Info Version")
+ if (Key == "Objective-C Image Info Version") {
VersionVal = cast<ConstantInt>(Val)->getZExtValue();
- else if (Key == "Objective-C Garbage Collection" ||
- Key == "Objective-C GC Only" ||
- Key == "Objective-C Is Simulated")
+ } else if (Key == "Objective-C Garbage Collection" ||
+ Key == "Objective-C GC Only" ||
+ Key == "Objective-C Is Simulated") {
ImageInfoFlags |= cast<ConstantInt>(Val)->getZExtValue();
- else if (Key == "Objective-C Image Info Section")
+ } else if (Key == "Objective-C Image Info Section") {
SectionVal = cast<MDString>(Val)->getString();
+ } else if (Key == "Linker Options") {
+ LinkerOptions = cast<MDNode>(Val);
+ }
+ }
+
+ // Emit the linker options if present.
+ if (LinkerOptions) {
+ for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) {
+ MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i));
+ SmallVector<std::string, 4> StrOptions;
+
+ // Convert to strings.
+ for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) {
+ MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii));
+ StrOptions.push_back(MDOption->getString());
+ }
+
+ Streamer.EmitLinkerOptions(StrOptions);
+ }
}
// The section is mandatory. If we don't have it, then we don't have GC info.
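The new "Linker Options" handling above expects a module flag whose value is a list of option groups, each group itself a list of MDStrings, and hands each group to MCStreamer::EmitLinkerOptions. Below is a minimal sketch of how a module could populate that flag through the C++ API of this era; addLinkerOption and the "-lz" string are illustrative names, not part of this patch, and the sketch assumes the AppendUnique module-flag behavior available at this point in the tree:

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"

// Append one option group (here a single string such as "-lz") to the
// module-level "Linker Options" flag that emitModuleFlags walks above.
static void addLinkerOption(llvm::Module &M, llvm::StringRef Opt) {
  llvm::LLVMContext &Ctx = M.getContext();
  // One option group: a list of MDStrings.
  llvm::Value *Strs[] = { llvm::MDString::get(Ctx, Opt) };
  llvm::MDNode *Group = llvm::MDNode::get(Ctx, Strs);
  // The flag's value is a list of such groups.
  llvm::Value *Groups[] = { Group };
  M.addModuleFlag(llvm::Module::AppendUnique, "Linker Options",
                  llvm::MDNode::get(Ctx, Groups));
}

Calling addLinkerOption(M, "-lz") would then cause the MachO streamer to emit a linker option requesting libz, mirroring the StrOptions loop in the hunk above.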
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index 9b776d1..84b4bfc 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 8e6f809..26c5fe4 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -67,7 +67,6 @@ class TwoAddressInstructionPass : public MachineFunctionPass { const InstrItineraryData *InstrItins; MachineRegisterInfo *MRI; LiveVariables *LV; - SlotIndexes *Indexes; LiveIntervals *LIS; AliasAnalysis *AA; CodeGenOpt::Level OptLevel; @@ -121,7 +120,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass { bool tryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, unsigned SrcIdx, unsigned DstIdx, - unsigned Dist); + unsigned Dist, bool shouldOnlyCommute); void scanUses(unsigned DstReg); @@ -164,6 +163,8 @@ INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction", char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; +static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS); + /// sink3AddrInstruction - A two-address instruction has been converted to a /// three-address instruction to avoid clobbering a register. Try to sink it /// past the instruction that would kill the above mentioned register to reduce @@ -205,14 +206,29 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, // Find the instruction that kills SavedReg. MachineInstr *KillMI = NULL; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(SavedReg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineOperand &UseMO = UI.getOperand(); - if (!UseMO.isKill()) - continue; - KillMI = UseMO.getParent(); - break; + if (LIS) { + LiveInterval &LI = LIS->getInterval(SavedReg); + assert(LI.end() != LI.begin() && + "Reg should not have empty live interval."); + + SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot(); + LiveInterval::const_iterator I = LI.find(MBBEndIdx); + if (I != LI.end() && I->start < MBBEndIdx) + return false; + + --I; + KillMI = LIS->getInstructionFromIndex(I->end); + } + if (!KillMI) { + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SavedReg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineOperand &UseMO = UI.getOperand(); + if (!UseMO.isKill()) + continue; + KillMI = UseMO.getParent(); + break; + } } // If we find the instruction that kills SavedReg, and it is in an @@ -251,7 +267,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, if (DefReg == MOReg) return false; - if (MO.isKill()) { + if (MO.isKill() || (LIS && isPlainlyKilled(OtherMI, MOReg, LIS))) { if (OtherMI == KillMI && MOReg == SavedReg) // Save the operand that kills the register. We want to unset the kill // marker if we can sink MI past it. @@ -264,13 +280,15 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, } assert(KillMO && "Didn't find kill"); - // Update kill and LV information. - KillMO->setIsKill(false); - KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI); - KillMO->setIsKill(true); + if (!LIS) { + // Update kill and LV information. 
+ KillMO->setIsKill(false);
+ KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI);
+ KillMO->setIsKill(true);
- if (LV)
- LV->replaceKillInstruction(SavedReg, KillMI, MI);
+ if (LV)
+ LV->replaceKillInstruction(SavedReg, KillMI, MI);
+ }
// Move instruction to its destination.
MBB->remove(MI);
@@ -331,6 +349,33 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
return true;
}
+/// isPlainlyKilled - Test if the given register value, which is used by the
+/// given instruction, is killed by the given instruction.
+static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg,
+ LiveIntervals *LIS) {
+ if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ !LIS->isNotInMIMap(MI)) {
+ // FIXME: Sometimes tryInstructionTransform() will add instructions and
+ // test whether they can be folded before keeping them. In this case it
+ // sets a kill before recursively calling tryInstructionTransform() again.
+ // If there is no interval available, we assume that this instruction is
+ // one of those. A kill flag is manually inserted on the operand so the
+ // check below will handle it.
+ LiveInterval &LI = LIS->getInterval(Reg);
+ // This is to match the kill flag version where undefs don't have kill
+ // flags.
+ if (!LI.hasAtLeastOneValue())
+ return false;
+
+ SlotIndex useIdx = LIS->getInstructionIndex(MI);
+ LiveInterval::const_iterator I = LI.find(useIdx);
+ assert(I != LI.end() && "Reg must be live-in to use.");
+ return !I->end.isBlock() && SlotIndex::isSameInstr(I->end, useIdx);
+ }
+
+ return MI->killsRegister(Reg);
+}
+
/// isKilled - Test if the given register value, which is used by the given
/// instruction, is killed by the given instruction. This looks through
/// coalescable copies to see if the original value is potentially not killed.
@@ -346,12 +391,20 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
/// normal heuristics commute the (two-address) add, which lets
/// coalescing eliminate the extra copy.
///
+/// If allowFalsePositives is true, then likely kills are treated as kills
+/// even if they cannot be proven to be kills.
static bool isKilled(MachineInstr &MI, unsigned Reg,
const MachineRegisterInfo *MRI,
- const TargetInstrInfo *TII) {
+ const TargetInstrInfo *TII,
+ LiveIntervals *LIS,
+ bool allowFalsePositives) {
MachineInstr *DefMI = &MI;
for (;;) {
- if (!DefMI->killsRegister(Reg))
+ // All uses of physical registers are likely to be kills.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ (allowFalsePositives || MRI->hasOneUse(Reg)))
+ return true;
+ if (!isPlainlyKilled(DefMI, Reg, LIS))
return false;
if (TargetRegisterInfo::isPhysicalRegister(Reg))
return true;
@@ -472,7 +525,7 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
// insert => %reg1030<def> = MOV8rr %reg1029
// %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>
- if (!MI->killsRegister(regC))
+ if (!isPlainlyKilled(MI, regC, LIS))
return false;
// Ok, we have something like:
@@ -528,19 +581,9 @@ commuteInstruction(MachineBasicBlock::iterator &mi,
}
DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI);
- // If the instruction changed to commute it, update livevar.
- if (NewMI != MI) {
- if (LV)
- // Update live variables
- LV->replaceKillInstruction(RegC, MI, NewMI);
- if (Indexes)
- Indexes->replaceMachineInstrInMaps(MI, NewMI);
-
- MBB->insert(mi, NewMI); // Insert the new inst
- MBB->erase(mi); // Nuke the old inst.
- mi = NewMI;
- DistanceMap.insert(std::make_pair(NewMI, Dist));
- }
+ assert(NewMI == MI &&
+ "TargetInstrInfo::commuteInstruction() should not return a new "
+ "instruction unless it was requested.");
// Update source register map.
unsigned FromRegC = getMappedReg(RegC, SrcRegMap);
@@ -587,8 +630,8 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
bool Sunk = false;
- if (Indexes)
- Indexes->replaceMachineInstrInMaps(mi, NewMI);
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(mi, NewMI);
if (NewMI->findRegisterUseOperand(RegB, false, TRI))
// FIXME: Temporary workaround. If the new instruction doesn't
@@ -700,9 +743,9 @@ bool TwoAddressInstructionPass::
rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned Reg) {
- // Bail immediately if we don't have LV available. We use it to find kills
- // efficiently.
- if (!LV)
+ // Bail immediately if we don't have LV or LIS available. We use them to find
+ // kills efficiently.
+ if (!LV && !LIS)
return false;
MachineInstr *MI = &*mi;
@@ -711,7 +754,22 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
// Must be created from unfolded load. Don't waste time trying this.
return false;
- MachineInstr *KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ MachineInstr *KillMI = 0;
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(Reg);
+ assert(LI.end() != LI.begin() &&
+ "Reg should not have empty live interval.");
+
+ SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot();
+ LiveInterval::const_iterator I = LI.find(MBBEndIdx);
+ if (I != LI.end() && I->start < MBBEndIdx)
+ return false;
+
+ --I;
+ KillMI = LIS->getInstructionFromIndex(I->end);
+ } else {
+ KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ }
if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike())
// Don't mess with copies; they may be coalesced later.
return false;
@@ -747,24 +805,27 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
Defs.insert(MOReg);
else {
Uses.insert(MOReg);
- if (MO.isKill() && MOReg != Reg)
+ if (MOReg != Reg && (MO.isKill() ||
+ (LIS && isPlainlyKilled(MI, MOReg, LIS))))
Kills.insert(MOReg);
}
}
// Move the copies connected to MI down as well.
- MachineBasicBlock::iterator From = MI;
- MachineBasicBlock::iterator To = llvm::next(From);
- while (To->isCopy() && Defs.count(To->getOperand(1).getReg())) {
- Defs.insert(To->getOperand(0).getReg());
- ++To;
+ MachineBasicBlock::iterator Begin = MI;
+ MachineBasicBlock::iterator AfterMI = llvm::next(Begin);
+
+ MachineBasicBlock::iterator End = AfterMI;
+ while (End->isCopy() && Defs.count(End->getOperand(1).getReg())) {
+ Defs.insert(End->getOperand(0).getReg());
+ ++End;
}
// Check that the reschedule will not break dependencies.
unsigned NumVisited = 0;
MachineBasicBlock::iterator KillPos = KillMI;
++KillPos;
- for (MachineBasicBlock::iterator I = To; I != KillPos; ++I) {
+ for (MachineBasicBlock::iterator I = End; I != KillPos; ++I) {
MachineInstr *OtherMI = I;
// DBG_VALUE cannot be counted against the limit.
if (OtherMI->isDebugValue())
@@ -795,11 +856,13 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
} else {
if (Defs.count(MOReg))
return false;
+ bool isKill = MO.isKill() ||
+ (LIS && isPlainlyKilled(OtherMI, MOReg, LIS));
if (MOReg != Reg &&
- ((MO.isKill() && Uses.count(MOReg)) || Kills.count(MOReg)))
+ ((isKill && Uses.count(MOReg)) || Kills.count(MOReg)))
// Don't want to extend other live ranges and update kills.
return false;
- if (MOReg == Reg && !MO.isKill())
+ if (MOReg == Reg && !isKill)
// We can't schedule across a use of the register in question.
return false;
// Ensure that if this is the register in question, it's the kill we expect.
@@ -810,19 +873,35 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
}
// Move debug info as well.
- while (From != MBB->begin() && llvm::prior(From)->isDebugValue())
- --From;
+ while (Begin != MBB->begin() && llvm::prior(Begin)->isDebugValue())
+ --Begin;
+
+ nmi = End;
+ MachineBasicBlock::iterator InsertPos = KillPos;
+ if (LIS) {
+ // We have to move the copies first so that the MBB is still well-formed
+ // when calling handleMove().
+ for (MachineBasicBlock::iterator MBBI = AfterMI; MBBI != End;) {
+ MachineInstr *CopyMI = MBBI;
+ ++MBBI;
+ MBB->splice(InsertPos, MBB, CopyMI);
+ LIS->handleMove(CopyMI);
+ InsertPos = CopyMI;
+ }
+ End = llvm::next(MachineBasicBlock::iterator(MI));
+ }
// Copies following MI may have been moved as well.
- nmi = To;
- MBB->splice(KillPos, MBB, From, To);
+ MBB->splice(InsertPos, MBB, Begin, End);
DistanceMap.erase(DI);
// Update live variables
- LV->removeVirtualRegisterKilled(Reg, KillMI);
- LV->addVirtualRegisterKilled(Reg, MI);
- if (LIS)
+ if (LIS) {
LIS->handleMove(MI);
+ } else {
+ LV->removeVirtualRegisterKilled(Reg, KillMI);
+ LV->addVirtualRegisterKilled(Reg, MI);
+ }
DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI);
return true;
@@ -858,9 +937,9 @@ bool TwoAddressInstructionPass::
rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned Reg) {
- // Bail immediately if we don't have LV available. We use it to find kills
- // efficiently.
- if (!LV)
+ // Bail immediately if we don't have LV or LIS available. We use them to find
+ // kills efficiently.
+ if (!LV && !LIS)
return false;
MachineInstr *MI = &*mi;
@@ -869,7 +948,22 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
// Must be created from unfolded load. Don't waste time trying this.
return false;
- MachineInstr *KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ MachineInstr *KillMI = 0;
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(Reg);
+ assert(LI.end() != LI.begin() &&
+ "Reg should not have empty live interval.");
+
+ SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot();
+ LiveInterval::const_iterator I = LI.find(MBBEndIdx);
+ if (I != LI.end() && I->start < MBBEndIdx)
+ return false;
+
+ --I;
+ KillMI = LIS->getInstructionFromIndex(I->end);
+ } else {
+ KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ }
if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike())
// Don't mess with copies; they may be coalesced later.
return false;
@@ -896,10 +990,11 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
continue;
if (isDefTooClose(MOReg, DI->second, MI))
return false;
- if (MOReg == Reg && !MO.isKill())
+ bool isKill = MO.isKill() || (LIS && isPlainlyKilled(KillMI, MOReg, LIS));
+ if (MOReg == Reg && !isKill)
return false;
Uses.insert(MOReg);
- if (MO.isKill() && MOReg != Reg)
+ if (isKill && MOReg != Reg)
Kills.insert(MOReg);
} else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) {
Defs.insert(MOReg);
@@ -939,7 +1034,8 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
if (Kills.count(MOReg))
// Don't want to extend other live ranges and update kills.
return false;
- if (OtherMI != MI && MOReg == Reg && !MO.isKill())
+ if (OtherMI != MI && MOReg == Reg &&
+ !(MO.isKill() || (LIS && isPlainlyKilled(OtherMI, MOReg, LIS))))
// We can't schedule across a use of the register in question.
return false;
} else {
@@ -973,10 +1069,12 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
DistanceMap.erase(DI);
// Update live variables
- LV->removeVirtualRegisterKilled(Reg, KillMI);
- LV->addVirtualRegisterKilled(Reg, MI);
- if (LIS)
+ if (LIS) {
LIS->handleMove(KillMI);
+ } else {
+ LV->removeVirtualRegisterKilled(Reg, KillMI);
+ LV->addVirtualRegisterKilled(Reg, MI);
+ }
DEBUG(dbgs() << "\trescheduled kill: " << *KillMI);
return true;
@@ -987,11 +1085,13 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
/// either eliminate the tied operands or improve the opportunities for
/// coalescing away the register copy. Returns true if no copy needs to be
/// inserted to untie mi's operands (either because they were untied, or
-/// because mi was rescheduled, and will be visited again later).
+/// because mi was rescheduled, and will be visited again later). If the
+/// shouldOnlyCommute flag is true, only instruction commutation is attempted.
bool TwoAddressInstructionPass::
tryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
- unsigned SrcIdx, unsigned DstIdx, unsigned Dist) {
+ unsigned SrcIdx, unsigned DstIdx,
+ unsigned Dist, bool shouldOnlyCommute) {
if (OptLevel == CodeGenOpt::None)
return false;
@@ -1001,7 +1101,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
assert(TargetRegisterInfo::isVirtualRegister(regB) &&
"cannot make instruction into two-address form");
- bool regBKilled = isKilled(MI, regB, MRI, TII);
+ bool regBKilled = isKilled(MI, regB, MRI, TII, LIS, true);
if (TargetRegisterInfo::isVirtualRegister(regA))
scanUses(regA);
@@ -1021,7 +1121,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
if (regCIdx != ~0U) {
regC = MI.getOperand(regCIdx).getReg();
- if (!regBKilled && isKilled(MI, regC, MRI, TII))
+ if (!regBKilled && isKilled(MI, regC, MRI, TII, LIS, false))
// If C dies but B does not, swap the B and C operands.
// This makes the live ranges of A and C joinable.
TryCommute = true;
@@ -1040,6 +1140,9 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
return false;
}
+ if (shouldOnlyCommute)
+ return false;
+
// If there is one more use of regB later in the same MBB, consider
// re-scheduling this MI below it.
if (rescheduleMIBelowKill(mi, nmi, regB)) {
@@ -1115,10 +1218,12 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA);
unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB);
MachineBasicBlock::iterator NewMI = NewMIs[1];
- bool TransformSuccess =
- tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist);
- if (TransformSuccess ||
- NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
+ bool TransformResult =
+ tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist, true);
+ (void)TransformResult;
+ assert(!TransformResult &&
+ "tryInstructionTransform() should return false.");
+ if (NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
// Success, or at least we made an improvement. Keep the unfolded
// instructions and discard the original.
if (LV) { @@ -1149,10 +1254,26 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, } LV->addVirtualRegisterKilled(Reg, NewMIs[1]); } + + SmallVector<unsigned, 4> OrigRegs; + if (LIS) { + for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(), + MOE = MI.operands_end(); MOI != MOE; ++MOI) { + if (MOI->isReg()) + OrigRegs.push_back(MOI->getReg()); + } + } + MI.eraseFromParent(); + + // Update LiveIntervals. + if (LIS) { + MachineBasicBlock::iterator Begin(NewMIs[0]); + MachineBasicBlock::iterator End(NewMIs[1]); + LIS->repairIntervalsInRange(MBB, Begin, End, OrigRegs); + } + mi = NewMIs[1]; - if (TransformSuccess) - return true; } else { // Transforming didn't eliminate the tie and didn't lead to an // improvement. Clean up the unfolded instructions and keep the @@ -1215,9 +1336,15 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, TiedPairList &TiedPairs, unsigned &Dist) { bool IsEarlyClobber = false; + for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { + const MachineOperand &DstMO = MI->getOperand(TiedPairs[tpi].second); + IsEarlyClobber |= DstMO.isEarlyClobber(); + } + bool RemovedKillFlag = false; bool AllUsesCopied = true; unsigned LastCopiedReg = 0; + SlotIndex LastCopyIdx; unsigned RegB = 0; for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { unsigned SrcIdx = TiedPairs[tpi].first; @@ -1225,7 +1352,6 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, const MachineOperand &DstMO = MI->getOperand(DstIdx); unsigned RegA = DstMO.getReg(); - IsEarlyClobber |= DstMO.isEarlyClobber(); // Grab RegB from the instruction because it may have changed if the // instruction was commuted. @@ -1263,9 +1389,17 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, DistanceMap.insert(std::make_pair(PrevMI, Dist)); DistanceMap[MI] = ++Dist; - SlotIndex CopyIdx; - if (Indexes) - CopyIdx = Indexes->insertMachineInstrInMaps(PrevMI).getRegSlot(); + if (LIS) { + LastCopyIdx = LIS->InsertMachineInstrInMaps(PrevMI).getRegSlot(); + + if (TargetRegisterInfo::isVirtualRegister(RegA)) { + LiveInterval &LI = LIS->getInterval(RegA); + VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator()); + SlotIndex endIdx = + LIS->getInstructionIndex(MI).getRegSlot(IsEarlyClobber); + LI.addRange(LiveRange(LastCopyIdx, endIdx, VNI)); + } + } DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI); @@ -1311,6 +1445,18 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, LV->addVirtualRegisterKilled(RegB, PrevMI); } + // Update LiveIntervals. 
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(RegB);
+ SlotIndex MIIdx = LIS->getInstructionIndex(MI);
+ LiveInterval::const_iterator I = LI.find(MIIdx);
+ assert(I != LI.end() && "RegB must be live-in to use.");
+
+ SlotIndex UseIdx = MIIdx.getRegSlot(IsEarlyClobber);
+ if (I->end == UseIdx)
+ LI.removeRange(LastCopyIdx, UseIdx);
+ }
+
} else if (RemovedKillFlag) {
// Some tied uses of regB matched their destination registers, so
// regB is still used in this instruction, but a kill flag was
@@ -1335,7 +1481,6 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
InstrItins = TM.getInstrItineraryData();
- Indexes = getAnalysisIfAvailable<SlotIndexes>();
LV = getAnalysisIfAvailable<LiveVariables>();
LIS = getAnalysisIfAvailable<LiveIntervals>();
AA = &getAnalysis<AliasAnalysis>();
@@ -1399,7 +1544,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
unsigned SrcReg = mi->getOperand(SrcIdx).getReg();
unsigned DstReg = mi->getOperand(DstIdx).getReg();
if (SrcReg != DstReg &&
- tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist)) {
+ tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist, false)) {
// The tied operands have been eliminated or shifted further down the
// block to ease elimination. Continue processing with 'nmi'.
TiedOperands.clear();
@@ -1437,6 +1582,9 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
}
}
+ if (LIS)
+ MF->verify(this, "After two-address instruction pass");
+
return MadeChange;
}
@@ -1462,6 +1610,13 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
llvm_unreachable(0);
}
+ SmallVector<unsigned, 4> OrigRegs;
+ if (LIS) {
+ OrigRegs.push_back(MI->getOperand(0).getReg());
+ for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2)
+ OrigRegs.push_back(MI->getOperand(i).getReg());
+ }
+
bool DefEmitted = false;
for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
MachineOperand &UseMO = MI->getOperand(i);
@@ -1505,6 +1660,9 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
DEBUG(dbgs() << "Inserted: " << *CopyMI);
}
+ MachineBasicBlock::iterator EndMBBI =
+ llvm::next(MachineBasicBlock::iterator(MI));
+
if (!DefEmitted) {
DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF");
MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
@@ -1514,4 +1672,8 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
DEBUG(dbgs() << "Eliminated: " << *MI);
MI->eraseFromParent();
}
+
+ // Update LiveIntervals.
+ if (LIS)
+ LIS->repairIntervalsInRange(MBB, MBBI, EndMBBI, OrigRegs);
+}
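Stepping back to getVectorTypeBreakdown in the TargetLoweringBase.cpp hunk earlier in this diff: its central loop halves the element count until the vector type becomes legal, doubling the register count at each step. Below is a standalone sketch of that arithmetic, not LLVM API; the 4-element cutoff stands in for a hypothetical target's isTypeLegal:

#include <cstdio>

// Stand-in for TLI.isTypeLegal(): pretend vectors of at most four
// floats are the widest legal vector type on this imagined target.
static bool isLegalVector(unsigned NumElts) { return NumElts <= 4; }

int main() {
  unsigned NumElts = 16;       // legalizing <16 x float>
  unsigned NumVectorRegs = 1;
  // Halve the vector until it is legal, doubling the register count,
  // exactly as the splitting loop in getVectorTypeBreakdown does.
  while (NumElts > 1 && !isLegalVector(NumElts)) {
    NumElts >>= 1;
    NumVectorRegs <<= 1;
  }
  // Prints "4 registers of <4 x float>": <16 x float> splits into four
  // legal <4 x float> intermediates.
  std::printf("%u registers of <%u x float>\n", NumVectorRegs, NumElts);
  return 0;
}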