Diffstat (limited to 'lib/CodeGen')
104 files changed, 7357 insertions, 4887 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index b520d8f..c23351b 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -16,6 +16,7 @@ #define DEBUG_TYPE "post-RA-sched" #include "AggressiveAntiDepBreaker.h" +#include "RegisterClassInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" @@ -114,12 +115,13 @@ bool AggressiveAntiDepState::IsLive(unsigned Reg) AggressiveAntiDepBreaker:: AggressiveAntiDepBreaker(MachineFunction& MFi, + const RegisterClassInfo &RCI, TargetSubtarget::RegClassVector& CriticalPathRCs) : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), TII(MF.getTarget().getInstrInfo()), TRI(MF.getTarget().getRegisterInfo()), - AllocatableSet(TRI->getAllocatableSet(MF)), + RegClassInfo(RCI), State(NULL) { /* Collect a bitset of all registers that are only broken if they are on the critical path. */ @@ -357,7 +359,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, RegRefs = State->GetRegRefs(); // Handle dead defs by simulating a last-use of the register just - // after the def. A dead def can occur because the def is truely + // after the def. A dead def can occur because the def is truly // dead, or because only a subregister is live at the def. If we // don't do this the dead def will be incorrectly merged into the // previous def. @@ -618,9 +620,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( const TargetRegisterClass *SuperRC = TRI->getMinimalPhysRegClass(SuperReg, MVT::Other); - const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF); - const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF); - if (RB == RE) { + ArrayRef<unsigned> Order = RegClassInfo.getOrder(SuperRC); + if (Order.empty()) { DEBUG(dbgs() << "\tEmpty Super Regclass!!\n"); return false; } @@ -628,17 +629,17 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( DEBUG(dbgs() << "\tFind Registers:"); if (RenameOrder.count(SuperRC) == 0) - RenameOrder.insert(RenameOrderType::value_type(SuperRC, RE)); + RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size())); - const TargetRegisterClass::iterator OrigR = RenameOrder[SuperRC]; - const TargetRegisterClass::iterator EndR = ((OrigR == RE) ? RB : OrigR); - TargetRegisterClass::iterator R = OrigR; + unsigned OrigR = RenameOrder[SuperRC]; + unsigned EndR = ((OrigR == Order.size()) ? 0 : OrigR); + unsigned R = OrigR; do { - if (R == RB) R = RE; + if (R == 0) R = Order.size(); --R; - const unsigned NewSuperReg = *R; + const unsigned NewSuperReg = Order[R]; // Don't consider non-allocatable registers - if (!AllocatableSet.test(NewSuperReg)) continue; + if (!RegClassInfo.isAllocatable(NewSuperReg)) continue; // Don't replace a register with itself. 
if (NewSuperReg == SuperReg) continue; @@ -719,7 +720,9 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( const std::vector<SUnit>& SUnits, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, - unsigned InsertPosIndex) { + unsigned InsertPosIndex, + DbgValueVector &DbgValues) { + std::vector<unsigned> &KillIndices = State->GetKillIndices(); std::vector<unsigned> &DefIndices = State->GetDefIndices(); std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& @@ -817,7 +820,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg)); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); - if (!AllocatableSet.test(AntiDepReg)) { + if (!RegClassInfo.isAllocatable(AntiDepReg)) { // Don't break anti-dependencies on non-allocatable registers. DEBUG(dbgs() << " (non-allocatable)\n"); continue; @@ -923,14 +926,10 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // sure to update that as well. const SUnit *SU = MISUnitMap[Q->second.Operand->getParent()]; if (!SU) continue; - for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) { - MachineInstr *DI = SU->DbgInstrList[i]; - assert (DI->getNumOperands()==3 && DI->getOperand(0).isReg() && - DI->getOperand(0).getReg() - && "Non register dbg_value attached to SUnit!"); - if (DI->getOperand(0).getReg() == AntiDepReg) - DI->getOperand(0).setReg(NewReg); - } + for (DbgValueVector::iterator DVI = DbgValues.begin(), + DVE = DbgValues.end(); DVI != DVE; ++DVI) + if (DVI->second == Q->second.Operand->getParent()) + UpdateDbgValue(DVI->first, AntiDepReg, NewReg); } // We just went back in time and modified history; the diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index 9d715cc..e43fe65 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -30,6 +30,8 @@ #include <map> namespace llvm { +class RegisterClassInfo; + /// Class AggressiveAntiDepState /// Contains all the state necessary for anti-dep breaking. class AggressiveAntiDepState { @@ -117,11 +119,7 @@ namespace llvm { MachineRegisterInfo &MRI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - - /// AllocatableSet - The set of allocatable registers. - /// We'll be ignoring anti-dependencies on non-allocatable registers, - /// because they may not be safe to break. - const BitVector AllocatableSet; + const RegisterClassInfo &RegClassInfo; /// CriticalPathSet - The set of registers that should only be /// renamed if they are on the critical path. @@ -133,6 +131,7 @@ namespace llvm { public: AggressiveAntiDepBreaker(MachineFunction& MFi, + const RegisterClassInfo &RCI, TargetSubtarget::RegClassVector& CriticalPathRCs); ~AggressiveAntiDepBreaker(); @@ -146,7 +145,8 @@ namespace llvm { unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, - unsigned InsertPosIndex); + unsigned InsertPosIndex, + DbgValueVector &DbgValues); /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. @@ -157,8 +157,8 @@ namespace llvm { void FinishBlock(); private: - typedef std::map<const TargetRegisterClass *, - TargetRegisterClass::const_iterator> RenameOrderType; + /// Keep track of a position in the allocation order for each regclass. 
+ typedef std::map<const TargetRegisterClass *, unsigned> RenameOrderType; /// IsImplicitDefUse - Return true if MO represents a register /// that is both implicitly used and defined in MI diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp index 20c7625..1005f10 100644 --- a/lib/CodeGen/AllocationOrder.cpp +++ b/lib/CodeGen/AllocationOrder.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "AllocationOrder.h" +#include "RegisterClassInfo.h" #include "VirtRegMap.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -23,8 +24,8 @@ using namespace llvm; // Compare VirtRegMap::getRegAllocPref(). AllocationOrder::AllocationOrder(unsigned VirtReg, const VirtRegMap &VRM, - const BitVector &ReservedRegs) - : Pos(0), Reserved(ReservedRegs) { + const RegisterClassInfo &RegClassInfo) + : Begin(0), End(0), Pos(0), RCI(RegClassInfo), OwnedBegin(false) { const TargetRegisterClass *RC = VRM.getRegInfo().getRegClass(VirtReg); std::pair<unsigned, unsigned> HintPair = VRM.getRegInfo().getRegAllocationHint(VirtReg); @@ -36,33 +37,43 @@ AllocationOrder::AllocationOrder(unsigned VirtReg, if (TargetRegisterInfo::isVirtualRegister(Hint)) Hint = VRM.getPhys(Hint); - // The remaining allocation order may depend on the hint. - tie(Begin, End) = VRM.getTargetRegInfo() - .getAllocationOrder(RC, HintPair.first, Hint, VRM.getMachineFunction()); + // The first hint pair component indicates a target-specific hint. + if (HintPair.first) { + const TargetRegisterInfo &TRI = VRM.getTargetRegInfo(); + // The remaining allocation order may depend on the hint. + ArrayRef<unsigned> Order = + TRI.getRawAllocationOrder(RC, HintPair.first, Hint, + VRM.getMachineFunction()); + if (Order.empty()) + return; - // Target-dependent hints require resolution. - if (HintPair.first) - Hint = VRM.getTargetRegInfo().ResolveRegAllocHint(HintPair.first, Hint, - VRM.getMachineFunction()); + // Copy the allocation order with reserved registers removed. + OwnedBegin = true; + unsigned *P = new unsigned[Order.size()]; + Begin = P; + for (unsigned i = 0; i != Order.size(); ++i) + if (!RCI.isReserved(Order[i])) + *P++ = Order[i]; + End = P; + + // Target-dependent hints require resolution. + Hint = TRI.ResolveRegAllocHint(HintPair.first, Hint, + VRM.getMachineFunction()); + } else { + // If there is no hint or just a normal hint, use the cached allocation + // order from RegisterClassInfo. + ArrayRef<unsigned> O = RCI.getOrder(RC); + Begin = O.begin(); + End = O.end(); + } // The hint must be a valid physreg for allocation. if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || - !RC->contains(Hint) || ReservedRegs.test(Hint))) + !RC->contains(Hint) || RCI.isReserved(Hint))) Hint = 0; } -unsigned AllocationOrder::next() { - // First take the hint. - if (!Pos) { - Pos = Begin; - if (Hint) - return Hint; - } - // Then look at the order from TRI. 
- while(Pos != End) { - unsigned Reg = *Pos++; - if (Reg != Hint && !Reserved.test(Reg)) - return Reg; - } - return 0; +AllocationOrder::~AllocationOrder() { + if (OwnedBegin) + delete [] Begin; } diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h index 61fd8f8..d1e48a1 100644 --- a/lib/CodeGen/AllocationOrder.h +++ b/lib/CodeGen/AllocationOrder.h @@ -19,15 +19,16 @@ namespace llvm { -class BitVector; +class RegisterClassInfo; class VirtRegMap; class AllocationOrder { const unsigned *Begin; const unsigned *End; const unsigned *Pos; - const BitVector &Reserved; + const RegisterClassInfo &RCI; unsigned Hint; + bool OwnedBegin; public: /// AllocationOrder - Create a new AllocationOrder for VirtReg. @@ -37,12 +38,28 @@ public: /// TargetRegisterInfo::getReservedRegs(). AllocationOrder(unsigned VirtReg, const VirtRegMap &VRM, - const BitVector &ReservedRegs); + const RegisterClassInfo &RegClassInfo); + + ~AllocationOrder(); /// next - Return the next physical register in the allocation order, or 0. /// It is safe to call next again after it returned 0. /// It will keep returning 0 until rewind() is called. - unsigned next(); + unsigned next() { + // First take the hint. + if (!Pos) { + Pos = Begin; + if (Hint) + return Hint; + } + // Then look at the order from TRI. + while (Pos != End) { + unsigned Reg = *Pos++; + if (Reg != Hint) + return Reg; + } + return 0; + } /// rewind - Start over from the beginning. void rewind() { Pos = 0; } diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h index 086b757..df47f98 100644 --- a/lib/CodeGen/AntiDepBreaker.h +++ b/lib/CodeGen/AntiDepBreaker.h @@ -30,6 +30,9 @@ namespace llvm { /// anti-dependencies. class AntiDepBreaker { public: + typedef std::vector<std::pair<MachineInstr *, MachineInstr *> > + DbgValueVector; + virtual ~AntiDepBreaker(); /// Start - Initialize anti-dep breaking for a new basic block. @@ -40,9 +43,10 @@ public: /// the number of anti-dependencies broken. /// virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, - MachineBasicBlock::iterator Begin, - MachineBasicBlock::iterator End, - unsigned InsertPosIndex) =0; + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned InsertPosIndex, + DbgValueVector &DbgValues) = 0; /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. @@ -52,6 +56,14 @@ public: /// Finish - Finish anti-dep breaking for a basic block. virtual void FinishBlock() =0; + + /// UpdateDbgValue - Update DBG_VALUE if the dependency breaker is updating + /// another machine instruction to use NewReg. + void UpdateDbgValue(MachineInstr *MI, unsigned OldReg, unsigned NewReg) { + assert(MI && MI->isDebugValue() && "MI is not DBG_VALUE!"); + if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == OldReg) + MI->getOperand(0).setReg(NewReg); + } }; } diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 0db28a6..5861fa4 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -52,7 +52,7 @@ void ARMException::EndModule() { /// being emitted immediately after the function entry point.
void ARMException::BeginFunction(const MachineFunction *MF) { Asm->OutStreamer.EmitFnStart(); - if (!Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory) + if (Asm->MF->getFunction()->needsUnwindTableEntry()) Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber())); } @@ -60,7 +60,7 @@ void ARMException::BeginFunction(const MachineFunction *MF) { /// EndFunction - Gather and emit post-function exception information. /// void ARMException::EndFunction() { - if (Asm->MF->getFunction()->doesNotThrow() && !UnwindTablesMandatory) + if (!Asm->MF->getFunction()->needsUnwindTableEntry()) Asm->OutStreamer.EmitCantUnwind(); else { Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 61f5672..b544ff1 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -35,10 +35,12 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Assembly/Writer.h" #include "llvm/ADT/SmallString.h" @@ -191,22 +193,25 @@ bool AsmPrinter::doInitialization(Module &M) { if (MAI->doesSupportDebugInformation()) DD = new DwarfDebug(this, &M); - if (MAI->doesSupportExceptionHandling()) - switch (MAI->getExceptionHandlingType()) { - default: - case ExceptionHandling::DwarfTable: - DE = new DwarfTableException(this); - break; - case ExceptionHandling::DwarfCFI: - DE = new DwarfCFIException(this); - break; - case ExceptionHandling::ARM: - DE = new ARMException(this); - break; - } + switch (MAI->getExceptionHandlingType()) { + case ExceptionHandling::None: + return false; + case ExceptionHandling::SjLj: + case ExceptionHandling::DwarfCFI: + DE = new DwarfCFIException(this); + return false; + case ExceptionHandling::ARM: + DE = new ARMException(this); + return false; + case ExceptionHandling::Win64: + DE = new Win64Exception(this); + return false; + } +#else + return false; #endif // ANDROID_TARGET_BUILD - return false; + llvm_unreachable("Unknown exception type."); } void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { @@ -271,7 +276,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { } MCSymbol *GVSym = Mang->getSymbol(GV); - EmitVisibility(GVSym, GV->getVisibility()); + EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration()); if (!GV->hasInitializer()) // External globals require no extra code. return; @@ -293,12 +298,6 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (GVKind.isCommon() || GVKind.isBSSLocal()) { if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. - if (isVerbose()) { - WriteAsOperand(OutStreamer.GetCommentOS(), GV, - /*PrintType=*/false, GV->getParent()); - OutStreamer.GetCommentOS() << '\n'; - } - // Handle common symbols. if (GVKind.isCommon()) { unsigned Align = 1 << AlignLog; @@ -492,39 +491,11 @@ void AsmPrinter::EmitFunctionEntryLabel() { } -static void EmitDebugLoc(DebugLoc DL, const MachineFunction *MF, - raw_ostream &CommentOS) { - const LLVMContext &Ctx = MF->getFunction()->getContext(); - if (!DL.isUnknown()) { // Print source line info. 
- DIScope Scope(DL.getScope(Ctx)); - // Omit the directory, because it's likely to be long and uninteresting. - if (Scope.Verify()) - CommentOS << Scope.getFilename(); - else - CommentOS << "<unknown>"; - CommentOS << ':' << DL.getLine(); - if (DL.getCol() != 0) - CommentOS << ':' << DL.getCol(); - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); - if (!InlinedAtDL.isUnknown()) { - CommentOS << "[ "; - EmitDebugLoc(InlinedAtDL, MF, CommentOS); - CommentOS << " ]"; - } - } -} - /// EmitComments - Pretty-print comments for instructions. static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { const MachineFunction *MF = MI.getParent()->getParent(); const TargetMachine &TM = MF->getTarget(); - DebugLoc DL = MI.getDebugLoc(); - if (!DL.isUnknown()) { // Print source line info. - EmitDebugLoc(DL, MF, CommentOS); - CommentOS << '\n'; - } - // Check for spills and reloads int FI; @@ -631,6 +602,45 @@ static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { return true; } +AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() { + if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI && + MF->getFunction()->needsUnwindTableEntry()) + return CFI_M_EH; + + if (MMI->hasDebugInfo()) + return CFI_M_Debug; + + return CFI_M_None; +} + +bool AsmPrinter::needsSEHMoves() { + return MAI->getExceptionHandlingType() == ExceptionHandling::Win64 && + MF->getFunction()->needsUnwindTableEntry(); +} + +void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { + MCSymbol *Label = MI.getOperand(0).getMCSymbol(); + + if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI) + return; + + if (needsCFIMoves() == CFI_M_None) + return; + + MachineModuleInfo &MMI = MF->getMMI(); + std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + bool FoundOne = false; + (void)FoundOne; + for (std::vector<MachineMove>::iterator I = Moves.begin(), + E = Moves.end(); I != E; ++I) { + if (I->getLabel() == Label) { + EmitCFIFrameMove(*I); + FoundOne = true; + } + } + assert(FoundOne); +} + /// EmitFunctionBody - This method emits the body and trailer for a /// function. void AsmPrinter::EmitFunctionBody() { @@ -669,6 +679,9 @@ void AsmPrinter::EmitFunctionBody() { switch (II->getOpcode()) { case TargetOpcode::PROLOG_LABEL: + emitPrologLabel(*II); + break; + case TargetOpcode::EH_LABEL: case TargetOpcode::GC_LABEL: OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol()); @@ -689,6 +702,9 @@ void AsmPrinter::EmitFunctionBody() { if (isVerbose()) EmitKill(II, *this); break; default: + if (!TM.hasMCUseLoc()) + MCLineEntry::Make(&OutStreamer, getCurrentSection()); + EmitInstruction(II); break; } @@ -767,6 +783,53 @@ getDebugValueLocation(const MachineInstr *MI) const { return MachineLocation(); } +/// EmitDwarfRegOp - Emit dwarf register operation. +void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); + + for (const unsigned *SR = TRI->getSuperRegisters(MLoc.getReg()); + *SR && Reg < 0; ++SR) { + Reg = TRI->getDwarfRegNum(*SR, false); + // FIXME: Get the bit range this register uses of the superregister + // so that we can produce a DW_OP_bit_piece + } + + // FIXME: Handle cases like a super register being encoded as + // DW_OP_reg 32 DW_OP_piece 4 DW_OP_reg 33 + + // FIXME: We have no reasonable way of handling errors in here. The + // caller might be in the middle of a dwarf expression.
We should + // probably assert that Reg >= 0 once debug info generation is more mature. + + if (int Offset = MLoc.getOffset()) { + if (Reg < 32) { + OutStreamer.AddComment( + dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg)); + EmitInt8(dwarf::DW_OP_breg0 + Reg); + } else { + OutStreamer.AddComment("DW_OP_bregx"); + EmitInt8(dwarf::DW_OP_bregx); + OutStreamer.AddComment(Twine(Reg)); + EmitULEB128(Reg); + } + EmitSLEB128(Offset); + } else { + if (Reg < 32) { + OutStreamer.AddComment( + dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg)); + EmitInt8(dwarf::DW_OP_reg0 + Reg); + } else { + OutStreamer.AddComment("DW_OP_regx"); + EmitInt8(dwarf::DW_OP_regx); + OutStreamer.AddComment(Twine(Reg)); + EmitULEB128(Reg); + } + } + + // FIXME: Produce a DW_OP_bit_piece if we used a superregister +} + bool AsmPrinter::doFinalization(Module &M) { // Emit global variables. for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); @@ -1879,7 +1942,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { return false; // The predecessor has to be immediately before this block. - const MachineBasicBlock *Pred = *PI; + MachineBasicBlock *Pred = *PI; if (!Pred->isLayoutSuccessor(MBB)) return false; @@ -1888,9 +1951,28 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { if (Pred->empty()) return true; - // Otherwise, check the last instruction. - const MachineInstr &LastInst = Pred->back(); - return !LastInst.getDesc().isBarrier(); + // Check the terminators in the previous blocks + for (MachineBasicBlock::iterator II = Pred->getFirstTerminator(), + IE = Pred->end(); II != IE; ++II) { + MachineInstr &MI = *II; + + // If it is not a simple branch, we are in a table somewhere. + if (!MI.getDesc().isBranch() || MI.getDesc().isIndirectBranch()) + return false; + + // If we are the operands of one of the branches, this is not + // a fall through. + for (MachineInstr::mop_iterator OI = MI.operands_begin(), + OE = MI.operands_end(); OI != OE; ++OI) { + const MachineOperand& OP = *OI; + if (OP.isJTI()) + return false; + if (OP.isMBB() && OP.getMBB() == MBB) + return false; + } + } + + return true; } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 9c8184a..dd5b0e2 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -155,7 +155,7 @@ void AsmPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const { const TargetLoweringObjectFile &TLOF = getObjFileLowering(); const MCExpr *Exp = - TLOF.getExprForDwarfReference(Sym, Mang, MMI, Encoding, OutStreamer); + TLOF.getExprForDwarfReference(Sym, Encoding, OutStreamer); OutStreamer.EmitAbsValue(Exp, GetSizeOfEncodedValue(Encoding)); } @@ -206,108 +206,28 @@ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, // Dwarf Lowering Routines //===----------------------------------------------------------------------===// - -/// EmitFrameMoves - Emit frame instructions to describe the layout of the -/// frame. 
-void AsmPrinter::EmitFrameMoves(const std::vector<MachineMove> &Moves, - MCSymbol *BaseLabel, bool isEH) const { - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - - int stackGrowth = TM.getTargetData()->getPointerSize(); - if (TM.getFrameLowering()->getStackGrowthDirection() != - TargetFrameLowering::StackGrowsUp) - stackGrowth *= -1; - - for (unsigned i = 0, N = Moves.size(); i < N; ++i) { - const MachineMove &Move = Moves[i]; - MCSymbol *Label = Move.getLabel(); - // Throw out move if the label is invalid. - if (Label && !Label->isDefined()) continue; // Not emitted, in dead code. - - const MachineLocation &Dst = Move.getDestination(); - const MachineLocation &Src = Move.getSource(); - - // Advance row if new location. - if (BaseLabel && Label) { - MCSymbol *ThisSym = Label; - if (ThisSym != BaseLabel) { - EmitCFAByte(dwarf::DW_CFA_advance_loc4); - EmitLabelDifference(ThisSym, BaseLabel, 4); - BaseLabel = ThisSym; - } - } - - // If advancing cfa. - if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { - assert(!Src.isReg() && "Machine move not supported yet."); - - if (Src.getReg() == MachineLocation::VirtualFP) { - EmitCFAByte(dwarf::DW_CFA_def_cfa_offset); - } else { - EmitCFAByte(dwarf::DW_CFA_def_cfa); - EmitULEB128(RI->getDwarfRegNum(Src.getReg(), isEH), "Register"); - } - - EmitULEB128(-Src.getOffset(), "Offset"); - continue; - } - - if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { - assert(Dst.isReg() && "Machine move not supported yet."); - EmitCFAByte(dwarf::DW_CFA_def_cfa_register); - EmitULEB128(RI->getDwarfRegNum(Dst.getReg(), isEH), "Register"); - continue; - } - - unsigned Reg = RI->getDwarfRegNum(Src.getReg(), isEH); - int Offset = Dst.getOffset() / stackGrowth; - - if (Offset < 0) { - EmitCFAByte(dwarf::DW_CFA_offset_extended_sf); - EmitULEB128(Reg, "Reg"); - EmitSLEB128(Offset, "Offset"); - } else if (Reg < 64) { - EmitCFAByte(dwarf::DW_CFA_offset + Reg); - EmitULEB128(Offset, "Offset"); - } else { - EmitCFAByte(dwarf::DW_CFA_offset_extended); - EmitULEB128(Reg, "Reg"); - EmitULEB128(Offset, "Offset"); - } - } -} - -/// EmitFrameMoves - Emit frame instructions to describe the layout of the -/// frame. -void AsmPrinter::EmitCFIFrameMoves(const std::vector<MachineMove> &Moves) const { +/// EmitCFIFrameMove - Emit a frame instruction. +void AsmPrinter::EmitCFIFrameMove(const MachineMove &Move) const { const TargetRegisterInfo *RI = TM.getRegisterInfo(); - for (unsigned i = 0, N = Moves.size(); i < N; ++i) { - const MachineMove &Move = Moves[i]; - MCSymbol *Label = Move.getLabel(); - // Throw out move if the label is invalid. - if (Label && !Label->isDefined()) continue; // Not emitted, in dead code. - - const MachineLocation &Dst = Move.getDestination(); - const MachineLocation &Src = Move.getSource(); - - // If advancing cfa. - if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { - assert(!Src.isReg() && "Machine move not supported yet."); + const MachineLocation &Dst = Move.getDestination(); + const MachineLocation &Src = Move.getSource(); - if (Src.getReg() == MachineLocation::VirtualFP) { - OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset()); - } else { - assert("Machine move not supported yet"); - // Reg + Offset - } - } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { - assert(Dst.isReg() && "Machine move not supported yet."); - OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true)); + // If advancing cfa. 
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { + if (Src.getReg() == MachineLocation::VirtualFP) { + OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset()); } else { - assert(!Dst.isReg() && "Machine move not supported yet."); - OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true), - Dst.getOffset()); + // Reg + Offset + OutStreamer.EmitCFIDefCfa(RI->getDwarfRegNum(Src.getReg(), true), + Src.getOffset()); } + } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { + assert(Dst.isReg() && "Machine move not supported yet."); + OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true)); + } else { + assert(!Dst.isReg() && "Machine move not supported yet."); + OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true), + Dst.getOffset()); } } diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index 1377e4d..4da7876 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -5,9 +5,10 @@ add_llvm_library(LLVMAsmPrinter AsmPrinterInlineAsm.cpp DIE.cpp DwarfCFIException.cpp + DwarfCompileUnit.cpp DwarfDebug.cpp DwarfException.cpp - DwarfTableException.cpp OcamlGCPrinter.cpp + Win64Exception.cpp ) diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 68be2ee..91b7d08 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -32,6 +32,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -40,92 +41,105 @@ using namespace llvm; DwarfCFIException::DwarfCFIException(AsmPrinter *A) : DwarfException(A), - shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false) - {} + shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false), + moveTypeModule(AsmPrinter::CFI_M_None) {} DwarfCFIException::~DwarfCFIException() {} /// EndModule - Emit all exception information that should come after the /// content. void DwarfCFIException::EndModule() { - if (!Asm->MAI->isExceptionHandlingDwarf()) - return; + if (moveTypeModule == AsmPrinter::CFI_M_Debug) + Asm->OutStreamer.EmitCFISections(false, true); - if (!shouldEmitTableModule) + if (!Asm->MAI->isExceptionHandlingDwarf()) return; const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); - // Begin eh frame section. - Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); + if ((PerEncoding & 0x70) != dwarf::DW_EH_PE_pcrel) + return; // Emit references to all used personality functions + bool AtLeastOne = false; const std::vector<const Function*> &Personalities = MMI->getPersonalities(); for (size_t i = 0, e = Personalities.size(); i != e; ++i) { - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("personality", i)); - Asm->EmitReference(Personalities[i], PerEncoding); + if (!Personalities[i]) + continue; + MCSymbol *Sym = Asm->Mang->getSymbol(Personalities[i]); + TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym); + AtLeastOne = true; + } + + if (AtLeastOne && !TLOF.isFunctionEHFrameSymbolPrivate()) { + // This is a temporary hack to keep sections in the same order they + // were before. This lets us produce bit identical outputs while + // transitioning to CFI. 
+ Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); } } /// BeginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. void DwarfCFIException::BeginFunction(const MachineFunction *MF) { - shouldEmitTable = shouldEmitMoves = false; + shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; // If any landing pads survive, we need an EH table. - shouldEmitTable = !MMI->getLandingPads().empty(); + bool hasLandingPads = !MMI->getLandingPads().empty(); // See if we need frame move info. - shouldEmitMoves = - !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory; + AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves(); + if (MoveType == AsmPrinter::CFI_M_EH || + (MoveType == AsmPrinter::CFI_M_Debug && + moveTypeModule == AsmPrinter::CFI_M_None)) + moveTypeModule = MoveType; - if (shouldEmitMoves || shouldEmitTable) - // Assumes in correct section after the entry point. - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", - Asm->getFunctionNumber())); + shouldEmitMoves = MoveType != AsmPrinter::CFI_M_None; - shouldEmitTableModule |= shouldEmitTable; + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; - if (shouldEmitMoves) { - const TargetFrameLowering *TFL = Asm->TM.getFrameLowering(); - Asm->OutStreamer.EmitCFIStartProc(); + shouldEmitPersonality = hasLandingPads && + PerEncoding != dwarf::DW_EH_PE_omit && Per; - // Indicate locations of general callee saved registers in frame. - std::vector<MachineMove> Moves; - TFL->getInitialFrameState(Moves); - Asm->EmitCFIFrameMoves(Moves); - Asm->EmitCFIFrameMoves(MMI->getFrameMoves()); - } + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + shouldEmitLSDA = shouldEmitPersonality && + LSDAEncoding != dwarf::DW_EH_PE_omit; - if (!shouldEmitTable) + if (!shouldEmitPersonality && !shouldEmitMoves) return; - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + Asm->OutStreamer.EmitCFIStartProc(); + + // Indicate personality routine, if any. + if (!shouldEmitPersonality) + return; + + const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI); + Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", + Asm->getFunctionNumber())); // Provide LSDA information. - unsigned LSDAEncoding = TLOF.getLSDAEncoding(); - if (LSDAEncoding != dwarf::DW_EH_PE_omit) - Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception", - Asm->getFunctionNumber()), - LSDAEncoding); + if (!shouldEmitLSDA) + return; - // Indicate personality routine, if any. - unsigned PerEncoding = TLOF.getPersonalityEncoding(); - if (PerEncoding != dwarf::DW_EH_PE_omit && - MMI->getPersonalities()[MMI->getPersonalityIndex()]) - Asm->OutStreamer.EmitCFIPersonality(Asm->GetTempSymbol("personality", - MMI->getPersonalityIndex()), - PerEncoding); + Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception", + Asm->getFunctionNumber()), + LSDAEncoding); } /// EndFunction - Gather and emit post-function exception information. 
/// void DwarfCFIException::EndFunction() { - if (!shouldEmitMoves && !shouldEmitTable) return; + if (!shouldEmitPersonality && !shouldEmitMoves) + return; - if (shouldEmitMoves) - Asm->OutStreamer.EmitCFIEndProc(); + Asm->OutStreamer.EmitCFIEndProc(); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber())); @@ -133,6 +147,6 @@ void DwarfCFIException::EndFunction() { // Map all labels and get rid of any dead landing pads. MMI->TidyLandingPads(); - if (shouldEmitTable) + if (shouldEmitPersonality) EmitExceptionTable(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp new file mode 100644 index 0000000..bff1a35 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -0,0 +1,1054 @@ +//===-- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Unit ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf compile unit. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "dwarfdebug" + +#include "DwarfCompileUnit.h" +#include "DwarfDebug.h" +#include "llvm/Constants.h" +#include "llvm/Analysis/DIBuilder.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +/// CompileUnit - Compile unit constructor. +CompileUnit::CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW) + : ID(I), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) { + DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); +} + +/// ~CompileUnit - Destructor for compile unit. +CompileUnit::~CompileUnit() { + for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) + DIEBlocks[j]->~DIEBlock(); +} + +/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug +/// information entry. +DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) { + DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry); + return Value; +} + +/// addUInt - Add an unsigned integer attribute data and value. +/// +void CompileUnit::addUInt(DIE *Die, unsigned Attribute, + unsigned Form, uint64_t Integer) { + if (!Form) Form = DIEInteger::BestForm(false, Integer); + DIEValue *Value = Integer == 1 ? + DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer); + Die->addValue(Attribute, Form, Value); +} + +/// addSInt - Add a signed integer attribute data and value. +/// +void CompileUnit::addSInt(DIE *Die, unsigned Attribute, + unsigned Form, int64_t Integer) { + if (!Form) Form = DIEInteger::BestForm(true, Integer); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); + Die->addValue(Attribute, Form, Value); +} + +/// addString - Add a string attribute data and value. DIEString only +/// keeps string reference. +void CompileUnit::addString(DIE *Die, unsigned Attribute, unsigned Form, + StringRef String) { + DIEValue *Value = new (DIEValueAllocator) DIEString(String); + Die->addValue(Attribute, Form, Value); +} + +/// addLabel - Add a Dwarf label attribute data and value.
+/// +void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form, + const MCSymbol *Label) { + DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); + Die->addValue(Attribute, Form, Value); +} + +/// addDelta - Add a label delta attribute data and value. +/// +void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form, + const MCSymbol *Hi, const MCSymbol *Lo) { + DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); + Die->addValue(Attribute, Form, Value); +} + +/// addDIEEntry - Add a DIE attribute data and value. +/// +void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, + DIE *Entry) { + Die->addValue(Attribute, Form, createDIEEntry(Entry)); +} + + +/// addBlock - Add block data. +/// +void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form, + DIEBlock *Block) { + Block->ComputeSize(Asm); + DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. + Die->addValue(Attribute, Block->BestForm(), Block); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DIVariable V) { + // Verify variable. + if (!V.Verify()) + return; + + unsigned Line = V.getLineNumber(); + if (Line == 0) + return; + unsigned FileID = DD->GetOrCreateSourceID(V.getContext().getFilename(), + V.getContext().getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { + // Verify global variable. + if (!G.Verify()) + return; + + unsigned Line = G.getLineNumber(); + if (Line == 0) + return; + unsigned FileID = DD->GetOrCreateSourceID(G.getContext().getFilename(), + G.getContext().getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { + // Verify subprogram. + if (!SP.Verify()) + return; + // If the line number is 0, don't add it. + if (SP.getLineNumber() == 0) + return; + + unsigned Line = SP.getLineNumber(); + if (!SP.getContext().Verify()) + return; + unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), SP.getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { + // Verify type. + if (!Ty.Verify()) + return; + + unsigned Line = Ty.getLineNumber(); + if (Line == 0 || !Ty.getContext().Verify()) + return; + unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(), Ty.getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) { + // Verify namespace. 
+ if (!NS.Verify()) + return; + + unsigned Line = NS.getLineNumber(); + if (Line == 0) + return; + StringRef FN = NS.getFilename(); + + unsigned FileID = DD->GetOrCreateSourceID(FN, NS.getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addVariableAddress - Add DW_AT_location attribute for a +/// DbgVariable based on provided MachineLocation. +void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die, + MachineLocation Location) { + if (DV->variableHasComplexAddress()) + addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); + else if (DV->isBlockByrefVariable()) + addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); + else + addAddress(Die, dwarf::DW_AT_location, Location); +} + +/// addRegisterOp - Add register operand. +void CompileUnit::addRegisterOp(DIE *TheDie, unsigned Reg) { + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + unsigned DWReg = RI->getDwarfRegNum(Reg, false); + if (DWReg < 32) + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg); + else { + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); + addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg); + } +} + +/// addRegisterOffset - Add register offset. +void CompileUnit::addRegisterOffset(DIE *TheDie, unsigned Reg, + int64_t Offset) { + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + unsigned DWReg = RI->getDwarfRegNum(Reg, false); + const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); + if (Reg == TRI->getFrameRegister(*Asm->MF)) + // If variable offset is based in frame register then use fbreg. + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); + else if (DWReg < 32) + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg); + else { + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg); + } + addSInt(TheDie, 0, dwarf::DW_FORM_sdata, Offset); +} + +/// addAddress - Add an address attribute to a die based on the location +/// provided. +void CompileUnit::addAddress(DIE *Die, unsigned Attribute, + const MachineLocation &Location) { + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + if (Location.isReg()) + addRegisterOp(Block, Location.getReg()); + else + addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, 0, Block); +} + +/// addComplexAddress - Start with the address based on the location provided, +/// and generate the DWARF information necessary to find the actual variable +/// given the extra address information encoded in the DIVariable, starting from +/// the starting location. Add the DWARF information to the die. +/// +void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die, + unsigned Attribute, + const MachineLocation &Location) { + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + unsigned N = DV->getNumAddrElements(); + unsigned i = 0; + if (Location.isReg()) { + if (N >= 2 && DV->getAddrElement(0) == DIBuilder::OpPlus) { + // If first address element is OpPlus then emit + // DW_OP_breg + Offset instead of DW_OP_reg + Offset. 
+ addRegisterOffset(Block, Location.getReg(), DV->getAddrElement(1)); + i = 2; + } else + addRegisterOp(Block, Location.getReg()); + } + else + addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + + for (;i < N; ++i) { + uint64_t Element = DV->getAddrElement(i); + if (Element == DIBuilder::OpPlus) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i)); + } else if (Element == DIBuilder::OpDeref) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + } else llvm_unreachable("unknown DIBuilder Opcode"); + } + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, 0, Block); +} + +/* Byref variables, in Blocks, are declared by the programmer as "SomeType + VarName;", but the compiler creates a __Block_byref_x_VarName struct, and + gives the variable VarName either the struct, or a pointer to the struct, as + its type. This is necessary for various behind-the-scenes things the + compiler needs to do with by-reference variables in Blocks. + + However, as far as the original *programmer* is concerned, the variable + should still have type 'SomeType', as originally declared. + + The function getBlockByrefType dives into the __Block_byref_x_VarName + struct to find the original type of the variable, which is then assigned to + the variable's Debug Information Entry as its real type. So far, so good. + However now the debugger will expect the variable VarName to have the type + SomeType. So we need the location attribute for the variable to be an + expression that explains to the debugger how to navigate through the + pointers and struct to find the actual variable of type SomeType. + + The following function does just that. We start by getting + the "normal" location for the variable. This will be the location + of either the struct __Block_byref_x_VarName or the pointer to the + struct __Block_byref_x_VarName. + + The struct will look something like: + + struct __Block_byref_x_VarName { + ... <various fields> + struct __Block_byref_x_VarName *forwarding; + ... <various other fields> + SomeType VarName; + ... <maybe more fields> + }; + + If we are given the struct directly (as our starting point) we + need to tell the debugger to: + + 1). Add the offset of the forwarding field. + + 2). Follow that pointer to get the real __Block_byref_x_VarName + struct to use (the real one may have been copied onto the heap). + + 3). Add the offset for the field VarName, to find the actual variable. + + If we started with a pointer to the struct, then we need to + dereference that pointer first, before the other steps. + Translating this into DWARF ops, we will need to append the following + to the current location description for the variable: + + DW_OP_deref -- optional, if we start with a pointer + DW_OP_plus_uconst <forward_fld_offset> + DW_OP_deref + DW_OP_plus_uconst <varName_fld_offset> + + That is what this function does. */ + +/// addBlockByrefAddress - Start with the address based on the location +/// provided, and generate the DWARF information necessary to find the +/// actual Block variable (navigating the Block struct) based on the +/// starting location. Add the DWARF information to the die. For +/// more information, read large comment just above here. 
+/// +void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, + unsigned Attribute, + const MachineLocation &Location) { + DIType Ty = DV->getType(); + DIType TmpTy = Ty; + unsigned Tag = Ty.getTag(); + bool isPointer = false; + + StringRef varName = DV->getName(); + + if (Tag == dwarf::DW_TAG_pointer_type) { + DIDerivedType DTy = DIDerivedType(Ty); + TmpTy = DTy.getTypeDerivedFrom(); + isPointer = true; + } + + DICompositeType blockStruct = DICompositeType(TmpTy); + + // Find the __forwarding field and the variable field in the __Block_byref + // struct. + DIArray Fields = blockStruct.getTypeArray(); + DIDescriptor varField = DIDescriptor(); + DIDescriptor forwardingField = DIDescriptor(); + + for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { + DIDescriptor Element = Fields.getElement(i); + DIDerivedType DT = DIDerivedType(Element); + StringRef fieldName = DT.getName(); + if (fieldName == "__forwarding") + forwardingField = Element; + else if (fieldName == varName) + varField = Element; + } + + // Get the offsets for the forwarding field and the variable field. + unsigned forwardingFieldOffset = + DIDerivedType(forwardingField).getOffsetInBits() >> 3; + unsigned varFieldOffset = + DIDerivedType(varField).getOffsetInBits() >> 3; + + // Decode the original location, and use that as the start of the byref + // variable's location. + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + if (Location.isReg()) { + if (Reg < 32) + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); + else { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); + addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + } else { + if (Reg < 32) + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + else { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + + addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); + } + + // If we started with a pointer to the __Block_byref... struct, then + // the first thing we need to do is dereference the pointer (DW_OP_deref). + if (isPointer) + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + + // Next add the offset for the '__forwarding' field: + // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in + // adding the offset if it's 0. + if (forwardingFieldOffset > 0) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset); + } + + // Now dereference the __forwarding field to get to the real __Block_byref + // struct: DW_OP_deref. + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + + // Now that we've got the real __Block_byref... struct, add the offset + // for the variable's field to get to the location of the actual variable: + // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. + if (varFieldOffset > 0) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset); + } + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, 0, Block); +} + +/// addConstantValue - Add constant value entry in variable DIE. 
+bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, + DIType Ty) { + assert (MO.isImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + unsigned form = dwarf::DW_FORM_udata; + switch (Ty.getSizeInBits()) { + case 8: form = dwarf::DW_FORM_data1; break; + case 16: form = dwarf::DW_FORM_data2; break; + case 32: form = dwarf::DW_FORM_data4; break; + case 64: form = dwarf::DW_FORM_data8; break; + default: break; + } + + DIBasicType BTy(Ty); + if (BTy.Verify() && + (BTy.getEncoding() == dwarf::DW_ATE_signed + || BTy.getEncoding() == dwarf::DW_ATE_signed_char)) + addSInt(Block, 0, form, MO.getImm()); + else + addUInt(Block, 0, form, MO.getImm()); + + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + return true; +} + +/// addConstantFPValue - Add constant value entry in variable DIE. +bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { + assert (MO.isFPImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + APFloat FPImm = MO.getFPImm()->getValueAPF(); + + // Get the raw data form of the floating point. + const APInt FltVal = FPImm.bitcastToAPInt(); + const char *FltPtr = (const char*)FltVal.getRawData(); + + int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getTargetData().isLittleEndian(); + int Incr = (LittleEndian ? 1 : -1); + int Start = (LittleEndian ? 0 : NumBytes - 1); + int Stop = (LittleEndian ? NumBytes : -1); + + // Output the constant to DWARF one byte at a time. + for (; Start != Stop; Start += Incr) + addUInt(Block, 0, dwarf::DW_FORM_data1, + (unsigned char)0xFF & FltPtr[Start]); + + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + return true; +} + +/// addConstantValue - Add constant value entry in variable DIE. +bool CompileUnit::addConstantValue(DIE *Die, ConstantInt *CI, + bool Unsigned) { + unsigned CIBitWidth = CI->getBitWidth(); + if (CIBitWidth <= 64) { + unsigned form = 0; + switch (CIBitWidth) { + case 8: form = dwarf::DW_FORM_data1; break; + case 16: form = dwarf::DW_FORM_data2; break; + case 32: form = dwarf::DW_FORM_data4; break; + case 64: form = dwarf::DW_FORM_data8; break; + default: + form = Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata; + } + if (Unsigned) + addUInt(Die, dwarf::DW_AT_const_value, form, CI->getZExtValue()); + else + addSInt(Die, dwarf::DW_AT_const_value, form, CI->getSExtValue()); + return true; + } + + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + // Get the raw data form of the large APInt. + const APInt Val = CI->getValue(); + const char *Ptr = (const char*)Val.getRawData(); + + int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getTargetData().isLittleEndian(); + int Incr = (LittleEndian ? 1 : -1); + int Start = (LittleEndian ? 0 : NumBytes - 1); + int Stop = (LittleEndian ? NumBytes : -1); + + // Output the constant to DWARF one byte at a time. + for (; Start != Stop; Start += Incr) + addUInt(Block, 0, dwarf::DW_FORM_data1, + (unsigned char)0xFF & Ptr[Start]); + + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + return true; +} + +/// addTemplateParams - Add template parameters in buffer. +void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { + // Add template parameters. 
+ for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) { + DIDescriptor Element = TParams.getElement(i); + if (Element.isTemplateTypeParameter()) + Buffer.addChild(getOrCreateTemplateTypeParameterDIE( + DITemplateTypeParameter(Element))); + else if (Element.isTemplateValueParameter()) + Buffer.addChild(getOrCreateTemplateValueParameterDIE( + DITemplateValueParameter(Element))); + } + +} +/// addToContextOwner - Add Die into the list of its context owner's children. +void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) { + if (Context.isType()) { + DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context)); + ContextDIE->addChild(Die); + } else if (Context.isNameSpace()) { + DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context)); + ContextDIE->addChild(Die); + } else if (Context.isSubprogram()) { + DIE *ContextDIE = DD->createSubprogramDIE(DISubprogram(Context)); + ContextDIE->addChild(Die); + } else if (DIE *ContextDIE = getDIE(Context)) + ContextDIE->addChild(Die); + else + addDie(Die); +} + +/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the +/// given DIType. +DIE *CompileUnit::getOrCreateTypeDIE(DIType Ty) { + DIE *TyDIE = getDIE(Ty); + if (TyDIE) + return TyDIE; + + // Create new type. + TyDIE = new DIE(dwarf::DW_TAG_base_type); + insertDIE(Ty, TyDIE); + if (Ty.isBasicType()) + constructTypeDIE(*TyDIE, DIBasicType(Ty)); + else if (Ty.isCompositeType()) + constructTypeDIE(*TyDIE, DICompositeType(Ty)); + else { + assert(Ty.isDerivedType() && "Unknown kind of DIType"); + constructTypeDIE(*TyDIE, DIDerivedType(Ty)); + } + + addToContextOwner(TyDIE, Ty.getContext()); + return TyDIE; +} + +/// addType - Add a new type attribute to the specified entity. +void CompileUnit::addType(DIE *Entity, DIType Ty) { + if (!Ty.Verify()) + return; + + // Check for pre-existence. + DIEEntry *Entry = getDIEEntry(Ty); + // If it exists then use the existing value. + if (Entry) { + Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); + return; + } + + // Construct type. + DIE *Buffer = getOrCreateTypeDIE(Ty); + + // Set up proxy. + Entry = createDIEEntry(Buffer); + insertDIEEntry(Ty, Entry); + Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); + + // If this is a complete composite type then include it in the + // list of global types. + addGlobalType(Ty); +} + +/// addGlobalType - Add a new global type to the compile unit. +/// +void CompileUnit::addGlobalType(DIType Ty) { + DIDescriptor Context = Ty.getContext(); + if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl() + && (Context.isCompileUnit() || Context.isFile() || Context.isNameSpace())) + if (DIEEntry *Entry = getDIEEntry(Ty)) + GlobalTypes[Ty.getName()] = Entry->getEntry(); +} + +/// addPubTypes - Add type for pubtypes section. +void CompileUnit::addPubTypes(DISubprogram SP) { + DICompositeType SPTy = SP.getType(); + unsigned SPTag = SPTy.getTag(); + if (SPTag != dwarf::DW_TAG_subroutine_type) + return; + + DIArray Args = SPTy.getTypeArray(); + for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) { + DIType ATy(Args.getElement(i)); + if (!ATy.Verify()) + continue; + addGlobalType(ATy); + } +} + +/// constructTypeDIE - Construct basic type die from DIBasicType. +void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { + // Get core information. 
+ StringRef Name = BTy.getName(); + Buffer.setTag(dwarf::DW_TAG_base_type); + addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + BTy.getEncoding()); + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + uint64_t Size = BTy.getSizeInBits() >> 3; + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); +} + +/// constructTypeDIE - Construct derived type die from DIDerivedType. +void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { + // Get core information. + StringRef Name = DTy.getName(); + uint64_t Size = DTy.getSizeInBits() >> 3; + unsigned Tag = DTy.getTag(); + + // FIXME - Workaround for templates. + if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type; + + Buffer.setTag(Tag); + + // Map to main type, void will not have a type. + DIType FromTy = DTy.getTypeDerivedFrom(); + addType(&Buffer, FromTy); + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + // Add size if non-zero (derived types might be zero-sized.) + if (Size) + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + + // Add source line info if available and TyDesc is not a forward declaration. + if (!DTy.isForwardDecl()) + addSourceLine(&Buffer, DTy); +} + +/// constructTypeDIE - Construct type DIE from DICompositeType. +void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { + // Get core information. + StringRef Name = CTy.getName(); + + uint64_t Size = CTy.getSizeInBits() >> 3; + unsigned Tag = CTy.getTag(); + Buffer.setTag(Tag); + + switch (Tag) { + case dwarf::DW_TAG_vector_type: + case dwarf::DW_TAG_array_type: + constructArrayTypeDIE(Buffer, &CTy); + break; + case dwarf::DW_TAG_enumeration_type: { + DIArray Elements = CTy.getTypeArray(); + + // Add enumerators to enumeration type. + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIE *ElemDie = NULL; + DIDescriptor Enum(Elements.getElement(i)); + if (Enum.isEnumerator()) { + ElemDie = constructEnumTypeDIE(DIEnumerator(Enum)); + Buffer.addChild(ElemDie); + } + } + } + break; + case dwarf::DW_TAG_subroutine_type: { + // Add return type. + DIArray Elements = CTy.getTypeArray(); + DIDescriptor RTy = Elements.getElement(0); + addType(&Buffer, DIType(RTy)); + + bool isPrototyped = true; + // Add arguments. + for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Ty = Elements.getElement(i); + if (Ty.isUnspecifiedParameter()) { + DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters); + Buffer.addChild(Arg); + isPrototyped = false; + } else { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + addType(Arg, DIType(Ty)); + Buffer.addChild(Arg); + } + } + // Add prototype flag. + if (isPrototyped) + addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + } + break; + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_class_type: { + // Add elements to structure type. + DIArray Elements = CTy.getTypeArray(); + + // A forward struct declared type may not have elements available. + unsigned N = Elements.getNumElements(); + if (N == 0) + break; + + // Add elements to structure type. 
+ for (unsigned i = 0; i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + DIE *ElemDie = NULL; + if (Element.isSubprogram()) { + DISubprogram SP(Element); + ElemDie = DD->createSubprogramDIE(DISubprogram(Element)); + if (SP.isProtected()) + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_protected); + else if (SP.isPrivate()) + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_private); + else + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_public); + if (SP.isExplicit()) + addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1); + } + else if (Element.isVariable()) { + DIVariable DV(Element); + ElemDie = new DIE(dwarf::DW_TAG_variable); + addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + DV.getName()); + addType(ElemDie, DV.getType()); + addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + addSourceLine(ElemDie, DV); + } else if (Element.isDerivedType()) + ElemDie = createMemberDIE(DIDerivedType(Element)); + else + continue; + Buffer.addChild(ElemDie); + } + + if (CTy.isAppleBlockExtension()) + addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); + + unsigned RLang = CTy.getRunTimeLang(); + if (RLang) + addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, + dwarf::DW_FORM_data1, RLang); + + DICompositeType ContainingType = CTy.getContainingType(); + if (DIDescriptor(ContainingType).isCompositeType()) + addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, + getOrCreateTypeDIE(DIType(ContainingType))); + else { + DIDescriptor Context = CTy.getContext(); + addToContextOwner(&Buffer, Context); + } + + if (CTy.isObjcClassComplete()) + addUInt(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type, + dwarf::DW_FORM_flag, 1); + + if (Tag == dwarf::DW_TAG_class_type) + addTemplateParams(Buffer, CTy.getTemplateParams()); + + break; + } + default: + break; + } + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type + || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) + { + // Add size if non-zero (derived types might be zero-sized.) + if (Size) + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + else { + // Add zero size if it is not a forward declaration. + if (CTy.isForwardDecl()) + addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + else + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0); + } + + // Add source line info if available. + if (!CTy.isForwardDecl()) + addSourceLine(&Buffer, CTy); + } +} + +/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE +/// for the given DITemplateTypeParameter. +DIE * +CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { + DIE *ParamDIE = getDIE(TP); + if (ParamDIE) + return ParamDIE; + + ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter); + addType(ParamDIE, TP.getType()); + addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName()); + return ParamDIE; +} + +/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE +/// for the given DITemplateValueParameter. 
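///
/// (Editorial sketch, not part of the patch: for a hypothetical
/// `template <typename T, int N> class Foo;` instantiated as `Foo<int, 4>`,
/// addTemplateParams would attach two children to the class DIE via these
/// helpers:
///   DW_TAG_template_type_parameter:  DW_AT_name "T", DW_AT_type -> int
///   DW_TAG_template_value_parameter: DW_AT_name "N", DW_AT_type -> int,
///                                    DW_AT_const_value 4 (DW_FORM_udata)
/// matching the tags, attributes, and forms emitted by the two helpers.)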
+DIE *
+CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) {
+ DIE *ParamDIE = getDIE(TPV);
+ if (ParamDIE)
+ return ParamDIE;
+
+ ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter);
+ addType(ParamDIE, TPV.getType());
+ if (!TPV.getName().empty())
+ addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName());
+ addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+ TPV.getValue());
+ return ParamDIE;
+}
+
+/// getOrCreateNameSpace - Create a DIE for DINameSpace.
+DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) {
+ DIE *NDie = getDIE(NS);
+ if (NDie)
+ return NDie;
+ NDie = new DIE(dwarf::DW_TAG_namespace);
+ insertDIE(NS, NDie);
+ if (!NS.getName().empty())
+ addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName());
+ addSourceLine(NDie, NS);
+ addToContextOwner(NDie, NS.getContext());
+ return NDie;
+}
+
+/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
+ DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
+ addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
+ int64_t L = SR.getLo();
+ int64_t H = SR.getHi();
+
+ // The L value defines the lower bound, which is typically zero for C/C++. The
+ // H value is the upper bound. Values are 64 bit. H - L + 1 is the size
+ // of the array. If L > H then do not emit the DW_AT_lower_bound and
+ // DW_AT_upper_bound attributes. If L and H are both zero, the array has one
+ // element, and in that case do not emit a lower bound.
+
+ if (L > H) {
+ Buffer.addChild(DW_Subrange);
+ return;
+ }
+ if (L)
+ addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
+ addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
+ Buffer.addChild(DW_Subrange);
+}
+
+/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+void CompileUnit::constructArrayTypeDIE(DIE &Buffer,
+ DICompositeType *CTy) {
+ Buffer.setTag(dwarf::DW_TAG_array_type);
+ if (CTy->getTag() == dwarf::DW_TAG_vector_type)
+ addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1);
+
+ // Emit derived type.
+ addType(&Buffer, CTy->getTypeDerivedFrom());
+ DIArray Elements = CTy->getTypeArray();
+
+ // Get an anonymous type for index type.
+ DIE *IdxTy = getIndexTyDie();
+ if (!IdxTy) {
+ // Construct an anonymous type for index type.
+ IdxTy = new DIE(dwarf::DW_TAG_base_type);
+ addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
+ addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ dwarf::DW_ATE_signed);
+ addDie(IdxTy);
+ setIndexTyDie(IdxTy);
+ }
+
+ // Add subranges to array type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ if (Element.getTag() == dwarf::DW_TAG_subrange_type)
+ constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy);
+ }
+}
+
+/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) {
+ DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
+ StringRef Name = ETy.getName();
+ addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ int64_t Value = ETy.getEnumValue();
+ addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
+ return Enumerator;
+}
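// Editorial sketch (not part of the patch): the bound-emission rule from
// constructSubrangeDIE above, isolated and exercised on two assumed
// descriptors. All names here are hypothetical; only the L/H logic mirrors
// the code.
#include <cstdint>
#include <cstdio>

struct Bounds { bool EmitLower; bool EmitUpper; };

static Bounds boundsFor(int64_t L, int64_t H) {
  Bounds B;
  B.EmitUpper = (L <= H);              // L > H means the element count is unknown.
  B.EmitLower = (L <= H) && (L != 0);  // A zero lower bound is implied for C/C++.
  return B;
}

int main() {
  Bounds A = boundsFor(0, 3);   // e.g. `int A[4]` described as Lo = 0, Hi = 3.
  Bounds B = boundsFor(0, -1);  // e.g. an incomplete `int B[]` with Lo > Hi.
  std::printf("A: lower=%d upper=%d\n", A.EmitLower, A.EmitUpper); // 0 1
  std::printf("B: lower=%d upper=%d\n", B.EmitLower, B.EmitUpper); // 0 0
  return 0;
}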
+
+/// createMemberDIE - Create new member DIE.
+DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
+ DIE *MemberDie = new DIE(DT.getTag());
+ StringRef Name = DT.getName();
+ if (!Name.empty())
+ addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ addType(MemberDie, DT.getTypeDerivedFrom());
+
+ addSourceLine(MemberDie, DT);
+
+ DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock();
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+
+ uint64_t Size = DT.getSizeInBits();
+ uint64_t FieldSize = DT.getOriginalTypeSize();
+
+ if (Size != FieldSize) {
+ // Handle bitfield.
+ addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3);
+ addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
+
+ uint64_t Offset = DT.getOffsetInBits();
+ uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
+ uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+ uint64_t FieldOffset = (HiMark - FieldSize);
+ Offset -= FieldOffset;
+
+ // Maybe we need to work from the other end.
+ if (Asm->getTargetData().isLittleEndian())
+ Offset = FieldSize - (Offset + Size);
+ addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
+
+ // Here DW_AT_data_member_location points to the anonymous
+ // field that includes this bit field.
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3);
+
+ } else
+ // This is not a bitfield.
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3);
+
+ if (DT.getTag() == dwarf::DW_TAG_inheritance
+ && DT.isVirtual()) {
+
+ // For C++, virtual base classes are not at a fixed offset. Use the following
+ // expression to extract the appropriate offset from the vtable.
+ // BaseAddr = ObAddr + *((*ObAddr) - Offset)
+
+ DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock();
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits());
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
+ VBaseLocationDie);
+ } else
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
+
+ if (DT.isProtected())
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_protected);
+ else if (DT.isPrivate())
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_private);
+ // Otherwise C++ member and base classes are considered public.
+ else if (DT.getCompileUnit().getLanguage() == dwarf::DW_LANG_C_plus_plus)
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_public);
+ if (DT.isVirtual())
+ addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag,
+ dwarf::DW_VIRTUALITY_virtual);
+
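// Editorial sketch (not part of the patch): the bitfield arithmetic above on
// one concrete field. Assume `struct S { unsigned a : 3; unsigned b : 5; };`
// on a little-endian target, describing `b`: OffsetInBits = 3, SizeInBits = 5,
// and OriginalTypeSize = AlignInBits = 32 (all assumed front-end values).
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Size = 5, FieldSize = 32, Offset = 3;
  uint64_t AlignMask = ~(uint64_t(32) - 1);           // clear the low 5 bits
  uint64_t HiMark = (Offset + FieldSize) & AlignMask; // 32: end of storage unit
  uint64_t FieldOffset = HiMark - FieldSize;          // 0: start of storage unit
  Offset -= FieldOffset;                              // 3: bits from unit start
  Offset = FieldSize - (Offset + Size);               // 24: DWARF2 counts from MSB
  std::printf("DW_AT_bit_offset = %llu\n", (unsigned long long)Offset);
  std::printf("DW_AT_data_member_location = %llu\n",
              (unsigned long long)(FieldOffset >> 3)); // byte offset 0
  return 0;
}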
+ // Objective-C properties.
+ StringRef PropertyName = DT.getObjCPropertyName();
+ if (!PropertyName.empty()) {
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_name, dwarf::DW_FORM_string,
+ PropertyName);
+ StringRef GetterName = DT.getObjCPropertyGetterName();
+ if (!GetterName.empty())
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_getter,
+ dwarf::DW_FORM_string, GetterName);
+ StringRef SetterName = DT.getObjCPropertySetterName();
+ if (!SetterName.empty())
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_setter,
+ dwarf::DW_FORM_string, SetterName);
+ unsigned PropertyAttributes = 0;
+ if (DT.isReadOnlyObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly;
+ if (DT.isReadWriteObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite;
+ if (DT.isAssignObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign;
+ if (DT.isRetainObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain;
+ if (DT.isCopyObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy;
+ if (DT.isNonAtomicObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
+ if (PropertyAttributes)
+ addUInt(MemberDie, dwarf::DW_AT_APPLE_property_attribute, 0,
+ PropertyAttributes);
+ }
+ return MemberDie;
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
new file mode 100644
index 0000000..60a9b28
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -0,0 +1,280 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF compile units.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+
+#include "DIE.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/OwningPtr.h"
+
+namespace llvm {
+
+class DwarfDebug;
+class MachineLocation;
+class MachineOperand;
+class ConstantInt;
+class DbgVariable;
+
+//===----------------------------------------------------------------------===//
+/// CompileUnit - This dwarf writer support class manages information
+/// associated with a source file.
+class CompileUnit {
+ /// ID - File identifier for source.
+ ///
+ unsigned ID;
+
+ /// Die - Compile unit debug information entry.
+ ///
+ const OwningPtr<DIE> CUDie;
+
+ /// Asm - Target of Dwarf emission.
+ AsmPrinter *Asm;
+
+ DwarfDebug *DD;
+
+ /// IndexTyDie - An anonymous type for index type. Owned by CUDie.
+ DIE *IndexTyDie;
+
+ /// MDNodeToDieMap - Tracks the mapping of unit level debug information
+ /// variables to debug information entries.
+ DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
+
+ /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information
+ /// descriptors to debug information entries using a DIEEntry proxy.
+ DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
+
+ /// Globals - A map of globally visible named entities for this unit.
+ ///
+ StringMap<DIE*> Globals;
+
+ /// GlobalTypes - A map of globally visible types for this unit.
+ ///
+ StringMap<DIE*> GlobalTypes;
+
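  /// (Editorial note, not part of the patch: the two maps above implement the
  /// unit-level memoization used by getOrCreateTypeDIE and addType in the
  /// .cpp file -- roughly:
  ///   if (DIE *TyDIE = getDIE(Ty)) return TyDIE;   // MDNodeToDieMap hit
  ///   ... otherwise create TyDIE and insertDIE(Ty, TyDIE) ...
  /// while MDNodeToDIEEntryMap caches the DIEEntry proxy so that every later
  /// DW_AT_type reference to the same type reuses one DW_FORM_ref4 value.)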
+ /// DIEBlocks - A list of all the DIEBlocks in use.
+ std::vector<DIEBlock *> DIEBlocks;
+
+public:
+ CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW);
+ ~CompileUnit();
+
+ // Accessors.
+ unsigned getID() const { return ID; }
+ DIE* getCUDie() const { return CUDie.get(); }
+ const StringMap<DIE*> &getGlobals() const { return Globals; }
+ const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
+
+ /// hasContent - Return true if this compile unit has something to write out.
+ ///
+ bool hasContent() const { return !CUDie->getChildren().empty(); }
+
+ /// addGlobal - Add a new global entity to the compile unit.
+ ///
+ void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; }
+
+ /// addGlobalType - Add a new global type to the compile unit.
+ ///
+ void addGlobalType(DIType Ty);
+
+ /// getDIE - Returns the debug information entry map slot for the
+ /// specified debug variable.
+ DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); }
+
+ DIEBlock *getDIEBlock() {
+ return new (DIEValueAllocator) DIEBlock();
+ }
+
+ /// insertDIE - Insert DIE into the map.
+ void insertDIE(const MDNode *N, DIE *D) {
+ MDNodeToDieMap.insert(std::make_pair(N, D));
+ }
+
+ /// getDIEEntry - Returns the debug information entry for the specified
+ /// debug variable.
+ DIEEntry *getDIEEntry(const MDNode *N) {
+ DenseMap<const MDNode *, DIEEntry *>::iterator I =
+ MDNodeToDIEEntryMap.find(N);
+ if (I == MDNodeToDIEEntryMap.end())
+ return NULL;
+ return I->second;
+ }
+
+ /// insertDIEEntry - Insert debug information entry into the map.
+ void insertDIEEntry(const MDNode *N, DIEEntry *E) {
+ MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
+ }
+
+ /// addDie - Adds or interns the DIE to the compile unit.
+ ///
+ void addDie(DIE *Buffer) {
+ this->CUDie->addChild(Buffer);
+ }
+
+ // getIndexTyDie - Get an anonymous type for index type.
+ DIE *getIndexTyDie() {
+ return IndexTyDie;
+ }
+
+ // setIndexTyDie - Set D as anonymous type for index which can be reused
+ // later.
+ void setIndexTyDie(DIE *D) {
+ IndexTyDie = D;
+ }
+public:
+
+ /// addUInt - Add an unsigned integer attribute data and value.
+ ///
+ void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
+
+ /// addSInt - Add a signed integer attribute data and value.
+ ///
+ void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer);
+
+ /// addString - Add a string attribute data and value.
+ ///
+ void addString(DIE *Die, unsigned Attribute, unsigned Form,
+ const StringRef Str);
+
+ /// addLabel - Add a Dwarf label attribute data and value.
+ ///
+ void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Label);
+
+ /// addDelta - Add a label delta attribute data and value.
+ ///
+ void addDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Hi, const MCSymbol *Lo);
+
+ /// addDIEEntry - Add a DIE attribute data and value.
+ ///
+ void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry);
+
+ /// addBlock - Add block data.
+ ///
+ void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
+
+ /// addSourceLine - Add location information to specified debug information
+ /// entry.
+ void addSourceLine(DIE *Die, DIVariable V);
+ void addSourceLine(DIE *Die, DIGlobalVariable G);
+ void addSourceLine(DIE *Die, DISubprogram SP);
+ void addSourceLine(DIE *Die, DIType Ty);
+ void addSourceLine(DIE *Die, DINameSpace NS);
+
+ /// addAddress - Add an address attribute to a die based on the location
+ /// provided.
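  /// (Editorial note, not part of the patch: in the DwarfDebug implementation
  /// this patch migrates, registers numbered below 32 are encoded with the
  /// compact DW_OP_reg0+Reg / DW_OP_breg0+Reg opcodes, higher numbers fall
  /// back to DW_OP_regx / DW_OP_bregx with a ULEB128 register operand, and a
  /// variable addressed off the frame register becomes DW_OP_fbreg with an
  /// SLEB128 offset; e.g. a variable at [reg5 + 8] yields the location block
  /// { DW_OP_breg5, SLEB128(8) }.)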
+ void addAddress(DIE *Die, unsigned Attribute, + const MachineLocation &Location); + + /// addConstantValue - Add constant value entry in variable DIE. + bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty); + bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned); + + /// addConstantFPValue - Add constant value entry in variable DIE. + bool addConstantFPValue(DIE *Die, const MachineOperand &MO); + + /// addTemplateParams - Add template parameters in buffer. + void addTemplateParams(DIE &Buffer, DIArray TParams); + + /// addRegisterOp - Add register operand. + void addRegisterOp(DIE *TheDie, unsigned Reg); + + /// addRegisterOffset - Add register offset. + void addRegisterOffset(DIE *TheDie, unsigned Reg, int64_t Offset); + + /// addComplexAddress - Start with the address based on the location provided, + /// and generate the DWARF information necessary to find the actual variable + /// (navigating the extra location information encoded in the type) based on + /// the starting location. Add the DWARF information to the die. + /// + void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + const MachineLocation &Location); + + // FIXME: Should be reformulated in terms of addComplexAddress. + /// addBlockByrefAddress - Start with the address based on the location + /// provided, and generate the DWARF information necessary to find the + /// actual Block variable (navigating the Block struct) based on the + /// starting location. Add the DWARF information to the die. Obsolete, + /// please use addComplexAddress instead. + /// + void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + const MachineLocation &Location); + + /// addVariableAddress - Add DW_AT_location attribute for a + /// DbgVariable based on provided MachineLocation. + void addVariableAddress(DbgVariable *&DV, DIE *Die, MachineLocation Location); + + /// addToContextOwner - Add Die into the list of its context owner's children. + void addToContextOwner(DIE *Die, DIDescriptor Context); + + /// addType - Add a new type attribute to the specified entity. + void addType(DIE *Entity, DIType Ty); + + /// getOrCreateNameSpace - Create a DIE for DINameSpace. + DIE *getOrCreateNameSpace(DINameSpace NS); + + /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the + /// given DIType. + DIE *getOrCreateTypeDIE(DIType Ty); + + /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE + /// for the given DITemplateTypeParameter. + DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP); + + /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE + /// for the given DITemplateValueParameter. + DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP); + + /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug + /// information entry. + DIEEntry *createDIEEntry(DIE *Entry); + + void addPubTypes(DISubprogram SP); + + /// constructTypeDIE - Construct basic type die from DIBasicType. + void constructTypeDIE(DIE &Buffer, + DIBasicType BTy); + + /// constructTypeDIE - Construct derived type die from DIDerivedType. + void constructTypeDIE(DIE &Buffer, + DIDerivedType DTy); + + /// constructTypeDIE - Construct type DIE from DICompositeType. + void constructTypeDIE(DIE &Buffer, + DICompositeType CTy); + + /// constructSubrangeDIE - Construct subrange DIE from DISubrange. 
+ void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); + + /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. + void constructArrayTypeDIE(DIE &Buffer, + DICompositeType *CTy); + + /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. + DIE *constructEnumTypeDIE(DIEnumerator ETy); + + /// createMemberDIE - Create new member DIE. + DIE *createMemberDIE(DIDerivedType DT); + +private: + + // DIEValueAllocator - All DIEValues are allocated through this allocator. + BumpPtrAllocator DIEValueAllocator; + DIEInteger *DIEIntegerOne; +}; + +} // end llvm namespace +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index bad87c1..8845bfa 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "dwarfdebug" #include "DwarfDebug.h" #include "DIE.h" +#include "DwarfCompileUnit.h" #include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Instructions.h" @@ -52,13 +53,9 @@ static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", cl::desc("Disable debug info printing")); static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden, - cl::desc("Make an absense of debug location information explicit."), + cl::desc("Make an absence of debug location information explicit."), cl::init(false)); -#ifndef NDEBUG -STATISTIC(BlocksWithoutLineNo, "Number of blocks without any line number"); -#endif - namespace { const char *DWARFGroupName = "DWARF Emission"; const char *DbgTimerName = "DWARF Debug Writer"; @@ -72,189 +69,56 @@ static const unsigned InitAbbreviationsSetSize = 9; // log2(512) namespace llvm { -//===----------------------------------------------------------------------===// -/// CompileUnit - This dwarf writer support class manages information associate -/// with a source file. -class CompileUnit { - /// ID - File identifier for source. - /// - unsigned ID; - - /// Die - Compile unit debug information entry. - /// - const OwningPtr<DIE> CUDie; - - /// IndexTyDie - An anonymous type for index type. Owned by CUDie. - DIE *IndexTyDie; - - /// MDNodeToDieMap - Tracks the mapping of unit level debug informaton - /// variables to debug information entries. - DenseMap<const MDNode *, DIE *> MDNodeToDieMap; - - /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug informaton - /// descriptors to debug information entries using a DIEEntry proxy. - DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap; - - /// Globals - A map of globally visible named entities for this unit. - /// - StringMap<DIE*> Globals; - - /// GlobalTypes - A map of globally visible types for this unit. - /// - StringMap<DIE*> GlobalTypes; - -public: - CompileUnit(unsigned I, DIE *D) - : ID(I), CUDie(D), IndexTyDie(0) {} - - // Accessors. - unsigned getID() const { return ID; } - DIE* getCUDie() const { return CUDie.get(); } - const StringMap<DIE*> &getGlobals() const { return Globals; } - const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; } - - /// hasContent - Return true if this compile unit has something to write out. - /// - bool hasContent() const { return !CUDie->getChildren().empty(); } - - /// addGlobal - Add a new global entity to the compile unit. - /// - void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; } - - /// addGlobalType - Add a new global type to the compile unit. 
- /// - void addGlobalType(StringRef Name, DIE *Die) { - GlobalTypes[Name] = Die; - } - - /// getDIE - Returns the debug information entry map slot for the - /// specified debug variable. - DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); } - - /// insertDIE - Insert DIE into the map. - void insertDIE(const MDNode *N, DIE *D) { - MDNodeToDieMap.insert(std::make_pair(N, D)); - } - - /// getDIEEntry - Returns the debug information entry for the speciefied - /// debug variable. - DIEEntry *getDIEEntry(const MDNode *N) { - DenseMap<const MDNode *, DIEEntry *>::iterator I = - MDNodeToDIEEntryMap.find(N); - if (I == MDNodeToDIEEntryMap.end()) - return NULL; - return I->second; - } - - /// insertDIEEntry - Insert debug information entry into the map. - void insertDIEEntry(const MDNode *N, DIEEntry *E) { - MDNodeToDIEEntryMap.insert(std::make_pair(N, E)); - } - - /// addDie - Adds or interns the DIE to the compile unit. - /// - void addDie(DIE *Buffer) { - this->CUDie->addChild(Buffer); - } - - // getIndexTyDie - Get an anonymous type for index type. - DIE *getIndexTyDie() { - return IndexTyDie; - } - - // setIndexTyDie - Set D as anonymous type for index which can be reused - // later. - void setIndexTyDie(DIE *D) { - IndexTyDie = D; - } - -}; - -//===----------------------------------------------------------------------===// -/// DbgVariable - This class is used to track local variable information. -/// -class DbgVariable { - DIVariable Var; // Variable Descriptor. - DIE *TheDIE; // Variable DIE. - unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries. -public: - // AbsVar may be NULL. - DbgVariable(DIVariable V) : Var(V), TheDIE(0), DotDebugLocOffset(~0U) {} - - // Accessors. - DIVariable getVariable() const { return Var; } - void setDIE(DIE *D) { TheDIE = D; } - DIE *getDIE() const { return TheDIE; } - void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } - unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } - StringRef getName() const { return Var.getName(); } - unsigned getTag() const { return Var.getTag(); } - bool variableHasComplexAddress() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); - return Var.hasComplexAddress(); - } - bool isBlockByrefVariable() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); - return Var.isBlockByrefVariable(); - } - unsigned getNumAddrElements() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); - return Var.getNumAddrElements(); - } - uint64_t getAddrElement(unsigned i) const { - return Var.getAddrElement(i); - } - DIType getType() const { - DIType Ty = Var.getType(); - // FIXME: isBlockByrefVariable should be reformulated in terms of complex - // addresses instead. - if (Var.isBlockByrefVariable()) { - /* Byref variables, in Blocks, are declared by the programmer as - "SomeType VarName;", but the compiler creates a - __Block_byref_x_VarName struct, and gives the variable VarName - either the struct, or a pointer to the struct, as its type. This - is necessary for various behind-the-scenes things the compiler - needs to do with by-reference variables in blocks. - - However, as far as the original *programmer* is concerned, the - variable should still have type 'SomeType', as originally declared. - - The following function dives into the __Block_byref_x_VarName - struct to find the original type of the variable. This will be - passed back to the code generating the type for the Debug - Information Entry for the variable 'VarName'. 
'VarName' will then - have the original type 'SomeType' in its debug information. - - The original type 'SomeType' will be the type of the field named - 'VarName' inside the __Block_byref_x_VarName struct. - - NOTE: In order for this to not completely fail on the debugger - side, the Debug Information Entry for the variable VarName needs to - have a DW_AT_location that tells the debugger how to unwind through - the pointers and __Block_byref_x_VarName struct to find the actual - value of the variable. The function addBlockByrefType does this. */ - DIType subType = Ty; - unsigned tag = Ty.getTag(); - - if (tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy = DIDerivedType(Ty); - subType = DTy.getTypeDerivedFrom(); - } - - DICompositeType blockStruct = DICompositeType(subType); - DIArray Elements = blockStruct.getTypeArray(); - - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - DIDerivedType DT = DIDerivedType(Element); - if (getName() == DT.getName()) - return (DT.getTypeDerivedFrom()); - } - return Ty; +DIType DbgVariable::getType() const { + DIType Ty = Var.getType(); + // FIXME: isBlockByrefVariable should be reformulated in terms of complex + // addresses instead. + if (Var.isBlockByrefVariable()) { + /* Byref variables, in Blocks, are declared by the programmer as + "SomeType VarName;", but the compiler creates a + __Block_byref_x_VarName struct, and gives the variable VarName + either the struct, or a pointer to the struct, as its type. This + is necessary for various behind-the-scenes things the compiler + needs to do with by-reference variables in blocks. + + However, as far as the original *programmer* is concerned, the + variable should still have type 'SomeType', as originally declared. + + The following function dives into the __Block_byref_x_VarName + struct to find the original type of the variable. This will be + passed back to the code generating the type for the Debug + Information Entry for the variable 'VarName'. 'VarName' will then + have the original type 'SomeType' in its debug information. + + The original type 'SomeType' will be the type of the field named + 'VarName' inside the __Block_byref_x_VarName struct. + + NOTE: In order for this to not completely fail on the debugger + side, the Debug Information Entry for the variable VarName needs to + have a DW_AT_location that tells the debugger how to unwind through + the pointers and __Block_byref_x_VarName struct to find the actual + value of the variable. The function addBlockByrefType does this. 
*/ + DIType subType = Ty; + unsigned tag = Ty.getTag(); + + if (tag == dwarf::DW_TAG_pointer_type) { + DIDerivedType DTy = DIDerivedType(Ty); + subType = DTy.getTypeDerivedFrom(); + } + + DICompositeType blockStruct = DICompositeType(subType); + DIArray Elements = blockStruct.getTypeArray(); + + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + DIDerivedType DT = DIDerivedType(Element); + if (getName() == DT.getName()) + return (DT.getTypeDerivedFrom()); } return Ty; } -}; + return Ty; +} //===----------------------------------------------------------------------===// /// DbgRange - This is used to track range of instructions with identical @@ -392,19 +256,16 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) CurrentFnDbgScope(0), PrevLabel(NULL) { NextStringPoolNumber = 0; - DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; + DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; DwarfStrSectionSym = TextSectionSym = 0; DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; - DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); beginModule(M); } } DwarfDebug::~DwarfDebug() { - for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) - DIEBlocks[j]->~DIEBlock(); } MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) { @@ -439,858 +300,6 @@ void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) { } } -/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug -/// information entry. -DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) { - DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry); - return Value; -} - -/// addUInt - Add an unsigned integer attribute data and value. -/// -void DwarfDebug::addUInt(DIE *Die, unsigned Attribute, - unsigned Form, uint64_t Integer) { - if (!Form) Form = DIEInteger::BestForm(false, Integer); - DIEValue *Value = Integer == 1 ? - DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer); - Die->addValue(Attribute, Form, Value); -} - -/// addSInt - Add an signed integer attribute data and value. -/// -void DwarfDebug::addSInt(DIE *Die, unsigned Attribute, - unsigned Form, int64_t Integer) { - if (!Form) Form = DIEInteger::BestForm(true, Integer); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); - Die->addValue(Attribute, Form, Value); -} - -/// addString - Add a string attribute data and value. DIEString only -/// keeps string reference. -void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form, - StringRef String) { - DIEValue *Value = new (DIEValueAllocator) DIEString(String); - Die->addValue(Attribute, Form, Value); -} - -/// addLabel - Add a Dwarf label attribute data and value. -/// -void DwarfDebug::addLabel(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Label) { - DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); - Die->addValue(Attribute, Form, Value); -} - -/// addDelta - Add a label delta attribute data and value. -/// -void DwarfDebug::addDelta(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Hi, const MCSymbol *Lo) { - DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); - Die->addValue(Attribute, Form, Value); -} - -/// addDIEEntry - Add a DIE attribute data and value. -/// -void DwarfDebug::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, - DIE *Entry) { - Die->addValue(Attribute, Form, createDIEEntry(Entry)); -} - - -/// addBlock - Add block data. 
-/// -void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form, - DIEBlock *Block) { - Block->ComputeSize(Asm); - DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. - Die->addValue(Attribute, Block->BestForm(), Block); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DIVariable V) { - // Verify variable. - if (!V.Verify()) - return; - - unsigned Line = V.getLineNumber(); - if (Line == 0) - return; - unsigned FileID = GetOrCreateSourceID(V.getContext().getFilename(), - V.getContext().getDirectory()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DIGlobalVariable G) { - // Verify global variable. - if (!G.Verify()) - return; - - unsigned Line = G.getLineNumber(); - if (Line == 0) - return; - unsigned FileID = GetOrCreateSourceID(G.getContext().getFilename(), - G.getContext().getDirectory()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DISubprogram SP) { - // Verify subprogram. - if (!SP.Verify()) - return; - // If the line number is 0, don't add it. - if (SP.getLineNumber() == 0) - return; - - unsigned Line = SP.getLineNumber(); - if (!SP.getContext().Verify()) - return; - unsigned FileID = GetOrCreateSourceID(SP.getFilename(), SP.getDirectory()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DIType Ty) { - // Verify type. - if (!Ty.Verify()) - return; - - unsigned Line = Ty.getLineNumber(); - if (Line == 0 || !Ty.getContext().Verify()) - return; - unsigned FileID = GetOrCreateSourceID(Ty.getFilename(), Ty.getDirectory()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DINameSpace NS) { - // Verify namespace. - if (!NS.Verify()) - return; - - unsigned Line = NS.getLineNumber(); - if (Line == 0) - return; - StringRef FN = NS.getFilename(); - - unsigned FileID = GetOrCreateSourceID(FN, NS.getDirectory()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based -/// on provided frame index. 
-void DwarfDebug::addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI) { - MachineLocation Location; - unsigned FrameReg; - const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); - Location.set(FrameReg, Offset); - - if (DV->variableHasComplexAddress()) - addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); - else if (DV->isBlockByrefVariable()) - addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); - else - addAddress(Die, dwarf::DW_AT_location, Location); -} - -/// addComplexAddress - Start with the address based on the location provided, -/// and generate the DWARF information necessary to find the actual variable -/// given the extra address information encoded in the DIVariable, starting from -/// the starting location. Add the DWARF information to the die. -/// -void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die, - unsigned Attribute, - const MachineLocation &Location) { - DIType Ty = DV->getType(); - - // Decode the original location, and use that as the start of the byref - // variable's location. - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - - if (Location.isReg()) { - if (Reg < 32) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); - } else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - } else { - if (Reg < 32) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); - else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - - addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); - } - - for (unsigned i = 0, N = DV->getNumAddrElements(); i < N; ++i) { - uint64_t Element = DV->getAddrElement(i); - - if (Element == DIBuilder::OpPlus) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i)); - } else if (Element == DIBuilder::OpDeref) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - } else llvm_unreachable("unknown DIBuilder Opcode"); - } - - // Now attach the location information to the DIE. - addBlock(Die, Attribute, 0, Block); -} - -/* Byref variables, in Blocks, are declared by the programmer as "SomeType - VarName;", but the compiler creates a __Block_byref_x_VarName struct, and - gives the variable VarName either the struct, or a pointer to the struct, as - its type. This is necessary for various behind-the-scenes things the - compiler needs to do with by-reference variables in Blocks. - - However, as far as the original *programmer* is concerned, the variable - should still have type 'SomeType', as originally declared. - - The function getBlockByrefType dives into the __Block_byref_x_VarName - struct to find the original type of the variable, which is then assigned to - the variable's Debug Information Entry as its real type. So far, so good. - However now the debugger will expect the variable VarName to have the type - SomeType. So we need the location attribute for the variable to be an - expression that explains to the debugger how to navigate through the - pointers and struct to find the actual variable of type SomeType. - - The following function does just that. We start by getting - the "normal" location for the variable. 
This will be the location - of either the struct __Block_byref_x_VarName or the pointer to the - struct __Block_byref_x_VarName. - - The struct will look something like: - - struct __Block_byref_x_VarName { - ... <various fields> - struct __Block_byref_x_VarName *forwarding; - ... <various other fields> - SomeType VarName; - ... <maybe more fields> - }; - - If we are given the struct directly (as our starting point) we - need to tell the debugger to: - - 1). Add the offset of the forwarding field. - - 2). Follow that pointer to get the real __Block_byref_x_VarName - struct to use (the real one may have been copied onto the heap). - - 3). Add the offset for the field VarName, to find the actual variable. - - If we started with a pointer to the struct, then we need to - dereference that pointer first, before the other steps. - Translating this into DWARF ops, we will need to append the following - to the current location description for the variable: - - DW_OP_deref -- optional, if we start with a pointer - DW_OP_plus_uconst <forward_fld_offset> - DW_OP_deref - DW_OP_plus_uconst <varName_fld_offset> - - That is what this function does. */ - -/// addBlockByrefAddress - Start with the address based on the location -/// provided, and generate the DWARF information necessary to find the -/// actual Block variable (navigating the Block struct) based on the -/// starting location. Add the DWARF information to the die. For -/// more information, read large comment just above here. -/// -void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, - unsigned Attribute, - const MachineLocation &Location) { - DIType Ty = DV->getType(); - DIType TmpTy = Ty; - unsigned Tag = Ty.getTag(); - bool isPointer = false; - - StringRef varName = DV->getName(); - - if (Tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy = DIDerivedType(Ty); - TmpTy = DTy.getTypeDerivedFrom(); - isPointer = true; - } - - DICompositeType blockStruct = DICompositeType(TmpTy); - - // Find the __forwarding field and the variable field in the __Block_byref - // struct. - DIArray Fields = blockStruct.getTypeArray(); - DIDescriptor varField = DIDescriptor(); - DIDescriptor forwardingField = DIDescriptor(); - - for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { - DIDescriptor Element = Fields.getElement(i); - DIDerivedType DT = DIDerivedType(Element); - StringRef fieldName = DT.getName(); - if (fieldName == "__forwarding") - forwardingField = Element; - else if (fieldName == varName) - varField = Element; - } - - // Get the offsets for the forwarding field and the variable field. - unsigned forwardingFieldOffset = - DIDerivedType(forwardingField).getOffsetInBits() >> 3; - unsigned varFieldOffset = - DIDerivedType(varField).getOffsetInBits() >> 3; - - // Decode the original location, and use that as the start of the byref - // variable's location. 
- const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - - if (Location.isReg()) { - if (Reg < 32) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); - else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - } else { - if (Reg < 32) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); - else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - - addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); - } - - // If we started with a pointer to the __Block_byref... struct, then - // the first thing we need to do is dereference the pointer (DW_OP_deref). - if (isPointer) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - - // Next add the offset for the '__forwarding' field: - // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in - // adding the offset if it's 0. - if (forwardingFieldOffset > 0) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset); - } - - // Now dereference the __forwarding field to get to the real __Block_byref - // struct: DW_OP_deref. - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - - // Now that we've got the real __Block_byref... struct, add the offset - // for the variable's field to get to the location of the actual variable: - // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. - if (varFieldOffset > 0) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset); - } - - // Now attach the location information to the DIE. - addBlock(Die, Attribute, 0, Block); -} - -/// addAddress - Add an address attribute to a die based on the location -/// provided. -void DwarfDebug::addAddress(DIE *Die, unsigned Attribute, - const MachineLocation &Location) { - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - - if (RI->getFrameRegister(*Asm->MF) == Location.getReg() - && Location.getOffset()) { - // If variable offset is based in frame register then use fbreg. - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); - addSInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); - addBlock(Die, Attribute, 0, Block); - return; - } - - if (Location.isReg()) { - if (Reg < 32) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); - } else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - } else { - if (Reg < 32) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); - } else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - - addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); - } - - addBlock(Die, Attribute, 0, Block); -} - -/// addRegisterAddress - Add register location entry in variable DIE. 
-bool DwarfDebug::addRegisterAddress(DIE *Die, const MachineOperand &MO) { - assert (MO.isReg() && "Invalid machine operand!"); - if (!MO.getReg()) - return false; - MachineLocation Location; - Location.set(MO.getReg()); - addAddress(Die, dwarf::DW_AT_location, Location); - return true; -} - -/// addConstantValue - Add constant value entry in variable DIE. -bool DwarfDebug::addConstantValue(DIE *Die, const MachineOperand &MO) { - assert (MO.isImm() && "Invalid machine operand!"); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - unsigned Imm = MO.getImm(); - addUInt(Block, 0, dwarf::DW_FORM_udata, Imm); - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; -} - -/// addConstantFPValue - Add constant value entry in variable DIE. -bool DwarfDebug::addConstantFPValue(DIE *Die, const MachineOperand &MO) { - assert (MO.isFPImm() && "Invalid machine operand!"); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - APFloat FPImm = MO.getFPImm()->getValueAPF(); - - // Get the raw data form of the floating point. - const APInt FltVal = FPImm.bitcastToAPInt(); - const char *FltPtr = (const char*)FltVal.getRawData(); - - int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. - bool LittleEndian = Asm->getTargetData().isLittleEndian(); - int Incr = (LittleEndian ? 1 : -1); - int Start = (LittleEndian ? 0 : NumBytes - 1); - int Stop = (LittleEndian ? NumBytes : -1); - - // Output the constant to DWARF one byte at a time. - for (; Start != Stop; Start += Incr) - addUInt(Block, 0, dwarf::DW_FORM_data1, - (unsigned char)0xFF & FltPtr[Start]); - - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; -} - -/// addConstantValue - Add constant value entry in variable DIE. -bool DwarfDebug::addConstantValue(DIE *Die, ConstantInt *CI, - bool Unsigned) { - if (CI->getBitWidth() <= 64) { - if (Unsigned) - addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, - CI->getZExtValue()); - else - addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, - CI->getSExtValue()); - return true; - } - - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - - // Get the raw data form of the large APInt. - const APInt Val = CI->getValue(); - const char *Ptr = (const char*)Val.getRawData(); - - int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte. - bool LittleEndian = Asm->getTargetData().isLittleEndian(); - int Incr = (LittleEndian ? 1 : -1); - int Start = (LittleEndian ? 0 : NumBytes - 1); - int Stop = (LittleEndian ? NumBytes : -1); - - // Output the constant to DWARF one byte at a time. - for (; Start != Stop; Start += Incr) - addUInt(Block, 0, dwarf::DW_FORM_data1, - (unsigned char)0xFF & Ptr[Start]); - - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; -} - -/// addTemplateParams - Add template parameters in buffer. -void DwarfDebug::addTemplateParams(DIE &Buffer, DIArray TParams) { - // Add template parameters. - for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) { - DIDescriptor Element = TParams.getElement(i); - if (Element.isTemplateTypeParameter()) - Buffer.addChild(getOrCreateTemplateTypeParameterDIE( - DITemplateTypeParameter(Element))); - else if (Element.isTemplateValueParameter()) - Buffer.addChild(getOrCreateTemplateValueParameterDIE( - DITemplateValueParameter(Element))); - } - -} -/// addToContextOwner - Add Die into the list of its context owner's children. 
-void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) { - if (Context.isType()) { - DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context)); - ContextDIE->addChild(Die); - } else if (Context.isNameSpace()) { - DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context)); - ContextDIE->addChild(Die); - } else if (Context.isSubprogram()) { - DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context)); - ContextDIE->addChild(Die); - } else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context)) - ContextDIE->addChild(Die); - else - getCompileUnit(Context)->addDie(Die); -} - -/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the -/// given DIType. -DIE *DwarfDebug::getOrCreateTypeDIE(DIType Ty) { - CompileUnit *TypeCU = getCompileUnit(Ty); - DIE *TyDIE = TypeCU->getDIE(Ty); - if (TyDIE) - return TyDIE; - - // Create new type. - TyDIE = new DIE(dwarf::DW_TAG_base_type); - TypeCU->insertDIE(Ty, TyDIE); - if (Ty.isBasicType()) - constructTypeDIE(*TyDIE, DIBasicType(Ty)); - else if (Ty.isCompositeType()) - constructTypeDIE(*TyDIE, DICompositeType(Ty)); - else { - assert(Ty.isDerivedType() && "Unknown kind of DIType"); - constructTypeDIE(*TyDIE, DIDerivedType(Ty)); - } - - addToContextOwner(TyDIE, Ty.getContext()); - return TyDIE; -} - -/// addType - Add a new type attribute to the specified entity. -void DwarfDebug::addType(DIE *Entity, DIType Ty) { - if (!Ty.Verify()) - return; - - // Check for pre-existence. - CompileUnit *TypeCU = getCompileUnit(Ty); - DIEEntry *Entry = TypeCU->getDIEEntry(Ty); - // If it exists then use the existing value. - if (Entry) { - Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); - return; - } - - // Construct type. - DIE *Buffer = getOrCreateTypeDIE(Ty); - - // Set up proxy. - Entry = createDIEEntry(Buffer); - TypeCU->insertDIEEntry(Ty, Entry); - - Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); -} - -/// constructTypeDIE - Construct basic type die from DIBasicType. -void DwarfDebug::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { - // Get core information. - StringRef Name = BTy.getName(); - Buffer.setTag(dwarf::DW_TAG_base_type); - addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - BTy.getEncoding()); - - // Add name if not anonymous or intermediate type. - if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - uint64_t Size = BTy.getSizeInBits() >> 3; - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); -} - -/// constructTypeDIE - Construct derived type die from DIDerivedType. -void DwarfDebug::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { - // Get core information. - StringRef Name = DTy.getName(); - uint64_t Size = DTy.getSizeInBits() >> 3; - unsigned Tag = DTy.getTag(); - - // FIXME - Workaround for templates. - if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type; - - Buffer.setTag(Tag); - - // Map to main type, void will not have a type. - DIType FromTy = DTy.getTypeDerivedFrom(); - addType(&Buffer, FromTy); - - // Add name if not anonymous or intermediate type. - if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - - // Add size if non-zero (derived types might be zero-sized.) - if (Size) - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); - - // Add source line info if available and TyDesc is not a forward declaration. - if (!DTy.isForwardDecl()) - addSourceLine(&Buffer, DTy); -} - -/// constructTypeDIE - Construct type DIE from DICompositeType. 
-void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { - // Get core information. - StringRef Name = CTy.getName(); - - uint64_t Size = CTy.getSizeInBits() >> 3; - unsigned Tag = CTy.getTag(); - Buffer.setTag(Tag); - - switch (Tag) { - case dwarf::DW_TAG_vector_type: - case dwarf::DW_TAG_array_type: - constructArrayTypeDIE(Buffer, &CTy); - break; - case dwarf::DW_TAG_enumeration_type: { - DIArray Elements = CTy.getTypeArray(); - - // Add enumerators to enumeration type. - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIE *ElemDie = NULL; - DIDescriptor Enum(Elements.getElement(i)); - if (Enum.isEnumerator()) { - ElemDie = constructEnumTypeDIE(DIEnumerator(Enum)); - Buffer.addChild(ElemDie); - } - } - } - break; - case dwarf::DW_TAG_subroutine_type: { - // Add return type. - DIArray Elements = CTy.getTypeArray(); - DIDescriptor RTy = Elements.getElement(0); - addType(&Buffer, DIType(RTy)); - - bool isPrototyped = true; - // Add arguments. - for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Ty = Elements.getElement(i); - if (Ty.isUnspecifiedParameter()) { - DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters); - Buffer.addChild(Arg); - isPrototyped = false; - } else { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - addType(Arg, DIType(Ty)); - Buffer.addChild(Arg); - } - } - // Add prototype flag. - if (isPrototyped) - addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); - } - break; - case dwarf::DW_TAG_structure_type: - case dwarf::DW_TAG_union_type: - case dwarf::DW_TAG_class_type: { - // Add elements to structure type. - DIArray Elements = CTy.getTypeArray(); - - // A forward struct declared type may not have elements available. - unsigned N = Elements.getNumElements(); - if (N == 0) - break; - - // Add elements to structure type. 
- for (unsigned i = 0; i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - DIE *ElemDie = NULL; - if (Element.isSubprogram()) { - DISubprogram SP(Element); - ElemDie = createSubprogramDIE(DISubprogram(Element)); - if (SP.isProtected()) - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_protected); - else if (SP.isPrivate()) - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_private); - else - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_public); - if (SP.isExplicit()) - addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1); - } - else if (Element.isVariable()) { - DIVariable DV(Element); - ElemDie = new DIE(dwarf::DW_TAG_variable); - addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, - DV.getName()); - addType(ElemDie, DV.getType()); - addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); - addSourceLine(ElemDie, DV); - } else if (Element.isDerivedType()) - ElemDie = createMemberDIE(DIDerivedType(Element)); - else - continue; - Buffer.addChild(ElemDie); - } - - if (CTy.isAppleBlockExtension()) - addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); - - unsigned RLang = CTy.getRunTimeLang(); - if (RLang) - addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, - dwarf::DW_FORM_data1, RLang); - - DICompositeType ContainingType = CTy.getContainingType(); - if (DIDescriptor(ContainingType).isCompositeType()) - addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, - getOrCreateTypeDIE(DIType(ContainingType))); - else { - DIDescriptor Context = CTy.getContext(); - addToContextOwner(&Buffer, Context); - } - - if (Tag == dwarf::DW_TAG_class_type) - addTemplateParams(Buffer, CTy.getTemplateParams()); - - break; - } - default: - break; - } - - // Add name if not anonymous or intermediate type. - if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - - if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type - || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) - { - // Add size if non-zero (derived types might be zero-sized.) - if (Size) - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); - else { - // Add zero size if it is not a forward declaration. - if (CTy.isForwardDecl()) - addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - else - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0); - } - - // Add source line info if available. - if (!CTy.isForwardDecl()) - addSourceLine(&Buffer, CTy); - } -} - -/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE -/// for the given DITemplateTypeParameter. -DIE * -DwarfDebug::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { - CompileUnit *TypeCU = getCompileUnit(TP); - DIE *ParamDIE = TypeCU->getDIE(TP); - if (ParamDIE) - return ParamDIE; - - ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter); - addType(ParamDIE, TP.getType()); - addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName()); - return ParamDIE; -} - -/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE -/// for the given DITemplateValueParameter. 
-DIE * -DwarfDebug::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) { - CompileUnit *TVCU = getCompileUnit(TPV); - DIE *ParamDIE = TVCU->getDIE(TPV); - if (ParamDIE) - return ParamDIE; - - ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter); - addType(ParamDIE, TPV.getType()); - if (!TPV.getName().empty()) - addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName()); - addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, - TPV.getValue()); - return ParamDIE; -} - -/// constructSubrangeDIE - Construct subrange DIE from DISubrange. -void DwarfDebug::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ - int64_t L = SR.getLo(); - int64_t H = SR.getHi(); - DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); - - addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); - if (L) - addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); - addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); - - Buffer.addChild(DW_Subrange); -} - -/// constructArrayTypeDIE - Construct array type DIE from DICompositeType. -void DwarfDebug::constructArrayTypeDIE(DIE &Buffer, - DICompositeType *CTy) { - Buffer.setTag(dwarf::DW_TAG_array_type); - if (CTy->getTag() == dwarf::DW_TAG_vector_type) - addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1); - - // Emit derived type. - addType(&Buffer, CTy->getTypeDerivedFrom()); - DIArray Elements = CTy->getTypeArray(); - - // Get an anonymous type for index type. - CompileUnit *TheCU = getCompileUnit(*CTy); - DIE *IdxTy = TheCU->getIndexTyDie(); - if (!IdxTy) { - // Construct an anonymous type for index type. - IdxTy = new DIE(dwarf::DW_TAG_base_type); - addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t)); - addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - dwarf::DW_ATE_signed); - TheCU->addDie(IdxTy); - TheCU->setIndexTyDie(IdxTy); - } - - // Add subranges to array type. - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - if (Element.getTag() == dwarf::DW_TAG_subrange_type) - constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy); - } -} - -/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. -DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator ETy) { - DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); - StringRef Name = ETy.getName(); - addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - int64_t Value = ETy.getEnumValue(); - addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); - return Enumerator; -} - /// getRealLinkageName - If special LLVM prefix that is used to inform the asm /// printer to not emit usual symbol prefix before the symbol name is used then /// return linkage name after skipping this special LLVM prefix. @@ -1301,84 +310,6 @@ static StringRef getRealLinkageName(StringRef LinkageName) { return LinkageName; } -/// createMemberDIE - Create new member DIE. 
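constructSubrangeDIE above always writes DW_AT_upper_bound but emits DW_AT_lower_bound only when it is nonzero, zero being the default lower bound for C/C++ arrays; the array routine pairs each subrange with an anonymous 4-byte signed index type shared per compile unit. The bound decision, as a compact illustrative model:

#include <cstdint>
#include <optional>

struct Subrange {
  std::optional<int64_t> LowerBound; // omitted when 0, the C/C++ default
  int64_t UpperBound;
};

// For int a[8] the range is [0, 7], so only the upper bound 7 is written;
// a Fortran-style [1, 8] range would carry both attributes.
Subrange makeSubrange(int64_t Lo, int64_t Hi) {
  Subrange SR;
  if (Lo)
    SR.LowerBound = Lo;
  SR.UpperBound = Hi;
  return SR;
}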
-DIE *DwarfDebug::createMemberDIE(DIDerivedType DT) { - DIE *MemberDie = new DIE(DT.getTag()); - StringRef Name = DT.getName(); - if (!Name.empty()) - addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - - addType(MemberDie, DT.getTypeDerivedFrom()); - - addSourceLine(MemberDie, DT); - - DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock(); - addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - - uint64_t Size = DT.getSizeInBits(); - uint64_t FieldSize = DT.getOriginalTypeSize(); - - if (Size != FieldSize) { - // Handle bitfield. - addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3); - addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits()); - - uint64_t Offset = DT.getOffsetInBits(); - uint64_t AlignMask = ~(DT.getAlignInBits() - 1); - uint64_t HiMark = (Offset + FieldSize) & AlignMask; - uint64_t FieldOffset = (HiMark - FieldSize); - Offset -= FieldOffset; - - // Maybe we need to work from the other end. - if (Asm->getTargetData().isLittleEndian()) - Offset = FieldSize - (Offset + Size); - addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset); - - // Here WD_AT_data_member_location points to the anonymous - // field that includes this bit field. - addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3); - - } else - // This is not a bitfield. - addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3); - - if (DT.getTag() == dwarf::DW_TAG_inheritance - && DT.isVirtual()) { - - // For C++, virtual base classes are not at fixed offset. Use following - // expression to extract appropriate offset from vtable. - // BaseAddr = ObAddr + *((*ObAddr) - Offset) - - DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock(); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits()); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - - addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, - VBaseLocationDie); - } else - addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie); - - if (DT.isProtected()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_protected); - else if (DT.isPrivate()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_private); - // Otherwise C++ member and base classes are considered public. - else if (DT.getCompileUnit().getLanguage() == dwarf::DW_LANG_C_plus_plus) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_public); - if (DT.isVirtual()) - addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, - dwarf::DW_VIRTUALITY_virtual); - return MemberDie; -} - /// createSubprogramDIE - Create new DIE using SP. DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { CompileUnit *SPCU = getCompileUnit(SP); @@ -1387,19 +318,35 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { return SPDie; SPDie = new DIE(dwarf::DW_TAG_subprogram); - // Constructors and operators for anonymous aggregates do not have names. 
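The bitfield branch of the removed createMemberDIE is the densest part of this hunk: it locates the storage unit holding the field (HiMark/FieldOffset), derives the bit offset within that unit, and on little-endian targets flips the offset because DW_AT_bit_offset counts from the most significant end. The same arithmetic as a standalone sketch:

#include <cstdint>
#include <cstdio>

struct BitFieldLoc {
  uint64_t ByteOffset; // DW_AT_data_member_location (bytes)
  uint64_t BitOffset;  // DW_AT_bit_offset
};

// Same arithmetic as the bitfield branch above. Offset/Size are the field's
// declared bit offset and bit size; FieldSize/Align describe the underlying
// storage unit (e.g. 32 bits for an int-based bitfield).
BitFieldLoc locateBitField(uint64_t Offset, uint64_t Size,
                           uint64_t FieldSize, uint64_t Align,
                           bool LittleEndian) {
  uint64_t AlignMask = ~(Align - 1);
  uint64_t HiMark = (Offset + FieldSize) & AlignMask;
  uint64_t FieldOffset = HiMark - FieldSize; // start of the storage unit
  Offset -= FieldOffset;                     // bit offset within the unit
  if (LittleEndian)                          // DWARF counts from the MSB end
    Offset = FieldSize - (Offset + Size);
  return {FieldOffset >> 3, Offset};
}

int main() {
  // struct { unsigned a : 3; unsigned b : 5; }; locate 'b' (bits 3..7).
  BitFieldLoc L = locateBitField(3, 5, 32, 32, /*LittleEndian=*/true);
  std::printf("byte=%llu bit_offset=%llu\n",
              (unsigned long long)L.ByteOffset,
              (unsigned long long)L.BitOffset); // byte=0 bit_offset=24
}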
- if (!SP.getName().empty()) - addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName()); + + // DW_TAG_inlined_subroutine may refer to this DIE. + SPCU->insertDIE(SP, SPDie); + + // Add to context owner. + SPCU->addToContextOwner(SPDie, SP.getContext()); + + // Add function template parameters. + SPCU->addTemplateParams(*SPDie, SP.getTemplateParams()); StringRef LinkageName = SP.getLinkageName(); if (!LinkageName.empty()) - addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, - getRealLinkageName(LinkageName)); + SPCU->addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, + getRealLinkageName(LinkageName)); - addSourceLine(SPDie, SP); + // If this DIE is going to refer declaration info using AT_specification + // then there is no need to add other attributes. + if (SP.getFunctionDeclaration().isSubprogram()) + return SPDie; + + // Constructors and operators for anonymous aggregates do not have names. + if (!SP.getName().empty()) + SPCU->addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + SP.getName()); + + SPCU->addSourceLine(SPDie, SP); if (SP.isPrototyped()) - addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); // Add Return Type. DICompositeType SPTy = SP.getType(); @@ -1407,24 +354,24 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { unsigned SPTag = SPTy.getTag(); if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type) - addType(SPDie, SPTy); + SPCU->addType(SPDie, SPTy); else - addType(SPDie, DIType(Args.getElement(0))); + SPCU->addType(SPDie, DIType(Args.getElement(0))); unsigned VK = SP.getVirtuality(); if (VK) { - addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); - addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); + SPCU->addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK); + DIEBlock *Block = SPCU->getDIEBlock(); + SPCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + SPCU->addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); + SPCU->addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); ContainingTypeMap.insert(std::make_pair(SPDie, SP.getContainingType())); } if (!SP.isDefinition()) { - addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - + SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + // Add arguments. Do not add arguments for subprogram definition. They will // be handled while processing variables. 
DICompositeType SPTy = SP.getType(); @@ -1435,35 +382,26 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); DIType ATy = DIType(DIType(Args.getElement(i))); - addType(Arg, ATy); + SPCU->addType(Arg, ATy); if (ATy.isArtificial()) - addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); SPDie->addChild(Arg); } } if (SP.isArtificial()) - addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); if (!SP.isLocalToUnit()) - addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); if (SP.isOptimized()) - addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); if (unsigned isa = Asm->getISAEncoding()) { - addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); + SPCU->addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); } - // Add function template parameters. - addTemplateParams(*SPDie, SP.getTemplateParams()); - - // DW_TAG_inlined_subroutine may refer to this DIE. - SPCU->insertDIE(SP, SPDie); - - // Add to context owner. - addToContextOwner(SPDie, SP.getContext()); - return SPDie; } @@ -1518,51 +456,57 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) { assert(SPDie && "Unable to find subprogram DIE!"); DISubprogram SP(SPNode); - // There is not any need to generate specification DIE for a function - // defined at compile unit level. If a function is defined inside another - // function then gdb prefers the definition at top level and but does not - // expect specification DIE in parent function. So avoid creating - // specification DIE for a function defined inside a function. - if (SP.isDefinition() && !SP.getContext().isCompileUnit() && - !SP.getContext().isFile() && - !isSubprogramContext(SP.getContext())) { - addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - - // Add arguments. - DICompositeType SPTy = SP.getType(); - DIArray Args = SPTy.getTypeArray(); - unsigned SPTag = SPTy.getTag(); - if (SPTag == dwarf::DW_TAG_subroutine_type) - for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - DIType ATy = DIType(DIType(Args.getElement(i))); - addType(Arg, ATy); - if (ATy.isArtificial()) - addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); - SPDie->addChild(Arg); - } - DIE *SPDeclDie = SPDie; - SPDie = new DIE(dwarf::DW_TAG_subprogram); - addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, - SPDeclDie); - SPCU->addDie(SPDie); + DISubprogram SPDecl = SP.getFunctionDeclaration(); + if (SPDecl.isSubprogram()) + // Refer function declaration directly. + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, + createSubprogramDIE(SPDecl)); + else { + // There is not any need to generate specification DIE for a function + // defined at compile unit level. If a function is defined inside another + // function then gdb prefers the definition at top level and but does not + // expect specification DIE in parent function. So avoid creating + // specification DIE for a function defined inside a function. 
+ if (SP.isDefinition() && !SP.getContext().isCompileUnit() && + !SP.getContext().isFile() && + !isSubprogramContext(SP.getContext())) { + SPCU-> addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + + // Add arguments. + DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); + unsigned SPTag = SPTy.getTag(); + if (SPTag == dwarf::DW_TAG_subroutine_type) + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + DIType ATy = DIType(DIType(Args.getElement(i))); + SPCU->addType(Arg, ATy); + if (ATy.isArtificial()) + SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + SPDie->addChild(Arg); + } + DIE *SPDeclDie = SPDie; + SPDie = new DIE(dwarf::DW_TAG_subprogram); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, + SPDeclDie); + SPCU->addDie(SPDie); + } } - // Pick up abstract subprogram DIE. if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) { SPDie = new DIE(dwarf::DW_TAG_subprogram); - addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, AbsSPDIE); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, AbsSPDIE); SPCU->addDie(SPDie); } - addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber())); - addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - Asm->GetTempSymbol("func_end", Asm->getFunctionNumber())); + SPCU->addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber())); + SPCU->addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + Asm->GetTempSymbol("func_end", Asm->getFunctionNumber())); const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); MachineLocation Location(RI->getFrameRegister(*Asm->MF)); - addAddress(SPDie, dwarf::DW_AT_frame_base, Location); + SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location); return SPDie; } @@ -1579,13 +523,14 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { if (Ranges.empty()) return 0; + CompileUnit *TheCU = getCompileUnit(Scope->getScopeNode()); SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(); if (Ranges.size() > 1) { // .debug_range section has not been laid out yet. Emit offset in // .debug_range as a uint, size 4, for now. emitDIE will handle // DW_AT_ranges appropriately. 
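The guard carried over above decides when a definition is split into a declaration DIE plus a DW_AT_specification stub: only for functions whose lexical context is a type or namespace, since (per the hunk's own comment) gdb prefers top-level definitions and does not expect a specification DIE inside a parent function. Boiled down to a predicate:

// Condition from the hunk, reduced to its inputs. The Ctx flags describe
// the lexical parent of the subprogram.
bool needsSpecificationDIE(bool IsDefinition, bool CtxIsCompileUnit,
                           bool CtxIsFile, bool CtxIsSubprogram) {
  return IsDefinition && !CtxIsCompileUnit && !CtxIsFile && !CtxIsSubprogram;
}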
- addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, - DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, + DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize()); for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); @@ -1604,8 +549,8 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); assert(End->isDefined() && "Invalid end label for an inlined scope!"); - addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start); - addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End); + TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start); + TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End); return ScopeDIE; } @@ -1639,17 +584,19 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { if (!Scope->getScopeNode()) return NULL; DIScope DS(Scope->getScopeNode()); - DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine); - DISubprogram InlinedSP = getDISubprogram(DS); CompileUnit *TheCU = getCompileUnit(InlinedSP); DIE *OriginDIE = TheCU->getDIE(InlinedSP); - assert(OriginDIE && "Unable to find Origin DIE!"); - addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, OriginDIE); + if (!OriginDIE) { + DEBUG(dbgs() << "Unable to find original DIE for inlined subprogram."); + return NULL; + } + DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine); + TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, OriginDIE); - addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, StartLabel); - addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, EndLabel); + TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, StartLabel); + TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, EndLabel); InlinedSubprogramDIEs.insert(OriginDIE); @@ -1665,8 +612,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { I->second.push_back(std::make_pair(StartLabel, ScopeDIE)); DILocation DL(Scope->getInlinedAt()); - addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID()); - addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); return ScopeDIE; } @@ -1695,7 +642,7 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { // Define variable debug information entry. 
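Because .debug_ranges has not been laid out when the scope DIE is built, the code above stores the scope's future section offset as entries-so-far times the target pointer size, then queues one begin/end symbol pair per range. A toy layout model; the two-null end-of-list terminator is an assumption based on the DWARF ranges format, since that part of the routine is elided here:

#include <cstdint>
#include <utility>
#include <vector>

// Each begin/end symbol occupies one pointer-sized slot in .debug_ranges,
// so a scope's DW_AT_ranges value is simply slots-so-far * pointer size.
uint64_t appendRanges(std::vector<const void *> &Slots,
                      const std::vector<std::pair<const void *,
                                                  const void *>> &Ranges,
                      unsigned PtrSize) {
  uint64_t SectionOffset = Slots.size() * PtrSize;
  for (const auto &R : Ranges) {
    Slots.push_back(R.first);
    Slots.push_back(R.second);
  }
  Slots.push_back(nullptr); // end-of-list marker pair (assumed, per the
  Slots.push_back(nullptr); // DWARF .debug_ranges format)
  return SectionOffset;
}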
DIE *VariableDie = new DIE(Tag); - + CompileUnit *VariableCU = getCompileUnit(DV->getVariable()); DIE *AbsDIE = NULL; DenseMap<const DbgVariable *, const DbgVariable *>::iterator V2AVI = VarToAbstractVarMap.find(DV); @@ -1703,20 +650,23 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { AbsDIE = V2AVI->second->getDIE(); if (AbsDIE) - addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, AbsDIE); + VariableCU->addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, AbsDIE); else { - addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - addSourceLine(VariableDie, DV->getVariable()); + VariableCU->addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + Name); + VariableCU->addSourceLine(VariableDie, DV->getVariable()); // Add variable type. - addType(VariableDie, DV->getType()); + VariableCU->addType(VariableDie, DV->getType()); } if (Tag == dwarf::DW_TAG_formal_parameter && DV->getType().isArtificial()) - addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + VariableCU->addUInt(VariableDie, dwarf::DW_AT_artificial, + dwarf::DW_FORM_flag, 1); else if (DIVariable(DV->getVariable()).isArtificial()) - addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + VariableCU->addUInt(VariableDie, dwarf::DW_AT_artificial, + dwarf::DW_FORM_flag, 1); if (Scope->isAbstractScope()) { DV->setDIE(VariableDie); @@ -1727,7 +677,7 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { unsigned Offset = DV->getDotDebugLocOffset(); if (Offset != ~0U) { - addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4, + VariableCU->addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4, Asm->GetTempSymbol("debug_loc", Offset)); DV->setDIE(VariableDie); UseDotDebugLocEntry.insert(VariableDie); @@ -1747,22 +697,31 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); if (DVInsn->getOperand(1).isImm() && TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) { - addVariableAddress(DV, VariableDie, DVInsn->getOperand(1).getImm()); - updated = true; - } else - updated = addRegisterAddress(VariableDie, RegOp); + unsigned FrameReg = 0; + const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); + int Offset = + TFI->getFrameIndexReference(*Asm->MF, + DVInsn->getOperand(1).getImm(), + FrameReg); + MachineLocation Location(FrameReg, Offset); + VariableCU->addVariableAddress(DV, VariableDie, Location); + + } else if (RegOp.getReg()) + VariableCU->addVariableAddress(DV, VariableDie, + MachineLocation(RegOp.getReg())); + updated = true; } else if (DVInsn->getOperand(0).isImm()) - updated = addConstantValue(VariableDie, DVInsn->getOperand(0)); + updated = + VariableCU->addConstantValue(VariableDie, DVInsn->getOperand(0), + DV->getType()); else if (DVInsn->getOperand(0).isFPImm()) updated = - addConstantFPValue(VariableDie, DVInsn->getOperand(0)); + VariableCU->addConstantFPValue(VariableDie, DVInsn->getOperand(0)); } else { - MachineLocation Location = Asm->getDebugValueLocation(DVInsn); - if (Location.getReg()) { - addAddress(VariableDie, dwarf::DW_AT_location, Location); - updated = true; - } + VariableCU->addVariableAddress(DV, VariableDie, + Asm->getDebugValueLocation(DVInsn)); + updated = true; } if (!updated) { // If variableDie is not updated then DBG_VALUE instruction does not @@ -1776,35 +735,20 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) 
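The rewritten DBG_VALUE handling above routes each instruction to one of three encodings: a frame-relative MachineLocation when the register operand names the frame register (the immediate is then resolved through getFrameIndexReference), a plain register location for any other register, and a constant entry for immediate operands. The triage, reduced to a toy classifier (hypothetical types, not the MachineOperand API):

enum class VarLocKind { FrameRelative, Register, ConstantInt, ConstantFP };

// Operand triage from the hunk: (reg, imm) pairs naming the frame register
// become frame-relative locations, other registers are used directly, and
// pure immediates become constant entries.
VarLocKind classifyDbgValue(bool Op0IsReg, unsigned Reg, unsigned FrameReg,
                            bool Op1IsImm, bool Op0IsImm) {
  if (Op0IsReg && Op1IsImm && Reg == FrameReg)
    return VarLocKind::FrameRelative;
  if (Op0IsReg)
    return VarLocKind::Register;
  return Op0IsImm ? VarLocKind::ConstantInt : VarLocKind::ConstantFP;
}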
{ // .. else use frame index, if available. int FI = 0; - if (findVariableFrameIndex(DV, &FI)) - addVariableAddress(DV, VariableDie, FI); - + if (findVariableFrameIndex(DV, &FI)) { + unsigned FrameReg = 0; + const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); + int Offset = + TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); + MachineLocation Location(FrameReg, Offset); + VariableCU->addVariableAddress(DV, VariableDie, Location); + } + DV->setDIE(VariableDie); return VariableDie; } -void DwarfDebug::addPubTypes(DISubprogram SP) { - DICompositeType SPTy = SP.getType(); - unsigned SPTag = SPTy.getTag(); - if (SPTag != dwarf::DW_TAG_subroutine_type) - return; - - DIArray Args = SPTy.getTypeArray(); - for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) { - DIType ATy(Args.getElement(i)); - if (!ATy.Verify()) - continue; - DICompositeType CATy = getDICompositeType(ATy); - if (DIDescriptor(CATy).Verify() && !CATy.getName().empty() - && !CATy.isForwardDecl()) { - CompileUnit *TheCU = getCompileUnit(CATy); - if (DIEEntry *Entry = TheCU->getDIEEntry(CATy)) - TheCU->addGlobalType(CATy.getName(), Entry->getEntry()); - } - } -} - /// constructScopeDIE - Construct a DIE for this scope. DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { if (!Scope || !Scope->getScopeNode()) @@ -1858,7 +802,7 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { ScopeDIE->addChild(*I); if (DS.isSubprogram()) - addPubTypes(DISubprogram(DS)); + getCompileUnit(DS)->addPubTypes(DISubprogram(DS)); return ScopeDIE; } @@ -1875,11 +819,9 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, return GetOrCreateSourceID("<stdin>", StringRef()); // MCStream expects full path name as filename. - if (!DirName.empty() && !FileName.startswith("/")) { - std::string FullPathName(DirName.data()); - if (!DirName.endswith("/")) - FullPathName += "/"; - FullPathName += FileName.data(); + if (!DirName.empty() && !sys::path::is_absolute(FileName)) { + SmallString<128> FullPathName = DirName; + sys::path::append(FullPathName, FileName); // Here FullPathName will be copied into StringMap by GetOrCreateSourceID. return GetOrCreateSourceID(StringRef(FullPathName), StringRef()); } @@ -1897,21 +839,6 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, return SrcId; } -/// getOrCreateNameSpace - Create a DIE for DINameSpace. -DIE *DwarfDebug::getOrCreateNameSpace(DINameSpace NS) { - CompileUnit *TheCU = getCompileUnit(NS); - DIE *NDie = TheCU->getDIE(NS); - if (NDie) - return NDie; - NDie = new DIE(dwarf::DW_TAG_namespace); - TheCU->insertDIE(NS, NDie); - if (!NS.getName().empty()) - addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName()); - addSourceLine(NDie, NS); - addToContextOwner(NDie, NS.getContext()); - return NDie; -} - /// constructCompileUnit - Create new CompileUnit for the given /// metadata node with tag DW_TAG_compile_unit. 
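GetOrCreateSourceID above swaps hand-rolled slash splicing for sys::path::is_absolute and sys::path::append, which also copes with directory names that already end in a separator. The same shape with std::filesystem, as an illustration rather than the LLVM API (behaviour matches for POSIX-style paths):

#include <filesystem>
#include <string>

// Join DirName and FileName the way the hunk does: prepend the directory
// only when the file name is not already absolute.
std::string fullSourcePath(const std::string &DirName,
                           const std::string &FileName) {
  namespace fs = std::filesystem;
  if (!DirName.empty() && !fs::path(FileName).is_absolute())
    return (fs::path(DirName) / FileName).string();
  return FileName;
}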
void DwarfDebug::constructCompileUnit(const MDNode *N) { @@ -1921,37 +848,37 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) { unsigned ID = GetOrCreateSourceID(FN, Dir); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, - DIUnit.getProducer()); - addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, - DIUnit.getLanguage()); - addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); + CompileUnit *NewCU = new CompileUnit(ID, Die, Asm, this); + NewCU->addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, + DIUnit.getProducer()); + NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, + DIUnit.getLanguage()); + NewCU->addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This // simplifies debug range entries. - addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0); + NewCU->addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0); // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. - if (Asm->MAI->doesDwarfUsesAbsoluteLabelForStmtList()) - addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_addr, - Asm->GetTempSymbol("section_line")); + if(Asm->MAI->doesDwarfRequireRelocationForSectionOffset()) + NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, + Asm->GetTempSymbol("section_line")); else - addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); + NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); if (!Dir.empty()) - addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir); + NewCU->addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir); if (DIUnit.isOptimized()) - addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); StringRef Flags = DIUnit.getFlags(); if (!Flags.empty()) - addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags); - + NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags); + unsigned RVer = DIUnit.getRunTimeVersion(); if (RVer) - addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, + NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, dwarf::DW_FORM_data1, RVer); - CompileUnit *NewCU = new CompileUnit(ID, Die); if (!FirstCU) FirstCU = NewCU; CUMap.insert(std::make_pair(N, NewCU)); @@ -2047,38 +974,34 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) { bool isGlobalVariable = GV.getGlobal() != NULL; // Add name. - addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, - GV.getDisplayName()); + TheCU->addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, + GV.getDisplayName()); StringRef LinkageName = GV.getLinkageName(); if (!LinkageName.empty() && isGlobalVariable) - addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, - getRealLinkageName(LinkageName)); + TheCU->addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, + dwarf::DW_FORM_string, + getRealLinkageName(LinkageName)); // Add type. - addType(VariableDIE, GTy); - if (GTy.isCompositeType() && !GTy.getName().empty() - && !GTy.isForwardDecl()) { - DIEEntry *Entry = TheCU->getDIEEntry(GTy); - assert(Entry && "Missing global type!"); - TheCU->addGlobalType(GTy.getName(), Entry->getEntry()); - } + TheCU->addType(VariableDIE, GTy); + // Add scoping info. 
if (!GV.isLocalToUnit()) { - addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + TheCU->addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); // Expose as global. TheCU->addGlobal(GV.getName(), VariableDIE); } // Add line number info. - addSourceLine(VariableDIE, GV); + TheCU->addSourceLine(VariableDIE, GV); // Add to map. TheCU->insertDIE(N, VariableDIE); // Add to context owner. DIDescriptor GVContext = GV.getContext(); - addToContextOwner(VariableDIE, GVContext); + TheCU->addToContextOwner(VariableDIE, GVContext); // Add location. if (isGlobalVariable) { DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - addLabel(Block, 0, dwarf::DW_FORM_udata, + TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + TheCU->addLabel(Block, 0, dwarf::DW_FORM_udata, Asm->Mang->getSymbol(GV.getGlobal())); // Do not create specification DIE if context is either compile unit // or a subprogram. @@ -2086,28 +1009,28 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) { !GVContext.isFile() && !isSubprogramContext(GVContext)) { // Create specification DIE. DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); - addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, + TheCU->addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, VariableDIE); - addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); - addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + TheCU->addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); + TheCU->addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); TheCU->addDie(VariableSpecDIE); } else { - addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); + TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } } else if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(GV.getConstant())) - addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy)); + TheCU->addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy)); else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) { // GV is a merged global. DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - addLabel(Block, 0, dwarf::DW_FORM_udata, - Asm->Mang->getSymbol(cast<GlobalValue>(CE->getOperand(0)))); + TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + TheCU->addLabel(Block, 0, dwarf::DW_FORM_udata, + Asm->Mang->getSymbol(cast<GlobalValue>(CE->getOperand(0)))); ConstantInt *CII = cast<ConstantInt>(CE->getOperand(2)); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(Block, 0, dwarf::DW_FORM_udata, CII->getZExtValue()); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); + TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + TheCU->addUInt(Block, 0, dwarf::DW_FORM_udata, CII->getZExtValue()); + TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } return; @@ -2133,7 +1056,7 @@ void DwarfDebug::constructSubprogramDIE(const MDNode *N) { TheCU->insertDIE(N, SubprogramDie); // Add to context owner. - addToContextOwner(SubprogramDie, SP.getContext()); + TheCU->addToContextOwner(SubprogramDie, SP.getContext()); // Expose as global. 
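For a merged global the hunk emits the location expression DW_OP_addr <symbol>, DW_OP_constu <offset>, DW_OP_plus, i.e. "address of the merged blob plus element offset". A byte-level sketch of that expression with a standard ULEB128 encoder (opcode values from the DWARF spec; in real output the address bytes are a relocation against the symbol, a raw value stands in here):

#include <cstdint>
#include <vector>

enum : uint8_t {
  DW_OP_addr   = 0x03,
  DW_OP_constu = 0x10,
  DW_OP_plus   = 0x22
};

// Standard unsigned LEB128: 7 bits per byte, high bit marks continuation.
void emitULEB128(std::vector<uint8_t> &Out, uint64_t V) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V) Byte |= 0x80;
    Out.push_back(Byte);
  } while (V);
}

// Location expression mirroring the DW_OP_addr/constu/plus sequence above.
std::vector<uint8_t> mergedGlobalLoc(uint64_t SymAddr, uint64_t Offset,
                                     unsigned PtrSize) {
  std::vector<uint8_t> Expr;
  Expr.push_back(DW_OP_addr);
  for (unsigned i = 0; i != PtrSize; ++i) // little-endian address bytes
    Expr.push_back(uint8_t(SymAddr >> (8 * i)));
  Expr.push_back(DW_OP_constu);
  emitULEB128(Expr, Offset);
  Expr.push_back(DW_OP_plus);
  return Expr;
}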
TheCU->addGlobal(SP.getName(), SubprogramDie); @@ -2148,52 +1071,80 @@ void DwarfDebug::beginModule(Module *M) { if (DisableDebugInfoPrinting) return; - DebugInfoFinder DbgFinder; - DbgFinder.processModule(*M); - - bool HasDebugInfo = false; + // If module has named metadata anchors then use them, otherwise scan the module + // using debug info finder to collect debug info. + NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); + if (CU_Nodes) { + + NamedMDNode *GV_Nodes = M->getNamedMetadata("llvm.dbg.gv"); + NamedMDNode *SP_Nodes = M->getNamedMetadata("llvm.dbg.sp"); + if (!GV_Nodes && !SP_Nodes) + // If there are not any global variables or any functions then + // there is not any debug info in this module. + return; + + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) + constructCompileUnit(CU_Nodes->getOperand(i)); + + if (GV_Nodes) + for (unsigned i = 0, e = GV_Nodes->getNumOperands(); i != e; ++i) + constructGlobalVariableDIE(GV_Nodes->getOperand(i)); + + if (SP_Nodes) + for (unsigned i = 0, e = SP_Nodes->getNumOperands(); i != e; ++i) + constructSubprogramDIE(SP_Nodes->getOperand(i)); + + } else { - // Scan all the compile-units to see if there are any marked as the main unit. - // if not, we do not generate debug info. - for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), - E = DbgFinder.compile_unit_end(); I != E; ++I) { - if (DICompileUnit(*I).isMain()) { - HasDebugInfo = true; - break; + DebugInfoFinder DbgFinder; + DbgFinder.processModule(*M); + + bool HasDebugInfo = false; + // Scan all the compile-units to see if there are any marked as the main unit. + // if not, we do not generate debug info. + for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), + E = DbgFinder.compile_unit_end(); I != E; ++I) { + if (DICompileUnit(*I).isMain()) { + HasDebugInfo = true; + break; + } } + if (!HasDebugInfo) return; + + // Create all the compile unit DIEs. + for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), + E = DbgFinder.compile_unit_end(); I != E; ++I) + constructCompileUnit(*I); + + // Create DIEs for each global variable. + for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(), + E = DbgFinder.global_variable_end(); I != E; ++I) + constructGlobalVariableDIE(*I); + + // Create DIEs for each subprogram. + for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), + E = DbgFinder.subprogram_end(); I != E; ++I) + constructSubprogramDIE(*I); } - - if (!HasDebugInfo) return; - + // Tell MMI that we have debug info. MMI->setDebugInfoAvailability(true); - + // Emit initial sections. EmitSectionLabels(); - // Create all the compile unit DIEs. - for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), - E = DbgFinder.compile_unit_end(); I != E; ++I) - constructCompileUnit(*I); - - // Create DIEs for each subprogram. - for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), - E = DbgFinder.subprogram_end(); I != E; ++I) - constructSubprogramDIE(*I); - - // Create DIEs for each global variable. 
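beginModule now trusts the llvm.dbg.cu / llvm.dbg.gv / llvm.dbg.sp anchors when they exist and only falls back to a whole-module DebugInfoFinder scan otherwise; with a CU anchor present but no global-variable and no subprogram anchors, the module is treated as carrying no debug info at all. The dispatch, reduced to a sketch over toy anchor lists (the real code walks NamedMDNode operands):

#include <vector>

struct Node {};
using AnchorList = const std::vector<Node *> *; // null when absent

enum class ModuleScan { FallBackToFinder, NoDebugInfo, BuiltFromAnchors };

// Mirrors the hunk's shape. On BuiltFromAnchors the real code runs
// constructCompileUnit over CUs, then constructGlobalVariableDIE over GVs,
// then constructSubprogramDIE over SPs, in that order.
ModuleScan buildFromAnchors(AnchorList CUs, AnchorList GVs, AnchorList SPs) {
  if (!CUs)
    return ModuleScan::FallBackToFinder; // scan with DebugInfoFinder
  if (!GVs && !SPs)
    return ModuleScan::NoDebugInfo; // nothing to describe in this module
  return ModuleScan::BuiltFromAnchors;
}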
- for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(), - E = DbgFinder.global_variable_end(); I != E; ++I) - constructGlobalVariableDIE(*I); - //getOrCreateTypeDIE if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum")) - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) - getOrCreateTypeDIE(DIType(NMD->getOperand(i))); + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIType Ty(NMD->getOperand(i)); + getCompileUnit(Ty)->getOrCreateTypeDIE(Ty); + } if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty")) - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) - getOrCreateTypeDIE(DIType(NMD->getOperand(i))); + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIType Ty(NMD->getOperand(i)); + getCompileUnit(Ty)->getOrCreateTypeDIE(Ty); + } // Prime section data. SectionMap.insert(Asm->getObjFileLowering().getTextSection()); @@ -2244,7 +1195,7 @@ void DwarfDebug::endModule() { for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(), AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) { DIE *ISP = *AI; - addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); } for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(), @@ -2254,7 +1205,8 @@ void DwarfDebug::endModule() { if (!N) continue; DIE *NDie = getCompileUnit(N)->getDIE(N); if (!NDie) continue; - addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); + getCompileUnit(N)->addDIEEntry(SPDie, dwarf::DW_AT_containing_type, + dwarf::DW_FORM_ref4, NDie); } // Standard sections final addresses. @@ -2269,14 +1221,6 @@ void DwarfDebug::endModule() { Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", i)); } - // Emit common frame information. - emitCommonDebugFrame(); - - // Emit function debug frame information - for (std::vector<FunctionDebugFrameInfo>::iterator I = DebugFrames.begin(), - E = DebugFrames.end(); I != E; ++I) - emitFunctionDebugFrame(*I); - // Compute DIE offsets and sizes. computeSizeAndOffsets(); @@ -2464,15 +1408,10 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, HI = History.begin(), HE = History.end(); HI != HE; ++HI) { const MachineInstr *Begin = *HI; assert(Begin->isDebugValue() && "Invalid History entry"); - MachineLocation MLoc; - if (Begin->getNumOperands() == 3) { - if (Begin->getOperand(0).isReg() && Begin->getOperand(1).isImm()) - MLoc.set(Begin->getOperand(0).getReg(), Begin->getOperand(1).getImm()); - } else - MLoc = Asm->getDebugValueLocation(Begin); - // FIXME: emitDebugLoc only understands registers. - if (!MLoc.getReg()) + // Check if DBG_VALUE is truncating a range. + if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg() + && !Begin->getOperand(0).getReg()) continue; // Compute the range for a register location. @@ -2481,7 +1420,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, if (HI + 1 == HE) // If Begin is the last instruction in History then its value is valid - // until the end of the funtion. + // until the end of the function. SLabel = FunctionEndSym; else { const MachineInstr *End = HI[1]; @@ -2496,7 +1435,25 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, } // The value is valid until the next DBG_VALUE or clobber. 
- DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc)); + MachineLocation MLoc; + if (Begin->getNumOperands() == 3) { + if (Begin->getOperand(0).isReg() && Begin->getOperand(1).isImm()) { + MLoc.set(Begin->getOperand(0).getReg(), + Begin->getOperand(1).getImm()); + DotDebugLocEntries. + push_back(DotDebugLocEntry(FLabel, SLabel, MLoc, Var)); + } + // FIXME: Handle isFPImm also. + else if (Begin->getOperand(0).isImm()) { + DotDebugLocEntries. + push_back(DotDebugLocEntry(FLabel, SLabel, + Begin->getOperand(0).getImm())); + } + } else { + MLoc = Asm->getDebugValueLocation(Begin); + DotDebugLocEntries. + push_back(DotDebugLocEntry(FLabel, SLabel, MLoc, Var)); + } } DotDebugLocEntries.push_back(DotDebugLocEntry()); } @@ -2533,12 +1490,17 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { if (!MI->isDebugValue()) { DebugLoc DL = MI->getDebugLoc(); if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) { + unsigned Flags = DWARF2_FLAG_IS_STMT; PrevInstLoc = DL; + if (DL == PrologEndLoc) { + Flags |= DWARF2_FLAG_PROLOGUE_END; + PrologEndLoc = DebugLoc(); + } if (!DL.isUnknown()) { const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); - recordSourceLine(DL.getLine(), DL.getCol(), Scope); + recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); } else - recordSourceLine(0, 0, 0); + recordSourceLine(0, 0, 0, 0); } } @@ -2850,41 +1812,22 @@ void DwarfDebug::identifyScopeMarkers() { } } -/// FindFirstDebugLoc - Find the first debug location in the function. This -/// is intended to be an approximation for the source position of the -/// beginning of the function. -static DebugLoc FindFirstDebugLoc(const MachineFunction *MF) { - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) - for (MachineBasicBlock::const_iterator MBBI = I->begin(), MBBE = I->end(); - MBBI != MBBE; ++MBBI) { - DebugLoc DL = MBBI->getDebugLoc(); - if (!DL.isUnknown()) - return DL; - } - return DebugLoc(); +/// getScopeNode - Get MDNode for DebugLoc's scope. +static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) { + if (MDNode *InlinedAt = DL.getInlinedAt(Ctx)) + return getScopeNode(DebugLoc::getFromDILocation(InlinedAt), Ctx); + return DL.getScope(Ctx); } -#ifndef NDEBUG -/// CheckLineNumbers - Count basicblocks whose instructions do not have any -/// line number information. -static void CheckLineNumbers(const MachineFunction *MF) { - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) { - bool FoundLineNo = false; - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - const MachineInstr *MI = II; - if (!MI->getDebugLoc().isUnknown()) { - FoundLineNo = true; - break; - } - } - if (!FoundLineNo && I->size()) - ++BlocksWithoutLineNo; - } +/// getFnDebugLoc - Walk up the scope chain of given debug loc and find +/// line number info for the function. +static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) { + const MDNode *Scope = getScopeNode(DL, Ctx); + DISubprogram SP = getDISubprogram(Scope); + if (SP.Verify()) + return DebugLoc::get(SP.getLineNumber(), 0, SP); + return DebugLoc(); } -#endif /// beginFunction - Gather pre-function debug information. Assumes being /// emitted immediately after the function entry point. 
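getScopeNode above recovers the outermost scope of a possibly-inlined location by following the inlinedAt chain to its end, and getFnDebugLoc then maps that scope's subprogram to a line for the function entry; this is what lets the prologue_end line-table row point at the function's own source line rather than an inlined callee's. The chain walk with toy types:

struct Scope; // stand-in for the MDNode scope

struct Location {
  const Scope *ScopeNode;
  const Location *InlinedAt; // non-null while the location is inlined
};

// Mirrors getScopeNode: hop through inlinedAt links until the location is
// no longer inside an inlined body, then take its scope.
const Scope *scopeNodeOf(const Location &L) {
  return L.InlinedAt ? scopeNodeOf(*L.InlinedAt) : L.ScopeNode;
}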
@@ -2892,44 +1835,16 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (!MMI->hasDebugInfo()) return; if (!extractScopeInformation()) return; -#ifndef NDEBUG - CheckLineNumbers(MF); -#endif - FunctionBeginSym = Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()); // Assumes in correct section after the entry point. Asm->OutStreamer.EmitLabel(FunctionBeginSym); - // Emit label for the implicitly defined dbg.stoppoint at the start of the - // function. - DebugLoc FDL = FindFirstDebugLoc(MF); - if (FDL.isUnknown()) return; - - const MDNode *Scope = FDL.getScope(MF->getFunction()->getContext()); - const MDNode *TheScope = 0; - - DISubprogram SP = getDISubprogram(Scope); - unsigned Line, Col; - if (SP.Verify()) { - Line = SP.getLineNumber(); - Col = 0; - TheScope = SP; - } else { - Line = FDL.getLine(); - Col = FDL.getCol(); - TheScope = Scope; - } - - recordSourceLine(Line, Col, TheScope); - assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned"); /// ProcessedArgs - Collection of arguments already processed. SmallPtrSet<const MDNode *, 8> ProcessedArgs; - const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); - /// LiveUserVar - Map physreg numbers to the MDNode they contain. std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs()); @@ -2995,6 +1910,11 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (!MI->isLabel()) AtBlockEntry = false; + // First known non DBG_VALUE location marks beginning of function + // body. + if (PrologEndLoc.isUnknown() && !MI->getDebugLoc().isUnknown()) + PrologEndLoc = MI->getDebugLoc(); + // Check if the instruction clobbers any registers with debug vars. for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), MOE = MI->operands_end(); MOI != MOE; ++MOI) { @@ -3063,6 +1983,15 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { PrevInstLoc = DebugLoc(); PrevLabel = FunctionBeginSym; + + // Record beginning of function. + if (!PrologEndLoc.isUnknown()) { + DebugLoc FnStartDL = getFnDebugLoc(PrologEndLoc, + MF->getFunction()->getContext()); + recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(), + FnStartDL.getScope(MF->getFunction()->getContext()), + DWARF2_FLAG_IS_STMT); + } } /// endFunction - Gather and emit post-function debug information. @@ -3109,8 +2038,9 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope); if (!DisableFramePointerElim(*MF)) - addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr, - dwarf::DW_FORM_flag, 1); + getCompileUnit(CurrentFnDbgScope->getScopeNode())->addUInt(CurFnDIE, + dwarf::DW_AT_APPLE_omit_frame_ptr, + dwarf::DW_FORM_flag, 1); DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(), @@ -3119,7 +2049,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Clear debug info CurrentFnDbgScope = NULL; - CurrentFnArguments.clear(); + DeleteContainerPointers(CurrentFnArguments); DbgVariableToFrameIndexMap.clear(); VarToAbstractVarMap.clear(); DbgVariableToDbgInstMap.clear(); @@ -3176,7 +2106,8 @@ DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) { /// recordSourceLine - Register a source line with debug info. Returns the /// unique label that was emitted and which provides correspondence to /// the source line list. 
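The PrologEndLoc capture added above records the first known non-DBG_VALUE source location in the function body; beginInstruction later tags the matching line-table row with DWARF2_FLAG_PROLOGUE_END so debuggers can plant breakpoints past the prologue. The scan, reduced to a sketch over a toy instruction stream:

#include <vector>

struct Instr {
  bool IsDebugValue;
  unsigned Line; // 0 when the instruction carries no source location
};

// First real instruction with a location marks the end of the prologue.
unsigned findPrologueEndLine(const std::vector<Instr> &Body) {
  for (const Instr &I : Body)
    if (!I.IsDebugValue && I.Line)
      return I.Line;
  return 0; // no line info at all: no prologue_end row is emitted
}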
-void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S){ +void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, + unsigned Flags) { StringRef Fn; StringRef Dir; unsigned Src = 1; @@ -3204,9 +2135,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S){ Src = GetOrCreateSourceID(Fn, Dir); } - - Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, DWARF2_FLAG_IS_STMT, - 0, 0); + Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, + 0, 0, Fn); } //===----------------------------------------------------------------------===// @@ -3264,17 +2194,15 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) { /// computeSizeAndOffsets - Compute the size and offset of all the DIEs. /// void DwarfDebug::computeSizeAndOffsets() { - unsigned PrevOffset = 0; for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { // Compute size of compile unit header. - static unsigned Offset = PrevOffset + + unsigned Offset = sizeof(int32_t) + // Length of Compilation Unit Info sizeof(int16_t) + // DWARF version number sizeof(int32_t) + // Offset Into Abbrev. Section sizeof(int8_t); // Pointer Size (in bytes) computeSizeAndOffset(I->second->getCUDie(), Offset, true); - PrevOffset = Offset; } } @@ -3296,11 +2224,6 @@ void DwarfDebug::EmitSectionLabels() { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); // Dwarf sections base addresses. - if (Asm->MAI->doesDwarfRequireFrameSection()) { - DwarfFrameSectionSym = - EmitSectionSym(Asm, TLOF.getDwarfFrameSection(), "section_debug_frame"); - } - DwarfInfoSectionSym = EmitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info"); DwarfAbbrevSectionSym = @@ -3435,8 +2358,7 @@ void DwarfDebug::emitDebugInfo() { unsigned ContentSize = Die->getSize() + sizeof(int16_t) + // DWARF version number sizeof(int32_t) + // Offset Into Abbrev. Section - sizeof(int8_t) + // Pointer Size (in bytes) - sizeof(int32_t); // FIXME - extra pad for gdb bug. + sizeof(int8_t); // Pointer Size (in bytes) Asm->OutStreamer.AddComment("Length of Compilation Unit Info"); Asm->EmitInt32(ContentSize); @@ -3449,12 +2371,6 @@ void DwarfDebug::emitDebugInfo() { Asm->EmitInt8(Asm->getTargetData().getPointerSize()); emitDIE(Die); - // FIXME - extra padding for gdb bug. - Asm->OutStreamer.AddComment("4 extra padding bytes for GDB"); - Asm->EmitInt8(0); - Asm->EmitInt8(0); - Asm->EmitInt8(0); - Asm->EmitInt8(0); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID())); } } @@ -3515,91 +2431,6 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { Asm->EmitInt8(1); } -/// emitCommonDebugFrame - Emit common frame info into a debug frame section. -/// -void DwarfDebug::emitCommonDebugFrame() { - if (!Asm->MAI->doesDwarfRequireFrameSection()) - return; - - int stackGrowth = Asm->getTargetData().getPointerSize(); - if (Asm->TM.getFrameLowering()->getStackGrowthDirection() == - TargetFrameLowering::StackGrowsDown) - stackGrowth *= -1; - - // Start the dwarf frame section. 
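Two fixes above are easy to miss: computeSizeAndOffsets previously kept its running offset in a function-local static, so the compile-unit header was only accounted for on the first call, and emitDebugInfo drops the four "gdb bug" padding bytes from the unit length. After the fix, each unit restarts its DIE offsets past its own 11-byte header (length 4 + version 2 + abbrev offset 4 + address size 1), matching DWARF's unit-relative DW_FORM_ref4 references. A sketch of the corrected layout:

#include <cstdint>
#include <vector>

// DWARF compile unit header: length(4) + version(2) + abbrev offset(4)
// + address size(1). DIE offsets are relative to the start of their unit.
constexpr unsigned CUHeaderSize = 4 + 2 + 4 + 1; // 11 bytes

// Assign each DIE its unit-relative offset; one fresh running offset per
// compile unit, which is what the hunk restores by dropping 'static'.
std::vector<std::vector<unsigned>>
layoutUnits(const std::vector<std::vector<unsigned>> &DieSizesPerCU) {
  std::vector<std::vector<unsigned>> Offsets;
  for (const auto &Sizes : DieSizesPerCU) {
    unsigned Offset = CUHeaderSize; // restarts for every unit
    Offsets.emplace_back();
    for (unsigned Size : Sizes) {
      Offsets.back().push_back(Offset);
      Offset += Size;
    }
  }
  return Offsets;
}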
- Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfFrameSection()); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_frame_common")); - Asm->OutStreamer.AddComment("Length of Common Information Entry"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("debug_frame_common_end"), - Asm->GetTempSymbol("debug_frame_common_begin"), 4); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_frame_common_begin")); - Asm->OutStreamer.AddComment("CIE Identifier Tag"); - Asm->EmitInt32((int)dwarf::DW_CIE_ID); - Asm->OutStreamer.AddComment("CIE Version"); - Asm->EmitInt8(dwarf::DW_CIE_VERSION); - Asm->OutStreamer.AddComment("CIE Augmentation"); - Asm->OutStreamer.EmitIntValue(0, 1, /*addrspace*/0); // nul terminator. - Asm->EmitULEB128(1, "CIE Code Alignment Factor"); - Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor"); - Asm->OutStreamer.AddComment("CIE RA Column"); - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), false)); - - std::vector<MachineMove> Moves; - TFI->getInitialFrameState(Moves); - - Asm->EmitFrameMoves(Moves, 0, false); - - Asm->EmitAlignment(2); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_frame_common_end")); -} - -/// emitFunctionDebugFrame - Emit per function frame info into a debug frame -/// section. -void DwarfDebug:: -emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) { - if (!Asm->MAI->doesDwarfRequireFrameSection()) - return; - - // Start the dwarf frame section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfFrameSection()); - - Asm->OutStreamer.AddComment("Length of Frame Information Entry"); - MCSymbol *DebugFrameBegin = - Asm->GetTempSymbol("debug_frame_begin", DebugFrameInfo.Number); - MCSymbol *DebugFrameEnd = - Asm->GetTempSymbol("debug_frame_end", DebugFrameInfo.Number); - Asm->EmitLabelDifference(DebugFrameEnd, DebugFrameBegin, 4); - - Asm->OutStreamer.EmitLabel(DebugFrameBegin); - - Asm->OutStreamer.AddComment("FDE CIE offset"); - Asm->EmitSectionOffset(Asm->GetTempSymbol("debug_frame_common"), - DwarfFrameSectionSym); - - Asm->OutStreamer.AddComment("FDE initial location"); - MCSymbol *FuncBeginSym = - Asm->GetTempSymbol("func_begin", DebugFrameInfo.Number); - Asm->OutStreamer.EmitSymbolValue(FuncBeginSym, - Asm->getTargetData().getPointerSize(), - 0/*AddrSpace*/); - - - Asm->OutStreamer.AddComment("FDE address range"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("func_end",DebugFrameInfo.Number), - FuncBeginSym, Asm->getTargetData().getPointerSize()); - - Asm->EmitFrameMoves(DebugFrameInfo.Moves, FuncBeginSym, false); - - Asm->EmitAlignment(2); - Asm->OutStreamer.EmitLabel(DebugFrameEnd); -} - /// emitDebugPubNames - Emit visible names into a debug pubnames section. /// void DwarfDebug::emitDebugPubNames() { @@ -3760,33 +2591,63 @@ void DwarfDebug::emitDebugLoc() { } else { Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size, 0); Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0); - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned Reg = RI->getDwarfRegNum(Entry.Loc.getReg(), false); - if (int Offset = Entry.Loc.getOffset()) { - // If the value is at a certain offset from frame register then - // use DW_OP_fbreg. - unsigned OffsetSize = Offset ? 
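The deleted emitCommonDebugFrame built the .debug_frame CIE by hand: code alignment factor 1, return-address column from the target register info, and a data alignment factor equal to the pointer size, negated when the stack grows down. The patch removes this emission from DwarfDebug entirely; the factor computation itself was just:

// Data alignment factor for a .debug_frame CIE, as the removed code
// computed it: pointer size, negated on down-growing stacks (the common
// case on x86 and ARM).
int dataAlignmentFactor(unsigned PtrSize, bool StackGrowsDown) {
  int Factor = static_cast<int>(PtrSize);
  return StackGrowsDown ? -Factor : Factor;
}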
MCAsmInfo::getSLEB128Size(Offset) : 1; - Asm->OutStreamer.AddComment("Loc expr size"); - Asm->EmitInt16(1 + OffsetSize); - Asm->OutStreamer.AddComment( - dwarf::OperationEncodingString(dwarf::DW_OP_fbreg)); - Asm->EmitInt8(dwarf::DW_OP_fbreg); - Asm->OutStreamer.AddComment("Offset"); - Asm->EmitSLEB128(Offset); - } else { - if (Reg < 32) { - Asm->OutStreamer.AddComment("Loc expr size"); - Asm->EmitInt16(1); - Asm->OutStreamer.AddComment( - dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg)); - Asm->EmitInt8(dwarf::DW_OP_reg0 + Reg); + DIVariable DV(Entry.Variable); + Asm->OutStreamer.AddComment("Loc expr size"); + MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol(); + MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol(); + Asm->EmitLabelDifference(end, begin, 2); + Asm->OutStreamer.EmitLabel(begin); + if (Entry.isConstant()) { + DIBasicType BTy(DV.getType()); + if (BTy.Verify() && + (BTy.getEncoding() == dwarf::DW_ATE_signed + || BTy.getEncoding() == dwarf::DW_ATE_signed_char)) { + Asm->OutStreamer.AddComment("DW_OP_consts"); + Asm->EmitInt8(dwarf::DW_OP_consts); + Asm->EmitSLEB128(Entry.getConstant()); } else { - Asm->OutStreamer.AddComment("Loc expr size"); - Asm->EmitInt16(1 + MCAsmInfo::getULEB128Size(Reg)); - Asm->EmitInt8(dwarf::DW_OP_regx); - Asm->EmitULEB128(Reg); + Asm->OutStreamer.AddComment("DW_OP_constu"); + Asm->EmitInt8(dwarf::DW_OP_constu); + Asm->EmitULEB128(Entry.getConstant()); } + } else if (DV.hasComplexAddress()) { + unsigned N = DV.getNumAddrElements(); + unsigned i = 0; + if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { + if (Entry.Loc.getOffset()) { + i = 2; + Asm->EmitDwarfRegOp(Entry.Loc); + Asm->OutStreamer.AddComment("DW_OP_deref"); + Asm->EmitInt8(dwarf::DW_OP_deref); + Asm->OutStreamer.AddComment("DW_OP_plus_uconst"); + Asm->EmitInt8(dwarf::DW_OP_plus_uconst); + Asm->EmitSLEB128(DV.getAddrElement(1)); + } else { + // If first address element is OpPlus then emit + // DW_OP_breg + Offset instead of DW_OP_reg + Offset. + MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1)); + Asm->EmitDwarfRegOp(Loc); + i = 2; + } + } else { + Asm->EmitDwarfRegOp(Entry.Loc); + } + + // Emit remaining complex address elements. + for (; i < N; ++i) { + uint64_t Element = DV.getAddrElement(i); + if (Element == DIBuilder::OpPlus) { + Asm->EmitInt8(dwarf::DW_OP_plus_uconst); + Asm->EmitULEB128(DV.getAddrElement(++i)); + } else if (Element == DIBuilder::OpDeref) + Asm->EmitInt8(dwarf::DW_OP_deref); + else llvm_unreachable("unknown Opcode found in complex address"); + } + } else { + // Regular entry. 
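The removed emitDebugLoc branches spelled the register-location encodings out inline; the new code hands that to Asm->EmitDwarfRegOp and, for constants, picks DW_OP_consts with SLEB128 for signed base types and DW_OP_constu with ULEB128 otherwise. A self-contained sketch of the three register cases the old code handled, with standard LEB128 encoders (opcode values from the DWARF spec):

#include <cstdint>
#include <vector>

enum : uint8_t {
  DW_OP_reg0  = 0x50, // direct register, one byte for regs 0-31
  DW_OP_regx  = 0x90, // direct register, ULEB128 register number
  DW_OP_fbreg = 0x91  // frame-base relative, SLEB128 offset
};

void emitULEB128(std::vector<uint8_t> &Out, uint64_t V) {
  do {
    uint8_t B = V & 0x7f;
    V >>= 7;
    if (V) B |= 0x80;
    Out.push_back(B);
  } while (V);
}

void emitSLEB128(std::vector<uint8_t> &Out, int64_t V) {
  bool More;
  do {
    uint8_t B = V & 0x7f;
    V >>= 7; // arithmetic shift keeps the sign
    More = !((V == 0 && !(B & 0x40)) || (V == -1 && (B & 0x40)));
    if (More) B |= 0x80;
    Out.push_back(B);
  } while (More);
}

// The three cases the removed code handled inline and EmitDwarfRegOp now
// owns: frame-relative values use DW_OP_fbreg, low register numbers fit in
// a single DW_OP_reg0+N byte, and the rest need DW_OP_regx.
std::vector<uint8_t> regLocExpr(unsigned DwarfReg, int64_t Offset) {
  std::vector<uint8_t> Expr;
  if (Offset) {
    Expr.push_back(DW_OP_fbreg);
    emitSLEB128(Expr, Offset);
  } else if (DwarfReg < 32) {
    Expr.push_back(uint8_t(DW_OP_reg0 + DwarfReg));
  } else {
    Expr.push_back(DW_OP_regx);
    emitULEB128(Expr, DwarfReg);
  }
  return Expr;
}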
+ Asm->EmitDwarfRegOp(Entry.Loc); } + Asm->OutStreamer.EmitLabel(end); } } } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 4aeefde..abda2e6 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -16,6 +16,7 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineLocation.h" +#include "llvm/Analysis/DebugInfo.h" #include "DIE.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" @@ -39,21 +40,6 @@ class DIEAbbrev; class DIE; class DIEBlock; class DIEEntry; -class DIArray; -class DIEnumerator; -class DIDescriptor; -class DIVariable; -class DIGlobal; -class DIGlobalVariable; -class DISubprogram; -class DIBasicType; -class DIDerivedType; -class DIType; -class DINameSpace; -class DISubrange; -class DICompositeType; -class DITemplateTypeParameter; -class DITemplateValueParameter; //===----------------------------------------------------------------------===// /// SrcLineInfo - This class is used to record source line correspondence. @@ -80,10 +66,21 @@ typedef struct DotDebugLocEntry { const MCSymbol *Begin; const MCSymbol *End; MachineLocation Loc; + const MDNode *Variable; bool Merged; - DotDebugLocEntry() : Begin(0), End(0), Merged(false) {} - DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L) - : Begin(B), End(E), Loc(L), Merged(false) {} + bool Constant; + int64_t iConstant; + DotDebugLocEntry() + : Begin(0), End(0), Variable(0), Merged(false), + Constant(false), iConstant(0) {} + DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L, + const MDNode *V) + : Begin(B), End(E), Loc(L), Variable(V), Merged(false), + Constant(false), iConstant(0) {} + DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i) + : Begin(B), End(E), Variable(0), Merged(false), + Constant(true), iConstant(i) {} + /// Empty entries are also used as a trigger to emit temp label. Such /// labels are referenced is used to find debug_loc offset for a given DIE. bool isEmpty() { return Begin == 0 && End == 0; } @@ -94,8 +91,47 @@ typedef struct DotDebugLocEntry { Next->Begin = Begin; Merged = true; } + bool isConstant() { return Constant; } + int64_t getConstant() { return iConstant; } } DotDebugLocEntry; +//===----------------------------------------------------------------------===// +/// DbgVariable - This class is used to track local variable information. +/// +class DbgVariable { + DIVariable Var; // Variable Descriptor. + DIE *TheDIE; // Variable DIE. + unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries. +public: + // AbsVar may be NULL. + DbgVariable(DIVariable V) : Var(V), TheDIE(0), DotDebugLocOffset(~0U) {} + + // Accessors. 
+ DIVariable getVariable() const { return Var; } + void setDIE(DIE *D) { TheDIE = D; } + DIE *getDIE() const { return TheDIE; } + void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } + unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } + StringRef getName() const { return Var.getName(); } + unsigned getTag() const { return Var.getTag(); } + bool variableHasComplexAddress() const { + assert(Var.Verify() && "Invalid complex DbgVariable!"); + return Var.hasComplexAddress(); + } + bool isBlockByrefVariable() const { + assert(Var.Verify() && "Invalid complex DbgVariable!"); + return Var.isBlockByrefVariable(); + } + unsigned getNumAddrElements() const { + assert(Var.Verify() && "Invalid complex DbgVariable!"); + return Var.getNumAddrElements(); + } + uint64_t getAddrElement(unsigned i) const { + return Var.getAddrElement(i); + } + DIType getType() const; +}; + class DwarfDebug { /// Asm - Target of Dwarf emission. AsmPrinter *Asm; @@ -122,12 +158,6 @@ class DwarfDebug { /// id mapped to a unique id. StringMap<unsigned> SourceIdMap; - /// DIEBlocks - A list of all the DIEBlocks in use. - std::vector<DIEBlock *> DIEBlocks; - - // DIEValueAllocator - All DIEValues are allocated through this allocator. - BumpPtrAllocator DIEValueAllocator; - /// StringPool - A String->Symbol mapping of strings used by indirect /// references. StringMap<std::pair<MCSymbol*, unsigned> > StringPool; @@ -198,8 +228,6 @@ class DwarfDebug { /// corresponds to the MDNode mapped with the subprogram DIE. DenseMap<DIE *, const MDNode *> ContainingTypeMap; - typedef SmallVector<DbgScope *, 2> ScopeVector; - /// InlineInfo - Keep track of inlined functions and their location. This /// information is used to populate debug_inlined section. typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels; @@ -236,6 +264,10 @@ class DwarfDebug { DebugLoc PrevInstLoc; MCSymbol *PrevLabel; + /// PrologEndLoc - This location indicates end of function prologue and + /// beginning of function body. + DebugLoc PrologEndLoc; + struct FunctionDebugFrameInfo { unsigned Number; std::vector<MachineMove> Moves; @@ -246,157 +278,23 @@ class DwarfDebug { std::vector<FunctionDebugFrameInfo> DebugFrames; + // DIEValueAllocator - All DIEValues are allocated through this allocator. + BumpPtrAllocator DIEValueAllocator; + // Section Symbols: these are assembler temporary labels that are emitted at // the beginning of each supported dwarf section. These are used to form // section offsets and are created by EmitSectionLabels. - MCSymbol *DwarfFrameSectionSym, *DwarfInfoSectionSym, *DwarfAbbrevSectionSym; + MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym; MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym; MCSymbol *DwarfDebugLocSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; - DIEInteger *DIEIntegerOne; - private: /// assignAbbrevNumber - Define a unique number for the abbreviation. /// void assignAbbrevNumber(DIEAbbrev &Abbrev); - /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug - /// information entry. - DIEEntry *createDIEEntry(DIE *Entry); - - /// addUInt - Add an unsigned integer attribute data and value. - /// - void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer); - - /// addSInt - Add an signed integer attribute data and value. - /// - void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer); - - /// addString - Add a string attribute data and value. 
- /// - void addString(DIE *Die, unsigned Attribute, unsigned Form, - const StringRef Str); - - /// addLabel - Add a Dwarf label attribute data and value. - /// - void addLabel(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Label); - - /// addDelta - Add a label delta attribute data and value. - /// - void addDelta(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Hi, const MCSymbol *Lo); - - /// addDIEEntry - Add a DIE attribute data and value. - /// - void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry); - - /// addBlock - Add block data. - /// - void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block); - - /// addSourceLine - Add location information to specified debug information - /// entry. - void addSourceLine(DIE *Die, DIVariable V); - void addSourceLine(DIE *Die, DIGlobalVariable G); - void addSourceLine(DIE *Die, DISubprogram SP); - void addSourceLine(DIE *Die, DIType Ty); - void addSourceLine(DIE *Die, DINameSpace NS); - - /// addAddress - Add an address attribute to a die based on the location - /// provided. - void addAddress(DIE *Die, unsigned Attribute, - const MachineLocation &Location); - - /// addRegisterAddress - Add register location entry in variable DIE. - bool addRegisterAddress(DIE *Die, const MachineOperand &MO); - - /// addConstantValue - Add constant value entry in variable DIE. - bool addConstantValue(DIE *Die, const MachineOperand &MO); - bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned); - - /// addConstantFPValue - Add constant value entry in variable DIE. - bool addConstantFPValue(DIE *Die, const MachineOperand &MO); - - /// addTemplateParams - Add template parameters in buffer. - void addTemplateParams(DIE &Buffer, DIArray TParams); - - /// addComplexAddress - Start with the address based on the location provided, - /// and generate the DWARF information necessary to find the actual variable - /// (navigating the extra location information encoded in the type) based on - /// the starting location. Add the DWARF information to the die. - /// - void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, - const MachineLocation &Location); - - // FIXME: Should be reformulated in terms of addComplexAddress. - /// addBlockByrefAddress - Start with the address based on the location - /// provided, and generate the DWARF information necessary to find the - /// actual Block variable (navigating the Block struct) based on the - /// starting location. Add the DWARF information to the die. Obsolete, - /// please use addComplexAddress instead. - /// - void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, - const MachineLocation &Location); - - /// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based - /// on provided frame index. - void addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI); - - /// addToContextOwner - Add Die into the list of its context owner's children. - void addToContextOwner(DIE *Die, DIDescriptor Context); - - /// addType - Add a new type attribute to the specified entity. - void addType(DIE *Entity, DIType Ty); - - - /// getOrCreateNameSpace - Create a DIE for DINameSpace. - DIE *getOrCreateNameSpace(DINameSpace NS); - - /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the - /// given DIType. - DIE *getOrCreateTypeDIE(DIType Ty); - - /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE - /// for the given DITemplateTypeParameter. 
-  DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP);
-
-  /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
-  /// for the given DITemplateValueParameter.
-  DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP);
-
-  void addPubTypes(DISubprogram SP);
-
-  /// constructTypeDIE - Construct basic type die from DIBasicType.
-  void constructTypeDIE(DIE &Buffer,
-                        DIBasicType BTy);
-
-  /// constructTypeDIE - Construct derived type die from DIDerivedType.
-  void constructTypeDIE(DIE &Buffer,
-                        DIDerivedType DTy);
-
-  /// constructTypeDIE - Construct type DIE from DICompositeType.
-  void constructTypeDIE(DIE &Buffer,
-                        DICompositeType CTy);
-
-  /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
-  void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
-
-  /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
-  void constructArrayTypeDIE(DIE &Buffer,
-                             DICompositeType *CTy);
-
-  /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
-  DIE *constructEnumTypeDIE(DIEnumerator ETy);
-
-  /// createMemberDIE - Create new member DIE.
-  DIE *createMemberDIE(DIDerivedType DT);
-
-  /// createSubprogramDIE - Create new DIE using SP.
-  DIE *createSubprogramDIE(DISubprogram SP);
-
   /// getOrCreateDbgScope - Create DbgScope for the scope.
   DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt);
@@ -455,14 +353,6 @@ private:
   ///
   void emitEndOfLineMatrix(unsigned SectionEnd);

-  /// emitCommonDebugFrame - Emit common frame info into a debug frame section.
-  ///
-  void emitCommonDebugFrame();
-
-  /// emitFunctionDebugFrame - Emit per function frame info into a debug frame
-  /// section.
-  void emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo);
-
   /// emitDebugPubNames - Emit visible names into a debug pubnames section.
   ///
   void emitDebugPubNames();
@@ -511,11 +401,6 @@ private:
   /// inlining instance.
   void emitDebugInlineInfo();

-  /// GetOrCreateSourceID - Look up the source id with the given directory and
-  /// source file names. If none currently exists, create a new id and insert it
-  /// in the SourceIds map.
-  unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName);
-
   /// constructCompileUnit - Create new CompileUnit for the given
   /// metadata node with tag DW_TAG_compile_unit.
   void constructCompileUnit(const MDNode *N);
@@ -532,7 +417,8 @@ private:
   /// recordSourceLine - Register a source line with debug info. Returns the
   /// unique label that was emitted and which provides correspondence to
   /// the source line list.
-  void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope);
+  void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
+                        unsigned Flags);

   /// recordVariableFrameIndex - Record a variable's index.
   void recordVariableFrameIndex(const DbgVariable *V, int Index);
@@ -611,6 +497,14 @@ public:
   /// endInstruction - Process end of an instruction.
   void endInstruction(const MachineInstr *MI);
+
+  /// GetOrCreateSourceID - Look up the source id with the given directory and
+  /// source file names. If none currently exists, create a new id and insert it
+  /// in the SourceIds map.
+  unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName);
+
+  /// createSubprogramDIE - Create new DIE using SP.
+ DIE *createSubprogramDIE(DISubprogram SP); }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index 06b1de6..b5f86ab 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -15,6 +15,7 @@ #define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H #include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/AsmPrinter.h" #include <vector> namespace llvm { @@ -140,17 +141,20 @@ public: }; class DwarfCFIException : public DwarfException { - /// shouldEmitTable - Per-function flag to indicate if EH tables should - /// be emitted. - bool shouldEmitTable; + /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality + /// should be emitted. + bool shouldEmitPersonality; + + /// shouldEmitLSDA - Per-function flag to indicate if .cfi_lsda + /// should be emitted. + bool shouldEmitLSDA; /// shouldEmitMoves - Per-function flag to indicate if frame moves info /// should be emitted. bool shouldEmitMoves; - /// shouldEmitTableModule - Per-module flag to indicate if EH tables - /// should be emitted. - bool shouldEmitTableModule; + AsmPrinter::CFIMoveType moveTypeModule; + public: //===--------------------------------------------------------------------===// // Main entry points. @@ -170,7 +174,7 @@ public: virtual void EndFunction(); }; -class DwarfTableException : public DwarfException { +class ARMException : public DwarfException { /// shouldEmitTable - Per-function flag to indicate if EH tables should /// be emitted. bool shouldEmitTable; @@ -182,48 +186,12 @@ class DwarfTableException : public DwarfException { /// shouldEmitTableModule - Per-module flag to indicate if EH tables /// should be emitted. bool shouldEmitTableModule; - - /// shouldEmitMovesModule - Per-module flag to indicate if frame moves - /// should be emitted. - bool shouldEmitMovesModule; - - struct FunctionEHFrameInfo { - MCSymbol *FunctionEHSym; // L_foo.eh - unsigned Number; - unsigned PersonalityIndex; - bool adjustsStack; - bool hasLandingPads; - std::vector<MachineMove> Moves; - const Function *function; - - FunctionEHFrameInfo(MCSymbol *EHSym, unsigned Num, unsigned P, - bool hC, bool hL, - const std::vector<MachineMove> &M, - const Function *f): - FunctionEHSym(EHSym), Number(Num), PersonalityIndex(P), - adjustsStack(hC), hasLandingPads(hL), Moves(M), function (f) { } - }; - - std::vector<FunctionEHFrameInfo> EHFrames; - - /// UsesLSDA - Indicates whether an FDE that uses the CIE at the given index - /// uses an LSDA. If so, then we need to encode that information in the CIE's - /// augmentation. - DenseMap<unsigned, bool> UsesLSDA; - - /// EmitCIE - Emit a Common Information Entry (CIE). This holds information - /// that is shared among many Frame Description Entries. There is at least - /// one CIE in every non-empty .debug_frame section. - void EmitCIE(const Function *Personality, unsigned Index); - - /// EmitFDE - Emit the Frame Description Entry (FDE) for the function. - void EmitFDE(const FunctionEHFrameInfo &EHFrameInfo); public: //===--------------------------------------------------------------------===// // Main entry points. // - DwarfTableException(AsmPrinter *A); - virtual ~DwarfTableException(); + ARMException(AsmPrinter *A); + virtual ~ARMException(); /// EndModule - Emit all exception information that should come after the /// content. 
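For orientation before the next hunk: the rewritten DwarfCFIException above replaces the old all-or-nothing shouldEmitTable flag with independent per-function decisions. A minimal self-contained C++ sketch of that decision pattern follows; the helper and its parameters are illustrative inventions, not code from this patch, though the flag computation mirrors Win64Exception::BeginFunction further down.

    // DW_EH_PE_omit as defined by the DWARF exception-handling
    // pointer-encoding constants.
    static const unsigned DW_EH_PE_omit = 0xff;

    struct EHFlags {
      bool shouldEmitPersonality; // personality (.cfi_personality) info
      bool shouldEmitLSDA;        // language-specific data (.cfi_lsda)
      bool shouldEmitMoves;       // frame-move (CFI) info
    };

    // Hypothetical helper: compute the per-function flags from what the
    // function and target actually provide.
    EHFlags computeEHFlags(bool hasLandingPads, bool hasPersonalityFn,
                           unsigned PerEncoding, unsigned LSDAEncoding,
                           bool needsMoves) {
      EHFlags F;
      F.shouldEmitMoves = needsMoves;
      // A personality is only emitted when there are landing pads and the
      // target does not omit the personality encoding.
      F.shouldEmitPersonality =
          hasLandingPads && hasPersonalityFn && PerEncoding != DW_EH_PE_omit;
      // The LSDA only matters when a personality will consume it.
      F.shouldEmitLSDA =
          F.shouldEmitPersonality && LSDAEncoding != DW_EH_PE_omit;
      return F;
    }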
@@ -237,25 +205,25 @@ public: virtual void EndFunction(); }; +class Win64Exception : public DwarfException { + /// shouldEmitPersonality - Per-function flag to indicate if personality + /// info should be emitted. + bool shouldEmitPersonality; -class ARMException : public DwarfException { - /// shouldEmitTable - Per-function flag to indicate if EH tables should - /// be emitted. - bool shouldEmitTable; + /// shouldEmitLSDA - Per-function flag to indicate if the LSDA + /// should be emitted. + bool shouldEmitLSDA; /// shouldEmitMoves - Per-function flag to indicate if frame moves info /// should be emitted. bool shouldEmitMoves; - /// shouldEmitTableModule - Per-module flag to indicate if EH tables - /// should be emitted. - bool shouldEmitTableModule; public: //===--------------------------------------------------------------------===// // Main entry points. // - ARMException(AsmPrinter *A); - virtual ~ARMException(); + Win64Exception(AsmPrinter *A); + virtual ~Win64Exception(); /// EndModule - Emit all exception information that should come after the /// content. diff --git a/lib/CodeGen/AsmPrinter/DwarfTableException.cpp b/lib/CodeGen/AsmPrinter/DwarfTableException.cpp deleted file mode 100644 index 7519011..0000000 --- a/lib/CodeGen/AsmPrinter/DwarfTableException.cpp +++ /dev/null @@ -1,349 +0,0 @@ -//===-- CodeGen/AsmPrinter/DwarfTableException.cpp - Dwarf Exception Impl --==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains support for writing DWARF exception info into asm files. -// The implementation emits all the necessary tables "by hands". -// -//===----------------------------------------------------------------------===// - -#include "DwarfException.h" -#include "llvm/Module.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineLocation.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSection.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/Twine.h" -using namespace llvm; - -DwarfTableException::DwarfTableException(AsmPrinter *A) - : DwarfException(A), - shouldEmitTable(false), shouldEmitMoves(false), - shouldEmitTableModule(false), shouldEmitMovesModule(false) {} - -DwarfTableException::~DwarfTableException() {} - -/// EmitCIE - Emit a Common Information Entry (CIE). This holds information that -/// is shared among many Frame Description Entries. There is at least one CIE -/// in every non-empty .debug_frame section. -void DwarfTableException::EmitCIE(const Function *PersonalityFn, unsigned Index) { - // Size and sign of stack growth. 
- int stackGrowth = Asm->getTargetData().getPointerSize(); - if (Asm->TM.getFrameLowering()->getStackGrowthDirection() == - TargetFrameLowering::StackGrowsDown) - stackGrowth *= -1; - - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - - // Begin eh frame section. - Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); - - MCSymbol *EHFrameSym; - if (TLOF.isFunctionEHFrameSymbolPrivate()) - EHFrameSym = Asm->GetTempSymbol("EH_frame", Index); - else - EHFrameSym = Asm->OutContext.GetOrCreateSymbol(Twine("EH_frame") + - Twine(Index)); - Asm->OutStreamer.EmitLabel(EHFrameSym); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_eh_frame", Index)); - - // Define base labels. - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common", Index)); - - // Define the eh frame length. - Asm->OutStreamer.AddComment("Length of Common Information Entry"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_frame_common_end", Index), - Asm->GetTempSymbol("eh_frame_common_begin", Index), - 4); - - // EH frame header. - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_begin",Index)); - Asm->OutStreamer.AddComment("CIE Identifier Tag"); - Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); - Asm->OutStreamer.AddComment("DW_CIE_VERSION"); - Asm->OutStreamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1/*size*/, 0/*addr*/); - - // The personality presence indicates that language specific information will - // show up in the eh frame. Find out how we are supposed to lower the - // personality function reference: - - unsigned LSDAEncoding = TLOF.getLSDAEncoding(); - unsigned FDEEncoding = TLOF.getFDEEncoding(); - unsigned PerEncoding = TLOF.getPersonalityEncoding(); - - char Augmentation[6] = { 0 }; - unsigned AugmentationSize = 0; - char *APtr = Augmentation + 1; - - if (PersonalityFn) { - // There is a personality function. - *APtr++ = 'P'; - AugmentationSize += 1 + Asm->GetSizeOfEncodedValue(PerEncoding); - } - - if (UsesLSDA[Index]) { - // An LSDA pointer is in the FDE augmentation. - *APtr++ = 'L'; - ++AugmentationSize; - } - - if (FDEEncoding != dwarf::DW_EH_PE_absptr) { - // A non-default pointer encoding for the FDE. - *APtr++ = 'R'; - ++AugmentationSize; - } - - if (APtr != Augmentation + 1) - Augmentation[0] = 'z'; - - Asm->OutStreamer.AddComment("CIE Augmentation"); - Asm->OutStreamer.EmitBytes(StringRef(Augmentation, strlen(Augmentation)+1),0); - - // Round out reader. - Asm->EmitULEB128(1, "CIE Code Alignment Factor"); - Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor"); - Asm->OutStreamer.AddComment("CIE Return Address Column"); - - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true)); - - if (Augmentation[0]) { - Asm->EmitULEB128(AugmentationSize, "Augmentation Size"); - - // If there is a personality, we need to indicate the function's location. - if (PersonalityFn) { - Asm->EmitEncodingByte(PerEncoding, "Personality"); - Asm->OutStreamer.AddComment("Personality"); - Asm->EmitReference(PersonalityFn, PerEncoding); - } - if (UsesLSDA[Index]) - Asm->EmitEncodingByte(LSDAEncoding, "LSDA"); - if (FDEEncoding != dwarf::DW_EH_PE_absptr) - Asm->EmitEncodingByte(FDEEncoding, "FDE"); - } - - // Indicate locations of general callee saved registers in frame. 
- std::vector<MachineMove> Moves; - TFI->getInitialFrameState(Moves); - Asm->EmitFrameMoves(Moves, 0, true); - - // On Darwin the linker honors the alignment of eh_frame, which means it must - // be 8-byte on 64-bit targets to match what gcc does. Otherwise you get - // holes which confuse readers of eh_frame. - Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_end", Index)); -} - -/// EmitFDE - Emit the Frame Description Entry (FDE) for the function. -void DwarfTableException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { - assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() && - "Should not emit 'available externally' functions at all"); - - const Function *TheFunc = EHFrameInfo.function; - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - - unsigned LSDAEncoding = TLOF.getLSDAEncoding(); - unsigned FDEEncoding = TLOF.getFDEEncoding(); - - Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); - - // Externally visible entry into the functions eh frame info. If the - // corresponding function is static, this should not be externally visible. - if (!TheFunc->hasLocalLinkage() && TLOF.isFunctionEHSymbolGlobal()) - Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,MCSA_Global); - - // If corresponding function is weak definition, this should be too. - if (TheFunc->isWeakForLinker() && Asm->MAI->getWeakDefDirective()) - Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, - MCSA_WeakDefinition); - - // If corresponding function is hidden, this should be too. - if (TheFunc->hasHiddenVisibility()) - if (MCSymbolAttr HiddenAttr = Asm->MAI->getHiddenVisibilityAttr()) - Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, - HiddenAttr); - - // If there are no calls then you can't unwind. This may mean we can omit the - // EH Frame, but some environments do not handle weak absolute symbols. If - // UnwindTablesMandatory is set we cannot do this optimization; the unwind - // info is to be available for non-EH uses. - if (!EHFrameInfo.adjustsStack && !UnwindTablesMandatory && - (!TheFunc->isWeakForLinker() || - !Asm->MAI->getWeakDefDirective() || - TLOF.getSupportsWeakOmittedEHFrame())) { - Asm->OutStreamer.EmitAssignment(EHFrameInfo.FunctionEHSym, - MCConstantExpr::Create(0, Asm->OutContext)); - // This name has no connection to the function, so it might get - // dead-stripped when the function is not, erroneously. Prohibit - // dead-stripping unconditionally. - if (Asm->MAI->hasNoDeadStrip()) - Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, - MCSA_NoDeadStrip); - } else { - Asm->OutStreamer.EmitLabel(EHFrameInfo.FunctionEHSym); - - // EH frame header. 
- Asm->OutStreamer.AddComment("Length of Frame Information Entry"); - Asm->EmitLabelDifference( - Asm->GetTempSymbol("eh_frame_end", EHFrameInfo.Number), - Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number), 4); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_begin", - EHFrameInfo.Number)); - - Asm->OutStreamer.AddComment("FDE CIE offset"); - Asm->EmitLabelDifference( - Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number), - Asm->GetTempSymbol("eh_frame_common", - EHFrameInfo.PersonalityIndex), 4); - - MCSymbol *EHFuncBeginSym = - Asm->GetTempSymbol("eh_func_begin", EHFrameInfo.Number); - - Asm->OutStreamer.AddComment("FDE initial location"); - Asm->EmitReference(EHFuncBeginSym, FDEEncoding); - - Asm->OutStreamer.AddComment("FDE address range"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_func_end", - EHFrameInfo.Number), - EHFuncBeginSym, - Asm->GetSizeOfEncodedValue(FDEEncoding)); - - // If there is a personality and landing pads then point to the language - // specific data area in the exception table. - if (MMI->getPersonalities()[0] != NULL) { - unsigned Size = Asm->GetSizeOfEncodedValue(LSDAEncoding); - - Asm->EmitULEB128(Size, "Augmentation size"); - Asm->OutStreamer.AddComment("Language Specific Data Area"); - if (EHFrameInfo.hasLandingPads) - Asm->EmitReference(Asm->GetTempSymbol("exception", EHFrameInfo.Number), - LSDAEncoding); - else - Asm->OutStreamer.EmitIntValue(0, Size/*size*/, 0/*addrspace*/); - - } else { - Asm->EmitULEB128(0, "Augmentation size"); - } - - // Indicate locations of function specific callee saved registers in frame. - Asm->EmitFrameMoves(EHFrameInfo.Moves, EHFuncBeginSym, true); - - // On Darwin the linker honors the alignment of eh_frame, which means it - // must be 8-byte on 64-bit targets to match what gcc does. Otherwise you - // get holes which confuse readers of eh_frame. - Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_end", - EHFrameInfo.Number)); - - // If the function is marked used, this table should be also. We cannot - // make the mark unconditional in this case, since retaining the table also - // retains the function in this case, and there is code around that depends - // on unused functions (calling undefined externals) being dead-stripped to - // link correctly. Yes, there really is. - if (MMI->isUsedFunction(EHFrameInfo.function)) - if (Asm->MAI->hasNoDeadStrip()) - Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, - MCSA_NoDeadStrip); - } - Asm->OutStreamer.AddBlankLine(); -} - -/// EndModule - Emit all exception information that should come after the -/// content. -void DwarfTableException::EndModule() { - if (!Asm->MAI->isExceptionHandlingDwarf()) - return; - - if (!shouldEmitMovesModule && !shouldEmitTableModule) - return; - - const std::vector<const Function*> &Personalities = MMI->getPersonalities(); - - for (unsigned I = 0, E = Personalities.size(); I < E; ++I) - EmitCIE(Personalities[I], I); - - for (std::vector<FunctionEHFrameInfo>::iterator - I = EHFrames.begin(), E = EHFrames.end(); I != E; ++I) - EmitFDE(*I); -} - -/// BeginFunction - Gather pre-function exception information. Assumes it's -/// being emitted immediately after the function entry point. -void DwarfTableException::BeginFunction(const MachineFunction *MF) { - shouldEmitTable = shouldEmitMoves = false; - - // If any landing pads survive, we need an EH table. 
- shouldEmitTable = !MMI->getLandingPads().empty(); - - // See if we need frame move info. - shouldEmitMoves = - !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory; - - if (shouldEmitMoves || shouldEmitTable) - // Assumes in correct section after the entry point. - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", - Asm->getFunctionNumber())); - - shouldEmitTableModule |= shouldEmitTable; - shouldEmitMovesModule |= shouldEmitMoves; -} - -/// EndFunction - Gather and emit post-function exception information. -/// -void DwarfTableException::EndFunction() { - if (!shouldEmitMoves && !shouldEmitTable) return; - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", - Asm->getFunctionNumber())); - - // Record if this personality index uses a landing pad. - bool HasLandingPad = !MMI->getLandingPads().empty(); - UsesLSDA[MMI->getPersonalityIndex()] |= HasLandingPad; - - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); - - if (HasLandingPad) - EmitExceptionTable(); - - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - MCSymbol *FunctionEHSym = - Asm->GetSymbolWithGlobalValueBase(Asm->MF->getFunction(), ".eh", - TLOF.isFunctionEHFrameSymbolPrivate()); - - // Save EH frame information - EHFrames. - push_back(FunctionEHFrameInfo(FunctionEHSym, - Asm->getFunctionNumber(), - MMI->getPersonalityIndex(), - Asm->MF->getFrameInfo()->adjustsStack(), - !MMI->getLandingPads().empty(), - MMI->getFrameMoves(), - Asm->MF->getFunction())); -} diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp new file mode 100644 index 0000000..c2ad5eb --- /dev/null +++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -0,0 +1,116 @@ +//===-- CodeGen/AsmPrinter/Win64Exception.cpp - Dwarf Exception Impl ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing Win64 exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#include "DwarfException.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +using namespace llvm; + +Win64Exception::Win64Exception(AsmPrinter *A) + : DwarfException(A), + shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false) + {} + +Win64Exception::~Win64Exception() {} + +/// EndModule - Emit all exception information that should come after the +/// content. 
+void Win64Exception::EndModule() { +} + +/// BeginFunction - Gather pre-function exception information. Assumes it's +/// being emitted immediately after the function entry point. +void Win64Exception::BeginFunction(const MachineFunction *MF) { + shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; + + // If any landing pads survive, we need an EH table. + bool hasLandingPads = !MMI->getLandingPads().empty(); + + shouldEmitMoves = Asm->needsSEHMoves(); + + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; + + shouldEmitPersonality = hasLandingPads && + PerEncoding != dwarf::DW_EH_PE_omit && Per; + + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + shouldEmitLSDA = shouldEmitPersonality && + LSDAEncoding != dwarf::DW_EH_PE_omit; + + if (!shouldEmitPersonality && !shouldEmitMoves) + return; + + Asm->OutStreamer.EmitWin64EHStartProc(Asm->CurrentFnSym); + + if (!shouldEmitPersonality) + return; + + MCSymbol *GCCHandlerSym = + Asm->GetExternalSymbolSymbol("_GCC_specific_handler"); + Asm->OutStreamer.EmitWin64EHHandler(GCCHandlerSym, true, true); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", + Asm->getFunctionNumber())); +} + +/// EndFunction - Gather and emit post-function exception information. +/// +void Win64Exception::EndFunction() { + if (!shouldEmitPersonality && !shouldEmitMoves) + return; + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", + Asm->getFunctionNumber())); + + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); + + if (shouldEmitPersonality) { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; + const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI); + + Asm->OutStreamer.PushSection(); + Asm->OutStreamer.EmitWin64EHHandlerData(); + Asm->OutStreamer.EmitValue(MCSymbolRefExpr::Create(Sym, Asm->OutContext), + 4); + EmitExceptionTable(); + Asm->OutStreamer.PopSection(); + } + Asm->OutStreamer.EmitWin64EHEndProc(); +} diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 78a8743..d95f77e 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -41,6 +41,7 @@ using namespace llvm; STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); STATISTIC(NumBranchOpts, "Number of branches optimized"); STATISTIC(NumTailMerge , "Number of block tails merged"); +STATISTIC(NumHoist , "Number of times common instructions are hoisted"); static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge", cl::init(cl::BOU_UNSET), cl::Hidden); @@ -65,7 +66,7 @@ namespace { public: static char ID; explicit BranchFolderPass(bool defaultEnableTailMerge) - : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge) {} + : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge, true) {} virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { return "Control Flow Optimizer"; } @@ -86,12 +87,14 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { } -BranchFolder::BranchFolder(bool defaultEnableTailMerge) { +BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist) { switch (FlagEnableTailMerge) { case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break; case cl::BOU_TRUE: EnableTailMerge = true; break; case 
cl::BOU_FALSE: EnableTailMerge = false; break; } + + EnableHoistCommonCode = CommonHoist; } /// RemoveDeadBlock - Remove the specified dead machine basic block from the @@ -105,6 +108,9 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { while (!MBB->succ_empty()) MBB->removeSuccessor(MBB->succ_end()-1); + // Avoid matching if this pointer gets reused. + TriedMerging.erase(MBB); + // Remove the block. MF->erase(MBB); } @@ -168,6 +174,8 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, MachineModuleInfo *mmi) { if (!tii) return false; + TriedMerging.clear(); + TII = tii; TRI = tri; MMI = mmi; @@ -186,9 +194,10 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, bool MadeChangeThisIteration = true; while (MadeChangeThisIteration) { - MadeChangeThisIteration = false; - MadeChangeThisIteration |= TailMergeBlocks(MF); - MadeChangeThisIteration |= OptimizeBranches(MF); + MadeChangeThisIteration = TailMergeBlocks(MF); + MadeChangeThisIteration |= OptimizeBranches(MF); + if (EnableHoistCommonCode) + MadeChangeThisIteration |= HoistCommonCode(MF); MadeChange |= MadeChangeThisIteration; } @@ -795,14 +804,21 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // First find blocks with no successors. MergePotentials.clear(); - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E && MergePotentials.size() < TailMergeThreshold; ++I) { + if (TriedMerging.count(I)) + continue; if (I->succ_empty()) MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I)); } + // If this is a large problem, avoid visiting the same basic blocks + // multiple times. + if (MergePotentials.size() == TailMergeThreshold) + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + TriedMerging.insert(MergePotentials[i].getBlock()); // See if we can do any tail merging on those. - if (MergePotentials.size() < TailMergeThreshold && - MergePotentials.size() >= 2) + if (MergePotentials.size() >= 2) MadeChange |= TryTailMergeBlocks(NULL, NULL); // Look at blocks (IBB) with multiple predecessors (PBB). @@ -826,15 +842,17 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); I != E; ++I) { - if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) { + if (I->pred_size() >= 2) { SmallPtrSet<MachineBasicBlock *, 8> UniquePreds; MachineBasicBlock *IBB = I; MachineBasicBlock *PredBB = prior(I); MergePotentials.clear(); for (MachineBasicBlock::pred_iterator P = I->pred_begin(), E2 = I->pred_end(); - P != E2; ++P) { + P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) { MachineBasicBlock *PBB = *P; + if (TriedMerging.count(PBB)) + continue; // Skip blocks that loop to themselves, can't tail merge these. if (PBB == IBB) continue; @@ -887,6 +905,11 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); } } + // If this is a large problem, avoid visiting the same basic blocks + // multiple times. + if (MergePotentials.size() == TailMergeThreshold) + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + TriedMerging.insert(MergePotentials[i].getBlock()); if (MergePotentials.size() >= 2) MadeChange |= TryTailMergeBlocks(IBB, PredBB); // Reinsert an unconditional branch if needed. 
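As a reading aid for the TailMergeBlocks changes above: once the candidate list reaches TailMergeThreshold, every collected block is remembered in TriedMerging so later iterations skip it instead of rescanning a large function. The same bounded-worklist pattern in plain, self-contained C++ (Block and the container names are illustrative stand-ins, not LLVM types):

    #include <cstddef>
    #include <set>
    #include <vector>

    struct Block { unsigned Hash; }; // stand-in for MachineBasicBlock

    // Collect at most Threshold candidates, skipping blocks tried before;
    // when the cap is hit, mark everything collected as tried so the next
    // pass over the same function does not revisit it.
    std::vector<Block*> collectCandidates(std::vector<Block> &Blocks,
                                          std::set<Block*> &Tried,
                                          std::size_t Threshold) {
      std::vector<Block*> Candidates;
      for (std::size_t i = 0;
           i != Blocks.size() && Candidates.size() < Threshold; ++i) {
        Block *B = &Blocks[i];
        if (Tried.count(B))
          continue;
        Candidates.push_back(B);
      }
      if (Candidates.size() == Threshold)
        for (std::size_t i = 0; i != Candidates.size(); ++i)
          Tried.insert(Candidates[i]);
      return Candidates;
    }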
@@ -910,7 +933,8 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
   // Make sure blocks are numbered in order
   MF.RenumberBlocks();

-  for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
+  for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+       I != E; ) {
     MachineBasicBlock *MBB = I++;
     MadeChange |= OptimizeBlock(MBB);
@@ -1048,9 +1072,25 @@ ReoptimizeBlock:
     // AnalyzeBranch.
     if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
         PrevBB.succ_size() == 1 &&
-        !MBB->hasAddressTaken()) {
+        !MBB->hasAddressTaken() && !MBB->isLandingPad()) {
       DEBUG(dbgs() << "\nMerging into block: " << PrevBB
                    << "From MBB: " << *MBB);
+      // Remove redundant DBG_VALUEs first.
+      if (PrevBB.begin() != PrevBB.end()) {
+        MachineBasicBlock::iterator PrevBBIter = PrevBB.end();
+        --PrevBBIter;
+        MachineBasicBlock::iterator MBBIter = MBB->begin();
+        // Check if DBG_VALUE at the end of PrevBB is identical to the
+        // DBG_VALUE at the beginning of MBB.
+        while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end()
+               && PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) {
+          if (!MBBIter->isIdenticalTo(PrevBBIter))
+            break;
+          MachineInstr *DuplicateDbg = MBBIter;
+          ++MBBIter; --PrevBBIter;
+          DuplicateDbg->eraseFromParent();
+        }
+      }
       PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());
       PrevBB.removeSuccessor(PrevBB.succ_begin());
       assert(PrevBB.succ_empty());
@@ -1339,3 +1379,282 @@ ReoptimizeBlock:
   return MadeChange;
 }
+
+//===----------------------------------------------------------------------===//
+//  Hoist Common Code
+//===----------------------------------------------------------------------===//
+
+/// HoistCommonCode - Hoist common instruction sequences at the start of basic
+/// blocks to their common predecessor.
+bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
+  bool MadeChange = false;
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) {
+    MachineBasicBlock *MBB = I++;
+    MadeChange |= HoistCommonCodeInSuccs(MBB);
+  }
+
+  return MadeChange;
+}
+
+/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
+/// its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+                                         MachineBasicBlock *TrueBB) {
+  for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+         E = BB->succ_end(); SI != E; ++SI) {
+    MachineBasicBlock *SuccBB = *SI;
+    if (SuccBB != TrueBB)
+      return SuccBB;
+  }
+  return NULL;
+}
+
+/// findHoistingInsertPosAndDeps - Find the location to move common
+/// instructions in successors to. The location is usually just before the
+/// terminator; however, if the terminator is a conditional branch and its
+/// previous instruction is the flag-setting instruction, the previous
+/// instruction is the preferred location. This function also gathers uses and
+/// defs of the instructions from the insertion point to the end of the block.
+/// The data is used by HoistCommonCodeInSuccs to ensure safety.
+static
+MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
+                                                  const TargetInstrInfo *TII,
+                                                  const TargetRegisterInfo *TRI,
+                                                  SmallSet<unsigned,4> &Uses,
+                                                  SmallSet<unsigned,4> &Defs) {
+  MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
+  if (!TII->isUnpredicatedTerminator(Loc))
+    return MBB->end();
+
+  for (unsigned i = 0, e = Loc->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = Loc->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    if (MO.isUse()) {
+      Uses.insert(Reg);
+      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+        Uses.insert(*AS);
+    } else if (!MO.isDead())
+      // Don't try to hoist code in the rare case the terminator defines a
+      // register that is later used.
+      return MBB->end();
+  }
+
+  if (Uses.empty())
+    return Loc;
+  if (Loc == MBB->begin())
+    return MBB->end();
+
+  // The terminator is probably a conditional branch; try not to separate the
+  // branch from the condition-setting instruction.
+  MachineBasicBlock::iterator PI = Loc;
+  --PI;
+  while (PI != MBB->begin() && PI->isDebugValue())
+    --PI;
+
+  bool IsDef = false;
+  for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) {
+    const MachineOperand &MO = PI->getOperand(i);
+    if (!MO.isReg() || MO.isUse())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    if (Uses.count(Reg))
+      IsDef = true;
+  }
+  if (!IsDef)
+    // The condition-setting instruction is not just before the conditional
+    // branch.
+    return Loc;
+
+  // Be conservative: don't insert an instruction above anything that may have
+  // side effects. And since it is potentially bad to separate the flag-setting
+  // instruction from the conditional branch, just abort the optimization
+  // completely.
+  // Also avoid moving code above a predicated instruction, since it is hard to
+  // reason about register liveness with predication.
+  bool DontMoveAcrossStore = true;
+  if (!PI->isSafeToMove(TII, 0, DontMoveAcrossStore) ||
+      TII->isPredicated(PI))
+    return MBB->end();
+
+  // Find out what registers are live. Note this routine ignores other live
+  // registers that are only used by instructions in successor blocks.
+  for (unsigned i = 0, e = PI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = PI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    if (MO.isUse()) {
+      Uses.insert(Reg);
+      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+        Uses.insert(*AS);
+    } else {
+      if (Uses.count(Reg)) {
+        Uses.erase(Reg);
+        for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+          Uses.erase(*SR); // Use getSubRegisters to be conservative
+      }
+      Defs.insert(Reg);
+      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+        Defs.insert(*AS);
+    }
+  }
+
+  return PI;
+}
+
+/// HoistCommonCodeInSuccs - If the successors of MBB share a common
+/// instruction sequence at their start, move those instructions before MBB's
+/// terminator if it is legal.
+bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
+  MachineBasicBlock *TBB = 0, *FBB = 0;
+  SmallVector<MachineOperand, 4> Cond;
+  if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty())
+    return false;
+
+  if (!FBB) FBB = findFalseBlock(MBB, TBB);
+  if (!FBB)
+    // Malformed bcc? True and false blocks are the same?
+    return false;
+
+  // Restrict the optimization to cases where MBB is the only predecessor;
+  // there it is an obvious win.
+  if (TBB->pred_size() > 1 || FBB->pred_size() > 1)
+    return false;
+
+  // Find a suitable position to hoist the common instructions to. Also figure
+  // out which registers are used or defined by instructions from the insertion
+  // point to the end of the block.
+  SmallSet<unsigned, 4> Uses, Defs;
+  MachineBasicBlock::iterator Loc =
+    findHoistingInsertPosAndDeps(MBB, TII, TRI, Uses, Defs);
+  if (Loc == MBB->end())
+    return false;
+
+  bool HasDups = false;
+  SmallVector<unsigned, 4> LocalDefs;
+  SmallSet<unsigned, 4> LocalDefsSet;
+  MachineBasicBlock::iterator TIB = TBB->begin();
+  MachineBasicBlock::iterator FIB = FBB->begin();
+  MachineBasicBlock::iterator TIE = TBB->end();
+  MachineBasicBlock::iterator FIE = FBB->end();
+  while (TIB != TIE && FIB != FIE) {
+    // Skip dbg_value instructions. These do not count.
+    if (TIB->isDebugValue()) {
+      while (TIB != TIE && TIB->isDebugValue())
+        ++TIB;
+      if (TIB == TIE)
+        break;
+    }
+    if (FIB->isDebugValue()) {
+      while (FIB != FIE && FIB->isDebugValue())
+        ++FIB;
+      if (FIB == FIE)
+        break;
+    }
+    if (!TIB->isIdenticalTo(FIB, MachineInstr::CheckKillDead))
+      break;
+
+    if (TII->isPredicated(TIB))
+      // Hard to reason about register liveness with predicated instruction.
+      break;
+
+    bool IsSafe = true;
+    for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = TIB->getOperand(i);
+      if (!MO.isReg())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (!Reg)
+        continue;
+      if (MO.isDef()) {
+        if (Uses.count(Reg)) {
+          // Avoid clobbering a register that's used by the instruction at
+          // the point of insertion.
+          IsSafe = false;
+          break;
+        }
+
+        if (Defs.count(Reg) && !MO.isDead()) {
+          // Don't hoist the instruction if the def would be clobbered by the
+          // instruction at the point of insertion. FIXME: This is overly
+          // conservative. It should be possible to hoist the instructions
+          // in BB2 in the following example:
+          // BB1:
+          // r1, eflag = op1 r2, r3
+          // brcc eflag
+          //
+          // BB2:
+          // r1 = op2, ...
+          //    = op3, r1<kill>
+          IsSafe = false;
+          break;
+        }
+      } else if (!LocalDefsSet.count(Reg)) {
+        if (Defs.count(Reg)) {
+          // Use is defined by the instruction at the point of insertion.
+          IsSafe = false;
+          break;
+        }
+      }
+    }
+    if (!IsSafe)
+      break;
+
+    bool DontMoveAcrossStore = true;
+    if (!TIB->isSafeToMove(TII, 0, DontMoveAcrossStore))
+      break;
+
+    // Track local defs so we can update live-ins.
+    for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = TIB->getOperand(i);
+      if (!MO.isReg())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (!Reg)
+        continue;
+      if (MO.isDef()) {
+        if (!MO.isDead()) {
+          LocalDefs.push_back(Reg);
+          LocalDefsSet.insert(Reg);
+          for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+            LocalDefsSet.insert(*SR);
+        }
+      } else if (MO.isKill() && LocalDefsSet.count(Reg)) {
+        LocalDefsSet.erase(Reg);
+        for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+          LocalDefsSet.erase(*SR);
+      }
+    }
+
+    HasDups = true;
+    ++TIB;
+    ++FIB;
+  }
+
+  if (!HasDups)
+    return false;
+
+  MBB->splice(Loc, TBB, TBB->begin(), TIB);
+  FBB->erase(FBB->begin(), FIB);
+
+  // Update live-ins.
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { + unsigned Def = LocalDefs[i]; + if (LocalDefsSet.count(Def)) { + TBB->addLiveIn(Def); + FBB->addLiveIn(Def); + } + } + + ++NumHoist; + return true; +} diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h index 15dfa7f..4ed42c0 100644 --- a/lib/CodeGen/BranchFolding.h +++ b/lib/CodeGen/BranchFolding.h @@ -10,6 +10,7 @@ #ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP #define LLVM_CODEGEN_BRANCHFOLDING_HPP +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include <vector> @@ -19,11 +20,10 @@ namespace llvm { class RegScavenger; class TargetInstrInfo; class TargetRegisterInfo; - template<typename T> class SmallVectorImpl; class BranchFolder { public: - explicit BranchFolder(bool defaultEnableTailMerge); + explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist); bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii, @@ -48,6 +48,7 @@ namespace llvm { }; typedef std::vector<MergePotentialsElt>::iterator MPIterator; std::vector<MergePotentialsElt> MergePotentials; + SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging; class SameTailElt { MPIterator MPIter; @@ -85,6 +86,7 @@ namespace llvm { std::vector<SameTailElt> SameTails; bool EnableTailMerge; + bool EnableHoistCommonCode; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineModuleInfo *MMI; @@ -110,6 +112,9 @@ namespace llvm { bool OptimizeBlock(MachineBasicBlock *MBB); void RemoveDeadBlock(MachineBasicBlock *MBB); bool OptimizeImpDefsBlock(MachineBasicBlock *MBB); + + bool HoistCommonCode(MachineFunction &MF); + bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB); }; } diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 2ca3859..aef4ff2 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -33,6 +33,7 @@ add_llvm_library(LLVMCodeGen LocalStackSlotAllocation.cpp LowerSubregs.cpp MachineBasicBlock.cpp + MachineBranchProbabilityInfo.cpp MachineCSE.cpp MachineDominators.cpp MachineFunction.cpp @@ -67,6 +68,7 @@ add_llvm_library(LLVMCodeGen RegAllocGreedy.cpp RegAllocLinearScan.cpp RegAllocPBQP.cpp + RegisterClassInfo.cpp RegisterCoalescer.cpp RegisterScavenging.cpp RenderMachineFunction.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 86ab2b6..5d722ee 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -87,8 +87,8 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg, } void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { - MachineRegisterInfo &mri = mf_.getRegInfo(); - const TargetRegisterInfo &tri = *mf_.getTarget().getRegisterInfo(); + MachineRegisterInfo &mri = MF.getRegInfo(); + const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo(); MachineBasicBlock *mbb = 0; MachineLoop *loop = 0; unsigned loopDepth = 0; @@ -118,7 +118,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { // Get loop info for mi. if (mi->getParent() != mbb) { mbb = mi->getParent(); - loop = loops_.getLoopFor(mbb); + loop = Loops.getLoopFor(mbb); loopDepth = loop ? loop->getLoopDepth() : 0; isExiting = loop ? loop->isLoopExiting(mbb) : false; } @@ -129,7 +129,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth); // Give extra weight to what looks like a loop induction variable update. 
- if (writes && isExiting && lis_.isLiveOutOfMBB(li, mbb)) + if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb)) weight *= 3; totalWeight += weight; @@ -141,9 +141,9 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { unsigned hint = copyHint(mi, li.reg, tri, mri); if (!hint) continue; - float hweight = hint_[hint] += weight; + float hweight = Hint[hint] += weight; if (TargetRegisterInfo::isPhysicalRegister(hint)) { - if (hweight > bestPhys && lis_.isAllocatable(hint)) + if (hweight > bestPhys && LIS.isAllocatable(hint)) bestPhys = hweight, hintPhys = hint; } else { if (hweight > bestVirt) @@ -151,7 +151,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { } } - hint_.clear(); + Hint.clear(); // Always prefer the physreg hint. if (unsigned hint = hintPhys ? hintPhys : hintVirt) { @@ -165,7 +165,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { return; // Mark li as unspillable if all live ranges are tiny. - if (li.isZeroLength()) { + if (li.isZeroLength(LIS.getSlotIndexes())) { li.markNotSpillable(); return; } @@ -176,7 +176,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { // FIXME: this gets much more complicated once we support non-trivial // re-materialization. bool isLoad = false; - if (lis_.isReMaterializable(li, 0, isLoad)) { + if (LIS.isReMaterializable(li, 0, isLoad)) { if (isLoad) totalWeight *= 0.9F; else @@ -187,50 +187,29 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { } void VirtRegAuxInfo::CalculateRegClass(unsigned reg) { - MachineRegisterInfo &mri = mf_.getRegInfo(); - const TargetRegisterInfo *tri = mf_.getTarget().getRegisterInfo(); - const TargetRegisterClass *orc = mri.getRegClass(reg); - SmallPtrSet<const TargetRegisterClass*,8> rcs; - - for (MachineRegisterInfo::reg_nodbg_iterator I = mri.reg_nodbg_begin(reg), - E = mri.reg_nodbg_end(); I != E; ++I) { - // The targets don't have accurate enough regclass descriptions that we can - // handle subregs. We need something similar to - // TRI::getMatchingSuperRegClass, but returning a super class instead of a - // sub class. - if (I.getOperand().getSubReg()) { - DEBUG(dbgs() << "Cannot handle subregs: " << I.getOperand() << '\n'); - return; - } - if (const TargetRegisterClass *rc = - I->getDesc().getRegClass(I.getOperandNo(), tri)) - rcs.insert(rc); - } + MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterClass *OldRC = MRI.getRegClass(reg); + const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC); - // If we found no regclass constraints, just leave reg as is. - // In theory, we could inflate to the largest superclass of reg's existing - // class, but that might not be legal for the current cpu setting. - // This could happen if reg is only used by COPY instructions, so we may need - // to improve on this. - if (rcs.empty()) { + // Stop early if there is no room to grow. + if (NewRC == OldRC) return; - } - // Compute the intersection of all classes in rcs. - // This ought to be independent of iteration order, but if the target register - // classes don't form a proper algebra, it is possible to get different - // results. The solution is to make sure the intersection of any two register - // classes is also a register class or the null set. - const TargetRegisterClass *rc = 0; - for (SmallPtrSet<const TargetRegisterClass*,8>::iterator I = rcs.begin(), - E = rcs.end(); I != E; ++I) { - rc = rc ? 
getCommonSubClass(rc, *I) : *I; - assert(rc && "Incompatible regclass constraints found"); + // Accumulate constraints from all uses. + for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(reg), + E = MRI.reg_nodbg_end(); I != E; ++I) { + // TRI doesn't have accurate enough information to model this yet. + if (I.getOperand().getSubReg()) + return; + const TargetRegisterClass *OpRC = + I->getDesc().getRegClass(I.getOperandNo(), TRI); + if (OpRC) + NewRC = getCommonSubClass(NewRC, OpRC); + if (!NewRC || NewRC == OldRC) + return; } - - if (rc == orc) - return; - DEBUG(dbgs() << "Inflating " << orc->getName() << ':' << PrintReg(reg) - << " to " << rc->getName() <<".\n"); - mri.setRegClass(reg, rc); + DEBUG(dbgs() << "Inflating " << OldRC->getName() << ':' << PrintReg(reg) + << " to " << NewRC->getName() <<".\n"); + MRI.setRegClass(reg, NewRC); } diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index ecd69a0..14eb054 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -22,19 +23,22 @@ #include "llvm/Target/TargetLowering.h" using namespace llvm; -CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm, - SmallVector<CCValAssign, 16> &locs, LLVMContext &C) - : CallingConv(CC), IsVarArg(isVarArg), TM(tm), - TRI(*TM.getRegisterInfo()), Locs(locs), Context(C) { +CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, + const TargetMachine &tm, SmallVector<CCValAssign, 16> &locs, + LLVMContext &C) + : CallingConv(CC), IsVarArg(isVarArg), MF(mf), TM(tm), + TRI(*TM.getRegisterInfo()), Locs(locs), Context(C), + CallOrPrologue(Unknown) { // No stack is used. StackOffset = 0; - + + clearFirstByValReg(); UsedRegs.resize((TRI.getNumRegs()+31)/32); } -// HandleByVal - Allocate a stack slot large enough to pass an argument by -// value. The size and alignment information of the argument is encoded in its -// parameter attribute. +// HandleByVal - Allocate space on the stack large enough to pass an argument +// by value. The size and alignment information of the argument is encoded in +// its parameter attribute. void CCState::HandleByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, int MinSize, int MinAlign, @@ -45,10 +49,11 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT, Size = MinSize; if (MinAlign > (int)Align) Align = MinAlign; + if (MF.getFrameInfo()->getMaxAlignment() < Align) + MF.getFrameInfo()->setMaxAlignment(Align); + TM.getTargetLowering()->HandleByVal(this, Size); unsigned Offset = AllocateStack(Size, Align); - addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - TM.getTargetLowering()->HandleByVal(const_cast<CCState*>(this)); } /// MarkAllocated - Mark a register and all of its aliases as allocated. diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index e37356a..270c337 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -254,7 +254,7 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF, // Determine a position to move orphaned loop blocks to. 
If TopMBB is not // entered via fallthrough and BotMBB is exited via fallthrough, prepend them - // to the top of the loop to avoid loosing that fallthrough. Otherwise append + // to the top of the loop to avoid losing that fallthrough. Otherwise append // them to the bottom, even if it previously had a fallthrough, on the theory // that it's worth an extra branch to keep the loop contiguous. MachineFunction::iterator InsertPt = diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index f79598d..4cac453 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -27,12 +27,12 @@ using namespace llvm; CriticalAntiDepBreaker:: -CriticalAntiDepBreaker(MachineFunction& MFi) : +CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), TII(MF.getTarget().getInstrInfo()), TRI(MF.getTarget().getRegisterInfo()), - AllocatableSet(TRI->getAllocatableSet(MF)), + RegClassInfo(RCI), Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)), KillIndices(TRI->getNumRegs(), 0), DefIndices(TRI->getNumRegs(), 0) {} @@ -385,11 +385,9 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin, unsigned LastNewReg, const TargetRegisterClass *RC) { - for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF), - RE = RC->allocation_order_end(MF); R != RE; ++R) { - unsigned NewReg = *R; - // Don't consider non-allocatable registers - if (!AllocatableSet.test(NewReg)) continue; + ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC); + for (unsigned i = 0; i != Order.size(); ++i) { + unsigned NewReg = Order[i]; // Don't replace a register with itself. if (NewReg == AntiDepReg) continue; // Don't replace a register with one that was recently used to repair @@ -421,7 +419,8 @@ unsigned CriticalAntiDepBreaker:: BreakAntiDependencies(const std::vector<SUnit>& SUnits, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, - unsigned InsertPosIndex) { + unsigned InsertPosIndex, + DbgValueVector &DbgValues) { // The code below assumes that there is at least one instruction, // so just duck out immediately if the block is empty. if (SUnits.empty()) return 0; @@ -533,7 +532,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, if (Edge->getKind() == SDep::Anti) { AntiDepReg = Edge->getReg(); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); - if (!AllocatableSet.test(AntiDepReg)) + if (!RegClassInfo.isAllocatable(AntiDepReg)) // Don't break anti-dependencies on non-allocatable registers. AntiDepReg = 0; else if (KeepRegs.count(AntiDepReg)) @@ -628,14 +627,10 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // as well. 
const SUnit *SU = MISUnitMap[Q->second->getParent()]; if (!SU) continue; - for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) { - MachineInstr *DI = SU->DbgInstrList[i]; - assert (DI->getNumOperands()==3 && DI->getOperand(0).isReg() && - DI->getOperand(0).getReg() - && "Non register dbg_value attached to SUnit!"); - if (DI->getOperand(0).getReg() == AntiDepReg) - DI->getOperand(0).setReg(NewReg); - } + for (DbgValueVector::iterator DVI = DbgValues.begin(), + DVE = DbgValues.end(); DVI != DVE; ++DVI) + if (DVI->second == Q->second->getParent()) + UpdateDbgValue(DVI->first, AntiDepReg, NewReg); } // We just went back in time and modified history; the diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 0daaef2..0710780 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -17,6 +17,7 @@ #define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H #include "AntiDepBreaker.h" +#include "RegisterClassInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -27,6 +28,7 @@ #include <map> namespace llvm { +class RegisterClassInfo; class TargetInstrInfo; class TargetRegisterInfo; @@ -35,6 +37,7 @@ class TargetRegisterInfo; MachineRegisterInfo &MRI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const RegisterClassInfo &RegClassInfo; /// AllocatableSet - The set of allocatable registers. /// We'll be ignoring anti-dependencies on non-allocatable registers, @@ -66,7 +69,7 @@ class TargetRegisterInfo; SmallSet<unsigned, 4> KeepRegs; public: - CriticalAntiDepBreaker(MachineFunction& MFi); + CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&); ~CriticalAntiDepBreaker(); /// Start - Initialize anti-dep breaking for a new basic block. @@ -79,7 +82,8 @@ class TargetRegisterInfo; unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, - unsigned InsertPosIndex); + unsigned InsertPosIndex, + DbgValueVector &DbgValues); /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 34b1a39..22c5465 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -30,6 +30,7 @@ using namespace llvm; STATISTIC(NumLandingPadsSplit, "Number of landing pads split"); STATISTIC(NumUnwindsLowered, "Number of unwind instructions lowered"); +STATISTIC(NumResumesLowered, "Number of eh.resume calls lowered"); STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved"); namespace { @@ -63,7 +64,7 @@ namespace { BBSet LandingPads; bool NormalizeLandingPads(); - bool LowerUnwinds(); + bool LowerUnwindsAndResumes(); bool MoveExceptionValueCalls(); Instruction *CreateExceptionValueCall(BasicBlock *BB); @@ -251,10 +252,7 @@ bool DwarfEHPrepare::HandleURoRInvokes() { if (!URoR) { URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow"); - if (!URoR) { - URoR = F->getParent()->getFunction("_Unwind_SjLj_Resume"); - if (!URoR) return CleanupSelectors(CatchAllSels); - } + if (!URoR) return CleanupSelectors(CatchAllSels); } SmallPtrSet<InvokeInst*, 32> URoRInvokes; @@ -480,20 +478,25 @@ bool DwarfEHPrepare::NormalizeLandingPads() { /// rethrowing any previously caught exception. 
This will crash horribly /// at runtime if there is no such exception: using unwind to throw a new /// exception is currently not supported. -bool DwarfEHPrepare::LowerUnwinds() { - SmallVector<TerminatorInst*, 16> UnwindInsts; - - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { - TerminatorInst *TI = I->getTerminator(); - if (isa<UnwindInst>(TI)) - UnwindInsts.push_back(TI); +bool DwarfEHPrepare::LowerUnwindsAndResumes() { + SmallVector<Instruction*, 16> ResumeInsts; + + for (Function::iterator fi = F->begin(), fe = F->end(); fi != fe; ++fi) { + for (BasicBlock::iterator bi = fi->begin(), be = fi->end(); bi != be; ++bi){ + if (isa<UnwindInst>(bi)) + ResumeInsts.push_back(bi); + else if (CallInst *call = dyn_cast<CallInst>(bi)) + if (Function *fn = dyn_cast<Function>(call->getCalledValue())) + if (fn->getName() == "llvm.eh.resume") + ResumeInsts.push_back(bi); + } } - if (UnwindInsts.empty()) return false; + if (ResumeInsts.empty()) return false; // Find the rewind function if we didn't already. if (!RewindFunction) { - LLVMContext &Ctx = UnwindInsts[0]->getContext(); + LLVMContext &Ctx = ResumeInsts[0]->getContext(); std::vector<const Type*> Params(1, Type::getInt8PtrTy(Ctx)); FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), @@ -504,24 +507,36 @@ bool DwarfEHPrepare::LowerUnwinds() { bool Changed = false; - for (SmallVectorImpl<TerminatorInst*>::iterator - I = UnwindInsts.begin(), E = UnwindInsts.end(); I != E; ++I) { - TerminatorInst *TI = *I; + for (SmallVectorImpl<Instruction*>::iterator + I = ResumeInsts.begin(), E = ResumeInsts.end(); I != E; ++I) { + Instruction *RI = *I; - // Replace the unwind instruction with a call to _Unwind_Resume (or the - // appropriate target equivalent) followed by an UnreachableInst. + // Replace the resuming instruction with a call to _Unwind_Resume (or the + // appropriate target equivalent). + + llvm::Value *ExnValue; + if (isa<UnwindInst>(RI)) + ExnValue = CreateExceptionValueCall(RI->getParent()); + else + ExnValue = cast<CallInst>(RI)->getArgOperand(0); // Create the call... - CallInst *CI = CallInst::Create(RewindFunction, - CreateExceptionValueCall(TI->getParent()), - "", TI); + CallInst *CI = CallInst::Create(RewindFunction, ExnValue, "", RI); CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); - // ...followed by an UnreachableInst. - new UnreachableInst(TI->getContext(), TI); - // Nuke the unwind instruction. - TI->eraseFromParent(); - ++NumUnwindsLowered; + // ...followed by an UnreachableInst, if it was an unwind. + // Calls to llvm.eh.resume are typically already followed by this. + if (isa<UnwindInst>(RI)) + new UnreachableInst(RI->getContext(), RI); + + if (isa<UnwindInst>(RI)) + ++NumUnwindsLowered; + else + ++NumResumesLowered; + + // Nuke the resume instruction. + RI->eraseFromParent(); + Changed = true; } @@ -657,8 +672,8 @@ bool DwarfEHPrepare::runOnFunction(Function &Fn) { // basic block where an invoke unwind edge ends). Changed |= NormalizeLandingPads(); - // Turn unwind instructions into libcalls. - Changed |= LowerUnwinds(); + // Turn unwind instructions and eh.resume calls into libcalls. + Changed |= LowerUnwindsAndResumes(); // TODO: Move eh.selector calls to landing pads and combine them. diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h index e08feeb..5b63468 100644 --- a/lib/CodeGen/ELF.h +++ b/lib/CodeGen/ELF.h @@ -173,7 +173,7 @@ namespace llvm { unsigned Offset; // sh_offset - Offset from the file start unsigned Size; // sh_size - The section size. 
unsigned Link; // sh_link - Section header table index link. - unsigned Info; // sh_info - Auxillary information. + unsigned Info; // sh_info - Auxiliary information. unsigned Align; // sh_addralign - Alignment of section. unsigned EntSize; // sh_entsize - Size of entries in the section e diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index 25c2e02..fa2319b 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -77,7 +77,7 @@ ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm) // Create the object code emitter object for this target. ElfCE = new ELFCodeEmitter(*this); - // Inital number of sections + // Initial number of sections NumSections = 0; } @@ -662,12 +662,16 @@ bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { void ELFWriter::EmitXXStructorList(Constant *List, ELFSection &Xtor) { // Should be an array of '{ i32, void ()* }' structs. The first value is the // init priority, which we ignore. + if (List->isNullValue()) return; ConstantArray *InitList = cast<ConstantArray>(List); for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { + if (InitList->getOperand(i)->isNullValue()) + continue; ConstantStruct *CS = cast<ConstantStruct>(InitList->getOperand(i)); if (CS->getOperand(1)->isNullValue()) - return; // Found a null terminator, exit printing. + continue; + // Emit the function pointer. EmitGlobalConstant(CS->getOperand(1), Xtor); } diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp index aed8bc9..a7aba89 100644 --- a/lib/CodeGen/EdgeBundles.cpp +++ b/lib/CodeGen/EdgeBundles.cpp @@ -39,7 +39,7 @@ void EdgeBundles::getAnalysisUsage(AnalysisUsage &AU) const { bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) { MF = &mf; EC.clear(); - EC.grow(2 * MF->size()); + EC.grow(2 * MF->getNumBlockIDs()); for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; ++I) { @@ -53,6 +53,19 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) { EC.compress(); if (ViewEdgeBundles) view(); + + // Compute the reverse mapping. + Blocks.clear(); + Blocks.resize(getNumBundles()); + + for (unsigned i = 0, e = MF->getNumBlockIDs(); i != e; ++i) { + unsigned b0 = getBundle(i, 0); + unsigned b1 = getBundle(i, 1); + Blocks[b0].push_back(i); + if (b1 != b0) + Blocks[b1].push_back(i); + } + return false; } @@ -82,5 +95,3 @@ raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G, O << "}\n"; return O; } - - diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp index b5ec303..ebc2fc9 100644 --- a/lib/CodeGen/ExpandISelPseudos.cpp +++ b/lib/CodeGen/ExpandISelPseudos.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// Expand Psuedo-instructions produced by ISel. These are usually to allow +// Expand Pseudo-instructions produced by ISel. These are usually to allow // the expansion to contain control flow, such as a conditional move // implemented with a conditional branch and a phi, or an atomic operation // implemented with a loop. 
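The EmitXXStructorList hunk above changes the null handling: a null entry in the structor array no longer terminates emission, it is simply skipped, and a fully null (zeroinitializer) list bails out early. A self-contained sketch of the new walk, using a simplified stand-in for the '{ i32, void ()* }' records:

#include <cstdio>
#include <vector>

struct Structor {
  int Priority;   // init priority, ignored here
  void (*Fn)();   // may be null after optimization
};

// Emit every non-null constructor; null entries no longer end the walk.
static void emitStructorList(const std::vector<Structor> &List) {
  if (List.empty())
    return;               // loosely corresponds to List->isNullValue()
  for (const Structor &S : List) {
    if (!S.Fn)
      continue;           // skip a null entry instead of stopping
    std::printf("emit ctor (priority %d)\n", S.Priority);
  }
}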
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index db53b04..8b2c981 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -27,7 +27,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -146,10 +145,6 @@ namespace { : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {} }; - /// Roots - Basic blocks that do not have successors. These are the starting - /// points of Graph traversal. - std::vector<MachineBasicBlock*> Roots; - /// BBAnalysis - Results of if-conversion feasibility analysis indexed by /// basic block number. std::vector<BBInfo> BBAnalysis; @@ -270,7 +265,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { if (!TII) return false; // Tail merge tend to expose more if-conversion opportunities. - BranchFolder BF(true); + BranchFolder BF(true, false); bool BFChange = BF.OptimizeFunction(MF, TII, MF.getTarget().getRegisterInfo(), getAnalysisIfAvailable<MachineModuleInfo>()); @@ -287,11 +282,6 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { MF.RenumberBlocks(); BBAnalysis.resize(MF.getNumBlockIDs()); - // Look for root nodes, i.e. blocks without successors. - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - if (I->succ_empty()) - Roots.push_back(I); - std::vector<IfcvtToken*> Tokens; MadeChange = false; unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + @@ -406,11 +396,10 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { } Tokens.clear(); - Roots.clear(); BBAnalysis.clear(); if (MadeChange && IfCvtBranchFold) { - BranchFolder BF(false); + BranchFolder BF(false, false); BF.OptimizeFunction(MF, TII, MF.getTarget().getRegisterInfo(), getAnalysisIfAvailable<MachineModuleInfo>()); @@ -924,13 +913,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, /// candidates. void IfConverter::AnalyzeBlocks(MachineFunction &MF, std::vector<IfcvtToken*> &Tokens) { - std::set<MachineBasicBlock*> Visited; - for (unsigned i = 0, e = Roots.size(); i != e; ++i) { - for (idf_ext_iterator<MachineBasicBlock*> I=idf_ext_begin(Roots[i],Visited), - E = idf_ext_end(Roots[i], Visited); I != E; ++I) { - MachineBasicBlock *BB = *I; - AnalyzeBlock(BB, Tokens); - } + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *BB = I; + AnalyzeBlock(BB, Tokens); } // Sort to favor more complex ifcvt scheme. 
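Dropping the Roots vector above is not just cleanup: an inverse depth-first walk seeded only from successor-less blocks never reaches blocks trapped in infinite loops, so AnalyzeBlocks silently skipped them. Iterating the block list directly visits everything. A toy illustration with plain adjacency lists (not MachineFunction):

#include <cstdio>
#include <set>
#include <vector>

int main() {
  // Block 0 -> 1 -> 2 (exit); blocks 3 <-> 4 form an infinite loop.
  std::vector<std::vector<int>> Preds = {{}, {0}, {1}, {4}, {3}};

  // Old scheme: inverse DFS from the only successor-less block (2).
  std::set<int> Visited;
  std::vector<int> Work = {2};
  while (!Work.empty()) {
    int BB = Work.back(); Work.pop_back();
    if (!Visited.insert(BB).second) continue;
    for (int P : Preds[BB]) Work.push_back(P);
  }
  std::printf("old scheme analyzed %zu of 5 blocks\n", Visited.size()); // 3

  // New scheme: simply walk the block list; nothing is missed.
  for (int BB = 0; BB != 5; ++BB)
    std::printf("analyze block %d\n", BB); // AnalyzeBlock(BB, Tokens)
  return 0;
}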
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 86f4cfc..19ae333 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -16,6 +16,7 @@ #include "Spiller.h" #include "LiveRangeEdit.h" #include "VirtRegMap.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" @@ -31,6 +32,18 @@ using namespace llvm; +STATISTIC(NumSpilledRanges, "Number of spilled live ranges"); +STATISTIC(NumSnippets, "Number of snippets included in spills"); +STATISTIC(NumSpills, "Number of spills inserted"); +STATISTIC(NumReloads, "Number of reloads inserted"); +STATISTIC(NumFolded, "Number of folded stack accesses"); +STATISTIC(NumFoldedLoads, "Number of folded loads"); +STATISTIC(NumRemats, "Number of rematerialized defs for spilling"); +STATISTIC(NumOmitReloadSpill, "Number of omitted spills after reloads"); +STATISTIC(NumHoistLocal, "Number of locally hoisted spills"); +STATISTIC(NumHoistGlobal, "Number of globally hoisted spills"); +STATISTIC(NumRedundantSpills, "Number of redundant spills identified"); + namespace { class InlineSpiller : public Spiller { MachineFunctionPass &Pass; @@ -134,9 +147,10 @@ private: bool foldMemoryOperand(MachineBasicBlock::iterator MI, const SmallVectorImpl<unsigned> &Ops, MachineInstr *LoadMI = 0); - void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI); + void insertReload(LiveInterval &NewLI, SlotIndex, + MachineBasicBlock::iterator MI); void insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, - MachineBasicBlock::iterator MI); + SlotIndex, MachineBasicBlock::iterator MI); void spillAroundUses(unsigned Reg); void spillAll(); @@ -246,10 +260,11 @@ void InlineSpiller::collectRegsToSpill() { if (!isSnippet(SnipLI)) continue; SnippetCopies.insert(MI); - if (!isRegToSpill(SnipReg)) - RegsToSpill.push_back(SnipReg); - + if (isRegToSpill(SnipReg)) + continue; + RegsToSpill.push_back(SnipReg); DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n'); + ++NumSnippets; } } @@ -419,8 +434,10 @@ void InlineSpiller::analyzeSiblingValues() { } if (!DefMI && !VNI->isPHIDef()) DefMI = LIS.getInstructionFromIndex(VNI->def); - if (DefMI) - Edit->checkRematerializable(VNI, DefMI, TII, AA); + if (DefMI && Edit->checkRematerializable(VNI, DefMI, TII, AA)) { + DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@' + << VNI->def << " may remat from " << *DefMI); + } } } } @@ -431,7 +448,7 @@ bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { SlotIndex Idx = LIS.getInstructionIndex(CopyMI); VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getDefIndex()); assert(VNI && VNI->def == Idx.getDefIndex() && "Not defined by copy"); - SibValueMap::const_iterator I = SibValues.find(VNI); + SibValueMap::iterator I = SibValues.find(VNI); if (I == SibValues.end()) return false; @@ -441,6 +458,20 @@ bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { if (!SVI.AllDefsAreReloads && SVI.SpillVNI == VNI) return false; + // SpillReg may have been deleted by remat and DCE. 
+ if (!LIS.hasInterval(SVI.SpillReg)) { + DEBUG(dbgs() << "Stale interval: " << PrintReg(SVI.SpillReg) << '\n'); + SibValues.erase(I); + return false; + } + + LiveInterval &SibLI = LIS.getInterval(SVI.SpillReg); + if (!SibLI.containsValue(SVI.SpillVNI)) { + DEBUG(dbgs() << "Stale value: " << PrintReg(SVI.SpillReg) << '\n'); + SibValues.erase(I); + return false; + } + // Conservatively extend the stack slot range to the range of the original // value. We may be able to do better with stack slot coloring by being more // careful here. @@ -452,19 +483,22 @@ bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { << *StackInt << '\n'); // Already spilled everywhere. - if (SVI.AllDefsAreReloads) + if (SVI.AllDefsAreReloads) { + ++NumOmitReloadSpill; return true; - + } // We are going to spill SVI.SpillVNI immediately after its def, so clear out // any later spills of the same value. - eliminateRedundantSpills(LIS.getInterval(SVI.SpillReg), SVI.SpillVNI); + eliminateRedundantSpills(SibLI, SVI.SpillVNI); MachineBasicBlock *MBB = LIS.getMBBFromIndex(SVI.SpillVNI->def); MachineBasicBlock::iterator MII; if (SVI.SpillVNI->isPHIDef()) MII = MBB->SkipPHIsAndLabels(MBB->begin()); else { - MII = LIS.getInstructionFromIndex(SVI.SpillVNI->def); + MachineInstr *DefMI = LIS.getInstructionFromIndex(SVI.SpillVNI->def); + assert(DefMI && "Defining instruction disappeared"); + MII = DefMI; ++MII; } // Insert spill without kill flag immediately after def. @@ -474,6 +508,11 @@ bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { LIS.InsertMachineInstrInMaps(MII); VRM.addSpillSlotUse(StackSlot, MII); DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII); + + if (MBB == CopyMI->getParent()) + ++NumHoistLocal; + else + ++NumHoistGlobal; return true; } @@ -489,8 +528,8 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { LiveInterval *LI; tie(LI, VNI) = WorkList.pop_back_val(); unsigned Reg = LI->reg; - DEBUG(dbgs() << "Checking redundant spills for " << PrintReg(Reg) << ':' - << VNI->id << '@' << VNI->def << '\n'); + DEBUG(dbgs() << "Checking redundant spills for " + << VNI->id << '@' << VNI->def << " in " << *LI << '\n'); // Regs to spill are taken care of. if (isRegToSpill(Reg)) @@ -528,6 +567,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { // eliminateDeadDefs won't normally remove stores, so switch opcode. MI->setDesc(TII.get(TargetOpcode::KILL)); DeadDefs.push_back(MI); + ++NumRedundantSpills; } } } while (!WorkList.empty()); @@ -623,6 +663,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, if (RM.OrigMI->getDesc().canFoldAsLoad() && foldMemoryOperand(MI, Ops, RM.OrigMI)) { Edit->markRematerialized(RM.ParentVNI); + ++NumFoldedLoads; return true; } @@ -649,6 +690,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI)); DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); + ++NumRemats; return true; } @@ -775,14 +817,15 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, VRM.addSpillSlotUse(StackSlot, FoldMI); MI->eraseFromParent(); DEBUG(dbgs() << "\tfolded: " << *FoldMI); + ++NumFolded; return true; } /// insertReload - Insert a reload of NewLI.reg before MI. 
void InlineSpiller::insertReload(LiveInterval &NewLI, + SlotIndex Idx, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); - SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot, MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to load instruction. @@ -792,19 +835,13 @@ void InlineSpiller::insertReload(LiveInterval &NewLI, VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); + ++NumReloads; } /// insertSpill - Insert a spill of NewLI.reg after MI. void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, - MachineBasicBlock::iterator MI) { + SlotIndex Idx, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); - - // Get the defined value. It could be an early clobber so keep the def index. - SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); - VNInfo *VNI = OldLI.getVNInfoAt(Idx); - assert(VNI && VNI->def.getDefIndex() == Idx && "Inconsistent VNInfo"); - Idx = VNI->def; - TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot, MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to store instruction. @@ -813,10 +850,12 @@ void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); + ++NumSpills; } /// spillAroundUses - insert spill code around each use of Reg. void InlineSpiller::spillAroundUses(unsigned Reg) { + DEBUG(dbgs() << "spillAroundUses " << PrintReg(Reg) << '\n'); LiveInterval &OldLI = LIS.getInterval(Reg); // Iterate over instructions using Reg. @@ -854,9 +893,22 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { SmallVector<unsigned, 8> Ops; tie(Reads, Writes) = MI->readsWritesVirtualRegister(Reg, &Ops); + // Find the slot index where this instruction reads and writes OldLI. + // This is usually the def slot, except for tied early clobbers. + SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); + if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getUseIndex())) + if (SlotIndex::isSameInstr(Idx, VNI->def)) + Idx = VNI->def; + // Check for a sibling copy. unsigned SibReg = isFullCopyOf(MI, Reg); if (SibReg && isSibling(SibReg)) { + // This may actually be a copy between snippets. + if (isRegToSpill(SibReg)) { + DEBUG(dbgs() << "Found new snippet copy: " << *MI); + SnippetCopies.insert(MI); + continue; + } if (Writes) { // Hoist the spill of a sib-reg copy. if (hoistSpill(OldLI, MI)) { @@ -867,7 +919,6 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { } } else { // This is a reload for a sib-reg copy. Drop spills downstream. - SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); LiveInterval &SibLI = LIS.getInterval(SibReg); eliminateRedundantSpills(SibLI, SibLI.getVNInfoAt(Idx)); // The COPY will fold to a reload below. @@ -884,7 +935,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { NewLI.markNotSpillable(); if (Reads) - insertReload(NewLI, MI); + insertReload(NewLI, Idx, MI); // Rewrite instruction operands. bool hasLiveDef = false; @@ -899,10 +950,11 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { hasLiveDef = true; } } + DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI); // FIXME: Use a second vreg if instruction has no tied ops. 
if (Writes && hasLiveDef) - insertSpill(NewLI, OldLI, MI); + insertSpill(NewLI, OldLI, Idx, MI); DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); } @@ -938,13 +990,15 @@ void InlineSpiller::spillAll() { } // Finally delete the SnippetCopies. - for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()); - MachineInstr *MI = RI.skipInstruction();) { - assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy"); - // FIXME: Do this with a LiveRangeEdit callback. - VRM.RemoveMachineInstrFromMaps(MI); - LIS.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(RegsToSpill[i]); + MachineInstr *MI = RI.skipInstruction();) { + assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy"); + // FIXME: Do this with a LiveRangeEdit callback. + VRM.RemoveMachineInstrFromMaps(MI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + } } // Delete all spilled registers. @@ -953,6 +1007,7 @@ void InlineSpiller::spillAll() { } void InlineSpiller::spill(LiveRangeEdit &edit) { + ++NumSpilledRanges; Edit = &edit; assert(!TargetRegisterInfo::isStackSlot(edit.getReg()) && "Trying to spill a stack slot."); diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp index 0aff128..b1014a9 100644 --- a/lib/CodeGen/InterferenceCache.cpp +++ b/lib/CodeGen/InterferenceCache.cpp @@ -26,7 +26,7 @@ void InterferenceCache::init(MachineFunction *mf, TRI = tri; PhysRegEntries.assign(TRI->getNumRegs(), 0); for (unsigned i = 0; i != CacheEntries; ++i) - Entries[i].clear(indexes); + Entries[i].clear(mf, indexes); } InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) { @@ -91,10 +91,6 @@ bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray, } void InterferenceCache::Entry::update(unsigned MBBNum) { - BlockInterference *BI = &Blocks[MBBNum]; - BI->Tag = Tag; - BI->First = BI->Last = SlotIndex(); - SlotIndex Start, Stop; tie(Start, Stop) = Indexes->getMBBRange(MBBNum); @@ -109,23 +105,39 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { PrevPos = Start; } - // Check for first interference. - for (unsigned i = 0, e = Iters.size(); i != e; ++i) { - Iter &I = Iters[i]; - if (!I.valid()) - continue; - SlotIndex StartI = I.start(); - if (StartI >= Stop) - continue; - if (!BI->First.isValid() || StartI < BI->First) - BI->First = StartI; - } + MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum); + BlockInterference *BI = &Blocks[MBBNum]; + for (;;) { + BI->Tag = Tag; + BI->First = BI->Last = SlotIndex(); + + // Check for first interference. + for (unsigned i = 0, e = Iters.size(); i != e; ++i) { + Iter &I = Iters[i]; + if (!I.valid()) + continue; + SlotIndex StartI = I.start(); + if (StartI >= Stop) + continue; + if (!BI->First.isValid() || StartI < BI->First) + BI->First = StartI; + } - // No interference in block. - if (!BI->First.isValid()) - return; + PrevPos = Stop; + if (BI->First.isValid()) + break; + + // No interference in this block? Go ahead and precompute the next block. + if (++MFI == MF->end()) + return; + MBBNum = MFI->getNumber(); + BI = &Blocks[MBBNum]; + if (BI->Tag == Tag) + return; + tie(Start, Stop) = Indexes->getMBBRange(MBBNum); + } - // Check for last interference. + // Check for last interference in block. 
for (unsigned i = 0, e = Iters.size(); i != e; ++i) { Iter &I = Iters[i]; if (!I.valid() || I.start() >= Stop) @@ -140,5 +152,4 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { if (Backup) ++I; } - PrevPos = Stop; } diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h index 2e613ae..6c36fa4 100644 --- a/lib/CodeGen/InterferenceCache.h +++ b/lib/CodeGen/InterferenceCache.h @@ -43,6 +43,9 @@ class InterferenceCache { /// change. unsigned Tag; + /// MF - The current function. + MachineFunction *MF; + /// Indexes - Mapping block numbers to SlotIndex ranges. SlotIndexes *Indexes; @@ -67,8 +70,9 @@ class InterferenceCache { public: Entry() : PhysReg(0), Tag(0), Indexes(0) {} - void clear(SlotIndexes *indexes) { + void clear(MachineFunction *mf, SlotIndexes *indexes) { PhysReg = 0; + MF = mf; Indexes = indexes; } diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 8c2794a..b98fbed 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -13,6 +13,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/PassManager.h" +#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/CodeGen/AsmPrinter.h" @@ -32,7 +33,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/StandardPasses.h" using namespace llvm; namespace llvm { @@ -149,6 +149,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, getVerboseAsm(), hasMCUseLoc(), + hasMCUseCFI(), InstPrinter, MCE, TAB, ShowMCInst); @@ -291,13 +292,21 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // Standard LLVM-Level Passes. // Basic AliasAnalysis support. - createStandardAliasAnalysisPasses(&PM); + // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that + // BasicAliasAnalysis wins if they disagree. This is intended to help + // support "obvious" type-punning idioms. + PM.add(createTypeBasedAliasAnalysisPass()); + PM.add(createBasicAliasAnalysisPass()); // Before running any passes, run the verifier to determine if the input // coming from the front-end and/or optimizer is valid. if (!DisableVerify) PM.add(createVerifierPass()); + // Simplify ObjC ARC code. This is done late because it makes re-optimization + // difficult. + PM.add(createObjCARCContractPass()); + // Run loop strength reduction before anything else. if (OptLevel != CodeGenOpt::None && !DisableLSR) { PM.add(createLoopStrengthReducePass(getTargetLowering())); @@ -323,8 +332,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createSjLjEHPass(getTargetLowering())); // FALLTHROUGH case ExceptionHandling::DwarfCFI: - case ExceptionHandling::DwarfTable: case ExceptionHandling::ARM: + case ExceptionHandling::Win64: PM.add(createDwarfEHPass(this)); break; case ExceptionHandling::None: diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 333d15f..292928f 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -101,9 +101,13 @@ class UserValue { void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo, LiveIntervals &LIS, const TargetInstrInfo &TII); + /// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs + /// is live. Returns true if any changes were made. 
+ bool splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs); + public: /// UserValue - Create a new UserValue. - UserValue(const MDNode *var, unsigned o, DebugLoc L, + UserValue(const MDNode *var, unsigned o, DebugLoc L, LocMap::Allocator &alloc) : variable(var), offset(o), dl(L), leader(this), next(0), locInts(alloc) {} @@ -215,6 +219,10 @@ public: void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx, const TargetRegisterInfo *TRI); + /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is + /// live. Returns true if any changes were made. + bool splitRegister(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs); + /// rewriteLocations - Rewrite virtual register locations according to the /// provided virtual register map. void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI); @@ -228,7 +236,7 @@ public: /// Only first one needs DebugLoc to identify variable's lexical scope /// in source file. DebugLoc findDebugLoc(); - void print(raw_ostream&, const TargetRegisterInfo*); + void print(raw_ostream&, const TargetMachine*); }; } // namespace @@ -290,9 +298,12 @@ public: /// mapVirtReg - Map virtual register to an equivalence class. void mapVirtReg(unsigned VirtReg, UserValue *EC); - /// renameRegister - Replace all references to OldReg wiht NewReg:SubIdx. + /// renameRegister - Replace all references to OldReg with NewReg:SubIdx. void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx); + /// splitRegister - Replace all references to OldReg with NewRegs. + void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs); + /// emitDebugVariables - Recreate DBG_VALUE instruction from data structures. void emitDebugValues(VirtRegMap *VRM); @@ -300,7 +311,7 @@ public: }; } // namespace -void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { +void UserValue::print(raw_ostream &OS, const TargetMachine *TM) { if (const MDString *MDS = dyn_cast<MDString>(variable->getOperand(2))) OS << "!\"" << MDS->getString() << "\"\t"; if (offset) @@ -312,15 +323,17 @@ void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { else OS << I.value(); } - for (unsigned i = 0, e = locations.size(); i != e; ++i) - OS << " Loc" << i << '=' << locations[i]; + for (unsigned i = 0, e = locations.size(); i != e; ++i) { + OS << " Loc" << i << '='; + locations[i].print(OS, TM); + } OS << '\n'; } void LDVImpl::print(raw_ostream &OS) { OS << "********** DEBUG VARIABLES **********\n"; for (unsigned i = 0, e = userValues.size(); i != e; ++i) - userValues[i]->print(OS, TRI); + userValues[i]->print(OS, &MF->getTarget()); } void UserValue::coalesceLocation(unsigned LocNo) { @@ -677,6 +690,143 @@ renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) { static_cast<LDVImpl*>(pImpl)->renameRegister(OldReg, NewReg, SubIdx); } +//===----------------------------------------------------------------------===// +// Live Range Splitting +//===----------------------------------------------------------------------===// + +bool +UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) { + DEBUG({ + dbgs() << "Splitting Loc" << OldLocNo << '\t'; + print(dbgs(), 0); + }); + bool DidChange = false; + LocMap::iterator LocMapI; + LocMapI.setMap(locInts); + for (unsigned i = 0; i != NewRegs.size(); ++i) { + LiveInterval *LI = NewRegs[i]; + if (LI->empty()) + continue; + + // Don't allocate the new LocNo until it is needed. + unsigned NewLocNo = ~0u; + + // Iterate over the overlaps between locInts and LI. 
+ LocMapI.find(LI->beginIndex()); + if (!LocMapI.valid()) + continue; + LiveInterval::iterator LII = LI->advanceTo(LI->begin(), LocMapI.start()); + LiveInterval::iterator LIE = LI->end(); + while (LocMapI.valid() && LII != LIE) { + // At this point, we know that LocMapI.stop() > LII->start. + LII = LI->advanceTo(LII, LocMapI.start()); + if (LII == LIE) + break; + + // Now LII->end > LocMapI.start(). Do we have an overlap? + if (LocMapI.value() == OldLocNo && LII->start < LocMapI.stop()) { + // Overlapping correct location. Allocate NewLocNo now. + if (NewLocNo == ~0u) { + MachineOperand MO = MachineOperand::CreateReg(LI->reg, false); + MO.setSubReg(locations[OldLocNo].getSubReg()); + NewLocNo = getLocationNo(MO); + DidChange = true; + } + + SlotIndex LStart = LocMapI.start(); + SlotIndex LStop = LocMapI.stop(); + + // Trim LocMapI down to the LII overlap. + if (LStart < LII->start) + LocMapI.setStartUnchecked(LII->start); + if (LStop > LII->end) + LocMapI.setStopUnchecked(LII->end); + + // Change the value in the overlap. This may trigger coalescing. + LocMapI.setValue(NewLocNo); + + // Re-insert any removed OldLocNo ranges. + if (LStart < LocMapI.start()) { + LocMapI.insert(LStart, LocMapI.start(), OldLocNo); + ++LocMapI; + assert(LocMapI.valid() && "Unexpected coalescing"); + } + if (LStop > LocMapI.stop()) { + ++LocMapI; + LocMapI.insert(LII->end, LStop, OldLocNo); + --LocMapI; + } + } + + // Advance to the next overlap. + if (LII->end < LocMapI.stop()) { + if (++LII == LIE) + break; + LocMapI.advanceTo(LII->start); + } else { + ++LocMapI; + if (!LocMapI.valid()) + break; + LII = LI->advanceTo(LII, LocMapI.start()); + } + } + } + + // Finally, remove any remaining OldLocNo intervals and OldLocNo itself. + locations.erase(locations.begin() + OldLocNo); + LocMapI.goToBegin(); + while (LocMapI.valid()) { + unsigned v = LocMapI.value(); + if (v == OldLocNo) { + DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';' + << LocMapI.stop() << ")\n"); + LocMapI.erase(); + } else { + if (v > OldLocNo) + LocMapI.setValueUnchecked(v-1); + ++LocMapI; + } + } + + DEBUG({dbgs() << "Split result: \t"; print(dbgs(), 0);}); + return DidChange; +} + +bool +UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { + bool DidChange = false; + // Split locations referring to OldReg. Iterate backwards so splitLocation can + // safely erase unused locations. + for (unsigned i = locations.size(); i ; --i) { + unsigned LocNo = i-1; + const MachineOperand *Loc = &locations[LocNo]; + if (!Loc->isReg() || Loc->getReg() != OldReg) + continue; + DidChange |= splitLocation(LocNo, NewRegs); + } + return DidChange; +} + +void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { + bool DidChange = false; + for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext()) + DidChange |= UV->splitRegister(OldReg, NewRegs); + + if (!DidChange) + return; + + // Map all of the new virtual registers. + UserValue *UV = lookupVirtReg(OldReg); + for (unsigned i = 0; i != NewRegs.size(); ++i) + mapVirtReg(NewRegs[i]->reg, UV); +} + +void LiveDebugVariables:: +splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { + if (pImpl) + static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs); +} + void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) { // Iterate over locations in reverse makes it easier to handle coalescing.
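The splitLocation logic above is dense because IntervalMap coalesces aggressively; the net effect is easier to see on plain ranges. A simplified sketch, assuming a single new register and half-open integer ranges (the Seg/retag names are illustrative, not the real data structures): pieces of the old location covered by the new register's live segments are retagged, and uncovered pieces are dropped, meaning the variable's value becomes unavailable there.

#include <algorithm>
#include <vector>

struct Seg { int Start, Stop, LocNo; }; // half-open [Start, Stop)

// Retag the parts of OldLoc segments in LocMap that the new register's
// live segments (NewLive, sorted and disjoint) cover; drop the rest.
static std::vector<Seg> retag(const std::vector<Seg> &LocMap,
                              const std::vector<Seg> &NewLive,
                              int OldLoc, int NewLoc) {
  std::vector<Seg> Out;
  for (const Seg &S : LocMap) {
    if (S.LocNo != OldLoc) {
      Out.push_back(S);       // other locations are untouched
      continue;
    }
    for (const Seg &L : NewLive) {
      int Lo = std::max(S.Start, L.Start);
      int Hi = std::min(S.Stop, L.Stop);
      if (Lo < Hi)
        Out.push_back({Lo, Hi, NewLoc}); // covered piece moves to NewLoc
    }
    // Uncovered pieces of S are dropped: the value is unavailable where
    // no new register is live.
  }
  return Out;
}

For example, an old-location range [0,10) intersected with a new live segment [4,6) keeps only [4,6) under the new location number.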
@@ -690,6 +840,9 @@ UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) { unsigned VirtReg = Loc.getReg(); if (VRM.isAssignedReg(VirtReg) && TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) { + // This can create a %noreg operand in rare cases when the sub-register + // index is no longer available. That means the user value is in a + // non-existent sub-register, and %noreg is exactly what we want. Loc.substPhysReg(VRM.getPhys(VirtReg), TRI); } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT && VRM.isSpillSlotUsed(VRM.getStackSlot(VirtReg))) { @@ -701,7 +854,6 @@ UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) { } coalesceLocation(LocNo); } - DEBUG(print(dbgs(), &TRI)); } /// findInsertLocation - Find an iterator for inserting a DBG_VALUE @@ -793,6 +945,7 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n"); const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); for (unsigned i = 0, e = userValues.size(); i != e; ++i) { + DEBUG(userValues[i]->print(dbgs(), &MF->getTarget())); userValues[i]->rewriteLocations(*VRM, *TRI); userValues[i]->emitDebugValues(VRM, *LIS, *TII); } diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h index a6e40a1..3ce3c39 100644 --- a/lib/CodeGen/LiveDebugVariables.h +++ b/lib/CodeGen/LiveDebugVariables.h @@ -21,10 +21,12 @@ #ifndef LLVM_CODEGEN_LIVEDEBUGVARIABLES_H #define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/CodeGen/MachineFunctionPass.h" namespace llvm { +class LiveInterval; class VirtRegMap; class LiveDebugVariables : public MachineFunctionPass { @@ -42,6 +44,11 @@ public: /// register. void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx); + /// splitRegister - Move any user variables in OldReg to the live ranges in + /// NewRegs where they are live. Mark the values as unavailable where no new + /// register is live. + void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs); + /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes /// that happened during register allocation. /// @param VRM Rename virtual registers according to map. diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index c77ae1b..9257191 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -578,13 +578,6 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, CopyMI = MI; handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, getOrCreateInterval(MO.getReg()), CopyMI); - // Def of a register also defines its sub-registers. - for (const unsigned* AS = tri_->getSubRegisters(MO.getReg()); *AS; ++AS) - // If MI also modifies the sub-register explicitly, avoid processing it - // more than once. Do not pass in TRI here so it checks for exact match. - if (!MI->definesRegister(*AS)) - handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, - getOrCreateInterval(*AS), 0); } } @@ -645,7 +638,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, end = MIIdx.getStoreIndex(); } else { DEBUG(dbgs() << " live through"); - end = baseIndex; + end = getMBBEndIdx(MBB); } } @@ -1514,7 +1507,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, // ... // def = ... // = use - // It's better to start a new interval to avoid artifically + // It's better to start a new interval to avoid artificially // extend the new interval. 
if (MI->readsWritesVirtualRegister(li.reg) == std::make_pair(false,true)) { diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp index 205f28a..b67f966 100644 --- a/lib/CodeGen/LiveIntervalUnion.cpp +++ b/lib/CodeGen/LiveIntervalUnion.cpp @@ -35,12 +35,20 @@ void LiveIntervalUnion::unify(LiveInterval &VirtReg) { LiveInterval::iterator RegEnd = VirtReg.end(); SegmentIter SegPos = Segments.find(RegPos->start); - for (;;) { + while (SegPos.valid()) { SegPos.insert(RegPos->start, RegPos->end, &VirtReg); if (++RegPos == RegEnd) return; SegPos.advanceTo(RegPos->start); } + + // We have reached the end of Segments, so it is no longer necessary to search + // for the insertion position. + // It is faster to insert the end first. + --RegEnd; + SegPos.insert(RegEnd->start, RegEnd->end, &VirtReg); + for (; RegPos != RegEnd; ++RegPos, ++SegPos) + SegPos.insert(RegPos->start, RegPos->end, &VirtReg); } // Remove a live virtual register's segments from this union. @@ -168,6 +176,7 @@ LiveIntervalUnion::Query::firstInterference() { return FirstInterference; CheckedFirstInterference = true; InterferenceResult &IR = FirstInterference; + IR.LiveUnionI.setMap(LiveUnion->getMap()); // Quickly skip interference check for empty sets. if (VirtReg->empty() || LiveUnion->empty()) { @@ -176,10 +185,10 @@ LiveIntervalUnion::Query::firstInterference() { // VirtReg starts first, perform double binary search. IR.VirtRegI = VirtReg->find(LiveUnion->startIndex()); if (IR.VirtRegI != VirtReg->end()) - IR.LiveUnionI = LiveUnion->find(IR.VirtRegI->start); + IR.LiveUnionI.find(IR.VirtRegI->start); } else { // LiveUnion starts first, perform double binary search. - IR.LiveUnionI = LiveUnion->find(VirtReg->beginIndex()); + IR.LiveUnionI.find(VirtReg->beginIndex()); if (IR.LiveUnionI.valid()) IR.VirtRegI = VirtReg->find(IR.LiveUnionI.start()); else @@ -235,7 +244,7 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { // // For comments on how to speed it up, see Query::findIntersection(). unsigned LiveIntervalUnion::Query:: -collectInterferingVRegs(unsigned MaxInterferingRegs) { +collectInterferingVRegs(unsigned MaxInterferingRegs, float MaxWeight) { InterferenceResult IR = firstInterference(); LiveInterval::iterator VirtRegEnd = VirtReg->end(); LiveInterval *RecentInterferingVReg = NULL; @@ -277,6 +286,11 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) { // Cache the most recent interfering vreg to bypass isSeenInterference. RecentInterferingVReg = IR.LiveUnionI.value(); ++IR.LiveUnionI; + + // Stop collecting when the max weight is exceeded. + if (RecentInterferingVReg->weight >= MaxWeight) + return InterferingVRegs.size(); + continue; } // VirtRegI may have advanced far beyond LiveUnionI, diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h index 0964ecd..c83578e 100644 --- a/lib/CodeGen/LiveIntervalUnion.h +++ b/lib/CodeGen/LiveIntervalUnion.h @@ -95,6 +95,9 @@ public: // Remove a live virtual register's segments from this union. void extract(LiveInterval &VirtReg); + // Remove all inserted virtual registers. + void clear() { Segments.clear(); ++Tag; } + // Print union, using TRI to translate register names void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const; @@ -226,7 +229,8 @@ public: // Count the virtual registers in this union that interfere with this // query's live virtual register, up to maxInterferingRegs. 
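The new MaxWeight parameter (the declaration change follows below) lets a query stop collecting as soon as it sees an interfering register at least as heavy as a given threshold; for an eviction heuristic, anything past that point cannot change the decision. A schematic reduction of that early exit, not the real Query logic:

#include <vector>

struct VReg { unsigned Id; float Weight; };

// Collect interfering registers, giving up once either cap is hit.
static unsigned collectInterfering(const std::vector<VReg> &Interference,
                                   std::vector<VReg> &Out,
                                   unsigned MaxCount, float MaxWeight) {
  for (const VReg &R : Interference) {
    Out.push_back(R);
    if (Out.size() >= MaxCount || R.Weight >= MaxWeight)
      break; // stop collecting; the result may be a partial list
  }
  return static_cast<unsigned>(Out.size());
}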
- unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX); + unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX, + float MaxWeight = HUGE_VALF); // Was this virtual register visited during collectInterferingVRegs? bool isSeenInterference(LiveInterval *VReg) const; diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index b96575e..052abad 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -15,6 +15,7 @@ #include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -24,6 +25,10 @@ using namespace llvm; +STATISTIC(NumDCEDeleted, "Number of instructions deleted by DCE"); +STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE"); +STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE"); + LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg, LiveIntervals &LIS, VirtRegMap &VRM) { @@ -36,14 +41,16 @@ LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg, return LI; } -void LiveRangeEdit::checkRematerializable(VNInfo *VNI, +bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, const TargetInstrInfo &tii, AliasAnalysis *aa) { assert(DefMI && "Missing instruction"); - if (tii.isTriviallyReMaterializable(DefMI, aa)) - remattable_.insert(VNI); scannedRemattable_ = true; + if (!tii.isTriviallyReMaterializable(DefMI, aa)) + return false; + remattable_.insert(VNI); + return true; } void LiveRangeEdit::scanRemattable(LiveIntervals &lis, @@ -59,6 +66,7 @@ void LiveRangeEdit::scanRemattable(LiveIntervals &lis, continue; checkRematerializable(VNI, DefMI, tii, aa); } + scannedRemattable_ = true; } bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis, @@ -137,11 +145,13 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB, const Remat &RM, LiveIntervals &lis, const TargetInstrInfo &tii, - const TargetRegisterInfo &tri) { + const TargetRegisterInfo &tri, + bool Late) { assert(RM.OrigMI && "Invalid remat"); tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); rematted_.insert(RM.ParentVNI); - return lis.InsertMachineInstrInMaps(--MI).getDefIndex(); + return lis.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late) + .getDefIndex(); } void LiveRangeEdit::eraseVirtReg(unsigned Reg, LiveIntervals &LIS) { @@ -194,6 +204,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, UseMI->eraseFromParent(); DefMI->addRegisterDead(LI->reg, 0); Dead.push_back(DefMI); + ++NumDCEFoldedLoads; return true; } @@ -203,6 +214,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, SetVector<LiveInterval*, SmallVector<LiveInterval*, 8>, SmallPtrSet<LiveInterval*, 8> > ToShrink; + MachineRegisterInfo &MRI = VRM.getRegInfo(); for (;;) { // Erase all dead defs. @@ -236,8 +248,13 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, continue; LiveInterval &LI = LIS.getInterval(Reg); - // Shrink read registers. - if (MI->readsVirtualRegister(Reg)) + // Shrink read registers, unless it is likely to be expensive and + // unlikely to change anything. We typically don't want to shrink the + // PIC base register that has lots of uses everywhere. + // Always shrink COPY uses that probably come from live range splitting. 
+ if (MI->readsVirtualRegister(Reg) && + (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) || + LI.killedAt(Idx))) ToShrink.insert(&LI); // Remove defined value. @@ -258,6 +275,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, delegate_->LRE_WillEraseInstruction(MI); LIS.RemoveMachineInstrFromMaps(MI); MI->eraseFromParent(); + ++NumDCEDeleted; } if (ToShrink.empty()) @@ -266,7 +284,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, // Shrink just one live interval. Then delete new dead defs. LiveInterval *LI = ToShrink.back(); ToShrink.pop_back(); - if (foldAsLoad(LI, Dead, VRM.getRegInfo(), LIS, TII)) + if (foldAsLoad(LI, Dead, MRI, LIS, TII)) continue; if (delegate_) delegate_->LRE_WillShrinkVirtReg(LI->reg); @@ -279,6 +297,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, unsigned NumComp = ConEQ.Classify(LI); if (NumComp <= 1) continue; + ++NumFracRanges; DEBUG(dbgs() << NumComp << " components: " << *LI << '\n'); SmallVector<LiveInterval*, 8> Dups(1, LI); for (unsigned i = 1; i != NumComp; ++i) { @@ -286,7 +305,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, if (delegate_) delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg); } - ConEQ.Distribute(&Dups[0], VRM.getRegInfo()); + ConEQ.Distribute(&Dups[0], MRI); } } diff --git a/lib/CodeGen/LiveRangeEdit.h b/lib/CodeGen/LiveRangeEdit.h index 54a2c83..db6740c 100644 --- a/lib/CodeGen/LiveRangeEdit.h +++ b/lib/CodeGen/LiveRangeEdit.h @@ -18,8 +18,9 @@ #ifndef LLVM_CODEGEN_LIVERANGEEDIT_H #define LLVM_CODEGEN_LIVERANGEEDIT_H -#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/CodeGen/LiveInterval.h" namespace llvm { @@ -113,6 +114,10 @@ public: bool empty() const { return size() == 0; } LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; } + ArrayRef<LiveInterval*> regs() const { + return ArrayRef<LiveInterval*>(newRegs_).slice(firstNew_); + } + /// FIXME: Temporary accessors until we can get rid of /// LiveIntervals::AddIntervalsForSpills SmallVectorImpl<LiveInterval*> *getNewVRegs() { return &newRegs_; } @@ -137,7 +142,7 @@ public: /// checkRematerializable - Manually add VNI to the list of rematerializable /// values if DefMI may be rematerializable. - void checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, + bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, const TargetInstrInfo&, AliasAnalysis*); /// Remat - Information needed to rematerialize at a specific location. @@ -165,7 +170,8 @@ public: const Remat &RM, LiveIntervals&, const TargetInstrInfo&, - const TargetRegisterInfo&); + const TargetRegisterInfo&, + bool Late = false); /// markRematerialized - explicitly mark a value as rematerialized after doing /// it manually. diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index ccbff0a..613f0c4 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -61,7 +61,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) { return OS; } -/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the +/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the /// parent pointer of the MBB, the MBB numbering, and any instructions in the /// MBB to be on the right operand list for registers. 
/// @@ -93,7 +93,7 @@ void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) { void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) { assert(N->getParent() == 0 && "machine instruction already in a basic block"); N->setParent(Parent); - + // Add the instruction's register operands to their corresponding // use/def lists. MachineFunction *MF = Parent->getParent(); @@ -110,7 +110,7 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { // Remove from the use/def lists. N->RemoveRegOperandsFromUseLists(); - + N->setParent(0); LeakDetector::addGarbageObject(N); @@ -339,32 +339,70 @@ void MachineBasicBlock::updateTerminator() { } } -void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ) { - Successors.push_back(succ); - succ->addPredecessor(this); -} +void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ, uint32_t weight) { + + // If we see non-zero value for the first time it means we actually use Weight + // list, so we fill all Weights with 0's. + if (weight != 0 && Weights.empty()) + Weights.resize(Successors.size()); + + if (weight != 0 || !Weights.empty()) + Weights.push_back(weight); + + Successors.push_back(succ); + succ->addPredecessor(this); + } void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) { succ->removePredecessor(this); succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); assert(I != Successors.end() && "Not a current successor!"); + + // If Weight list is empty it means we don't use it (disabled optimization). + if (!Weights.empty()) { + weight_iterator WI = getWeightIterator(I); + Weights.erase(WI); + } + Successors.erase(I); } -MachineBasicBlock::succ_iterator +MachineBasicBlock::succ_iterator MachineBasicBlock::removeSuccessor(succ_iterator I) { assert(I != Successors.end() && "Not a current successor!"); + + // If Weight list is empty it means we don't use it (disabled optimization). + if (!Weights.empty()) { + weight_iterator WI = getWeightIterator(I); + Weights.erase(WI); + } + (*I)->removePredecessor(this); return Successors.erase(I); } +void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old, + MachineBasicBlock *New) { + uint32_t weight = 0; + succ_iterator SI = std::find(Successors.begin(), Successors.end(), Old); + + // If Weight list is empty it means we don't use it (disabled optimization). + if (!Weights.empty()) { + weight_iterator WI = getWeightIterator(SI); + weight = *WI; + } + + // Update the successor information. + removeSuccessor(SI); + addSuccessor(New, weight); +} + void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) { Predecessors.push_back(pred); } void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) { - std::vector<MachineBasicBlock *>::iterator I = - std::find(Predecessors.begin(), Predecessors.end(), pred); + pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), pred); assert(I != Predecessors.end() && "Pred is not a predecessor of this block!"); Predecessors.erase(I); } @@ -372,10 +410,17 @@ void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) { void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) { if (this == fromMBB) return; - + while (!fromMBB->succ_empty()) { MachineBasicBlock *Succ = *fromMBB->succ_begin(); - addSuccessor(Succ); + uint32_t weight = 0; + + + // If Weight list is empty it means we don't use it (disabled optimization). 
+ if (!fromMBB->Weights.empty()) + weight = *fromMBB->Weights.begin(); + + addSuccessor(Succ, weight); fromMBB->removeSuccessor(Succ); } } @@ -384,7 +429,7 @@ void MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { if (this == fromMBB) return; - + while (!fromMBB->succ_empty()) { MachineBasicBlock *Succ = *fromMBB->succ_begin(); addSuccessor(Succ); @@ -402,8 +447,7 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { } bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const { - std::vector<MachineBasicBlock *>::const_iterator I = - std::find(Successors.begin(), Successors.end(), MBB); + const_succ_iterator I = std::find(Successors.begin(), Successors.end(), MBB); return I != Successors.end(); } @@ -487,6 +531,30 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { << " -- BB#" << NMBB->getNumber() << " -- BB#" << Succ->getNumber() << '\n'); + // On some targets like Mips, branches may kill virtual registers. Make sure + // that LiveVariables is properly updated after updateTerminator replaces the + // terminators. + LiveVariables *LV = P->getAnalysisIfAvailable<LiveVariables>(); + + // Collect a list of virtual registers killed by the terminators. + SmallVector<unsigned, 4> KilledRegs; + if (LV) + for (iterator I = getFirstTerminator(), E = end(); I != E; ++I) { + MachineInstr *MI = I; + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + if (!OI->isReg() || !OI->isUse() || !OI->isKill() || OI->isUndef()) + continue; + unsigned Reg = OI->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg) && + LV->getVarInfo(Reg).removeKill(MI)) { + KilledRegs.push_back(Reg); + DEBUG(dbgs() << "Removing terminator kill: " << *MI); + OI->setIsKill(false); + } + } + } + ReplaceUsesOfBlockWith(Succ, NMBB); updateTerminator(); @@ -504,9 +572,22 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (i->getOperand(ni+1).getMBB() == this) i->getOperand(ni+1).setMBB(NMBB); - if (LiveVariables *LV = - P->getAnalysisIfAvailable<LiveVariables>()) + // Update LiveVariables. + if (LV) { + // Restore kills of virtual registers that were killed by the terminators. + while (!KilledRegs.empty()) { + unsigned Reg = KilledRegs.pop_back_val(); + for (iterator I = end(), E = begin(); I != E;) { + if (!(--I)->addRegisterKilled(Reg, NULL, /* addIfNotFound= */ false)) + continue; + LV->getVarInfo(Reg).Kills.push_back(I); + DEBUG(dbgs() << "Restored terminator kill: " << *I); + break; + } + } + // Update relevant live-through information. LV->addNewBlock(NMBB, this, Succ); + } if (MachineDominatorTree *MDT = P->getAnalysisIfAvailable<MachineDominatorTree>()) { @@ -602,15 +683,14 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old, } // Update the successor information. - removeSuccessor(Old); - addSuccessor(New); + replaceSuccessor(Old, New); } /// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the /// CFG to be inserted. If we have proven that MBB can only branch to DestA and /// DestB, remove any other MBB successors from the CFG. DestA and DestB can be /// null. -/// +/// /// Besides DestA and DestB, retain other edges leading to LandingPads /// (currently there can be only one; we don't check or require that here). /// Note it is possible that DestA and/or DestB are LandingPads. 
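The successor-weight plumbing added above keeps Weights empty until the first non-zero weight is recorded, so functions that never supply weights pay nothing; from then on the vector is maintained in lockstep with Successors. A compact standalone model of the same invariant:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

class Block {
  std::vector<Block *> Successors;
  std::vector<uint32_t> Weights; // empty, or parallel to Successors

public:
  void addSuccessor(Block *Succ, uint32_t Weight = 0) {
    if (Weight != 0 && Weights.empty())
      Weights.resize(Successors.size()); // backfill zeros lazily
    if (Weight != 0 || !Weights.empty())
      Weights.push_back(Weight);
    Successors.push_back(Succ);
  }

  void removeSuccessor(std::size_t I) {
    assert(I < Successors.size() && "Not a current successor!");
    if (!Weights.empty())
      Weights.erase(Weights.begin() + I);
    Successors.erase(Successors.begin() + I);
  }

  uint32_t weight(std::size_t I) const {
    return Weights.empty() ? 0 : Weights[I];
  }
};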
@@ -685,6 +765,23 @@ MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) { return DL; } +/// getSuccWeight - Return the weight of the edge from this block to succ. +/// +uint32_t MachineBasicBlock::getSuccWeight(MachineBasicBlock *succ) { + succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); + return *getWeightIterator(I); +} + +/// getWeightIterator - Return the weight iterator corresponding to the I +/// successor iterator. +MachineBasicBlock::weight_iterator MachineBasicBlock:: +getWeightIterator(MachineBasicBlock::succ_iterator I) { + assert(Weights.size() == Successors.size() && "Async weight list!"); + size_t index = std::distance(Successors.begin(), I); + assert(index < Weights.size() && "Not a current successor!"); + return Weights.begin() + index; +} + void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB, bool t) { OS << "BB#" << MBB->getNumber(); diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp new file mode 100644 index 0000000..c13fa6b --- /dev/null +++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -0,0 +1,113 @@ +//===- MachineBranchProbabilityInfo.cpp - Machine Branch Probability Info -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This analysis uses probability info stored in Machine Basic Blocks. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Instructions.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfo, "machine-branch-prob", + "Machine Branch Probability Analysis", false, true) +INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob", + "Machine Branch Probability Analysis", false, true) + +char MachineBranchProbabilityInfo::ID = 0; + +uint32_t MachineBranchProbabilityInfo:: +getSumForBlock(MachineBasicBlock *MBB) const { + uint32_t Sum = 0; + + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + MachineBasicBlock *Succ = *I; + uint32_t Weight = getEdgeWeight(MBB, Succ); + uint32_t PrevSum = Sum; + + Sum += Weight; + assert(Sum > PrevSum); (void) PrevSum; + } + + return Sum; +} + +uint32_t +MachineBranchProbabilityInfo::getEdgeWeight(MachineBasicBlock *Src, + MachineBasicBlock *Dst) const { + uint32_t Weight = Src->getSuccWeight(Dst); + if (!Weight) + return DEFAULT_WEIGHT; + return Weight; +} + +bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src, + MachineBasicBlock *Dst) const { + // Hot probability is at least 4/5 = 80% + uint32_t Weight = getEdgeWeight(Src, Dst); + uint32_t Sum = getSumForBlock(Src); + + // FIXME: Implement BranchProbability::compare then change this code to + // compare this BranchProbability against a static "hot" BranchProbability.
+ return (uint64_t)Weight * 5 > (uint64_t)Sum * 4; +} + +MachineBasicBlock * +MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { + uint32_t Sum = 0; + uint32_t MaxWeight = 0; + MachineBasicBlock *MaxSucc = 0; + + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + MachineBasicBlock *Succ = *I; + uint32_t Weight = getEdgeWeight(MBB, Succ); + uint32_t PrevSum = Sum; + + Sum += Weight; + assert(Sum > PrevSum); (void) PrevSum; + + if (Weight > MaxWeight) { + MaxWeight = Weight; + MaxSucc = Succ; + } + } + + // FIXME: Use BranchProbability::compare. + if ((uint64_t)MaxWeight * 5 >= (uint64_t)Sum * 4) + return MaxSucc; + + return 0; +} + +BranchProbability +MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src, + MachineBasicBlock *Dst) const { + uint32_t N = getEdgeWeight(Src, Dst); + uint32_t D = getSumForBlock(Src); + + return BranchProbability(N, D); +} + +raw_ostream &MachineBranchProbabilityInfo:: +printEdgeProbability(raw_ostream &OS, MachineBasicBlock *Src, + MachineBasicBlock *Dst) const { + + const BranchProbability Prob = getEdgeProbability(Src, Dst); + OS << "edge MBB#" << Src->getNumber() << " -> MBB#" << Dst->getNumber() + << " probability is " << Prob + << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n"); + + return OS; +} diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 07a7d27..f97ccf6 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -365,6 +365,8 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { if (!FoundCSE) { // Look for trivial copy coalescing opportunities. if (PerformTrivialCoalescing(MI, MBB)) { + Changed = true; + // After coalescing MI itself may become a copy. if (MI->isCopyLike()) continue; @@ -379,10 +381,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { if (NewMI) { Commuted = true; FoundCSE = VNT.count(NewMI); - if (NewMI != MI) + if (NewMI != MI) { // New instruction. It doesn't need to be kept. NewMI->eraseFromParent(); - else if (!FoundCSE) + Changed = true; + } else if (!FoundCSE) // MI was changed but it didn't help, commute it back! (void)TII->commuteInstruction(MI); } @@ -450,6 +453,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { ++NumPhysCSEs; if (Commuted) ++NumCommutes; + Changed = true; } else { DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); VNT.insert(MI, CurrVN++); diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index d81e4a1..50750a5 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -65,7 +65,11 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs( Fn->getAttributes().getFnAttributes())); ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData()); - Alignment = TM.getTargetLowering()->getFunctionAlignment(F); + Alignment = TM.getTargetLowering()->getMinFunctionAlignment(); + // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. 
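+  // The target's minimum alignment is always honored; it is only raised to
+  // the preferred alignment when the function is not optimized for size.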
+ if (!Fn->hasFnAttr(Attribute::OptimizeForSize)) + Alignment = std::max(Alignment, + TM.getTargetLowering()->getPrefFunctionAlignment()); FunctionNumber = FunctionNum; JumpTableInfo = 0; } @@ -300,31 +304,19 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { OS << "Function Live Ins: "; for (MachineRegisterInfo::livein_iterator I = RegInfo->livein_begin(), E = RegInfo->livein_end(); I != E; ++I) { - if (TRI) - OS << "%" << TRI->getName(I->first); - else - OS << " %physreg" << I->first; - + OS << PrintReg(I->first, TRI); if (I->second) - OS << " in reg%" << I->second; - + OS << " in " << PrintReg(I->second, TRI); if (llvm::next(I) != E) OS << ", "; } OS << '\n'; } if (RegInfo && !RegInfo->liveout_empty()) { - OS << "Function Live Outs: "; + OS << "Function Live Outs:"; for (MachineRegisterInfo::liveout_iterator - I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I){ - if (TRI) - OS << '%' << TRI->getName(*I); - else - OS << "%physreg" << *I; - - if (llvm::next(I) != E) - OS << " "; - } + I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I) + OS << ' ' << PrintReg(*I, TRI); OS << '\n'; } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 0d137eb..36b0b83 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -125,7 +125,8 @@ void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) { assert(TargetRegisterInfo::isPhysicalRegister(Reg)); if (getSubReg()) { Reg = TRI.getSubReg(Reg, getSubReg()); - assert(Reg && "Invalid SubReg for physical register"); + // Note that getSubReg() may return 0 if the sub-register doesn't exist. + // That won't happen in legal code. setSubReg(0); } setReg(Reg); @@ -441,6 +442,10 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { OS << ")"; } + // Print nontemporal info. + if (MMO.isNonTemporal()) + OS << "(nontemporal)"; + return OS; } @@ -759,19 +764,35 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other, for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); const MachineOperand &OMO = Other->getOperand(i); + if (!MO.isReg()) { + if (!MO.isIdenticalTo(OMO)) + return false; + continue; + } + // Clients may or may not want to ignore defs when testing for equality. // For example, machine CSE pass only cares about finding common // subexpressions, so it's safe to ignore virtual register defs. 
-    if (Check != CheckDefs && MO.isReg() && MO.isDef()) {
+    if (MO.isDef()) {
       if (Check == IgnoreDefs)
         continue;
-      // Check == IgnoreVRegDefs
-      if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
-          TargetRegisterInfo::isPhysicalRegister(OMO.getReg()))
-        if (MO.getReg() != OMO.getReg())
+      else if (Check == IgnoreVRegDefs) {
+        if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+            TargetRegisterInfo::isPhysicalRegister(OMO.getReg()))
+          if (MO.getReg() != OMO.getReg())
+            return false;
+      } else {
+        if (!MO.isIdenticalTo(OMO))
           return false;
-    } else if (!MO.isIdenticalTo(OMO))
-      return false;
+        if (Check == CheckKillDead && MO.isDead() != OMO.isDead())
+          return false;
+      }
+    } else {
+      if (!MO.isIdenticalTo(OMO))
+        return false;
+      if (Check == CheckKillDead && MO.isKill() != OMO.isKill())
+        return false;
+    }
   }
   return true;
 }
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 1c0f6ad..b315702 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -39,7 +39,6 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-
 using namespace llvm;
 
 STATISTIC(NumHoisted,
@@ -169,6 +168,10 @@ namespace {
     ///
     bool IsLoopInvariantInst(MachineInstr &I);
 
+    /// HasAnyPHIUse - Return true if the specified register is used by any
+    /// phi node.
+    bool HasAnyPHIUse(unsigned Reg) const;
+
     /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
     /// and a use in the current loop, return true if the target considered
     /// it 'high'.
@@ -758,18 +761,25 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
 }
 
 
-/// HasPHIUses - Return true if the specified register has any PHI use.
-static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *MRI) {
+/// HasAnyPHIUse - Return true if the specified register is used by any
+/// phi node.
+bool MachineLICM::HasAnyPHIUse(unsigned Reg) const {
   for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
          UE = MRI->use_end(); UI != UE; ++UI) {
     MachineInstr *UseMI = &*UI;
     if (UseMI->isPHI())
       return true;
+    // Look past copies as well.
+    if (UseMI->isCopy()) {
+      unsigned Def = UseMI->getOperand(0).getReg();
+      if (TargetRegisterInfo::isVirtualRegister(Def) &&
+          HasAnyPHIUse(Def))
+        return true;
+    }
   }
   return false;
 }
 
-
 /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
 /// and a use in the current loop, return true if the target considered
 /// it 'high'.
@@ -976,14 +986,13 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
       return false;
   }
 
-  // If result(s) of this instruction is used by PHIs, then don't hoist it.
-  // The presence of joins makes it difficult for current register allocator
-  // implementation to perform remat.
+  // If result(s) of this instruction are used by PHIs outside of the loop,
+  // don't hoist it, because doing so will introduce an extra copy.
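+  // (PHI elimination materializes the join as copies in predecessor blocks,
+  // so the hoisted value would be copied anyway.)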
   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI.getOperand(i);
     if (!MO.isReg() || !MO.isDef())
       continue;
-    if (HasPHIUses(MO.getReg(), MRI))
+    if (HasAnyPHIUse(MO.getReg()))
       return false;
   }
 
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 7244d5f..08ff5bb 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -79,6 +79,8 @@ MachineRegisterInfo::constrainRegClass(unsigned Reg,
 unsigned
 MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
   assert(RegClass && "Cannot create register without RegClass!");
+  assert(RegClass->isAllocatable() &&
+         "Virtual register RegClass must be allocatable.");
 
   // New virtual register number.
   unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 8a93a24..916dff7 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -265,8 +265,11 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
     if (MI->isDebugValue())
       continue;
 
-    if (PerformTrivialForwardCoalescing(MI, &MBB))
+    bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
+    if (Joined) {
+      MadeChange = true;
       continue;
+    }
 
     if (SinkInstruction(MI, SawStore))
       ++NumSunk, MadeChange = true;
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index f95f411..471463b 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -23,6 +23,7 @@
 // the verifier errors.
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Instructions.h"
 #include "llvm/Function.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveVariables.h"
@@ -32,6 +33,7 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -394,7 +396,13 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
     if ((*I)->isLandingPad())
       LandingPadSuccs.insert(*I);
   }
-  if (LandingPadSuccs.size() > 1)
+
+  const MCAsmInfo *AsmInfo = TM->getMCAsmInfo();
+  const BasicBlock *BB = MBB->getBasicBlock();
+  if (LandingPadSuccs.size() > 1 &&
+      !(AsmInfo &&
+        AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj &&
+        BB && isa<SwitchInst>(BB->getTerminator())))
     report("MBB has more than one landing pad successor", MBB);
 
   // Call AnalyzeBranch. If it succeeds, there are several more conditions to check.
   MachineBasicBlock *TBB = 0, *FBB = 0;
   SmallVector<MachineOperand, 4> Cond;
   if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB),
                           TBB, FBB, Cond)) {
-    // If the block branches directly to a landing pad successor, pretend that
-    // the landing pad is a normal block.
-    LandingPadSuccs.erase(TBB);
-    LandingPadSuccs.erase(FBB);
-
     // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's
     // check whether its answers match up with reality.
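     // (AnalyzeBranch returned false above, meaning it understood the
     // terminators; the cases below check its TBB/FBB/Cond answers against
     // the recorded CFG.)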
if (!TBB && !FBB) { @@ -741,7 +744,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { RC = SRC; } if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) { - if (RC != DRC && !RC->hasSuperClass(DRC)) { + if (!RC->hasSuperClassEq(DRC)) { report("Illegal virtual register for instruction", MO, MONum); *OS << "Expected a " << DRC->getName() << " register, but got a " << RC->getName() << " register\n"; diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 9fd5b0e..af65f13 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -32,7 +32,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include <algorithm> -#include <map> using namespace llvm; static cl::opt<bool> diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 3489db2..315aedd 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -55,6 +55,11 @@ FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) { RegisterRegAlloc::setDefault(RegAlloc); } + // This forces linking of the linear scan register allocator, + // so -regalloc=linearscan still works in clang. + if (Ctor == createLinearScanRegisterAllocator) + return createLinearScanRegisterAllocator(); + if (Ctor != createDefaultRegisterAllocator) return Ctor(); @@ -63,6 +68,6 @@ FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) { case CodeGenOpt::None: return createFastRegisterAllocator(); default: - return createLinearScanRegisterAllocator(); + return createGreedyRegisterAllocator(); } } diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 60c24b7..982a2a5 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -22,6 +22,7 @@ #include "AntiDepBreaker.h" #include "AggressiveAntiDepBreaker.h" #include "CriticalAntiDepBreaker.h" +#include "RegisterClassInfo.h" #include "ScheduleDAGInstrs.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" @@ -80,6 +81,7 @@ namespace { class PostRAScheduler : public MachineFunctionPass { AliasAnalysis *AA; const TargetInstrInfo *TII; + RegisterClassInfo RegClassInfo; CodeGenOpt::Level OptLevel; public: @@ -135,7 +137,8 @@ namespace { public: SchedulePostRATDList( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, - AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode, + AliasAnalysis *AA, const RegisterClassInfo&, + TargetSubtarget::AntiDepBreakMode AntiDepMode, SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs); ~SchedulePostRATDList(); @@ -179,7 +182,8 @@ namespace { SchedulePostRATDList::SchedulePostRATDList( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, - AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode, + AliasAnalysis *AA, const RegisterClassInfo &RCI, + TargetSubtarget::AntiDepBreakMode AntiDepMode, SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs) : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA), KillIndices(TRI->getNumRegs()) @@ -190,9 +194,9 @@ SchedulePostRATDList::SchedulePostRATDList( TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this); AntiDepBreak = ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ? - (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, CriticalPathRCs) : + (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) : ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? 
-     (AntiDepBreaker *)new CriticalAntiDepBreaker(MF) : NULL));
+     (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : NULL));
 }
 
 SchedulePostRATDList::~SchedulePostRATDList() {
@@ -205,6 +209,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
   MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
   MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
   AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
+  RegClassInfo.runOnMachineFunction(Fn);
 
   // Check for explicit enable/disable of post-ra scheduling.
   TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE;
@@ -230,7 +235,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
 
   DEBUG(dbgs() << "PostRAScheduler\n");
 
-  SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, AntiDepMode,
+  SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, RegClassInfo, AntiDepMode,
                                  CriticalPathRCs);
 
   // Loop over all of the basic blocks
@@ -304,7 +309,7 @@ void SchedulePostRATDList::Schedule() {
   if (AntiDepBreak != NULL) {
     unsigned Broken =
       AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos,
-                                          InsertPosIndex);
+                                          InsertPosIndex, DbgValues);
 
     if (Broken != 0) {
       // We made changes. Update the dependency graph.
@@ -540,10 +545,16 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
 #endif
   --SuccSU->NumPredsLeft;
 
-  // Compute how many cycles it will be before this actually becomes
-  // available. This is the max of the start time of all predecessors plus
-  // their latencies.
-  SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
+  // Standard scheduler algorithms will recompute the depth of the successor
+  // here as follows:
+  //   SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
+  //
+  // However, we lazily compute node depth instead. Note that
+  // ScheduleNodeTopDown has already updated the depth of this node, which
+  // causes all descendants to be marked dirty. Setting the successor depth
+  // explicitly here would cause depth to be recomputed for all its ancestors.
+  // If the successor is not yet ready (because of a transitively redundant
+  // edge) then this causes depth computation to be quadratic in the size of
+  // the DAG.
 
   // If all the node's predecessors are scheduled, this node is ready
   // to be scheduled. Ignore the special ExitSU node.
@@ -655,6 +666,12 @@ void SchedulePostRATDList::ListScheduleTopDown() {
       ScheduleNodeTopDown(FoundSUnit, CurCycle);
       HazardRec->EmitInstruction(FoundSUnit);
       CycleHasInsts = true;
+      if (HazardRec->atIssueLimit()) {
+        DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle << '\n');
+        HazardRec->AdvanceCycle();
+        ++CurCycle;
+        CycleHasInsts = false;
+      }
     } else {
       if (CycleHasInsts) {
         DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 92e25e1..f1f3c99 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -337,7 +337,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
       --BeforeI;
 
       // Restore all registers immediately before the return and any
-      // terminators that preceed it.
+      // terminators that precede it.
       if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
         for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
           unsigned Reg = CSI[i].getReg();
@@ -437,7 +437,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
       --BeforeI;
 
       // Restore all registers immediately before the return and any
-      // terminators that preceed it.
+ // terminators that precede it. for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { unsigned Reg = blockCSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt index b655dda..7f75f65 100644 --- a/lib/CodeGen/README.txt +++ b/lib/CodeGen/README.txt @@ -26,7 +26,7 @@ and then "merge" mul and mov: sxth r3, r3 mla r4, r3, lr, r4 -It also increase the likelyhood the store may become dead. +It also increase the likelihood the store may become dead. //===---------------------------------------------------------------------===// @@ -162,7 +162,7 @@ synthesize the various copy insertion/inspection methods in TargetInstrInfo. //===---------------------------------------------------------------------===// -Stack coloring improvments: +Stack coloring improvements: 1. Do proper LiveStackAnalysis on all stack objects including those which are not spill slots. diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index f431d5a..0316421 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -39,6 +39,7 @@ #include "llvm/ADT/OwningPtr.h" #include "LiveIntervalUnion.h" +#include "RegisterClassInfo.h" namespace llvm { @@ -91,6 +92,7 @@ protected: MachineRegisterInfo *MRI; VirtRegMap *VRM; LiveIntervals *LIS; + RegisterClassInfo RegClassInfo; LiveUnionArray PhysReg2LiveUnion; // Current queries, one per physreg. They must be reinitialized each time we @@ -113,6 +115,10 @@ protected: return Queries[PhysReg]; } + // Invalidate all cached information about virtual registers - live ranges may + // have changed. + void invalidateVirtRegs() { ++UserTag; } + // The top-level driver. The output is a VirtRegMap that us updated with // physical register assignments. // diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 0e218a7..1d77b29 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -13,10 +13,10 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" +#include "RegAllocBase.h" #include "LiveDebugVariables.h" #include "LiveIntervalUnion.h" #include "LiveRangeEdit.h" -#include "RegAllocBase.h" #include "RenderMachineFunction.h" #include "Spiller.h" #include "VirtRegMap.h" @@ -85,7 +85,6 @@ class RABasic : public MachineFunctionPass, public RegAllocBase { // context MachineFunction *MF; - BitVector ReservedRegs; // analyses LiveStacks *LS; @@ -235,9 +234,14 @@ void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) { MRI = &vrm.getRegInfo(); VRM = &vrm; LIS = &lis; - PhysReg2LiveUnion.init(UnionAllocator, TRI->getNumRegs()); - // Cache an interferece query for each physical reg - Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]); + RegClassInfo.runOnMachineFunction(vrm.getMachineFunction()); + + const unsigned NumRegs = TRI->getNumRegs(); + if (NumRegs != PhysReg2LiveUnion.numRegs()) { + PhysReg2LiveUnion.init(UnionAllocator, NumRegs); + // Cache an interferece query for each physical reg + Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]); + } } void RegAllocBase::LiveUnionArray::clear() { @@ -251,13 +255,15 @@ void RegAllocBase::LiveUnionArray::clear() { } void RegAllocBase::releaseMemory() { - PhysReg2LiveUnion.clear(); + for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r) + PhysReg2LiveUnion[r].clear(); } // Visit all the live registers. 
If they are already assigned to a physical // register, unify them with the corresponding LiveIntervalUnion, otherwise push // them on the priority queue for later assignment. void RegAllocBase::seedLiveRegs() { + NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled); for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) { unsigned RegNum = I->first; LiveInterval &VirtReg = *I->second; @@ -273,6 +279,7 @@ void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) { << " to " << PrintReg(PhysReg, TRI) << '\n'); assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); VRM->assignVirt2Phys(VirtReg.reg, PhysReg); + MRI->setPhysRegUsed(PhysReg); PhysReg2LiveUnion[PhysReg].unify(VirtReg); ++NumAssigned; } @@ -303,7 +310,7 @@ void RegAllocBase::allocatePhysRegs() { } // Invalidate all interference queries, live ranges could have changed. - ++UserTag; + invalidateVirtRegs(); // selectOrSplit requests the allocator to return an available physical // register if possible and populate a list of new live intervals that @@ -315,6 +322,23 @@ void RegAllocBase::allocatePhysRegs() { VirtRegVec SplitVRegs; unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); + if (AvailablePhysReg == ~0u) { + // selectOrSplit failed to find a register! + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Ran out of registers during register allocation!" + "\nCannot allocate: " << *VirtReg; + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg); + MachineInstr *MI = I.skipInstruction();) { + if (!MI->isInlineAsm()) + continue; + Msg << "\nPlease check your inline asm statement for " + "invalid constraints:\n"; + MI->print(Msg, &VRM->getMachineFunction().getTarget()); + } + report_fatal_error(Msg.str()); + } + if (AvailablePhysReg) assign(*VirtReg, AvailablePhysReg); @@ -404,29 +428,31 @@ RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, // Add newly allocated physical registers to the MBB live in sets. void RegAllocBase::addMBBLiveIns(MachineFunction *MF) { NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled); - typedef SmallVector<MachineBasicBlock*, 8> MBBVec; - MBBVec liveInMBBs; - MachineBasicBlock &entryMBB = *MF->begin(); + SlotIndexes *Indexes = LIS->getSlotIndexes(); + if (MF->size() <= 1) + return; + LiveIntervalUnion::SegmentIter SI; for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg]; if (LiveUnion.empty()) continue; - for (LiveIntervalUnion::SegmentIter SI = LiveUnion.begin(); SI.valid(); - ++SI) { - - // Find the set of basic blocks which this range is live into... - liveInMBBs.clear(); - if (!LIS->findLiveInMBBs(SI.start(), SI.stop(), liveInMBBs)) continue; - - // And add the physreg for this interval to their live-in sets. 
-      for (MBBVec::iterator I = liveInMBBs.begin(), E = liveInMBBs.end();
-           I != E; ++I) {
-        MachineBasicBlock *MBB = *I;
-        if (MBB == &entryMBB) continue;
-        if (MBB->isLiveIn(PhysReg)) continue;
-        MBB->addLiveIn(PhysReg);
-      }
+    MachineFunction::iterator MBB = llvm::next(MF->begin());
+    MachineFunction::iterator MFE = MF->end();
+    SlotIndex Start, Stop;
+    tie(Start, Stop) = Indexes->getMBBRange(MBB);
+    SI.setMap(LiveUnion.getMap());
+    SI.find(Start);
+    while (SI.valid()) {
+      if (SI.start() <= Start) {
+        if (!MBB->isLiveIn(PhysReg))
+          MBB->addLiveIn(PhysReg);
+      } else if (SI.start() > Stop)
+        MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex());
+      if (++MBB == MFE)
+        break;
+      tie(Start, Stop) = Indexes->getMBBRange(MBB);
+      SI.advanceTo(Start);
+    }
   }
 }
@@ -454,14 +480,11 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
   SmallVector<unsigned, 8> PhysRegSpillCands;
 
   // Check for an available register in this class.
-  const TargetRegisterClass *TRC = MRI->getRegClass(VirtReg.reg);
-
-  for (TargetRegisterClass::iterator I = TRC->allocation_order_begin(*MF),
-         E = TRC->allocation_order_end(*MF);
-       I != E; ++I) {
-
+  ArrayRef<unsigned> Order =
+    RegClassInfo.getOrder(MRI->getRegClass(VirtReg.reg));
+  for (ArrayRef<unsigned>::iterator I = Order.begin(), E = Order.end(); I != E;
+       ++I) {
     unsigned PhysReg = *I;
-    if (ReservedRegs.test(PhysReg)) continue;
 
     // Check interference and as a side effect, initialize queries for this
     // VirtReg and its aliases.
@@ -490,8 +513,11 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
     // Tell the caller to allocate to this newly freed physical register.
     return *PhysRegI;
   }
+
   // No other spill candidates were found, so spill the current VirtReg.
   DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
+  if (!VirtReg.isSpillable())
+    return ~0u;
   LiveRangeEdit LRE(VirtReg, SplitVRegs);
   spiller().spill(LRE);
 
@@ -509,9 +535,6 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
   DEBUG(RMF = &getAnalysis<RenderMachineFunction>());
 
   RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>());
-
-  ReservedRegs = TRI->getReservedRegs(*MF);
-
   SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
 
   allocatePhysRegs();
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 15036e3..65ebdf8 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "regalloc"
+#include "RegisterClassInfo.h"
 #include "llvm/BasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -58,6 +59,7 @@ namespace {
     MachineRegisterInfo *MRI;
     const TargetRegisterInfo *TRI;
     const TargetInstrInfo *TII;
+    RegisterClassInfo RegClassInfo;
 
     // Basic block currently being allocated.
     MachineBasicBlock *MBB;
@@ -97,7 +99,7 @@ namespace {
       // immediately without checking aliases.
      regFree,
 
-      // A reserved register has been assigned expolicitly (e.g., setting up a
+      // A reserved register has been assigned explicitly (e.g., setting up a
      // call parameter), and it remains reserved until it is used.
      regReserved
 
@@ -113,9 +115,6 @@ namespace {
     // instruction, and so cannot be allocated.
     BitVector UsedInInstr;
 
-    // Allocatable - vector of allocatable physical registers.
-    BitVector Allocatable;
-
     // SkippedInstrs - Descriptors of instructions whose clobber list was
     // ignored because all registers were spilled.
It is still necessary to // mark all the clobbered registers as used by the function. @@ -396,7 +395,6 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, PhysRegState[PhysReg] = NewState; for (const unsigned *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { - UsedInInstr.set(Alias); switch (unsigned VirtReg = PhysRegState[Alias]) { case regDisabled: break; @@ -420,20 +418,25 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, // can be allocated directly. // Returns spillImpossible when PhysReg or an alias can't be spilled. unsigned RAFast::calcSpillCost(unsigned PhysReg) const { - if (UsedInInstr.test(PhysReg)) + if (UsedInInstr.test(PhysReg)) { + DEBUG(dbgs() << "PhysReg: " << PhysReg << " is already used in instr.\n"); return spillImpossible; + } switch (unsigned VirtReg = PhysRegState[PhysReg]) { case regDisabled: break; case regFree: return 0; case regReserved: + DEBUG(dbgs() << "VirtReg: " << VirtReg << " corresponding to PhysReg: " + << PhysReg << " is reserved already.\n"); return spillImpossible; default: return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean; } - // This is a disabled register, add up const of aliases. + // This is a disabled register, add up cost of aliases. + DEBUG(dbgs() << "\tRegister: " << PhysReg << " is disabled.\n"); unsigned Cost = 0; for (const unsigned *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { @@ -479,30 +482,26 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { // Ignore invalid hints. if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || - !RC->contains(Hint) || !Allocatable.test(Hint))) + !RC->contains(Hint) || !RegClassInfo.isAllocatable(Hint))) Hint = 0; // Take hint when possible. if (Hint) { - switch(calcSpillCost(Hint)) { - default: - definePhysReg(MI, Hint, regFree); - // Fall through. - case 0: + // Ignore the hint if we would have to spill a dirty register. + unsigned Cost = calcSpillCost(Hint); + if (Cost < spillDirty) { + if (Cost) + definePhysReg(MI, Hint, regFree); return assignVirtToPhysReg(LRE, Hint); - case spillImpossible: - break; } } - TargetRegisterClass::iterator AOB = RC->allocation_order_begin(*MF); - TargetRegisterClass::iterator AOE = RC->allocation_order_end(*MF); + ArrayRef<unsigned> AO = RegClassInfo.getOrder(RC); // First try to find a completely free register. - for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { + for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) { unsigned PhysReg = *I; - if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg) && - Allocatable.test(PhysReg)) + if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg)) return assignVirtToPhysReg(LRE, PhysReg); } @@ -510,10 +509,11 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { << RC->getName() << "\n"); unsigned BestReg = 0, BestCost = spillImpossible; - for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { - if (!Allocatable.test(*I)) - continue; + for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) { unsigned Cost = calcSpillCost(*I); + DEBUG(dbgs() << "\tRegister: " << *I << "\n"); + DEBUG(dbgs() << "\tCost: " << Cost << "\n"); + DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n"); // Cost is 0 when all aliases are already disabled. 
if (Cost == 0) return assignVirtToPhysReg(LRE, *I); @@ -722,9 +722,8 @@ void RAFast::handleThroughOperands(MachineInstr *MI, if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + DEBUG(dbgs() << "\tSetting reg " << Reg << " as used in instr\n"); UsedInInstr.set(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - UsedInInstr.set(*AS); } // Also mark PartialDefs as used to avoid reallocation. @@ -764,7 +763,7 @@ void RAFast::AllocateBasicBlock() { // Add live-in registers as live. for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), E = MBB->livein_end(); I != E; ++I) - if (Allocatable.test(*I)) + if (RegClassInfo.isAllocatable(*I)) definePhysReg(MII, *I, regReserved); SmallVector<unsigned, 8> VirtDead; @@ -895,7 +894,7 @@ void RAFast::AllocateBasicBlock() { } continue; } - if (!Allocatable.test(Reg)) continue; + if (!RegClassInfo.isAllocatable(Reg)) continue; if (MO.isUse()) { usePhysReg(MO); } else if (MO.isEarlyClobber()) { @@ -984,7 +983,7 @@ void RAFast::AllocateBasicBlock() { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - if (!Allocatable.test(Reg)) continue; + if (!RegClassInfo.isAllocatable(Reg)) continue; definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ? regFree : regReserved); continue; @@ -1040,9 +1039,8 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { TM = &Fn.getTarget(); TRI = TM->getRegisterInfo(); TII = TM->getInstrInfo(); - + RegClassInfo.runOnMachineFunction(Fn); UsedInInstr.resize(TRI->getNumRegs()); - Allocatable = TRI->getAllocatableSet(*MF); // initialize the virtual->physical register map to have a 'null' // mapping for all virtual registers diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 889bca3..8d06325 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -62,7 +62,6 @@ class RAGreedy : public MachineFunctionPass, // context MachineFunction *MF; - BitVector ReservedRegs; // analyses SlotIndexes *Indexes; @@ -72,6 +71,7 @@ class RAGreedy : public MachineFunctionPass, MachineLoopRanges *LoopRanges; EdgeBundles *Bundles; SpillPlacement *SpillPlacer; + LiveDebugVariables *DebugVars; // state std::auto_ptr<Spiller> SpillerInstance; @@ -94,12 +94,13 @@ class RAGreedy : public MachineFunctionPass, RS_New, ///< Never seen before. RS_First, ///< First time in the queue. RS_Second, ///< Second time in the queue. - RS_Region, ///< Produced by region splitting. - RS_Block, ///< Produced by per-block splitting. + RS_Global, ///< Produced by global splitting. RS_Local, ///< Produced by local splitting. RS_Spill ///< Produced by spilling. }; + static const char *const StageName[]; + IndexedMap<unsigned char, VirtReg2IndexFunctor> LRStage; LiveRangeStage getStage(const LiveInterval &VirtReg) const { @@ -116,6 +117,15 @@ class RAGreedy : public MachineFunctionPass, } } + // Eviction. Sometimes an assigned live range can be evicted without + // conditions, but other times it must be split after being evicted to avoid + // infinite loops. + enum CanEvict { + CE_Never, ///< Can never evict. + CE_Always, ///< Can always evict. + CE_WithSplit ///< Can evict only if range is also split or spilled. + }; + // splitting state. std::auto_ptr<SplitAnalysis> SA; std::auto_ptr<SplitEditor> SE; @@ -126,14 +136,17 @@ class RAGreedy : public MachineFunctionPass, /// All basic blocks where the current register has uses. 
  SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;
 
-  /// All basic blocks where the current register is live-through and
-  /// interference free.
-  SmallVector<unsigned, 8> TransparentBlocks;
-
   /// Global live range splitting candidate info.
   struct GlobalSplitCandidate {
     unsigned PhysReg;
     BitVector LiveBundles;
+    SmallVector<unsigned, 8> ActiveBlocks;
+
+    void reset(unsigned Reg) {
+      PhysReg = Reg;
+      LiveBundles.clear();
+      ActiveBlocks.clear();
+    }
   };
 
   /// Candidate info for each PhysReg in AllocationOrder.
@@ -141,10 +154,6 @@ class RAGreedy : public MachineFunctionPass,
   /// class.
   SmallVector<GlobalSplitCandidate, 32> GlobalCand;
 
-  /// For every instruction in SA->UseSlots, store the previous non-copy
-  /// instruction.
-  SmallVector<SlotIndex, 8> PrevSlot;
-
 public:
   RAGreedy();
 
@@ -173,18 +182,21 @@ private:
   void LRE_WillShrinkVirtReg(unsigned);
   void LRE_DidCloneVirtReg(unsigned, unsigned);
 
-  bool addSplitConstraints(unsigned, float&);
-  float calcGlobalSplitCost(unsigned, const BitVector&);
-  void splitAroundRegion(LiveInterval&, unsigned, const BitVector&,
+  float calcSpillCost();
+  bool addSplitConstraints(InterferenceCache::Cursor, float&);
+  void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
+  void growRegion(GlobalSplitCandidate &Cand, InterferenceCache::Cursor);
+  float calcGlobalSplitCost(GlobalSplitCandidate&, InterferenceCache::Cursor);
+  void splitAroundRegion(LiveInterval&, GlobalSplitCandidate&,
                          SmallVectorImpl<LiveInterval*>&);
   void calcGapWeights(unsigned, SmallVectorImpl<float>&);
-  SlotIndex getPrevMappedIndex(const MachineInstr*);
-  void calcPrevSlots();
-  unsigned nextSplitPoint(unsigned);
+  CanEvict canEvict(LiveInterval &A, LiveInterval &B);
   bool canEvictInterference(LiveInterval&, unsigned, float&);
+  unsigned tryAssign(LiveInterval&, AllocationOrder&,
+                     SmallVectorImpl<LiveInterval*>&);
   unsigned tryEvict(LiveInterval&, AllocationOrder&,
-                    SmallVectorImpl<LiveInterval*>&);
+                    SmallVectorImpl<LiveInterval*>&, unsigned = ~0u);
   unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
                           SmallVectorImpl<LiveInterval*>&);
   unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
@@ -196,6 +208,22 @@ private:
 
 char RAGreedy::ID = 0;
 
+#ifndef NDEBUG
+const char *const RAGreedy::StageName[] = {
+  "RS_New",
+  "RS_First",
+  "RS_Second",
+  "RS_Global",
+  "RS_Local",
+  "RS_Spill"
+};
+#endif
+
+// Hysteresis to use when comparing floats.
+// This helps stabilize decisions based on float comparisons.
+const float Hysteresis = 0.98f;
+
+
 FunctionPass* llvm::createGreedyRegisterAllocator() {
   return new RAGreedy();
 }
@@ -287,6 +315,7 @@ void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
 void RAGreedy::releaseMemory() {
   SpillerInstance.reset(0);
   LRStage.clear();
+  GlobalCand.clear();
   RegAllocBase::releaseMemory();
 }
 
@@ -329,28 +358,85 @@ LiveInterval *RAGreedy::dequeue() {
   return LI;
 }
 
+
+//===----------------------------------------------------------------------===//
+//                            Direct Assignment
+//===----------------------------------------------------------------------===//
+
+/// tryAssign - Try to assign VirtReg to an available register.
+unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
+                             AllocationOrder &Order,
+                             SmallVectorImpl<LiveInterval*> &NewVRegs) {
+  Order.rewind();
+  unsigned PhysReg;
+  while ((PhysReg = Order.next()))
+    if (!checkPhysRegInterference(VirtReg, PhysReg))
+      break;
+  if (!PhysReg || Order.isHint(PhysReg))
+    return PhysReg;
+
+  // PhysReg is available. Try to evict interference from a cheaper alternative.
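+  // getCostPerUse is the extra per-use cost of a register relative to others
+  // in its class (e.g. x86-64 registers that need a REX prefix), so an
+  // eviction that lands VirtReg in a cheaper register can pay for itself.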
+  unsigned Cost = TRI->getCostPerUse(PhysReg);
+
+  // Most registers have 0 additional cost.
+  if (!Cost)
+    return PhysReg;
+
+  DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost
+               << '\n');
+  unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
+  return CheapReg ? CheapReg : PhysReg;
+}
+
+
 //===----------------------------------------------------------------------===//
 //                         Interference eviction
 //===----------------------------------------------------------------------===//
 
+/// canEvict - determine if A can evict the assigned live range B. The eviction
+/// policy defined by this function together with the allocation order defined
+/// by enqueue() decides which registers ultimately end up being split and
+/// spilled.
+///
+/// This function must define a non-circular relation when it returns CE_Always,
+/// otherwise infinite eviction loops are possible. When evicting a <= RS_Second
+/// range, it is possible to return CE_WithSplit which forces the evicted
+/// register to be split or spilled before it can evict anything again. That
+/// guarantees progress.
+RAGreedy::CanEvict RAGreedy::canEvict(LiveInterval &A, LiveInterval &B) {
+  return A.weight > B.weight ? CE_Always : CE_Never;
+}
+
 /// canEvict - Return true if all interferences between VirtReg and PhysReg can
-/// be evicted. Set maxWeight to the maximal spill weight of an interference.
+/// be evicted.
+/// Return false if any interference is heavier than MaxWeight.
+/// On return, set MaxWeight to the maximal spill weight of an interference.
 bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
                                     float &MaxWeight) {
   float Weight = 0;
   for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
     LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
-    // If there is 10 or more interferences, chances are one is smaller.
-    if (Q.collectInterferingVRegs(10) >= 10)
+    // If there are 10 or more interferences, chances are one is heavier.
+    if (Q.collectInterferingVRegs(10, MaxWeight) >= 10)
       return false;
 
-    // Check if any interfering live range is heavier than VirtReg.
-    for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
-      LiveInterval *Intf = Q.interferingVRegs()[i];
+    // Check if any interfering live range is heavier than MaxWeight.
+    for (unsigned i = Q.interferingVRegs().size(); i; --i) {
+      LiveInterval *Intf = Q.interferingVRegs()[i - 1];
       if (TargetRegisterInfo::isPhysicalRegister(Intf->reg))
         return false;
-      if (Intf->weight >= VirtReg.weight)
+      if (Intf->weight >= MaxWeight)
         return false;
+      switch (canEvict(VirtReg, *Intf)) {
+      case CE_Always:
+        break;
+      case CE_Never:
+        return false;
+      case CE_WithSplit:
+        if (getStage(*Intf) > RS_Second)
+          return false;
+        break;
+      }
       Weight = std::max(Weight, Intf->weight);
     }
   }
@@ -364,21 +450,28 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
 /// @return  Physreg to assign VirtReg, or 0.
 unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
                             AllocationOrder &Order,
-                            SmallVectorImpl<LiveInterval*> &NewVRegs){
+                            SmallVectorImpl<LiveInterval*> &NewVRegs,
+                            unsigned CostPerUseLimit) {
   NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
 
   // Keep track of the lightest single interference seen so far.
- float BestWeight = 0; + float BestWeight = HUGE_VALF; unsigned BestPhys = 0; Order.rewind(); while (unsigned PhysReg = Order.next()) { - float Weight = 0; + if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) + continue; + // The first use of a register in a function has cost 1. + if (CostPerUseLimit == 1 && !MRI->isPhysRegUsed(PhysReg)) + continue; + + float Weight = BestWeight; if (!canEvictInterference(VirtReg, PhysReg, Weight)) continue; // This is an eviction candidate. - DEBUG(dbgs() << "max " << PrintReg(PhysReg, TRI) << " interference = " + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " interference = " << Weight << '\n'); if (BestPhys && Weight >= BestWeight) continue; @@ -403,6 +496,11 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, unassign(*Intf, VRM->getPhys(Intf->reg)); ++NumEvicted; NewVRegs.push_back(Intf); + // Prevent looping by forcing the evicted ranges to be split before they + // can evict anything else. + if (getStage(*Intf) < RS_Second && + canEvict(VirtReg, *Intf) == CE_WithSplit) + LRStage[Intf->reg] = RS_Second; } } return BestPhys; @@ -417,9 +515,9 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, /// interference pattern in Physreg and its aliases. Add the constraints to /// SpillPlacement and return the static cost of this split in Cost, assuming /// that all preferences in SplitConstraints are met. -/// If it is evident that no bundles will be live, abort early and return false. -bool RAGreedy::addSplitConstraints(unsigned PhysReg, float &Cost) { - InterferenceCache::Cursor Intf(IntfCache, PhysReg); +/// Return false if there are no bundles with positive bias. +bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, + float &Cost) { ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); // Reset interference dependent info. @@ -446,7 +544,7 @@ bool RAGreedy::addSplitConstraints(unsigned PhysReg, float &Cost) { BC.Entry = SpillPlacement::MustSpill, ++Ins; else if (Intf.first() < BI.FirstUse) BC.Entry = SpillPlacement::PrefSpill, ++Ins; - else if (Intf.first() < (BI.LiveThrough ? BI.LastUse : BI.Kill)) + else if (Intf.first() < BI.LastUse) ++Ins; } @@ -456,7 +554,7 @@ bool RAGreedy::addSplitConstraints(unsigned PhysReg, float &Cost) { BC.Exit = SpillPlacement::MustSpill, ++Ins; else if (Intf.last() > BI.LastUse) BC.Exit = SpillPlacement::PrefSpill, ++Ins; - else if (Intf.last() > (BI.LiveThrough ? BI.FirstUse : BI.Def)) + else if (Intf.last() > BI.FirstUse) ++Ins; } @@ -464,35 +562,41 @@ bool RAGreedy::addSplitConstraints(unsigned PhysReg, float &Cost) { if (Ins) StaticCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); } + Cost = StaticCost; // Add constraints for use-blocks. Note that these are the only constraints // that may add a positive bias, it is downhill from here. SpillPlacer->addConstraints(SplitConstraints); - if (SpillPlacer->getPositiveNodes() == 0) - return false; + return SpillPlacer->scanActiveBundles(); +} - Cost = StaticCost; - // Now handle the live-through blocks without uses. These can only add - // negative bias, so we can abort whenever there are no more positive nodes. - // Compute constraints for a group of 8 blocks at a time. +/// addThroughConstraints - Add constraints and links to SpillPlacer from the +/// live-through blocks in Blocks. 
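+/// Interference-free blocks contribute only bundle links and are batched and
+/// passed to addLinks; blocks with interference get full constraints.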
+void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, + ArrayRef<unsigned> Blocks) { const unsigned GroupSize = 8; SpillPlacement::BlockConstraint BCS[GroupSize]; - unsigned B = 0; - TransparentBlocks.clear(); + unsigned TBS[GroupSize]; + unsigned B = 0, T = 0; - ArrayRef<unsigned> ThroughBlocks = SA->getThroughBlocks(); - for (unsigned i = 0; i != ThroughBlocks.size(); ++i) { - unsigned Number = ThroughBlocks[i]; - assert(B < GroupSize && "Array overflow"); - BCS[B].Number = Number; + for (unsigned i = 0; i != Blocks.size(); ++i) { + unsigned Number = Blocks[i]; Intf.moveToBlock(Number); if (!Intf.hasInterference()) { - TransparentBlocks.push_back(Number); + assert(T < GroupSize && "Array overflow"); + TBS[T] = Number; + if (++T == GroupSize) { + SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T)); + T = 0; + } continue; } + assert(B < GroupSize && "Array overflow"); + BCS[B].Number = Number; + // Interference for the live-in value. if (Intf.first() <= Indexes->getMBBStartIdx(Number)) BCS[B].Entry = SpillPlacement::MustSpill; @@ -509,30 +613,94 @@ bool RAGreedy::addSplitConstraints(unsigned PhysReg, float &Cost) { ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B); SpillPlacer->addConstraints(Array); B = 0; - // Abort early when all hope is lost. - if (SpillPlacer->getPositiveNodes() == 0) - return false; } } ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B); SpillPlacer->addConstraints(Array); - if (SpillPlacer->getPositiveNodes() == 0) - return false; + SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T)); +} - // There is still some positive bias. Add all the links. - SpillPlacer->addLinks(TransparentBlocks); - return true; +void RAGreedy::growRegion(GlobalSplitCandidate &Cand, + InterferenceCache::Cursor Intf) { + // Keep track of through blocks that have not been added to SpillPlacer. + BitVector Todo = SA->getThroughBlocks(); + SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks; + unsigned AddedTo = 0; +#ifndef NDEBUG + unsigned Visited = 0; +#endif + + for (;;) { + ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive(); + if (NewBundles.empty()) + break; + // Find new through blocks in the periphery of PrefRegBundles. + for (int i = 0, e = NewBundles.size(); i != e; ++i) { + unsigned Bundle = NewBundles[i]; + // Look at all blocks connected to Bundle in the full graph. + ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle); + for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end(); + I != E; ++I) { + unsigned Block = *I; + if (!Todo.test(Block)) + continue; + Todo.reset(Block); + // This is a new through block. Add it to SpillPlacer later. + ActiveBlocks.push_back(Block); +#ifndef NDEBUG + ++Visited; +#endif + } + } + // Any new blocks to add? + if (ActiveBlocks.size() > AddedTo) { + ArrayRef<unsigned> Add(&ActiveBlocks[AddedTo], + ActiveBlocks.size() - AddedTo); + addThroughConstraints(Intf, Add); + AddedTo = ActiveBlocks.size(); + } + // Perhaps iterating can enable more bundles? + SpillPlacer->iterate(); + } + DEBUG(dbgs() << ", v=" << Visited); } +/// calcSpillCost - Compute how expensive it would be to split the live range in +/// SA around all use blocks instead of forming bundle regions. 
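+/// The result seeds BestCost in tryRegionSplit below, so a region candidate
+/// is only pursued when it beats simply isolating every use block.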
+float RAGreedy::calcSpillCost() { + float Cost = 0; + const LiveInterval &LI = SA->getParent(); + ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + unsigned Number = BI.MBB->getNumber(); + // We normally only need one spill instruction - a load or a store. + Cost += SpillPlacer->getBlockFrequency(Number); + + // Unless the value is redefined in the block. + if (BI.LiveIn && BI.LiveOut) { + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(Number); + LiveInterval::const_iterator I = LI.find(Start); + assert(I != LI.end() && "Expected live-in value"); + // Is there a different live-out value? If so, we need an extra spill + // instruction. + if (I->end < Stop) + Cost += SpillPlacer->getBlockFrequency(Number); + } + } + return Cost; +} /// calcGlobalSplitCost - Return the global split cost of following the split /// pattern in LiveBundles. This cost should be added to the local cost of the /// interference pattern in SplitConstraints. /// -float RAGreedy::calcGlobalSplitCost(unsigned PhysReg, - const BitVector &LiveBundles) { +float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, + InterferenceCache::Cursor Intf) { float GlobalCost = 0; + const BitVector &LiveBundles = Cand.LiveBundles; ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; @@ -549,11 +717,8 @@ float RAGreedy::calcGlobalSplitCost(unsigned PhysReg, GlobalCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); } - InterferenceCache::Cursor Intf(IntfCache, PhysReg); - ArrayRef<unsigned> ThroughBlocks = SA->getThroughBlocks(); - SplitConstraints.resize(UseBlocks.size() + ThroughBlocks.size()); - for (unsigned i = 0; i != ThroughBlocks.size(); ++i) { - unsigned Number = ThroughBlocks[i]; + for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { + unsigned Number = Cand.ActiveBlocks[i]; bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; if (!RegIn && !RegOut) @@ -578,23 +743,25 @@ float RAGreedy::calcGlobalSplitCost(unsigned PhysReg, /// avoiding interference. The 'stack' interval is the complement constructed by /// SplitEditor. It will contain the rest. /// -void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, - const BitVector &LiveBundles, +void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, + GlobalSplitCandidate &Cand, SmallVectorImpl<LiveInterval*> &NewVRegs) { + const BitVector &LiveBundles = Cand.LiveBundles; + DEBUG({ - dbgs() << "Splitting around region for " << PrintReg(PhysReg, TRI) + dbgs() << "Splitting around region for " << PrintReg(Cand.PhysReg, TRI) << " with bundles"; for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i)) dbgs() << " EB#" << i; dbgs() << ".\n"; }); - InterferenceCache::Cursor Intf(IntfCache, PhysReg); + InterferenceCache::Cursor Intf(IntfCache, Cand.PhysReg); LiveRangeEdit LREdit(VirtReg, NewVRegs, this); SE->reset(LREdit); // Create the main cross-block interval. - SE->openIntv(); + const unsigned MainIntv = SE->openIntv(); // First add all defs that are live out of a block. 
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); @@ -603,6 +770,14 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; + // Create separate intervals for isolated blocks with multiple uses. + if (!RegIn && !RegOut && BI.FirstUse != BI.LastUse) { + DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n"); + SE->splitSingleBlock(BI); + SE->selectIntv(MainIntv); + continue; + } + // Should the register be live out? if (!BI.LiveOut || !RegOut) continue; @@ -628,7 +803,7 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, DEBUG(dbgs() << ", no interference"); if (!BI.LiveThrough) { DEBUG(dbgs() << ", not live-through.\n"); - SE->useIntv(SE->enterIntvBefore(BI.Def), Stop); + SE->useIntv(SE->enterIntvBefore(BI.FirstUse), Stop); continue; } if (!RegIn) { @@ -645,10 +820,10 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, // Block has interference. DEBUG(dbgs() << ", interference to " << Intf.last()); - if (!BI.LiveThrough && Intf.last() <= BI.Def) { + if (!BI.LiveThrough && Intf.last() <= BI.FirstUse) { // The interference doesn't reach the outgoing segment. - DEBUG(dbgs() << " doesn't affect def from " << BI.Def << '\n'); - SE->useIntv(BI.Def, Stop); + DEBUG(dbgs() << " doesn't affect def from " << BI.FirstUse << '\n'); + SE->useIntv(BI.FirstUse, Stop); continue; } @@ -704,7 +879,7 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, DEBUG(dbgs() << ", no interference"); if (!BI.LiveThrough) { DEBUG(dbgs() << ", killed in block.\n"); - SE->useIntv(Start, SE->leaveIntvAfter(BI.Kill)); + SE->useIntv(Start, SE->leaveIntvAfter(BI.LastUse)); continue; } if (!RegOut) { @@ -737,10 +912,10 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, // Block has interference. DEBUG(dbgs() << ", interference from " << Intf.first()); - if (!BI.LiveThrough && Intf.first() >= BI.Kill) { + if (!BI.LiveThrough && Intf.first() >= BI.LastUse) { // The interference doesn't reach the outgoing segment. - DEBUG(dbgs() << " doesn't affect kill at " << BI.Kill << '\n'); - SE->useIntv(Start, BI.Kill); + DEBUG(dbgs() << " doesn't affect kill at " << BI.LastUse << '\n'); + SE->useIntv(Start, BI.LastUse); continue; } @@ -766,9 +941,8 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, } // Handle live-through blocks. - ArrayRef<unsigned> ThroughBlocks = SA->getThroughBlocks(); - for (unsigned i = 0; i != ThroughBlocks.size(); ++i) { - unsigned Number = ThroughBlocks[i]; + for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { + unsigned Number = Cand.ActiveBlocks[i]; bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; DEBUG(dbgs() << "Live through BB#" << Number << '\n'); @@ -787,70 +961,113 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, SE->enterIntvAtEnd(*MBB); } - SE->closeIntv(); - - // FIXME: Should we be more aggressive about splitting the stack region into - // per-block segments? The current approach allows the stack region to - // separate into connected components. Some components may be allocatable. 
-  SE->finish();
   ++NumGlobalSplits;
 
+  SmallVector<unsigned, 8> IntvMap;
+  SE->finish(&IntvMap);
+  DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+
+  LRStage.resize(MRI->getNumVirtRegs());
+  unsigned OrigBlocks = SA->getNumLiveBlocks();
+
+  // Sort out the new intervals created by splitting. We get four kinds:
+  // - Remainder intervals should not be split again.
+  // - Candidate intervals can be assigned to Cand.PhysReg.
+  // - Block-local splits are candidates for local splitting.
+  // - DCE leftovers should go back on the queue.
+  for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
+    unsigned Reg = LREdit.get(i)->reg;
+
+    // Ignore old intervals from DCE.
+    if (LRStage[Reg] != RS_New)
+      continue;
+
+    // Remainder interval. Don't try splitting again, spill if it doesn't
+    // allocate.
+    if (IntvMap[i] == 0) {
+      LRStage[Reg] = RS_Global;
+      continue;
+    }
+
+    // Main interval. Allow repeated splitting as long as the number of live
+    // blocks is strictly decreasing.
+    if (IntvMap[i] == MainIntv) {
+      if (SA->countLiveBlocks(LREdit.get(i)) >= OrigBlocks) {
+        DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
+                     << " blocks as original.\n");
+        // Don't allow repeated splitting as a safeguard against looping.
+        LRStage[Reg] = RS_Global;
+      }
+      continue;
+    }
+
+    // Other intervals are treated as new. This includes local intervals created
+    // for blocks with multiple uses, and anything created by DCE.
+  }
+
   if (VerifyEnabled)
     MF->verify(this, "After splitting live range around region");
 }
 
 unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                   SmallVectorImpl<LiveInterval*> &NewVRegs) {
-  BitVector LiveBundles, BestBundles;
-  float BestCost = 0;
-  unsigned BestReg = 0;
+  float BestCost = Hysteresis * calcSpillCost();
+  DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n');
+  const unsigned NoCand = ~0u;
+  unsigned BestCand = NoCand;
 
   Order.rewind();
   for (unsigned Cand = 0; unsigned PhysReg = Order.next(); ++Cand) {
     if (GlobalCand.size() <= Cand)
       GlobalCand.resize(Cand+1);
-    GlobalCand[Cand].PhysReg = PhysReg;
+    GlobalCand[Cand].reset(PhysReg);
 
-    SpillPlacer->prepare(LiveBundles);
+    SpillPlacer->prepare(GlobalCand[Cand].LiveBundles);
     float Cost;
-    if (!addSplitConstraints(PhysReg, Cost)) {
-      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bias\n");
+    InterferenceCache::Cursor Intf(IntfCache, PhysReg);
+    if (!addSplitConstraints(Intf, Cost)) {
+      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
       continue;
     }
-    DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tbiased = "
-                 << SpillPlacer->getPositiveNodes() << ", static = " << Cost);
-    if (BestReg && Cost >= BestCost) {
-      DEBUG(dbgs() << " worse than " << PrintReg(BestReg, TRI) << '\n');
+    DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost);
+    if (Cost >= BestCost) {
+      DEBUG({
+        if (BestCand == NoCand)
+          dbgs() << " worse than no bundles\n";
+        else
+          dbgs() << " worse than "
+                 << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
+      });
       continue;
     }
+    growRegion(GlobalCand[Cand], Intf);
 
     SpillPlacer->finish();
 
     // No live bundles, defer to splitSingleBlocks().
- if (!LiveBundles.any()) { + if (!GlobalCand[Cand].LiveBundles.any()) { DEBUG(dbgs() << " no bundles.\n"); continue; } - Cost += calcGlobalSplitCost(PhysReg, LiveBundles); + Cost += calcGlobalSplitCost(GlobalCand[Cand], Intf); DEBUG({ dbgs() << ", total = " << Cost << " with bundles"; - for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i)) + for (int i = GlobalCand[Cand].LiveBundles.find_first(); i>=0; + i = GlobalCand[Cand].LiveBundles.find_next(i)) dbgs() << " EB#" << i; dbgs() << ".\n"; }); - if (!BestReg || Cost < BestCost) { - BestReg = PhysReg; - BestCost = 0.98f * Cost; // Prevent rounding effects. - BestBundles.swap(LiveBundles); + if (Cost < BestCost) { + BestCand = Cand; + BestCost = Hysteresis * Cost; // Prevent rounding effects. } } - if (!BestReg) + if (BestCand == NoCand) return 0; - splitAroundRegion(VirtReg, BestReg, BestBundles, NewVRegs); - setStage(NewVRegs.begin(), NewVRegs.end(), RS_Region); + splitAroundRegion(VirtReg, GlobalCand[BestCand], NewVRegs); return 0; } @@ -913,47 +1130,6 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, } } -/// getPrevMappedIndex - Return the slot index of the last non-copy instruction -/// before MI that has a slot index. If MI is the first mapped instruction in -/// its block, return the block start index instead. -/// -SlotIndex RAGreedy::getPrevMappedIndex(const MachineInstr *MI) { - assert(MI && "Missing MachineInstr"); - const MachineBasicBlock *MBB = MI->getParent(); - MachineBasicBlock::const_iterator B = MBB->begin(), I = MI; - while (I != B) - if (!(--I)->isDebugValue() && !I->isCopy()) - return Indexes->getInstructionIndex(I); - return Indexes->getMBBStartIdx(MBB); -} - -/// calcPrevSlots - Fill in the PrevSlot array with the index of the previous -/// real non-copy instruction for each instruction in SA->UseSlots. -/// -void RAGreedy::calcPrevSlots() { - const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots; - PrevSlot.clear(); - PrevSlot.reserve(Uses.size()); - for (unsigned i = 0, e = Uses.size(); i != e; ++i) { - const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]); - PrevSlot.push_back(getPrevMappedIndex(MI).getDefIndex()); - } -} - -/// nextSplitPoint - Find the next index into SA->UseSlots > i such that it may -/// be beneficial to split before UseSlots[i]. -/// -/// 0 is always a valid split point -unsigned RAGreedy::nextSplitPoint(unsigned i) { - const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots; - const unsigned Size = Uses.size(); - assert(i != Size && "No split points after the end"); - // Allow split before i when Uses[i] is not adjacent to the previous use. - while (++i != Size && PrevSlot[i].getBaseIndex() <= Uses[i-1].getBaseIndex()) - ; - return i; -} - /// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only /// basic block. /// @@ -981,11 +1157,27 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, dbgs() << '\n'; }); - // For every use, find the previous mapped non-copy instruction. - // We use this to detect valid split points, and to estimate new interval - // sizes. - calcPrevSlots(); + // Since we allow local split results to be split again, there is a risk of + // creating infinite loops. It is tempting to require that the new live + // ranges have fewer instructions than the original. That would guarantee + // convergence, but it is too strict. A live range with 3 instructions can be + // split 2+3 (including the COPY), and we want to allow that. + // + // Instead we use these rules: + // + // 1.
Allow any split for ranges with getStage() < RS_Local. (Except for the + // noop split, of course). + // 2. Require progress be made for ranges with getStage() >= RS_Local. All + // the new ranges must have fewer instructions than before the split. + // 3. New ranges with the same number of instructions are marked RS_Local, + // smaller ranges are marked RS_New. + // + // These rules allow a 3 -> 2+3 split once, which we need. They also prevent + // excessive splitting and infinite loops. + // + bool ProgressRequired = getStage(VirtReg) >= RS_Local; + // Best split candidate. unsigned BestBefore = NumGaps; unsigned BestAfter = 0; float BestDiff = 0; @@ -1003,13 +1195,11 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // The new spill weight must be larger than any gap interference. // We will split before Uses[SplitBefore] and after Uses[SplitAfter]. - unsigned SplitBefore = 0, SplitAfter = nextSplitPoint(1) - 1; + unsigned SplitBefore = 0, SplitAfter = 1; // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]). // It is the spill weight that needs to be evicted. float MaxGap = GapWeight[0]; - for (unsigned i = 1; i != SplitAfter; ++i) - MaxGap = std::max(MaxGap, GapWeight[i]); for (;;) { // Live before/after split? @@ -1027,41 +1217,31 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, } // Should the interval be extended or shrunk? bool Shrink = true; - if (MaxGap < HUGE_VALF) { - // Estimate the new spill weight. - // - // Each instruction reads and writes the register, except the first - // instr doesn't read when !FirstLive, and the last instr doesn't write - // when !LastLive. - // - // We will be inserting copies before and after, so the total number of - // reads and writes is 2 * EstUses. - // - const unsigned EstUses = 2*(SplitAfter - SplitBefore) + - 2*(LiveBefore + LiveAfter); - // Try to guess the size of the new interval. This should be trivial, - // but the slot index of an inserted copy can be a lot smaller than the - // instruction it is inserted before if there are many dead indexes - // between them. - // - // We measure the distance from the instruction before SplitBefore to - // get a conservative estimate. - // - // The final distance can still be different if inserting copies - // triggers a slot index renumbering. + // How many gaps would the new range have? + unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter; + + // Legally, without causing looping? + bool Legal = !ProgressRequired || NewGaps < NumGaps; + + if (Legal && MaxGap < HUGE_VALF) { + // Estimate the new spill weight. Each instruction reads or writes the + // register. Conservatively assume there are no read-modify-write + // instructions. // - const float EstWeight = normalizeSpillWeight(blockFreq * EstUses, - PrevSlot[SplitBefore].distance(Uses[SplitAfter])); + // Try to guess the size of the new interval. + const float EstWeight = normalizeSpillWeight(blockFreq * (NewGaps + 1), + Uses[SplitBefore].distance(Uses[SplitAfter]) + + (LiveBefore + LiveAfter)*SlotIndex::InstrDist); // Would this split be possible to allocate? // Never allocate all gaps, we wouldn't be making progress. 
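The progress rules above are compact enough to distill; a hedged sketch of the legality test as a hypothetical free function, with variable names borrowed from tryLocalSplit:

    // The candidate split keeps uses [SplitBefore, SplitAfter] out of a range
    // with NumGaps gaps between uses; LiveBefore/LiveAfter say whether the
    // register is live across the edges where copies will be inserted.
    static bool isLegalLocalSplit(bool ProgressRequired, unsigned NumGaps,
                                  bool LiveBefore, bool LiveAfter,
                                  unsigned SplitBefore, unsigned SplitAfter) {
      // Gaps the new range would have, counting the inserted copies.
      unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;
      // Ranges at RS_Local or later must strictly shrink; this is what allows
      // the 3 -> 2+3 split exactly once without risking an infinite loop.
      return !ProgressRequired || NewGaps < NumGaps;
    }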
- float Diff = EstWeight - MaxGap; - DEBUG(dbgs() << " w=" << EstWeight << " d=" << Diff); - if (Diff > 0) { + DEBUG(dbgs() << " w=" << EstWeight); + if (EstWeight * Hysteresis >= MaxGap) { Shrink = false; + float Diff = EstWeight - MaxGap; if (Diff > BestDiff) { DEBUG(dbgs() << " (best)"); - BestDiff = Diff; + BestDiff = Hysteresis * Diff; BestBefore = SplitBefore; BestAfter = SplitAfter; } @@ -1070,8 +1250,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Try to shrink. if (Shrink) { - SplitBefore = nextSplitPoint(SplitBefore); - if (SplitBefore < SplitAfter) { + if (++SplitBefore < SplitAfter) { DEBUG(dbgs() << " shrink\n"); // Recompute the max when necessary. if (GapWeight[SplitBefore - 1] >= MaxGap) { @@ -1091,10 +1270,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, } DEBUG(dbgs() << " extend\n"); - for (unsigned e = nextSplitPoint(SplitAfter + 1) - 1; - SplitAfter != e; ++SplitAfter) - MaxGap = std::max(MaxGap, GapWeight[SplitAfter]); - continue; + MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]); } } @@ -1113,9 +1289,27 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]); SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]); SE->useIntv(SegStart, SegStop); - SE->closeIntv(); - SE->finish(); - setStage(NewVRegs.begin(), NewVRegs.end(), RS_Local); + SmallVector<unsigned, 8> IntvMap; + SE->finish(&IntvMap); + DebugVars->splitRegister(VirtReg.reg, LREdit.regs()); + + // If the new range has the same number of instructions as before, mark it as + // RS_Local so the next split will be forced to make progress. Otherwise, + // leave the new intervals as RS_New so they can compete. + bool LiveBefore = BestBefore != 0 || BI.LiveIn; + bool LiveAfter = BestAfter != NumGaps || BI.LiveOut; + unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter; + if (NewGaps >= NumGaps) { + DEBUG(dbgs() << "Tagging non-progress ranges: "); + assert(!ProgressRequired && "Didn't make progress when it was required."); + LRStage.resize(MRI->getNumVirtRegs()); + for (unsigned i = 0, e = IntvMap.size(); i != e; ++i) + if (IntvMap[i] == 1) { + LRStage[LREdit.get(i)->reg] = RS_Local; + DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg)); + } + DEBUG(dbgs() << '\n'); + } ++NumLocalSplits; return 0; @@ -1141,30 +1335,36 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, // Don't iterate global splitting. // Move straight to spilling if this range was produced by a global split. - LiveRangeStage Stage = getStage(VirtReg); - if (Stage >= RS_Block) + if (getStage(VirtReg) >= RS_Global) return 0; SA->analyze(&VirtReg); - // First try to split around a region spanning multiple blocks. - if (Stage < RS_Region) { - unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); - if (PhysReg || !NewVRegs.empty()) + // FIXME: SplitAnalysis may repair broken live ranges coming from the + // coalescer. That may cause the range to become allocatable which means that + // tryRegionSplit won't be making progress. This check should be replaced with + // an assertion when the coalescer is fixed. + if (SA->didRepairRange()) { + // VirtReg has changed, so all cached queries are invalid. + invalidateVirtRegs(); + if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) return PhysReg; } + // First try to split around a region spanning multiple blocks. 
+ unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); + if (PhysReg || !NewVRegs.empty()) + return PhysReg; + // Then isolate blocks with multiple uses. - if (Stage < RS_Block) { - SplitAnalysis::BlockPtrSet Blocks; - if (SA->getMultiUseBlocks(Blocks)) { - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); - SE->reset(LREdit); - SE->splitSingleBlocks(Blocks); - setStage(NewVRegs.begin(), NewVRegs.end(), RS_Block); - if (VerifyEnabled) - MF->verify(this, "After splitting live range around basic blocks"); - } + SplitAnalysis::BlockPtrSet Blocks; + if (SA->getMultiUseBlocks(Blocks)) { + LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + SE->reset(LREdit); + SE->splitSingleBlocks(Blocks); + setStage(NewVRegs.begin(), NewVRegs.end(), RS_Global); + if (VerifyEnabled) + MF->verify(this, "After splitting live range around basic blocks"); } // Don't assign any physregs. @@ -1179,21 +1379,25 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl<LiveInterval*> &NewVRegs) { // First try assigning a free register. - AllocationOrder Order(VirtReg.reg, *VRM, ReservedRegs); - while (unsigned PhysReg = Order.next()) { - if (!checkPhysRegInterference(VirtReg, PhysReg)) - return PhysReg; - } - - if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs)) + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); + if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) return PhysReg; + LiveRangeStage Stage = getStage(VirtReg); + DEBUG(dbgs() << StageName[Stage] << '\n'); + + // Try to evict a less worthy live range, but only for ranges from the primary + // queue. The RS_Second ranges already failed to do this, and they should not + // get a second chance until they have been split. + if (Stage != RS_Second) + if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs)) + return PhysReg; + assert(NewVRegs.empty() && "Cannot append to existing NewVRegs"); // The first time we see a live range, don't try to split or spill. // Wait until the second time, when all smaller ranges have been allocated. // This gives a better picture of the interference to split around. - LiveRangeStage Stage = getStage(VirtReg); if (Stage == RS_First) { LRStage[VirtReg.reg] = RS_Second; DEBUG(dbgs() << "wait for second round\n"); @@ -1201,7 +1405,10 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, return 0; } - assert(Stage < RS_Spill && "Cannot allocate after spilling"); + // If we couldn't allocate a register from spilling, there is probably some + // invalid inline assembly. The base class will report it. + if (Stage >= RS_Spill) + return ~0u; // Try splitting VirtReg or interferences.
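Taken together, the hunks above turn selectOrSplit into a short back-off ladder over the live-range stages. A simplified, hypothetical mirror of the gates (the enum order is inferred from this patch, not quoted from it):

    // Hedged sketch, not the real member function; it only names the checks.
    enum LiveRangeStage { RS_New, RS_First, RS_Second, RS_Global, RS_Local,
                          RS_Spill };

    // RS_Second ranges already lost an eviction round; split before retrying.
    static bool mayTryEviction(LiveRangeStage S) { return S != RS_Second; }

    // RS_First ranges wait for the second round so smaller ranges go first.
    static bool deferToSecondRound(LiveRangeStage S) { return S == RS_First; }

    // Past RS_Spill nothing else can help; return ~0u and let the base class
    // report the failure (probably invalid inline assembly).
    static bool shouldBailOut(LiveRangeStage S) { return S >= RS_Spill; }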
unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); @@ -1234,12 +1441,12 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>()); Indexes = &getAnalysis<SlotIndexes>(); DomTree = &getAnalysis<MachineDominatorTree>(); - ReservedRegs = TRI->getReservedRegs(*MF); SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); Loops = &getAnalysis<MachineLoopInfo>(); LoopRanges = &getAnalysis<MachineLoopRanges>(); Bundles = &getAnalysis<EdgeBundles>(); SpillPlacer = &getAnalysis<SpillPlacement>(); + DebugVars = &getAnalysis<LiveDebugVariables>(); SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree)); @@ -1258,7 +1465,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { } // Write out new DBG_VALUE instructions. - getAnalysis<LiveDebugVariables>().emitDebugValues(VRM); + DebugVars->emitDebugValues(VRM); // The pass output is in VirtRegMap. Release all the transient data. releaseMemory(); diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index ef78949..0818034 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -16,6 +16,7 @@ #include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "VirtRegRewriter.h" +#include "RegisterClassInfo.h" #include "Spiller.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Function.h" @@ -40,7 +41,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> -#include <set> #include <queue> #include <memory> #include <cmath> @@ -67,6 +67,11 @@ TrivCoalesceEnds("trivial-coalesce-ends", cl::desc("Attempt trivial coalescing of interval ends"), cl::init(false), cl::Hidden); +static cl::opt<bool> +AvoidWAWHazard("avoid-waw-hazard", + cl::desc("Avoid write-write hazards for some register classes"), + cl::init(false), cl::Hidden); + static RegisterRegAlloc linearscanRegAlloc("linearscan", "linear scan register allocator", createLinearScanRegisterAllocator); @@ -110,6 +115,7 @@ namespace { if (NumRecentlyUsedRegs > 0) RecentRegs.resize(NumRecentlyUsedRegs, 0); RecentNext = RecentRegs.begin(); + avoidWAW_ = 0; } typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr; @@ -143,6 +149,7 @@ namespace { BitVector reservedRegs_; LiveIntervals* li_; MachineLoopInfo *loopInfo; + RegisterClassInfo RegClassInfo; /// handled_ - Intervals are added to the handled_ set in the order of their /// start value. This is used for backtracking. @@ -180,6 +187,9 @@ namespace { SmallVector<unsigned, 4> RecentRegs; SmallVector<unsigned, 4>::iterator RecentNext; + // Last write-after-write register written. + unsigned avoidWAW_; + // Record that we just picked this register. void recordRecentlyUsed(unsigned reg) { assert(reg != 0 && "Recently used register is NOREG!"); @@ -227,8 +237,8 @@ namespace { // Determine if we skip this register due to its being recently used. bool isRecentlyUsed(unsigned reg) const { - return std::find(RecentRegs.begin(), RecentRegs.end(), reg) != - RecentRegs.end(); + return reg == avoidWAW_ || + std::find(RecentRegs.begin(), RecentRegs.end(), reg) != RecentRegs.end(); } private: @@ -358,13 +368,10 @@ namespace { /// getFirstNonReservedPhysReg - return the first non-reserved physical /// register in the register class.
unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) { - TargetRegisterClass::iterator aoe = RC->allocation_order_end(*mf_); - TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_); - while (i != aoe && reservedRegs_.test(*i)) - ++i; - assert(i != aoe && "All registers reserved?!"); - return *i; - } + ArrayRef<unsigned> O = RegClassInfo.getOrder(RC); + assert(!O.empty() && "All registers reserved?!"); + return O.front(); + } void ComputeRelatedRegClasses(); @@ -516,6 +523,7 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) { reservedRegs_ = tri_->getReservedRegs(fn); li_ = &getAnalysis<LiveIntervals>(); loopInfo = &getAnalysis<MachineLoopInfo>(); + RegClassInfo.runOnMachineFunction(fn); // We don't run the coalescer here because we have no reason to // interact with it. If the coalescer requires interaction, it @@ -792,7 +800,7 @@ void RALinScan::updateSpillWeights(std::vector<float> &Weights, // register class we are trying to allocate. Then add the weight to all // sub-registers of the super-register even if they are not aliases. // e.g. allocating for GR32, bh is not used, updating bl spill weight. - // bl should get the same spill weight otherwise it will be choosen + // bl should get the same spill weight otherwise it will be chosen // as a spill candidate since spilling bh doesn't make ebx available. for (unsigned i = 0, e = Supers.size(); i != e; ++i) { for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr) @@ -1116,6 +1124,12 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { active_.push_back(std::make_pair(cur, cur->begin())); handled_.push_back(cur); + // Remember physReg for avoiding a write-after-write hazard in the next + // instruction. + if (AvoidWAWHazard && + tri_->avoidWriteAfterWrite(mri_->getRegClass(cur->reg))) + avoidWAW_ = physReg; + // "Upgrade" the physical register since it has been allocated. UpgradeRegister(physReg); if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) { @@ -1152,14 +1166,11 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { bool Found = false; std::vector<std::pair<unsigned,float> > RegsWeights; + ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC); if (!minReg || SpillWeights[minReg] == HUGE_VALF) - for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_), - e = RC->allocation_order_end(*mf_); i != e; ++i) { - unsigned reg = *i; + for (unsigned i = 0; i != Order.size(); ++i) { + unsigned reg = Order[i]; float regWeight = SpillWeights[reg]; - // Don't even consider reserved regs. - if (reservedRegs_.test(reg)) - continue; // Skip recently allocated registers and reserved registers. if (minWeight > regWeight && !isRecentlyUsed(reg)) Found = true; @@ -1168,11 +1179,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // If we didn't find a register that is spillable, try aliases? if (!Found) { - for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_), - e = RC->allocation_order_end(*mf_); i != e; ++i) { - unsigned reg = *i; - if (reservedRegs_.test(reg)) - continue; + for (unsigned i = 0; i != Order.size(); ++i) { + unsigned reg = Order[i]; // No need to worry about if the alias register size < regsize of RC. // We are going to spill all registers that alias it anyway. 
for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) @@ -1432,13 +1440,13 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg)) physReg = vrm_->getPhys(physReg); - TargetRegisterClass::iterator I, E; - tie(I, E) = tri_->getAllocationOrder(RC, Hint.first, physReg, *mf_); - assert(I != E && "No allocatable register in this register class!"); + ArrayRef<unsigned> Order = tri_->getRawAllocationOrder(RC, Hint.first, + physReg, *mf_); + assert(!Order.empty() && "No allocatable register in this register class!"); // Scan for the first available register. - for (; I != E; ++I) { - unsigned Reg = *I; + for (unsigned i = 0; i != Order.size(); ++i) { + unsigned Reg = Order[i]; // Ignore "downgraded" registers. if (SkipDGRegs && DowngradedRegs.count(Reg)) continue; @@ -1446,7 +1454,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, if (reservedRegs_.test(Reg)) continue; // Skip recently allocated registers. - if (isRegAvail(Reg) && !isRecentlyUsed(Reg)) { + if (isRegAvail(Reg) && (!SkipDGRegs || !isRecentlyUsed(Reg))) { FreeReg = Reg; if (FreeReg < inactiveCounts.size()) FreeRegInactiveCount = inactiveCounts[FreeReg]; @@ -1468,8 +1476,8 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, // inactive count. Alkis found that this reduced register pressure very // slightly on X86 (in rev 1.94 of this file), though this should probably be // reevaluated now. - for (; I != E; ++I) { - unsigned Reg = *I; + for (unsigned i = 0; i != Order.size(); ++i) { + unsigned Reg = Order[i]; // Ignore "downgraded" registers. if (SkipDGRegs && DowngradedRegs.count(Reg)) continue; @@ -1477,7 +1485,8 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, if (reservedRegs_.test(Reg)) continue; if (isRegAvail(Reg) && Reg < inactiveCounts.size() && - FreeRegInactiveCount < inactiveCounts[Reg] && !isRecentlyUsed(Reg)) { + FreeRegInactiveCount < inactiveCounts[Reg] && + (!SkipDGRegs || !isRecentlyUsed(Reg))) { FreeReg = Reg; FreeRegInactiveCount = inactiveCounts[Reg]; if (FreeRegInactiveCount == MaxInactiveCount) @@ -1528,12 +1537,10 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { return Preference; } - if (!DowngradedRegs.empty()) { - unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, - true); - if (FreeReg) - return FreeReg; - } + unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, + true); + if (FreeReg) + return FreeReg; return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false); } diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 1e1f1e0..605507f 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -222,10 +222,9 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, // Compute an initial allowed set for the current vreg. 
typedef std::vector<unsigned> VRAllowed; VRAllowed vrAllowed; - for (TargetRegisterClass::iterator aoItr = trc->allocation_order_begin(*mf), - aoEnd = trc->allocation_order_end(*mf); - aoItr != aoEnd; ++aoItr) { - unsigned preg = *aoItr; + ArrayRef<unsigned> rawOrder = trc->getRawAllocationOrder(*mf); + for (unsigned i = 0; i != rawOrder.size(); ++i) { + unsigned preg = rawOrder[i]; if (!reservedRegs.test(preg)) { vrAllowed.push_back(preg); } @@ -581,7 +580,7 @@ void RegAllocPBQP::finalizeAlloc() const { if (physReg == 0) { const TargetRegisterClass *liRC = mri->getRegClass(li->reg); - physReg = *liRC->allocation_order_begin(*mf); + physReg = liRC->getRawAllocationOrder(*mf).front(); } vrm->assignVirt2Phys(li->reg, physReg); diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp new file mode 100644 index 0000000..5a77e47 --- /dev/null +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -0,0 +1,112 @@ +//===-- RegisterClassInfo.cpp - Dynamic Register Class Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the RegisterClassInfo class which provides dynamic +// information about target register classes. Callee saved and reserved +// registers depend on calling conventions and other dynamic information, so +// some things cannot be determined statically. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "RegisterClassInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetMachine.h" + +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +RegisterClassInfo::RegisterClassInfo() : Tag(0), MF(0), TRI(0), CalleeSaved(0) +{} + +void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { + bool Update = false; + MF = &mf; + + // Allocate new array the first time we see a new target. + if (MF->getTarget().getRegisterInfo() != TRI) { + TRI = MF->getTarget().getRegisterInfo(); + RegClass.reset(new RCInfo[TRI->getNumRegClasses()]); + Update = true; + } + + // Does this MF have different CSRs? + const unsigned *CSR = TRI->getCalleeSavedRegs(MF); + if (Update || CSR != CalleeSaved) { + // Build a CSRNum map. Every CSR alias gets an entry pointing to the last + // overlapping CSR. + CSRNum.clear(); + CSRNum.resize(TRI->getNumRegs(), 0); + for (unsigned N = 0; unsigned Reg = CSR[N]; ++N) + for (const unsigned *AS = TRI->getOverlaps(Reg); + unsigned Alias = *AS; ++AS) + CSRNum[Alias] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ... + Update = true; + } + CalleeSaved = CSR; + + // Different reserved registers? + BitVector RR = TRI->getReservedRegs(*MF); + if (RR != Reserved) + Update = true; + Reserved = RR; + + // Invalidate cached information from previous function. + if (Update) + ++Tag; +} + +/// compute - Compute the preferred allocation order for RC with reserved +/// registers filtered out. Volatile registers come first followed by CSR +/// aliases ordered according to the CSR order specified by the target. +void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { + RCInfo &RCI = RegClass[RC->getID()]; + + // Raw register count, including all reserved regs.
+ unsigned NumRegs = RC->getNumRegs(); + + if (!RCI.Order) + RCI.Order.reset(new unsigned[NumRegs]); + + unsigned N = 0; + SmallVector<unsigned, 16> CSRAlias; + + // FIXME: Once targets reserve registers instead of removing them from the + // allocation order, we can simply use begin/end here. + ArrayRef<unsigned> RawOrder = RC->getRawAllocationOrder(*MF); + for (unsigned i = 0; i != RawOrder.size(); ++i) { + unsigned PhysReg = RawOrder[i]; + // Remove reserved registers from the allocation order. + if (Reserved.test(PhysReg)) + continue; + if (CSRNum[PhysReg]) + // PhysReg aliases a CSR, save it for later. + CSRAlias.push_back(PhysReg); + else + RCI.Order[N++] = PhysReg; + } + RCI.NumRegs = N + CSRAlias.size(); + assert (RCI.NumRegs <= NumRegs && "Allocation order larger than regclass"); + + // CSR aliases go after the volatile registers, preserve the target's order. + std::copy(CSRAlias.begin(), CSRAlias.end(), &RCI.Order[N]); + + DEBUG({ + dbgs() << "AllocationOrder(" << RC->getName() << ") = ["; + for (unsigned I = 0; I != RCI.NumRegs; ++I) + dbgs() << ' ' << PrintReg(RCI.Order[I], TRI); + dbgs() << " ]\n"; + }); + + // RCI is now up-to-date. + RCI.Tag = Tag; +} + diff --git a/lib/CodeGen/RegisterClassInfo.h b/lib/CodeGen/RegisterClassInfo.h new file mode 100644 index 0000000..6f7d9c9 --- /dev/null +++ b/lib/CodeGen/RegisterClassInfo.h @@ -0,0 +1,121 @@ +//===-- RegisterClassInfo.h - Dynamic Register Class Info -*- C++ -*-------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the RegisterClassInfo class which provides dynamic +// information about target register classes. Callee saved and reserved +// registers depend on calling conventions and other dynamic information, so +// some things cannot be determined statically. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_REGISTERCLASSINFO_H +#define LLVM_CODEGEN_REGISTERCLASSINFO_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Target/TargetRegisterInfo.h" + +namespace llvm { + +class RegisterClassInfo { + struct RCInfo { + unsigned Tag; + unsigned NumRegs; + OwningArrayPtr<unsigned> Order; + + RCInfo() : Tag(0), NumRegs(0) {} + operator ArrayRef<unsigned>() const { + return ArrayRef<unsigned>(Order.get(), NumRegs); + } + }; + + // Brief cached information for each register class. + OwningArrayPtr<RCInfo> RegClass; + + // Tag changes whenever cached information needs to be recomputed. An RCInfo + // entry is valid when its tag matches. + unsigned Tag; + + const MachineFunction *MF; + const TargetRegisterInfo *TRI; + + // Callee saved registers of last MF. Assumed to be valid until the next + // runOnMachineFunction() call. + const unsigned *CalleeSaved; + + // Map register number to CalleeSaved index + 1. + SmallVector<uint8_t, 4> CSRNum; + + // Reserved registers in the current MF. + BitVector Reserved; + + // Compute all information about RC. + void compute(const TargetRegisterClass *RC) const; + + // Return an up-to-date RCInfo for RC.
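The ordering policy compute() implements above is worth seeing on its own: reserved registers are dropped, volatiles keep their raw order, and CSR aliases are appended last (the get() accessor that follows consults the Tag stamp to decide when to rerun it). A hedged sketch with plain std containers and caller-supplied predicates standing in for the Reserved BitVector and CSRNum map; none of these names are LLVM API:

    #include <functional>
    #include <vector>

    static std::vector<unsigned>
    computeOrder(const std::vector<unsigned> &RawOrder,
                 const std::function<bool(unsigned)> &IsReserved,
                 const std::function<bool(unsigned)> &AliasesCSR) {
      std::vector<unsigned> Order, CSRAlias;
      for (unsigned Reg : RawOrder) {
        if (IsReserved(Reg))
          continue;                  // never hand out reserved registers
        if (AliasesCSR(Reg))
          CSRAlias.push_back(Reg);   // deferred: using it costs a callee save
        else
          Order.push_back(Reg);      // volatile, preferred first
      }
      Order.insert(Order.end(), CSRAlias.begin(), CSRAlias.end());
      return Order;
    }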
+ const RCInfo &get(const TargetRegisterClass *RC) const { + const RCInfo &RCI = RegClass[RC->getID()]; + if (Tag != RCI.Tag) + compute(RC); + return RCI; + } + +public: + RegisterClassInfo(); + + /// runOnMachineFunction - Prepare to answer questions about MF. This must be called + /// before any other methods are used. + void runOnMachineFunction(const MachineFunction &MF); + + /// getNumAllocatableRegs - Returns the number of actually allocatable + /// registers in RC in the current function. + unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const { + return get(RC).NumRegs; + } + + /// getOrder - Returns the preferred allocation order for RC. The order + /// contains no reserved registers, and registers that alias callee saved + /// registers come last. + ArrayRef<unsigned> getOrder(const TargetRegisterClass *RC) const { + return get(RC); + } + + /// getLastCalleeSavedAlias - Returns the last callee saved register that + /// overlaps PhysReg, or 0 if PhysReg doesn't overlap a CSR. + unsigned getLastCalleeSavedAlias(unsigned PhysReg) const { + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); + if (unsigned N = CSRNum[PhysReg]) + return CalleeSaved[N-1]; + return 0; + } + + /// isReserved - Returns true when PhysReg is a reserved register. + /// + /// Reserved registers may belong to an allocatable register class, but the + /// target has explicitly requested that they are not used. + /// + bool isReserved(unsigned PhysReg) const { + return Reserved.test(PhysReg); + } + + /// isAllocatable - Returns true when PhysReg belongs to an allocatable + /// register class and it hasn't been reserved. + /// + /// Allocatable registers may show up in the allocation order of some virtual + /// register, so a register allocator needs to track its liveness and + /// availability. + bool isAllocatable(unsigned PhysReg) const { + return TRI->get(PhysReg).inAllocatableClass && !isReserved(PhysReg); + } +}; +} // end namespace llvm + +#endif + diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index ebfe533..9e9a145 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -154,13 +154,16 @@ void RegScavenger::forward() { BitVector DeadRegs(NumPhysRegs); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.isUndef()) + if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg || isReserved(Reg)) continue; if (MO.isUse()) { + // Ignore undef uses. + if (MO.isUndef()) + continue; // Two-address operands implicitly kill. if (!isPred && (MO.isKill() || MI->isRegTiedToDefOperand(i))) addRegWithSubRegs(KillRegs, Reg); @@ -178,12 +181,14 @@ void RegScavenger::forward() { // Verify uses and defs. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.isUndef()) + if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg || isReserved(Reg)) continue; if (MO.isUse()) { + if (MO.isUndef()) + continue; if (!isUsed(Reg)) { // Check if it's partial live: e.g.
// D0 = insert_subreg D0<undef>, S0 diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp index cbfd5a2..8b02ec4 100644 --- a/lib/CodeGen/RenderMachineFunction.cpp +++ b/lib/CodeGen/RenderMachineFunction.cpp @@ -47,7 +47,7 @@ outputFileSuffix("rmf-file-suffix", static cl::opt<std::string> machineFuncsToRender("rmf-funcs", - cl::desc("Coma seperated list of functions to render" + cl::desc("Comma separated list of functions to render" ", or \"*\"."), cl::init(""), cl::Hidden); @@ -434,8 +434,7 @@ namespace llvm { rcEnd = tri->regclass_end(); rcItr != rcEnd; ++rcItr) { const TargetRegisterClass *trc = *rcItr; - unsigned capacity = std::distance(trc->allocation_order_begin(*mf), - trc->allocation_order_end(*mf)); + unsigned capacity = trc->getRawAllocationOrder(*mf).size(); if (capacity != 0) capacityMap[trc] = capacity; @@ -482,8 +481,7 @@ namespace llvm { rcItr != rcEnd; ++rcItr) { const TargetRegisterClass *trc = *rcItr; - if (trc->allocation_order_begin(*mf) == - trc->allocation_order_end(*mf)) + if (trc->getRawAllocationOrder(*mf).empty()) continue; unsigned worstAtI = getWorst(li->reg, trc); diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 3388889..f328493 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -19,17 +19,27 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <climits> using namespace llvm; +#ifndef NDEBUG +cl::opt<bool> StressSchedOpt( + "stress-sched", cl::Hidden, cl::init(false), + cl::desc("Stress test instruction scheduling")); +#endif + ScheduleDAG::ScheduleDAG(MachineFunction &mf) : TM(mf.getTarget()), TII(TM.getInstrInfo()), TRI(TM.getRegisterInfo()), MF(mf), MRI(mf.getRegInfo()), EntrySU(), ExitSU() { +#ifndef NDEBUG + StressSched = StressSchedOpt; +#endif } ScheduleDAG::~ScheduleDAG() {} @@ -307,6 +317,8 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { if (I->isArtificial()) dbgs() << " *"; dbgs() << ": Latency=" << I->getLatency(); + if (I->isAssignedRegDep()) + dbgs() << " Reg=" << G->TRI->getName(I->getReg()); dbgs() << "\n"; } } @@ -472,7 +484,7 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { #endif } -/// AddPred - Updates the topological ordering to accomodate an edge +/// AddPred - Updates the topological ordering to accommodate an edge /// to be added from SUnit X to SUnit Y. void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) { int UpperBound, LowerBound; @@ -490,7 +502,7 @@ void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) { } } -/// RemovePred - Updates the topological ordering to accomodate an +/// RemovePred - Updates the topological ordering to accommodate an /// an edge to be removed from the specified node N from the predecessors /// of the current node M. 
void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) { diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index f17023e..2363df4 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -35,8 +35,9 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineDominatorTree &mdt) : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), InstrItins(mf.getTarget().getInstrItineraryData()), - Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) { - DbgValueVec.clear(); + Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()), + LoopRegs(MLI, MDT), FirstDbgValue(0) { + DbgValues.clear(); } /// Run - perform scheduling. @@ -120,7 +121,7 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI, // such aliases. if (PSV->isAliased(MFI)) return 0; - + MayAlias = PSV->mayAlias(MFI); return V; } @@ -174,7 +175,7 @@ void ScheduleDAGInstrs::AddSchedBarrierDeps() { for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), - E = (*SI)->livein_end(); I != E; ++I) { + E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; if (Seen.insert(Reg)) Uses[Reg].push_back(&ExitSU); @@ -200,11 +201,6 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs; std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses; - // Keep track of dangling debug references to registers. - std::vector<std::pair<MachineInstr*, unsigned> > - DanglingDebugValue(TRI->getNumRegs(), - std::make_pair(static_cast<MachineInstr*>(0), 0)); - // Check to see if the scheduler cares about latencies. bool UnitLatencies = ForceUnitLatencies(); @@ -214,26 +210,32 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // Remove any stale debug info; sometimes BuildSchedGraph is called again // without emitting the info from the previous call. - DbgValueVec.clear(); + DbgValues.clear(); + FirstDbgValue = NULL; // Model data dependencies between instructions being scheduled and the // ExitSU. AddSchedBarrierDeps(); + for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) { + assert(Defs[i].empty() && "Only BuildGraph should push/pop Defs"); + } + // Walk the list of instructions, from bottom moving up. + MachineInstr *PrevMI = NULL; for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin; MII != MIE; --MII) { MachineInstr *MI = prior(MII); - // DBG_VALUE does not have SUnit's built, so just remember these for later - // reinsertion. 
+ if (MI && PrevMI) { + DbgValues.push_back(std::make_pair(PrevMI, MI)); + PrevMI = NULL; + } + if (MI->isDebugValue()) { - if (MI->getNumOperands()==3 && MI->getOperand(0).isReg() && - MI->getOperand(0).getReg() - DanglingDebugValue[MI->getOperand(0).getReg()] = - std::make_pair(MI, DbgValueVec.size()); - DbgValueVec.push_back(MI); + PrevMI = MI; continue; } + const TargetInstrDesc &TID = MI->getDesc(); assert(!TID.isTerminator() && !MI->isLabel() && "Cannot schedule terminators or labels!"); @@ -257,13 +259,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!"); - if (MO.isDef() && DanglingDebugValue[Reg].first!=0) { - SU->DbgInstrList.push_back(DanglingDebugValue[Reg].first); - DbgValueVec[DanglingDebugValue[Reg].second] = 0; - DanglingDebugValue[Reg] = std::make_pair((MachineInstr*)0, 0); - } - std::vector<SUnit *> &UseList = Uses[Reg]; + // Defs are pushed in the order they are visited and never reordered. std::vector<SUnit *> &DefList = Defs[Reg]; // Optionally add output and anti dependencies. For anti // dependencies we use a latency of 0 because for a multi-issue @@ -283,9 +280,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg)); } for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - std::vector<SUnit *> &DefList = Defs[*Alias]; - for (unsigned i = 0, e = DefList.size(); i != e; ++i) { - SUnit *DefSU = DefList[i]; + std::vector<SUnit *> &MemDefList = Defs[*Alias]; + for (unsigned i = 0, e = MemDefList.size(); i != e; ++i) { + SUnit *DefSU = MemDefList[i]; if (DefSU == &ExitSU) continue; if (DefSU != SU && @@ -371,7 +368,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // will be overlapped by work done outside the current // scheduling region. Latency -= std::min(Latency, Count); - // Add the artifical edge. + // Add the artificial edge. ExitSU.addPred(SDep(SU, SDep::Order, Latency, /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, @@ -393,6 +390,16 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { UseList.clear(); if (!MO.isDead()) DefList.clear(); + + // Calls will not be reordered because of chain dependencies (see + // below). Since call operands are dead, calls may continue to be added + // to the DefList making dependence checking quadratic in the size of + // the block. Instead, we leave only one call at the back of the + // DefList. + if (SU->isCall) { + while (!DefList.empty() && DefList.back()->isCall) + DefList.pop_back(); + } DefList.push_back(SU); } else { UseList.push_back(SU); @@ -411,11 +418,11 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { #define STORE_LOAD_LATENCY 1 unsigned TrueMemOrderLatency = 0; if (TID.isCall() || MI->hasUnmodeledSideEffects() || - (MI->hasVolatileMemoryRef() && + (MI->hasVolatileMemoryRef() && (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) { // Be conservative with these and add dependencies on all memory // references, even those that are known to not alias. - for (std::map<const Value *, SUnit *>::iterator I = + for (std::map<const Value *, SUnit *>::iterator I = NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) { I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); } @@ -458,9 +465,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // A store to a specific PseudoSourceValue. Add precise dependencies.
// Record the def in MemDefs, first adding a dep if there is // an existing def. - std::map<const Value *, SUnit *>::iterator I = + std::map<const Value *, SUnit *>::iterator I = ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); - std::map<const Value *, SUnit *>::iterator IE = + std::map<const Value *, SUnit *>::iterator IE = ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) { I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0, @@ -513,39 +520,41 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { if (MI->isInvariantLoad(AA)) { // Invariant load, no chain dependencies needed! } else { - if (const Value *V = + if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { // A load from a specific PseudoSourceValue. Add precise dependencies. - std::map<const Value *, SUnit *>::iterator I = + std::map<const Value *, SUnit *>::iterator I = ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); - std::map<const Value *, SUnit *>::iterator IE = + std::map<const Value *, SUnit *>::iterator IE = ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0, /*isNormalMemory=*/true)); if (MayAlias) AliasMemUses[V].push_back(SU); - else + else NonAliasMemUses[V].push_back(SU); } else { // A load with no underlying object. Depend on all // potentially aliasing stores. - for (std::map<const Value *, SUnit *>::iterator I = + for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); - + PendingLoads.push_back(SU); MayAlias = true; } - + // Add dependencies on alias and barrier chains, if needed. if (MayAlias && AliasChain) AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); - } + } } } + if (PrevMI) + FirstDbgValue = PrevMI; for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) { Defs[i].clear(); @@ -572,11 +581,11 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { } } -void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, +void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const { if (!InstrItins || InstrItins->isEmpty()) return; - + // For a data dependency with a known register... if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0)) return; @@ -655,39 +664,33 @@ MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() { BB->remove(I); } - // First reinsert any remaining debug_values; these are either constants, - // or refer to live-in registers. The beginning of the block is the right - // place for the latter. The former might reasonably be placed elsewhere - // using some kind of ordering algorithm, but right now it doesn't matter. - for (int i = DbgValueVec.size()-1; i>=0; --i) - if (DbgValueVec[i]) - BB->insert(InsertPos, DbgValueVec[i]); + // If first instruction was a DBG_VALUE then put it back. + if (FirstDbgValue) + BB->insert(InsertPos, FirstDbgValue); // Then re-insert them according to the given schedule. for (unsigned i = 0, e = Sequence.size(); i != e; i++) { - SUnit *SU = Sequence[i]; - if (!SU) { + if (SUnit *SU = Sequence[i]) + BB->insert(InsertPos, SU->getInstr()); + else // Null SUnit* is a noop. 
EmitNoop(); - continue; - } - - BB->insert(InsertPos, SU->getInstr()); - for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) - BB->insert(InsertPos, SU->DbgInstrList[i]); } // Update the Begin iterator, as the first instruction in the block // may have been scheduled later. - if (!DbgValueVec.empty()) { - for (int i = DbgValueVec.size()-1; i>=0; --i) - if (DbgValueVec[i]!=0) { - Begin = DbgValueVec[DbgValueVec.size()-1]; - break; - } - } else if (!Sequence.empty()) + if (!Sequence.empty()) Begin = Sequence[0]->getInstr(); - DbgValueVec.clear(); + // Reinsert any remaining debug_values. + for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator + DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { + std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); + MachineInstr *DbgValue = P.first; + MachineInstr *OrigPrevMI = P.second; + BB->insertAfter(OrigPrevMI, DbgValue); + } + DbgValues.clear(); + FirstDbgValue = NULL; return BB; } diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h index c878287..8a4ea85 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ b/lib/CodeGen/ScheduleDAGInstrs.h @@ -110,10 +110,6 @@ namespace llvm { std::vector<std::vector<SUnit *> > Defs; std::vector<std::vector<SUnit *> > Uses; - /// DbgValueVec - Remember DBG_VALUEs that refer to a particular - /// register. - std::vector<MachineInstr *>DbgValueVec; - /// PendingLoads - Remember where unknown loads are after the most recent /// unknown store, as we iterate. As with Defs and Uses, this is here /// to minimize construction/destruction. @@ -128,6 +124,14 @@ namespace llvm { /// SmallSet<unsigned, 8> LoopLiveInRegs; + protected: + + /// DbgValues - Remember instruction that precedes DBG_VALUE. + typedef std::vector<std::pair<MachineInstr *, MachineInstr *> > + DbgValueVector; + DbgValueVector DbgValues; + MachineInstr *FirstDbgValue; + public: MachineBasicBlock::iterator Begin; // The beginning of the range to // be scheduled. The range extends diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a9afec8..4ac590a 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -138,6 +138,10 @@ namespace { SDValue PromoteExtend(SDValue Op); bool PromoteLoad(SDValue Op); + void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, + SDValue Trunc, SDValue ExtLoad, DebugLoc DL, + ISD::NodeType ExtType); + /// combine - call the node-specific routine that knows how to fold each /// particular type of node. If that doesn't do anything, try the /// target-specific DAG combines. @@ -165,6 +169,8 @@ namespace { SDValue visitMULHS(SDNode *N); SDValue visitSMUL_LOHI(SDNode *N); SDValue visitUMUL_LOHI(SDNode *N); + SDValue visitSMULO(SDNode *N); + SDValue visitUMULO(SDNode *N); SDValue visitSDIVREM(SDNode *N); SDValue visitUDIVREM(SDNode *N); SDValue visitAND(SDNode *N); @@ -529,7 +535,8 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, cast<ConstantSDNode>(N0.getOperand(1)), cast<ConstantSDNode>(N1)); return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); - } else if (N0.hasOneUse()) { + } + if (N0.hasOneUse()) { // reassoc.
(op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, N0.getOperand(0), N1); @@ -546,7 +553,8 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, cast<ConstantSDNode>(N1.getOperand(1)), cast<ConstantSDNode>(N0)); return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); - } else if (N1.hasOneUse()) { + } + if (N1.hasOneUse()) { // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, N1.getOperand(0), N0); @@ -990,6 +998,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) { dbgs() << "\nWith: "; RV.getNode()->dump(&DAG); dbgs() << '\n'); + + // Transfer debug value. + DAG.TransferDbgValues(SDValue(N, 0), RV); WorkListRemover DeadNodes(*this); if (N->getNumValues() == RV.getNode()->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes); @@ -1045,6 +1056,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::MULHS: return visitMULHS(N); case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); + case ISD::SMULO: return visitSMULO(N); + case ISD::UMULO: return visitUMULO(N); case ISD::SDIVREM: return visitSDIVREM(N); case ISD::UDIVREM: return visitUDIVREM(N); case ISD::AND: return visitAND(N); @@ -1566,7 +1579,8 @@ static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations) { if (!VT.isVector()) { return DAG.getConstant(0, VT); - } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { + } + if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { // Produce a vector of zeros. SDValue El = DAG.getConstant(0, VT.getVectorElementType()); std::vector<SDValue> Ops(VT.getVectorNumElements(), El); @@ -2174,6 +2188,26 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSMULO(SDNode *N) { + // (smulo x, 2) -> (saddo x, x) + if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) + if (C2->getAPIntValue() == 2) + return DAG.getNode(ISD::SADDO, N->getDebugLoc(), N->getVTList(), + N->getOperand(0), N->getOperand(0)); + + return SDValue(); +} + +SDValue DAGCombiner::visitUMULO(SDNode *N) { + // (umulo x, 2) -> (uaddo x, x) + if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) + if (C2->getAPIntValue() == 2) + return DAG.getNode(ISD::UADDO, N->getDebugLoc(), N->getVTList(), + N->getOperand(0), N->getOperand(0)); + + return SDValue(); +} + SDValue DAGCombiner::visitSDIVREM(SDNode *N) { SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM); if (Res.getNode()) return Res; @@ -3000,6 +3034,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl x, 0) -> x if (N1C && N1C->isNullValue()) return N0; + // fold (shl undef, x) -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); // if (shl x, c) is known to be zero, return 0 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(OpSizeInBits))) @@ -3062,26 +3099,27 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } } - // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or - // (srl (and x, (shl -1, c1)), (sub c1, c2)) + // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or + // (and (srl x, (sub c1, c2)), MASK) if (N1C && N0.getOpcode() == ISD::SRL && N0.getOperand(1).getOpcode() == ISD::Constant) { uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); if (c1 < VT.getSizeInBits()) { uint64_t c2 = 
N1C->getZExtValue(); - SDValue HiBitsMask = - DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(), - VT.getSizeInBits() - c1), - VT); - SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, - N0.getOperand(0), - HiBitsMask); - if (c2 > c1) - return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask, - DAG.getConstant(c2-c1, N1.getValueType())); - else - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask, - DAG.getConstant(c1-c2, N1.getValueType())); + APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), + VT.getSizeInBits() - c1); + SDValue Shift; + if (c2 > c1) { + Mask = Mask.shl(c2-c1); + Shift = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(c2-c1, N1.getValueType())); + } else { + Mask = Mask.lshr(c1-c2); + Shift = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(c1-c2, N1.getValueType())); + } + return DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, Shift, + DAG.getConstant(Mask, VT)); } } // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) @@ -3323,8 +3361,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return DAG.getUNDEF(VT); if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { + uint64_t ShiftAmt = N1C->getZExtValue(); SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, - N0.getOperand(0), N1); + N0.getOperand(0), + DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); AddToWorkList(SmallShift.getNode()); return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); } @@ -3663,6 +3703,28 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, return true; } +void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, + SDValue Trunc, SDValue ExtLoad, DebugLoc DL, + ISD::NodeType ExtType) { + // Extend SetCC uses if necessary. + for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector<SDValue, 4> Ops; + + for (unsigned j = 0; j != 2; ++j) { + SDValue SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp)); + } + + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), + &Ops[0], Ops.size())); + } +} + SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -3751,27 +3813,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - - // Extend SetCC uses if necessary. - for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { - SDNode *SetCC = SetCCs[i]; - SmallVector<SDValue, 4> Ops; - - for (unsigned j = 0; j != 2; ++j) { - SDValue SOp = SetCC->getOperand(j); - if (SOp == Trunc) - Ops.push_back(ExtLoad); - else - Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, - N->getDebugLoc(), VT, SOp)); - } - - Ops.push_back(SetCC->getOperand(2)); - CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), - SetCC->getValueType(0), - &Ops[0], Ops.size())); - } - + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::SIGN_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} } @@ -3799,6 +3842,45 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } } + // fold (sext (and/or/xor (load x), cst)) -> + // (and/or/xor (sextload x), (sext cst)) + if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || + N0.getOpcode() == ISD::XOR) && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(1).getOpcode() == ISD::Constant && + TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) && + (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); + if (LN0->getExtensionType() != ISD::ZEXTLOAD) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND, + SetCCs, TLI); + if (DoXform) { + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT, + LN0->getChain(), LN0->getBasePtr(), + LN0->getPointerInfo(), + LN0->getMemoryVT(), + LN0->isVolatile(), + LN0->isNonTemporal(), + LN0->getAlignment()); + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.sext(VT.getSizeInBits()); + SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + ExtLoad, DAG.getConstant(Mask, VT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, + N0.getOperand(0).getDebugLoc(), + N0.getOperand(0).getValueType(), ExtLoad); + CombineTo(N, And); + CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::SIGN_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + } + if (N0.getOpcode() == ISD::SETCC) { // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. @@ -3882,7 +3964,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // CombineTo deleted the truncate, if needed, but not what's under it. AddToWorkList(oye); } - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -3957,27 +4039,48 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - // Extend SetCC uses if necessary. - for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { - SDNode *SetCC = SetCCs[i]; - SmallVector<SDValue, 4> Ops; - - for (unsigned j = 0; j != 2; ++j) { - SDValue SOp = SetCC->getOperand(j); - if (SOp == Trunc) - Ops.push_back(ExtLoad); - else - Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND, - N->getDebugLoc(), VT, SOp)); - } + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::ZERO_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } - Ops.push_back(SetCC->getOperand(2)); - CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), - SetCC->getValueType(0), - &Ops[0], Ops.size())); + // fold (zext (and/or/xor (load x), cst)) -> + // (and/or/xor (zextload x), (zext cst)) + if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || + N0.getOpcode() == ISD::XOR) && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(1).getOpcode() == ISD::Constant && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) && + (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); + if (LN0->getExtensionType() != ISD::SEXTLOAD) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND, + SetCCs, TLI); + if (DoXform) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, + LN0->getChain(), LN0->getBasePtr(), + LN0->getPointerInfo(), + LN0->getMemoryVT(), + LN0->isVolatile(), + LN0->isNonTemporal(), + LN0->getAlignment()); + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.zext(VT.getSizeInBits()); + SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + ExtLoad, DAG.getConstant(Mask, VT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, + N0.getOperand(0).getDebugLoc(), + N0.getOperand(0).getValueType(), ExtLoad); + CombineTo(N, And); + CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::ZERO_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } - - return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -4012,7 +4115,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT EltVT = VT.getVectorElementType(); SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(), DAG.getConstant(1, EltVT)); - if (VT.getSizeInBits() == N0VT.getSizeInBits()) { + if (VT.getSizeInBits() == N0VT.getSizeInBits()) // We know that the # elements of the results is the same as the // # elements of the compare (and the # elements of the compare result // for that matter). Check to see that they are the same size. 
If so, @@ -4024,25 +4127,24 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { cast<CondCodeSDNode>(N0.getOperand(2))->get()), DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &OneOps[0], OneOps.size())); - } else { - // If the desired elements are smaller or larger than the source - // elements we can use a matching integer vector type and then - // truncate/sign extend - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); - SDValue VsetCC = - DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, - DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT), - DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, - &OneOps[0], OneOps.size())); - } + + // If the desired elements are smaller or larger than the source + // elements we can use a matching integer vector type and then + // truncate/sign extend + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); + SDValue VsetCC = + DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT), + DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + &OneOps[0], OneOps.size())); } // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc @@ -4110,7 +4212,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // CombineTo deleted the truncate, if needed, but not what's under it. AddToWorkList(oye); } - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -4166,27 +4268,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - - // Extend SetCC uses if necessary. - for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { - SDNode *SetCC = SetCCs[i]; - SmallVector<SDValue, 4> Ops; - - for (unsigned j = 0; j != 2; ++j) { - SDValue SOp = SetCC->getOperand(j); - if (SOp == Trunc) - Ops.push_back(ExtLoad); - else - Ops.push_back(DAG.getNode(ISD::ANY_EXTEND, - N->getDebugLoc(), VT, SOp)); - } - - Ops.push_back(SetCC->getOperand(2)); - CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), - SetCC->getValueType(0), - &Ops[0], Ops.size())); - } - + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::ANY_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -6265,6 +6348,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { ST->isNonTemporal(), OrigAlign); } + // Turn 'store undef, Ptr' -> nothing. 
+ if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed()) + return Chain; + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) { // NOTE: If the original store is volatile, this transform must not increase @@ -6298,8 +6385,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getStore(Chain, N->getDebugLoc(), Tmp, Ptr, ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); - } else if (!ST->isVolatile() && - TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + } + + if (!ST->isVolatile() && + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { // Many FP stores are not made apparent until after legalize, e.g. for // argument passing. Since this is so common, custom legalize the // 64-bit integer store into two 32-bit stores. @@ -6393,8 +6482,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" SDValue Shorter = GetDemandedBits(Value, - APInt::getLowBitsSet(Value.getValueSizeInBits(), - ST->getMemoryVT().getSizeInBits())); + APInt::getLowBitsSet( + Value.getValueType().getScalarType().getSizeInBits(), + ST->getMemoryVT().getScalarType().getSizeInBits())); AddToWorkList(Value.getNode()); if (Shorter.getNode()) return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, @@ -6486,18 +6576,18 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (scalar_to_vector val, 0) -> val SDValue InVec = N->getOperand(0); - if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { - // Check if the result type doesn't match the inserted element type. A - // SCALAR_TO_VECTOR may truncate the inserted element and the - // EXTRACT_VECTOR_ELT may widen the extracted vector. - SDValue InOp = InVec.getOperand(0); - EVT NVT = N->getValueType(0); - if (InOp.getValueType() != NVT) { - assert(InOp.getValueType().isInteger() && NVT.isInteger()); - return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT); - } - return InOp; - } + if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { + // Check if the result type doesn't match the inserted element type. A + // SCALAR_TO_VECTOR may truncate the inserted element and the + // EXTRACT_VECTOR_ELT may widen the extracted vector. + SDValue InOp = InVec.getOperand(0); + EVT NVT = N->getValueType(0); + if (InOp.getValueType() != NVT) { + assert(InOp.getValueType().isInteger() && NVT.isInteger()); + return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT); + } + return InOp; + } // Perform only after legalization to ensure build_vector / vector_shuffle // optimizations have already been done. @@ -6558,7 +6648,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } } - if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile()) + if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) return SDValue(); // If Idx was -1 above, Elt is going to be -1, so just return undef. 
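// --- A standalone sketch, not part of the patch: the
// 'store float 1.0, Ptr' -> 'store int ..., Ptr' transform above only
// re-labels the bits being stored. This is the value-level bitcast it
// performs; 0x3F800000 is the IEEE-754 single-precision pattern for 1.0f.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const float F = 1.0f;
  uint32_t Bits;
  assert(sizeof(F) == sizeof(Bits) && "expected a 32-bit float");
  std::memcpy(&Bits, &F, sizeof(Bits));  // what ISD::BITCAST does to the value
  assert(Bits == 0x3F800000u);
  return 0;
}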
@@ -7497,18 +7587,17 @@ bool DAGCombiner::FindAliasInfo(SDNode *N, SrcValueAlign = LD->getOriginalAlignment(); TBAAInfo = LD->getTBAAInfo(); return true; - } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + } + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { Ptr = ST->getBasePtr(); Size = ST->getMemoryVT().getSizeInBits() >> 3; SrcValue = ST->getSrcValue(); SrcValueOffset = ST->getSrcValueOffset(); SrcValueAlign = ST->getOriginalAlignment(); TBAAInfo = ST->getTBAAInfo(); - } else { - llvm_unreachable("FindAliasInfo expected a memory operand"); + return false; } - - return false; + llvm_unreachable("FindAliasInfo expected a memory operand"); } /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, @@ -7621,13 +7710,13 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { // Accumulate all the aliases to this node. GatherAllAliases(N, OldChain, Aliases); - if (Aliases.size() == 0) { - // If no operands then chain to entry token. + // If no operands then chain to entry token. + if (Aliases.size() == 0) return DAG.getEntryNode(); - } else if (Aliases.size() == 1) { - // If a single operand then chain to it. We don't need to revisit it. + + // If a single operand then chain to it. We don't need to revisit it. + if (Aliases.size() == 1) return Aliases[0]; - } // Construct a custom tailored token factor. return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index ea8ace3..797f174 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -43,6 +43,8 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Operator.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -109,8 +111,8 @@ unsigned FastISel::getRegForValue(const Value *V) { // of whether FastISel can handle them. MVT VT = RealVT.getSimpleVT(); if (!TLI.isTypeLegal(VT)) { - // Promote MVT::i1 to a legal type though, because it's common and easy. - if (VT == MVT::i1) + // Handle integer promotions, though, because they're common and easy. + if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT(); else return 0; @@ -121,10 +123,9 @@ unsigned FastISel::getRegForValue(const Value *V) { // only locally. This is because Instructions already have the SSA // def-dominates-use requirement enforced. DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V); - if (I != FuncInfo.ValueMap.end()) { - unsigned Reg = I->second; - return Reg; - } + if (I != FuncInfo.ValueMap.end()) + return I->second; + unsigned Reg = LocalValueMap[V]; if (Reg != 0) return Reg; @@ -164,8 +165,12 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { Reg = getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext()))); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { - // Try to emit the constant directly. - Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF); + if (CF->isNullValue()) { + Reg = TargetMaterializeFloatZero(CF); + } else { + // Try to emit the constant directly. + Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF); + } if (!Reg) { // Try to emit the constant by using an integer constant with a cast. 
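// --- A standalone sketch, not part of the patch: SelectBinaryOp below turns
// "sdiv exact X, 2^k" into "sra X, k". That is only valid because the 'exact'
// flag promises a zero remainder; for inexact negative dividends the two
// disagree (-7 / 8 is 0, but -7 >> 3 is -1). Assumes the host's signed >>
// is arithmetic, an assumption LLVM already makes elsewhere.
#include <cassert>

int main() {
  for (int q = -1024; q <= 1024; ++q) {
    const int X = q * 8;              // only exact multiples, per the flag
    assert(X / 8 == (X >> 3));        // sdiv exact X, 8  ==  sra X, 3
  }
  return 0;
}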
@@ -230,10 +235,10 @@ unsigned FastISel::lookUpRegForValue(const Value *V) { /// NOTE: This is only necessary because we might select a block that uses /// a value before we select the block that defines the value. It might be /// possible to fix this by selecting blocks in reverse postorder. -unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) { +void FastISel::UpdateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) { if (!isa<Instruction>(I)) { LocalValueMap[I] = Reg; - return Reg; + return; } unsigned &AssignedReg = FuncInfo.ValueMap[I]; @@ -242,12 +247,11 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) { AssignedReg = Reg; else if (Reg != AssignedReg) { // Arrange for uses of AssignedReg to be replaced by uses of Reg. - FuncInfo.RegFixups[AssignedReg] = Reg; + for (unsigned i = 0; i < NumRegs; i++) + FuncInfo.RegFixups[AssignedReg+i] = Reg+i; AssignedReg = Reg; } - - return AssignedReg; } std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) { @@ -330,23 +334,51 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { return false; } + // Check if the first operand is a constant, and handle it as "ri". At -O0, + // we don't have anything that canonicalizes operand order. + if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(0))) + if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) { + unsigned Op1 = getRegForValue(I->getOperand(1)); + if (Op1 == 0) return false; + + bool Op1IsKill = hasTrivialKill(I->getOperand(1)); + + unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, + Op1IsKill, CI->getZExtValue(), + VT.getSimpleVT()); + if (ResultReg == 0) return false; + + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; + } + + unsigned Op0 = getRegForValue(I->getOperand(0)); - if (Op0 == 0) - // Unhandled operand. Halt "fast" selection and bail. + if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail. return false; bool Op0IsKill = hasTrivialKill(I->getOperand(0)); // Check if the second operand is a constant and handle it appropriately. if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { - unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(), - ISDOpcode, Op0, Op0IsKill, - CI->getZExtValue()); - if (ResultReg != 0) { - // We successfully emitted code for the given LLVM Instruction. - UpdateValueMap(I, ResultReg); - return true; + uint64_t Imm = CI->getZExtValue(); + + // Transform "sdiv exact X, 8" -> "sra X, 3". + if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) && + cast<BinaryOperator>(I)->isExact() && + isPowerOf2_64(Imm)) { + Imm = Log2_64(Imm); + ISDOpcode = ISD::SRA; } + + unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, + Op0IsKill, Imm, VT.getSimpleVT()); + if (ResultReg == 0) return false; + + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; } // Check if the second operand is a constant float. @@ -454,15 +486,35 @@ bool FastISel::SelectGetElementPtr(const User *I) { } bool FastISel::SelectCall(const User *I) { - const Function *F = cast<CallInst>(I)->getCalledFunction(); + const CallInst *Call = cast<CallInst>(I); + + // Handle simple inline asms. + if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getArgOperand(0))) { + // Don't attempt to handle constraints. 
+ if (!IA->getConstraintString().empty()) + return false; + + unsigned ExtraInfo = 0; + if (IA->hasSideEffects()) + ExtraInfo |= InlineAsm::Extra_HasSideEffects; + if (IA->isAlignStack()) + ExtraInfo |= InlineAsm::Extra_IsAlignStack; + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::INLINEASM)) + .addExternalSymbol(IA->getAsmString().c_str()) + .addImm(ExtraInfo); + return true; + } + + const Function *F = Call->getCalledFunction(); if (!F) return false; // Handle selected intrinsic function calls. - unsigned IID = F->getIntrinsicID(); - switch (IID) { + switch (F->getIntrinsicID()) { default: break; case Intrinsic::dbg_declare: { - const DbgDeclareInst *DI = cast<DbgDeclareInst>(I); + const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call); if (!DIVariable(DI->getVariable()).Verify() || !FuncInfo.MF->getMMI().hasDebugInfo()) return true; @@ -494,7 +546,7 @@ bool FastISel::SelectCall(const User *I) { } case Intrinsic::dbg_value: { // This form of DBG_VALUE is target-independent. - const DbgValueInst *DI = cast<DbgValueInst>(I); + const DbgValueInst *DI = cast<DbgValueInst>(Call); const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); const Value *V = DI->getValue(); if (!V) { @@ -523,65 +575,68 @@ bool FastISel::SelectCall(const User *I) { return true; } case Intrinsic::eh_exception: { - EVT VT = TLI.getValueType(I->getType()); - switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) { - default: break; - case TargetLowering::Expand: { - assert(FuncInfo.MBB->isLandingPad() && - "Call to eh.exception not in landing pad!"); - unsigned Reg = TLI.getExceptionAddressRegister(); - const TargetRegisterClass *RC = TLI.getRegClassFor(VT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Reg); - UpdateValueMap(I, ResultReg); - return true; - } - } - break; + EVT VT = TLI.getValueType(Call->getType()); + if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand) + break; + + assert(FuncInfo.MBB->isLandingPad() && + "Call to eh.exception not in landing pad!"); + unsigned Reg = TLI.getExceptionAddressRegister(); + const TargetRegisterClass *RC = TLI.getRegClassFor(VT); + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Reg); + UpdateValueMap(Call, ResultReg); + return true; } case Intrinsic::eh_selector: { - EVT VT = TLI.getValueType(I->getType()); - switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) { - default: break; - case TargetLowering::Expand: { - if (FuncInfo.MBB->isLandingPad()) - AddCatchInfo(*cast<CallInst>(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB); - else { + EVT VT = TLI.getValueType(Call->getType()); + if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand) + break; + if (FuncInfo.MBB->isLandingPad()) + AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB); + else { #ifndef NDEBUG - FuncInfo.CatchInfoLost.insert(cast<CallInst>(I)); + FuncInfo.CatchInfoLost.insert(Call); #endif - // FIXME: Mark exception selector register as live in. Hack for PR1508. - unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo.MBB->addLiveIn(Reg); - } - + // FIXME: Mark exception selector register as live in. Hack for PR1508. 
unsigned Reg = TLI.getExceptionSelectorRegister(); - EVT SrcVT = TLI.getPointerTy(); - const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Reg); - - bool ResultRegIsKill = hasTrivialKill(I); - - // Cast the register to the type of the selector. - if (SrcVT.bitsGT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, - ResultReg, ResultRegIsKill); - else if (SrcVT.bitsLT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, - ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill); - if (ResultReg == 0) - // Unhandled operand. Halt "fast" selection and bail. - return false; + if (Reg) FuncInfo.MBB->addLiveIn(Reg); + } - UpdateValueMap(I, ResultReg); + unsigned Reg = TLI.getExceptionSelectorRegister(); + EVT SrcVT = TLI.getPointerTy(); + const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Reg); + + bool ResultRegIsKill = hasTrivialKill(Call); + + // Cast the register to the type of the selector. + if (SrcVT.bitsGT(MVT::i32)) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, + ResultReg, ResultRegIsKill); + else if (SrcVT.bitsLT(MVT::i32)) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, + ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill); + if (ResultReg == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; - return true; - } - } - break; + UpdateValueMap(Call, ResultReg); + + return true; + } + case Intrinsic::objectsize: { + ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1)); + unsigned long long Res = CI->isZero() ? -1ULL : 0; + Constant *ResCI = ConstantInt::get(Call->getType(), Res); + unsigned ResultReg = getRegForValue(ResCI); + if (ResultReg == 0) + return false; + UpdateValueMap(Call, ResultReg); + return true; } } @@ -598,21 +653,13 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) { // Unhandled type. Halt "fast" selection and bail. return false; - // Check if the destination type is legal. Or as a special case, - // it may be i1 if we're doing a truncate because that's - // easy and somewhat common. + // Check if the destination type is legal. if (!TLI.isTypeLegal(DstVT)) - if (DstVT != MVT::i1 || Opcode != ISD::TRUNCATE) - // Unhandled type. Halt "fast" selection and bail. - return false; + return false; - // Check if the source operand is legal. Or as a special case, - // it may be i1 if we're doing zero-extension because that's - // easy and somewhat common. + // Check if the source operand is legal. if (!TLI.isTypeLegal(SrcVT)) - if (SrcVT != MVT::i1 || Opcode != ISD::ZERO_EXTEND) - // Unhandled type. Halt "fast" selection and bail. - return false; + return false; unsigned InputReg = getRegForValue(I->getOperand(0)); if (!InputReg) @@ -621,18 +668,6 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) { bool InputRegIsKill = hasTrivialKill(I->getOperand(0)); - // If the operand is i1, arrange for the high bits in the register to be zero. - if (SrcVT == MVT::i1) { - SrcVT = TLI.getTypeToTransformTo(I->getContext(), SrcVT); - InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg, InputRegIsKill); - if (!InputReg) - return false; - InputRegIsKill = true; - } - // If the result is i1, truncate to the target's type for i1 first. 
- if (DstVT == MVT::i1) - DstVT = TLI.getTypeToTransformTo(I->getContext(), DstVT); - unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opcode, @@ -784,6 +819,47 @@ FastISel::SelectFNeg(const User *I) { } bool +FastISel::SelectExtractValue(const User *U) { + const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U); + if (!EVI) + return false; + + // Make sure we only try to handle extracts with a legal result. But also + // allow i1 because it's easy. + EVT RealVT = TLI.getValueType(EVI->getType(), /*AllowUnknown=*/true); + if (!RealVT.isSimple()) + return false; + MVT VT = RealVT.getSimpleVT(); + if (!TLI.isTypeLegal(VT) && VT != MVT::i1) + return false; + + const Value *Op0 = EVI->getOperand(0); + const Type *AggTy = Op0->getType(); + + // Get the base result register. + unsigned ResultReg; + DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(Op0); + if (I != FuncInfo.ValueMap.end()) + ResultReg = I->second; + else if (isa<Instruction>(Op0)) + ResultReg = FuncInfo.InitializeRegForValue(Op0); + else + return false; // fast-isel can't handle aggregate constants at the moment + + // Get the actual result register, which is an offset from the base register. + unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->idx_begin(), EVI->idx_end()); + + SmallVector<EVT, 4> AggValueVTs; + ComputeValueVTs(TLI, AggTy, AggValueVTs); + + for (unsigned i = 0; i < VTIndex; i++) + ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]); + + UpdateValueMap(EVI, ResultReg); + return true; +} + +bool FastISel::SelectOperator(const User *I, unsigned Opcode) { switch (Opcode) { case Instruction::Add: @@ -887,6 +963,9 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { return true; } + case Instruction::ExtractValue: + return SelectExtractValue(I); + case Instruction::PHI: llvm_unreachable("FastISel shouldn't visit PHI nodes!"); @@ -966,59 +1045,33 @@ unsigned FastISel::FastEmit_rri(MVT, MVT, unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, bool Op0IsKill, uint64_t Imm, MVT ImmType) { + // If this is a multiply by a power of two, emit this as a shift left. + if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) { + Opcode = ISD::SHL; + Imm = Log2_64(Imm); + } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) { + // div x, 8 -> srl x, 3 + Opcode = ISD::SRL; + Imm = Log2_64(Imm); + } + + // Horrible hack (to be removed), check to make sure shift amounts are + // in-range. + if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) && + Imm >= VT.getSizeInBits()) + return 0; + // First check if immediate type is legal. If not, we can't use the ri form. unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm); if (ResultReg != 0) return ResultReg; unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm); - if (MaterialReg == 0) - return 0; - return FastEmit_rr(VT, VT, Opcode, - Op0, Op0IsKill, - MaterialReg, /*Kill=*/true); -} - -/// FastEmit_rf_ - This method is a wrapper of FastEmit_ri. It first tries -/// to emit an instruction with a floating-point immediate operand using -/// FastEmit_rf. If that fails, it materializes the immediate into a register -/// and try FastEmit_rr instead. -unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode, - unsigned Op0, bool Op0IsKill, - const ConstantFP *FPImm, MVT ImmType) { - // First check if immediate type is legal. If not, we can't use the rf form. 
- unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, Op0IsKill, FPImm); - if (ResultReg != 0) - return ResultReg; - - // Materialize the constant in a register. - unsigned MaterialReg = FastEmit_f(ImmType, ImmType, ISD::ConstantFP, FPImm); if (MaterialReg == 0) { - // If the target doesn't have a way to directly enter a floating-point - // value into a register, use an alternate approach. - // TODO: The current approach only supports floating-point constants - // that can be constructed by conversion from integer values. This should - // be replaced by code that creates a load from a constant-pool entry, - // which will require some target-specific work. - const APFloat &Flt = FPImm->getValueAPF(); - EVT IntVT = TLI.getPointerTy(); - - uint64_t x[2]; - uint32_t IntBitWidth = IntVT.getSizeInBits(); - bool isExact; - (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, - APFloat::rmTowardZero, &isExact); - if (!isExact) - return 0; - APInt IntVal(IntBitWidth, 2, x); - - unsigned IntegerReg = FastEmit_i(IntVT.getSimpleVT(), IntVT.getSimpleVT(), - ISD::Constant, IntVal.getZExtValue()); - if (IntegerReg == 0) - return 0; - MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT, - ISD::SINT_TO_FP, IntegerReg, /*Kill=*/true); - if (MaterialReg == 0) - return 0; + // This is a bit ugly/slow, but failing here means falling out of + // fast-isel, which would be very slow. + const IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(), + VT.getSizeInBits()); + MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm)); } return FastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, @@ -1078,6 +1131,30 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, return ResultReg; } +unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + unsigned Op2, bool Op2IsKill) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addReg(Op2, Op2IsKill * RegState::Kill); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addReg(Op2, Op2IsKill * RegState::Kill); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, @@ -1183,6 +1260,23 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, return ResultReg; } +unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm1, uint64_t Imm2) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addImm(Imm1).addImm(Imm2); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm1).addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx) { @@ -1238,7 +1332,7 @@ bool 
FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Only handle legal types. Two interesting things to note here. First, // by bailing out early, we may leave behind some dead instructions, // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its - // own moves. Second, this check is necessary becuase FastISel doesn't + // own moves. Second, this check is necessary because FastISel doesn't // use CreateRegs to create registers, so it always creates // exactly one register for each non-void instruction. EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index d8a5770..d518b5d 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -54,25 +54,6 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { return false; } -/// isOnlyUsedInEntryBlock - If the specified argument is only used in the -/// entry block, return true. This includes arguments used by switches, since -/// the switch may expand into multiple basic blocks. -static bool isOnlyUsedInEntryBlock(const Argument *A, bool EnableFastISel) { - // With FastISel active, we may be splitting blocks, so force creation - // of virtual registers for all non-dead arguments. - if (EnableFastISel) - return A->use_empty(); - - const BasicBlock *Entry = A->getParent()->begin(); - for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end(); - UI != E; ++UI) { - const User *U = *UI; - if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U)) - return false; // Use not in entry block. - } - return true; -} - FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli) : TLI(tli) { } @@ -86,16 +67,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { SmallVector<ISD::OutputArg, 4> Outs; GetReturnInfo(Fn->getReturnType(), Fn->getAttributes().getRetAttributes(), Outs, TLI); - CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), Fn->isVarArg(), + CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF, + Fn->isVarArg(), Outs, Fn->getContext()); - // Create a vreg for each argument register that is not dead and is used - // outside of the entry block for the function. - for (Function::const_arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end(); - AI != E; ++AI) - if (!isOnlyUsedInEntryBlock(AI, EnableFastISel)) - InitializeRegForValue(AI); - // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines // them. @@ -181,6 +156,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { const PHINode *PN = dyn_cast<PHINode>(I); ++I) { if (PN->use_empty()) continue; + // Skip empty types + if (PN->getType()->isEmptyTy()) + continue; + DebugLoc DL = PN->getDebugLoc(); unsigned PHIReg = ValueMap[PN]; assert(PHIReg && "PHI node does not have an assigned virtual register!"); @@ -343,7 +322,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { APInt Zero(BitWidth, 0); DestLOI.KnownZero = Zero; DestLOI.KnownOne = Zero; - return; + return; } if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { @@ -375,18 +354,18 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { /// setByValArgumentFrameIndex - Record frame index for the byval /// argument. This overrides previous frame index entry for this argument, /// if any. 
-void FunctionLoweringInfo::setByValArgumentFrameIndex(const Argument *A, +void FunctionLoweringInfo::setByValArgumentFrameIndex(const Argument *A, int FI) { assert (A->hasByValAttr() && "Argument does not have byval attribute!"); ByValArgFrameIndexMap[A] = FI; } - + /// getByValArgumentFrameIndex - Get frame index for the byval argument. /// If the argument does not have any assigned frame index then 0 is /// returned. int FunctionLoweringInfo::getByValArgumentFrameIndex(const Argument *A) { assert (A->hasByValAttr() && "Argument does not have byval attribute!"); - DenseMap<const Argument *, int>::iterator I = + DenseMap<const Argument *, int>::iterator I = ByValArgFrameIndexMap.find(A); if (I != ByValArgFrameIndexMap.end()) return I->second; diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index e309def..2a65d65 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -76,6 +76,12 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, // the CopyToReg'd destination register instead of creating a new vreg. bool MatchReg = true; const TargetRegisterClass *UseRC = NULL; + EVT VT = Node->getValueType(ResNo); + + // Stick to the preferred register classes for legal types. + if (TLI->isTypeLegal(VT)) + UseRC = TLI->getRegClassFor(VT); + if (!IsClone && !IsCloned) for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); UI != E; ++UI) { @@ -121,10 +127,9 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, break; } - EVT VT = Node->getValueType(ResNo); const TargetRegisterClass *SrcRC = 0, *DstRC = 0; SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT); - + // Figure out the register class to create for the destreg. 
if (VRBase) { DstRC = MRI->getRegClass(VRBase); @@ -283,7 +288,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, DstRC = II->OpInfo[IIOpNum].getRegClass(TRI); assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) && "Don't have operand info for this instruction!"); - if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) { + if (DstRC && !SrcRC->hasSuperClassEq(DstRC)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); @@ -543,17 +548,18 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, void InstrEmitter::EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsClone, bool IsCloned) { - const TargetRegisterClass *RC = TLI->getRegClassFor(Node->getValueType(0)); + unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); + const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); unsigned NewVReg = MRI->createVirtualRegister(RC); MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(TargetOpcode::REG_SEQUENCE), NewVReg); unsigned NumOps = Node->getNumOperands(); - assert((NumOps & 1) == 0 && - "REG_SEQUENCE must have an even number of operands!"); + assert((NumOps & 1) == 1 && + "REG_SEQUENCE must have an odd number of operands!"); const TargetInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); - for (unsigned i = 0; i != NumOps; ++i) { + for (unsigned i = 1; i != NumOps; ++i) { SDValue Op = Node->getOperand(i); - if (i & 1) { + if ((i & 1) == 0) { unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index b837261..4b6d3ef 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -14,23 +14,16 @@ #include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" #include "llvm/LLVMContext.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -57,19 +50,13 @@ class SelectionDAGLegalize { const TargetMachine &TM; const TargetLowering &TLI; SelectionDAG &DAG; - CodeGenOpt::Level OptLevel; // Libcall insertion helpers. - /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been + /// LastCALLSEQ - This keeps track of the CALLSEQ_END node that has been /// legalized. We use this to ensure that calls are properly serialized /// against each other, including inserted libcalls. 
- SDValue LastCALLSEQ_END; - - /// IsLegalizingCall - This member is used *only* for purposes of providing - /// helpful assertions that a libcall isn't created while another call is - /// being legalized (which could lead to non-serialized call sequences). - bool IsLegalizingCall; + SmallVector<SDValue, 8> LastCALLSEQ; enum LegalizeAction { Legal, // The target natively supports this operation. @@ -98,13 +85,13 @@ class SelectionDAGLegalize { } public: - SelectionDAGLegalize(SelectionDAG &DAG, CodeGenOpt::Level ol); + explicit SelectionDAGLegalize(SelectionDAG &DAG); /// getTypeAction - Return how we should legalize values of this type, either /// it is already legal or we need to expand it into multiple registers of /// smaller integer type, or we need to promote it to a larger type. LegalizeAction getTypeAction(EVT VT) const { - return (LegalizeAction)ValueTypeActions.getTypeAction(VT); + return (LegalizeAction)TLI.getTypeAction(*DAG.getContext(), VT); } /// isTypeLegal - Return true if this type is legal on this target. @@ -147,6 +134,9 @@ private: DebugLoc dl); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); + SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, + unsigned NumOps, bool isSigned, DebugLoc dl); + std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, @@ -158,7 +148,7 @@ private: RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128); - SDValue ExpandDivRemLibCall(SDNode *Node, bool isSigned, bool isDIV); + void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl); SDValue ExpandBUILD_VECTOR(SDNode *Node); @@ -184,6 +174,15 @@ private: void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); + + SDValue getLastCALLSEQ() { return LastCALLSEQ.back(); } + void setLastCALLSEQ(const SDValue s) { LastCALLSEQ.back() = s; } + void pushLastCALLSEQ(SDValue s) { + LastCALLSEQ.push_back(s); + } + void popLastCALLSEQ() { + LastCALLSEQ.pop_back(); + } }; } @@ -219,18 +218,16 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]); } -SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag, - CodeGenOpt::Level ol) +SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) : TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), - DAG(dag), OptLevel(ol), + DAG(dag), ValueTypeActions(TLI.getValueTypeActions()) { assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && "Too many value types for ValueTypeActions to hold!"); } void SelectionDAGLegalize::LegalizeDAG() { - LastCALLSEQ_END = DAG.getEntryNode(); - IsLegalizingCall = false; + pushLastCALLSEQ(DAG.getEntryNode()); // The legalize process is inherently a bottom-up recursive process (users // legalize their uses before themselves). Given infinite stack space, we @@ -258,14 +255,15 @@ void SelectionDAGLegalize::LegalizeDAG() { /// FindCallEndFromCallStart - Given a chained node that is part of a call /// sequence, find the CALLSEQ_END node that terminates the call sequence. static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { - // Nested CALLSEQ_START/END constructs aren't yet legal, - // but we can DTRT and handle them correctly here. 
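// --- A standalone sketch, not part of the patch: with LastCALLSEQ now a
// stack, nested CALLSEQ_START/CALLSEQ_END pairs are matched like brackets,
// which is the depth rule the rewritten walk below implements. A minimal
// model over a flat opcode sequence (hypothetical enum; not the SDNode walk):
#include <cassert>
#include <vector>

enum Op { CALLSEQ_START, CALLSEQ_END, OTHER };

// Index of the CALLSEQ_END matching the CALLSEQ_START at Begin, else -1.
static int findMatchingEnd(const std::vector<Op> &Seq, unsigned Begin) {
  int Depth = 0;
  for (unsigned i = Begin; i < Seq.size(); ++i) {
    if (Seq[i] == CALLSEQ_START)
      ++Depth;
    else if (Seq[i] == CALLSEQ_END && --Depth == 0)
      return (int)i;
  }
  return -1;                          // unbalanced sequence
}

int main() {
  const Op S[] = { CALLSEQ_START, OTHER, CALLSEQ_START, OTHER,
                   CALLSEQ_END, OTHER, CALLSEQ_END };
  std::vector<Op> Seq(S, S + 7);
  assert(findMatchingEnd(Seq, 0) == 6); // outer pair spans the inner one
  assert(findMatchingEnd(Seq, 2) == 4); // inner pair matches directly
  return 0;
}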
+ int next_depth = depth; if (Node->getOpcode() == ISD::CALLSEQ_START) - depth++; - else if (Node->getOpcode() == ISD::CALLSEQ_END) { - depth--; - if (depth == 0) + next_depth = depth + 1; + if (Node->getOpcode() == ISD::CALLSEQ_END) { + assert(depth > 0 && "negative depth!"); + if (depth == 1) return Node; + else + next_depth = depth - 1; } if (Node->use_empty()) return 0; // No CallSeqEnd @@ -296,7 +294,7 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { SDNode *User = *UI; for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) if (User->getOperand(i) == TheChain) - if (SDNode *Result = FindCallEndFromCallStart(User, depth)) + if (SDNode *Result = FindCallEndFromCallStart(User, next_depth)) return Result; } return 0; @@ -317,6 +315,7 @@ static SDNode *FindCallStartFromCallEnd(SDNode *Node) { case ISD::CALLSEQ_START: if (!nested) return Node; + Node = Node->getOperand(0).getNode(); nested--; break; case ISD::CALLSEQ_END: @@ -324,7 +323,7 @@ static SDNode *FindCallStartFromCallEnd(SDNode *Node) { break; } } - return 0; + return (Node->getOpcode() == ISD::CALLSEQ_START) ? Node : 0; } /// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to @@ -433,68 +432,67 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), Alignment); - } else { - // Do a (aligned) store to a stack slot, then copy from the stack slot - // to the final destination using (unaligned) integer loads and stores. - EVT StoredVT = ST->getMemoryVT(); - EVT RegVT = - TLI.getRegisterType(*DAG.getContext(), - EVT::getIntegerVT(*DAG.getContext(), - StoredVT.getSizeInBits())); - unsigned StoredBytes = StoredVT.getSizeInBits() / 8; - unsigned RegBytes = RegVT.getSizeInBits() / 8; - unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; - - // Make sure the stack slot is also aligned for the register type. - SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT); - - // Perform the original store, only redirected to the stack slot. - SDValue Store = DAG.getTruncStore(Chain, dl, - Val, StackPtr, MachinePointerInfo(), - StoredVT, false, false, 0); - SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); - SmallVector<SDValue, 8> Stores; - unsigned Offset = 0; - - // Do all but one copies using the full register width. - for (unsigned i = 1; i < NumRegs; i++) { - // Load one integer register's worth from the stack slot. - SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, - MachinePointerInfo(), - false, false, 0); - // Store it to the final location. Remember the store. - Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, - ST->getPointerInfo().getWithOffset(Offset), - ST->isVolatile(), ST->isNonTemporal(), - MinAlign(ST->getAlignment(), Offset))); - // Increment the pointers. - Offset += RegBytes; - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - Increment); - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); - } + } + // Do a (aligned) store to a stack slot, then copy from the stack slot + // to the final destination using (unaligned) integer loads and stores. 
+ EVT StoredVT = ST->getMemoryVT(); + EVT RegVT = + TLI.getRegisterType(*DAG.getContext(), + EVT::getIntegerVT(*DAG.getContext(), + StoredVT.getSizeInBits())); + unsigned StoredBytes = StoredVT.getSizeInBits() / 8; + unsigned RegBytes = RegVT.getSizeInBits() / 8; + unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; - // The last store may be partial. Do a truncating store. On big-endian - // machines this requires an extending load from the stack slot to ensure - // that the bits are in the right place. - EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), - 8 * (StoredBytes - Offset)); + // Make sure the stack slot is also aligned for the register type. + SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT); - // Load from the stack slot. - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, - MachinePointerInfo(), - MemVT, false, false, 0); + // Perform the original store, only redirected to the stack slot. + SDValue Store = DAG.getTruncStore(Chain, dl, + Val, StackPtr, MachinePointerInfo(), + StoredVT, false, false, 0); + SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); + SmallVector<SDValue, 8> Stores; + unsigned Offset = 0; - Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, - ST->getPointerInfo() - .getWithOffset(Offset), - MemVT, ST->isVolatile(), - ST->isNonTemporal(), - MinAlign(ST->getAlignment(), Offset))); - // The order of the stores doesn't matter - say it with a TokenFactor. - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], - Stores.size()); + // Do all but one copies using the full register width. + for (unsigned i = 1; i < NumRegs; i++) { + // Load one integer register's worth from the stack slot. + SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, + MachinePointerInfo(), + false, false, 0); + // Store it to the final location. Remember the store. + Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, + ST->getPointerInfo().getWithOffset(Offset), + ST->isVolatile(), ST->isNonTemporal(), + MinAlign(ST->getAlignment(), Offset))); + // Increment the pointers. + Offset += RegBytes; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + Increment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); } + + // The last store may be partial. Do a truncating store. On big-endian + // machines this requires an extending load from the stack slot to ensure + // that the bits are in the right place. + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), + 8 * (StoredBytes - Offset)); + + // Load from the stack slot. + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, + MachinePointerInfo(), + MemVT, false, false, 0); + + Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, + ST->getPointerInfo() + .getWithOffset(Offset), + MemVT, ST->isVolatile(), + ST->isNonTemporal(), + MinAlign(ST->getAlignment(), Offset))); + // The order of the stores doesn't matter - say it with a TokenFactor. 
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); } assert(ST->getMemoryVT().isInteger() && !ST->getMemoryVT().isVector() && @@ -941,11 +939,12 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::BR_JT: case ISD::BR_CC: case ISD::BRCOND: - // Branches tweak the chain to include LastCALLSEQ_END + assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?"); + // Branches tweak the chain to include LastCALLSEQ Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], - LastCALLSEQ_END); + getLastCALLSEQ()); Ops[0] = LegalizeOp(Ops[0]); - LastCALLSEQ_END = DAG.getEntryNode(); + setLastCALLSEQ(DAG.getEntryNode()); break; case ISD::SHL: case ISD::SRL: @@ -1034,6 +1033,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; case ISD::CALLSEQ_START: { SDNode *CallEnd = FindCallEndFromCallStart(Node); + assert(CallEnd && "didn't find CALLSEQ_END!"); // Recursively Legalize all of the inputs of the call end that do not lead // to this call start. This ensures that any libcalls that need be inserted @@ -1050,9 +1050,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Merge in the last call to ensure that this call starts after the last // call ended. - if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { + if (getLastCALLSEQ().getOpcode() != ISD::EntryToken) { Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - Tmp1, LastCALLSEQ_END); + Tmp1, getLastCALLSEQ()); Tmp1 = LegalizeOp(Tmp1); } @@ -1073,25 +1073,29 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // sequence have been legalized, legalize the call itself. During this // process, no libcalls can/will be inserted, guaranteeing that no calls // can overlap. - assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!"); // Note that we are selecting this call! - LastCALLSEQ_END = SDValue(CallEnd, 0); - IsLegalizingCall = true; + setLastCALLSEQ(SDValue(CallEnd, 0)); // Legalize the call, starting from the CALLSEQ_END. - LegalizeOp(LastCALLSEQ_END); - assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!"); + LegalizeOp(getLastCALLSEQ()); return Result; } case ISD::CALLSEQ_END: - // If the CALLSEQ_START node hasn't been legalized first, legalize it. This - // will cause this node to be legalized as well as handling libcalls right. - if (LastCALLSEQ_END.getNode() != Node) { - LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0)); - DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); - assert(I != LegalizedNodes.end() && - "Legalizing the call start should have legalized this node!"); - return I->second; + { + SDNode *myCALLSEQ_BEGIN = FindCallStartFromCallEnd(Node); + + // If the CALLSEQ_START node hasn't been legalized first, legalize it. + // This will cause this node to be legalized as well as handling libcalls + // right. + if (getLastCALLSEQ().getNode() != Node) { + LegalizeOp(SDValue(myCALLSEQ_BEGIN, 0)); + DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); + assert(I != LegalizedNodes.end() && + "Legalizing the call start should have legalized this node!"); + return I->second; + } + + pushLastCALLSEQ(SDValue(myCALLSEQ_BEGIN, 0)); } // Otherwise, the call start has been legalized and everything is going @@ -1119,9 +1123,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result.getResNo()); } } - assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); // This finishes up call legalization. 
- IsLegalizingCall = false; + popLastCALLSEQ(); // If the CALLSEQ_END node has a flag, remember that we legalized it. AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0)); @@ -1371,6 +1374,91 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp2 = LegalizeOp(Load.getValue(1)); break; } + + // If this is a promoted vector load, and the vector element types are + // legal, then scalarize it. + if (ExtType == ISD::EXTLOAD && SrcVT.isVector() && + isTypeLegal(Node->getValueType(0).getScalarType())) { + SmallVector<SDValue, 8> LoadVals; + SmallVector<SDValue, 8> LoadChains; + unsigned NumElem = SrcVT.getVectorNumElements(); + unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; + + for (unsigned Idx=0; Idx<NumElem; Idx++) { + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(Stride)); + SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, + Node->getValueType(0).getScalarType(), + Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride), + SrcVT.getScalarType(), + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + + LoadVals.push_back(ScalarLoad.getValue(0)); + LoadChains.push_back(ScalarLoad.getValue(1)); + } + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &LoadChains[0], LoadChains.size()); + SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl, + Node->getValueType(0), &LoadVals[0], LoadVals.size()); + + Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. + Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes. + break; + } + + // If this is a promoted vector load, and the vector element types are + // illegal, create the promoted vector from bitcasted segments. + if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) { + EVT MemElemTy = Node->getValueType(0).getScalarType(); + EVT SrcSclrTy = SrcVT.getScalarType(); + unsigned SizeRatio = + (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits()); + + SmallVector<SDValue, 8> LoadVals; + SmallVector<SDValue, 8> LoadChains; + unsigned NumElem = SrcVT.getVectorNumElements(); + unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; + + for (unsigned Idx=0; Idx<NumElem; Idx++) { + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(Stride)); + SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, + SrcVT.getScalarType(), + Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride), + SrcVT.getScalarType(), + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + if (TLI.isBigEndian()) { + // MSB (which is garbage, comes first) + LoadVals.push_back(ScalarLoad.getValue(0)); + for (unsigned i = 0; i<SizeRatio-1; ++i) + LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType())); + } else { + // LSB (which is data, comes first) + for (unsigned i = 0; i<SizeRatio-1; ++i) + LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType())); + LoadVals.push_back(ScalarLoad.getValue(0)); + } + LoadChains.push_back(ScalarLoad.getValue(1)); + } + + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &LoadChains[0], LoadChains.size()); + EVT TempWideVector = EVT::getVectorVT(*DAG.getContext(), + SrcVT.getScalarType(), NumElem*SizeRatio); + SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl, + TempWideVector, &LoadVals[0], LoadVals.size()); + + // Cast to the correct type + ValRes = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), ValRes); + + Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. + Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes. + break; + + } + // FIXME: This does not work for vectors on most targets. 
Sign- and // zero-extend operations are currently folded into extending loads, // whether they are legal or not, and then we end up here without any @@ -1546,6 +1634,88 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result = TLI.LowerOperation(Result, DAG); break; case Expand: + + EVT WideScalarVT = Tmp3.getValueType().getScalarType(); + EVT NarrowScalarVT = StVT.getScalarType(); + + // The Store type is illegal, must scalarize the vector store. + SmallVector<SDValue, 8> Stores; + bool ScalarLegal = isTypeLegal(WideScalarVT); + if (!isTypeLegal(StVT) && StVT.isVector() && ScalarLegal) { + unsigned NumElem = StVT.getVectorNumElements(); + + unsigned ScalarSize = StVT.getScalarType().getSizeInBits(); + // Round odd types to the next pow of two. + if (!isPowerOf2_32(ScalarSize)) + ScalarSize = NextPowerOf2(ScalarSize); + // Types smaller than 8 bits are promoted to 8 bits. + ScalarSize = std::max<unsigned>(ScalarSize, 8); + // Store stride + unsigned Stride = ScalarSize/8; + assert(isPowerOf2_32(Stride) && "Stride must be a power of two"); + + for (unsigned Idx=0; Idx<NumElem; Idx++) { + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + WideScalarVT, Tmp3, DAG.getIntPtrConstant(Idx)); + + + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize); + + Ex = DAG.getNode(ISD::TRUNCATE, dl, NVT, Ex); + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(Stride)); + SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2, + ST->getPointerInfo().getWithOffset(Idx*Stride), + isVolatile, isNonTemporal, Alignment); + Stores.push_back(Store); + } + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Stores[0], Stores.size()); + break; + } + + // The Store type is illegal, must scalarize the vector store. + // However, the scalar type is illegal. Must bitcast the result + // and store it in smaller parts. + if (!isTypeLegal(StVT) && StVT.isVector()) { + unsigned WideNumElem = StVT.getVectorNumElements(); + unsigned Stride = NarrowScalarVT.getSizeInBits()/8; + + unsigned SizeRatio = + (WideScalarVT.getSizeInBits() / NarrowScalarVT.getSizeInBits()); + + EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(), NarrowScalarVT, + SizeRatio*WideNumElem); + + // Cast the wide elem vector to wider vec with smaller elem type. + // Example <2 x i64> -> <4 x i32> + Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3); + + for (unsigned Idx=0; Idx<WideNumElem*SizeRatio; Idx++) { + // Extract element i + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx)); + // bump pointer. + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(Stride)); + + // Store if this element is: + // - First element on big endian, or + // - Last element on little endian + if (( TLI.isBigEndian() && (Idx%SizeRatio == 0)) || + ((!TLI.isBigEndian() && (Idx%SizeRatio == SizeRatio-1)))) { + SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2, + ST->getPointerInfo().getWithOffset(Idx*Stride), + isVolatile, isNonTemporal, Alignment); + Stores.push_back(Store); + } + } + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Stores[0], Stores.size()); + break; + } + + // TRUNCSTORE:i16 i32 -> STORE i16 assert(isTypeLegal(StVT) && "Do not know how to expand this store!"); Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); @@ -2007,7 +2177,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // and leave the Hi part unset.
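// --- A standalone sketch, not part of the patch: the scalarized-store path
// above sizes each per-element store by rounding odd scalar widths up to a
// power of two, with a one-byte floor. NextPowerOf2 here is a local stand-in
// for the llvm/Support/MathExtras.h helper (next power of two strictly
// greater than A).
#include <cassert>

static unsigned NextPowerOf2(unsigned A) {
  A |= (A >> 1); A |= (A >> 2); A |= (A >> 4);
  A |= (A >> 8); A |= (A >> 16);
  return A + 1;
}

static unsigned StoreStrideBytes(unsigned ScalarBits) {
  if (ScalarBits & (ScalarBits - 1))  // round odd types to the next pow2
    ScalarBits = NextPowerOf2(ScalarBits);
  if (ScalarBits < 8)                 // sub-byte types take a whole byte
    ScalarBits = 8;
  return ScalarBits / 8;
}

int main() {
  assert(StoreStrideBytes(1)  == 1);  // i1  -> one byte
  assert(StoreStrideBytes(16) == 2);  // i16 -> two bytes
  assert(StoreStrideBytes(24) == 4);  // i24 -> 32 bits -> four bytes
  assert(StoreStrideBytes(64) == 8);
  return 0;
}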
SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. @@ -2043,9 +2212,43 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, return DAG.getRoot(); // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). + LegalizeOp(CallInfo.second); + return CallInfo.first; +} + +/// ExpandLibCall - Generate a libcall taking the given operands as arguments +/// and returning a result of type RetVT. +SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, + const SDValue *Ops, unsigned NumOps, + bool isSigned, DebugLoc dl) { + TargetLowering::ArgListTy Args; + Args.reserve(NumOps); + + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0; i != NumOps; ++i) { + Entry.Node = Ops[i]; + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + std::pair<SDValue,SDValue> CallInfo = + TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + false, 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); + + // Legalize the call sequence, starting with the chain. This will advance // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that // was added by LowerCallTo (guaranteeing proper serialization of calls). LegalizeOp(CallInfo.second); + return CallInfo.first; } @@ -2055,7 +2258,6 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); SDValue InChain = Node->getOperand(0); TargetLowering::ArgListTy Args; @@ -2081,7 +2283,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, Callee, Args, DAG, Node->getDebugLoc()); // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that // was added by LowerCallTo (guaranteeing proper serialization of calls). LegalizeOp(CallInfo.second); return CallInfo; @@ -2121,10 +2323,9 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, return ExpandLibCall(LC, Node, isSigned); } -/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem -/// pairs. -SDValue SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, bool isSigned, - bool isDIV) { +/// isDivRemLibcallAvailable - Return true if divmod libcall is available. +static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, + const TargetLowering &TLI) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { default: assert(0 && "Unexpected request for libcall!"); @@ -2135,17 +2336,18 @@ SDValue SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, bool isSigned, case MVT::i128: LC= isSigned ? 
RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; } - if (!TLI.getLibcallName(LC)) - return SDValue(); + return TLI.getLibcallName(LC) != 0; +} - // Only issue divrem libcall if both quotient and remainder are needed. +/// UseDivRem - Only issue divrem libcall if both quotient and remainder are +/// needed. +static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) { unsigned OtherOpcode = 0; - if (isSigned) { + if (isSigned) OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV; - } else { + else OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV; - } - SDNode *OtherNode = 0; + SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), @@ -2155,32 +2357,28 @@ SDValue SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, bool isSigned, continue; if (User->getOpcode() == OtherOpcode && User->getOperand(0) == Op0 && - User->getOperand(1) == Op1) { - OtherNode = User; - break; - } + User->getOperand(1) == Op1) + return true; } - if (!OtherNode) - return SDValue(); + return false; +} - // If the libcall is already generated, no need to issue it again. - DenseMap<SDValue, SDValue>::iterator I - = LegalizedNodes.find(SDValue(OtherNode,0)); - if (I != LegalizedNodes.end()) { - OtherNode = I->second.getNode(); - SDNode *Chain = OtherNode->getOperand(0).getNode(); - for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end(); - UI != UE; ++UI) { - SDNode *User = *UI; - if (User == OtherNode) - continue; - if (isDIV) { - assert(User->getOpcode() == ISD::CopyFromReg); - } else { - assert(User->getOpcode() == ISD::LOAD); - } - return SDValue(User, 0); - } +/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem +/// pairs. +void +SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + unsigned Opcode = Node->getOpcode(); + bool isSigned = Opcode == ISD::SDIVREM; + + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: assert(0 && "Unexpected request for libcall!"); + case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; } // The input chain to this libcall is the entry node of the function. @@ -2221,14 +2419,15 @@ SDValue SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, bool isSigned, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that // was added by LowerCallTo (guaranteeing proper serialization of calls). LegalizeOp(CallInfo.second); // Remainder is loaded back from the stack frame. - SDValue Rem = DAG.getLoad(RetVT, dl, LastCALLSEQ_END, FIPtr, + SDValue Rem = DAG.getLoad(RetVT, dl, getLastCALLSEQ(), FIPtr, MachinePointerInfo(), false, false, 0); - return isDIV ? 
CallInfo.first : Rem; + Results.push_back(CallInfo.first); + Results.push_back(Rem); } /// ExpandLegalINT_TO_FP - This function is responsible for legalizing a @@ -2878,7 +3077,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, } case ISD::FP_ROUND_INREG: { // The only way we can lower this is to turn it into a TRUNCSTORE, - // EXTLOAD pair, targetting a temporary location (a stack slot). + // EXTLOAD pair, targeting a temporary location (a stack slot). // NOTE: there is a choice here between constantly creating new stack // slots and always reusing the same one. We currently always create @@ -3204,28 +3403,25 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; Tmp2 = Node->getOperand(0); Tmp3 = Node->getOperand(1); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || + (isDivRemLibcallAvailable(Node, isSigned, TLI) && + UseDivRem(Node, isSigned, false))) { Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { // X % Y -> X-X/Y*Y Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3); Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1); - } else if (isSigned) { - Tmp1 = ExpandDivRemLibCall(Node, true, false); - if (!Tmp1.getNode()) - Tmp1 = ExpandIntLibCall(Node, true, - RTLIB::SREM_I8, - RTLIB::SREM_I16, RTLIB::SREM_I32, - RTLIB::SREM_I64, RTLIB::SREM_I128); - } else { - Tmp1 = ExpandDivRemLibCall(Node, false, false); - if (!Tmp1.getNode()) - Tmp1 = ExpandIntLibCall(Node, false, - RTLIB::UREM_I8, - RTLIB::UREM_I16, RTLIB::UREM_I32, - RTLIB::UREM_I64, RTLIB::UREM_I128); - } + } else if (isSigned) + Tmp1 = ExpandIntLibCall(Node, true, + RTLIB::SREM_I8, + RTLIB::SREM_I16, RTLIB::SREM_I32, + RTLIB::SREM_I64, RTLIB::SREM_I128); + else + Tmp1 = ExpandIntLibCall(Node, false, + RTLIB::UREM_I8, + RTLIB::UREM_I16, RTLIB::UREM_I32, + RTLIB::UREM_I64, RTLIB::UREM_I128); Results.push_back(Tmp1); break; } @@ -3235,26 +3431,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, unsigned DivRemOpc = isSigned ? 
ISD::SDIVREM : ISD::UDIVREM; EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || + (isDivRemLibcallAvailable(Node, isSigned, TLI) && + UseDivRem(Node, isSigned, true))) Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), Node->getOperand(1)); - else if (isSigned) { - Tmp1 = ExpandDivRemLibCall(Node, true, true); - if (!Tmp1.getNode()) { - Tmp1 = ExpandIntLibCall(Node, true, - RTLIB::SDIV_I8, - RTLIB::SDIV_I16, RTLIB::SDIV_I32, - RTLIB::SDIV_I64, RTLIB::SDIV_I128); - } - } else { - Tmp1 = ExpandDivRemLibCall(Node, false, true); - if (!Tmp1.getNode()) { - Tmp1 = ExpandIntLibCall(Node, false, - RTLIB::UDIV_I8, - RTLIB::UDIV_I16, RTLIB::UDIV_I32, - RTLIB::UDIV_I64, RTLIB::UDIV_I128); - } - } + else if (isSigned) + Tmp1 = ExpandIntLibCall(Node, true, + RTLIB::SDIV_I8, + RTLIB::SDIV_I16, RTLIB::SDIV_I32, + RTLIB::SDIV_I64, RTLIB::SDIV_I128); + else + Tmp1 = ExpandIntLibCall(Node, false, + RTLIB::UDIV_I8, + RTLIB::UDIV_I16, RTLIB::UDIV_I32, + RTLIB::UDIV_I64, RTLIB::UDIV_I128); Results.push_back(Tmp1); break; } @@ -3271,6 +3462,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(Tmp1.getValue(1)); break; } + case ISD::SDIVREM: + case ISD::UDIVREM: + // Expand into divrem libcall + ExpandDivRemLibCall(Node, Results); + break; case ISD::MUL: { EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); @@ -3355,6 +3551,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::UMULO: case ISD::SMULO: { EVT VT = Node->getValueType(0); + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); SDValue BottomHalf; @@ -3372,7 +3569,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TopHalf = BottomHalf.getValue(1); } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2))) { - EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); @@ -3385,7 +3581,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // have a libcall big enough. // Also, we can fall back to a division in some cases, but that's a big // performance hit in the general case. - EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (WideVT == MVT::i16) LC = RTLIB::MUL_I16; @@ -3396,15 +3591,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, else if (WideVT == MVT::i128) LC = RTLIB::MUL_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); - LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); - RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); - SDValue Ret = ExpandLibCall(LC, Node, isSigned); - BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret); - TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret, - DAG.getConstant(VT.getSizeInBits(), TLI.getPointerTy())); - TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf); + // The high part is obtained by SRA'ing all but one of the bits of low + // part. 
+ unsigned LoSize = VT.getSizeInBits(); + SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, RHS, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, LHS, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + + // Here we're passing the 2 arguments explicitly as 4 arguments that are + // pre-lowered to the correct types. This all depends upon WideVT not + // being a legal type for the architecture, so each wide argument has to + // be split into two arguments. + SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; + SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); + BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, + DAG.getIntPtrConstant(0)); + TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, + DAG.getIntPtrConstant(1)); } + if (isSigned) { Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy(BottomHalf.getValueType())); @@ -3486,9 +3693,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Tmp2.getOperand(0), Tmp2.getOperand(1), Node->getOperand(2)); } else { + // We test only the i1 bit. Skip the AND if UNDEF. + Tmp3 = (Tmp2.getOpcode() == ISD::UNDEF) ? Tmp2 : + DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2, + DAG.getConstant(1, Tmp2.getValueType())); Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, - DAG.getCondCode(ISD::SETNE), Tmp2, - DAG.getConstant(0, Tmp2.getValueType()), + DAG.getCondCode(ISD::SETNE), Tmp3, + DAG.getConstant(0, Tmp3.getValueType()), Node->getOperand(2)); } Results.push_back(Tmp1); @@ -3539,7 +3750,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, dl); - LastCALLSEQ_END = DAG.getEntryNode(); + assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?"); + setLastCALLSEQ(DAG.getEntryNode()); assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); @@ -3697,9 +3909,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, // SelectionDAG::Legalize - This is the entry point for the file. // -void SelectionDAG::Legalize(CodeGenOpt::Level OptLevel) { +void SelectionDAG::Legalize() { /// run - This is the main entry point to this class.
/// - SelectionDAGLegalize(*this, OptLevel).LegalizeDAG(); + SelectionDAGLegalize(*this).LegalizeDAG(); } - diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 935aab0..da75b8a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -73,6 +73,17 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break; case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break; + case ISD::EXTRACT_SUBVECTOR: + Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break; + case ISD::VECTOR_SHUFFLE: + Res = PromoteIntRes_VECTOR_SHUFFLE(N); break; + case ISD::INSERT_VECTOR_ELT: + Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break; + case ISD::BUILD_VECTOR: + Res = PromoteIntRes_BUILD_VECTOR(N); break; + case ISD::SCALAR_TO_VECTOR: + Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break; + case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break; @@ -174,24 +185,30 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { default: assert(false && "Unknown type action!"); break; - case Legal: + case TargetLowering::TypeLegal: break; - case PromoteInteger: + case TargetLowering::TypePromoteInteger: if (NOutVT.bitsEq(NInVT)) // The input promotes to the same size. Convert the promoted value. return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp)); + if (NInVT.isVector()) + // Promote vector element via memory load/store. + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + CreateStackStoreLoad(InOp, OutVT)); break; - case SoftenFloat: + case TargetLowering::TypeSoftenFloat: // Promote the integer operand by hand. return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp)); - case ExpandInteger: - case ExpandFloat: + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: break; - case ScalarizeVector: + case TargetLowering::TypeScalarizeVector: // Convert the element to an integer and promote it by hand. - return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, - BitConvertToInteger(GetScalarizedVector(InOp))); - case SplitVector: { + if (!NOutVT.isVector()) + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + BitConvertToInteger(GetScalarizedVector(InOp))); + break; + case TargetLowering::TypeSplitVector: { // For example, i32 = BITCAST v2i16 on alpha. Convert the split // pieces of the input into integers and reassemble in the final type. SDValue Lo, Hi; @@ -208,7 +225,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { JoinIntegers(Lo, Hi)); return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp); } - case WidenVector: + case TargetLowering::TypeWidenVector: if (OutVT.bitsEq(NInVT)) // The input is widened to the same size. Convert to the widened value. 
return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp)); @@ -342,7 +359,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); DebugLoc dl = N->getDebugLoc(); - if (getTypeAction(N->getOperand(0).getValueType()) == PromoteInteger) { + if (getTypeAction(N->getOperand(0).getValueType()) + == TargetLowering::TypePromoteInteger) { SDValue Res = GetPromotedInteger(N->getOperand(0)); assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!"); @@ -507,11 +525,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { switch (getTypeAction(N->getOperand(0).getValueType())) { default: llvm_unreachable("Unknown type action!"); - case Legal: - case ExpandInteger: + case TargetLowering::TypeLegal: + case TargetLowering::TypeExpandInteger: Res = N->getOperand(0); break; - case PromoteInteger: + case TargetLowering::TypePromoteInteger: Res = GetPromotedInteger(N->getOperand(0)); break; } @@ -557,9 +575,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { DebugLoc DL = N->getDebugLoc(); EVT SmallVT = LHS.getValueType(); - // To determine if the result overflowed in a larger type, we extend the input - // to the larger type, do the multiply, then check the high bits of the result - // to see if the overflow happened. + // To determine if the result overflowed in a larger type, we extend the + // input to the larger type, do the multiply, then check the high bits of + // the result to see if the overflow happened. if (N->getOpcode() == ISD::SMULO) { LHS = SExtPromotedInteger(LHS); RHS = SExtPromotedInteger(RHS); @@ -569,8 +587,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { } SDValue Mul = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS); - // Overflow occurred iff the high part of the result does not zero/sign-extend - // the low part. + // Overflow occurred iff the high part of the result does not + // zero/sign-extend the low part. SDValue Overflow; if (N->getOpcode() == ISD::UMULO) { // Unsigned overflow occurred iff the high part is non-zero. @@ -672,6 +690,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break; case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break; case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break; + case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break; + case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONVERT_RNDSAT: Res = PromoteIntOp_CONVERT_RNDSAT(N); break; case ISD::INSERT_VECTOR_ELT: @@ -952,7 +972,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { DebugLoc dl = N->getDebugLoc(); SDValue Op = GetPromotedInteger(N->getOperand(0)); Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); - return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType()); + return DAG.getZeroExtendInReg(Op, dl, + N->getOperand(0).getValueType().getScalarType()); } @@ -1513,7 +1534,8 @@ void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, } else { // For example, extension of an i48 to an i64. The operand type necessarily // promotes to the result type, so will end up being expanded too. 
- assert(getTypeAction(Op.getValueType()) == PromoteInteger && + assert(getTypeAction(Op.getValueType()) == + TargetLowering::TypePromoteInteger && "Only know how to promote this result!"); SDValue Res = GetPromotedInteger(Op); assert(Res.getValueType() == N->getValueType(0) && @@ -2030,7 +2052,8 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, } else { // For example, extension of an i48 to an i64. The operand type necessarily // promotes to the result type, so will end up being expanded too. - assert(getTypeAction(Op.getValueType()) == PromoteInteger && + assert(getTypeAction(Op.getValueType()) == + TargetLowering::TypePromoteInteger && "Only know how to promote this result!"); SDValue Res = GetPromotedInteger(Op); assert(Res.getValueType() == N->getValueType(0) && @@ -2178,7 +2201,8 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, } else { // For example, extension of an i48 to an i64. The operand type necessarily // promotes to the result type, so will end up being expanded too. - assert(getTypeAction(Op.getValueType()) == PromoteInteger && + assert(getTypeAction(Op.getValueType()) == + TargetLowering::TypePromoteInteger && "Only know how to promote this result!"); SDValue Res = GetPromotedInteger(Op); assert(Res.getValueType() == N->getValueType(0) && @@ -2613,3 +2637,158 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { "Don't know how to expand this UINT_TO_FP!"); return MakeLibCall(LC, DstVT, &Op, 1, true, dl); } + +SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { + SDValue InOp0 = N->getOperand(0); + EVT InVT = InOp0.getValueType(); + EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + unsigned OutNumElems = N->getValueType(0).getVectorNumElements(); + EVT NOutVTElem = NOutVT.getVectorElementType(); + + DebugLoc dl = N->getDebugLoc(); + SDValue BaseIdx = N->getOperand(1); + + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i != OutNumElems; ++i) { + + // Extract the element from the original vector. + SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(), + BaseIdx, DAG.getIntPtrConstant(i)); + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + InVT.getVectorElementType(), N->getOperand(0), Index); + + SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, Ext); + // Insert the converted element to the new vector. 
+ Ops.push_back(Op); + } + + return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size()); +} + + +SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) { + + ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N); + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + unsigned NumElts = VT.getVectorNumElements(); + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + NewMask.push_back(SV->getMaskElt(i)); + } + + SDValue V0 = GetPromotedInteger(N->getOperand(0)); + SDValue V1 = GetPromotedInteger(N->getOperand(1)); + EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + + return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]); +} + + +SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { + + SDValue InOp0 = N->getOperand(0); + EVT InVT = InOp0.getValueType(); + EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + unsigned NumElems = N->getNumOperands(); + EVT NOutVTElem = NOutVT.getVectorElementType(); + + DebugLoc dl = N->getDebugLoc(); + + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i != NumElems; ++i) { + SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i)); + Ops.push_back(Op); + } + + return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size()); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { + + DebugLoc dl = N->getDebugLoc(); + + SDValue InOp0 = N->getOperand(0); + EVT InVT = InOp0.getValueType(); + EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); + assert(!InVT.isVector() && "Input must be a scalar"); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + EVT NOutVTElem = NOutVT.getVectorElementType(); + + SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(0)); + + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { + + SDValue InOp0 = N->getOperand(0); + EVT InVT = InOp0.getValueType(); + EVT InElVT = InVT.getVectorElementType(); + EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + + EVT NOutVTElem = NOutVT.getVectorElementType(); + + DebugLoc dl = N->getDebugLoc(); + + SDValue ConvertedVector = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp0); + + SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl, + NOutVTElem, N->getOperand(1)); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NOutVT, + ConvertedVector, ConvElem, N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + SDValue V0 = GetPromotedInteger(N->getOperand(0)); + SDValue V1 = N->getOperand(1); + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + V0->getValueType(0).getScalarType(), V0, V1); + + return DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), Ext); + +} + +SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { + + DebugLoc dl = N->getDebugLoc(); + + EVT RetSclrTy = N->getValueType(0).getVectorElementType(); + + SmallVector<SDValue, 8> NewOps; + + 
// For each incoming vector + for (unsigned VecIdx = 0, E = N->getNumOperands(); VecIdx!= E; ++VecIdx) { + SDValue Incoming = GetPromotedInteger(N->getOperand(VecIdx)); + EVT SclrTy = Incoming->getValueType(0).getVectorElementType(); + unsigned NumElem = Incoming->getValueType(0).getVectorNumElements(); + + for (unsigned i=0; i<NumElem; ++i) { + // Extract element from incoming vector + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, + Incoming, DAG.getIntPtrConstant(i)); + SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex); + NewOps.push_back(Tr); + } + } + + return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0), + &NewOps[0], NewOps.size()); + } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index cedda7e..ba658b0 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -224,38 +224,38 @@ bool DAGTypeLegalizer::run() { switch (getTypeAction(ResultVT)) { default: assert(false && "Unknown action!"); - case Legal: + case TargetLowering::TypeLegal: break; // The following calls must take care of *all* of the node's results, // not just the illegal result they were passed (this includes results // with a legal type). Results can be remapped using ReplaceValueWith, // or their promoted/expanded/etc values registered in PromotedIntegers, // ExpandedIntegers etc. - case PromoteInteger: + case TargetLowering::TypePromoteInteger: PromoteIntegerResult(N, i); Changed = true; goto NodeDone; - case ExpandInteger: + case TargetLowering::TypeExpandInteger: ExpandIntegerResult(N, i); Changed = true; goto NodeDone; - case SoftenFloat: + case TargetLowering::TypeSoftenFloat: SoftenFloatResult(N, i); Changed = true; goto NodeDone; - case ExpandFloat: + case TargetLowering::TypeExpandFloat: ExpandFloatResult(N, i); Changed = true; goto NodeDone; - case ScalarizeVector: + case TargetLowering::TypeScalarizeVector: ScalarizeVectorResult(N, i); Changed = true; goto NodeDone; - case SplitVector: + case TargetLowering::TypeSplitVector: SplitVectorResult(N, i); Changed = true; goto NodeDone; - case WidenVector: + case TargetLowering::TypeWidenVector: WidenVectorResult(N, i); Changed = true; goto NodeDone; @@ -277,36 +277,36 @@ ScanOperands: switch (getTypeAction(OpVT)) { default: assert(false && "Unknown action!"); - case Legal: + case TargetLowering::TypeLegal: continue; // The following calls must either replace all of the node's results // using ReplaceValueWith, and return "false"; or update the node's // operands in place, and return "true". 
- case PromoteInteger: + case TargetLowering::TypePromoteInteger: NeedsReanalyzing = PromoteIntegerOperand(N, i); Changed = true; break; - case ExpandInteger: + case TargetLowering::TypeExpandInteger: NeedsReanalyzing = ExpandIntegerOperand(N, i); Changed = true; break; - case SoftenFloat: + case TargetLowering::TypeSoftenFloat: NeedsReanalyzing = SoftenFloatOperand(N, i); Changed = true; break; - case ExpandFloat: + case TargetLowering::TypeExpandFloat: NeedsReanalyzing = ExpandFloatOperand(N, i); Changed = true; break; - case ScalarizeVector: + case TargetLowering::TypeScalarizeVector: NeedsReanalyzing = ScalarizeVectorOperand(N, i); Changed = true; break; - case SplitVector: + case TargetLowering::TypeSplitVector: NeedsReanalyzing = SplitVectorOperand(N, i); Changed = true; break; - case WidenVector: + case TargetLowering::TypeWidenVector: NeedsReanalyzing = WidenVectorOperand(N, i); Changed = true; break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 5409b88..06dc40f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -57,16 +57,6 @@ public: // 1+ - This is a node which has this many unprocessed operands. }; private: - enum LegalizeAction { - Legal, // The target natively supports this type. - PromoteInteger, // Replace this integer type with a larger one. - ExpandInteger, // Split this integer type into two of half the size. - SoftenFloat, // Convert this float type to a same size integer type. - ExpandFloat, // Split this float type into two of half the size. - ScalarizeVector, // Replace this one-element vector with its element type. - SplitVector, // Split this vector type into two of half the size. - WidenVector // This vector type should be widened into a larger vector. - }; /// ValueTypeActions - This is a bitvector that contains two bits for each /// simple value type, where the two bits correspond to the LegalizeAction @@ -74,41 +64,13 @@ private: TargetLowering::ValueTypeActionImpl ValueTypeActions; /// getTypeAction - Return how we should legalize values of this type. - LegalizeAction getTypeAction(EVT VT) const { - switch (ValueTypeActions.getTypeAction(VT)) { - default: - assert(false && "Unknown legalize action!"); - case TargetLowering::Legal: - return Legal; - case TargetLowering::Promote: - // Promote can mean - // 1) For integers, use a larger integer type (e.g. i8 -> i32). - // 2) For vectors, use a wider vector type (e.g. v3i32 -> v4i32). - if (!VT.isVector()) - return PromoteInteger; - return WidenVector; - case TargetLowering::Expand: - // Expand can mean - // 1) split scalar in half, 2) convert a float to an integer, - // 3) scalarize a single-element vector, 4) split a vector in two. - if (!VT.isVector()) { - if (VT.isInteger()) - return ExpandInteger; - if (VT.getSizeInBits() == - TLI.getTypeToTransformTo(*DAG.getContext(), VT).getSizeInBits()) - return SoftenFloat; - return ExpandFloat; - } - - if (VT.getVectorNumElements() == 1) - return ScalarizeVector; - return SplitVector; - } + TargetLowering::LegalizeTypeAction getTypeAction(EVT VT) const { + return TLI.getTypeAction(*DAG.getContext(), VT); } /// isTypeLegal - Return true if this type is legal on this target. bool isTypeLegal(EVT VT) const { - return ValueTypeActions.getTypeAction(VT) == TargetLowering::Legal; + return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal; } /// IgnoreNodeResults - Pretend all of this node's results are legal. 
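With the private enum gone, every case label in this patch names the shared TargetLowering action directly. As a reading aid, here is a rough standalone restatement of the decision tree the deleted getTypeAction body encoded (the VT struct and classify* helpers are invented for illustration; the enum values mirror the TargetLowering names):

#include <cstdio>

enum LegalizeTypeAction {
  TypeLegal, TypePromoteInteger, TypeExpandInteger, TypeSoftenFloat,
  TypeExpandFloat, TypeScalarizeVector, TypeSplitVector, TypeWidenVector
};

struct VT { bool IsVector, IsInteger; unsigned Bits, NumElts; };

// "Promote" meant: grow scalars (i8 -> i32) but widen vectors (v3i32 -> v4i32).
LegalizeTypeAction classifyPromote(const VT &V) {
  return V.IsVector ? TypeWidenVector : TypePromoteInteger;
}

// "Expand" meant: split scalar integers in half, soften a float whose
// transformed type has the same size, split bigger floats, scalarize
// one-element vectors, and split every other vector in two.
LegalizeTypeAction classifyExpand(const VT &V, unsigned TransformedBits) {
  if (!V.IsVector) {
    if (V.IsInteger) return TypeExpandInteger;
    if (V.Bits == TransformedBits) return TypeSoftenFloat;
    return TypeExpandFloat;
  }
  return V.NumElts == 1 ? TypeScalarizeVector : TypeSplitVector;
}

int main() {
  VT I8 = {false, true, 8, 1};    // scalar i8 under Promote
  VT V3I32 = {true, true, 96, 3}; // v3i32 under Promote
  std::printf("%d %d\n", classifyPromote(I8), classifyPromote(V3I32));
  return 0;
}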
@@ -239,7 +201,7 @@ private: EVT OldVT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); Op = GetPromotedInteger(Op); - return DAG.getZeroExtendInReg(Op, dl, OldVT); + return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType()); } // Integer Result Promotion. @@ -248,6 +210,11 @@ private: SDValue PromoteIntRes_AssertZext(SDNode *N); SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); SDValue PromoteIntRes_Atomic2(AtomicSDNode *N); + SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N); + SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N); + SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N); + SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_BITCAST(SDNode *N); SDValue PromoteIntRes_BSWAP(SDNode *N); SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); @@ -289,6 +256,9 @@ private: SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N); SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N); SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N); + SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); SDValue PromoteIntOp_MEMBARRIER(SDNode *N); SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index a75ae87..85ea6b6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -43,36 +43,36 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { switch (getTypeAction(InVT)) { default: assert(false && "Unknown type action!"); - case Legal: - case PromoteInteger: + case TargetLowering::TypeLegal: + case TargetLowering::TypePromoteInteger: break; - case SoftenFloat: + case TargetLowering::TypeSoftenFloat: // Convert the integer operand instead. SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; - case ExpandInteger: - case ExpandFloat: + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: // Convert the expanded pieces of the input. GetExpandedOp(InOp, Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; - case SplitVector: + case TargetLowering::TypeSplitVector: GetSplitVector(InOp, Lo, Hi); if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; - case ScalarizeVector: + case TargetLowering::TypeScalarizeVector: // Convert the element instead. 
SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; - case WidenVector: { + case TargetLowering::TypeWidenVector: { assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST"); InOp = GetWidenedVector(InOp); EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 0b4dd35..b5698f9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -526,13 +526,13 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, switch (getTypeAction(InVT)) { default: assert(false && "Unknown type action!"); - case Legal: - case PromoteInteger: - case SoftenFloat: - case ScalarizeVector: + case TargetLowering::TypeLegal: + case TargetLowering::TypePromoteInteger: + case TargetLowering::TypeSoftenFloat: + case TargetLowering::TypeScalarizeVector: break; - case ExpandInteger: - case ExpandFloat: + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: // A scalar to vector conversion, where the scalar needs expansion. // If the vector is being split in two then we can just convert the // expanded pieces. @@ -545,7 +545,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, return; } break; - case SplitVector: + case TargetLowering::TypeSplitVector: // If the input is a vector that needs to be split, convert each split // piece of the input now. GetSplitVector(InOp, Lo, Hi); @@ -774,7 +774,7 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, EVT InVT = N->getOperand(0).getValueType(); switch (getTypeAction(InVT)) { default: llvm_unreachable("Unexpected type action!"); - case Legal: { + case TargetLowering::TypeLegal: { EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), @@ -783,10 +783,21 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, DAG.getIntPtrConstant(InNVT.getVectorNumElements())); break; } - case SplitVector: + case TargetLowering::TypePromoteInteger: { + SDValue InOp = GetPromotedInteger(N->getOperand(0)); + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), + InOp.getValueType().getVectorElementType(), + LoVT.getVectorNumElements()); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + break; + } + case TargetLowering::TypeSplitVector: GetSplitVector(N->getOperand(0), Lo, Hi); break; - case WidenVector: { + case TargetLowering::TypeWidenVector: { // If the result needs to be split and the input needs to be widened, // the two types must have different lengths. Use the widened result // and extract from it to do the split. 
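The new TargetLowering::TypePromoteInteger case above reduces "split a vector whose elements need promotion" to two subvector extracts on the promoted value. A scalar stand-in, assuming an i8-to-i32 promotion; splitPromoted and the container types are illustrative, not LLVM API, and where the DAG uses ANY_EXTEND the sketch's implicit sign-extension is just one legal choice of high bits:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

// Promote every lane first, then take two contiguous "subvector extracts"
// at element offsets 0 and LoN, mirroring the two EXTRACT_SUBVECTOR nodes
// built in the hunk above.
std::pair<std::vector<int32_t>, std::vector<int32_t> >
splitPromoted(const std::vector<int8_t> &In) {
  std::vector<int32_t> Promoted(In.begin(), In.end()); // per-lane extension
  const std::size_t LoN = Promoted.size() / 2;
  std::vector<int32_t> Lo(Promoted.begin(), Promoted.begin() + LoN);
  std::vector<int32_t> Hi(Promoted.begin() + LoN, Promoted.end());
  return std::make_pair(Lo, Hi);
}

int main() {
  std::vector<int8_t> In;
  for (int i = 1; i <= 4; ++i) In.push_back((int8_t)i); // a v4i8 stand-in
  std::pair<std::vector<int32_t>, std::vector<int32_t> > Halves =
      splitPromoted(In);
  std::printf("lo=%zu hi=%zu\n", Halves.first.size(), Halves.second.size());
  return 0;
}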
@@ -1439,7 +1450,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { unsigned Opcode = N->getOpcode(); unsigned InVTNumElts = InVT.getVectorNumElements(); - if (getTypeAction(InVT) == WidenVector) { + if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(N->getOperand(0)); InVT = InOp.getValueType(); InVTNumElts = InVT.getVectorNumElements(); @@ -1515,7 +1526,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { SDValue ShOp = N->getOperand(1); EVT ShVT = ShOp.getValueType(); - if (getTypeAction(ShVT) == WidenVector) { + if (getTypeAction(ShVT) == TargetLowering::TypeWidenVector) { ShOp = GetWidenedVector(ShOp); ShVT = ShOp.getValueType(); } @@ -1557,9 +1568,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { default: assert(false && "Unknown type action!"); break; - case Legal: + case TargetLowering::TypeLegal: break; - case PromoteInteger: + case TargetLowering::TypePromoteInteger: // If the InOp is promoted to the same size, convert it. Otherwise, // fall out of the switch and widen the promoted input. InOp = GetPromotedInteger(InOp); @@ -1567,13 +1578,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { if (WidenVT.bitsEq(InVT)) return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); break; - case SoftenFloat: - case ExpandInteger: - case ExpandFloat: - case ScalarizeVector: - case SplitVector: + case TargetLowering::TypeSoftenFloat: + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: + case TargetLowering::TypeScalarizeVector: + case TargetLowering::TypeSplitVector: break; - case WidenVector: + case TargetLowering::TypeWidenVector: // If the InOp is widened to the same size, convert it. Otherwise, fall // out of the switch and widen the widened input. InOp = GetWidenedVector(InOp); @@ -1653,7 +1664,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { unsigned NumOperands = N->getNumOperands(); bool InputWidened = false; // Indicates we need to widen the input. - if (getTypeAction(InVT) != WidenVector) { + if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) { if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) { // Add undef vectors to widen to correct length. 
unsigned NumConcat = WidenVT.getVectorNumElements() / @@ -1732,7 +1743,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); unsigned InVTNumElts = InVT.getVectorNumElements(); - if (getTypeAction(InVT) == WidenVector) { + if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(InOp); InVT = InOp.getValueType(); InVTNumElts = InVT.getVectorNumElements(); @@ -1800,7 +1811,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue Idx = N->getOperand(1); DebugLoc dl = N->getDebugLoc(); - if (getTypeAction(InOp.getValueType()) == WidenVector) + if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) InOp = GetWidenedVector(InOp); EVT InVT = InOp.getValueType(); @@ -1882,7 +1893,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { EVT CondEltVT = CondVT.getVectorElementType(); EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenNumElts); - if (getTypeAction(CondVT) == WidenVector) + if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector) Cond1 = GetWidenedVector(Cond1); if (Cond1.getValueType() != CondWidenVT) @@ -2026,7 +2037,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { DebugLoc dl = N->getDebugLoc(); unsigned NumElts = VT.getVectorNumElements(); SDValue InOp = N->getOperand(0); - if (getTypeAction(InOp.getValueType()) == WidenVector) + if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) InOp = GetWidenedVector(InOp); EVT InVT = InOp.getValueType(); EVT InEltVT = InVT.getVectorElementType(); @@ -2081,7 +2092,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { unsigned NumOperands = N->getNumOperands(); for (unsigned i=0; i < NumOperands; ++i) { SDValue InOp = N->getOperand(i); - if (getTypeAction(InOp.getValueType()) == WidenVector) + if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, @@ -2153,6 +2164,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, if (MemVT.getSizeInBits() <= WidenEltWidth) break; if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && + isPowerOf2_32(WidenWidth / MemVTWidth) && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { RetVT = MemVT; @@ -2168,6 +2180,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, unsigned MemVTWidth = MemVT.getSizeInBits(); if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() && (WidenWidth % MemVTWidth) == 0 && + isPowerOf2_32(WidenWidth / MemVTWidth) && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index b2e9c15..f09b381 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -71,6 +71,7 @@ static cl::opt<bool> DisableSchedCycles( cl::desc("Disable cycle-level precision during preRA scheduling")); // Temporary sched=list-ilp flags until the heuristics are robust. +// Some options are also available under sched=list-hybrid. 
static cl::opt<bool> DisableSchedRegPressure( "disable-sched-reg-pressure", cl::Hidden, cl::init(false), cl::desc("Disable regpressure priority in sched=list-ilp")); @@ -80,6 +81,9 @@ static cl::opt<bool> DisableSchedLiveUses( static cl::opt<bool> DisableSchedVRegCycle( "disable-sched-vrcycle", cl::Hidden, cl::init(false), cl::desc("Disable virtual register cycle interference checks")); +static cl::opt<bool> DisableSchedPhysRegJoin( + "disable-sched-physreg-join", cl::Hidden, cl::init(false), + cl::desc("Disable physreg def-use affinity")); static cl::opt<bool> DisableSchedStalls( "disable-sched-stalls", cl::Hidden, cl::init(true), cl::desc("Disable no-stall priority in sched=list-ilp")); @@ -102,11 +106,11 @@ static cl::opt<unsigned> AvgIPC( #ifndef NDEBUG namespace { // For sched=list-ilp, Count the number of times each factor comes into play. - enum { FactPressureDiff, FactRegUses, FactHeight, FactDepth, FactStatic, - FactOther, NumFactors }; + enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth, + FactStatic, FactOther, NumFactors }; } static const char *FactorName[NumFactors] = -{"PressureDiff", "RegUses", "Height", "Depth","Static", "Other"}; +{"PressureDiff", "RegUses", "Stall", "Height", "Depth", "Static", "Other"}; static int FactorCount[NumFactors]; #endif //!NDEBUG @@ -272,6 +276,33 @@ private: }; } // end anonymous namespace +/// GetCostForDef - Looks up the register class and cost for a given definition. +/// Typically this just means looking up the representative register class, +/// but for untyped values (MVT::untyped) it means inspecting the node's +/// opcode to determine what register class is being generated. +static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, + const TargetLowering *TLI, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI, + unsigned &RegClass, unsigned &Cost) { + EVT VT = RegDefPos.GetValue(); + + // Special handling for untyped values. These values can only come from + // the expansion of custom DAG-to-DAG patterns. + if (VT == MVT::untyped) { + unsigned Opcode = RegDefPos.GetNode()->getMachineOpcode(); + unsigned Idx = RegDefPos.GetIdx(); + const TargetInstrDesc Desc = TII->get(Opcode); + const TargetRegisterClass *RC = Desc.getRegClass(Idx, TRI); + RegClass = RC->getID(); + // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a + // better way to determine it. + Cost = 1; + } else { + RegClass = TLI->getRepRegClassFor(VT)->getID(); + Cost = TLI->getRepRegClassCostFor(VT); + } +} /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGRRList::Schedule() { @@ -463,6 +494,13 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { if (DisableSchedCycles) return; + // FIXME: Nodes such as CopyFromReg probably should not advance the current + // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node + // has predecessors the cycle will be advanced when they are scheduled. + // But given the crude nature of modeling latency through such nodes, we + // currently need to treat these nodes like real instructions. + // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return; + unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth(); // Bump CurCycle to account for latency. We assume the latency of other @@ -533,6 +571,8 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) { } } +static void resetVRegCycle(SUnit *SU); + /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending /// count of its predecessors.
If a predecessor pending count is zero, add it to /// the Available queue. @@ -542,7 +582,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { #ifndef NDEBUG if (CurCycle < SU->getHeight()) - DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n"); + DEBUG(dbgs() << " Height [" << SU->getHeight() + << "] pipeline stall!\n"); #endif // FIXME: Do not modify node height. It may interfere with @@ -559,7 +600,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { AvailableQueue->ScheduledNode(SU); // If HazardRec is disabled, and each inst counts as one cycle, then - // advance CurCycle before ReleasePredecessors to avoid useles pushed to + // advance CurCycle before ReleasePredecessors to avoid useless pushes to // PendingQueue for schedulers that implement HasReadyFilter. if (!HazardRec->isEnabled() && AvgIPC < 2) AdvanceToCycle(CurCycle + 1); @@ -580,20 +621,25 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { } } + resetVRegCycle(SU); + SU->isScheduled = true; // Conditions under which the scheduler should eagerly advance the cycle: // (1) No available instructions // (2) All pipelines full, so available instructions must have hazards. // - // If HazardRec is disabled, the cycle was advanced earlier. + // If HazardRec is disabled, the cycle was pre-advanced before calling + // ReleasePredecessors. In that case, IssueCount should remain 0. // // Check AvailableQueue after ReleasePredecessors in case of zero latency. - ++IssueCount; - if ((HazardRec->isEnabled() && HazardRec->atIssueLimit()) - || (!HazardRec->isEnabled() && AvgIPC > 1 && IssueCount == AvgIPC) - || AvailableQueue->empty()) - AdvanceToCycle(CurCycle + 1); + if (HazardRec->isEnabled() || AvgIPC > 1) { + if (SU->getNode() && SU->getNode()->isMachineOpcode()) + ++IssueCount; + if ((HazardRec->isEnabled() && HazardRec->atIssueLimit()) + || (!HazardRec->isEnabled() && IssueCount == AvgIPC)) + AdvanceToCycle(CurCycle + 1); + } } /// CapturePred - This does the opposite of ReleasePred. Since SU is being @@ -989,14 +1035,15 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) { // Check if Ref is live. - if (!LiveRegDefs[Reg]) continue; + if (!LiveRegDefs[*AliasI]) continue; // Allow multiple uses of the same def. - if (LiveRegDefs[Reg] == SU) continue; + if (LiveRegDefs[*AliasI] == SU) continue; // Add Reg to the set of interfering live regs. - if (RegAdded.insert(Reg)) - LRegs.push_back(Reg); + if (RegAdded.insert(*AliasI)) { + LRegs.push_back(*AliasI); + } } } @@ -1220,7 +1267,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // priority. If it is not ready put it back. Schedule the node. Sequence.reserve(SUnits.size()); while (!AvailableQueue->empty()) { - DEBUG(dbgs() << "\n*** Examining Available\n"; + DEBUG(dbgs() << "\nExamining Available:\n"; AvailableQueue->dump(this)); // Pick the best node to schedule taking all constraints into @@ -1349,6 +1396,21 @@ struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> { bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } }; +#ifndef NDEBUG +template<class SF> +struct reverse_sort : public queue_sort { + SF &SortFunc; + reverse_sort(SF &sf) : SortFunc(sf) {} + reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {} + + bool operator()(SUnit* left, SUnit* right) const { + // reverse left/right rather than simply !SortFunc(left, right) + // to expose different paths in the comparison logic. 
+ return SortFunc(right, left); + } +}; +#endif // NDEBUG + /// bu_ls_rr_sort - Priority function for bottom up register pressure // reduction scheduler. struct bu_ls_rr_sort : public queue_sort { @@ -1549,20 +1611,33 @@ protected: }; template<class SF> -class RegReductionPriorityQueue : public RegReductionPQBase { - static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) { - std::vector<SUnit *>::iterator Best = Q.begin(); - for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), - E = Q.end(); I != E; ++I) - if (Picker(*Best, *I)) - Best = I; - SUnit *V = *Best; - if (Best != prior(Q.end())) - std::swap(*Best, Q.back()); - Q.pop_back(); - return V; +static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { + std::vector<SUnit *>::iterator Best = Q.begin(); + for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), + E = Q.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + SUnit *V = *Best; + if (Best != prior(Q.end())) + std::swap(*Best, Q.back()); + Q.pop_back(); + return V; +} + +template<class SF> +SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { +#ifndef NDEBUG + if (DAG->StressSched) { + reverse_sort<SF> RPicker(Picker); + return popFromQueueImpl(Q, RPicker); } +#endif + (void)DAG; + return popFromQueueImpl(Q, Picker); +} +template<class SF> +class RegReductionPriorityQueue : public RegReductionPQBase { SF Picker; public: @@ -1583,7 +1658,7 @@ public: SUnit *pop() { if (Queue.empty()) return NULL; - SUnit *V = popFromQueue(Queue, Picker); + SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); V->NodeQueueId = 0; return V; } @@ -1593,7 +1668,7 @@ public: std::vector<SUnit*> DumpQueue = Queue; SF DumpPicker = Picker; while (!DumpQueue.empty()) { - SUnit *SU = popFromQueue(DumpQueue, DumpPicker); + SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); if (isBottomUp()) dbgs() << "Height " << SU->getHeight() << ": "; else @@ -1623,6 +1698,20 @@ ILPBURRPriorityQueue; // Static Node Priority for Register Pressure Reduction //===----------------------------------------------------------------------===// +// Check for special nodes that bypass scheduling heuristics. +// Currently this pushes TokenFactor nodes down, but may be used for other +// pseudo-ops as well. +// +// Return -1 to schedule right above left, 1 for left above right. +// Return 0 if no bias exists. +static int checkSpecialNodes(const SUnit *left, const SUnit *right) { + bool LSchedLow = left->isScheduleLow; + bool RSchedLow = right->isScheduleLow; + if (LSchedLow != RSchedLow) + return LSchedLow < RSchedLow ? 1 : -1; + return 0; +} + /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. /// Smaller number is the higher priority. static unsigned @@ -1661,17 +1750,6 @@ void RegReductionPQBase::CalculateSethiUllmanNumbers() { CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); } -void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { - SUnits = &sunits; - // Add pseudo dependency edges for two-address nodes. - AddPseudoTwoAddrDeps(); - // Reroute edges to nodes with multiple uses. - if (!TracksRegPressure) - PrescheduleNodesWithMultipleUses(); - // Calculate node priorities. 
- CalculateSethiUllmanNumbers(); -} - void RegReductionPQBase::addNode(const SUnit *SU) { unsigned SUSize = SethiUllmanNumbers.size(); if (SUnits->size() > SUSize) @@ -1710,7 +1788,17 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { // If SU does not have a register def, schedule it close to its uses // because it does not lengthen any live ranges. return 0; +#if 1 return SethiUllmanNumbers[SU->NodeNum]; +#else + unsigned Priority = SethiUllmanNumbers[SU->NodeNum]; + if (SU->isCallOp) { + // FIXME: This assumes all of the defs are used as call operands. + int NP = (int)Priority - SU->getNode()->getNumValues(); + return (NP > 0) ? NP : 0; + } + return Priority; +#endif } //===----------------------------------------------------------------------===// @@ -1746,8 +1834,10 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); RegDefPos.IsValid(); RegDefPos.Advance()) { EVT VT = RegDefPos.GetValue(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned Cost = TLI->getRepRegClassCostFor(VT); + + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) return true; } @@ -1858,9 +1948,10 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) { RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { if (SkipRegDefs) continue; - EVT VT = RegDefPos.GetValue(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + RegPressure[RCId] += Cost; break; } } @@ -1873,16 +1964,16 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) { RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { if (SkipRegDefs > 0) continue; - EVT VT = RegDefPos.GetValue(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) { + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + if (RegPressure[RCId] < Cost) { // Register pressure tracking is imprecise. This can happen. But we try // hard not to let it happen because it likely results in poor scheduling. DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n"); RegPressure[RCId] = 0; } else { - RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + RegPressure[RCId] -= Cost; } } dumpRegPressure(); @@ -2008,7 +2099,29 @@ static unsigned calcMaxScratches(const SUnit *SU) { return Scratches; } -/// hasOnlyLiveOutUse - Return true if SU has a single value successor that is a +/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are +/// CopyFromReg from a virtual register. +static bool hasOnlyLiveInOpers(const SUnit *SU) { + bool RetVal = false; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; + const SUnit *PredSU = I->getSUnit(); + if (PredSU->getNode() && + PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { + unsigned Reg = + cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + RetVal = true; + continue; + } + } + return false; + } + return RetVal; +} + +/// hasOnlyLiveOutUses - Return true if SU has only value successors that are /// CopyToReg to a virtual register. This SU def is probably a liveout and /// it has no other use. It should be scheduled closer to the terminator. 
static bool hasOnlyLiveOutUses(const SUnit *SU) { @@ -2030,62 +2143,71 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) { return RetVal; } -/// UnitsSharePred - Return true if the two scheduling units share a common -/// data predecessor. -static bool UnitsSharePred(const SUnit *left, const SUnit *right) { - SmallSet<const SUnit*, 4> Preds; - for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end(); +// Set isVRegCycle for a node with only live in opers and live out uses. Also +// set isVRegCycle for its CopyFromReg operands. +// +// This is only relevant for single-block loops, in which case the VRegCycle +// node is likely an induction variable in which the operand and target virtual +// registers should be coalesced (e.g. pre/post increment values). Setting the +// isVRegCycle flag helps the scheduler prioritize other uses of the same +// CopyFromReg so that this node becomes the virtual register "kill". This +// avoids interference between the values live in and out of the block and +// eliminates a copy inside the loop. +static void initVRegCycle(SUnit *SU) { + if (DisableSchedVRegCycle) + return; + + if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU)) + return; + + DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); + + SU->isVRegCycle = true; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { - if (I->isCtrl()) continue; // ignore chain preds - Preds.insert(I->getSUnit()); + if (I->isCtrl()) continue; + I->getSUnit()->isVRegCycle = true; } - for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end(); +} + +// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of +// CopyFromReg operands. We should no longer penalize other uses of this VReg. +static void resetVRegCycle(SUnit *SU) { + if (!SU->isVRegCycle) + return; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); I != E; ++I) { if (I->isCtrl()) continue; // ignore chain preds - if (Preds.count(I->getSUnit())) - return true; + SUnit *PredSU = I->getSUnit(); + if (PredSU->isVRegCycle) { + assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg && + "VRegCycle def must be CopyFromReg"); + I->getSUnit()->isVRegCycle = 0; + } } - return false; } -// Return true if the virtual register defined by VRCycleSU may interfere with -// VRUseSU. -// -// Note: We may consider two SU's that use the same value live into a loop as -// interferng even though the value is not an induction variable. This is an -// unfortunate consequence of scheduling on the selection DAG. -static bool checkVRegCycleInterference(const SUnit *VRCycleSU, - const SUnit *VRUseSU) { - for (SUnit::const_pred_iterator I = VRCycleSU->Preds.begin(), - E = VRCycleSU->Preds.end(); I != E; ++I) { +// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This +// means a node that defines the VRegCycle has not been scheduled yet. +static bool hasVRegCycleUse(const SUnit *SU) { + // If this SU also defines the VReg, don't hoist it as a "use". 
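// (Editorial note, not part of this patch: initVRegCycle, resetVRegCycle, and
// this predicate together replace the removed UnitsSharePred and
// BUCompareVRegCycle machinery. The flag is now computed once, only for
// single-block loops, charged as a one-cycle latency penalty in
// BUCompareLatency, and cleared from the CopyFromReg operands as soon as the
// defining node has been scheduled.)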
+ if (SU->isVRegCycle) + return false; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); + I != E; ++I) { if (I->isCtrl()) continue; // ignore chain preds - SDNode *InNode = I->getSUnit()->getNode(); - if (!InNode || InNode->getOpcode() != ISD::CopyFromReg) - continue; - for (SUnit::const_pred_iterator II = VRUseSU->Preds.begin(), - EE = VRUseSU->Preds.end(); II != EE; ++II) { - if (II->getSUnit() == I->getSUnit()) - return true; + if (I->getSUnit()->isVRegCycle && + I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { + DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n"); + return true; } } return false; } -// Compare the VRegCycle properties of the nodes. -// Return -1 if left has higher priority, 1 if right has higher priority. -// Return 0 if priority is equivalent. -static int BUCompareVRegCycle(const SUnit *left, const SUnit *right) { - if (left->isVRegCycle && !right->isVRegCycle) { - if (checkVRegCycleInterference(left, right)) - return -1; - } - else if (!left->isVRegCycle && right->isVRegCycle) { - if (checkVRegCycleInterference(right, left)) - return 1; - } - return 0; -} - // Check for either a dependence (latency) or resource (hazard) stall. // // Note: The ScheduleHazardRecognizer interface requires a non-const SU. @@ -2101,23 +2223,12 @@ static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) { // Return 0 if latency-based priority is equivalent. static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, RegReductionPQBase *SPQ) { - // If the two nodes share an operand and one of them has a single - // use that is a live out copy, favor the one that is live out. Otherwise - // it will be difficult to eliminate the copy if the instruction is a - // loop induction variable update. e.g. - // BB: - // sub r1, r3, #1 - // str r0, [r2, r3] - // mov r3, r1 - // cmp - // bne BB - bool SharePred = UnitsSharePred(left, right); - // FIXME: Only adjust if BB is a loop back edge. - // FIXME: What's the cost of a copy? - int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0; - int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0; - int LHeight = (int)left->getHeight() - LBonus; - int RHeight = (int)right->getHeight() - RBonus; + // Scheduling an instruction that uses a VReg whose postincrement has not yet + // been scheduled will induce a copy. Model this as an extra cycle of latency. + int LPenalty = hasVRegCycleUse(left) ? 1 : 0; + int RPenalty = hasVRegCycleUse(right) ? 1 : 0; + int LHeight = (int)left->getHeight() + LPenalty; + int RHeight = (int)right->getHeight() + RPenalty; bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) && BUHasStall(left, LHeight, SPQ); @@ -2128,48 +2239,102 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, // If scheduling either one of the node will cause a pipeline stall, sort // them according to their height. if (LStall) { - if (!RStall) + if (!RStall) { + DEBUG(++FactorCount[FactStall]); return 1; - if (LHeight != RHeight) + } + if (LHeight != RHeight) { + DEBUG(++FactorCount[FactStall]); return LHeight > RHeight ? 1 : -1; - } else if (RStall) + } + } else if (RStall) { + DEBUG(++FactorCount[FactStall]); return -1; + } // If either node is scheduling for latency, sort them by height/depth // and latency. 
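// (Editorial note, not part of this patch: SUnit::getHeight() is the longest
// path from the node to the end of the DAG and SUnit::getDepth() the longest
// path from the entry, so the one-cycle VRegCycle penalty above is folded
// into both metrics before the comparisons below consult them.)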
if (!checkPref || (left->SchedulingPref == Sched::Latency || right->SchedulingPref == Sched::Latency)) { if (DisableSchedCycles) { - if (LHeight != RHeight) + if (LHeight != RHeight) { + DEBUG(++FactorCount[FactHeight]); return LHeight > RHeight ? 1 : -1; + } } else { // If neither instruction stalls (!LStall && !RStall) then // its height is already covered so only its depth matters. We also reach // this if both stall but have the same height. - unsigned LDepth = left->getDepth(); - unsigned RDepth = right->getDepth(); + int LDepth = left->getDepth() - LPenalty; + int RDepth = right->getDepth() - RPenalty; if (LDepth != RDepth) { + DEBUG(++FactorCount[FactDepth]); DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum << ") depth " << LDepth << " vs SU (" << right->NodeNum << ") depth " << RDepth << "\n"); return LDepth < RDepth ? 1 : -1; } } - if (left->Latency != right->Latency) + if (left->Latency != right->Latency) { + DEBUG(++FactorCount[FactOther]); return left->Latency > right->Latency ? 1 : -1; + } } return 0; } static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { + // Schedule physical register definitions close to their use. This is + // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as + // long as shortening physreg live ranges is generally good, we can defer + // creating a subtarget hook. + if (!DisableSchedPhysRegJoin) { + bool LHasPhysReg = left->hasPhysRegDefs; + bool RHasPhysReg = right->hasPhysRegDefs; + if (LHasPhysReg != RHasPhysReg) { + DEBUG(++FactorCount[FactRegUses]); + #ifndef NDEBUG + const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"}; + #endif + DEBUG(dbgs() << " SU (" << left->NodeNum << ") " + << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") " + << PhysRegMsg[RHasPhysReg] << "\n"); + return LHasPhysReg < RHasPhysReg; + } + } + + // Prioritize by Sethi-Ullman number and push CopyToReg nodes down. unsigned LPriority = SPQ->getNodePriority(left); unsigned RPriority = SPQ->getNodePriority(right); + + // Be really careful about hoisting call operands above previous calls. + // Only allow it if it would reduce register pressure. + if (left->isCall && right->isCallOp) { + unsigned RNumVals = right->getNode()->getNumValues(); + RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0; + } + if (right->isCall && left->isCallOp) { + unsigned LNumVals = left->getNode()->getNumValues(); + LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0; + } + if (LPriority != RPriority) { DEBUG(++FactorCount[FactStatic]); return LPriority > RPriority; } - DEBUG(++FactorCount[FactOther]); + + // If one or both of the nodes are calls and their Sethi-Ullman numbers are + // the same, then keep source order. + if (left->isCall || right->isCall) { + unsigned LOrder = SPQ->getNodeOrdering(left); + unsigned ROrder = SPQ->getNodeOrdering(right); + + // Prefer an ordering where the lower the non-zero order number, the higher + // the preference. + if ((LOrder || ROrder) && LOrder != ROrder) + return LOrder != 0 && (LOrder < ROrder || ROrder == 0); + } // Try to schedule def + use closer when Sethi-Ullman numbers are the same. // e.g. @@ -2190,40 +2355,62 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { // This creates more short live intervals. 
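// (Editorial note, not part of this patch: closestSucc() summarizes how near
// a node's closest user is, so comparing LDist and RDist below is the step
// that actually pulls a def toward its use and keeps the resulting live
// interval short.)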
unsigned LDist = closestSucc(left); unsigned RDist = closestSucc(right); - if (LDist != RDist) + if (LDist != RDist) { + DEBUG(++FactorCount[FactOther]); return LDist < RDist; + } // How many registers becomes live when the node is scheduled. unsigned LScratch = calcMaxScratches(left); unsigned RScratch = calcMaxScratches(right); - if (LScratch != RScratch) + if (LScratch != RScratch) { + DEBUG(++FactorCount[FactOther]); return LScratch > RScratch; + } - if (!DisableSchedCycles) { + // Comparing latency against a call makes little sense unless the node + // is register pressure-neutral. + if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0)) + return (left->NodeQueueId > right->NodeQueueId); + + // Do not compare latencies when one or both of the nodes are calls. + if (!DisableSchedCycles && + !(left->isCall || right->isCall)) { int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); if (result != 0) return result > 0; } else { - if (left->getHeight() != right->getHeight()) + if (left->getHeight() != right->getHeight()) { + DEBUG(++FactorCount[FactHeight]); return left->getHeight() > right->getHeight(); + } - if (left->getDepth() != right->getDepth()) + if (left->getDepth() != right->getDepth()) { + DEBUG(++FactorCount[FactDepth]); return left->getDepth() < right->getDepth(); + } } assert(left->NodeQueueId && right->NodeQueueId && "NodeQueueId cannot be zero"); + DEBUG(++FactorCount[FactOther]); return (left->NodeQueueId > right->NodeQueueId); } // Bottom up bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + return BURRSort(left, right, SPQ); } // Source order, otherwise bottom up. bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + unsigned LOrder = SPQ->getNodeOrdering(left); unsigned ROrder = SPQ->getNodeOrdering(right); @@ -2255,6 +2442,9 @@ bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { // Return true if right should be scheduled with higher priority than left. bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + if (left->isCall || right->isCall) // No way to compute latency of calls. return BURRSort(left, right, SPQ); @@ -2264,24 +2454,22 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { // Avoid causing spills. If register pressure is high, schedule for // register pressure reduction. if (LHigh && !RHigh) { + DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" << right->NodeNum << ")\n"); return true; } else if (!LHigh && RHigh) { + DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" << left->NodeNum << ")\n"); return false; } - int result = 0; - if (!DisableSchedVRegCycle) { - result = BUCompareVRegCycle(left, right); - } - if (result == 0 && !LHigh && !RHigh) { - result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); + if (!LHigh && !RHigh) { + int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); + if (result != 0) + return result > 0; } - if (result != 0) - return result > 0; return BURRSort(left, right, SPQ); } @@ -2322,6 +2510,9 @@ static bool canEnableCoalescing(SUnit *SU) { // list-ilp is currently an experimental scheduler that allows various // heuristics to be enabled prior to the normal register reduction logic. 
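// (Editorial aside, not part of this patch.) After this change every priority
// function has the same shape: checkSpecialNodes() runs first so pseudo-ops
// such as TokenFactor bypass the heuristics, any scheduler-specific heuristic
// runs next, and BURRSort() is the shared fallback. A minimal self-contained
// sketch of that layering; ToySUnit, toyCheckSpecial, and toyFallback are
// invented names, not LLVM API:
struct ToySUnit {
  bool IsScheduleLow; // stands in for SUnit::isScheduleLow
  unsigned QueueId;   // stands in for NodeQueueId, the FIFO tie-breaker
};
// Mirrors checkSpecialNodes(): -1 schedules right above left, 1 schedules
// left above right, 0 means no bias.
static int toyCheckSpecial(const ToySUnit &L, const ToySUnit &R) {
  if (L.IsScheduleLow != R.IsScheduleLow)
    return L.IsScheduleLow < R.IsScheduleLow ? 1 : -1;
  return 0;
}
// Stand-in for the BURRSort() fallback chain.
static bool toyFallback(const ToySUnit &L, const ToySUnit &R) {
  return L.QueueId > R.QueueId;
}
// The comparator proper: bias special nodes first, then fall back.
static bool toyPriority(const ToySUnit &L, const ToySUnit &R) {
  if (int Res = toyCheckSpecial(L, R))
    return Res > 0;
  return toyFallback(L, R);
}
// (End of editorial aside.)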
bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + if (left->isCall || right->isCall) // No way to compute latency of calls. return BURRSort(left, right, SPQ); @@ -2347,12 +2538,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { if (RReduce && !LReduce) return true; } - if (!DisableSchedVRegCycle) { - int result = BUCompareVRegCycle(left, right); - if (result != 0) - return result > 0; - } - if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); @@ -2391,6 +2576,24 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { return BURRSort(left, right, SPQ); } +void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + // Add pseudo dependency edges for two-address nodes. + AddPseudoTwoAddrDeps(); + // Reroute edges to nodes with multiple uses. + if (!TracksRegPressure) + PrescheduleNodesWithMultipleUses(); + // Calculate node priorities. + CalculateSethiUllmanNumbers(); + + // For single block loops, mark nodes that look like canonical IV increments. + if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) { + for (unsigned i = 0, e = sunits.size(); i != e; ++i) { + initVRegCycle(&sunits[i]); + } + } +} + //===----------------------------------------------------------------------===// // Preschedule for Register Pressure //===----------------------------------------------------------------------===// @@ -2668,6 +2871,9 @@ static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, // Top down bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res < 0; + unsigned LPriority = SPQ->getNodePriority(left); unsigned RPriority = SPQ->getNodePriority(right); bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 24a1937..0d656ef 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -83,10 +83,13 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { SU->Latency = Old->Latency; SU->isVRegCycle = Old->isVRegCycle; SU->isCall = Old->isCall; + SU->isCallOp = Old->isCallOp; SU->isTwoAddress = Old->isTwoAddress; SU->isCommutable = Old->isCommutable; SU->hasPhysRegDefs = Old->hasPhysRegDefs; SU->hasPhysRegClobbers = Old->hasPhysRegClobbers; + SU->isScheduleHigh = Old->isScheduleHigh; + SU->isScheduleLow = Old->isScheduleLow; SU->SchedulingPref = Old->SchedulingPref; Old->isCloned = true; return SU; @@ -283,6 +286,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { Worklist.push_back(DAG->getRoot().getNode()); Visited.insert(DAG->getRoot().getNode()); + SmallVector<SUnit*, 8> CallSUnits; while (!Worklist.empty()) { SDNode *NI = Worklist.pop_back_val(); @@ -335,6 +339,15 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { if (!HasGlueUse) break; } + if (NodeSUnit->isCall) + CallSUnits.push_back(NodeSUnit); + + // Schedule zero-latency TokenFactor below any nodes that may increase the + // schedule height. Otherwise, ancestors of the TokenFactor may appear to + // have false stalls. 
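// (Editorial note, not part of this patch: isScheduleLow is exactly the flag
// that the new checkSpecialNodes() in ScheduleDAGRRList.cpp consults, so
// marking the TokenFactor here pushes it below other ready nodes in every
// priority queue; the matching zero-latency treatment of its chain edges
// follows in AddSchedEdges() and ComputeLatency() below.)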
+ if (NI->getOpcode() == ISD::TokenFactor) + NodeSUnit->isScheduleLow = true; + // If there are glue operands involved, N is now the bottom-most node // of the sequence of nodes that are glued together. // Update the SUnit. @@ -342,16 +355,26 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); - // Set isVRegCycle if the node operands are live into and value is live out - // of a single block loop. - InitVRegCycleFlag(NodeSUnit); - // Compute NumRegDefsLeft. This must be done before AddSchedEdges. InitNumRegDefsLeft(NodeSUnit); // Assign the Latency field of NodeSUnit using target-provided information. ComputeLatency(NodeSUnit); } + + // Find all call operands. + while (!CallSUnits.empty()) { + SUnit *SU = CallSUnits.pop_back_val(); + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->getOpcode() != ISD::CopyToReg) + continue; + SDNode *SrcN = SUNode->getOperand(2).getNode(); + if (isPassiveNode(SrcN)) continue; // Not scheduled. + SUnit *SrcSU = &SUnits[SrcN->getNodeId()]; + SrcSU->isCallOp = true; + } + } } void ScheduleDAGSDNodes::AddSchedEdges() { @@ -412,11 +435,15 @@ void ScheduleDAGSDNodes::AddSchedEdges() { // it requires a cross class copy (cost < 0). That means we are only // treating "expensive to copy" register dependency as physical register // dependency. This may change in the future though. - if (Cost >= 0) + if (Cost >= 0 && !StressSched) PhysReg = 0; // If this is a ctrl dep, latency is 1. unsigned OpLatency = isChain ? 1 : OpSU->Latency; + // Special-case TokenFactor chains as zero-latency. + if(isChain && OpN->getOpcode() == ISD::TokenFactor) + OpLatency = 0; + const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, OpLatency, PhysReg); if (!isChain && !UnitLatencies) { @@ -512,47 +539,6 @@ void ScheduleDAGSDNodes::RegDefIter::Advance() { } } -// Set isVRegCycle if this node's single use is CopyToReg and its only active -// data operands are CopyFromReg. -// -// This is only relevant for single-block loops, in which case the VRegCycle -// node is likely an induction variable in which the operand and target virtual -// registers should be coalesced (e.g. pre/post increment values). Setting the -// isVRegCycle flag helps the scheduler prioritize other uses of the same -// CopyFromReg so that this node becomes the virtual register "kill". This -// avoids interference between the values live in and out of the block and -// eliminates a copy inside the loop. 
-void ScheduleDAGSDNodes::InitVRegCycleFlag(SUnit *SU) { - if (!BB->isSuccessor(BB)) - return; - - SDNode *N = SU->getNode(); - if (N->getGluedNode()) - return; - - if (!N->hasOneUse() || N->use_begin()->getOpcode() != ISD::CopyToReg) - return; - - bool FoundLiveIn = false; - for (SDNode::op_iterator OI = N->op_begin(), E = N->op_end(); OI != E; ++OI) { - EVT OpVT = OI->getValueType(); - assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!"); - - if (OpVT == MVT::Other) - continue; // ignore chain operands - - if (isPassiveNode(OI->getNode())) - continue; // ignore constants and such - - if (OI->getNode()->getOpcode() != ISD::CopyFromReg) - return; - - FoundLiveIn = true; - } - if (FoundLiveIn) - SU->isVRegCycle = true; -} - void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { assert(SU->NumRegDefsLeft == 0 && "expect a new node"); for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) { @@ -562,6 +548,16 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { } void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { + SDNode *N = SU->getNode(); + + // TokenFactor operands are considered zero latency, and some schedulers + // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero + // whenever node latency is nonzero. + if (N && N->getOpcode() == ISD::TokenFactor) { + SU->Latency = 0; + return; + } + // Check to see if the scheduler cares about latencies. if (ForceUnitLatencies()) { SU->Latency = 1; @@ -569,7 +565,6 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { } if (!InstrItins || InstrItins->isEmpty()) { - SDNode *N = SU->getNode(); if (N && N->isMachineOpcode() && TII->isHighLatencyDef(N->getMachineOpcode())) SU->Latency = HighLatencyCycles; @@ -641,7 +636,7 @@ namespace { }; } -/// ProcessSDDbgValues - Process SDDbgValues assoicated with this node. +/// ProcessSDDbgValues - Process SDDbgValues associated with this node. static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders, diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index b5f68f3..3ad2bd6 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -135,6 +135,14 @@ namespace llvm { return ValueType; } + const SDNode *GetNode() const { + return Node; + } + + unsigned GetIdx() const { + return DefIdx; + } + void Advance(); private: void InitNodeNumDefs(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index c2711c8..68eeb60 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2050,14 +2050,15 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, break; default: - // Allow the target to implement this method for its nodes. - if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { + if (Op.getOpcode() < ISD::BUILTIN_OP_END) + break; + // Fallthrough case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: - TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this, - Depth); - } + // Allow the target to implement this method for its nodes. + TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this, + Depth); return; } } @@ -2322,6 +2323,13 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op) const { return !C->isZero(); // TODO: Recognize more cases here. 
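// (Editorial note, not part of this patch: the OR case added below is sound
// because every bit set in the constant operand is necessarily set in the
// result, so (x | C) can only be zero when C itself is zero -- hence the
// !C->isNullValue() test.)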
+ switch (Op.getOpcode()) { + default: break; + case ISD::OR: + if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + return !C->isNullValue(); + break; + } return false; } @@ -2339,16 +2347,6 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { return false; } -bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { - GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); - if (!GA) return false; - if (GA->getOffset() != 0) return false; - const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal()); - if (!GV) return false; - return MF->getMMI().hasDebugInfo(); -} - - /// getNode - Gets or creates the specified node. /// SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) { @@ -6304,7 +6302,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, - getConstant(i, MVT::i32)); + getConstant(i, TLI.getPointerTy())); } else { // A scalar operand; just use it as is. Operands[j] = Operand; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 24c325e..d79a5ae 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -280,12 +280,35 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, } // Vector/Vector bitcast. - return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + if (ValueVT.getSizeInBits() == PartVT.getSizeInBits()) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + + assert(PartVT.getVectorNumElements() == ValueVT.getVectorNumElements() && + "Cannot handle this kind of promotion"); + // Promoted vector extract + bool Smaller = ValueVT.bitsLE(PartVT); + return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, ValueVT, Val); + } - assert(ValueVT.getVectorElementType() == PartVT && - ValueVT.getVectorNumElements() == 1 && + // Trivial bitcast if the types are the same size and the destination + // vector type is legal. + if (PartVT.getSizeInBits() == ValueVT.getSizeInBits() && + TLI.isTypeLegal(ValueVT)) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + + // Handle cases such as i8 -> <1 x i1> + assert(ValueVT.getVectorNumElements() == 1 && "Only trivial scalar-to-vector conversions should get here!"); + + if (ValueVT.getVectorNumElements() == 1 && + ValueVT.getVectorElementType() != PartVT) { + bool Smaller = ValueVT.bitsLE(PartVT); + Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, ValueVT.getScalarType(), Val); + } + return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } @@ -426,7 +449,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, // Bitconvert vector->vector case. Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (PartVT.isVector() && - PartVT.getVectorElementType() == ValueVT.getVectorElementType()&& + PartVT.getVectorElementType() == ValueVT.getVectorElementType() && PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { EVT ElementVT = PartVT.getVectorElementType(); // Vector widening case, e.g. <2 x float> -> <4 x float>. 
Shuffle in @@ -446,13 +469,33 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, //SDValue UndefElts = DAG.getUNDEF(VectorTy); //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); - } else { + } else if (PartVT.isVector() && + PartVT.getVectorElementType().bitsGE( + ValueVT.getVectorElementType()) && + PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { + + // Promoted vector extract + unsigned NumElts = ValueVT.getVectorNumElements(); + SmallVector<SDValue, 8> NewOps; + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + ValueVT.getScalarType(), Val, DAG.getIntPtrConstant(i)); + SDValue Cast = DAG.getNode(ISD::ANY_EXTEND, + DL, PartVT.getScalarType(), Ext); + NewOps.push_back(Cast); + } + Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, + &NewOps[0], NewOps.size()); + } else { // Vector -> scalar conversion. - assert(ValueVT.getVectorElementType() == PartVT && - ValueVT.getVectorNumElements() == 1 && + assert(ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getIntPtrConstant(0)); + + bool Smaller = ValueVT.bitsLE(PartVT); + Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, PartVT, Val); } Parts[0] = Val; @@ -783,11 +826,20 @@ void SelectionDAGBuilder::clear() { UnusedArgNodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); - DanglingDebugInfoMap.clear(); CurDebugLoc = DebugLoc(); HasTailCall = false; } +/// clearDanglingDebugInfo - Clear the dangling debug information +/// map. This function is separated from the clear so that debug +/// information that is dangling in a basic block can be properly +/// resolved in a different basic block. This allows the +/// SelectionDAG to resolve dangling debug information attached +/// to PHI nodes. +void SelectionDAGBuilder::clearDanglingDebugInfo() { + DanglingDebugInfoMap.clear(); +} + /// getRoot - Return the current virtual root of the Selection DAG, /// flushing any PendingLoad items. This must be done before emitting /// a store or any other node that may need to be ordered after any @@ -1175,6 +1227,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { /// created for it, emit nodes to copy the value into the virtual /// registers. void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { + // Skip empty types + if (V->getType()->isEmptyTy()) + return; + DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) { assert(!V->use_empty() && "Unused value assigned virtual registers!"); @@ -1223,6 +1279,24 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, return true; } +/// Return branch probability calculated by BranchProbabilityInfo for IR blocks. 
+uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src, + MachineBasicBlock *Dst) { + BranchProbabilityInfo *BPI = FuncInfo.BPI; + if (!BPI) + return 0; + BasicBlock *SrcBB = const_cast<BasicBlock*>(Src->getBasicBlock()); + BasicBlock *DstBB = const_cast<BasicBlock*>(Dst->getBasicBlock()); + return BPI->getEdgeWeight(SrcBB, DstBB); +} + +void SelectionDAGBuilder::addSuccessorWithWeight(MachineBasicBlock *Src, + MachineBasicBlock *Dst) { + uint32_t weight = getEdgeWeight(Src, Dst); + Src->addSuccessor(Dst, weight); +} + + static bool InBlock(const Value *V, const BasicBlock *BB) { if (const Instruction *I = dyn_cast<Instruction>(V)) return I->getParent() == BB; @@ -1492,8 +1566,8 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, } // Update successor info - SwitchBB->addSuccessor(CB.TrueBB); - SwitchBB->addSuccessor(CB.FalseBB); + addSuccessorWithWeight(SwitchBB, CB.TrueBB); + addSuccessorWithWeight(SwitchBB, CB.FalseBB); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. @@ -1637,8 +1711,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock* MBB = B.Cases[0].ThisBB; - SwitchBB->addSuccessor(B.Default); - SwitchBB->addSuccessor(MBB); + addSuccessorWithWeight(SwitchBB, B.Default); + addSuccessorWithWeight(SwitchBB, MBB); SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, CopyTo, RangeCmp, @@ -1683,8 +1757,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, ISD::SETNE); } - SwitchBB->addSuccessor(B.TargetBB); - SwitchBB->addSuccessor(NextMBB); + addSuccessorWithWeight(SwitchBB, B.TargetBB); + addSuccessorWithWeight(SwitchBB, NextMBB); SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -1924,8 +1998,9 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, // table. MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB); CurMF->insert(BBI, JumpTableBB); - CR.CaseBB->addSuccessor(Default); - CR.CaseBB->addSuccessor(JumpTableBB); + + addSuccessorWithWeight(CR.CaseBB, Default); + addSuccessorWithWeight(CR.CaseBB, JumpTableBB); // Build a vector of destination BBs, corresponding to each target // of the jump table. If the value of the jump table slot corresponds to @@ -1952,7 +2027,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, E = DestBBs.end(); I != E; ++I) { if (!SuccsHandled[(*I)->getNumber()]) { SuccsHandled[(*I)->getNumber()] = true; - JumpTableBB->addSuccessor(*I); + addSuccessorWithWeight(JumpTableBB, *I); } } @@ -2019,9 +2094,13 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, APInt Range = ComputeRange(LEnd, RBegin); assert((Range - 2ULL).isNonNegative() && "Invalid case distance"); - double LDensity = (double)LSize.roundToDouble() / + // Use volatile double here to avoid excess precision issues on some hosts, + // e.g. that use 80-bit X87 registers. 
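// (Editorial note, not part of this patch: on x87 hosts these divisions may
// be evaluated in 80-bit registers, and whether an intermediate gets spilled
// to a 64-bit double can change its value, so the same switch could lower
// differently from host to host. The volatile qualifier below forces each
// density through memory, making the Metric comparison deterministic.)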
+ volatile double LDensity = + (double)LSize.roundToDouble() / (LEnd - First + 1ULL).roundToDouble(); - double RDensity = (double)RSize.roundToDouble() / + volatile double RDensity = + (double)RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble(); double Metric = Range.logBase2()*(LDensity+RDensity); // Should always split in some non-trivial place @@ -2367,8 +2446,10 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { succs.push_back(I.getSuccessor(i)); array_pod_sort(succs.begin(), succs.end()); succs.erase(std::unique(succs.begin(), succs.end()), succs.end()); - for (unsigned i = 0, e = succs.size(); i != e; ++i) - IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]); + for (unsigned i = 0, e = succs.size(); i != e; ++i) { + MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]]; + addSuccessorWithWeight(IndirectBrMBB, Succ); + } DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -2806,16 +2887,18 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { SmallVector<SDValue, 4> Values(NumAggValues); SDValue Agg = getValue(Op0); - SDValue Val = getValue(Op1); unsigned i = 0; // Copy the beginning value(s) from the original aggregate. for (; i != LinearIndex; ++i) Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : SDValue(Agg.getNode(), Agg.getResNo() + i); // Copy values from the inserted value(s). - for (; i != LinearIndex + NumValValues; ++i) - Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) : - SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); + if (NumValValues) { + SDValue Val = getValue(Op1); + for (; i != LinearIndex + NumValValues; ++i) + Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) : + SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); + } // Copy remaining value(s) from the original aggregate. for (; i != NumAggValues; ++i) Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : @@ -2838,6 +2921,13 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { ComputeValueVTs(TLI, ValTy, ValValueVTs); unsigned NumValValues = ValValueVTs.size(); + + // Ignore an extractvalue that produces an empty object + if (!NumValValues) { + setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); + return; + } + SmallVector<SDValue, 4> Values(NumValValues); SDValue Agg = getValue(Op0); @@ -4009,6 +4099,24 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } +// getTruncatedArgReg - Find underlying register used for a truncated +// argument. +static unsigned getTruncatedArgReg(const SDValue &N) { + if (N.getOpcode() != ISD::TRUNCATE) + return 0; + + const SDValue &Ext = N.getOperand(0); + if (Ext.getOpcode() == ISD::AssertZext || Ext.getOpcode() == ISD::AssertSext) { + const SDValue &CFR = Ext.getOperand(0); + if (CFR.getOpcode() == ISD::CopyFromReg) + return cast<RegisterSDNode>(CFR.getOperand(1))->getReg(); + else + if (CFR.getOpcode() == ISD::TRUNCATE) + return getTruncatedArgReg(CFR); + } + return 0; +} + /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function /// argument, create the corresponding DBG_VALUE machine instruction for it now. /// At the end of instruction selection, they will be inserted to the entry BB. 
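// (Editorial aside, not part of this patch.) getTruncatedArgReg() above peels
// the node shape an argument takes when the ABI widens it -- a TRUNCATE of an
// AssertZext/AssertSext wrapped around a CopyFromReg (e.g. an i8 argument
// promoted to i32 and truncated back). A standalone sketch of the same
// peeling over a toy node type; every name here is invented for illustration:
enum ToyOpcode { TOY_TRUNCATE, TOY_ASSERT_ZEXT, TOY_ASSERT_SEXT,
                 TOY_COPY_FROM_REG, TOY_OTHER };
struct ToyNode {
  ToyOpcode Opcode;
  const ToyNode *Operand; // the single operand this sketch cares about
  unsigned Reg;           // meaningful only for TOY_COPY_FROM_REG
};
// Return the underlying register, or 0 if the pattern does not match.
static unsigned toyTruncatedArgReg(const ToyNode *N) {
  if (!N || N->Opcode != TOY_TRUNCATE)
    return 0;
  const ToyNode *Ext = N->Operand;
  if (!Ext ||
      (Ext->Opcode != TOY_ASSERT_ZEXT && Ext->Opcode != TOY_ASSERT_SEXT))
    return 0;
  const ToyNode *Inner = Ext->Operand;
  if (!Inner)
    return 0;
  if (Inner->Opcode == TOY_COPY_FROM_REG)
    return Inner->Reg;
  if (Inner->Opcode == TOY_TRUNCATE) // nested truncation, as in the patch
    return toyTruncatedArgReg(Inner);
  return 0;
}
// (End of editorial aside.)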
@@ -4029,10 +4137,6 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, if (DV.isInlinedFnArgument(MF.getFunction())) return false; - MachineBasicBlock *MBB = FuncInfo.MBB; - if (MBB != &MF.front()) - return false; - unsigned Reg = 0; if (Arg->hasByValAttr()) { // Byval arguments' frame index is recorded during argument lowering. @@ -4044,9 +4148,12 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, Reg = 0; } - if (N.getNode() && N.getOpcode() == ISD::CopyFromReg) { - Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (N.getNode()) { + if (N.getOpcode() == ISD::CopyFromReg) + Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); + else + Reg = getTruncatedArgReg(N); + if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); unsigned PR = RegInfo.getLiveInPhysReg(Reg); if (PR) @@ -4208,9 +4315,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDV = DAG.getDbgValue(Variable, FINode->getIndex(), 0, dl, SDNodeOrder); else { - // Can't do anything with other non-AI cases yet. This might be a - // parameter of a callee function that got inlined, for example. - DEBUG(dbgs() << "Dropping debug info for " << DI); + // Address is an argument, so try to emit its dbg value using + // virtual register info from the FuncInfo.ValueMap. + EmitFuncArgumentDbgValue(Address, Variable, 0, N); return 0; } } else if (AI) @@ -4403,7 +4510,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_sjlj_dispatch_setup: { DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, - getRoot())); + getRoot(), getValue(I.getArgOperand(0)))); return 0; } @@ -4672,9 +4779,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); return 0; - case Intrinsic::trap: - DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); + case Intrinsic::trap: { + StringRef TrapFuncName = getTrapFunctionName(); + if (TrapFuncName.empty()) { + DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); + return 0; + } + TargetLowering::ArgListTy Args; + std::pair<SDValue, SDValue> Result = + TLI.LowerCallTo(getRoot(), I.getType(), + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, /*isReturnValueUsed=*/true, + DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), + Args, DAG, getCurDebugLoc()); + DAG.setRoot(Result.second); return 0; + } case Intrinsic::uadd_with_overflow: return implVisitAluOverflow(I, ISD::UADDO); case Intrinsic::sadd_with_overflow: @@ -4689,15 +4809,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return implVisitAluOverflow(I, ISD::SMULO); case Intrinsic::prefetch: { - SDValue Ops[4]; + SDValue Ops[5]; unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); + Ops[4] = getValue(I.getArgOperand(3)); DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl, DAG.getVTList(MVT::Other), - &Ops[0], 4, + &Ops[0], 5, EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), 0, /* align */ @@ -4784,7 +4905,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Outs, TLI, &Offsets); bool 
CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), - FTy->isVarArg(), Outs, FTy->getContext()); + DAG.getMachineFunction(), + FTy->isVarArg(), Outs, + FTy->getContext()); SDValue DemoteStackSlot; int DemoteStackIdx = -100; @@ -4814,8 +4937,14 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { - SDValue ArgNode = getValue(*i); - Entry.Node = ArgNode; Entry.Ty = (*i)->getType(); + const Value *V = *i; + + // Skip empty types + if (V->getType()->isEmptyTy()) + continue; + + SDValue ArgNode = getValue(V); + Entry.Node = ArgNode; Entry.Ty = V->getType(); unsigned attrInd = i - CS.arg_begin() + 1; Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); @@ -5255,6 +5384,7 @@ public: const llvm::Type *OpTy = CallOperandVal->getType(); + // FIXME: code duplicated from TargetLowering::ParseConstraints(). // If this is an indirect operand, the operand is a pointer to the // accessed type. if (isIndirect) { @@ -5264,6 +5394,11 @@ public: OpTy = PtrTy->getElementType(); } + // Look for vector wrapped in a struct. e.g. { <16 x i8> }. + if (const StructType *STy = dyn_cast<StructType>(OpTy)) + if (STy->getNumElements() == 1) + OpTy = STy->getElementType(0); + // If OpTy is not a single value, it may be a struct/union that we // can tile with integers. if (!OpTy->isSingleValueType() && OpTy->isSized()) { @@ -5315,6 +5450,8 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF, EVT ThisVT = MVT::Other; const TargetRegisterClass *RC = *RCI; + if (!RC->isAllocatable()) + continue; // If none of the value types for this register class are valid, we // can't use it. For example, 64-bit reg classes on 32-bit targets. for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); @@ -5336,15 +5473,14 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF, // frame pointer in functions that need it (due to them not being taken // out of allocation, because a variable sized allocation hasn't been seen // yet). This is a slight code pessimization, but should still work. - for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), - E = RC->allocation_order_end(MF); I != E; ++I) - if (*I == Reg) { - // We found a matching register class. Keep looking at others in case - // we find one with larger registers that this physreg is also in. - FoundRC = RC; - FoundVT = ThisVT; - break; - } + ArrayRef<unsigned> RawOrder = RC->getRawAllocationOrder(MF); + if (std::find(RawOrder.begin(), RawOrder.end(), Reg) != RawOrder.end()) { + // We found a matching register class. Keep looking at others in case + // we find one with larger registers that this physreg is also in. + FoundRC = RC; + FoundVT = ThisVT; + break; + } } return FoundRC; } @@ -5491,9 +5627,15 @@ static void GetRegistersForValue(SelectionDAG &DAG, OpInfo.ConstraintVT); const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); + BitVector Reserved = TRI->getReservedRegs(MF); unsigned NumAllocated = 0; for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) { unsigned Reg = RegClassRegs[i]; + // Filter out the reserved registers, but note that reserved registers are + // not fully determined at this point. We may still decide we need a frame + // pointer. + if (Reserved.test(Reg)) + continue; // See if this register is available. if ((isOutReg && OutputRegs.count(Reg)) || // Already used. (isInReg && InputRegs.count(Reg))) { // Already used. 
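// (Editorial aside, not part of this patch.) The Reserved.test(Reg) check
// added above is a straightforward "skip candidates flagged in a bitvector"
// scan; the subtlety, as the patch's own comment notes, is only that the
// reserved set is not final yet because a frame pointer may still be forced
// later. A self-contained sketch of the shape, with invented names:
#include <cstddef>
#include <vector>
// Return the first candidate register not marked reserved, or 0 if none.
static unsigned firstUsableReg(const std::vector<unsigned> &Candidates,
                               const std::vector<bool> &Reserved) {
  for (std::size_t i = 0, e = Candidates.size(); i != e; ++i) {
    unsigned Reg = Candidates[i];
    // Reserved registers (e.g. the stack pointer) must never be handed out.
    if (Reg < Reserved.size() && Reserved[Reg])
      continue;
    return Reg;
  }
  return 0;
}
// (End of editorial aside.)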
@@ -5542,7 +5684,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { std::set<unsigned> OutputRegs, InputRegs; - TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(CS); + TargetLowering::AsmOperandInfoVector + TargetConstraints = TLI.ParseConstraints(CS); + bool hasMemory = false; unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. @@ -5601,7 +5745,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { hasMemory = true; else { for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { - TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[j]); + TargetLowering::ConstraintType + CType = TLI.getConstraintType(OpInfo.Codes[j]); if (CType == TargetLowering::C_Memory) { hasMemory = true; break; @@ -5651,12 +5796,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // need to provide an address for the memory input. if (OpInfo.ConstraintType == TargetLowering::C_Memory && !OpInfo.isIndirect) { - assert((OpInfo.isMultipleAlternative || (OpInfo.Type == InlineAsm::isInput)) && + assert((OpInfo.isMultipleAlternative || + (OpInfo.Type == InlineAsm::isInput)) && "Can only indirectify direct input operands!"); // Memory operands really want the address of the value. If we don't have // an indirect input, put it in the constpool if we can, otherwise spill // it to a stack slot. + // TODO: This isn't quite right. We need to handle these according to + // the addressing mode that the constraint wants. Also, this may take + // an additional register for the computation and we don't want that + // either. // If the operand is a float, integer, or vector constant, spill to a // constant pool entry to get its address. @@ -5858,7 +6008,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector<SDValue> Ops; - TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0], + TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); if (Ops.empty()) report_fatal_error("Invalid operand for inline asm constraint '" + @@ -6067,14 +6217,15 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, Flags.setByVal(); const PointerType *Ty = cast<PointerType>(Args[i].Ty); const Type *ElementTy = Ty->getElementType(); - unsigned FrameAlign = getByValTypeAlignment(ElementTy); - unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy); + Flags.setByValSize(getTargetData()->getTypeAllocSize(ElementTy)); // For ByVal, alignment should come from FE. BE will guess if this // info is not there but there are cases it cannot get right. + unsigned FrameAlign; if (Args[i].Alignment) FrameAlign = Args[i].Alignment; + else + FrameAlign = getByValTypeAlignment(ElementTy); Flags.setByValAlign(FrameAlign); - Flags.setByValSize(FrameSize); } if (Args[i].isNest) Flags.setNest(); @@ -6180,7 +6331,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, // For a function returning void, there is no return value. We can't create // such a node, so we just return a null return value in that case. In - // that case, nothing will actualy look at the value. + // that case, nothing will actually look at the value. 
if (ReturnValues.empty()) return std::make_pair(SDValue(), Chain); @@ -6219,6 +6370,25 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { #include "llvm/CodeGen/SelectionDAGISel.h" +/// isOnlyUsedInEntryBlock - If the specified argument is only used in the +/// entry block, return true. This includes arguments used by switches, since +/// the switch may expand into multiple basic blocks. +static bool isOnlyUsedInEntryBlock(const Argument *A) { + // With FastISel active, we may be splitting blocks, so force creation + // of virtual registers for all non-dead arguments. + if (EnableFastISel) + return A->use_empty(); + + const BasicBlock *Entry = A->getParent()->begin(); + for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end(); + UI != E; ++UI) { + const User *U = *UI; + if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U)) + return false; // Use not in entry block. + } + return true; +} + void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // If this is the entry block, emit arguments. const Function &F = *LLVMBB->getParent(); @@ -6273,14 +6443,15 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { Flags.setByVal(); const PointerType *Ty = cast<PointerType>(I->getType()); const Type *ElementTy = Ty->getElementType(); - unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy); - unsigned FrameSize = TD->getTypeAllocSize(ElementTy); + Flags.setByValSize(TD->getTypeAllocSize(ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. + unsigned FrameAlign; if (F.getParamAlignment(Idx)) FrameAlign = F.getParamAlignment(Idx); + else + FrameAlign = TLI.getByValTypeAlignment(ElementTy); Flags.setByValAlign(FrameAlign); - Flags.setByValSize(FrameSize); } if (F.paramHasAttr(Idx, Attribute::Nest)) Flags.setNest(); @@ -6362,8 +6533,8 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { if (I->use_empty() && NumValues) SDB->setUnusedArgValue(I, InVals[i]); - for (unsigned Value = 0; Value != NumValues; ++Value) { - EVT VT = ValueVTs[Value]; + for (unsigned Val = 0; Val != NumValues; ++Val) { + EVT VT = ValueVTs[Val]; EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT); unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT); @@ -6382,21 +6553,35 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { i += NumParts; } + // We don't need to do anything else for unused arguments. + if (ArgValues.empty()) + continue; + // Note down frame index for byval arguments. - if (I->hasByValAttr() && !ArgValues.empty()) + if (I->hasByValAttr()) if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex()); - if (!I->use_empty()) { - SDValue Res; - if (!ArgValues.empty()) - Res = DAG.getMergeValues(&ArgValues[0], NumValues, - SDB->getCurDebugLoc()); - SDB->setValue(I, Res); - - // If this argument is live outside of the entry block, insert a copy from - // whereever we got it to the vreg that other BB's will reference it as. + SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues, + SDB->getCurDebugLoc()); + SDB->setValue(I, Res); + + // If this argument is live outside of the entry block, insert a copy from + // wherever we got it to the vreg that other BB's will reference it as. + if (!EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { + // If we can, though, try to skip creating an unnecessary vreg. 
+ // FIXME: This isn't very clean... it would be nice to make this more + // general. It's also subtly incompatible with the hacks FastISel + // uses with vregs. + unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + FuncInfo->ValueMap[I] = Reg; + continue; + } + } + if (!isOnlyUsedInEntryBlock(I)) { + FuncInfo->InitializeRegForValue(I); SDB->CopyToExportRegsIfNeeded(I); } } @@ -6442,6 +6627,10 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Ignore dead phi's. if (PN->use_empty()) continue; + // Skip empty types + if (PN->getType()->isEmptyTy()) + continue; + unsigned Reg; const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index f546612..a1ca891 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -23,7 +23,6 @@ #include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include <vector> -#include <set> namespace llvm { @@ -333,6 +332,14 @@ public: /// consumed. void clear(); + /// clearDanglingDebugInfo - Clear the dangling debug information + /// map. This function is seperated from the clear so that debug + /// information that is dangling in a basic block can be properly + /// resolved in a different basic block. This allows the + /// SelectionDAG to resolve dangling debug information attached + /// to PHI nodes. + void clearDanglingDebugInfo(); + /// getRoot - Return the current virtual root of the Selection DAG, /// flushing any PendingLoad items. This must be done before emitting /// a store or any other node that may need to be ordered after any @@ -427,6 +434,9 @@ private: const Value* SV, MachineBasicBlock* Default, MachineBasicBlock *SwitchBB); + + uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst); + void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst); public: void visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 4b6066e..dc8044b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" #include "llvm/Function.h" @@ -55,17 +56,11 @@ using namespace llvm; STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on"); +STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected"); STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG"); STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path"); -#ifndef NDEBUG -STATISTIC(NumBBWithOutOfOrderLineInfo, - "Number of blocks with out of order line number info"); -STATISTIC(NumMBBWithOutOfOrderLineInfo, - "Number of machine blocks with out of order line number info"); -#endif - static cl::opt<bool> EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, cl::desc("Enable verbose messages in the \"fast\" " @@ -74,6 +69,11 @@ static cl::opt<bool> EnableFastISelAbort("fast-isel-abort", cl::Hidden, cl::desc("Enable abort calls when 
\"fast\" instruction fails")); +static cl::opt<bool> +UseMBPI("use-mbpi", + cl::desc("use Machine Branch Probability Info"), + cl::init(true), cl::Hidden); + #ifndef NDEBUG static cl::opt<bool> ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden, @@ -192,6 +192,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, DAGSize(0) { initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); } SelectionDAGISel::~SelectionDAGISel() { @@ -205,43 +206,11 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<AliasAnalysis>(); AU.addRequired<GCModuleInfo>(); AU.addPreserved<GCModuleInfo>(); + if (UseMBPI && OptLevel != CodeGenOpt::None) + AU.addRequired<BranchProbabilityInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } -/// FunctionCallsSetJmp - Return true if the function has a call to setjmp or -/// other function that gcc recognizes as "returning twice". This is used to -/// limit code-gen optimizations on the machine function. -/// -/// FIXME: Remove after <rdar://problem/8031714> is fixed. -static bool FunctionCallsSetJmp(const Function *F) { - const Module *M = F->getParent(); - static const char *ReturnsTwiceFns[] = { - "_setjmp", - "setjmp", - "sigsetjmp", - "setjmp_syscall", - "savectx", - "qsetjmp", - "vfork", - "getcontext" - }; -#define NUM_RETURNS_TWICE_FNS sizeof(ReturnsTwiceFns) / sizeof(const char *) - - for (unsigned I = 0; I < NUM_RETURNS_TWICE_FNS; ++I) - if (const Function *Callee = M->getFunction(ReturnsTwiceFns[I])) { - if (!Callee->use_empty()) - for (Value::const_use_iterator - I = Callee->use_begin(), E = Callee->use_end(); - I != E; ++I) - if (const CallInst *CI = dyn_cast<CallInst>(*I)) - if (CI->getParent()->getParent() == F) - return true; - } - - return false; -#undef NUM_RETURNS_TWICE_FNS -} - /// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that /// may trap on it. In this case we have to split the edge so that the path /// through the predecessor block that doesn't go to the phi block doesn't @@ -302,6 +271,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { CurDAG->init(*MF); FuncInfo->set(Fn, *MF); + + if (UseMBPI && OptLevel != CodeGenOpt::None) + FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>(); + else + FuncInfo->BPI = 0; + SDB->init(GFI, *AA); SelectAllBasicBlocks(Fn); @@ -392,7 +367,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { } // Determine if there is a call to setjmp in the machine function. - MF->setCallsSetJmp(FunctionCallsSetJmp(&Fn)); + MF->setCallsSetJmp(Fn.callsFunctionThatReturnsTwice()); // Replace forward-declared registers with the registers containing // the desired value. @@ -421,10 +396,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { return true; } -void -SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, - BasicBlock::const_iterator End, - bool &HadTailCall) { +void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, + BasicBlock::const_iterator End, + bool &HadTailCall) { // Lower all of the non-terminator instructions. If a call is emitted // as a tail call, cease emitting nodes for this block. Terminators // are handled below. @@ -438,7 +412,6 @@ SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, // Final step, emit the lowered DAG as machine code. 
CodeGenAndEmitDAG(); - return; } void SelectionDAGISel::ComputeLiveOutVRegInfo() { @@ -572,7 +545,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled); - CurDAG->Legalize(OptLevel); + CurDAG->Legalize(); } DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber @@ -748,16 +721,49 @@ void SelectionDAGISel::PrepareEHLandingPad() { - +/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified +/// load into the specified FoldInst. Note that we could have a sequence where +/// multiple LLVM IR instructions are folded into the same machineinstr. For +/// example we could have: +/// A: x = load i32 *P +/// B: y = icmp A, 42 +/// C: br y, ... +/// +/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and +/// any other folded instructions) because it is between A and C. +/// +/// If we succeed in folding the load into the operation, return true. +/// bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, + const Instruction *FoldInst, FastISel *FastIS) { + // We know that the load has a single use, but don't know what it is. If it + // isn't one of the folded instructions, then we can't succeed here. Handle + // this by scanning the single-use users of the load until we get to FoldInst. + unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. + + const Instruction *TheUser = LI->use_back(); + while (TheUser != FoldInst && // Scan up until we find FoldInst. + // Stay in the right block. + TheUser->getParent() == FoldInst->getParent() && + --MaxUsers) { // Don't scan too far. + // If there are multiple or no uses of this instruction, then bail out. + if (!TheUser->hasOneUse()) + return false; + + TheUser = TheUser->use_back(); + } + // Don't try to fold volatile loads. Target has to deal with alignment // constraints. if (LI->isVolatile()) return false; - // Figure out which vreg this is going into. + // Figure out which vreg this is going into. If there is no assigned vreg yet + // then there actually was no reference to it. Perhaps the load is referenced + // by a dead instruction. unsigned LoadReg = FastIS->getRegForValue(LI); - assert(LoadReg && "Load isn't already assigned a vreg? "); + if (LoadReg == 0) + return false; // Check to see what the uses of this vreg are. If it has no uses, or more // than one use (at the machine instr level) then we can't fold it. @@ -788,48 +794,17 @@ bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI); } -#ifndef NDEBUG -/// CheckLineNumbers - Check if basic block instructions follow source order -/// or not. -static void CheckLineNumbers(const BasicBlock *BB) { - unsigned Line = 0; - unsigned Col = 0; - for (BasicBlock::const_iterator BI = BB->begin(), - BE = BB->end(); BI != BE; ++BI) { - const DebugLoc DL = BI->getDebugLoc(); - if (DL.isUnknown()) continue; - unsigned L = DL.getLine(); - unsigned C = DL.getCol(); - if (L < Line || (L == Line && C < Col)) { - ++NumBBWithOutOfOrderLineInfo; - return; - } - Line = L; - Col = C; - } +/// isFoldedOrDeadInstruction - Return true if the specified instruction is +/// side-effect free and is either dead or folded into a generated instruction. +/// Return false if it needs to be emitted. +static bool isFoldedOrDeadInstruction(const Instruction *I, + FunctionLoweringInfo *FuncInfo) { + return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded. 
+ !isa<TerminatorInst>(I) && // Terminators aren't folded. + !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded. + !FuncInfo->isExportedInst(I); // Exported instrs must be computed. } -/// CheckLineNumbers - Check if machine basic block instructions follow source -/// order or not. -static void CheckLineNumbers(const MachineBasicBlock *MBB) { - unsigned Line = 0; - unsigned Col = 0; - for (MachineBasicBlock::const_iterator MBI = MBB->begin(), - MBE = MBB->end(); MBI != MBE; ++MBI) { - const DebugLoc DL = MBI->getDebugLoc(); - if (DL.isUnknown()) continue; - unsigned L = DL.getLine(); - unsigned C = DL.getCol(); - if (L < Line || (L == Line && C < Col)) { - ++NumMBBWithOutOfOrderLineInfo; - return; - } - Line = L; - Col = C; - } -} -#endif - void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; @@ -841,9 +816,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { for (ReversePostOrderTraversal<const Function*>::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { const BasicBlock *LLVMBB = *I; -#ifndef NDEBUG - CheckLineNumbers(LLVMBB); -#endif if (OptLevel != CodeGenOpt::None) { bool AllPredsVisited = true; @@ -856,15 +828,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } if (AllPredsVisited) { - for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end(); - I != E && isa<PHINode>(I); ++I) { + for (BasicBlock::const_iterator I = LLVMBB->begin(); + isa<PHINode>(I); ++I) FuncInfo->ComputePHILiveOutRegInfo(cast<PHINode>(I)); - } } else { - for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end(); - I != E && isa<PHINode>(I); ++I) { + for (BasicBlock::const_iterator I = LLVMBB->begin(); + isa<PHINode>(I); ++I) FuncInfo->InvalidatePHILiveOutRegInfo(cast<PHINode>(I)); - } } FuncInfo->VisitedBBs.insert(LLVMBB); @@ -912,10 +882,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { const Instruction *Inst = llvm::prior(BI); // If we no longer require this instruction, skip it. - if (!Inst->mayWriteToMemory() && - !isa<TerminatorInst>(Inst) && - !isa<DbgInfoIntrinsic>(Inst) && - !FuncInfo->isExportedInst(Inst)) + if (isFoldedOrDeadInstruction(Inst, FuncInfo)) continue; // Bottom-up: reset the insert pos at the top, after any local-value @@ -924,16 +891,21 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Try to select the instruction with FastISel. if (FastIS->SelectInstruction(Inst)) { - // If fast isel succeeded, check to see if there is a single-use - // non-volatile load right before the selected instruction, and see if - // the load is used by the instruction. If so, try to fold it. - const Instruction *BeforeInst = 0; - if (Inst != Begin) - BeforeInst = llvm::prior(llvm::prior(BI)); - if (BeforeInst && isa<LoadInst>(BeforeInst) && - BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst && - TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS)) - --BI; // If we succeeded, don't re-select the load. + ++NumFastIselSuccess; + // If fast isel succeeded, skip over all the folded instructions, and + // then see if there is a load right before the selected instructions. + // Try to fold the load if so. 
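// ---------------------------------------------------------------------------
// Illustrative sketch (hypothetical types, not LLVM API): the backward scan
// that follows can be modeled on a plain linked list standing in for a basic
// block. isFoldedOrDead mirrors isFoldedOrDeadInstruction above, and
// findFoldableLoad mirrors the BeforeInst walk below.
struct Instr {
  Instr *Prev;            // previous instruction in the block
  bool MayWriteToMemory;  // side-effecting instructions are never folded
  bool IsTerminator;
  bool IsDebug;
  bool IsExported;        // value is needed outside this block
  bool IsLoad;
  unsigned NumUses;
};

static bool isFoldedOrDead(const Instr *I) {
  return !I->MayWriteToMemory && !I->IsTerminator && !I->IsDebug &&
         !I->IsExported;
}

// Walk backwards from the just-selected instruction, skipping instructions
// that were folded or are dead, and report a single-use load candidate.
static const Instr *findFoldableLoad(const Instr *Selected,
                                     const Instr *Begin) {
  const Instr *Before = Selected;
  while (Before != Begin) {
    Before = Before->Prev;
    if (!isFoldedOrDead(Before))
      break;
  }
  if (Before != Selected && Before->IsLoad && Before->NumUses == 1)
    return Before;
  return 0;
}
// ---------------------------------------------------------------------------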
+ const Instruction *BeforeInst = Inst; + while (BeforeInst != Begin) { + BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst)); + if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) + break; + } + if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) && + BeforeInst->hasOneUse() && + TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) + // If we succeeded, don't re-select the load. + BI = llvm::next(BasicBlock::const_iterator(BeforeInst)); continue; } @@ -963,9 +935,14 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { continue; } - // Otherwise, give up on FastISel for the rest of the block. - // For now, be a little lenient about non-branch terminators. - if (!isa<TerminatorInst>(Inst) || isa<BranchInst>(Inst)) { + if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) { + // Don't abort, and use a different message for terminator misses. + ++NumFastIselFailures; + if (EnableFastISelVerbose || EnableFastISelAbort) { + dbgs() << "FastISel missed terminator: "; + Inst->dump(); + } + } else { ++NumFastIselFailures; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel miss: "; @@ -987,22 +964,20 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { else ++NumFastIselBlocks; - // Run SelectionDAG instruction selection on the remainder of the block - // not handled by FastISel. If FastISel is not run, this is the entire - // block. - bool HadTailCall; - SelectBasicBlock(Begin, BI, HadTailCall); + if (Begin != BI) { + // Run SelectionDAG instruction selection on the remainder of the block + // not handled by FastISel. If FastISel is not run, this is the entire + // block. + bool HadTailCall; + SelectBasicBlock(Begin, BI, HadTailCall); + } FinishBasicBlock(); FuncInfo->PHINodesToUpdate.clear(); } delete FastIS; -#ifndef NDEBUG - for (MachineFunction::const_iterator MBI = MF->begin(), MBE = MF->end(); - MBI != MBE; ++MBI) - CheckLineNumbers(MBI); -#endif + SDB->clearDanglingDebugInfo(); } void @@ -2634,11 +2609,45 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // instructions that access memory and for ComplexPatterns that match // loads. if (EmitNodeInfo & OPFL_MemRefs) { + // Only attach load or store memory operands if the generated + // instruction may load or store. 
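// ---------------------------------------------------------------------------
// Standalone model of the filtering below (MemOp and filterMemOps are
// invented names): keep a load operand only if the instruction may load, a
// store operand only if it may store, and anything else unconditionally.
// The real code makes two passes because it allocates a fixed-size array up
// front; with a growable vector a single pass suffices.
#include <vector>

struct MemOp { bool IsLoad, IsStore; };

static std::vector<MemOp> filterMemOps(const std::vector<MemOp> &Matched,
                                       bool MayLoad, bool MayStore) {
  std::vector<MemOp> Kept;
  for (unsigned i = 0, e = Matched.size(); i != e; ++i) {
    const MemOp &M = Matched[i];
    if (M.IsLoad) {
      if (MayLoad) Kept.push_back(M);   // loads only if the instr may load
    } else if (M.IsStore) {
      if (MayStore) Kept.push_back(M);  // stores only if the instr may store
    } else {
      Kept.push_back(M);                // neither load nor store: always kept
    }
  }
  return Kept;
}
// ---------------------------------------------------------------------------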
+ const TargetInstrDesc &TID = TM.getInstrInfo()->get(TargetOpc); + bool mayLoad = TID.mayLoad(); + bool mayStore = TID.mayStore(); + + unsigned NumMemRefs = 0; + for (SmallVector<MachineMemOperand*, 2>::const_iterator I = + MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { + if ((*I)->isLoad()) { + if (mayLoad) + ++NumMemRefs; + } else if ((*I)->isStore()) { + if (mayStore) + ++NumMemRefs; + } else { + ++NumMemRefs; + } + } + MachineSDNode::mmo_iterator MemRefs = - MF->allocateMemRefsArray(MatchedMemRefs.size()); - std::copy(MatchedMemRefs.begin(), MatchedMemRefs.end(), MemRefs); + MF->allocateMemRefsArray(NumMemRefs); + + MachineSDNode::mmo_iterator MemRefsPos = MemRefs; + for (SmallVector<MachineMemOperand*, 2>::const_iterator I = + MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { + if ((*I)->isLoad()) { + if (mayLoad) + *MemRefsPos++ = *I; + } else if ((*I)->isStore()) { + if (mayStore) + *MemRefsPos++ = *I; + } else { + *MemRefsPos++ = *I; + } + } + cast<MachineSDNode>(Res) - ->setMemRefs(MemRefs, MemRefs + MatchedMemRefs.size()); + ->setMemRefs(MemRefs, MemRefs + NumMemRefs); } DEBUG(errs() << " " diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 4b0822d..efbfaa4 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -26,11 +26,19 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include <cctype> using namespace llvm; +/// We are in the process of implementing a new TypeLegalization action +/// - the promotion of vector elements. This feature is disabled by default +/// and only enabled using this flag. +static cl::opt<bool> +AllowPromoteIntElem("promote-elements", cl::Hidden, + cl::desc("Allow promotion of integer vector element types")); + namespace llvm { TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) { bool isLocal = GV->hasLocalLinkage(); @@ -528,7 +536,8 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { /// NOTE: The constructor takes ownership of TLOF. TargetLowering::TargetLowering(const TargetMachine &tm, const TargetLoweringObjectFile *tlof) - : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) { + : TM(tm), TD(TM.getTargetData()), TLOF(*tlof), + mayPromoteElements(AllowPromoteIntElem) { // All operations default to being supported. memset(OpActions, 0, sizeof(OpActions)); memset(LoadExtActions, 0, sizeof(LoadExtActions)); @@ -596,6 +605,8 @@ TargetLowering::TargetLowering(const TargetMachine &tm, SchedPreferenceInfo = Sched::Latency; JumpBufSize = 0; JumpBufAlignment = 0; + MinFunctionAlignment = 0; + PrefFunctionAlignment = 0; PrefLoopAlignment = 0; MinStackArgumentAlignment = 1; ShouldFoldAtomicFences = false; @@ -662,10 +673,16 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, NewVT = EltTy; IntermediateVT = NewVT; + unsigned NewVTSize = NewVT.getSizeInBits(); + + // Convert sizes such as i33 to i64. + if (!isPowerOf2_32(NewVTSize)) + NewVTSize = NextPowerOf2(NewVTSize); + EVT DestVT = TLI->getRegisterType(NewVT); RegisterVT = DestVT; if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. 
- return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); + return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. @@ -747,7 +764,7 @@ void TargetLowering::computeRegisterProperties() { NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg; TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1); - ValueTypeActions.setTypeAction(ExpandedVT, Expand); + ValueTypeActions.setTypeAction(ExpandedVT, TypeExpandInteger); } // Inspect all of the ValueType's smaller than the largest integer @@ -761,7 +778,7 @@ void TargetLowering::computeRegisterProperties() { } else { RegisterTypeForVT[IntReg] = TransformToType[IntReg] = (MVT::SimpleValueType)LegalIntReg; - ValueTypeActions.setTypeAction(IVT, Promote); + ValueTypeActions.setTypeAction(IVT, TypePromoteInteger); } } @@ -770,7 +787,7 @@ void TargetLowering::computeRegisterProperties() { NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64]; RegisterTypeForVT[MVT::ppcf128] = MVT::f64; TransformToType[MVT::ppcf128] = MVT::f64; - ValueTypeActions.setTypeAction(MVT::ppcf128, Expand); + ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat); } // Decide how to handle f64. If the target does not have native f64 support, @@ -779,7 +796,7 @@ void TargetLowering::computeRegisterProperties() { NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64]; RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64]; TransformToType[MVT::f64] = MVT::i64; - ValueTypeActions.setTypeAction(MVT::f64, Expand); + ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat); } // Decide how to handle f32. If the target does not have native support for @@ -789,12 +806,12 @@ void TargetLowering::computeRegisterProperties() { NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64]; RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64]; TransformToType[MVT::f32] = MVT::f64; - ValueTypeActions.setTypeAction(MVT::f32, Promote); + ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger); } else { NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; TransformToType[MVT::f32] = MVT::i32; - ValueTypeActions.setTypeAction(MVT::f32, Expand); + ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); } } @@ -810,6 +827,30 @@ void TargetLowering::computeRegisterProperties() { unsigned NElts = VT.getVectorNumElements(); if (NElts != 1) { bool IsLegalWiderType = false; + // If we allow the promotion of vector elements using a flag, + // then return TypePromoteInteger on vector elements. + // First try to promote the elements of integer vectors. If no legal + // promotion was found, fallback to the widen-vector method. + if (mayPromoteElements) + for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + EVT SVT = (MVT::SimpleValueType)nVT; + // Promote vectors of integers to vectors with the same number + // of elements, with a wider element type. 
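// ---------------------------------------------------------------------------
// A minimal sketch of the search below, with a value type reduced to
// (element bits, element count) and legality reduced to a set lookup; none
// of these names are LLVM's. E.g. with <4 x i32> legal, <4 x i8> is promoted
// to <4 x i32> rather than widened to a longer vector.
#include <set>
#include <utility>

typedef std::pair<unsigned, unsigned> SimpleVT; // (element bits, #elements)

static bool findPromotedVT(SimpleVT VT, const std::set<SimpleVT> &Legal,
                           SimpleVT &Promoted) {
  // Same element count, wider integer elements.
  for (unsigned Bits = VT.first * 2; Bits <= 64; Bits *= 2) {
    SimpleVT Cand(Bits, VT.second);
    if (Legal.count(Cand)) {
      Promoted = Cand;
      return true;
    }
  }
  return false; // caller falls back to widening the vector
}
// ---------------------------------------------------------------------------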
+ if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits() + && SVT.getVectorNumElements() == NElts && + isTypeLegal(SVT) && SVT.getScalarType().isInteger()) { + TransformToType[i] = SVT; + RegisterTypeForVT[i] = SVT; + NumRegistersForVT[i] = 1; + ValueTypeActions.setTypeAction(VT, TypePromoteInteger); + IsLegalWiderType = true; + break; + } + } + + if (IsLegalWiderType) continue; + + // Try to widen the vector. for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { EVT SVT = (MVT::SimpleValueType)nVT; if (SVT.getVectorElementType() == EltVT && @@ -818,7 +859,7 @@ void TargetLowering::computeRegisterProperties() { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; - ValueTypeActions.setTypeAction(VT, Promote); + ValueTypeActions.setTypeAction(VT, TypeWidenVector); IsLegalWiderType = true; break; } @@ -838,10 +879,12 @@ void TargetLowering::computeRegisterProperties() { if (NVT == VT) { // Type is already a power of 2. The default action is to split. TransformToType[i] = MVT::Other; - ValueTypeActions.setTypeAction(VT, Expand); + unsigned NumElts = VT.getVectorNumElements(); + ValueTypeActions.setTypeAction(VT, + NumElts > 1 ? TypeSplitVector : TypeScalarizeVector); } else { TransformToType[i] = NVT; - ValueTypeActions.setTypeAction(VT, Promote); + ValueTypeActions.setTypeAction(VT, TypeWidenVector); } } @@ -890,7 +933,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, // If there is a wider vector type with the same element type as this one, // we should widen to that legal vector type. This handles things like // <2 x float> -> <4 x float>. - if (NumElts != 1 && getTypeAction(VT) == Promote) { + if (NumElts != 1 && getTypeAction(Context, VT) == TypeWidenVector) { RegisterVT = getTypeToTransformTo(Context, VT); if (isTypeLegal(RegisterVT)) { IntermediateVT = RegisterVT; @@ -928,8 +971,14 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, EVT DestVT = getRegisterType(Context, NewVT); RegisterVT = DestVT; + unsigned NewVTSize = NewVT.getSizeInBits(); + + // Convert sizes such as i33 to i64. + if (!isPowerOf2_32(NewVTSize)) + NewVTSize = NextPowerOf2(NewVTSize); + if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. - return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); + return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. @@ -1678,6 +1727,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1)); if (!ShAmt) break; + SDValue Shift = In.getOperand(1); + if (TLO.LegalTypes()) { + uint64_t ShVal = ShAmt->getZExtValue(); + Shift = + TLO.DAG.getConstant(ShVal, getShiftAmountTy(Op.getValueType())); + } + APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth); HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth); @@ -1691,7 +1747,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), NewTrunc, - In.getOperand(1))); + Shift)); } break; } @@ -1716,26 +1772,28 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::BITCAST: -#if 0 - // If this is an FP->Int bitcast and if the sign bit is the only thing that - // is demanded, turn this into a FGETSIGN. 
- if (NewMask == EVT::getIntegerVTSignBit(Op.getValueType()) && - MVT::isFloatingPoint(Op.getOperand(0).getValueType()) && - !MVT::isVector(Op.getOperand(0).getValueType())) { - // Only do this xform if FGETSIGN is valid or if before legalize. - if (!TLO.AfterLegalize || - isOperationLegal(ISD::FGETSIGN, Op.getValueType())) { + // If this is an FP->Int bitcast and if the sign bit is the only + // thing demanded, turn this into a FGETSIGN. + if (!Op.getOperand(0).getValueType().isVector() && + NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) && + Op.getOperand(0).getValueType().isFloatingPoint()) { + bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); + bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); + if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) { + EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32; // Make a FGETSIGN + SHL to move the sign bit into the appropriate // place. We expect the SHL to be eliminated by other optimizations. - SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(), - Op.getOperand(0)); + SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0)); + unsigned OpVTSizeInBits = Op.getValueType().getSizeInBits(); + if (!OpVTLegal && OpVTSizeInBits > 32) + Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign); unsigned ShVal = Op.getValueType().getSizeInBits()-1; - SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy()); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, Op.getValueType(), + SDValue ShAmt = TLO.DAG.getConstant(ShVal, Op.getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, + Op.getValueType(), Sign, ShAmt)); } } -#endif break; case ISD::ADD: case ISD::MUL: @@ -1842,7 +1900,6 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, DebugLoc dl) const { SelectionDAG &DAG = DCI.DAG; - LLVMContext &Context = *DAG.getContext(); // These setcc operations always fold. switch (Cond) { @@ -1853,12 +1910,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, case ISD::SETTRUE2: return DAG.getConstant(1, VT); } - if (isa<ConstantSDNode>(N0.getNode())) { - // Ensure that the constant occurs on the RHS, and fold constant - // comparisons. + // Ensure that the constant occurs on the RHS, and fold constant + // comparisons. + if (isa<ConstantSDNode>(N0.getNode())) return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); - } - + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { const APInt &C1 = N1C->getAPIntValue(); @@ -1911,6 +1967,42 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal. 
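// ---------------------------------------------------------------------------
// Worked example for the (zext x) == C fold added just below (the helper
// name is invented): the compare can be narrowed to the pre-extension width
// whenever all of the constant's active bits fit below that width.
#include <stdint.h>

static bool canNarrowZExtCompare(unsigned MinBits, uint64_t C,
                                 unsigned CBitWidth) {
  unsigned ActiveBits = 0;              // bits needed to represent C
  for (uint64_t T = C; T; T >>= 1)
    ++ActiveBits;
  // Mirrors: MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits().
  return MinBits < CBitWidth && MinBits > ActiveBits;
}

// canNarrowZExtCompare(8, 42, 32): true -- (zext i8 %x to i32) == 42
//   becomes %x == 42 at i8 width.
// canNarrowZExtCompare(8, 300, 32): false -- 300 needs 9 bits, the equality
//   can never hold after a zext from i8, so this fold does not apply.
// ---------------------------------------------------------------------------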
 }
+  // (zext x) == C --> x == (trunc C)
+  if (DCI.isBeforeLegalize() && N0->hasOneUse() &&
+      (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+    unsigned MinBits = N0.getValueSizeInBits();
+    SDValue PreZExt;
+    if (N0->getOpcode() == ISD::ZERO_EXTEND) {
+      // ZExt
+      MinBits = N0->getOperand(0).getValueSizeInBits();
+      PreZExt = N0->getOperand(0);
+    } else if (N0->getOpcode() == ISD::AND) {
+      // DAGCombine turns costly ZExts into ANDs
+      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
+        if ((C->getAPIntValue()+1).isPowerOf2()) {
+          MinBits = C->getAPIntValue().countTrailingOnes();
+          PreZExt = N0->getOperand(0);
+        }
+    } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) {
+      // ZEXTLOAD
+      if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
+        MinBits = LN0->getMemoryVT().getSizeInBits();
+        PreZExt = N0;
+      }
+    }
+
+    // Make sure we're not losing bits from the constant.
+    if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) {
+      EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
+      if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
+        // Will get folded away.
+        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreZExt);
+        SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT);
+        return DAG.getSetCC(dl, VT, Trunc, C, Cond);
+      }
+    }
+  }
+
   // If the LHS is '(and load, const)', the RHS is 0,
   // the test is for equality or unsigned, and all 1 bits of the const are
   // in the same partial word, see if we can shorten the load.
@@ -1949,7 +2041,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       }
     }
     if (bestWidth) {
-      EVT newVT = EVT::getIntegerVT(Context, bestWidth);
+      EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
       if (newVT.isRound()) {
         EVT PtrType = Lod->getOperand(1).getValueType();
         SDValue Ptr = Lod->getBasePtr();
@@ -2578,9 +2670,13 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
 /// vector.  If it is invalid, don't add anything to Ops.
 void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
-                                                  char ConstraintLetter,
+                                                  std::string &Constraint,
                                                   std::vector<SDValue> &Ops,
                                                   SelectionDAG &DAG) const {
+
+  if (Constraint.length() > 1) return;
+
+  char ConstraintLetter = Constraint[0];
   switch (ConstraintLetter) {
   default: break;
   case 'X':     // Allows any operand; labels (basic block) use this.
@@ -2769,6 +2865,12 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
         report_fatal_error("Indirect operand for inline asm not a pointer!");
       OpTy = PtrTy->getElementType();
     }
+
+    // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
+    if (const StructType *STy = dyn_cast<StructType>(OpTy))
+      if (STy->getNumElements() == 1)
+        OpTy = STy->getElementType(0);
+
     // If OpTy is not a single value, it may be a struct/union that we
     // can tile with integers.
if (!OpTy->isSingleValueType() && OpTy->isSized()) { @@ -3013,7 +3115,7 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, assert(OpInfo.Codes[i].size() == 1 && "Unhandled multi-letter 'other' constraint"); std::vector<SDValue> ResultOps; - TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], + TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i], ResultOps, *DAG); if (!ResultOps.empty()) { BestType = CType; diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index 7b5bca4..160f38f 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -277,7 +277,7 @@ void PEI::calculateAnticAvail(MachineFunction &Fn) { // Initialize data flow sets. clearAnticAvailSets(); - // Calulate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. + // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. bool changed = true; unsigned iterations = 0; while (changed) { diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index c621726..221bec5 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -47,7 +47,6 @@ STATISTIC(numExtends , "Number of copies extended"); STATISTIC(NumReMats , "Number of instructions re-materialized"); STATISTIC(numPeep , "Number of identity moves eliminated after coalescing"); STATISTIC(numAborts , "Number of times interval joining aborted"); -STATISTIC(numDeadValNo, "Number of valno def marked dead"); char SimpleRegisterCoalescing::ID = 0; static cl::opt<bool> @@ -61,9 +60,9 @@ DisableCrossClassJoin("disable-cross-class-join", cl::init(false), cl::Hidden); static cl::opt<bool> -DisablePhysicalJoin("disable-physical-join", - cl::desc("Avoid coalescing physical register copies"), - cl::init(false), cl::Hidden); +EnablePhysicalJoin("join-physregs", + cl::desc("Join physical register copies"), + cl::init(false), cl::Hidden); static cl::opt<bool> VerifyCoalescing("verify-coalescing", @@ -208,15 +207,14 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP, if (ValLR+1 != BLR) return false; // If a live interval is a physical register, conservatively check if any - // of its sub-registers is overlapping the live interval of the virtual - // register. If so, do not coalesce. - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg) && - *tri_->getSubRegisters(IntB.reg)) { - for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) - if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) { + // of its aliases is overlapping the live interval of the virtual register. + // If so, do not coalesce. + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { + for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) + if (li_->hasInterval(*AS) && IntA.overlaps(li_->getInterval(*AS))) { DEBUG({ - dbgs() << "\t\tInterfere with sub-register "; - li_->getInterval(*SR).print(dbgs(), tri_); + dbgs() << "\t\tInterfere with alias "; + li_->getInterval(*AS).print(dbgs(), tri_); }); return false; } @@ -254,7 +252,12 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP, // Okay, merge "B1" into the same value number as "B0". if (BValNo != ValLR->valno) { + // If B1 is killed by a PHI, then the merged live range must also be killed + // by the same PHI, as B0 and B1 can not overlap. 
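// ---------------------------------------------------------------------------
// Tiny model of the flag preservation below (ValNo and mergeInto are
// invented): a PHI-kill on the value being merged away must be re-asserted
// on the surviving value, because the merge itself discards it.
struct ValNo { bool HasPHIKill; };

static void mergeInto(ValNo &From, ValNo &Into) {
  bool HadPHIKill = From.HasPHIKill; // save before From is discarded
  // ... live ranges of From are folded into Into here ...
  if (HadPHIKill)
    Into.HasPHIKill = true;
}
// ---------------------------------------------------------------------------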
+ bool HasPHIKill = BValNo->hasPHIKill(); IntB.MergeValueNumberInto(BValNo, ValLR->valno); + if (HasPHIKill) + ValLR->valno->setHasPHIKill(true); } DEBUG({ dbgs() << " result = "; @@ -273,7 +276,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP, // merge, find the last use and trim the live range. That will also add the // isKill marker. if (ALR->end == CopyIdx) - TrimLiveIntervalToLastUse(CopyUseIdx, CopyMI->getParent(), IntA, ALR); + li_->shrinkToUses(&IntA); ++numExtends; return true; @@ -427,6 +430,10 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, MachineInstr *NewMI = tii_->commuteInstruction(DefMI); if (!NewMI) return false; + if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && + TargetRegisterInfo::isVirtualRegister(IntB.reg) && + !mri_->constrainRegClass(IntB.reg, mri_->getRegClass(IntA.reg))) + return false; if (NewMI != DefMI) { li_->ReplaceMachineInstrInMaps(DefMI, NewMI); MBB->insert(DefMI, NewMI); @@ -504,98 +511,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, return true; } -/// isSameOrFallThroughBB - Return true if MBB == SuccMBB or MBB simply -/// fallthoughs to SuccMBB. -static bool isSameOrFallThroughBB(MachineBasicBlock *MBB, - MachineBasicBlock *SuccMBB, - const TargetInstrInfo *tii_) { - if (MBB == SuccMBB) - return true; - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector<MachineOperand, 4> Cond; - return !tii_->AnalyzeBranch(*MBB, TBB, FBB, Cond) && !TBB && !FBB && - MBB->isSuccessor(SuccMBB); -} - -/// removeRange - Wrapper for LiveInterval::removeRange. This removes a range -/// from a physical register live interval as well as from the live intervals -/// of its sub-registers. -static void removeRange(LiveInterval &li, - SlotIndex Start, SlotIndex End, - LiveIntervals *li_, const TargetRegisterInfo *tri_) { - li.removeRange(Start, End, true); - if (TargetRegisterInfo::isPhysicalRegister(li.reg)) { - for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) { - if (!li_->hasInterval(*SR)) - continue; - LiveInterval &sli = li_->getInterval(*SR); - SlotIndex RemoveStart = Start; - SlotIndex RemoveEnd = Start; - - while (RemoveEnd != End) { - LiveInterval::iterator LR = sli.FindLiveRangeContaining(RemoveStart); - if (LR == sli.end()) - break; - RemoveEnd = (LR->end < End) ? LR->end : End; - sli.removeRange(RemoveStart, RemoveEnd, true); - RemoveStart = RemoveEnd; - } - } - } -} - -/// TrimLiveIntervalToLastUse - If there is a last use in the same basic block -/// as the copy instruction, trim the live interval to the last use and return -/// true. -bool -SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx, - MachineBasicBlock *CopyMBB, - LiveInterval &li, - const LiveRange *LR) { - SlotIndex MBBStart = li_->getMBBStartIdx(CopyMBB); - SlotIndex LastUseIdx; - MachineOperand *LastUse = - lastRegisterUse(LR->start, CopyIdx.getPrevSlot(), li.reg, LastUseIdx); - if (LastUse) { - MachineInstr *LastUseMI = LastUse->getParent(); - if (!isSameOrFallThroughBB(LastUseMI->getParent(), CopyMBB, tii_)) { - // r1024 = op - // ... - // BB1: - // = r1024 - // - // BB2: - // r1025<dead> = r1024<kill> - if (MBBStart < LR->end) - removeRange(li, MBBStart, LR->end, li_, tri_); - return true; - } - - // There are uses before the copy, just shorten the live range to the end - // of last use. 
- LastUse->setIsKill(); - removeRange(li, LastUseIdx.getDefIndex(), LR->end, li_, tri_); - if (LastUseMI->isCopy()) { - MachineOperand &DefMO = LastUseMI->getOperand(0); - if (DefMO.getReg() == li.reg && !DefMO.getSubReg()) - DefMO.setIsDead(); - } - return true; - } - - // Is it livein? - if (LR->start <= MBBStart && LR->end > MBBStart) { - if (LR->start == li_->getZeroIndex()) { - assert(TargetRegisterInfo::isPhysicalRegister(li.reg)); - // Live-in to the function but dead. Remove it from entry live-in set. - mf_->begin()->removeLiveIn(li.reg); - } - // FIXME: Shorten intervals in BBs that reaches this BB. - } - - return false; -} - /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial /// computation, replace the copy by rematerialize the definition. bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, @@ -782,26 +697,6 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_, return false; } -/// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy. -/// Return true if live interval is removed. -bool SimpleRegisterCoalescing::ShortenDeadCopyLiveRange(LiveInterval &li, - MachineInstr *CopyMI) { - SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI); - LiveInterval::iterator MLR = - li.FindLiveRangeContaining(CopyIdx.getDefIndex()); - if (MLR == li.end()) - return false; // Already removed by ShortenDeadCopySrcLiveRange. - SlotIndex RemoveStart = MLR->start; - SlotIndex RemoveEnd = MLR->end; - SlotIndex DefIdx = CopyIdx.getDefIndex(); - // Remove the liverange that's defined by this. - if (RemoveStart == DefIdx && RemoveEnd == DefIdx.getStoreIndex()) { - removeRange(li, RemoveStart, RemoveEnd, li_, tri_); - return removeIntervalIfEmpty(li, li_, tri_); - } - return false; -} - /// RemoveDeadDef - If a def of a live interval is now determined dead, remove /// the val# it defines. If the live interval becomes empty, remove it as well. bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li, @@ -835,84 +730,6 @@ void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg, } } -/// PropagateDeadness - Propagate the dead marker to the instruction which -/// defines the val#. -static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI, - SlotIndex &LRStart, LiveIntervals *li_, - const TargetRegisterInfo* tri_) { - MachineInstr *DefMI = - li_->getInstructionFromIndex(LRStart.getDefIndex()); - if (DefMI && DefMI != CopyMI) { - int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg); - if (DeadIdx != -1) - DefMI->getOperand(DeadIdx).setIsDead(); - else - DefMI->addOperand(MachineOperand::CreateReg(li.reg, - /*def*/true, /*implicit*/true, /*kill*/false, /*dead*/true)); - LRStart = LRStart.getNextSlot(); - } -} - -/// ShortenDeadCopySrcLiveRange - Shorten a live range as it's artificially -/// extended by a dead copy. Mark the last use (if any) of the val# as kill as -/// ends the live range there. If there isn't another use, then this live range -/// is dead. Return true if live interval is removed. -bool -SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, - MachineInstr *CopyMI) { - SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI); - if (CopyIdx == SlotIndex()) { - // FIXME: special case: function live in. It can be a general case if the - // first instruction index starts at > 0 value. - assert(TargetRegisterInfo::isPhysicalRegister(li.reg)); - // Live-in to the function but dead. Remove it from entry live-in set. 
- if (mf_->begin()->isLiveIn(li.reg)) - mf_->begin()->removeLiveIn(li.reg); - if (const LiveRange *LR = li.getLiveRangeContaining(CopyIdx)) - removeRange(li, LR->start, LR->end, li_, tri_); - return removeIntervalIfEmpty(li, li_, tri_); - } - - LiveInterval::iterator LR = - li.FindLiveRangeContaining(CopyIdx.getPrevIndex().getStoreIndex()); - if (LR == li.end()) - // Livein but defined by a phi. - return false; - - SlotIndex RemoveStart = LR->start; - SlotIndex RemoveEnd = CopyIdx.getStoreIndex(); - if (LR->end > RemoveEnd) - // More uses past this copy? Nothing to do. - return false; - - // If there is a last use in the same bb, we can't remove the live range. - // Shorten the live interval and return. - MachineBasicBlock *CopyMBB = CopyMI->getParent(); - if (TrimLiveIntervalToLastUse(CopyIdx, CopyMBB, li, LR)) - return false; - - // There are other kills of the val#. Nothing to do. - if (!li.isOnlyLROfValNo(LR)) - return false; - - MachineBasicBlock *StartMBB = li_->getMBBFromIndex(RemoveStart); - if (!isSameOrFallThroughBB(StartMBB, CopyMBB, tii_)) - // If the live range starts in another mbb and the copy mbb is not a fall - // through mbb, then we can only cut the range from the beginning of the - // copy mbb. - RemoveStart = li_->getMBBStartIdx(CopyMBB).getNextIndex().getBaseIndex(); - - if (LR->valno->def == RemoveStart) { - // If the def MI defines the val# and this copy is the only kill of the - // val#, then propagate the dead marker. - PropagateDeadness(li, CopyMI, RemoveStart, li_, tri_); - ++numDeadValNo; - } - - removeRange(li, RemoveStart, RemoveEnd, li_, tri_); - return removeIntervalIfEmpty(li, li_, tri_); -} - /// shouldJoinPhys - Return true if a copy involving a physreg should be joined. /// We need to be careful about coalescing a source physical register with a /// virtual register. Once the coalescing is done, it cannot be broken and these @@ -928,7 +745,7 @@ bool SimpleRegisterCoalescing::shouldJoinPhys(CoalescerPair &CP) { if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue()) return true; - if (DisablePhysicalJoin) { + if (!EnablePhysicalJoin) { DEBUG(dbgs() << "\tPhysreg joins disabled.\n"); return false; } @@ -955,7 +772,7 @@ bool SimpleRegisterCoalescing::shouldJoinPhys(CoalescerPair &CP) { // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. if (!CP.isPartial()) { const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg()); - unsigned Threshold = allocatableRCRegs_[RC].count() * 2; + unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2; unsigned Length = li_->getApproximateInstructionCount(JoinVInt); if (Length > Threshold) { ++numAborts; @@ -974,7 +791,7 @@ SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg, const TargetRegisterClass *SrcRC, const TargetRegisterClass *DstRC, const TargetRegisterClass *NewRC) { - unsigned NewRCCount = allocatableRCRegs_[NewRC].count(); + unsigned NewRCCount = RegClassInfo.getNumAllocatableRegs(NewRC); // This heuristics is good enough in practice, but it's obviously not *right*. // 4 is a magic number that works well enough for x86, ARM, etc. It filter // out all but the most restrictive register classes. 
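// ---------------------------------------------------------------------------
// Worked example for the density check in the hunk that follows (all of the
// numbers here are made up): "use density" is uses per instruction scaled by
// the size of the register class, and the cross-class join is rejected when
// the density would at least double for either side.
#include <assert.h>

static bool densityDoubles(unsigned NewUses, unsigned OldSize,
                           unsigned OldRCCount, unsigned OldUses,
                           unsigned NewSize, unsigned NewRCCount) {
  // Division-free form of:
  //   NewUses/(NewSize*NewRCCount) > 2 * OldUses/(OldSize*OldRCCount)
  return NewUses * OldSize * OldRCCount >
         2 * OldUses * NewSize * NewRCCount;
}

int main() {
  // 10 uses over 40 instrs in a 16-reg class; joining gives 18 uses over
  // 50 instrs in an 8-reg class. 18*40*16 = 11520 > 2*10*50*8 = 8000,
  // so this join would be rejected as too dense.
  assert(densityDoubles(18, 40, 16, 10, 50, 8));
  return 0;
}
// ---------------------------------------------------------------------------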
@@ -988,8 +805,14 @@ SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg, LiveInterval &DstInt = li_->getInterval(DstReg); unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt); unsigned DstSize = li_->getApproximateInstructionCount(DstInt); - if (SrcSize <= NewRCCount && DstSize <= NewRCCount) + + // Coalesce aggressively if the intervals are small compared to the number of + // registers in the new class. The number 4 is fairly arbitrary, chosen to be + // less aggressive than the 8 used for the whole function size. + const unsigned ThresSize = 4 * NewRCCount; + if (SrcSize <= ThresSize && DstSize <= ThresSize) return true; + // Estimate *register use density*. If it doubles or more, abort. unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg), mri_->use_nodbg_end()); @@ -997,13 +820,13 @@ SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg, mri_->use_nodbg_end()); unsigned NewUses = SrcUses + DstUses; unsigned NewSize = SrcSize + DstSize; - if (SrcRC != NewRC && SrcSize > NewRCCount) { - unsigned SrcRCCount = allocatableRCRegs_[SrcRC].count(); + if (SrcRC != NewRC && SrcSize > ThresSize) { + unsigned SrcRCCount = RegClassInfo.getNumAllocatableRegs(SrcRC); if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount) return false; } - if (DstRC != NewRC && DstSize > NewRCCount) { - unsigned DstRCCount = allocatableRCRegs_[DstRC].count(); + if (DstRC != NewRC && DstSize > ThresSize) { + unsigned DstRCCount = RegClassInfo.getNumAllocatableRegs(DstRC); if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount) return false; } @@ -1033,6 +856,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // If they are already joined we continue. if (CP.getSrcReg() == CP.getDstReg()) { + markAsJoined(CopyMI); DEBUG(dbgs() << "\tCopy already coalesced.\n"); return false; // Not coalescable. } @@ -1552,81 +1376,6 @@ void SimpleRegisterCoalescing::joinIntervals() { } } -/// Return true if the two specified registers belong to different register -/// classes. The registers may be either phys or virt regs. -bool -SimpleRegisterCoalescing::differingRegisterClasses(unsigned RegA, - unsigned RegB) const { - // Get the register classes for the first reg. - if (TargetRegisterInfo::isPhysicalRegister(RegA)) { - assert(TargetRegisterInfo::isVirtualRegister(RegB) && - "Shouldn't consider two physregs!"); - return !mri_->getRegClass(RegB)->contains(RegA); - } - - // Compare against the regclass for the second reg. - const TargetRegisterClass *RegClassA = mri_->getRegClass(RegA); - if (TargetRegisterInfo::isVirtualRegister(RegB)) { - const TargetRegisterClass *RegClassB = mri_->getRegClass(RegB); - return RegClassA != RegClassB; - } - return !RegClassA->contains(RegB); -} - -/// lastRegisterUse - Returns the last (non-debug) use of the specific register -/// between cycles Start and End or NULL if there are no uses. 
-MachineOperand * -SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, - SlotIndex End, - unsigned Reg, - SlotIndex &UseIdx) const{ - UseIdx = SlotIndex(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - MachineOperand *LastUse = NULL; - for (MachineRegisterInfo::use_nodbg_iterator I = mri_->use_nodbg_begin(Reg), - E = mri_->use_nodbg_end(); I != E; ++I) { - MachineOperand &Use = I.getOperand(); - MachineInstr *UseMI = Use.getParent(); - if (UseMI->isIdentityCopy()) - continue; - SlotIndex Idx = li_->getInstructionIndex(UseMI); - if (Idx >= Start && Idx < End && (!UseIdx.isValid() || Idx >= UseIdx)) { - LastUse = &Use; - UseIdx = Idx.getUseIndex(); - } - } - return LastUse; - } - - SlotIndex s = Start; - SlotIndex e = End.getPrevSlot().getBaseIndex(); - while (e >= s) { - // Skip deleted instructions - MachineInstr *MI = li_->getInstructionFromIndex(e); - while (e != SlotIndex() && e.getPrevIndex() >= s && !MI) { - e = e.getPrevIndex(); - MI = li_->getInstructionFromIndex(e); - } - if (e < s || MI == NULL) - return NULL; - - // Ignore identity copies. - if (!MI->isIdentityCopy()) - for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) { - MachineOperand &Use = MI->getOperand(i); - if (Use.isReg() && Use.isUse() && Use.getReg() && - tri_->regsOverlap(Use.getReg(), Reg)) { - UseIdx = e.getUseIndex(); - return &Use; - } - } - - e = e.getPrevIndex(); - } - - return NULL; -} - void SimpleRegisterCoalescing::releaseMemory() { JoinedCopies.clear(); ReMatCopies.clear(); @@ -1651,10 +1400,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { if (VerifyCoalescing) mf_->verify(this, "Before register coalescing"); - for (TargetRegisterInfo::regclass_iterator I = tri_->regclass_begin(), - E = tri_->regclass_end(); I != E; ++I) - allocatableRCRegs_.insert(std::make_pair(*I, - tri_->getAllocatableSet(fn, *I))); + RegClassInfo.runOnMachineFunction(fn); // Join (coalesce) intervals if requested. if (EnableJoining) { @@ -1691,13 +1437,11 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { // or else the scavenger may complain. LowerSubregs will // delete them later. DoDelete = false; - + if (MI->allDefsAreDead()) { - if (li_->hasInterval(SrcReg)) { - LiveInterval &li = li_->getInterval(SrcReg); - if (!ShortenDeadCopySrcLiveRange(li, MI)) - ShortenDeadCopyLiveRange(li, MI); - } + if (TargetRegisterInfo::isVirtualRegister(SrcReg) && + li_->hasInterval(SrcReg)) + li_->shrinkToUses(&li_->getInterval(SrcReg)); DoDelete = true; } if (!DoDelete) { @@ -1749,24 +1493,6 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { DeadDefs.clear(); } - // If the move will be an identity move delete it - if (MI->isIdentityCopy()) { - unsigned SrcReg = MI->getOperand(1).getReg(); - if (li_->hasInterval(SrcReg)) { - LiveInterval &RegInt = li_->getInterval(SrcReg); - // If def of this move instruction is dead, remove its live range - // from the destination register's live interval. - if (MI->allDefsAreDead()) { - if (!ShortenDeadCopySrcLiveRange(RegInt, MI)) - ShortenDeadCopyLiveRange(RegInt, MI); - } - } - li_->RemoveMachineInstrFromMaps(MI); - mii = mbbi->erase(mii); - ++numPeep; - continue; - } - ++mii; // Check for now unnecessary kill flags. 
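// ---------------------------------------------------------------------------
// Toy model of what the shrinkToUses calls above achieve for the dead-copy
// case (Range and the helper are invented, not the LiveIntervals API): once
// a copy's defs are all dead, the source interval no longer needs to reach
// the copy, so it is trimmed back to its last remaining real use.
#include <algorithm>
#include <vector>

struct Range { unsigned Start, End; }; // half-open [Start, End)

static void shrinkToUses(Range &LR, const std::vector<unsigned> &UseIdxs) {
  unsigned NewEnd = LR.Start; // no uses left: the range becomes empty
  for (unsigned i = 0, e = UseIdxs.size(); i != e; ++i)
    NewEnd = std::max(NewEnd, UseIdxs[i] + 1); // stay live through each use
  LR.End = std::min(LR.End, NewEnd);
}
// ---------------------------------------------------------------------------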
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index 65cf542..92f6c64 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -17,7 +17,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/RegisterCoalescer.h" -#include "llvm/ADT/BitVector.h" +#include "RegisterClassInfo.h" namespace llvm { class SimpleRegisterCoalescing; @@ -47,8 +47,7 @@ namespace llvm { LiveDebugVariables *ldv_; const MachineLoopInfo* loopInfo; AliasAnalysis *AA; - - DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs_; + RegisterClassInfo RegClassInfo; /// JoinedCopies - Keep track of copies eliminated due to coalescing. /// @@ -103,10 +102,6 @@ namespace llvm { /// use this information below to update aliases. bool JoinIntervals(CoalescerPair &CP); - /// Return true if the two specified registers belong to different register - /// classes. The registers may be either phys or virt regs. - bool differingRegisterClasses(unsigned RegA, unsigned RegB) const; - /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If /// the source value number is defined by a copy from the destination reg /// see if we can merge these two destination reg valno# into a single @@ -124,13 +119,6 @@ namespace llvm { /// can transform the copy into a noop by commuting the definition. bool RemoveCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI); - /// TrimLiveIntervalToLastUse - If there is a last use in the same basic - /// block as the copy instruction, trim the ive interval to the last use - /// and return true. - bool TrimLiveIntervalToLastUse(SlotIndex CopyIdx, - MachineBasicBlock *CopyMBB, - LiveInterval &li, const LiveRange *LR); - /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial /// computation, replace the copy by rematerialize the definition. /// If PreserveSrcInt is true, make sure SrcInt is valid after the call. @@ -156,16 +144,6 @@ namespace llvm { /// subregister. void UpdateRegDefsUses(const CoalescerPair &CP); - /// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy. - /// Return true if live interval is removed. - bool ShortenDeadCopyLiveRange(LiveInterval &li, MachineInstr *CopyMI); - - /// ShortenDeadCopyLiveRange - Shorten a live range as it's artificially - /// extended by a dead copy. Mark the last use (if any) of the val# as kill - /// as ends the live range there. If there isn't another use, then this - /// live range is dead. Return true if live interval is removed. - bool ShortenDeadCopySrcLiveRange(LiveInterval &li, MachineInstr *CopyMI); - /// RemoveDeadDef - If a def of a live interval is now determined dead, /// remove the val# it defines. If the live interval becomes empty, remove /// it as well. @@ -175,11 +153,6 @@ namespace llvm { /// VNInfo copy flag for DstReg and all aliases. void RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI); - /// lastRegisterUse - Returns the last use of the specific register between - /// cycles Start and End or NULL if there are no uses. - MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End, - unsigned Reg, SlotIndex &LastUseIdx) const; - /// markAsJoined - Remember that CopyMI has already been joined. 
void markAsJoined(MachineInstr *CopyMI); }; diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index aaa8568..92970e4 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -443,16 +443,17 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F); // Insert a load of the callsite in the dispatch block, and a switch on its - // value. By default, we go to a block that just does an unwind (which is the - // correct action for a standard call). - BasicBlock *UnwindBlock = - BasicBlock::Create(F.getContext(), "unwindbb", &F); - Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock)); + // value. By default, we issue a trap statement. + BasicBlock *TrapBlock = + BasicBlock::Create(F.getContext(), "trapbb", &F); + CallInst::Create(Intrinsic::getDeclaration(F.getParent(), Intrinsic::trap), + "", TrapBlock); + new UnreachableInst(F.getContext(), TrapBlock); Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true, DispatchBlock); SwitchInst *DispatchSwitch = - SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(), + SwitchInst::Create(DispatchLoad, TrapBlock, Invokes.size(), DispatchBlock); // Split the entry block to insert the conditional branch for the setjmp. BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), @@ -519,7 +520,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { // Add a call to dispatch_setup after the setjmp call. This is expanded to any // target-specific setup that needs to be done. - CallInst::Create(DispatchSetupFn, "", EntryBB->getTerminator()); + CallInst::Create(DispatchSetupFn, DispatchVal, "", EntryBB->getTerminator()); // check the return value of the setjmp. non-zero goes to dispatcher. Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), @@ -561,7 +562,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { // Replace all unwinds with a branch to the unwind handler. // ??? Should this ever happen with sjlj exceptions? for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) { - BranchInst::Create(UnwindBlock, Unwinds[i]); + BranchInst::Create(TrapBlock, Unwinds[i]); Unwinds[i]->eraseFromParent(); } diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp index cab18a1..6949618 100644 --- a/lib/CodeGen/SpillPlacement.cpp +++ b/lib/CodeGen/SpillPlacement.cpp @@ -135,13 +135,10 @@ struct SpillPlacement::Node { /// addBias - Bias this node from an ingoing[0] or outgoing[1] link. /// Return the change to the total number of positive biases. - int addBias(float w, bool out) { + void addBias(float w, bool out) { // Normalize w relative to all connected blocks from that direction. w *= Scale[out]; - int Before = Bias > 0; Bias += w; - int After = Bias > 0; - return After - Before; } /// update - Recompute Value from Bias and Links. Return true when node @@ -230,14 +227,14 @@ void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) { if (I->Entry != DontCare) { unsigned ib = bundles->getBundle(I->Number, 0); activate(ib); - PositiveNodes += nodes[ib].addBias(Freq * Bias[I->Entry], 1); + nodes[ib].addBias(Freq * Bias[I->Entry], 1); } // Live-out from block? 
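// ---------------------------------------------------------------------------
// Standalone model of the Hopfield-node change above (HopfieldNode is
// illustrative): addBias now only accumulates; whether a node ends up
// preferring a register is rediscovered later by scanActiveBundles/iterate
// instead of being counted on the fly.
struct HopfieldNode {
  float Bias;     // sum of scaled block-frequency biases
  float Scale[2]; // per-direction normalization: [0] ingoing, [1] outgoing

  HopfieldNode() : Bias(0) { Scale[0] = Scale[1] = 1; }

  void addBias(float w, bool out) {
    w *= Scale[out]; // normalize relative to that direction's blocks
    Bias += w;
  }
  bool preferReg() const { return Bias > 0; } // checked in a separate scan
};
// ---------------------------------------------------------------------------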
if (I->Exit != DontCare) { unsigned ob = bundles->getBundle(I->Number, 1); activate(ob); - PositiveNodes += nodes[ob].addBias(Freq * Bias[I->Exit], 0); + nodes[ob].addBias(Freq * Bias[I->Exit], 0); } } } @@ -254,16 +251,42 @@ void SpillPlacement::addLinks(ArrayRef<unsigned> Links) { continue; activate(ib); activate(ob); + if (nodes[ib].Links.empty() && !nodes[ib].mustSpill()) + Linked.push_back(ib); + if (nodes[ob].Links.empty() && !nodes[ob].mustSpill()) + Linked.push_back(ob); float Freq = getBlockFrequency(Number); nodes[ib].addLink(ob, Freq, 1); nodes[ob].addLink(ib, Freq, 0); } } +bool SpillPlacement::scanActiveBundles() { + Linked.clear(); + RecentPositive.clear(); + for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) { + nodes[n].update(nodes); + // A node that must spill, or a node without any links is not going to + // change its value ever again, so exclude it from iterations. + if (nodes[n].mustSpill()) + continue; + if (!nodes[n].Links.empty()) + Linked.push_back(n); + if (nodes[n].preferReg()) + RecentPositive.push_back(n); + } + return !RecentPositive.empty(); +} + /// iterate - Repeatedly update the Hopfield nodes until stability or the /// maximum number of iterations is reached. /// @param Linked - Numbers of linked nodes that need updating. -void SpillPlacement::iterate(const SmallVectorImpl<unsigned> &Linked) { +void SpillPlacement::iterate() { + // First update the recently positive nodes. They have likely received new + // negative bias that will turn them off. + while (!RecentPositive.empty()) + nodes[RecentPositive.pop_back_val()].update(nodes); + if (Linked.empty()) return; @@ -279,10 +302,13 @@ void SpillPlacement::iterate(const SmallVectorImpl<unsigned> &Linked) { for (SmallVectorImpl<unsigned>::const_reverse_iterator I = llvm::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) { unsigned n = *I; - bool C = nodes[n].update(nodes); - Changed |= C; + if (nodes[n].update(nodes)) { + Changed = true; + if (nodes[n].preferReg()) + RecentPositive.push_back(n); + } } - if (!Changed) + if (!Changed || !RecentPositive.empty()) return; // Scan forwards, skipping the first node which was just updated. @@ -290,38 +316,29 @@ void SpillPlacement::iterate(const SmallVectorImpl<unsigned> &Linked) { for (SmallVectorImpl<unsigned>::const_iterator I = llvm::next(Linked.begin()), E = Linked.end(); I != E; ++I) { unsigned n = *I; - bool C = nodes[n].update(nodes); - Changed |= C; + if (nodes[n].update(nodes)) { + Changed = true; + if (nodes[n].preferReg()) + RecentPositive.push_back(n); + } } - if (!Changed) + if (!Changed || !RecentPositive.empty()) return; } } void SpillPlacement::prepare(BitVector &RegBundles) { + Linked.clear(); + RecentPositive.clear(); // Reuse RegBundles as our ActiveNodes vector. ActiveNodes = &RegBundles; ActiveNodes->clear(); ActiveNodes->resize(bundles->getNumBundles()); - PositiveNodes = 0; } bool SpillPlacement::finish() { assert(ActiveNodes && "Call prepare() first"); - // Update all active nodes, and find the ones that are actually linked to - // something so their value may change when iterating. - SmallVector<unsigned, 8> Linked; - for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) { - nodes[n].update(nodes); - // A node that must spill, or a node without any links is not going to - // change its value ever again, so exclude it from iterations. - if (!nodes[n].Links.empty() && !nodes[n].mustSpill()) - Linked.push_back(n); - } - - // Iterate the network to convergence. 
-  iterate(Linked);
 
   // Write preferences back to ActiveNodes.
   bool Perfect = true;
diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h
index 46e64e6..6952ad8 100644
--- a/lib/CodeGen/SpillPlacement.h
+++ b/lib/CodeGen/SpillPlacement.h
@@ -49,8 +49,12 @@ class SpillPlacement : public MachineFunctionPass {
   // caller.
   BitVector *ActiveNodes;
 
-  // The number of active nodes with a positive bias.
-  unsigned PositiveNodes;
+  // Nodes with active links. Populated by scanActiveBundles.
+  SmallVector<unsigned, 8> Linked;
+
+  // Nodes that went positive during the last call to scanActiveBundles or
+  // iterate.
+  SmallVector<unsigned, 8> RecentPositive;
 
   // Block frequencies are computed once. Indexed by block number.
   SmallVector<float, 4> BlockFrequency;
@@ -95,9 +99,20 @@ public:
   /// addLinks - Add transparent blocks with the given numbers.
   void addLinks(ArrayRef<unsigned> Links);
 
-  /// getPositiveNodes - Return the total number of graph nodes with a positive
-  /// bias after adding constraints.
-  unsigned getPositiveNodes() const { return PositiveNodes; }
+  /// scanActiveBundles - Perform an initial scan of all bundles activated by
+  /// addConstraints and addLinks, updating their state. Add all the bundles
+  /// that now prefer a register to RecentPositive.
+  /// Prepare internal data structures for iterate.
+  /// Return true if there are any positive nodes.
+  bool scanActiveBundles();
+
+  /// iterate - Update the network iteratively until convergence, or new bundles
+  /// are found.
+  void iterate();
+
+  /// getRecentPositive - Return an array of bundles that became positive during
+  /// the previous call to scanActiveBundles or iterate.
+  ArrayRef<unsigned> getRecentPositive() { return RecentPositive; }
 
   /// finish - Compute the optimal spill code placement given the
   /// constraints. No MustSpill constraints will be violated, and the smallest
@@ -120,7 +135,6 @@ private:
   virtual void releaseMemory();
 
   void activate(unsigned);
-  void iterate(const SmallVectorImpl<unsigned>&);
 };
 
 } // end namespace llvm
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index b89139f..b6bbcd7 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -25,7 +25,6 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include <set>
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 201e9b1..bf27cc8 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -30,6 +30,9 @@ using namespace llvm;
 
 STATISTIC(NumFinished, "Number of splits finished");
 STATISTIC(NumSimple,   "Number of splits that were simple");
+STATISTIC(NumCopies,   "Number of copies inserted for splitting");
+STATISTIC(NumRemats,   "Number of rematerialized defs for splitting");
+STATISTIC(NumRepairs,  "Number of invalid live ranges repaired");
 
 //===----------------------------------------------------------------------===//
 //                                 Split Analysis
 //===----------------------------------------------------------------------===//
@@ -51,6 +54,7 @@ void SplitAnalysis::clear() {
   UseBlocks.clear();
   ThroughBlocks.clear();
   CurLI = 0;
+  DidRepairRange = false;
 }
 
 SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
@@ -119,6 +123,8 @@ void SplitAnalysis::analyzeUses() {
   if (!calcLiveBlockInfo()) {
     // FIXME: calcLiveBlockInfo found inconsistencies in the live range.
     // I am looking at you, SimpleRegisterCoalescing!
+    DidRepairRange = true;
+    ++NumRepairs;
     DEBUG(dbgs() << "*** Fixing inconsistent live interval!
***\n"); const_cast<LiveIntervals&>(LIS) .shrinkToUses(const_cast<LiveInterval*>(CurLI)); @@ -132,12 +138,14 @@ void SplitAnalysis::analyzeUses() { DEBUG(dbgs() << "Analyze counted " << UseSlots.size() << " instrs in " << UseBlocks.size() << " blocks, through " - << ThroughBlocks.size() << " blocks.\n"); + << NumThroughBlocks << " blocks.\n"); } /// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks /// where CurLI is live. bool SplitAnalysis::calcLiveBlockInfo() { + ThroughBlocks.resize(MF.getNumBlockIDs()); + NumThroughBlocks = NumGapBlocks = 0; if (CurLI->empty()) return true; @@ -156,54 +164,63 @@ bool SplitAnalysis::calcLiveBlockInfo() { SlotIndex Start, Stop; tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); - // LVI is the first live segment overlapping MBB. - BI.LiveIn = LVI->start <= Start; - if (!BI.LiveIn) - BI.Def = LVI->start; - - // Find the first and last uses in the block. - bool Uses = UseI != UseE && *UseI < Stop; - if (Uses) { + // If the block contains no uses, the range must be live through. At one + // point, SimpleRegisterCoalescing could create dangling ranges that ended + // mid-block. + if (UseI == UseE || *UseI >= Stop) { + ++NumThroughBlocks; + ThroughBlocks.set(BI.MBB->getNumber()); + // The range shouldn't end mid-block if there are no uses. This shouldn't + // happen. + if (LVI->end < Stop) + return false; + } else { + // This block has uses. Find the first and last uses in the block. BI.FirstUse = *UseI; assert(BI.FirstUse >= Start); do ++UseI; while (UseI != UseE && *UseI < Stop); BI.LastUse = UseI[-1]; assert(BI.LastUse < Stop); - } - // Look for gaps in the live range. - bool hasGap = false; - BI.LiveOut = true; - while (LVI->end < Stop) { - SlotIndex LastStop = LVI->end; - if (++LVI == LVE || LVI->start >= Stop) { - BI.Kill = LastStop; - BI.LiveOut = false; - break; - } - if (LastStop < LVI->start) { - hasGap = true; - BI.Kill = LastStop; - BI.Def = LVI->start; + // LVI is the first live segment overlapping MBB. + BI.LiveIn = LVI->start <= Start; + + // Look for gaps in the live range. + BI.LiveOut = true; + while (LVI->end < Stop) { + SlotIndex LastStop = LVI->end; + if (++LVI == LVE || LVI->start >= Stop) { + BI.LiveOut = false; + BI.LastUse = LastStop; + break; + } + if (LastStop < LVI->start) { + // There is a gap in the live range. Create duplicate entries for the + // live-in snippet and the live-out snippet. + ++NumGapBlocks; + + // Push the Live-in part. + BI.LiveThrough = false; + BI.LiveOut = false; + UseBlocks.push_back(BI); + UseBlocks.back().LastUse = LastStop; + + // Set up BI for the live-out part. + BI.LiveIn = false; + BI.LiveOut = true; + BI.FirstUse = LVI->start; + } } - } - // Don't set LiveThrough when the block has a gap. - BI.LiveThrough = !hasGap && BI.LiveIn && BI.LiveOut; - if (Uses) + // Don't set LiveThrough when the block has a gap. + BI.LiveThrough = BI.LiveIn && BI.LiveOut; UseBlocks.push_back(BI); - else - ThroughBlocks.push_back(BI.MBB->getNumber()); - // FIXME: This should never happen. The live range stops or starts without a - // corresponding use. An earlier pass did something wrong. - if (!BI.LiveThrough && !Uses) - return false; - - // LVI is now at LVE or LVI->end >= Stop. - if (LVI == LVE) - break; + // LVI is now at LVE or LVI->end >= Stop. + if (LVI == LVE) + break; + } // Live segment ends exactly at Stop. Move to the next segment. 
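// ---------------------------------------------------------------------------
// Compact model of the gap handling above (BlockInfo fields follow the diff;
// the helper is invented): a block where the range dies and is later
// redefined yields two UseBlocks entries, a live-in snippet ending at the
// last stop before the gap and a live-out snippet starting at the next def.
#include <vector>

struct BlockInfo {
  bool LiveIn, LiveOut, LiveThrough;
  unsigned FirstUse, LastUse;
};

static BlockInfo splitAtGap(std::vector<BlockInfo> &UseBlocks, BlockInfo BI,
                            unsigned LastStop, unsigned NextDef) {
  BlockInfo In = BI;        // push the live-in part
  In.LiveThrough = false;
  In.LiveOut = false;
  In.LastUse = LastStop;
  UseBlocks.push_back(In);

  BI.LiveIn = false;        // set up BI for the live-out part; the caller
  BI.LiveOut = true;        // pushes it once its own LastUse is known
  BI.FirstUse = NextDef;
  return BI;
}
// ---------------------------------------------------------------------------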
if (LVI->end == Stop && ++LVI == LVE) @@ -215,9 +232,34 @@ bool SplitAnalysis::calcLiveBlockInfo() { else MFI = LIS.getMBBFromIndex(LVI->start); } + + assert(getNumLiveBlocks() == countLiveBlocks(CurLI) && "Bad block count"); return true; } +unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const { + if (cli->empty()) + return 0; + LiveInterval *li = const_cast<LiveInterval*>(cli); + LiveInterval::iterator LVI = li->begin(); + LiveInterval::iterator LVE = li->end(); + unsigned Count = 0; + + // Loop over basic blocks where li is live. + MachineFunction::const_iterator MFI = LIS.getMBBFromIndex(LVI->start); + SlotIndex Stop = LIS.getMBBEndIdx(MFI); + for (;;) { + ++Count; + LVI = li->advanceTo(LVI, Stop); + if (LVI == LVE) + return Count; + do { + ++MFI; + Stop = LIS.getMBBEndIdx(MFI); + } while (Stop <= LVI->start); + } +} + bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const { unsigned OrigReg = VRM.getOriginal(CurLI->reg); const LiveInterval &Orig = LIS.getInterval(OrigReg); @@ -348,8 +390,33 @@ void SplitEditor::extendRange(unsigned RegIdx, SlotIndex Idx) { // Now for the fun part. We know that ParentVNI potentially has multiple defs, // and we may need to create even more phi-defs to preserve VNInfo SSA form. // Perform a search for all predecessor blocks where we know the dominating - // VNInfo. Insert phi-def VNInfos along the path back to IdxMBB. + // VNInfo. + VNInfo *VNI = findReachingDefs(LI, IdxMBB, Idx.getNextSlot()); + // When there were multiple different values, we may need new PHIs. + if (!VNI) + return updateSSA(); + + // Poor man's SSA update for the single-value case. + LiveOutPair LOP(VNI, MDT[LIS.getMBBFromIndex(VNI->def)]); + for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(), + E = LiveInBlocks.end(); I != E; ++I) { + MachineBasicBlock *MBB = I->DomNode->getBlock(); + SlotIndex Start = LIS.getMBBStartIdx(MBB); + if (I->Kill.isValid()) + LI->addRange(LiveRange(Start, I->Kill, VNI)); + else { + LiveOutCache[MBB] = LOP; + LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); + } + } +} + +/// findReachingDefs - Search the CFG for known live-out values. +/// Add required live-in blocks to LiveInBlocks. +VNInfo *SplitEditor::findReachingDefs(LiveInterval *LI, + MachineBasicBlock *KillMBB, + SlotIndex Kill) { // Initialize the live-out cache the first time it is needed. if (LiveOutSeen.empty()) { unsigned N = VRM.getMachineFunction().getNumBlockIDs(); @@ -358,16 +425,15 @@ void SplitEditor::extendRange(unsigned RegIdx, SlotIndex Idx) { } // Blocks where LI should be live-in. - SmallVector<MachineDomTreeNode*, 16> LiveIn; - LiveIn.push_back(MDT[IdxMBB]); + SmallVector<MachineBasicBlock*, 16> WorkList(1, KillMBB); // Remember if we have seen more than one value. bool UniqueVNI = true; - VNInfo *IdxVNI = 0; + VNInfo *TheVNI = 0; // Using LiveOutCache as a visited set, perform a BFS for all reaching defs. - for (unsigned i = 0; i != LiveIn.size(); ++i) { - MachineBasicBlock *MBB = LiveIn[i]->getBlock(); + for (unsigned i = 0; i != WorkList.size(); ++i) { + MachineBasicBlock *MBB = WorkList[i]; assert(!MBB->pred_empty() && "Value live-in to entry block?"); for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { @@ -377,9 +443,9 @@ void SplitEditor::extendRange(unsigned RegIdx, SlotIndex Idx) { // Is this a known live-out block? 
if (LiveOutSeen.test(Pred->getNumber())) { if (VNInfo *VNI = LOP.first) { - if (IdxVNI && IdxVNI != VNI) + if (TheVNI && TheVNI != VNI) UniqueVNI = false; - IdxVNI = VNI; + TheVNI = VNI; } continue; } @@ -395,64 +461,50 @@ void SplitEditor::extendRange(unsigned RegIdx, SlotIndex Idx) { LOP.first = VNI; if (VNI) { LOP.second = MDT[LIS.getMBBFromIndex(VNI->def)]; - if (IdxVNI && IdxVNI != VNI) + if (TheVNI && TheVNI != VNI) UniqueVNI = false; - IdxVNI = VNI; + TheVNI = VNI; continue; } LOP.second = 0; // No, we need a live-in value for Pred as well - if (Pred != IdxMBB) - LiveIn.push_back(MDT[Pred]); + if (Pred != KillMBB) + WorkList.push_back(Pred); else - UniqueVNI = false; // Loopback to IdxMBB, ask updateSSA() for help. + // Loopback to KillMBB, so value is really live through. + Kill = SlotIndex(); } } - // We may need to add phi-def values to preserve the SSA form. - if (UniqueVNI) { - LiveOutPair LOP(IdxVNI, MDT[LIS.getMBBFromIndex(IdxVNI->def)]); - // Update LiveOutCache, but skip IdxMBB at LiveIn[0]. - for (unsigned i = 1, e = LiveIn.size(); i != e; ++i) - LiveOutCache[LiveIn[i]->getBlock()] = LOP; - } else - IdxVNI = updateSSA(RegIdx, LiveIn, Idx, IdxMBB); - - // Since we went through the trouble of a full BFS visiting all reaching defs, - // the values in LiveIn are now accurate. No more phi-defs are needed - // for these blocks, so we can color the live ranges. - for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) { - MachineBasicBlock *MBB = LiveIn[i]->getBlock(); - SlotIndex Start = LIS.getMBBStartIdx(MBB); - VNInfo *VNI = LiveOutCache[MBB].first; + // Transfer WorkList to LiveInBlocks in reverse order. + // This ordering works best with updateSSA(). + LiveInBlocks.clear(); + LiveInBlocks.reserve(WorkList.size()); + while(!WorkList.empty()) + LiveInBlocks.push_back(MDT[WorkList.pop_back_val()]); - // Anything in LiveIn other than IdxMBB is live-through. - // In IdxMBB, we should stop at Idx unless the same value is live-out. - if (MBB == IdxMBB && IdxVNI != VNI) - LI->addRange(LiveRange(Start, Idx.getNextSlot(), IdxVNI)); - else - LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); - } + // The kill block may not be live-through. + assert(LiveInBlocks.back().DomNode->getBlock() == KillMBB); + LiveInBlocks.back().Kill = Kill; + + return UniqueVNI ? TheVNI : 0; } -VNInfo *SplitEditor::updateSSA(unsigned RegIdx, - SmallVectorImpl<MachineDomTreeNode*> &LiveIn, - SlotIndex Idx, - const MachineBasicBlock *IdxMBB) { +void SplitEditor::updateSSA() { // This is essentially the same iterative algorithm that SSAUpdater uses, // except we already have a dominator tree, so we don't have to recompute it. - LiveInterval *LI = Edit->get(RegIdx); - VNInfo *IdxVNI = 0; unsigned Changes; do { Changes = 0; // Propagate live-out values down the dominator tree, inserting phi-defs - // when necessary. Since LiveIn was created by a BFS, going backwards makes - // it more likely for us to visit immediate dominators before their - // children. - for (unsigned i = LiveIn.size(); i; --i) { - MachineDomTreeNode *Node = LiveIn[i-1]; + // when necessary. + for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(), + E = LiveInBlocks.end(); I != E; ++I) { + MachineDomTreeNode *Node = I->DomNode; + // Skip block if the live-in value has already been determined. 
+ if (!Node) + continue; MachineBasicBlock *MBB = Node->getBlock(); MachineDomTreeNode *IDom = Node->getIDom(); LiveOutPair IDomValue; @@ -461,9 +513,9 @@ VNInfo *SplitEditor::updateSSA(unsigned RegIdx, // This is probably an unreachable block that has survived somehow. bool needPHI = !IDom || !LiveOutSeen.test(IDom->getBlock()->getNumber()); - // IDom dominates all of our predecessors, but it may not be the immediate - // dominator. Check if any of them have live-out values that are properly - // dominated by IDom. If so, we need a phi-def here. + // IDom dominates all of our predecessors, but it may not be their + // immediate dominator. Check if any of them have live-out values that are + // properly dominated by IDom. If so, we need a phi-def here. if (!needPHI) { IDomValue = LiveOutCache[IDom->getBlock()]; for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), @@ -481,40 +533,35 @@ VNInfo *SplitEditor::updateSSA(unsigned RegIdx, } } + // The value may be live-through even if Kill is set, as can happen when + // we are called from extendRange. In that case LiveOutSeen is true, and + // LiveOutCache indicates a foreign or missing value. + LiveOutPair &LOP = LiveOutCache[MBB]; + // Create a phi-def if required. if (needPHI) { ++Changes; SlotIndex Start = LIS.getMBBStartIdx(MBB); + unsigned RegIdx = RegAssign.lookup(Start); + LiveInterval *LI = Edit->get(RegIdx); VNInfo *VNI = LI->getNextValue(Start, 0, LIS.getVNInfoAllocator()); VNI->setIsPHIDef(true); - // We no longer need LI to be live-in. - LiveIn.erase(LiveIn.begin()+(i-1)); - // Blocks in LiveIn are either IdxMBB, or have a value live-through. - if (MBB == IdxMBB) - IdxVNI = VNI; - // Check if we need to update live-out info. - LiveOutPair &LOP = LiveOutCache[MBB]; - if (LOP.second == Node || !LiveOutSeen.test(MBB->getNumber())) { - // We already have a live-out defined in MBB, so this must be IdxMBB. - assert(MBB == IdxMBB && "Adding phi-def to known live-out"); - LI->addRange(LiveRange(Start, Idx.getNextSlot(), VNI)); - } else { - // This phi-def is also live-out, so color the whole block. + I->Value = VNI; + // This block is done, we know the final value. + I->DomNode = 0; + if (I->Kill.isValid()) + LI->addRange(LiveRange(Start, I->Kill, VNI)); + else { LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); LOP = LiveOutPair(VNI, Node); } } else if (IDomValue.first) { - // No phi-def here. Remember incoming value for IdxMBB. - if (MBB == IdxMBB) { - IdxVNI = IDomValue.first; - // IdxMBB need not be live-out. - if (!LiveOutSeen.test(MBB->getNumber())) - continue; - } - assert(LiveOutSeen.test(MBB->getNumber()) && "Expected live-out block"); + // No phi-def here. Remember incoming value. + I->Value = IDomValue.first; + if (I->Kill.isValid()) + continue; // Propagate IDomValue if needed: // MBB is live-out and doesn't define its own value. - LiveOutPair &LOP = LiveOutCache[MBB]; if (LOP.second != Node && LOP.first != IDomValue.first) { ++Changes; LOP = IDomValue; @@ -523,8 +570,20 @@ VNInfo *SplitEditor::updateSSA(unsigned RegIdx, } } while (Changes); - assert(IdxVNI && "Didn't find value for Idx"); - return IdxVNI; + // The values in LiveInBlocks are now accurate. No more phi-defs are needed + // for these blocks, so we can color the live ranges. 
+ for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(), + E = LiveInBlocks.end(); I != E; ++I) { + if (!I->DomNode) + continue; + assert(I->Value && "No live-in value found"); + MachineBasicBlock *MBB = I->DomNode->getBlock(); + SlotIndex Start = LIS.getMBBStartIdx(MBB); + unsigned RegIdx = RegAssign.lookup(Start); + LiveInterval *LI = Edit->get(RegIdx); + LI->addRange(LiveRange(Start, I->Kill.isValid() ? + I->Kill : LIS.getMBBEndIdx(MBB), I->Value)); + } } VNInfo *SplitEditor::defFromParent(unsigned RegIdx, @@ -536,15 +595,22 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, SlotIndex Def; LiveInterval *LI = Edit->get(RegIdx); + // We may be trying to avoid interference that ends at a deleted instruction, + // so always begin RegIdx 0 early and all others late. + bool Late = RegIdx != 0; + // Attempt cheap-as-a-copy rematerialization. LiveRangeEdit::Remat RM(ParentVNI); if (Edit->canRematerializeAt(RM, UseIdx, true, LIS)) { - Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI); + Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI, Late); + ++NumRemats; } else { // Can't remat, just insert a copy from parent. CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg) .addReg(Edit->getReg()); - Def = LIS.InsertMachineInstrInMaps(CopyMI).getDefIndex(); + Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late) + .getDefIndex(); + ++NumCopies; } // Define the value in Reg. @@ -554,9 +620,7 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, } /// Create a new virtual register and live interval. -void SplitEditor::openIntv() { - assert(!OpenIdx && "Previous LI not closed before openIntv"); - +unsigned SplitEditor::openIntv() { // Create the complement as index 0. if (Edit->empty()) Edit->create(LIS, VRM); @@ -564,6 +628,13 @@ void SplitEditor::openIntv() { // Create the open interval. OpenIdx = Edit->size(); Edit->create(LIS, VRM); + return OpenIdx; +} + +void SplitEditor::selectIntv(unsigned Idx) { + assert(Idx != 0 && "Cannot select the complement interval"); + assert(Idx < Edit->size() && "Can only select previously opened interval"); + OpenIdx = Idx; } SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) { @@ -686,18 +757,11 @@ void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { DEBUG(dump()); } -/// closeIntv - Indicate that we are done editing the currently open -/// LiveInterval, and ranges can be trimmed. -void SplitEditor::closeIntv() { - assert(OpenIdx && "openIntv not called before closeIntv"); - OpenIdx = 0; -} - -/// transferSimpleValues - Transfer all simply defined values to the new live -/// ranges. -/// Values that were rematerialized or that have multiple defs are left alone. -bool SplitEditor::transferSimpleValues() { +/// transferValues - Transfer all possible values to the new live ranges. +/// Values that were rematerialized are left alone, they need extendRange(). +bool SplitEditor::transferValues() { bool Skipped = false; + LiveInBlocks.clear(); RegAssignMap::const_iterator AssignI = RegAssign.begin(); for (LiveInterval::const_iterator ParentI = Edit->getParent().begin(), ParentE = Edit->getParent().end(); ParentI != ParentE; ++ParentI) { @@ -721,16 +785,98 @@ bool SplitEditor::transferSimpleValues() { RegIdx = 0; End = std::min(End, AssignI.start()); } + + // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI. 
DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx); + LiveInterval *LI = Edit->get(RegIdx); + + // Check for a simply defined value that can be blitted directly. if (VNInfo *VNI = Values.lookup(std::make_pair(RegIdx, ParentVNI->id))) { DEBUG(dbgs() << ':' << VNI->id); - Edit->get(RegIdx)->addRange(LiveRange(Start, End, VNI)); - } else + LI->addRange(LiveRange(Start, End, VNI)); + Start = End; + continue; + } + + // Skip rematerialized values, we need to use extendRange() and + // extendPHIKillRanges() to completely recompute the live ranges. + if (Edit->didRematerialize(ParentVNI)) { + DEBUG(dbgs() << "(remat)"); Skipped = true; + Start = End; + continue; + } + + // Initialize the live-out cache the first time it is needed. + if (LiveOutSeen.empty()) { + unsigned N = VRM.getMachineFunction().getNumBlockIDs(); + LiveOutSeen.resize(N); + LiveOutCache.resize(N); + } + + // This value has multiple defs in RegIdx, but it wasn't rematerialized, + // so the live range is accurate. Add live-in blocks in [Start;End) to the + // LiveInBlocks. + MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start); + SlotIndex BlockStart, BlockEnd; + tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB); + + // The first block may be live-in, or it may have its own def. + if (Start != BlockStart) { + VNInfo *VNI = LI->extendInBlock(BlockStart, + std::min(BlockEnd, End).getPrevSlot()); + assert(VNI && "Missing def for complex mapped value"); + DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber()); + // MBB has its own def. Is it also live-out? + if (BlockEnd <= End) { + LiveOutSeen.set(MBB->getNumber()); + LiveOutCache[MBB] = LiveOutPair(VNI, MDT[MBB]); + } + // Skip to the next block for live-in. + ++MBB; + BlockStart = BlockEnd; + } + + // Handle the live-in blocks covered by [Start;End). + assert(Start <= BlockStart && "Expected live-in block"); + while (BlockStart < End) { + DEBUG(dbgs() << ">BB#" << MBB->getNumber()); + BlockEnd = LIS.getMBBEndIdx(MBB); + if (BlockStart == ParentVNI->def) { + // This block has the def of a parent PHI, so it isn't live-in. + assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?"); + VNInfo *VNI = LI->extendInBlock(BlockStart, + std::min(BlockEnd, End).getPrevSlot()); + assert(VNI && "Missing def for complex mapped parent PHI"); + if (End >= BlockEnd) { + // Live-out as well. + LiveOutSeen.set(MBB->getNumber()); + LiveOutCache[MBB] = LiveOutPair(VNI, MDT[MBB]); + } + } else { + // This block needs a live-in value. + LiveInBlocks.push_back(MDT[MBB]); + // The last block covered may not be live-out. + if (End < BlockEnd) + LiveInBlocks.back().Kill = End; + else { + // Live-out, but we need updateSSA to tell us the value. 
+ LiveOutSeen.set(MBB->getNumber()); + LiveOutCache[MBB] = LiveOutPair((VNInfo*)0, + (MachineDomTreeNode*)0); + } + } + BlockStart = BlockEnd; + ++MBB; + } Start = End; } while (Start != ParentI->end); DEBUG(dbgs() << '\n'); } + + if (!LiveInBlocks.empty()) + updateSSA(); + return Skipped; } @@ -835,8 +981,7 @@ void SplitEditor::deleteRematVictims() { Edit->eliminateDeadDefs(Dead, LIS, VRM, TII); } -void SplitEditor::finish() { - assert(OpenIdx == 0 && "Previous LI not closed before rewrite"); +void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { ++NumFinished; // At this point, the live intervals in Edit contain VNInfos corresponding to @@ -866,24 +1011,31 @@ void SplitEditor::finish() { assert((*I)->hasAtLeastOneValue() && "Split interval has no value"); #endif - // Transfer the simply mapped values, check if any are complex. - bool Complex = transferSimpleValues(); - if (Complex) + // Transfer the simply mapped values, check if any are skipped. + bool Skipped = transferValues(); + if (Skipped) extendPHIKillRanges(); else ++NumSimple; // Rewrite virtual registers, possibly extending ranges. - rewriteAssigned(Complex); + rewriteAssigned(Skipped); // Delete defs that were rematted everywhere. - if (Complex) + if (Skipped) deleteRematVictims(); // Get rid of unused values and set phi-kill flags. for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) (*I)->RenumberValues(LIS); + // Provide a reverse mapping from original indices to Edit ranges. + if (LRMap) { + LRMap->clear(); + for (unsigned i = 0, e = Edit->size(); i != e; ++i) + LRMap->push_back(i); + } + // Now check if any registers were separated into multiple components. ConnectedVNInfoEqClasses ConEQ(LIS); for (unsigned i = 0, e = Edit->size(); i != e; ++i) { @@ -895,13 +1047,18 @@ void SplitEditor::finish() { DEBUG(dbgs() << " " << NumComp << " components: " << *li << '\n'); SmallVector<LiveInterval*, 8> dups; dups.push_back(li); - for (unsigned i = 1; i != NumComp; ++i) + for (unsigned j = 1; j != NumComp; ++j) dups.push_back(&Edit->create(LIS, VRM)); ConEQ.Distribute(&dups[0], MRI); + // The new intervals all map back to i. + if (LRMap) + LRMap->resize(Edit->size(), i); } // Calculate spill weight and allocation hints for new intervals. Edit->calculateRegClassAndHint(VRM.getMachineFunction(), LIS, SA.Loops); + + assert(!LRMap || LRMap->size() == Edit->size()); } @@ -925,6 +1082,21 @@ bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) { return !Blocks.empty(); } +void SplitEditor::splitSingleBlock(const SplitAnalysis::BlockInfo &BI) { + openIntv(); + SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB->getNumber()); + SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstUse, + LastSplitPoint)); + if (!BI.LiveOut || BI.LastUse < LastSplitPoint) { + useIntv(SegStart, leaveIntvAfter(BI.LastUse)); + } else { + // The last use is after the last valid split point. + SlotIndex SegStop = leaveIntvBefore(LastSplitPoint); + useIntv(SegStart, SegStop); + overlapIntv(SegStop, BI.LastUse); + } +} + /// splitSingleBlocks - Split CurLI into a separate live interval inside each /// basic block in Blocks. 
void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) { @@ -932,22 +1104,8 @@ void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) { ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA.getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; - if (!Blocks.count(BI.MBB)) - continue; - - openIntv(); - SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB->getNumber()); - SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstUse, - LastSplitPoint)); - if (!BI.LiveOut || BI.LastUse < LastSplitPoint) { - useIntv(SegStart, leaveIntvAfter(BI.LastUse)); - } else { - // The last use is after the last valid split point. - SlotIndex SegStop = leaveIntvBefore(LastSplitPoint); - useIntv(SegStart, SegStop); - overlapIntv(SegStop, BI.LastUse); - } - closeIntv(); + if (Blocks.count(BI.MBB)) + splitSingleBlock(BI); } finish(); } diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index 20ac8a1..7174c0b 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -12,6 +12,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_CODEGEN_SPLITKIT_H +#define LLVM_CODEGEN_SPLITKIT_H + #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" @@ -60,17 +63,22 @@ public: /// 1. | o---x | Internal to block. Variable is only live in this block. /// 2. |---x | Live-in, kill. /// 3. | o---| Def, live-out. - /// 4. |---x o---| Live-in, kill, def, live-out. + /// 4. |---x o---| Live-in, kill, def, live-out. Counted by NumGapBlocks. /// 5. |---o---o---| Live-through with uses or defs. - /// 6. |-----------| Live-through without uses. Transparent. + /// 6. |-----------| Live-through without uses. Counted by NumThroughBlocks. + /// + /// Two BlockInfo entries are created for template 4. One for the live-in + /// segment, and one for the live-out segment. These entries look as if the + /// block were split in the middle where the live range isn't live. + /// + /// Live-through blocks without any uses don't get BlockInfo entries. They + /// are simply listed in ThroughBlocks instead. /// struct BlockInfo { MachineBasicBlock *MBB; SlotIndex FirstUse; ///< First instr using current reg. SlotIndex LastUse; ///< Last instr using current reg. - SlotIndex Kill; ///< Interval end point inside block. - SlotIndex Def; ///< Interval start point inside block. - bool LiveThrough; ///< Live in whole block (Templ 5. or 6. above). + bool LiveThrough; ///< Live in whole block (Templ 5. above). bool LiveIn; ///< Current reg is live in. bool LiveOut; ///< Current reg is live out. }; @@ -88,8 +96,18 @@ private: /// UseBlocks - Blocks where CurLI has uses. SmallVector<BlockInfo, 8> UseBlocks; + /// NumGapBlocks - Number of duplicate entries in UseBlocks for blocks where + /// the live range has a gap. + unsigned NumGapBlocks; + /// ThroughBlocks - Block numbers where CurLI is live through without uses. - SmallVector<unsigned, 8> ThroughBlocks; + BitVector ThroughBlocks; + + /// NumThroughBlocks - Number of live-through blocks. + unsigned NumThroughBlocks; + + /// DidRepairRange - analyze was forced to shrinkToUses(). + bool DidRepairRange; SlotIndex computeLastSplitPoint(unsigned Num); @@ -107,6 +125,11 @@ public: /// split. void analyze(const LiveInterval *li); + /// didRepairRange() - Returns true if CurLI was invalid and has been repaired + /// by analyze(). 
This really shouldn't happen, but sometimes the coalescer + /// can create live ranges that end in mid-air. + bool didRepairRange() const { return DidRepairRange; } + /// clear - clear all data structures so SplitAnalysis is ready to analyze a /// new interval. void clear(); @@ -133,11 +156,26 @@ public: /// getUseBlocks - Return an array of BlockInfo objects for the basic blocks /// where CurLI has uses. - ArrayRef<BlockInfo> getUseBlocks() { return UseBlocks; } + ArrayRef<BlockInfo> getUseBlocks() const { return UseBlocks; } + + /// getNumThroughBlocks - Return the number of through blocks. + unsigned getNumThroughBlocks() const { return NumThroughBlocks; } + + /// isThroughBlock - Return true if CurLI is live through MBB without uses. + bool isThroughBlock(unsigned MBB) const { return ThroughBlocks.test(MBB); } + + /// getThroughBlocks - Return the set of through blocks. + const BitVector &getThroughBlocks() const { return ThroughBlocks; } + + /// getNumLiveBlocks - Return the number of blocks where CurLI is live. + unsigned getNumLiveBlocks() const { + return getUseBlocks().size() - NumGapBlocks + getNumThroughBlocks(); + } - /// getThroughBlocks - Return an array of block numbers where CurLI is live - /// through without uses. - ArrayRef<unsigned> getThroughBlocks() { return ThroughBlocks; } + /// countLiveBlocks - Return the number of blocks where li is live. This is + /// guaranteed to return the same number as getNumLiveBlocks() after calling + /// analyze(li). + unsigned countLiveBlocks(const LiveInterval *li) const; typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet; @@ -223,6 +261,30 @@ class SplitEditor { // entry in LiveOutCache. BitVector LiveOutSeen; + /// LiveInBlock - Info for updateSSA() about a block where a register is + /// live-in. + /// The updateSSA caller provides DomNode and Kill inside MBB, updateSSA() + /// adds the computed live-in value. + struct LiveInBlock { + // Dominator tree node for the block. + // Cleared by updateSSA when the final value has been determined. + MachineDomTreeNode *DomNode; + + // Live-in value filled in by updateSSA once it is known. + VNInfo *Value; + + // Position in block where the live-in range ends, or SlotIndex() if the + // range passes through the block. + SlotIndex Kill; + + LiveInBlock(MachineDomTreeNode *node) : DomNode(node), Value(0) {} + }; + + /// LiveInBlocks - List of live-in blocks used by findReachingDefs() and + /// updateSSA(). This list is usually empty, it exists here to avoid frequent + /// reallocations. + SmallVector<LiveInBlock, 16> LiveInBlocks; + /// defValue - define a value in RegIdx from ParentVNI at Idx. /// Idx does not have to be ParentVNI->def, but it must be contained within /// ParentVNI's live range in ParentLI. The new value is added to the value @@ -246,17 +308,22 @@ class SplitEditor { /// Insert PHIDefs as needed to preserve SSA form. void extendRange(unsigned RegIdx, SlotIndex Idx); - /// updateSSA - Insert PHIDefs as necessary and update LiveOutCache such that - /// Edit.get(RegIdx) is live-in to all the blocks in LiveIn. - /// Return the value that is eventually live-in to IdxMBB. - VNInfo *updateSSA(unsigned RegIdx, - SmallVectorImpl<MachineDomTreeNode*> &LiveIn, - SlotIndex Idx, - const MachineBasicBlock *IdxMBB); + /// findReachingDefs - Starting from MBB, add blocks to LiveInBlocks until all + /// reaching defs for LI are found. + /// @param LI Live interval whose value is needed. + /// @param MBB Block where LI should be live-in. + /// @param Kill Kill point in MBB. 
+ /// @return Unique value seen, or NULL. + VNInfo *findReachingDefs(LiveInterval *LI, MachineBasicBlock *MBB, + SlotIndex Kill); - /// transferSimpleValues - Transfer simply defined values to the new ranges. - /// Return true if any complex ranges were skipped. - bool transferSimpleValues(); + /// updateSSA - Compute and insert PHIDefs such that all blocks in + /// LiveInBlocks get a known live-in value. Add live ranges to the blocks. + void updateSSA(); + + /// transferValues - Transfer values to the new ranges. + /// Return true if any ranges were skipped. + bool transferValues(); /// extendPHIKillRanges - Extend the ranges of all values killed by original /// parent PHIDefs. @@ -278,7 +345,15 @@ public: void reset(LiveRangeEdit&); /// Create a new virtual register and live interval. - void openIntv(); + /// Return the interval index, starting from 1. Interval index 0 is the + /// implicit complement interval. + unsigned openIntv(); + + /// currentIntv - Return the current interval index. + unsigned currentIntv() const { return OpenIdx; } + + /// selectIntv - Select a previously opened interval index. + void selectIntv(unsigned Idx); /// enterIntvBefore - Enter the open interval before the instruction at Idx. /// If the parent interval is not live before Idx, a COPY is not inserted. @@ -321,22 +396,28 @@ public: /// void overlapIntv(SlotIndex Start, SlotIndex End); - /// closeIntv - Indicate that we are done editing the currently open - /// LiveInterval, and ranges can be trimmed. - void closeIntv(); - /// finish - after all the new live ranges have been created, compute the /// remaining live range, and rewrite instructions to use the new registers. - void finish(); + /// @param LRMap When not null, this vector will map each live range in Edit + /// back to the indices returned by openIntv. + /// There may be extra indices created by dead code elimination. + void finish(SmallVectorImpl<unsigned> *LRMap = 0); /// dump - print the current interval mapping to dbgs(). void dump() const; // ===--- High level methods ---=== + /// splitSingleBlock - Split CurLI into a separate live interval around the + /// uses in a single block. This is intended to be used as part of a larger + /// split, and doesn't call finish(). + void splitSingleBlock(const SplitAnalysis::BlockInfo &BI); + /// splitSingleBlocks - Split CurLI into a separate live interval inside each /// basic block in Blocks. void splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks); }; } + +#endif diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index ec7829e..227eb47 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -587,7 +587,7 @@ StrongPHIElimination::SplitInterferencesForBasicBlock( } // We now walk the PHIs in successor blocks and check for interferences. This - // is necesary because the use of a PHI's operands are logically contained in + // is necessary because the uses of a PHI's operands are logically contained in // the predecessor block. The def of a PHI's destination register is processed // along with the other defs in a basic block.
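The findReachingDefs/updateSSA split introduced in SplitKit above factors live range repair into a cheap common case and an iterative fallback: a reverse BFS from the block that needs a value collects every block that must become live-in, and only when that walk meets more than one reaching def does the dominator-tree phi placement run. The following is a minimal, self-contained C++ sketch of the BFS phase only; CFG, DefValue, and plain block numbers are hypothetical stand-ins for MachineBasicBlock, VNInfo, and LiveOutCache, not LLVM's actual types.

// Model of the two-phase scheme: findReachingDefs() answers "which single
// value reaches this block?", and only a multi-value answer forces the
// caller into the iterative updateSSA()-style phi placement.
#include <cstdio>
#include <set>
#include <vector>

struct CFG {
  std::vector<std::vector<int> > Preds; // Preds[b] = predecessors of block b.
  std::vector<int> DefValue;            // Value live-out of b, or 0 if none.
};

// Reverse BFS from UseBB. Fills LiveInBlocks with every block that needs a
// live-in value, in reverse discovery order (which tends to visit dominators
// before their children, the property the iterative pass relies on).
// Returns the unique reaching value, or 0 when several distinct values reach.
static int findReachingDefs(const CFG &G, int UseBB,
                            std::vector<int> &LiveInBlocks) {
  std::vector<int> WorkList(1, UseBB);
  std::set<int> Seen(WorkList.begin(), WorkList.end());
  int TheVal = 0;
  bool Unique = true;
  for (unsigned i = 0; i != WorkList.size(); ++i) {
    const std::vector<int> &Preds = G.Preds[WorkList[i]];
    for (unsigned p = 0; p != Preds.size(); ++p) {
      int Pred = Preds[p];
      if (G.DefValue[Pred]) {           // Known live-out value; stop here.
        if (TheVal && TheVal != G.DefValue[Pred])
          Unique = false;
        TheVal = G.DefValue[Pred];
        continue;
      }
      if (Seen.insert(Pred).second)     // Pred needs a live-in value too.
        WorkList.push_back(Pred);
    }
  }
  LiveInBlocks.assign(WorkList.rbegin(), WorkList.rend());
  return Unique ? TheVal : 0;
}

int main() {
  // Diamond CFG 0 -> {1,2} -> 3 with different defs in blocks 1 and 2.
  CFG G;
  G.Preds.resize(4);
  G.Preds[1].push_back(0);
  G.Preds[2].push_back(0);
  G.Preds[3].push_back(1);
  G.Preds[3].push_back(2);
  G.DefValue.assign(4, 0);
  G.DefValue[1] = 7;
  G.DefValue[2] = 8;

  std::vector<int> LiveIn;
  int V = findReachingDefs(G, 3, LiveIn);
  // Two values (7 and 8) reach block 3, so V is 0 and the caller would fall
  // back to the iterative pass and place a phi-def at the top of block 3.
  std::printf("unique reaching value: %d\n", V);
  return 0;
}

In the single-value case the caller skips the phi machinery entirely and just colors the collected blocks with that one value, which is the "poor man's SSA update" fast path seen in extendRange above.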
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 04d3d31..90cb72f 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -34,6 +34,7 @@ STATISTIC(NumTails , "Number of tails duplicated"); STATISTIC(NumTailDups , "Number of tail duplicated blocks"); STATISTIC(NumInstrDups , "Additional instructions due to tail duplication"); STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); +STATISTIC(NumAddedPHIs , "Number of phis added"); // Heuristic for tail duplication. static cl::opt<unsigned> @@ -80,16 +81,21 @@ namespace { void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, DenseMap<unsigned, unsigned> &LocalVRMap, - SmallVector<std::pair<unsigned,unsigned>, 4> &Copies); + SmallVector<std::pair<unsigned,unsigned>, 4> &Copies, + const DenseSet<unsigned> &UsedByPhi, + bool Remove); void DuplicateInstruction(MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, MachineFunction &MF, - DenseMap<unsigned, unsigned> &LocalVRMap); + DenseMap<unsigned, unsigned> &LocalVRMap, + const DenseSet<unsigned> &UsedByPhi); void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, SmallVector<MachineBasicBlock*, 8> &TDBBs, SmallSetVector<MachineBasicBlock*, 8> &Succs); bool TailDuplicateBlocks(MachineFunction &MF); + bool shouldTailDuplicate(const MachineFunction &MF, + MachineBasicBlock &TailBB); bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, SmallVector<MachineBasicBlock*, 8> &TDBBs, SmallVector<MachineInstr*, 16> &Copies); @@ -146,11 +152,11 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); if (CheckExtra && !Preds.count(PHIBB)) { - // This is not a hard error. dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; dbgs() << " extra input from predecessor BB#" << PHIBB->getNumber() << '\n'; + llvm_unreachable(0); } if (PHIBB->getNumber() < 0) { dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; @@ -183,10 +189,6 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { if (NumTails == TailDupLimit) break; - // Only duplicate blocks that end with unconditional branches. - if (MBB->canFallThrough()) - continue; - // Save the successors list. SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(), MBB->succ_end()); @@ -240,7 +242,7 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { MachineOperand &UseMO = UI.getOperand(); MachineInstr *UseMI = &*UI; ++UI; - if (UseMI->getParent() == DefBB) + if (UseMI->getParent() == DefBB && !UseMI->isPHI()) continue; SSAUpdate.RewriteUse(UseMO); } @@ -271,6 +273,7 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { MadeChange = true; } } + NumAddedPHIs += NewPHIs.size(); return MadeChange; } @@ -293,6 +296,24 @@ static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { return 0; } + +// Remember which registers are used by phis in this block. This is +// used to determine which registers are liveout while modifying the +// block (which is why we need to copy the information). 
+static void getRegsUsedByPHIs(const MachineBasicBlock &BB, + DenseSet<unsigned> *UsedByPhi) { + for (MachineBasicBlock::const_iterator I = BB.begin(), E = BB.end(); + I != E; ++I) { + const MachineInstr &MI = *I; + if (!MI.isPHI()) + break; + for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { + unsigned SrcReg = MI.getOperand(i).getReg(); + UsedByPhi->insert(SrcReg); + } + } +} + /// AddSSAUpdateEntry - Add a definition and source virtual registers pair for /// SSA update. void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, @@ -315,7 +336,9 @@ void TailDuplicatePass::ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, DenseMap<unsigned, unsigned> &LocalVRMap, - SmallVector<std::pair<unsigned,unsigned>, 4> &Copies) { + SmallVector<std::pair<unsigned,unsigned>, 4> &Copies, + const DenseSet<unsigned> &RegsUsedByPhi, + bool Remove) { unsigned DefReg = MI->getOperand(0).getReg(); unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); assert(SrcOpIdx && "Unable to find matching PHI source?"); @@ -327,9 +350,12 @@ void TailDuplicatePass::ProcessPHI(MachineInstr *MI, // available value liveout of the block. unsigned NewDef = MRI->createVirtualRegister(RC); Copies.push_back(std::make_pair(NewDef, SrcReg)); - if (isDefLiveOut(DefReg, TailBB, MRI)) + if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg)) AddSSAUpdateEntry(DefReg, NewDef, PredBB); + if (!Remove) + return; + // Remove PredBB from the PHI node. MI->RemoveOperand(SrcOpIdx+1); MI->RemoveOperand(SrcOpIdx); @@ -343,7 +369,8 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, MachineFunction &MF, - DenseMap<unsigned, unsigned> &LocalVRMap) { + DenseMap<unsigned, unsigned> &LocalVRMap, + const DenseSet<unsigned> &UsedByPhi) { MachineInstr *NewMI = TII->duplicate(MI, MF); for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = NewMI->getOperand(i); @@ -357,7 +384,7 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, unsigned NewReg = MRI->createVirtualRegister(RC); MO.setReg(NewReg); LocalVRMap.insert(std::make_pair(Reg, NewReg)); - if (isDefLiveOut(Reg, TailBB, MRI)) + if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg)) AddSSAUpdateEntry(Reg, NewReg, PredBB); } else { DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg); @@ -416,6 +443,13 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, // This register is defined in the tail block. for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { MachineBasicBlock *SrcBB = LI->second[j].first; + // If we didn't duplicate a bb into a particular predecessor, we + // might still have added an entry to SSAUpdateVals to correctly + // recompute SSA. In that case, avoid adding a dummy extra argument + // to this PHI. + if (!SrcBB->isSuccessor(SuccBB)) + continue; + unsigned SrcReg = LI->second[j].second; if (Idx != 0) { II->getOperand(Idx).setReg(SrcReg); @@ -448,14 +482,19 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, } } -/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each -/// of its predecessors. +/// shouldTailDuplicate - Determine if it is profitable to duplicate this block.
bool -TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, - SmallVector<MachineBasicBlock*, 8> &TDBBs, - SmallVector<MachineInstr*, 16> &Copies) { - // Set the limit on the number of instructions to duplicate, with a default - // of one less than the tail-merge threshold. When optimizing for size, +TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, + MachineBasicBlock &TailBB) { + // Only duplicate blocks that end with unconditional branches. + if (TailBB.canFallThrough()) + return false; + + // Don't try to tail-duplicate single-block loops. + if (TailBB.isSuccessor(&TailBB)) + return false; + + // Set the limit on the cost to duplicate. When optimizing for size, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. unsigned MaxDuplicateCount; @@ -465,49 +504,56 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, else MaxDuplicateCount = TailDuplicateSize; - if (PreRegAlloc) { - if (TailBB->empty()) - return false; - const TargetInstrDesc &TID = TailBB->back().getDesc(); - // Pre-regalloc tail duplication hurts compile time and doesn't help - // much except for indirect branches and returns. - if (!TID.isIndirectBranch() && !TID.isReturn()) - return false; - // If the target has hardware branch prediction that can handle indirect - // branches, duplicating them can often make them predictable when there - // are common paths through the code. The limit needs to be high enough - // to allow undoing the effects of tail merging and other optimizations - // that rearrange the predecessors of the indirect branch. - MaxDuplicateCount = 20; - } + // If the target has hardware branch prediction that can handle indirect + // branches, duplicating them can often make them predictable when there + // are common paths through the code. The limit needs to be high enough + // to allow undoing the effects of tail merging and other optimizations + // that rearrange the predecessors of the indirect branch. - // Don't try to tail-duplicate single-block loops. - if (TailBB->isSuccessor(TailBB)) - return false; + if (PreRegAlloc && !TailBB.empty()) { + const TargetInstrDesc &TID = TailBB.back().getDesc(); + if (TID.isIndirectBranch()) + MaxDuplicateCount = 20; + } // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. unsigned InstrCount = 0; - bool HasCall = false; - for (MachineBasicBlock::iterator I = TailBB->begin(); - I != TailBB->end(); ++I) { + for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end(); + ++I) { // Non-duplicable things shouldn't be tail-duplicated. - if (I->getDesc().isNotDuplicable()) return false; + if (I->getDesc().isNotDuplicable()) + return false; + + // Do not duplicate 'return' instructions if this is a pre-regalloc run. // A return may expand into a lot more instructions (e.g. reload of callee // saved registers) after PEI. - if (PreRegAlloc && I->getDesc().isReturn()) return false; - // Don't duplicate more than the threshold. - if (InstrCount == MaxDuplicateCount) return false; - // Remember if we saw a call. - if (I->getDesc().isCall()) HasCall = true; + if (PreRegAlloc && I->getDesc().isReturn()) + return false; + + // Avoid duplicating calls before register allocation. Calls present a + // barrier to register allocation, so duplicating them may end up increasing + // spills.
+ if (PreRegAlloc && I->getDesc().isCall()) + return false; + if (!I->isPHI() && !I->isDebugValue()) InstrCount += 1; + + if (InstrCount > MaxDuplicateCount) + return false; } - // Don't tail-duplicate calls before register allocation. Calls presents a - // barrier to register allocation so duplicating them may end up increasing - // spills. - if (InstrCount > 1 && (PreRegAlloc && HasCall)) + + return true; +} + +/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each +/// of its predecessors. +bool +TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, + SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallVector<MachineInstr*, 16> &Copies) { + if (!shouldTailDuplicate(MF, *TailBB)) return false; DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); @@ -518,13 +564,17 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, bool Changed = false; SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), TailBB->pred_end()); + DenseSet<unsigned> UsedByPhi; + getRegsUsedByPHIs(*TailBB, &UsedByPhi); for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; assert(TailBB != PredBB && "Single-block loop should have been rejected earlier!"); - if (PredBB->succ_size() > 1) continue; + // EH edges are ignored by AnalyzeBranch. + if (PredBB->succ_size() > 1) + continue; MachineBasicBlock *PredTBB, *PredFBB; SmallVector<MachineOperand, 4> PredCond; @@ -532,9 +582,6 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, continue; if (!PredCond.empty()) continue; - // EH edges are ignored by AnalyzeBranch. - if (PredBB->succ_size() != 1) - continue; // Don't duplicate into a fall-through predecessor (at least for now). if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) continue; @@ -557,11 +604,11 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, if (MI->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. - ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos); + ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); } else { // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. - DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap); + DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi); } } MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); @@ -570,6 +617,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, TII->get(TargetOpcode::COPY), CopyInfos[i].first).addReg(CopyInfos[i].second)); } + + // Let AnalyzeBranch with AllowModify set simplify the branches just copied + // into PredBB. + TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); + NumInstrDups += TailBB->size() - 1; // subtract one for removed branch // Update the CFG. @@ -590,12 +641,11 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB)); MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; SmallVector<MachineOperand, 4> PriorCond; - bool PriorUnAnalyzable = - TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true); // This has to check PrevBB->succ_size() because EH edges are ignored by // AnalyzeBranch.
- if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB && - TailBB->pred_size() == 1 && PrevBB->succ_size() == 1 && + if (PrevBB->succ_size() == 1 && + !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && + PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && !TailBB->hasAddressTaken()) { DEBUG(dbgs() << "\nMerging into block: " << *PrevBB << "From MBB: " << *TailBB); @@ -608,7 +658,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, // Replace the uses of the def of the PHI with the register coming // from PredBB. MachineInstr *MI = &*I++; - ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos); + ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); if (MI->getParent()) MI->eraseFromParent(); } @@ -618,7 +668,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. MachineInstr *MI = &*I++; - DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap); + DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi); MI->eraseFromParent(); } MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); @@ -639,6 +689,57 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, Changed = true; } + // If this is after register allocation, there are no phis to fix. + if (!PreRegAlloc) + return Changed; + + // If we made no changes so far, we are safe. + if (!Changed) + return Changed; + + + // Handle the nasty case in which we duplicated a block that is part of a loop + // into some but not all of its predecessors. For example: + // 1 -> 2 <-> 3 | + // \ | + // \---> rest | + // if we duplicate 2 into 1 but not into 3, we end up with + // 12 -> 3 <-> 2 -> rest | + // \ / | + // \----->-----/ | + // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced + // with a phi in 3 (which now dominates 2). + // What we do here is introduce a copy in 3 of the register defined by the + // phi, just like when we are duplicating 2 into 3, but we don't copy any + // real instructions or remove the 3 -> 2 edge from the phi in 2. + for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), + PE = Preds.end(); PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end()) + continue; + + // EH edges are ignored by AnalyzeBranch. + if (PredBB->succ_size() != 1) + continue; + + DenseMap<unsigned, unsigned> LocalVRMap; + SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; + MachineBasicBlock::iterator I = TailBB->begin(); + // Process PHI instructions first. + while (I != TailBB->end() && I->isPHI()) { + // Replace the uses of the def of the PHI with the register coming + // from PredBB. + MachineInstr *MI = &*I++; + ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); + } + MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); + for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { + Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), + TII->get(TargetOpcode::COPY), + CopyInfos[i].first).addReg(CopyInfos[i].second)); + } + } + return Changed; } @@ -655,4 +756,3 @@ void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) { // Remove the block.
MBB->eraseFromParent(); } - diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 15340a3..34e2b33 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -212,8 +212,7 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI, if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg())) return RC->contains(LiveOp.getReg()) ? RC : 0; - const TargetRegisterClass *LiveRC = MRI.getRegClass(LiveReg); - if (RC == LiveRC || RC->hasSubClass(LiveRC)) + if (RC->hasSubClassEq(MRI.getRegClass(LiveReg))) return RC; // FIXME: Allow folding when register classes are memory compatible. @@ -388,11 +387,6 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, if (MO.isDef() != (i == 0)) return false; - // For the def, it should be the only def of that register. - if (MO.isDef() && (llvm::next(MRI.def_begin(Reg)) != MRI.def_end() || - MRI.isLiveIn(Reg))) - return false; - // Don't allow any virtual-register uses. Rematting an instruction with // virtual register uses would lengthen the live ranges of the uses, which // is not necessarily a good idea, certainly not "trivial". diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index f332d12..2da1bd4 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -23,6 +23,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" @@ -176,12 +177,59 @@ const MCSection *TargetLoweringObjectFileELF::getEHFrameSection() const { SectionKind::getDataRel()); } +MCSymbol * +TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV, + Mangler *Mang, + MachineModuleInfo *MMI) const { + unsigned Encoding = getPersonalityEncoding(); + switch (Encoding & 0x70) { + default: + report_fatal_error("We do not support this DWARF encoding yet!"); + case dwarf::DW_EH_PE_absptr: + return Mang->getSymbol(GV); + break; + case dwarf::DW_EH_PE_pcrel: { + return getContext().GetOrCreateSymbol(StringRef("DW.ref.") + + Mang->getSymbol(GV)->getName()); + break; + } + } +} + +void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, + const TargetMachine &TM, + const MCSymbol *Sym) const { + SmallString<64> NameData("DW.ref."); + NameData += Sym->getName(); + MCSymbol *Label = getContext().GetOrCreateSymbol(NameData); + Streamer.EmitSymbolAttribute(Label, MCSA_Hidden); + Streamer.EmitSymbolAttribute(Label, MCSA_Weak); + StringRef Prefix = ".data."; + NameData.insert(NameData.begin(), Prefix.begin(), Prefix.end()); + unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP; + const MCSection *Sec = getContext().getELFSection(NameData, + ELF::SHT_PROGBITS, + Flags, + SectionKind::getDataRel(), + 0, Label->getName()); + Streamer.SwitchSection(Sec); + Streamer.EmitValueToAlignment(8); + Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); + const MCExpr *E = MCConstantExpr::Create(8, getContext()); + Streamer.EmitELFSize(Label, E); + Streamer.EmitLabel(Label); + + unsigned Size = TM.getTargetData()->getPointerSize(); + Streamer.EmitSymbolValue(Sym, Size); +} + static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { - // FIXME: Why is this here? Codegen is should not be in the business
If the user wrote section(".eh_frame"), - // we should just pass that to MC which will defer to the assembly - // or use its default if producing an object file. + // N.B.: The defaults used in here are no the same ones used in MC. + // We follow gcc, MC follows gas. For example, given ".section .eh_frame", + // both gas and MC will produce a section with no flags. Given + // section(".eh_frame") gcc will produce + // .section .eh_frame,"a",@progbits if (Name.empty() || Name[0] != '.') return K; // Some lame default implementation based on some magic section names. @@ -207,9 +255,6 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) { Name.startswith(".llvm.linkonce.tb.")) return SectionKind::getThreadBSS(); - if (Name == ".eh_frame") - return SectionKind::getDataRel(); - return K; } @@ -424,8 +469,7 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, } return TargetLoweringObjectFile:: - getExprForDwarfReference(SSym, Mang, MMI, - Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); + getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); } return TargetLoweringObjectFile:: @@ -438,26 +482,13 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, const TargetMachine &TM) { - // _foo.eh symbols are currently always exported so that the linker knows - // about them. This is not necessary on 10.6 and later, but it - // doesn't hurt anything. - // FIXME: I need to get this from Triple. - IsFunctionEHSymbolGlobal = true; IsFunctionEHFrameSymbolPrivate = false; SupportsWeakOmittedEHFrame = false; + // .comm doesn't support alignment before Leopard. Triple T(((LLVMTargetMachine&)TM).getTargetTriple()); - if (T.getOS() == Triple::Darwin) { - switch (T.getDarwinMajorNumber()) { - case 7: // 10.3 Panther. - case 8: // 10.4 Tiger. - CommDirectiveSupportsAlignment = false; - break; - case 9: // 10.5 Leopard. - case 10: // 10.6 SnowLeopard. - break; - } - } + if (T.isMacOSX() && T.isMacOSXVersionLT(10, 5)) + CommDirectiveSupportsAlignment = false; TargetLoweringObjectFile::Initialize(Ctx, TM); @@ -803,14 +834,36 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, } return TargetLoweringObjectFile:: - getExprForDwarfReference(SSym, Mang, MMI, - Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); + getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); } return TargetLoweringObjectFile:: getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer); } +MCSymbol *TargetLoweringObjectFileMachO:: +getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI) const { + // The mach-o version of this method defaults to returning a stub reference. + MachineModuleInfoMachO &MachOMMI = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += "$non_lazy_ptr"; + + // Add information about the stub reference to MachOMMI so that the stub + // gets emitted by the asmprinter. 
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); + MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); + if (StubSym.getPointer() == 0) { + MCSymbol *Sym = Mang->getSymbol(GV); + StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); + } + + return SSym; +} + unsigned TargetLoweringObjectFileMachO::getPersonalityEncoding() const { return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; } @@ -819,7 +872,7 @@ unsigned TargetLoweringObjectFileMachO::getLSDAEncoding() const { return DW_EH_PE_pcrel; } -unsigned TargetLoweringObjectFileMachO::getFDEEncoding() const { +unsigned TargetLoweringObjectFileMachO::getFDEEncoding(bool CFI) const { return DW_EH_PE_pcrel; } @@ -934,6 +987,20 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, getContext().getCOFFSection(".drectve", COFF::IMAGE_SCN_LNK_INFO, SectionKind::getMetadata()); + + PDataSection = + getContext().getCOFFSection(".pdata", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); + + XDataSection = + getContext().getCOFFSection(".xdata", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); } const MCSection *TargetLoweringObjectFileCOFF::getEHFrameSection() const { @@ -944,6 +1011,28 @@ const MCSection *TargetLoweringObjectFileCOFF::getEHFrameSection() const { SectionKind::getDataRel()); } +const MCSection *TargetLoweringObjectFileCOFF::getWin64EHFuncTableSection( + StringRef suffix) const { + if (suffix == "") + return PDataSection; + return getContext().getCOFFSection((".pdata"+suffix).str(), + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); +} + +const MCSection *TargetLoweringObjectFileCOFF::getWin64EHTableSection( + StringRef suffix) const { + if (suffix == "") + return XDataSection; + return getContext().getCOFFSection((".xdata"+suffix).str(), + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); +} + static unsigned getCOFFSectionFlags(SectionKind K) { diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 52ea872..f54d879 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1125,6 +1125,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { break; // The tied operands have been eliminated. } + bool IsEarlyClobber = false; bool RemovedKillFlag = false; bool AllUsesCopied = true; unsigned LastCopiedReg = 0; @@ -1132,7 +1133,11 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { unsigned SrcIdx = TiedPairs[tpi].first; unsigned DstIdx = TiedPairs[tpi].second; - unsigned regA = mi->getOperand(DstIdx).getReg(); + + const MachineOperand &DstMO = mi->getOperand(DstIdx); + unsigned regA = DstMO.getReg(); + IsEarlyClobber |= DstMO.isEarlyClobber(); + // Grab regB from the instruction because it may have changed if the // instruction was commuted. regB = mi->getOperand(SrcIdx).getReg(); @@ -1196,15 +1201,17 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { } if (AllUsesCopied) { - // Replace other (un-tied) uses of regB with LastCopiedReg. 
- for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO = mi->getOperand(i); - if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { - if (MO.isKill()) { - MO.setIsKill(false); - RemovedKillFlag = true; + if (!IsEarlyClobber) { + // Replace other (un-tied) uses of regB with LastCopiedReg. + for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { + MachineOperand &MO = mi->getOperand(i); + if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + MO.setReg(LastCopiedReg); } - MO.setReg(LastCopiedReg); } } diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index 48d8ab1..52693f0 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -196,8 +196,11 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { temp->eraseFromParent(); ModifiedPHI = true; - if (Input != Output) - F.getRegInfo().replaceRegWith(Output, Input); + if (Input != Output) { + MachineRegisterInfo &MRI = F.getRegInfo(); + MRI.constrainRegClass(Input, MRI.getRegClass(Output)); + MRI.replaceRegWith(Output, Input); + } continue; } diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 7a7ea69..7557979 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -42,6 +42,7 @@ using namespace llvm; STATISTIC(NumSpills , "Number of register spills"); +STATISTIC(NumIdCopies, "Number of identity moves eliminated after rewriting"); //===----------------------------------------------------------------------===// // VirtRegMap implementation @@ -260,6 +261,8 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { << "********** Function: " << MF->getFunction()->getName() << '\n'); DEBUG(dump()); + SmallVector<unsigned, 8> SuperDeads; + SmallVector<unsigned, 8> SuperDefs; SmallVector<unsigned, 8> SuperKills; for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); @@ -283,12 +286,13 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { if (MO.getSubReg()) { // A virtual register kill refers to the whole register, so we may // have to add <imp-use,kill> operands for the super-register. - if (MO.isUse() && MO.isKill() && !MO.isUndef()) - SuperKills.push_back(PhysReg); - - // We don't have to deal with sub-register defs because - // LiveIntervalAnalysis already added the necessary <imp-def> - // operands. + if (MO.isUse()) { + if (MO.isKill() && !MO.isUndef()) + SuperKills.push_back(PhysReg); + } else if (MO.isDead()) + SuperDeads.push_back(PhysReg); + else + SuperDefs.push_back(PhysReg); // PhysReg operands cannot have subregister indexes. PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg()); @@ -305,10 +309,17 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { while (!SuperKills.empty()) MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true); + while (!SuperDeads.empty()) + MI->addRegisterDead(SuperDeads.pop_back_val(), TRI, true); + + while (!SuperDefs.empty()) + MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI); + DEBUG(dbgs() << "> " << *MI); // Finally, remove any identity copies. 
if (MI->isIdentityCopy()) { + ++NumIdCopies; if (MI->getNumOperands() == 2) { DEBUG(dbgs() << "Deleting identity copy.\n"); RemoveMachineInstrFromMaps(MI); diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index 67be1b0..1850658 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -32,7 +32,7 @@ STATISTIC(NumCommutes, "Number of instructions commuted"); STATISTIC(NumDRM , "Number of re-materializable defs elided"); STATISTIC(NumStores , "Number of stores added"); STATISTIC(NumPSpills , "Number of physical register spills"); -STATISTIC(NumOmitted , "Number of reloads omited"); +STATISTIC(NumOmitted , "Number of reloads omitted"); STATISTIC(NumAvoided , "Number of reloads deemed unnecessary"); STATISTIC(NumCopified, "Number of available reloads turned into copies"); STATISTIC(NumReMats , "Number of re-materialization"); @@ -669,7 +669,7 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI, } } -/// ReMaterialize - Re-materialize definition for Reg targetting DestReg. +/// ReMaterialize - Re-materialize definition for Reg targeting DestReg. /// static void ReMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MII, |

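The IsEarlyClobber guard added to TwoAddressInstructionPass above is worth spelling out. After a tied pair is lowered to "regA = COPY regB", the pass used to redirect any remaining un-tied reads of regB inside the instruction to the copied register; when the tied def is early-clobber, the def is written before the inputs are read, so an input must never share its register. Below is a minimal, self-contained C++ model of just that guard; Operand and the integer register encoding are illustrative stand-ins, not LLVM's MachineOperand.

#include <cstdio>
#include <vector>

// One machine operand: a def or a use of register Reg. The early-clobber
// flag only matters on defs.
struct Operand {
  bool IsDef;
  bool IsEarlyClobber;
  int Reg;
};

// After "regA = COPY regB" has been inserted for a tied pair, rewrite the
// remaining un-tied uses of SrcReg to NewReg -- unless any tied def is
// early-clobber, in which case the uses must keep their own register.
static void rewriteUntiedUses(std::vector<Operand> &MI, int SrcReg,
                              int NewReg) {
  bool IsEarlyClobber = false;
  for (unsigned i = 0, e = MI.size(); i != e; ++i)
    if (MI[i].IsDef)
      IsEarlyClobber |= MI[i].IsEarlyClobber;
  if (IsEarlyClobber)
    return; // Inputs must stay disjoint from an early-clobber def.
  for (unsigned i = 0, e = MI.size(); i != e; ++i)
    if (!MI[i].IsDef && MI[i].Reg == SrcReg)
      MI[i].Reg = NewReg;
}

int main() {
  // def reg1 (early-clobber, tied to the first use), use reg2, use reg2.
  std::vector<Operand> MI;
  Operand Def = { true, true, 1 };
  Operand Use = { false, false, 2 };
  MI.push_back(Def);
  MI.push_back(Use);
  MI.push_back(Use);

  rewriteUntiedUses(MI, 2, 1);
  // With the guard, the second use still reads reg2; without it, the use
  // would be redirected to reg1 and overlap the early-clobber def.
  std::printf("second use reads reg%d\n", MI[2].Reg);
  return 0;
}

Without the guard, a use rewritten to the early-clobber def's register would violate the very constraint early-clobber exists to express, which is the miscompile the hunk above prevents.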